diff --git a/runtime/hsa-runtime/core/CMakeLists.txt b/runtime/hsa-runtime/core/CMakeLists.txt deleted file mode 100644 index 3ce04a041d..0000000000 --- a/runtime/hsa-runtime/core/CMakeLists.txt +++ /dev/null @@ -1,171 +0,0 @@ -################################################################################ -## -## The University of Illinois/NCSA -## Open Source License (NCSA) -## -## Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. -## -## Developed by: -## -## AMD Research and AMD HSA Software Development -## -## Advanced Micro Devices, Inc. -## -## www.amd.com -## -## Permission is hereby granted, free of charge, to any person obtaining a copy -## of this software and associated documentation files (the "Software"), to -## deal with the Software without restriction, including without limitation -## the rights to use, copy, modify, merge, publish, distribute, sublicense, -## and#or sell copies of the Software, and to permit persons to whom the -## Software is furnished to do so, subject to the following conditions: -## -## - Redistributions of source code must retain the above copyright notice, -## this list of conditions and the following disclaimers. -## - Redistributions in binary form must reproduce the above copyright -## notice, this list of conditions and the following disclaimers in -## the documentation and#or other materials provided with the distribution. -## - Neither the names of Advanced Micro Devices, Inc, -## nor the names of its contributors may be used to endorse or promote -## products derived from this Software without specific prior written -## permission. -## -## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL -## THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR -## OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -## ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -## DEALINGS WITH THE SOFTWARE. -## -################################################################################ - -cmake_minimum_required ( VERSION 2.8.0 ) -## GCC 4.8 or higher compiler required. - -if ( WIN32 ) - MESSAGE ( FATAL_ERROR "Windows build is not supported." ) -endif () - -list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/../cmake_modules") - -## Process environment variables. -if ( "$ENV{HSATHK_BUILD_TARGET_BITS}" STREQUAL 32 ) - set ( ONLY64STR "" ) - set ( IS64BIT 0 ) -else () - set ( ONLY64STR "64" ) - set ( IS64BIT 1 ) -endif () - -if ( NOT EXISTS $ENV{HSATHK_BUILD_INC_PATH}/hsakmt.h ) - MESSAGE ( FATAL_ERROR "Environment variable HSATHK_BUILD_INC_PATH is not set to point to the location where KFD Thunk header file hsakmt.h (and rest of the thunk headers) could be found." ) -endif () - -if ( NOT EXISTS $ENV{HSATHK_BUILD_LIB_PATH}/libhsakmt.so.1 ) - MESSAGE ( FATAL_ERROR "Environment variable HSATHK_BUILD_LIB_PATH is not set to point to the location where KFD Thunk library libhsakmt.so.1 could be found." ) -endif () - -if ( EXISTS $ENV{LIBSP3_BUILD_INC_PATH}/sp3.h ) - set ( LIBSP3_BUILD_INC_PATH $ENV{LIBSP3_BUILD_INC_PATH} ) -else () - set ( LIBSP3_BUILD_INC_PATH ${CMAKE_CURRENT_SOURCE_DIR}/../utils/sp3 ) -endif () - -if ( EXISTS $ENV{LIBSP3_BUILD_LIB_PATH}/libsp3.a ) - set ( LIBSP3_BUILD_LIB_PATH $ENV{LIBSP3_BUILD_LIB_PATH} ) -else () - set ( LIBSP3_BUILD_LIB_PATH ${CMAKE_CURRENT_SOURCE_DIR}/../utils/sp3 ) -endif () - -MESSAGE ( ------IS64BIT: ${IS64BIT} ) -MESSAGE ( ------Compiler: ${CMAKE_CXX_COMPILER} ) -MESSAGE ( ------Version: ${CMAKE_CXX_COMPILER_VERSION} ) - -## Set core runtime module name and project name. 
-set ( CORE_RUNTIME_NAME "hsa-runtime" ) -set ( CORE_RUNTIME_COMPONENT "lib${CORE_RUNTIME_NAME}" ) -set ( CORE_RUNTIME_TARGET "${CORE_RUNTIME_NAME}${ONLY64STR}" ) -project ( ${CORE_RUNTIME_TARGET} ) - -## Verbose output. -set ( CMAKE_VERBOSE_MAKEFILE on ) - -## Compiler preproc definitions. -add_definitions ( -D__linux__ ) -add_definitions ( -DHSA_EXPORT=1 ) -add_definitions ( -DHSA_EXPORT_FINALIZER=1 ) -add_definitions ( -DHSA_EXPORT_IMAGES=1 ) - -## ------------------------- Linux Compiler and Linker options ------------------------- -set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror -fexceptions -fno-rtti -fvisibility=hidden -Wno-error=sign-compare -Wno-sign-compare -Wno-write-strings -Wno-deprecated-declarations -Wno-conversion-null -fno-math-errno -fno-threadsafe-statics -fmerge-all-constants -fms-extensions -Wno-error=comment -Wno-comment -Wno-error=pointer-arith -Wno-pointer-arith -Wno-error=unused-variable -Wno-error=unused-but-set-variable -Wno-error=unused-function" ) - -set ( DRVDEF "${CMAKE_CURRENT_SOURCE_DIR}/hsacore.so.def" ) - -set ( CMAKE_SHARED_LINKER_FLAGS "-Wl,-Bdynamic -Wl,-z,noexecstack -Wl,--version-script=${DRVDEF}" ) - -set ( CMAKE_SKIP_BUILD_RPATH TRUE) - -## ------------------------- End Compiler and Linker options ---------------------------- - -## Source files. 
-set ( CORE_SRCS util/lnx/os_linux.cpp ) -set ( CORE_SRCS ${CORE_SRCS} util/small_heap.cpp ) -set ( CORE_SRCS ${CORE_SRCS} util/timer.cpp ) -set ( CORE_SRCS ${CORE_SRCS} runtime/amd_blit_kernel.cpp ) -set ( CORE_SRCS ${CORE_SRCS} runtime/amd_blit_sdma.cpp ) -set ( CORE_SRCS ${CORE_SRCS} runtime/amd_cpu_agent.cpp ) -set ( CORE_SRCS ${CORE_SRCS} runtime/amd_gpu_agent.cpp ) -set ( CORE_SRCS ${CORE_SRCS} runtime/amd_aql_queue.cpp ) -set ( CORE_SRCS ${CORE_SRCS} runtime/amd_loader_context.cpp ) -set ( CORE_SRCS ${CORE_SRCS} runtime/amd_load_map.cpp ) -set ( CORE_SRCS ${CORE_SRCS} runtime/amd_memory_region.cpp ) -set ( CORE_SRCS ${CORE_SRCS} runtime/amd_topology.cpp ) -set ( CORE_SRCS ${CORE_SRCS} runtime/default_signal.cpp ) -set ( CORE_SRCS ${CORE_SRCS} runtime/host_queue.cpp ) -set ( CORE_SRCS ${CORE_SRCS} runtime/hsa.cpp ) -set ( CORE_SRCS ${CORE_SRCS} runtime/hsa_api_trace.cpp ) -set ( CORE_SRCS ${CORE_SRCS} runtime/hsa_ext_amd.cpp ) -set ( CORE_SRCS ${CORE_SRCS} runtime/hsa_ext_interface.cpp ) -set ( CORE_SRCS ${CORE_SRCS} runtime/interrupt_signal.cpp ) -set ( CORE_SRCS ${CORE_SRCS} runtime/isa.cpp ) -set ( CORE_SRCS ${CORE_SRCS} runtime/runtime.cpp ) -set ( CORE_SRCS ${CORE_SRCS} runtime/signal.cpp ) -set ( CORE_SRCS ${CORE_SRCS} common/shared.cpp ) -set ( CORE_SRCS ${CORE_SRCS} common/hsa_table_interface.cpp ) - -## Include path(s). -include_directories ( ${CMAKE_CURRENT_SOURCE_DIR}/.. ) -include_directories ( ${CMAKE_CURRENT_SOURCE_DIR}/../inc ) -include_directories ( ${CMAKE_CURRENT_SOURCE_DIR}/inc ) -include_directories ( $ENV{HSATHK_BUILD_INC_PATH} ) -include_directories ( ${LIBSP3_BUILD_INC_PATH} ) - -## Library path(s). 
-link_directories ( $ENV{HSATHK_BUILD_LIB_PATH} ) -link_directories ( ${LIBSP3_BUILD_LIB_PATH} ) - -add_library ( ${CORE_RUNTIME_TARGET} SHARED ${CORE_SRCS} ) - -## Set the VERSION and SOVERSION values -if ( DEFINED VERSION_STRING ) - set_property ( TARGET ${CORE_RUNTIME_TARGET} PROPERTY VERSION "${VERSION_STRING}" ) -endif () - -set_property ( TARGET ${CORE_RUNTIME_TARGET} PROPERTY SOVERSION "${VERSION_MAJOR}" ) - -target_link_libraries ( ${CORE_RUNTIME_TARGET} - PRIVATE amdhsaloader - PRIVATE amdhsacode - PRIVATE hsakmt - PRIVATE sp3 - dl pthread rt -) - -## If the build is Release, strip the target library -if ( "${CMAKE_BUILD_TYPE}" STREQUAL Release ) - add_custom_command ( TARGET ${CORE_RUNTIME_TARGET} POST_BUILD COMMAND ${CMAKE_STRIP} *.so ) -endif () - -## Set install information -install ( TARGETS ${CORE_RUNTIME_TARGET} LIBRARY DESTINATION lib COMPONENT ${CORE_RUNTIME_COMPONENT}) diff --git a/runtime/hsa-runtime/core/common/hsa_table_interface.cpp b/runtime/hsa-runtime/core/common/hsa_table_interface.cpp deleted file mode 100644 index 4e1b6d44b0..0000000000 --- a/runtime/hsa-runtime/core/common/hsa_table_interface.cpp +++ /dev/null @@ -1,604 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// -// The University of Illinois/NCSA -// Open Source License (NCSA) -// -// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. -// -// Developed by: -// -// AMD Research and AMD HSA Software Development -// -// Advanced Micro Devices, Inc. 
-// -// www.amd.com -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal with the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimers. -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimers in -// the documentation and/or other materials provided with the distribution. -// - Neither the names of Advanced Micro Devices, Inc, -// nor the names of its contributors may be used to endorse or promote -// products derived from this Software without specific prior written -// permission. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR -// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// DEALINGS WITH THE SOFTWARE. 
-// -//////////////////////////////////////////////////////////////////////////////// - -#include "hsa_api_trace.h" - -static const ApiTable* HsaApiTable; - -void hsa_table_interface_init(const ApiTable* Table) { HsaApiTable = Table; } - -const ApiTable* hsa_table_interface_get_table() { return HsaApiTable; } - -// Pass through stub functions -hsa_status_t HSA_API hsa_init() { return HsaApiTable->hsa_init_fn(); } - -hsa_status_t HSA_API hsa_shut_down() { return HsaApiTable->hsa_shut_down_fn(); } - -hsa_status_t HSA_API - hsa_system_get_info(hsa_system_info_t attribute, void* value) { - return HsaApiTable->hsa_system_get_info_fn(attribute, value); -} - -hsa_status_t HSA_API - hsa_system_extension_supported(uint16_t extension, uint16_t version_major, - uint16_t version_minor, bool* result) { - return HsaApiTable->hsa_system_extension_supported_fn( - extension, version_major, version_minor, result); -} - -hsa_status_t HSA_API - hsa_system_get_extension_table(uint16_t extension, uint16_t version_major, - uint16_t version_minor, void* table) { - return HsaApiTable->hsa_system_get_extension_table_fn( - extension, version_major, version_minor, table); -} - -hsa_status_t HSA_API - hsa_iterate_agents(hsa_status_t (*callback)(hsa_agent_t agent, void* data), - void* data) { - return HsaApiTable->hsa_iterate_agents_fn(callback, data); -} - -hsa_status_t HSA_API hsa_agent_get_info(hsa_agent_t agent, - hsa_agent_info_t attribute, - void* value) { - return HsaApiTable->hsa_agent_get_info_fn(agent, attribute, value); -} - -hsa_status_t HSA_API hsa_agent_get_exception_policies(hsa_agent_t agent, - hsa_profile_t profile, - uint16_t* mask) { - return HsaApiTable->hsa_agent_get_exception_policies_fn(agent, profile, mask); -} - -hsa_status_t HSA_API - hsa_agent_extension_supported(uint16_t extension, hsa_agent_t agent, - uint16_t version_major, - uint16_t version_minor, bool* result) { - return HsaApiTable->hsa_agent_extension_supported_fn( - extension, agent, version_major, 
version_minor, result); -} - -hsa_status_t HSA_API - hsa_queue_create(hsa_agent_t agent, uint32_t size, hsa_queue_type_t type, - void (*callback)(hsa_status_t status, hsa_queue_t* source, - void* data), - void* data, uint32_t private_segment_size, - uint32_t group_segment_size, hsa_queue_t** queue) { - return HsaApiTable->hsa_queue_create_fn(agent, size, type, callback, data, - private_segment_size, - group_segment_size, queue); -} - -hsa_status_t HSA_API - hsa_soft_queue_create(hsa_region_t region, uint32_t size, - hsa_queue_type_t type, uint32_t features, - hsa_signal_t completion_signal, hsa_queue_t** queue) { - return HsaApiTable->hsa_soft_queue_create_fn(region, size, type, features, - completion_signal, queue); -} - -hsa_status_t HSA_API hsa_queue_destroy(hsa_queue_t* queue) { - return HsaApiTable->hsa_queue_destroy_fn(queue); -} - -hsa_status_t HSA_API hsa_queue_inactivate(hsa_queue_t* queue) { - return HsaApiTable->hsa_queue_inactivate_fn(queue); -} - -uint64_t HSA_API hsa_queue_load_read_index_acquire(const hsa_queue_t* queue) { - return HsaApiTable->hsa_queue_load_read_index_acquire_fn(queue); -} - -uint64_t HSA_API hsa_queue_load_read_index_relaxed(const hsa_queue_t* queue) { - return HsaApiTable->hsa_queue_load_read_index_relaxed_fn(queue); -} - -uint64_t HSA_API hsa_queue_load_write_index_acquire(const hsa_queue_t* queue) { - return HsaApiTable->hsa_queue_load_write_index_acquire_fn(queue); -} - -uint64_t HSA_API hsa_queue_load_write_index_relaxed(const hsa_queue_t* queue) { - return HsaApiTable->hsa_queue_load_write_index_relaxed_fn(queue); -} - -void HSA_API hsa_queue_store_write_index_relaxed(const hsa_queue_t* queue, - uint64_t value) { - return HsaApiTable->hsa_queue_store_write_index_relaxed_fn(queue, value); -} - -void HSA_API hsa_queue_store_write_index_release(const hsa_queue_t* queue, - uint64_t value) { - return HsaApiTable->hsa_queue_store_write_index_release_fn(queue, value); -} - -uint64_t HSA_API hsa_queue_cas_write_index_acq_rel(const 
hsa_queue_t* queue, - uint64_t expected, - uint64_t value) { - return HsaApiTable->hsa_queue_cas_write_index_acq_rel_fn(queue, expected, - value); -} - -uint64_t HSA_API hsa_queue_cas_write_index_acquire(const hsa_queue_t* queue, - uint64_t expected, - uint64_t value) { - return HsaApiTable->hsa_queue_cas_write_index_acquire_fn(queue, expected, - value); -} - -uint64_t HSA_API hsa_queue_cas_write_index_relaxed(const hsa_queue_t* queue, - uint64_t expected, - uint64_t value) { - return HsaApiTable->hsa_queue_cas_write_index_relaxed_fn(queue, expected, - value); -} - -uint64_t HSA_API hsa_queue_cas_write_index_release(const hsa_queue_t* queue, - uint64_t expected, - uint64_t value) { - return HsaApiTable->hsa_queue_cas_write_index_release_fn(queue, expected, - value); -} - -uint64_t HSA_API hsa_queue_add_write_index_acq_rel(const hsa_queue_t* queue, - uint64_t value) { - return HsaApiTable->hsa_queue_add_write_index_acq_rel_fn(queue, value); -} - -uint64_t HSA_API hsa_queue_add_write_index_acquire(const hsa_queue_t* queue, - uint64_t value) { - return HsaApiTable->hsa_queue_add_write_index_acquire_fn(queue, value); -} - -uint64_t HSA_API hsa_queue_add_write_index_relaxed(const hsa_queue_t* queue, - uint64_t value) { - return HsaApiTable->hsa_queue_add_write_index_relaxed_fn(queue, value); -} - -uint64_t HSA_API hsa_queue_add_write_index_release(const hsa_queue_t* queue, - uint64_t value) { - return HsaApiTable->hsa_queue_add_write_index_release_fn(queue, value); -} - -void HSA_API hsa_queue_store_read_index_relaxed(const hsa_queue_t* queue, - uint64_t value) { - return HsaApiTable->hsa_queue_store_read_index_relaxed_fn(queue, value); -} - -void HSA_API hsa_queue_store_read_index_release(const hsa_queue_t* queue, - uint64_t value) { - return HsaApiTable->hsa_queue_store_read_index_release_fn(queue, value); -} - -hsa_status_t HSA_API hsa_agent_iterate_regions( - hsa_agent_t agent, - hsa_status_t (*callback)(hsa_region_t region, void* data), void* data) { - return 
HsaApiTable->hsa_agent_iterate_regions_fn(agent, callback, data); -} - -hsa_status_t HSA_API hsa_region_get_info(hsa_region_t region, - hsa_region_info_t attribute, - void* value) { - return HsaApiTable->hsa_region_get_info_fn(region, attribute, value); -} - -hsa_status_t HSA_API hsa_memory_register(void* address, size_t size) { - return HsaApiTable->hsa_memory_register_fn(address, size); -} - -hsa_status_t HSA_API hsa_memory_deregister(void* address, size_t size) { - return HsaApiTable->hsa_memory_deregister_fn(address, size); -} - -hsa_status_t HSA_API - hsa_memory_allocate(hsa_region_t region, size_t size, void** ptr) { - return HsaApiTable->hsa_memory_allocate_fn(region, size, ptr); -} - -hsa_status_t HSA_API hsa_memory_free(void* ptr) { - return HsaApiTable->hsa_memory_free_fn(ptr); -} - -hsa_status_t HSA_API hsa_memory_copy(void* dst, const void* src, size_t size) { - return HsaApiTable->hsa_memory_copy_fn(dst, src, size); -} - -hsa_status_t HSA_API hsa_memory_assign_agent(void* ptr, hsa_agent_t agent, - hsa_access_permission_t access) { - return HsaApiTable->hsa_memory_assign_agent_fn(ptr, agent, access); -} - -hsa_status_t HSA_API - hsa_signal_create(hsa_signal_value_t initial_value, uint32_t num_consumers, - const hsa_agent_t* consumers, hsa_signal_t* signal) { - return HsaApiTable->hsa_signal_create_fn(initial_value, num_consumers, - consumers, signal); -} - -hsa_status_t HSA_API hsa_signal_destroy(hsa_signal_t signal) { - return HsaApiTable->hsa_signal_destroy_fn(signal); -} - -hsa_signal_value_t HSA_API hsa_signal_load_relaxed(hsa_signal_t signal) { - return HsaApiTable->hsa_signal_load_relaxed_fn(signal); -} - -hsa_signal_value_t HSA_API hsa_signal_load_acquire(hsa_signal_t signal) { - return HsaApiTable->hsa_signal_load_acquire_fn(signal); -} - -void HSA_API - hsa_signal_store_relaxed(hsa_signal_t signal, hsa_signal_value_t value) { - return HsaApiTable->hsa_signal_store_relaxed_fn(signal, value); -} - -void HSA_API - 
hsa_signal_store_release(hsa_signal_t signal, hsa_signal_value_t value) { - return HsaApiTable->hsa_signal_store_release_fn(signal, value); -} - -hsa_signal_value_t HSA_API - hsa_signal_wait_relaxed(hsa_signal_t signal, - hsa_signal_condition_t condition, - hsa_signal_value_t compare_value, - uint64_t timeout_hint, - hsa_wait_state_t wait_expectancy_hint) { - return HsaApiTable->hsa_signal_wait_relaxed_fn( - signal, condition, compare_value, timeout_hint, wait_expectancy_hint); -} - -hsa_signal_value_t HSA_API - hsa_signal_wait_acquire(hsa_signal_t signal, - hsa_signal_condition_t condition, - hsa_signal_value_t compare_value, - uint64_t timeout_hint, - hsa_wait_state_t wait_expectancy_hint) { - return HsaApiTable->hsa_signal_wait_acquire_fn( - signal, condition, compare_value, timeout_hint, wait_expectancy_hint); -} - -void HSA_API - hsa_signal_and_relaxed(hsa_signal_t signal, hsa_signal_value_t value) { - return HsaApiTable->hsa_signal_and_relaxed_fn(signal, value); -} - -void HSA_API - hsa_signal_and_acquire(hsa_signal_t signal, hsa_signal_value_t value) { - return HsaApiTable->hsa_signal_and_acquire_fn(signal, value); -} - -void HSA_API - hsa_signal_and_release(hsa_signal_t signal, hsa_signal_value_t value) { - return HsaApiTable->hsa_signal_and_release_fn(signal, value); -} - -void HSA_API - hsa_signal_and_acq_rel(hsa_signal_t signal, hsa_signal_value_t value) { - return HsaApiTable->hsa_signal_and_acq_rel_fn(signal, value); -} - -void HSA_API - hsa_signal_or_relaxed(hsa_signal_t signal, hsa_signal_value_t value) { - return HsaApiTable->hsa_signal_or_relaxed_fn(signal, value); -} - -void HSA_API - hsa_signal_or_acquire(hsa_signal_t signal, hsa_signal_value_t value) { - return HsaApiTable->hsa_signal_or_acquire_fn(signal, value); -} - -void HSA_API - hsa_signal_or_release(hsa_signal_t signal, hsa_signal_value_t value) { - return HsaApiTable->hsa_signal_or_release_fn(signal, value); -} - -void HSA_API - hsa_signal_or_acq_rel(hsa_signal_t signal, 
hsa_signal_value_t value) { - return HsaApiTable->hsa_signal_or_acq_rel_fn(signal, value); -} - -void HSA_API - hsa_signal_xor_relaxed(hsa_signal_t signal, hsa_signal_value_t value) { - return HsaApiTable->hsa_signal_xor_relaxed_fn(signal, value); -} - -void HSA_API - hsa_signal_xor_acquire(hsa_signal_t signal, hsa_signal_value_t value) { - return HsaApiTable->hsa_signal_xor_acquire_fn(signal, value); -} - -void HSA_API - hsa_signal_xor_release(hsa_signal_t signal, hsa_signal_value_t value) { - return HsaApiTable->hsa_signal_xor_release_fn(signal, value); -} - -void HSA_API - hsa_signal_xor_acq_rel(hsa_signal_t signal, hsa_signal_value_t value) { - return HsaApiTable->hsa_signal_xor_acq_rel_fn(signal, value); -} - -void HSA_API - hsa_signal_add_relaxed(hsa_signal_t signal, hsa_signal_value_t value) { - return HsaApiTable->hsa_signal_add_relaxed_fn(signal, value); -} - -void HSA_API - hsa_signal_add_acquire(hsa_signal_t signal, hsa_signal_value_t value) { - return HsaApiTable->hsa_signal_add_acquire_fn(signal, value); -} - -void HSA_API - hsa_signal_add_release(hsa_signal_t signal, hsa_signal_value_t value) { - return HsaApiTable->hsa_signal_add_release_fn(signal, value); -} - -void HSA_API - hsa_signal_add_acq_rel(hsa_signal_t signal, hsa_signal_value_t value) { - return HsaApiTable->hsa_signal_add_acq_rel_fn(signal, value); -} - -void HSA_API - hsa_signal_subtract_relaxed(hsa_signal_t signal, hsa_signal_value_t value) { - return HsaApiTable->hsa_signal_subtract_relaxed_fn(signal, value); -} - -void HSA_API - hsa_signal_subtract_acquire(hsa_signal_t signal, hsa_signal_value_t value) { - return HsaApiTable->hsa_signal_subtract_acquire_fn(signal, value); -} - -void HSA_API - hsa_signal_subtract_release(hsa_signal_t signal, hsa_signal_value_t value) { - return HsaApiTable->hsa_signal_subtract_release_fn(signal, value); -} - -void HSA_API - hsa_signal_subtract_acq_rel(hsa_signal_t signal, hsa_signal_value_t value) { - return 
HsaApiTable->hsa_signal_subtract_acq_rel_fn(signal, value); -} - -hsa_signal_value_t HSA_API - hsa_signal_exchange_relaxed(hsa_signal_t signal, hsa_signal_value_t value) { - return HsaApiTable->hsa_signal_exchange_relaxed_fn(signal, value); -} - -hsa_signal_value_t HSA_API - hsa_signal_exchange_acquire(hsa_signal_t signal, hsa_signal_value_t value) { - return HsaApiTable->hsa_signal_exchange_acquire_fn(signal, value); -} - -hsa_signal_value_t HSA_API - hsa_signal_exchange_release(hsa_signal_t signal, hsa_signal_value_t value) { - return HsaApiTable->hsa_signal_exchange_release_fn(signal, value); -} - -hsa_signal_value_t HSA_API - hsa_signal_exchange_acq_rel(hsa_signal_t signal, hsa_signal_value_t value) { - return HsaApiTable->hsa_signal_exchange_acq_rel_fn(signal, value); -} - -hsa_signal_value_t HSA_API hsa_signal_cas_relaxed(hsa_signal_t signal, - hsa_signal_value_t expected, - hsa_signal_value_t value) { - return HsaApiTable->hsa_signal_cas_relaxed_fn(signal, expected, value); -} - -hsa_signal_value_t HSA_API hsa_signal_cas_acquire(hsa_signal_t signal, - hsa_signal_value_t expected, - hsa_signal_value_t value) { - return HsaApiTable->hsa_signal_cas_acquire_fn(signal, expected, value); -} - -hsa_signal_value_t HSA_API hsa_signal_cas_release(hsa_signal_t signal, - hsa_signal_value_t expected, - hsa_signal_value_t value) { - return HsaApiTable->hsa_signal_cas_release_fn(signal, expected, value); -} - -hsa_signal_value_t HSA_API hsa_signal_cas_acq_rel(hsa_signal_t signal, - hsa_signal_value_t expected, - hsa_signal_value_t value) { - return HsaApiTable->hsa_signal_cas_acq_rel_fn(signal, expected, value); -} - -hsa_status_t hsa_isa_from_name(const char* name, hsa_isa_t* isa) { - return HsaApiTable->hsa_isa_from_name_fn(name, isa); -} - -hsa_status_t HSA_API hsa_isa_get_info(hsa_isa_t isa, hsa_isa_info_t attribute, - uint32_t index, void* value) { - return HsaApiTable->hsa_isa_get_info_fn(isa, attribute, index, value); -} - -hsa_status_t hsa_isa_compatible(hsa_isa_t 
code_object_isa, hsa_isa_t agent_isa, - bool* result) { - return HsaApiTable->hsa_isa_compatible_fn(code_object_isa, agent_isa, result); -} - -hsa_status_t HSA_API hsa_code_object_serialize( - hsa_code_object_t code_object, - hsa_status_t (*alloc_callback)(size_t size, hsa_callback_data_t data, - void** address), - hsa_callback_data_t callback_data, const char* options, - void** serialized_code_object, size_t* serialized_code_object_size) { - return HsaApiTable->hsa_code_object_serialize_fn( - code_object, alloc_callback, callback_data, options, - serialized_code_object, serialized_code_object_size); -} - -hsa_status_t HSA_API - hsa_code_object_deserialize(void* serialized_code_object, - size_t serialized_code_object_size, - const char* options, - hsa_code_object_t* code_object) { - return HsaApiTable->hsa_code_object_deserialize_fn( - serialized_code_object, serialized_code_object_size, options, - code_object); -} - -hsa_status_t HSA_API hsa_code_object_destroy(hsa_code_object_t code_object) { - return HsaApiTable->hsa_code_object_destroy_fn(code_object); -} - -hsa_status_t HSA_API hsa_code_object_get_info(hsa_code_object_t code_object, - hsa_code_object_info_t attribute, - void* value) { - return HsaApiTable->hsa_code_object_get_info_fn(code_object, attribute, - value); -} - -hsa_status_t HSA_API hsa_code_object_get_symbol(hsa_code_object_t code_object, - const char* symbol_name, - hsa_code_symbol_t* symbol) { - return HsaApiTable->hsa_code_object_get_symbol_fn(code_object, symbol_name, - symbol); -} - -hsa_status_t HSA_API hsa_code_symbol_get_info(hsa_code_symbol_t code_symbol, - hsa_code_symbol_info_t attribute, - void* value) { - return HsaApiTable->hsa_code_symbol_get_info_fn(code_symbol, attribute, - value); -} - -hsa_status_t HSA_API hsa_code_object_iterate_symbols( - hsa_code_object_t code_object, - hsa_status_t (*callback)(hsa_code_object_t code_object, - hsa_code_symbol_t symbol, void* data), - void* data) { - return 
HsaApiTable->hsa_code_object_iterate_symbols_fn(code_object, callback, - data); -} - -hsa_status_t HSA_API - hsa_executable_create(hsa_profile_t profile, - hsa_executable_state_t executable_state, - const char* options, hsa_executable_t* executable) { - return HsaApiTable->hsa_executable_create_fn(profile, executable_state, - options, executable); -} - -hsa_status_t HSA_API hsa_executable_destroy(hsa_executable_t executable) { - return HsaApiTable->hsa_executable_destroy_fn(executable); -} - -hsa_status_t HSA_API - hsa_executable_load_code_object(hsa_executable_t executable, - hsa_agent_t agent, - hsa_code_object_t code_object, - const char* options) { - return HsaApiTable->hsa_executable_load_code_object_fn(executable, agent, - code_object, options); -} - -hsa_status_t HSA_API - hsa_executable_freeze(hsa_executable_t executable, const char* options) { - return HsaApiTable->hsa_executable_freeze_fn(executable, options); -} - -hsa_status_t HSA_API hsa_executable_get_info(hsa_executable_t executable, - hsa_executable_info_t attribute, - void* value) { - return HsaApiTable->hsa_executable_get_info_fn(executable, attribute, value); -} - -hsa_status_t HSA_API - hsa_executable_global_variable_define(hsa_executable_t executable, - const char* variable_name, - void* address) { - return HsaApiTable->hsa_executable_global_variable_define_fn( - executable, variable_name, address); -} - -hsa_status_t HSA_API - hsa_executable_agent_global_variable_define(hsa_executable_t executable, - hsa_agent_t agent, - const char* variable_name, - void* address) { - return HsaApiTable->hsa_executable_agent_global_variable_define_fn( - executable, agent, variable_name, address); -} - -hsa_status_t HSA_API - hsa_executable_readonly_variable_define(hsa_executable_t executable, - hsa_agent_t agent, - const char* variable_name, - void* address) { - return HsaApiTable->hsa_executable_readonly_variable_define_fn( - executable, agent, variable_name, address); -} - -hsa_status_t HSA_API - 
hsa_executable_validate(hsa_executable_t executable, uint32_t* result) { - return HsaApiTable->hsa_executable_validate_fn(executable, result); -} - -hsa_status_t HSA_API - hsa_executable_get_symbol(hsa_executable_t executable, - const char* module_name, const char* symbol_name, - hsa_agent_t agent, int32_t call_convention, - hsa_executable_symbol_t* symbol) { - return HsaApiTable->hsa_executable_get_symbol_fn( - executable, module_name, symbol_name, agent, call_convention, symbol); -} - -hsa_status_t HSA_API - hsa_executable_symbol_get_info(hsa_executable_symbol_t executable_symbol, - hsa_executable_symbol_info_t attribute, - void* value) { - return HsaApiTable->hsa_executable_symbol_get_info_fn(executable_symbol, - attribute, value); -} - -hsa_status_t HSA_API hsa_executable_iterate_symbols( - hsa_executable_t executable, - hsa_status_t (*callback)(hsa_executable_t executable, - hsa_executable_symbol_t symbol, void* data), - void* data) { - return HsaApiTable->hsa_executable_iterate_symbols_fn(executable, callback, - data); -} - -hsa_status_t HSA_API - hsa_status_string(hsa_status_t status, const char** status_string) { - return HsaApiTable->hsa_status_string_fn(status, status_string); -} diff --git a/runtime/hsa-runtime/core/common/shared.cpp b/runtime/hsa-runtime/core/common/shared.cpp deleted file mode 100644 index 07dbc89f19..0000000000 --- a/runtime/hsa-runtime/core/common/shared.cpp +++ /dev/null @@ -1,48 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// -// The University of Illinois/NCSA -// Open Source License (NCSA) -// -// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. -// -// Developed by: -// -// AMD Research and AMD HSA Software Development -// -// Advanced Micro Devices, Inc. 
-// -// www.amd.com -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal with the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimers. -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimers in -// the documentation and/or other materials provided with the distribution. -// - Neither the names of Advanced Micro Devices, Inc, -// nor the names of its contributors may be used to endorse or promote -// products derived from this Software without specific prior written -// permission. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR -// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// DEALINGS WITH THE SOFTWARE. 
-// -//////////////////////////////////////////////////////////////////////////////// - -#include "core/common/shared.h" - -namespace core { -std::function BaseShared::allocate_=nullptr; -std::function BaseShared::free_=nullptr; -} diff --git a/runtime/hsa-runtime/core/common/shared.h b/runtime/hsa-runtime/core/common/shared.h deleted file mode 100644 index 36edaa078c..0000000000 --- a/runtime/hsa-runtime/core/common/shared.h +++ /dev/null @@ -1,109 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// -// The University of Illinois/NCSA -// Open Source License (NCSA) -// -// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. -// -// Developed by: -// -// AMD Research and AMD HSA Software Development -// -// Advanced Micro Devices, Inc. -// -// www.amd.com -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal with the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimers. -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimers in -// the documentation and/or other materials provided with the distribution. -// - Neither the names of Advanced Micro Devices, Inc, -// nor the names of its contributors may be used to endorse or promote -// products derived from this Software without specific prior written -// permission. 
-// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR -// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// DEALINGS WITH THE SOFTWARE. -// -//////////////////////////////////////////////////////////////////////////////// - -#ifndef HSA_RUNTME_CORE_INC_SHARED_H_ -#define HSA_RUNTME_CORE_INC_SHARED_H_ - -#include "core/util/utils.h" -#include - -#include -#include - -namespace core { -/// @brief Base class encapsulating the allocator and deallocator for -/// shared shared object. -class BaseShared { - public: - static void SetAllocateAndFree( - const std::function& allocate, - const std::function& free) { - allocate_ = allocate; - free_ = free; - } - - protected: - static std::function allocate_; - static std::function free_; -}; - -/// @brief Base class for classes that encapsulates object shared between -/// host and agents. Alignment defaults to __alignof(T) but may be increased. 
-template -class Shared : public BaseShared { - public: - Shared() { - assert(allocate_ != nullptr && free_ != nullptr && - "Shared object allocator is not set"); - static_assert((__alignof(T) <= Align) || (Align == 0), - "Align is less than alignof(T)"); - - shared_object_ = - reinterpret_cast(allocate_(sizeof(T), Max(__alignof(T), Align))); - - assert(shared_object_ != NULL && "Failed on allocating shared_object_"); - - if (shared_object_ != NULL) new (shared_object_) T; - } - - virtual ~Shared() { - assert(allocate_ != nullptr && free_ != nullptr && - "Shared object allocator is not set"); - - if (IsSharedObjectAllocationValid()) { - shared_object_->~T(); - free_(shared_object_); - } - } - - T* shared_object() const { return shared_object_; } - - bool IsSharedObjectAllocationValid() const { - return (shared_object_ != NULL); - } - - private: - T* shared_object_; -}; - -} // namespace core -#endif // header guard diff --git a/runtime/hsa-runtime/core/inc/agent.h b/runtime/hsa-runtime/core/inc/agent.h deleted file mode 100644 index 53ecd355b6..0000000000 --- a/runtime/hsa-runtime/core/inc/agent.h +++ /dev/null @@ -1,264 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// -// The University of Illinois/NCSA -// Open Source License (NCSA) -// -// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. -// -// Developed by: -// -// AMD Research and AMD HSA Software Development -// -// Advanced Micro Devices, Inc. 
-// -// www.amd.com -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal with the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimers. -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimers in -// the documentation and/or other materials provided with the distribution. -// - Neither the names of Advanced Micro Devices, Inc, -// nor the names of its contributors may be used to endorse or promote -// products derived from this Software without specific prior written -// permission. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR -// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// DEALINGS WITH THE SOFTWARE. -// -//////////////////////////////////////////////////////////////////////////////// - -// HSA runtime C++ interface file. 
- -#ifndef HSA_RUNTME_CORE_INC_AGENT_H_ -#define HSA_RUNTME_CORE_INC_AGENT_H_ - -#include - -#include - -#include "core/inc/runtime.h" -#include "core/inc/checked.h" -#include "core/inc/isa.h" -#include "core/inc/queue.h" -#include "core/inc/memory_region.h" -#include "core/util/utils.h" - -namespace core { -class Signal; - -typedef void (*HsaEventCallback)(hsa_status_t status, hsa_queue_t* source, - void* data); - -class MemoryRegion; - -// Agent is intended to be an pure interface class and may be wrapped or -// replaced by tools libraries. All funtions other than Convert, node_id, -// device_type, and public_handle must be virtual. -class Agent : public Checked<0xF6BC25EB17E6F917> { - public: - // @brief Convert agent object into hsa_agent_t. - // - // @param [in] agent Pointer to an agent. - // - // @retval hsa_agent_t - static __forceinline hsa_agent_t Convert(Agent* agent) { - const hsa_agent_t agent_handle = { - static_cast(reinterpret_cast(agent))}; - return agent_handle; - } - - // @brief Convert agent object into const hsa_agent_t. - // - // @param [in] agent Pointer to an agent. - // - // @retval const hsa_agent_t - static __forceinline const hsa_agent_t Convert(const Agent* agent) { - const hsa_agent_t agent_handle = { - static_cast(reinterpret_cast(agent))}; - return agent_handle; - } - - // @brief Convert hsa_agent_t handle into Agent*. - // - // @param [in] agent An hsa_agent_t handle. - // - // @retval Agent* - static __forceinline Agent* Convert(hsa_agent_t agent) { - return reinterpret_cast(agent.handle); - } - - // Lightweight RTTI for vendor specific implementations. - enum DeviceType { kAmdGpuDevice = 0, kAmdCpuDevice = 1, kUnknownDevice = 2 }; - - // @brief Agent class contructor. - // - // @param [in] type CPU or GPU or other. - explicit Agent(uint32_t node_id, DeviceType type) - : node_id_(node_id), device_type_(uint32_t(type)) { - public_handle_ = Convert(this); - } - - // @brief Agent class contructor. 
- // - // @param [in] type CPU or GPU or other. - explicit Agent(uint32_t node_id, uint32_t type) - : node_id_(node_id), device_type_(type) { - public_handle_ = Convert(this); - } - - // @brief Agent class destructor. - virtual ~Agent() {} - - // @brief Submit DMA copy command to move data from src to dst and wait - // until it is finished. - // - // @details The agent must be able to access @p dst and @p src. - // - // @param [in] dst Memory address of the destination. - // @param [in] src Memory address of the source. - // @param [in] size Copy size in bytes. - // - // @retval HSA_STATUS_SUCCESS The memory copy is finished and successful. - virtual hsa_status_t DmaCopy(void* dst, const void* src, size_t size) { - return HSA_STATUS_ERROR; - } - - // @brief Submit DMA copy command to move data from src to dst. This call - // does not wait until the copy is finished - // - // @details The agent must be able to access @p dst and @p src. Memory copy - // will be performed after all signals in @p dep_signals have value of 0. - // On memory copy completion, the value of out_signal is decremented. - // - // @param [in] dst Memory address of the destination. - // @param [in] src Memory address of the source. - // @param [in] size Copy size in bytes. - // @param [in] dep_signals Array of signal dependency. - // @param [in] out_signal Completion signal. - // - // @retval HSA_STATUS_SUCCESS The memory copy is finished and successful. - virtual hsa_status_t DmaCopy(void* dst, const void* src, size_t size, - std::vector& dep_signals, - core::Signal& out_signal) { - return HSA_STATUS_ERROR; - } - - // @brief Submit DMA command to set the content of a pointer and wait - // until it is finished. - // - // @details The agent must be able to access @p ptr - // - // @param [in] ptr Address of the memory to be set. - // @param [in] value The value/pattern that will be used to set @p ptr. - // @param [in] count Number of uint32_t element to be set. 
- // - // @retval HSA_STATUS_SUCCESS The memory fill is finished and successful. - virtual hsa_status_t DmaFill(void* ptr, uint32_t value, size_t count) { - return HSA_STATUS_ERROR; - } - - // @brief Invoke the user provided callback for each region accessible by - // this agent. - // - // @param [in] callback User provided callback function. - // @param [in] data User provided pointer as input for @p callback. - // - // @retval ::HSA_STATUS_SUCCESS if the callback function for each traversed - // region returns ::HSA_STATUS_SUCCESS. - virtual hsa_status_t IterateRegion( - hsa_status_t (*callback)(hsa_region_t region, void* data), - void* data) const = 0; - - // @brief Create queue. - // - // @param [in] size Number of packets the queue is expected to hold. Must be a - // power of 2 greater than 0. - // @param [in] queue_type Queue type. - // @param [in] event_callback Callback invoked for every - // asynchronous event related to the newly created queue. May be NULL.The HSA - // runtime passes three arguments to the callback : a code identifying the - // event that triggered the invocation, a pointer to the queue where the event - // originated, and the application data. - // @param [in] data Application data that is passed to @p callback. - // @param [in] private_segment_size A hint to indicate the maximum expected - // private segment usage per work-item, in bytes. - // @param [in] group_segment_size A hint to indicate the maximum expected - // group segment usage per work-group, in bytes. - // @param[out] queue Memory location where the HSA runtime stores a pointer - // to the newly created queue. - // - // @retval HSA_STATUS_SUCCESS The queue has been created successfully. - virtual hsa_status_t QueueCreate(size_t size, hsa_queue_type_t queue_type, - HsaEventCallback event_callback, void* data, - uint32_t private_segment_size, - uint32_t group_segment_size, - Queue** queue) = 0; - - // @brief Query the value of an attribute. 
- // - // @param [in] attribute Attribute to query. - // @param [out] value Pointer to store the value of the attribute. - // - // @param HSA_STATUS_SUCCESS @p value has been filled with the value of the - // attribute. - virtual hsa_status_t GetInfo(hsa_agent_info_t attribute, - void* value) const = 0; - - // @brief Returns an array of regions owned by the agent. - virtual const std::vector& regions() const = 0; - - // @details Returns the agent's instruction set architecture. - virtual const Isa* isa() const = 0; - - // @brief Returns the device type (CPU/GPU/Others). - __forceinline uint32_t device_type() const { return device_type_; } - - // @brief Returns hsa_agent_t handle exposed to end user. - // - // @details Only matters when tools library need to intercept HSA calls. - __forceinline hsa_agent_t public_handle() const { return public_handle_; } - - // @brief Returns node id associated with this agent. - __forceinline uint32_t node_id() const { return node_id_; } - - protected: - // Intention here is to have a polymorphic update procedure for public_handle_ - // which is callable on any Agent* but only from some class dervied from - // Agent*. do_set_public_handle should remain protected or private in all - // derived types. - static __forceinline void set_public_handle(Agent* agent, - hsa_agent_t handle) { - agent->do_set_public_handle(handle); - } - - virtual void do_set_public_handle(hsa_agent_t handle) { - public_handle_ = handle; - } - - hsa_agent_t public_handle_; - - private: - // @brief Node id. 
- const uint32_t node_id_; - - const uint32_t device_type_; - - // Forbid copying and moving of this object - DISALLOW_COPY_AND_ASSIGN(Agent); -}; -} // namespace core - -#endif // header guard diff --git a/runtime/hsa-runtime/core/inc/amd_aql_queue.h b/runtime/hsa-runtime/core/inc/amd_aql_queue.h deleted file mode 100644 index 25cb252f84..0000000000 --- a/runtime/hsa-runtime/core/inc/amd_aql_queue.h +++ /dev/null @@ -1,412 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// -// The University of Illinois/NCSA -// Open Source License (NCSA) -// -// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. -// -// Developed by: -// -// AMD Research and AMD HSA Software Development -// -// Advanced Micro Devices, Inc. -// -// www.amd.com -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal with the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimers. -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimers in -// the documentation and/or other materials provided with the distribution. -// - Neither the names of Advanced Micro Devices, Inc, -// nor the names of its contributors may be used to endorse or promote -// products derived from this Software without specific prior written -// permission. 
-// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR -// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// DEALINGS WITH THE SOFTWARE. -// -//////////////////////////////////////////////////////////////////////////////// - -#ifndef HSA_RUNTIME_CORE_INC_AMD_HW_AQL_COMMAND_PROCESSOR_H_ -#define HSA_RUNTIME_CORE_INC_AMD_HW_AQL_COMMAND_PROCESSOR_H_ - -#include "core/inc/runtime.h" -#include "core/inc/signal.h" -#include "core/inc/queue.h" -#include "core/inc/amd_gpu_agent.h" - -namespace amd { -/// @brief Encapsulates HW Aql Command Processor functionality. It -/// provide the interface for things such as Doorbell register, read, -/// write pointers and a buffer. -class AqlQueue : public core::Queue, public core::Signal { - public: - static __forceinline bool IsType(core::Signal* signal) { - return signal->IsType(&rtti_id_); - } - - // Acquires/releases queue resources and requests HW schedule/deschedule. 
- AqlQueue(GpuAgent* agent, size_t req_size_pkts, HSAuint32 node_id, - ScratchInfo& scratch, core::HsaEventCallback callback, - void* err_data, bool is_kv = false); - - ~AqlQueue(); - - /// @brief Indicates if queue is valid or not - bool IsValid() const { return valid_; } - - /// @brief Queue interfaces - hsa_status_t Inactivate(); - - /// @brief Atomically reads the Read index of with Acquire semantics - /// - /// @return uint64_t Value of read index - uint64_t LoadReadIndexAcquire(); - - /// @brief Atomically reads the Read index of with Relaxed semantics - /// - /// @return uint64_t Value of read index - uint64_t LoadReadIndexRelaxed(); - - /// @brief Atomically reads the Write index of with Acquire semantics - /// - /// @return uint64_t Value of write index - uint64_t LoadWriteIndexAcquire(); - - /// @brief Atomically reads the Write index of with Relaxed semantics - /// - /// @return uint64_t Value of write index - uint64_t LoadWriteIndexRelaxed(); - - /// @brief This operation is illegal - void StoreReadIndexRelaxed(uint64_t value) { assert(false); } - - /// @brief This operation is illegal - void StoreReadIndexRelease(uint64_t value) { assert(false); } - - /// @brief Atomically writes the Write index of with Relaxed semantics - /// - /// @param value New value of write index to update with - void StoreWriteIndexRelaxed(uint64_t value); - - /// @brief Atomically writes the Write index of with Release semantics - /// - /// @param value New value of write index to update with - void StoreWriteIndexRelease(uint64_t value); - - /// @brief Compares and swaps Write index using Acquire and Release semantics - /// - /// @param expected Current value of write index - /// - /// @param value Value of new write index - /// - /// @return uint64_t Value of write index before the update - uint64_t CasWriteIndexAcqRel(uint64_t expected, uint64_t value); - - /// @brief Compares and swaps Write index using Acquire semantics - /// - /// @param expected Current value of write 
index - /// - /// @param value Value of new write index - /// - /// @return uint64_t Value of write index before the update - uint64_t CasWriteIndexAcquire(uint64_t expected, uint64_t value); - - /// @brief Compares and swaps Write index using Relaxed semantics - /// - /// @param expected Current value of write index - /// - /// @param value Value of new write index - /// - /// @return uint64_t Value of write index before the update - uint64_t CasWriteIndexRelaxed(uint64_t expected, uint64_t value); - - /// @brief Compares and swaps Write index using Release semantics - /// - /// @param expected Current value of write index - /// - /// @param value Value of new write index - /// - /// @return uint64_t Value of write index before the update - uint64_t CasWriteIndexRelease(uint64_t expected, uint64_t value); - - /// @brief Updates the Write index using Acquire and Release semantics - /// - /// @param value Value of new write index - /// - /// @return uint64_t Value of write index before the update - uint64_t AddWriteIndexAcqRel(uint64_t value); - - /// @brief Updates the Write index using Acquire semantics - /// - /// @param value Value of new write index - /// - /// @return uint64_t Value of write index before the update - uint64_t AddWriteIndexAcquire(uint64_t value); - - /// @brief Updates the Write index using Relaxed semantics - /// - /// @param value Value of new write index - /// - /// @return uint64_t Value of write index before the update - uint64_t AddWriteIndexRelaxed(uint64_t value); - - /// @brief Updates the Write index using Release semantics - /// - /// @param value Value of new write index - /// - /// @return uint64_t Value of write index before the update - uint64_t AddWriteIndexRelease(uint64_t value); - - /// @brief Set CU Masking - /// - /// @param num_cu_mask_count size of mask bit array - /// - /// @param cu_mask pointer to cu mask - /// - /// @return hsa_status_t - hsa_status_t SetCUMasking(const uint32_t num_cu_mask_count, - const uint32_t* 
cu_mask); - - /// @brief This operation is illegal - hsa_signal_value_t LoadRelaxed() { - assert(false); - return 0; - } - - /// @brief This operation is illegal - hsa_signal_value_t LoadAcquire() { - assert(false); - return 0; - } - - /// @brief Update signal value using Relaxed semantics - void StoreRelaxed(hsa_signal_value_t value); - - /// @brief Update signal value using Release semantics - void StoreRelease(hsa_signal_value_t value); - - /// @brief This operation is illegal - hsa_signal_value_t WaitRelaxed(hsa_signal_condition_t condition, - hsa_signal_value_t compare_value, - uint64_t timeout, hsa_wait_state_t wait_hint) { - assert(false); - return 0; - } - - /// @brief This operation is illegal - hsa_signal_value_t WaitAcquire(hsa_signal_condition_t condition, - hsa_signal_value_t compare_value, - uint64_t timeout, hsa_wait_state_t wait_hint) { - assert(false); - return 0; - } - - /// @brief This operation is illegal - void AndRelaxed(hsa_signal_value_t value) { assert(false); } - - /// @brief This operation is illegal - void AndAcquire(hsa_signal_value_t value) { assert(false); } - - /// @brief This operation is illegal - void AndRelease(hsa_signal_value_t value) { assert(false); } - - /// @brief This operation is illegal - void AndAcqRel(hsa_signal_value_t value) { assert(false); } - - /// @brief This operation is illegal - void OrRelaxed(hsa_signal_value_t value) { assert(false); } - - /// @brief This operation is illegal - void OrAcquire(hsa_signal_value_t value) { assert(false); } - - /// @brief This operation is illegal - void OrRelease(hsa_signal_value_t value) { assert(false); } - - /// @brief This operation is illegal - void OrAcqRel(hsa_signal_value_t value) { assert(false); } - - /// @brief This operation is illegal - void XorRelaxed(hsa_signal_value_t value) { assert(false); } - - /// @brief This operation is illegal - void XorAcquire(hsa_signal_value_t value) { assert(false); } - - /// @brief This operation is illegal - void 
XorRelease(hsa_signal_value_t value) { assert(false); } - - /// @brief This operation is illegal - void XorAcqRel(hsa_signal_value_t value) { assert(false); } - - /// @brief This operation is illegal - void AddRelaxed(hsa_signal_value_t value) { assert(false); } - - /// @brief This operation is illegal - void AddAcquire(hsa_signal_value_t value) { assert(false); } - - /// @brief This operation is illegal - void AddRelease(hsa_signal_value_t value) { assert(false); } - - /// @brief This operation is illegal - void AddAcqRel(hsa_signal_value_t value) { assert(false); } - - /// @brief This operation is illegal - void SubRelaxed(hsa_signal_value_t value) { assert(false); } - - /// @brief This operation is illegal - void SubAcquire(hsa_signal_value_t value) { assert(false); } - - /// @brief This operation is illegal - void SubRelease(hsa_signal_value_t value) { assert(false); } - - /// @brief This operation is illegal - void SubAcqRel(hsa_signal_value_t value) { assert(false); } - - /// @brief This operation is illegal - hsa_signal_value_t ExchRelaxed(hsa_signal_value_t value) { - assert(false); - return 0; - } - - /// @brief This operation is illegal - hsa_signal_value_t ExchAcquire(hsa_signal_value_t value) { - assert(false); - return 0; - } - - /// @brief This operation is illegal - hsa_signal_value_t ExchRelease(hsa_signal_value_t value) { - assert(false); - return 0; - } - - /// @brief This operation is illegal - hsa_signal_value_t ExchAcqRel(hsa_signal_value_t value) { - assert(false); - return 0; - } - - /// @brief This operation is illegal - hsa_signal_value_t CasRelaxed(hsa_signal_value_t expected, - hsa_signal_value_t value) { - assert(false); - return 0; - } - - /// @brief This operation is illegal - hsa_signal_value_t CasAcquire(hsa_signal_value_t expected, - hsa_signal_value_t value) { - assert(false); - return 0; - } - - /// @brief This operation is illegal - hsa_signal_value_t CasRelease(hsa_signal_value_t expected, - hsa_signal_value_t value) { - 
assert(false); - return 0; - } - - /// @brief This operation is illegal - hsa_signal_value_t CasAcqRel(hsa_signal_value_t expected, - hsa_signal_value_t value) { - assert(false); - return 0; - } - - /// @brief This operation is illegal - hsa_signal_value_t* ValueLocation() const { - assert(false); - return NULL; - } - - /// @brief This operation is illegal - HsaEvent* EopEvent() { - assert(false); - return NULL; - } - - // 64 byte-aligned allocation and release, for Queue::amd_queue_. - void* operator new(size_t size); - void* operator new(size_t size, void* ptr) { return ptr; } - void operator delete(void* ptr); - void operator delete(void*, void*) {} - - protected: - bool _IsA(rtti_t id) const { return id == &rtti_id_; } - - private: - uint32_t ComputeRingBufferMinPkts(); - uint32_t ComputeRingBufferMaxPkts(); - - // (De)allocates and (de)registers ring_buf_. - void AllocRegisteredRingBuffer(uint32_t queue_size_pkts); - void FreeRegisteredRingBuffer(); - - static bool DynamicScratchHandler(hsa_signal_value_t error_code, void* arg); - - // AQL packet ring buffer - void* ring_buf_; - - // Size of ring_buf_ allocation. - // This may be larger than (amd_queue_.hsa_queue.size * sizeof(AqlPacket)). 
- uint32_t ring_buf_alloc_bytes_; - - // Id of the Queue used in communication with thunk - HSA_QUEUEID queue_id_; - - // Indicates is queue is valid - bool valid_; - - // Indicates if queue is inactive - int32_t active_; - - // Cached value of HsaNodeProperties.HSA_CAPABILITY.DoorbellType - int doorbell_type_; - - // Handle of agent, which queue is attached to - GpuAgent* agent_; - - hsa_profile_t agent_profile_; - - uint32_t queue_full_workaround_; - - // Handle of scratch memory descriptor - ScratchInfo queue_scratch_; - - core::HsaEventCallback errors_callback_; - - void* errors_data_; - - // Is KV device queue - bool is_kv_queue_; - - // Shared event used for queue errors - static HsaEvent* queue_event_; - - // Queue count - used to ref count queue_event_ - static volatile uint32_t queue_count_; - - // Mutex for queue_event_ manipulation - static KernelMutex queue_lock_; - - static int rtti_id_; - - // Forbid copying and moving of this object - DISALLOW_COPY_AND_ASSIGN(AqlQueue); -}; -} // namespace amd -#endif // header guard diff --git a/runtime/hsa-runtime/core/inc/amd_blit_kernel.h b/runtime/hsa-runtime/core/inc/amd_blit_kernel.h deleted file mode 100644 index 665b7e7dc3..0000000000 --- a/runtime/hsa-runtime/core/inc/amd_blit_kernel.h +++ /dev/null @@ -1,174 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// -// The University of Illinois/NCSA -// Open Source License (NCSA) -// -// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. -// -// Developed by: -// -// AMD Research and AMD HSA Software Development -// -// Advanced Micro Devices, Inc. 
-// -// www.amd.com -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal with the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimers. -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimers in -// the documentation and/or other materials provided with the distribution. -// - Neither the names of Advanced Micro Devices, Inc, -// nor the names of its contributors may be used to endorse or promote -// products derived from this Software without specific prior written -// permission. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR -// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// DEALINGS WITH THE SOFTWARE. -// -//////////////////////////////////////////////////////////////////////////////// - -#ifndef HSA_RUNTIME_CORE_INC_AMD_BLIT_KERNEL_H_ -#define HSA_RUNTIME_CORE_INC_AMD_BLIT_KERNEL_H_ - -#include - -#include "core/inc/blit.h" - -namespace amd { -class BlitKernel : public core::Blit { - public: - explicit BlitKernel(); - virtual ~BlitKernel() override; - - /// @brief Initialize a blit kernel object. 
- /// - /// @param agent Pointer to the agent that will execute the AQL packets. - /// - /// @return hsa_status_t - virtual hsa_status_t Initialize(const core::Agent& agent) override; - - /// @brief Marks the blit kernel object as invalid and uncouples its link with - /// the underlying AQL kernel queue. Use of the blit object - /// once it has been release is illegal and any behavior is indeterminate - /// - /// @note: The call will block until all AQL packets have been executed. - /// - /// @return hsa_status_t - virtual hsa_status_t Destroy() override; - - /// @brief Submit an AQL packet to perform vector copy. The call is blocking - /// until the command execution is finished. - /// - /// @param dst Memory address of the copy destination. - /// @param src Memory address of the copy source. - /// @param size Size of the data to be copied. - virtual hsa_status_t SubmitLinearCopyCommand(void* dst, const void* src, - size_t size) override; - - /// @brief Submit a linear copy command to the the underlying compute device's - /// control block. The call is non blocking. The memory transfer will start - /// after all dependent signals are satisfied. After the transfer is - /// completed, the out signal will be decremented. - /// - /// @param dst Memory address of the copy destination. - /// @param src Memory address of the copy source. - /// @param size Size of the data to be copied. - /// @param dep_signals Arrays of dependent signal. - /// @param out_signal Output signal. - virtual hsa_status_t SubmitLinearCopyCommand( - void* dst, const void* src, size_t size, - std::vector& dep_signals, - core::Signal& out_signal) override; - - /// @brief Submit an AQL packet to perform memory fill. The call is blocking - /// until the command execution is finished. - /// - /// @param ptr Memory address of the fill destination. - /// @param value Value to be set. - /// @param count Number of uint32_t element to be set to the value. 
- virtual hsa_status_t SubmitLinearFillCommand(void* ptr, uint32_t value, - size_t count) override; - - private: - struct __ALIGNED__(16) KernelCopyArgs { - const void* src; - void* dst; - uint64_t size; - uint32_t use_vector; - }; - - struct __ALIGNED__(16) KernelFillArgs { - void* ptr; - uint64_t num; - uint32_t value; - }; - - /// Reserve a slot in the queue buffer. The call will wait until the queue - /// buffer has a room. - uint64_t AcquireWriteIndex(uint32_t num_packet); - - /// Update the queue doorbell register with ::write_index. This - /// function also serializes concurrent doorbell update to ensure that the - /// packet processor doesn't get invalid packet. - void ReleaseWriteIndex(uint64_t write_index, uint32_t num_packet); - - /// Wait until all packets are finished. - hsa_status_t FenceRelease(uint64_t write_index, uint32_t num_copy_packet, - hsa_fence_scope_t fence); - - void PopulateQueue(uint64_t index, uint64_t code_handle, void* args, - uint32_t grid_size_x, hsa_signal_t completion_signal); - - KernelCopyArgs* ObtainAsyncKernelCopyArg(); - - /// Handles to the vector copy kernel. - uint64_t copy_code_handle_; - - /// Handles to the vector copy aligned kernel. - uint64_t copy_aligned_code_handle_; - - /// Handles to the fill memory kernel. - uint64_t fill_code_handle_; - - /// AQL queue for submitting the vector copy kernel. - hsa_queue_t* queue_; - uint32_t queue_bitmask_; - - /// Index to track concurrent kernel launch. - volatile uint64_t cached_index_; - - /// Pointer to the kernel argument buffer. - void* kernarg_; - KernelCopyArgs* kernarg_async_; - uint32_t kernarg_async_mask_; - volatile uint32_t kernarg_async_counter_; - - /// Completion signal for every kernel dispatched. - hsa_signal_t completion_signal_; - - /// Lock to synchronize access to kernarg_ and completion_signal_ - std::mutex lock_; - - /// Pointer to memory containing the ISA and argument buffer. 
- void* code_arg_buffer_; - - static const size_t kMaxCopyCount; - static const size_t kMaxFillCount; - static const uint32_t kGroupSize; -}; -} // namespace amd - -#endif // header guard diff --git a/runtime/hsa-runtime/core/inc/amd_blit_kernel_kv.h b/runtime/hsa-runtime/core/inc/amd_blit_kernel_kv.h deleted file mode 100644 index 55ab3c8031..0000000000 --- a/runtime/hsa-runtime/core/inc/amd_blit_kernel_kv.h +++ /dev/null @@ -1,479 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// -// The University of Illinois/NCSA -// Open Source License (NCSA) -// -// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. -// -// Developed by: -// -// AMD Research and AMD HSA Software Development -// -// Advanced Micro Devices, Inc. -// -// www.amd.com -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal with the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimers. -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimers in -// the documentation and/or other materials provided with the distribution. -// - Neither the names of Advanced Micro Devices, Inc, -// nor the names of its contributors may be used to endorse or promote -// products derived from this Software without specific prior written -// permission. 
-// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR -// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// DEALINGS WITH THE SOFTWARE. -// -//////////////////////////////////////////////////////////////////////////////// - -#ifndef HSA_RUNTIME_CORE_INC_AMD_BLIT_KERNEL_KV_H_ -#define HSA_RUNTIME_CORE_INC_AMD_BLIT_KERNEL_KV_H_ - -#include - -#define HSA_VECTOR_COPY_KV_AKC_SIZE 368 -#define HSA_VECTOR_COPY_KV_AKC_OFFSET 256 - -/*****HSAIL code of the ISA in ::kVectorCopyRawKv. -module &m:1:0:$full:$large:$default; - -prog kernel &__vector_copy_kernel( - kernarg_u64 %src, - kernarg_u64 %dst, - kernarg_u64 %size) -{ - @__vector_copy_kernel_entry: - // BB#0: // %entry - workitemabsid_u32 $s0, 0; - cvt_u64_u32 $d0, $s0; - ld_kernarg_align(8)_width(all)_u64 $d1, [%size]; - cmp_ge_b1_u64 $c0, $d0, $d1; - cbr_b1 $c0, @BB0_2; - // BB#1: // %if.end - ld_kernarg_align(8)_width(all)_u64 $d1, [%src]; - ld_kernarg_align(8)_width(all)_u64 $d2, [%dst]; - add_u64 $d2, $d2, $d0; - add_u64 $d0, $d1, $d0; - ld_global_u8 $s0, [$d0]; - st_global_u8 $s0, [$d2]; - - @BB0_2: - // %return - ret; -}; -*/ - -static char kVectorCopyRawKv[] = { - 127, 69, 76, 70, 2, 1, 1, 64, 0, 0, 0, 0, 0, - 0, 0, 0, 1, 0, -32, 0, 1, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 64, 0, 0, 0, 0, 0, 0, - 0, -104, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 64, 0, 56, 0, 1, 0, 64, 0, 6, 0, 5, 0, 3, - 0, 0, 96, 6, 0, 0, 0, 0, 1, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 112, 1, 0, 0, 0, 0, 0, 0, - 112, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, - 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, - 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 65, 0, -116, 0, -112, 0, 0, 0, - 11, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 24, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 11, 0, 5, 0, 5, 0, 0, 0, 9, 0, 0, - 0, 0, 0, 0, 0, 3, 0, 0, 6, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 1, 5, 0, -64, 127, 0, -116, -65, - 0, -1, -128, -109, 0, 0, 16, 0, 0, 8, 0, -109, 0, - 0, 0, 74, 4, 7, 64, -64, -128, 2, 2, 126, 127, 0, - -116, -65, 0, 0, -56, 125, 106, 36, -128, -66, 15, 0, -120, - -65, 0, 7, -126, -64, 127, 0, -116, -65, 4, 0, 2, 74, - 5, 2, 4, 126, 2, 106, 80, -46, 2, 1, -87, 1, 0, - 0, 32, -36, 1, 0, 0, 1, 6, 0, 6, 74, 7, 2, - 4, 126, 4, 106, 80, -46, 2, 1, -87, 1, 112, 0, -116, - -65, 0, 0, 96, -36, 3, 1, 0, 0, 0, 0, -127, -65, - 3, 0, 0, 0, 8, 0, 0, 0, 1, 0, 0, 0, 65, - 77, 68, 0, 1, 0, 0, 0, 0, 0, 0, 0, 3, 0, - 0, 0, 12, 0, 0, 0, 2, 0, 0, 0, 65, 77, 68, - 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, - 3, 0, 0, 0, 28, 0, 0, 0, 3, 0, 0, 0, 65, - 77, 68, 0, 4, 0, 7, 0, 7, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 65, 77, 68, 0, 65, 77, 68, - 71, 80, 85, 0, 0, 3, 0, 0, 0, 40, 0, 0, 0, - 4, 0, 0, 0, 65, 77, 68, 0, 26, 0, 0, 0, 0, - 0, 
0, 0, 0, 0, 0, 0, 65, 77, 68, 32, 72, 83, - 65, 32, 82, 117, 110, 116, 105, 109, 101, 32, 70, 105, 110, - 97, 108, 105, 122, 101, 114, 0, 0, 0, 38, 95, 95, 118, - 101, 99, 116, 111, 114, 95, 99, 111, 112, 121, 95, 107, 101, - 114, 110, 101, 108, 0, 95, 95, 104, 115, 97, 95, 115, 101, - 99, 116, 105, 111, 110, 46, 104, 115, 97, 116, 101, 120, 116, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 26, 0, 1, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 22, 0, 0, 0, 3, 0, 1, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 46, 104, 115, 97, 116, 101, 120, 116, 0, 46, 110, - 111, 116, 101, 0, 46, 115, 116, 114, 116, 97, 98, 0, 46, - 115, 121, 109, 116, 97, 98, 0, 46, 115, 104, 115, 116, 114, - 116, 97, 98, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, - 1, 0, 0, 0, 7, 0, -64, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, - 0, 0, 112, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 10, 0, 0, 0, 7, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 112, 2, 0, 0, 0, 0, 0, - 0, -104, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 3, 0, - 0, 0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 8, 3, 0, 0, 0, 0, 0, 0, - 44, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 24, 0, 0, 0, 2, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 56, 3, 0, 0, 0, 0, 0, 0, 48, - 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, - 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 24, 0, 0, - 0, 0, 0, 0, 0, 32, 0, 0, 0, 3, 0, 0, 0, - 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 104, 3, 0, 0, 0, 0, 0, 0, 42, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, -}; -extern char* const 
kVectorCopyKvObject = &kVectorCopyRawKv[0]; -extern size_t const kVectorCopyKvObjectSize = sizeof(kVectorCopyRawKv); - -#define HSA_VECTOR_COPY_ALIGNED_KV_AKC_SIZE 436 -#define HSA_VECTOR_COPY_ALIGNED_KV_AKC_OFFSET 256 - -/*****HSAIL code of the ISA in ::kVectorCopyAlignedRawKv. -module &m:1:0:$full:$large:$default; -extension "amd:gcn"; - -prog kernel &__copy_buffer_aligned_kernel( - kernarg_u64 %src, - kernarg_u64 %dst, - kernarg_u64 %size, - kernarg_u32 %use_vector) -{ - @__copy_buffer_aligned_kernel_entry: - // BB#0: // %entry - workitemabsid_u32 $s0, 0; - cvt_u64_u32 $d0, $s0; - ld_kernarg_align(8)_width(all)_u64 $d1, [%size]; - cmp_ge_b1_u64 $c0, $d0, $d1; - cbr_b1 $c0, @LBB0_4; - // BB#1: // %if.end - ld_kernarg_align(8)_width(all)_u64 $d2, [%dst]; - ld_kernarg_align(8)_width(all)_u64 $d1, [%src]; - ld_kernarg_align(4)_width(all)_u32 $s0, [%use_vector]; - cmp_ne_b1_s32 $c0, $s0, 1; - cbr_b1 $c0, @LBB0_3; - // BB#2: // %if.then2 - shl_u64 $d0, $d0, 4; - add_u64 $d2, $d2, $d0; - add_u64 $d0, $d1, $d0; - ld_v4_global_align(16)_const_u32 ($s0, $s1, $s2, $s3), [$d0]; - st_v4_global_align(16)_u32 ($s0, $s1, $s2, $s3), [$d2]; - br @LBB0_4; - - @LBB0_3: - // %if.else - shl_u64 $d0, $d0, 2; - add_u64 $d2, $d2, $d0; - add_u64 $d0, $d1, $d0; - ld_global_align(4)_const_u32 $s0, [$d0]; - st_global_align(4)_u32 $s0, [$d2]; - - @LBB0_4: - // %if.end6 - ret; -}; -*/ - -static char kVectorCopyAlignedRawKv[] = { - 127, 69, 76, 70, 2, 1, 1, 64, 0, 0, 0, 0, 0, - 0, 0, 0, 1, 0, -32, 0, 1, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 64, 0, 0, 0, 0, 0, 0, - 0, -8, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 64, 0, 56, 0, 1, 0, 64, 0, 6, 0, 5, 0, 3, - 0, 0, 96, 6, 0, 0, 0, 0, 1, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, -76, 1, 0, 0, 0, 0, 0, 0, - -76, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, - 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, - 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 65, 0, -84, 0, -112, 0, 0, 0, - 11, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 11, 0, 7, 0, 7, 0, 0, 0, 9, 0, 0, - 0, 0, 0, 0, 0, 4, 4, 4, 6, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 1, 5, 0, -64, 127, 0, -116, -65, - 0, -1, -128, -109, 0, 0, 16, 0, 0, 8, 0, -109, 0, - 0, 0, 74, 4, 7, 64, -64, -128, 2, 2, 126, 127, 0, - -116, -65, 0, 0, -56, 125, 106, 36, -128, -66, 32, 0, -120, - -65, 6, 7, 1, -64, 0, 7, -126, -64, 127, 0, -116, -65, - 2, -127, 0, -65, 14, 0, -124, -65, 0, 0, -62, -46, 0, - 9, 1, 0, 4, 0, 4, 74, 5, 2, 6, 126, 3, 3, - 6, 80, 0, 0, 56, -36, 2, 0, 0, 2, 6, 0, 0, - 74, 7, 2, 12, 126, 6, 3, 2, 80, 112, 0, -116, -65, - 0, 0, 120, -36, 0, 2, 0, 0, 13, 0, -126, -65, 0, - 0, -62, -46, 0, 5, 1, 0, 4, 0, 4, 74, 5, 2, - 6, 126, 3, 3, 6, 80, 0, 0, 48, -36, 2, 0, 0, - 2, 6, 0, 0, 74, 7, 2, 6, 126, 3, 3, 2, 80, - 112, 0, -116, -65, 0, 0, 112, -36, 0, 2, 0, 0, 0, - 0, -127, -65, 0, 0, 0, 0, 4, 0, 0, 0, 8, 0, - 0, 0, 1, 0, 0, 0, 65, 77, 68, 0, 1, 0, 0, - 0, 0, 0, 0, 0, 4, 0, 0, 0, 12, 0, 0, 0, - 2, 0, 0, 0, 65, 77, 68, 0, 1, 0, 0, 0, 0, - 0, 0, 0, 1, 1, 1, 0, 4, 0, 0, 0, 25, 0, - 0, 0, 5, 0, 0, 
0, 65, 77, 68, 0, 22, 0, 45, - 104, 115, 97, 95, 99, 97, 108, 108, 95, 99, 111, 110, 118, - 101, 110, 116, 105, 111, 110, 61, 0, 0, 0, 0, 0, 4, - 0, 0, 0, 30, 0, 0, 0, 3, 0, 0, 0, 65, 77, - 68, 0, 4, 0, 7, 0, 7, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 65, 77, 68, 0, 65, 77, 68, 71, - 80, 85, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 8, - 0, 0, 0, 4, 0, 0, 0, 65, 77, 68, 0, -32, 101, - -118, -12, -1, 127, 0, 0, 38, 95, 95, 99, 111, 112, 121, - 95, 98, 117, 102, 102, 101, 114, 95, 97, 108, 105, 103, 110, - 101, 100, 95, 107, 101, 114, 110, 101, 108, 0, 95, 95, 104, - 115, 97, 95, 115, 101, 99, 116, 105, 111, 110, 46, 104, 115, - 97, 116, 101, 120, 116, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 26, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, - -76, 1, 0, 0, 0, 0, 0, 0, 30, 0, 0, 0, 3, - 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 46, 104, 115, 97, 116, 101, - 120, 116, 0, 46, 110, 111, 116, 101, 0, 46, 115, 116, 114, - 116, 97, 98, 0, 46, 115, 121, 109, 116, 97, 98, 0, 46, - 115, 104, 115, 116, 114, 116, 97, 98, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 1, 0, 0, 0, 1, 0, 0, 0, 7, 0, -64, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 1, 0, 0, 0, 0, 0, 0, -76, 1, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 10, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -72, 2, - 0, 0, 0, 0, 0, 0, -88, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, - 0, 0, 0, 3, 0, 0, 0, 32, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 96, 3, 0, - 0, 0, 0, 0, 0, 52, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24, 0, - 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, -104, 3, 0, 0, - 0, 0, 0, 0, 48, 0, 0, 0, 0, 0, 0, 0, 3, - 0, 0, 0, 0, 0, 0, 
0, 8, 0, 0, 0, 0, 0, - 0, 0, 24, 0, 0, 0, 0, 0, 0, 0, 32, 0, 0, - 0, 3, 0, 0, 0, 32, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, -56, 3, 0, 0, 0, - 0, 0, 0, 42, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, -}; -extern char* const kVectorCopyAlignedKvObject = &kVectorCopyAlignedRawKv[0]; -extern size_t const kVectorCopyAlignedKvObjectSize = - sizeof(kVectorCopyAlignedRawKv); - -#define HSA_FILL_MEMORY_KV_AKC_SIZE 352 -#define HSA_FILL_MEMORY_KV_AKC_OFFSET 256 - -/*****HSAIL code of the ISA in ::kFillMemoryRawKv. -module &m:1:0:$full:$large:$default; -extension "amd:gcn"; - -prog kernel &__fill_memory_kernel( -kernarg_u64 %ptr, -kernarg_u64 %num, -kernarg_u32 %value) -{ -@__fill_memory_kernel_entry: -// BB#0: // %entry -workitemabsid_u32 $s0, 0; -cvt_u64_u32 $d0, $s0; -ld_kernarg_align(8)_width(all)_u64 $d1, [%num]; -cmp_ge_b1_u64 $c0, $d0, $d1; -cbr_b1 $c0, @LBB0_2; -// BB#1: // %if.end -ld_kernarg_align(8)_width(all)_u64 $d1, [%ptr]; -ld_kernarg_align(4)_width(all)_u32 $s0, [%value]; -shl_u64 $d0, $d0, 2; -add_u64 $d0, $d1, $d0; -st_global_align(4)_u32 $s0, [$d0]; - -@LBB0_2: -// %return -ret; -}; -*/ - -static char kFillMemoryRawKv[] = { - 127, 69, 76, 70, 2, 1, 1, 64, 0, 0, 0, 0, 0, 0, - 0, 0, 1, 0, -32, 0, 1, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 64, 0, 0, 0, 0, 0, 0, 0, -104, 3, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 0, 56, 0, - 1, 0, 64, 0, 6, 0, 5, 0, 3, 0, 0, 96, 6, 0, - 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 96, 1, - 0, 0, 0, 0, 0, 0, 96, 1, 0, 0, 0, 0, 0, 0, - 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, - 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 0, -84, 0, - -112, 0, 0, 0, 11, 0, 10, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 32, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 11, 0, 3, 0, 3, 0, 0, 0, 9, 0, - 0, 0, 0, 0, 0, 0, 4, 4, 4, 6, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 1, 5, 0, -64, 127, 0, - -116, -65, 0, -1, -128, -109, 0, 0, 16, 0, 0, 8, 0, -109, - 0, 0, 0, 74, 2, 7, 64, -64, -128, 2, 2, 126, 127, 0, - -116, -65, 0, 0, -56, 125, 106, 36, -128, -66, 11, 0, -120, -65, - 0, 7, 65, -64, 4, 7, 2, -64, 0, 0, -62, -46, 0, 5, - 1, 0, 127, 0, -116, -65, 2, 0, 0, 74, 3, 2, 4, 126, - 2, 3, 2, 80, 4, 2, 4, 126, 0, 0, 112, -36, 0, 2, - 0, 0, 0, 0, -127, -65, 4, 0, 0, 0, 8, 0, 0, 0, - 1, 0, 0, 0, 65, 77, 68, 0, 1, 0, 0, 0, 0, 0, - 0, 0, 4, 0, 0, 0, 12, 0, 0, 0, 2, 0, 0, 0, - 65, 77, 68, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, - 1, 0, 4, 0, 0, 0, 25, 0, 0, 0, 5, 0, 0, 0, - 65, 77, 68, 0, 22, 0, 45, 104, 115, 97, 95, 99, 97, 108, - 108, 95, 99, 111, 110, 118, 101, 110, 116, 105, 111, 110, 61, 0, - 0, 0, 0, 0, 4, 0, 0, 0, 30, 0, 0, 0, 3, 0, - 0, 0, 65, 77, 68, 0, 4, 0, 7, 0, 7, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 65, 77, 68, 0, 65, 77, - 68, 71, 80, 85, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, - 8, 0, 0, 0, 4, 0, 0, 0, 65, 77, 68, 0, 48, 123, - 44, -103, -4, 127, 0, 0, 38, 95, 95, 102, 105, 108, 108, 95, - 109, 101, 109, 111, 114, 121, 95, 107, 101, 114, 110, 101, 108, 0, - 95, 95, 104, 115, 97, 95, 115, 101, 99, 116, 105, 
111, 110, 46, - 104, 115, 97, 116, 101, 120, 116, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 26, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 96, 1, 0, 0, 0, 0, 0, 0, 22, 0, 0, 0, 3, 0, - 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 46, 104, 115, 97, 116, 101, 120, 116, 0, - 46, 110, 111, 116, 101, 0, 46, 115, 116, 114, 116, 97, 98, 0, - 46, 115, 121, 109, 116, 97, 98, 0, 46, 115, 104, 115, 116, 114, - 116, 97, 98, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 7, 0, - -64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 1, 0, 0, 0, 0, 0, 0, 96, 1, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 0, - 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 96, 2, 0, 0, 0, 0, - 0, 0, -88, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 3, 0, 0, 0, - 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 8, 3, 0, 0, 0, 0, 0, 0, 44, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 24, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 56, 3, 0, 0, - 0, 0, 0, 0, 48, 0, 0, 0, 0, 0, 0, 0, 3, 0, - 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, - 24, 0, 0, 0, 0, 0, 0, 0, 32, 0, 0, 0, 3, 0, - 0, 0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 104, 3, 0, 0, 0, 0, 0, 0, 42, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, -}; - -extern char* const kFillMemoryKvObject = &kFillMemoryRawKv[0]; -extern size_t const kFillMemoryKvObjectSize = sizeof(kFillMemoryRawKv); -#endif // header guard \ No newline at end of file diff --git a/runtime/hsa-runtime/core/inc/amd_blit_kernel_vi.h b/runtime/hsa-runtime/core/inc/amd_blit_kernel_vi.h deleted file mode 100644 index 
ca03cd8dae..0000000000 --- a/runtime/hsa-runtime/core/inc/amd_blit_kernel_vi.h +++ /dev/null @@ -1,490 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// -// The University of Illinois/NCSA -// Open Source License (NCSA) -// -// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. -// -// Developed by: -// -// AMD Research and AMD HSA Software Development -// -// Advanced Micro Devices, Inc. -// -// www.amd.com -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal with the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimers. -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimers in -// the documentation and/or other materials provided with the distribution. -// - Neither the names of Advanced Micro Devices, Inc, -// nor the names of its contributors may be used to endorse or promote -// products derived from this Software without specific prior written -// permission. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL -// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR -// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// DEALINGS WITH THE SOFTWARE. -// -//////////////////////////////////////////////////////////////////////////////// - -#ifndef HSA_RUNTIME_CORE_INC_AMD_BLIT_KERNEL_VI_H_ -#define HSA_RUNTIME_CORE_INC_AMD_BLIT_KERNEL_VI_H_ - -#include - -#define HSA_VECTOR_COPY_VI_AKC_SIZE 380 -#define HSA_VECTOR_COPY_VI_AKC_OFFSET 256 - -/*****HSAIL code of the ISA in ::kVectorCopyRawVi. -module &m:1:0:$full:$large:$default; - -prog kernel &__vector_copy_kernel( - kernarg_u64 %src, - kernarg_u64 %dst, - kernarg_u64 %size) -{ - @__vector_copy_kernel_entry: - // BB#0: // %entry - workitemabsid_u32 $s0, 0; - cvt_u64_u32 $d0, $s0; - ld_kernarg_align(8)_width(all)_u64 $d1, [%size]; - cmp_ge_b1_u64 $c0, $d0, $d1; - cbr_b1 $c0, @BB0_2; - // BB#1: // %if.end - ld_kernarg_align(8)_width(all)_u64 $d1, [%src]; - ld_kernarg_align(8)_width(all)_u64 $d2, [%dst]; - add_u64 $d2, $d2, $d0; - add_u64 $d0, $d1, $d0; - ld_global_u8 $s0, [$d0]; - st_global_u8 $s0, [$d2]; - - @BB0_2: - // %return - ret; -}; -*/ - -static char kVectorCopyRawVi[] = { - 127, 69, 76, 70, 2, 1, 1, 64, 0, 0, 0, 0, 0, - 0, 0, 0, 1, 0, -32, 0, 1, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 64, 0, 0, 0, 0, 0, 0, - 0, -72, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 64, 0, 56, 0, 1, 0, 64, 0, 6, 0, 5, 0, 3, - 0, 0, 96, 6, 0, 0, 0, 0, 1, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 124, 1, 0, 0, 0, 0, 0, 0, - 124, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, - 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, - 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, -63, 2, -84, 0, -112, 0, 0, 0, - 11, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 96, 0, 5, 0, 5, 0, 0, 0, 9, 0, 0, - 0, 0, 0, 0, 0, 4, 4, 4, 6, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 2, 0, 2, -64, 4, 0, 0, 0, - 127, 0, -116, -65, 0, -1, -128, -110, 0, 0, 16, 0, 0, - 8, 0, -110, 0, 0, 0, 50, 3, 0, 6, -64, 16, 0, - 0, 0, -128, 2, 2, 126, 127, 0, -116, -65, 0, 0, -40, - 125, 106, 32, -128, -66, 16, 0, -120, -65, 3, 1, 10, -64, - 0, 0, 0, 0, 127, 0, -116, -65, 4, 0, 2, 50, 5, - 2, 4, 126, 2, 106, 28, -47, 2, 1, -87, 1, 0, 0, - 64, -36, 1, 0, 0, 1, 6, 0, 6, 50, 7, 2, 4, - 126, 4, 106, 28, -47, 2, 1, -87, 1, 112, 0, -116, -65, - 0, 0, 96, -36, 3, 1, 0, 0, 0, 0, -127, -65, 0, - 0, 0, 0, 4, 0, 0, 0, 8, 0, 0, 0, 1, 0, - 0, 0, 65, 77, 68, 0, 1, 0, 0, 0, 0, 0, 0, - 0, 4, 0, 0, 0, 12, 0, 0, 0, 2, 0, 0, 0, - 65, 77, 68, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, - 1, 1, 0, 4, 0, 0, 0, 25, 0, 0, 0, 5, 0, - 0, 0, 65, 77, 68, 0, 22, 0, 45, 104, 115, 97, 95, - 99, 97, 108, 108, 95, 99, 111, 110, 118, 101, 110, 116, 105, - 111, 110, 61, 0, 0, 0, 0, 0, 4, 0, 0, 0, 30, - 0, 0, 0, 3, 0, 0, 0, 65, 77, 68, 0, 4, 0, - 7, 0, 8, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, - 0, 65, 77, 68, 0, 65, 77, 68, 71, 80, 85, 0, 0, - 0, 0, 0, 0, 4, 0, 0, 0, 8, 0, 0, 0, 4, - 0, 0, 0, 65, 77, 68, 0, 
32, 103, -72, 81, -3, 127, - 0, 0, 38, 95, 95, 118, 101, 99, 116, 111, 114, 95, 99, - 111, 112, 121, 95, 107, 101, 114, 110, 101, 108, 0, 95, 95, - 104, 115, 97, 95, 115, 101, 99, 116, 105, 111, 110, 46, 104, - 115, 97, 116, 101, 120, 116, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 26, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 124, 1, 0, 0, 0, 0, 0, 0, 22, 0, 0, 0, - 3, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 46, 104, 115, 97, 116, - 101, 120, 116, 0, 46, 110, 111, 116, 101, 0, 46, 115, 116, - 114, 116, 97, 98, 0, 46, 115, 121, 109, 116, 97, 98, 0, - 46, 115, 104, 115, 116, 114, 116, 97, 98, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 7, 0, -64, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 1, 0, 0, 0, 0, 0, 0, 124, 1, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 10, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -128, - 2, 0, 0, 0, 0, 0, 0, -88, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 16, 0, 0, 0, 3, 0, 0, 0, 32, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 40, 3, - 0, 0, 0, 0, 0, 0, 44, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24, - 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 88, 3, 0, - 0, 0, 0, 0, 0, 48, 0, 0, 0, 0, 0, 0, 0, - 3, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, - 0, 0, 0, 24, 0, 0, 0, 0, 0, 0, 0, 32, 0, - 0, 0, 3, 0, 0, 0, 32, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, -120, 3, 0, 0, - 0, 0, 0, 0, 42, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -}; -extern char* const kVectorCopyViObject = &kVectorCopyRawVi[0]; -extern size_t const kVectorCopyViObjectSize = sizeof(kVectorCopyRawVi); - 
-#define HSA_VECTOR_COPY_ALIGNED_VI_AKC_SIZE 452 -#define HSA_VECTOR_COPY_ALIGNED_VI_AKC_OFFSET 256 - -/*****HSAIL code of the ISA in ::kVectorCopyAlignedRawVi. -module &m:1:0:$full:$large:$default; -extension "amd:gcn"; - -prog kernel &__copy_buffer_aligned_kernel( - kernarg_u64 %src, - kernarg_u64 %dst, - kernarg_u64 %size, - kernarg_u32 %use_vector) -{ - @__copy_buffer_aligned_kernel_entry: - // BB#0: // %entry - workitemabsid_u32 $s0, 0; - cvt_u64_u32 $d0, $s0; - ld_kernarg_align(8)_width(all)_u64 $d1, [%size]; - cmp_ge_b1_u64 $c0, $d0, $d1; - cbr_b1 $c0, @LBB0_4; - // BB#1: // %if.end - ld_kernarg_align(8)_width(all)_u64 $d2, [%dst]; - ld_kernarg_align(8)_width(all)_u64 $d1, [%src]; - ld_kernarg_align(4)_width(all)_u32 $s0, [%use_vector]; - cmp_ne_b1_s32 $c0, $s0, 1; - cbr_b1 $c0, @LBB0_3; - // BB#2: // %if.then2 - shl_u64 $d0, $d0, 4; - add_u64 $d2, $d2, $d0; - add_u64 $d0, $d1, $d0; - ld_v4_global_align(16)_const_u32 ($s0, $s1, $s2, $s3), [$d0]; - st_v4_global_align(16)_u32 ($s0, $s1, $s2, $s3), [$d2]; - br @LBB0_4; - - @LBB0_3: - // %if.else - shl_u64 $d0, $d0, 2; - add_u64 $d2, $d2, $d0; - add_u64 $d0, $d1, $d0; - ld_global_align(4)_const_u32 $s0, [$d0]; - st_global_align(4)_u32 $s0, [$d2]; - - @LBB0_4: - // %if.end6 - ret; -}; -*/ - -static char kVectorCopyAlignedRawVi[] = { - 127, 69, 76, 70, 2, 1, 1, 64, 0, 0, 0, 0, 0, - 0, 0, 0, 1, 0, -32, 0, 1, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 64, 0, 0, 0, 0, 0, 0, - 0, 8, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 64, 0, 56, 0, 1, 0, 64, 0, 6, 0, 5, 0, 3, - 0, 0, 96, 6, 0, 0, 0, 0, 1, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, -60, 1, 0, 0, 0, 0, 0, 0, - -60, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, - 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, - 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 65, 0, -84, 0, -112, 0, 0, 0, - 11, 0, 74, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 16, 0, 8, 0, 8, 0, 0, 0, 12, 0, 0, - 0, 0, 0, 0, 0, 4, 4, 4, 6, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 2, 0, 2, -64, 4, 0, 0, 0, - 127, 0, -116, -65, 0, -1, -128, -110, 0, 0, 16, 0, 0, - 8, 0, -110, 0, 0, 0, 50, 3, 0, 6, -64, 16, 0, - 0, 0, -128, 2, 2, 126, 127, 0, -116, -65, 0, 0, -40, - 125, 106, 32, -128, -66, 34, 0, -120, -65, -125, 0, 2, -64, - 24, 0, 0, 0, 3, 2, 10, -64, 0, 0, 0, 0, 127, - 0, -116, -65, 2, -127, 0, -65, 14, 0, -124, -65, 0, 0, - -113, -46, -124, 0, 2, 0, 8, 0, 4, 50, 9, 2, 6, - 126, 3, 3, 6, 56, 0, 0, 92, -36, 2, 0, 0, 4, - 10, 0, 0, 50, 11, 2, 4, 126, 2, 3, 2, 56, 112, - 0, -116, -65, 0, 0, 124, -36, 0, 4, 0, 0, 13, 0, - -126, -65, 0, 0, -113, -46, -126, 0, 2, 0, 8, 0, 4, - 50, 9, 2, 6, 126, 3, 3, 6, 56, 0, 0, 80, -36, - 2, 0, 0, 4, 10, 0, 0, 50, 11, 2, 4, 126, 2, - 3, 2, 56, 112, 0, -116, -65, 0, 0, 112, -36, 0, 4, - 0, 0, 0, 0, -127, -65, 0, 0, 0, 0, 4, 0, 0, - 0, 8, 0, 0, 0, 1, 0, 0, 0, 65, 77, 68, 0, - 1, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 12, - 0, 0, 0, 2, 0, 0, 0, 65, 77, 68, 0, 1, 0, - 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 4, 0, 0, - 0, 25, 0, 0, 0, 5, 0, 0, 0, 65, 77, 68, 0, - 22, 0, 45, 104, 115, 97, 95, 99, 
97, 108, 108, 95, 99, - 111, 110, 118, 101, 110, 116, 105, 111, 110, 61, 0, 0, 0, - 0, 0, 4, 0, 0, 0, 30, 0, 0, 0, 3, 0, 0, - 0, 65, 77, 68, 0, 4, 0, 7, 0, 8, 0, 0, 0, - 0, 0, 0, 0, 1, 0, 0, 0, 65, 77, 68, 0, 65, - 77, 68, 71, 80, 85, 0, 0, 0, 0, 0, 0, 4, 0, - 0, 0, 8, 0, 0, 0, 4, 0, 0, 0, 65, 77, 68, - 0, 96, 62, -27, 85, -1, 127, 0, 0, 38, 95, 95, 99, - 111, 112, 121, 95, 98, 117, 102, 102, 101, 114, 95, 97, 108, - 105, 103, 110, 101, 100, 95, 107, 101, 114, 110, 101, 108, 0, - 95, 95, 104, 115, 97, 95, 115, 101, 99, 116, 105, 111, 110, - 46, 104, 115, 97, 116, 101, 120, 116, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 26, 0, 1, 0, 0, 0, 0, 0, 0, - 0, 0, 0, -60, 1, 0, 0, 0, 0, 0, 0, 30, 0, - 0, 0, 3, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 46, 104, 115, - 97, 116, 101, 120, 116, 0, 46, 110, 111, 116, 101, 0, 46, - 115, 116, 114, 116, 97, 98, 0, 46, 115, 121, 109, 116, 97, - 98, 0, 46, 115, 104, 115, 116, 114, 116, 97, 98, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 7, - 0, -64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, -60, 1, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 10, 0, 0, 0, 7, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, -56, 2, 0, 0, 0, 0, 0, 0, -88, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 16, 0, 0, 0, 3, 0, 0, 0, 32, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 112, 3, 0, 0, 0, 0, 0, 0, 52, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 24, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -88, - 3, 0, 0, 0, 0, 0, 0, 48, 0, 0, 0, 0, 0, - 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, - 0, 0, 0, 0, 0, 24, 0, 0, 0, 0, 0, 0, 0, - 
32, 0, 0, 0, 3, 0, 0, 0, 32, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -40, 3, - 0, 0, 0, 0, 0, 0, 42, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -}; -extern char* const kVectorCopyAlignedViObject = &kVectorCopyAlignedRawVi[0]; -extern size_t const kVectorCopyAlignedViObjectSize = - sizeof(kVectorCopyAlignedRawVi); - -#define HSA_FILL_MEMORY_VI_AKC_SIZE 368 -#define HSA_FILL_MEMORY_VI_AKC_OFFSET 256 - -/*****HSAIL code of the ISA in ::kFillMemoryRawVi. -module &m:1:0:$full:$large:$default; -extension "amd:gcn"; - -prog kernel &__fill_memory_kernel( - kernarg_u64 %ptr, - kernarg_u64 %num, - kernarg_u32 %value) -{ - @__fill_memory_kernel_entry: - // BB#0: // %entry - workitemabsid_u32 $s0, 0; - cvt_u64_u32 $d0, $s0; - ld_kernarg_align(8)_width(all)_u64 $d1, [%num]; - cmp_ge_b1_u64 $c0, $d0, $d1; - cbr_b1 $c0, @LBB0_2; - // BB#1: // %if.end - ld_kernarg_align(8)_width(all)_u64 $d1, [%ptr]; - ld_kernarg_align(4)_width(all)_u32 $s0, [%value]; - shl_u64 $d0, $d0, 2; - add_u64 $d0, $d1, $d0; - st_global_align(4)_u32 $s0, [$d0]; - - @LBB0_2: - // %return - ret; -}; -*/ - -static char kFillMemoryRawVi[] = { - 127, 69, 76, 70, 2, 1, 1, 64, 0, 0, 0, 0, 0, - 0, 0, 0, 1, 0, -32, 0, 1, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 64, 0, 0, 0, 0, 0, 0, - 0, -88, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 64, 0, 56, 0, 1, 0, 64, 0, 6, 0, 5, 0, 3, - 0, 0, 96, 6, 0, 0, 0, 0, 1, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 112, 1, 0, 0, 0, 0, 0, 0, - 112, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 
0, 1, 0, 0, 0, - 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, - 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 64, 0, -84, 0, -112, 0, 0, 0, - 11, 0, 74, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 13, 0, 3, 0, 3, 0, 0, 0, 9, 0, 0, - 0, 0, 0, 0, 0, 4, 4, 4, 6, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 2, 0, 2, -64, 4, 0, 0, 0, - 127, 0, -116, -65, 0, -1, -128, -110, 0, 0, 16, 0, 0, - 8, 0, -110, 0, 0, 0, 50, 3, 0, 6, -64, 8, 0, - 0, 0, -128, 2, 2, 126, 127, 0, -116, -65, 0, 0, -40, - 125, 106, 32, -128, -66, 13, 0, -120, -65, -125, 0, 6, -64, - 0, 0, 0, 0, 3, 1, 2, -64, 16, 0, 0, 0, 0, - 0, -113, -46, -126, 0, 2, 0, 127, 0, -116, -65, 2, 0, - 0, 50, 3, 2, 4, 126, 2, 3, 2, 56, 4, 2, 4, - 126, 0, 0, 112, -36, 0, 2, 0, 0, 0, 0, -127, -65, - 4, 0, 0, 0, 8, 0, 0, 0, 1, 0, 0, 0, 65, - 77, 68, 0, 1, 0, 0, 0, 0, 0, 0, 0, 4, 0, - 0, 0, 12, 0, 0, 0, 2, 0, 0, 0, 65, 77, 68, - 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, - 4, 0, 0, 0, 25, 0, 0, 0, 5, 0, 0, 0, 65, - 77, 68, 0, 22, 0, 45, 104, 115, 97, 95, 99, 97, 108, - 108, 95, 99, 111, 110, 118, 101, 110, 116, 105, 111, 110, 61, - 0, 0, 0, 0, 0, 4, 0, 0, 0, 30, 0, 0, 0, - 3, 0, 0, 0, 65, 77, 68, 0, 4, 0, 7, 0, 8, - 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 65, 77, - 68, 0, 65, 77, 68, 71, 80, 85, 0, 0, 0, 0, 0, - 0, 4, 0, 0, 0, 8, 0, 0, 0, 4, 0, 0, 0, - 65, 77, 68, 0, 16, -20, 88, 97, -4, 127, 0, 0, 38, - 95, 95, 102, 105, 108, 108, 95, 109, 101, 109, 111, 114, 121, - 95, 107, 101, 114, 110, 101, 108, 0, 95, 95, 104, 115, 97, - 95, 
115, 101, 99, 116, 105, 111, 110, 46, 104, 115, 97, 116, - 101, 120, 116, 0, 0, 0, 0, 0, 0, 0, 0, 0, 26, - 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 112, 1, - 0, 0, 0, 0, 0, 0, 22, 0, 0, 0, 3, 0, 1, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 46, 104, 115, 97, 116, 101, 120, 116, - 0, 46, 110, 111, 116, 101, 0, 46, 115, 116, 114, 116, 97, - 98, 0, 46, 115, 121, 109, 116, 97, 98, 0, 46, 115, 104, - 115, 116, 114, 116, 97, 98, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, - 0, 0, 0, 1, 0, 0, 0, 7, 0, -64, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, - 0, 0, 0, 0, 0, 112, 1, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 0, - 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 112, 2, 0, 0, - 0, 0, 0, 0, -88, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, 0, 0, - 0, 3, 0, 0, 0, 32, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 24, 3, 0, 0, 0, - 0, 0, 0, 44, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 24, 0, 0, 0, - 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 72, 3, 0, 0, 0, 0, - 0, 0, 48, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, - 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, - 24, 0, 0, 0, 0, 0, 0, 0, 32, 0, 0, 0, 3, - 0, 0, 0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 120, 3, 0, 0, 0, 0, 0, - 0, 42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, -}; - -extern char* const kFillMemoryViObject = &kFillMemoryRawVi[0]; -extern size_t const kFillMemoryViObjectSize = sizeof(kFillMemoryRawVi); -#endif // header guard \ No newline at end of file diff --git a/runtime/hsa-runtime/core/inc/amd_blit_sdma.h b/runtime/hsa-runtime/core/inc/amd_blit_sdma.h deleted 
file mode 100644 index db851ea49b..0000000000 --- a/runtime/hsa-runtime/core/inc/amd_blit_sdma.h +++ /dev/null @@ -1,218 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// -// The University of Illinois/NCSA -// Open Source License (NCSA) -// -// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. -// -// Developed by: -// -// AMD Research and AMD HSA Software Development -// -// Advanced Micro Devices, Inc. -// -// www.amd.com -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal with the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimers. -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimers in -// the documentation and/or other materials provided with the distribution. -// - Neither the names of Advanced Micro Devices, Inc, -// nor the names of its contributors may be used to endorse or promote -// products derived from this Software without specific prior written -// permission. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL -// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR -// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// DEALINGS WITH THE SOFTWARE. -// -//////////////////////////////////////////////////////////////////////////////// - -#ifndef HSA_RUNTIME_CORE_INC_AMD_BLIT_SDMA_H_ -#define HSA_RUNTIME_CORE_INC_AMD_BLIT_SDMA_H_ - -#include - -#include "hsakmt.h" - -#include "core/inc/blit.h" -#include "core/inc/runtime.h" -#include "core/inc/signal.h" -#include "core/util/utils.h" - -namespace amd { -class BlitSdma : public core::Blit { - public: - explicit BlitSdma(); - - virtual ~BlitSdma() override; - - /// @brief Initialize a User Mode SDMA Queue object. Input parameters specify - /// properties of queue being created. - /// - /// @param agent Pointer to the agent that will execute the PM4 commands. - /// - /// @return hsa_status_t - virtual hsa_status_t Initialize(const core::Agent& agent) override; - - /// @brief Marks the queue object as invalid and uncouples its link with - /// the underlying compute device's control block. Use of queue object - /// once it has been release is illegal and any behavior is indeterminate - /// - /// @note: The call will block until all packets have executed. - /// - /// @return hsa_status_t - virtual hsa_status_t Destroy() override; - - /// @brief Submit a linear copy command to the queue buffer. - /// - /// @param dst Memory address of the copy destination. - /// @param src Memory address of the copy source. - /// @param size Size of the data to be copied. - virtual hsa_status_t SubmitLinearCopyCommand(void* dst, const void* src, - size_t size) override; - - /// @brief Submit a linear copy command to the the underlying compute device's - /// control block. The call is non blocking. The memory transfer will start - /// after all dependent signals are satisfied. 
After the transfer is - /// completed, the out signal will be decremented. - /// - /// @param dst Memory address of the copy destination. - /// @param src Memory address of the copy source. - /// @param size Size of the data to be copied. - /// @param dep_signals Arrays of dependent signal. - /// @param out_signal Output signal. - virtual hsa_status_t SubmitLinearCopyCommand( - void* dst, const void* src, size_t size, - std::vector& dep_signals, - core::Signal& out_signal) override; - - /// @brief Submit a linear fill command to the queue buffer - /// - /// @param ptr Memory address of the fill destination. - /// @param value Value to be set. - /// @param count Number of uint32_t element to be set to the value. - virtual hsa_status_t SubmitLinearFillCommand(void* ptr, uint32_t value, - size_t count) override; - - protected: - /// @brief Acquires the address into queue buffer where a new command - /// packet of specified size could be written. The address that is - /// returned is guaranteed to be unique even in a multi-threaded access - /// scenario. This function is guaranteed to return a pointer for writing - /// data into the queue buffer. - /// - /// @param cmd_size Command packet size in bytes. - /// - /// @return pointer into the queue buffer where a PM4 packet of specified size - /// could be written. NULL if input size is greater than the size of queue - /// buffer. - char* AcquireWriteAddress(uint32_t cmd_size); - - void UpdateWriteAndDoorbellRegister(uint32_t current_offset, - uint32_t new_offset); - - /// @brief Updates the Write Register of compute device to the end of - /// SDMA packet written into queue buffer. The update to Write Register - /// will be safe under multi-threaded usage scenario. Furthermore, updates - /// to Write Register are blocking until all prior updates are completed - /// i.e. 
if two threads T1 & T2 were to call release, then updates by T2 - /// will block until T1 has completed its update (assumes T1 acquired the - /// write address first). - /// - /// @param cmd_addr pointer into the queue buffer where a PM4 packet was - /// written. - /// - /// @param cmd_size Command packet size in bytes. - void ReleaseWriteAddress(char* cmd_addr, uint32_t cmd_size); - - /// @brief Writes NO-OP words into queue buffer in case writing a command - /// causes the queue buffer to wrap. - /// - /// @param cmd_size Size in bytes of command causing queue buffer to wrap. - void WrapQueue(uint32_t cmd_size); - - /// @brief Build fence command - void BuildFenceCommand(char* fence_command_addr, uint32_t* fence, - uint32_t fence_value); - - uint32_t* ObtainFenceObject(); - - void WaitFence(uint32_t* fence, uint32_t fence_value); - - void BuildCopyCommand(char* cmd_addr, uint32_t num_copy_command, void* dst, - const void* src, size_t size); - - void BuildPollCommand(char* cmd_addr, void* addr, uint32_t reference); - - void BuildAtomicDecrementCommand(char* cmd_addr, void* addr); - - /// Indicates size of Queue buffer in bytes. - uint32_t queue_size_; - - /// Base address of the Queue buffer at construction time. - char* queue_start_addr_; - - uint32_t* fence_base_addr_; - uint32_t fence_pool_size_; - uint32_t fence_pool_mask_; - volatile uint32_t fence_pool_counter_; - - /// Queue resource descriptor for doorbell, read - /// and write indices - HsaQueueResource queue_resource_; - - /// @brief Current address of execution in Queue buffer. - /// - /// @note: The value of address is obtained by reading - /// the value of Write Register of the compute device. - /// Users should write to the Queue buffer at the current - /// address, else it will lead to execution error and potentially - /// a hang. - /// - /// @note: The value of Write Register does not always begin - /// with Zero after a Queue has been created. This needs to be - /// understood better. 
This means that current address number of - /// words of Queue buffer is unavailable for use. - volatile uint32_t cached_reserve_offset_; - volatile uint32_t cached_commit_offset_; - - uint32_t linear_copy_command_size_; - - uint32_t fill_command_size_; - - uint32_t fence_command_size_; - - uint32_t poll_command_size_; - - uint32_t atomic_command_size_; - - // Max copy size of a single linear copy command packet. - size_t max_single_linear_copy_size_; - - /// Max total copy size supported by the queue. - size_t max_total_linear_copy_size_; - - /// Max count of uint32_t of a single fill command packet. - size_t max_single_fill_size_; - - /// Max total fill count supported by the queue. - size_t max_total_fill_size_; - - std::mutex wrap_lock_; -}; -} // namespace amd - -#endif // header guard diff --git a/runtime/hsa-runtime/core/inc/amd_cpu_agent.h b/runtime/hsa-runtime/core/inc/amd_cpu_agent.h deleted file mode 100644 index 1ad4ec0b72..0000000000 --- a/runtime/hsa-runtime/core/inc/amd_cpu_agent.h +++ /dev/null @@ -1,154 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// -// The University of Illinois/NCSA -// Open Source License (NCSA) -// -// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. -// -// Developed by: -// -// AMD Research and AMD HSA Software Development -// -// Advanced Micro Devices, Inc. 
-// -// www.amd.com -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal with the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimers. -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimers in -// the documentation and/or other materials provided with the distribution. -// - Neither the names of Advanced Micro Devices, Inc, -// nor the names of its contributors may be used to endorse or promote -// products derived from this Software without specific prior written -// permission. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR -// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// DEALINGS WITH THE SOFTWARE. -// -//////////////////////////////////////////////////////////////////////////////// - -// AMD specific HSA backend. - -#ifndef HSA_RUNTIME_CORE_INC_AMD_CPU_AGENT_H_ -#define HSA_RUNTIME_CORE_INC_AMD_CPU_AGENT_H_ - -#include - -#include "hsakmt.h" - -#include "core/inc/runtime.h" -#include "core/inc/agent.h" -#include "core/inc/queue.h" - -namespace amd { -// @brief Class to represent a CPU device. 
-class CpuAgent : public core::Agent { - public: - // @brief CpuAgent constructor. - // - // @param [in] node Node id. Each CPU in different socket will get distinct - // id. - // @param [in] node_props Node property. - CpuAgent(HSAuint32 node, const HsaNodeProperties& node_props); - - // @brief CpuAgent destructor. - ~CpuAgent(); - - // @brief Invoke the user provided callback for each region accessible by - // this agent. - // - // @param [in] include_peer If true, the callback will be also invoked on each - // peer memory region accessible by this agent. If false, only invoke the - // callback on memory region owned by this agent. - // @param [in] callback User provided callback function. - // @param [in] data User provided pointer as input for @p callback. - // - // @retval ::HSA_STATUS_SUCCESS if the callback function for each traversed - // region returns ::HSA_STATUS_SUCCESS. - hsa_status_t VisitRegion(bool include_peer, - hsa_status_t (*callback)(hsa_region_t region, - void* data), - void* data) const; - - // @brief Override from core::Agent. - hsa_status_t IterateRegion(hsa_status_t (*callback)(hsa_region_t region, - void* data), - void* data) const override; - - // @brief Override from core::Agent. - hsa_status_t GetInfo(hsa_agent_info_t attribute, void* value) const override; - - // @brief Override from core::Agent. - hsa_status_t QueueCreate(size_t size, hsa_queue_type_t queue_type, - core::HsaEventCallback event_callback, void* data, - uint32_t private_segment_size, - uint32_t group_segment_size, - core::Queue** queue) override; - - // @brief Returns number of data caches. - __forceinline size_t num_cache() const { return cache_props_.size(); } - - // @brief Returns data cache property. - // - // @param [in] idx Cache level. - __forceinline const HsaCacheProperties& cache_prop(int idx) const { - return cache_props_[idx]; - } - - // @brief Override from core::Agent. 
- const std::vector& regions() const override { - return regions_; - } - - // @brief OVerride from core::Agent. - const core::Isa* isa() const override { return NULL; } - - private: - // @brief Query the driver to get the region list owned by this agent. - void InitRegionList(); - - // @brief Query the driver to get the cache properties. - void InitCacheList(); - - // @brief Invoke the user provided callback for every region in @p regions. - // - // @param [in] regions Array of region object. - // @param [in] callback User provided callback function. - // @param [in] data User provided pointer as input for @p callback. - // - // @retval ::HSA_STATUS_SUCCESS if the callback function for each traversed - // region returns ::HSA_STATUS_SUCCESS. - hsa_status_t VisitRegion( - const std::vector& regions, - hsa_status_t (*callback)(hsa_region_t region, void* data), - void* data) const; - - // @brief Node property. - const HsaNodeProperties properties_; - - // @brief Array of data cache property. The array index represents the cache - // level. - std::vector cache_props_; - - // @brief Array of regions owned by this agent. - std::vector regions_; - - DISALLOW_COPY_AND_ASSIGN(CpuAgent); -}; - -} // namespace amd - -#endif // header guard diff --git a/runtime/hsa-runtime/core/inc/amd_elf_image.hpp b/runtime/hsa-runtime/core/inc/amd_elf_image.hpp deleted file mode 100644 index 99f8a1c3ae..0000000000 --- a/runtime/hsa-runtime/core/inc/amd_elf_image.hpp +++ /dev/null @@ -1,222 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// -// The University of Illinois/NCSA -// Open Source License (NCSA) -// -// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. -// -// Developed by: -// -// AMD Research and AMD HSA Software Development -// -// Advanced Micro Devices, Inc. 
-// -// www.amd.com -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal with the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimers. -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimers in -// the documentation and/or other materials provided with the distribution. -// - Neither the names of Advanced Micro Devices, Inc, -// nor the names of its contributors may be used to endorse or promote -// products derived from this Software without specific prior written -// permission. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR -// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// DEALINGS WITH THE SOFTWARE. 
-// -//////////////////////////////////////////////////////////////////////////////// - -#ifndef AMD_ELF_IMAGE_HPP_ -#define AMD_ELF_IMAGE_HPP_ - -#include -#include -#include -#include - -namespace amd { - namespace elf { - class Symbol; - class SymbolTable; - class Section; - class RelocationSection; - - class Segment { - public: - virtual ~Segment() { } - virtual uint64_t type() const = 0; - virtual uint64_t memSize() const = 0; - virtual uint64_t align() const = 0; - virtual uint64_t imageSize() const = 0; - virtual uint64_t vaddr() const = 0; - virtual uint64_t flags() const = 0; - virtual const char* data() const = 0; - virtual uint16_t getSegmentIndex() = 0; - virtual bool updateAddSection(Section *section) = 0; - }; - - class Section { - public: - virtual ~Section() { } - virtual uint16_t getSectionIndex() const = 0; - virtual uint32_t type() const = 0; - virtual std::string Name() const = 0; - virtual uint64_t offset() const = 0; - virtual uint64_t addr() const = 0; - virtual bool updateAddr(uint64_t addr) = 0; - virtual uint64_t addralign() const = 0; - virtual uint64_t flags() const = 0; - virtual uint64_t size() const = 0; - virtual uint64_t nextDataOffset(uint64_t align) const = 0; - virtual uint64_t addData(const void *src, uint64_t size, uint64_t align) = 0; - virtual bool getData(uint64_t offset, void* dest, uint64_t size) = 0; - virtual Segment* segment() = 0; - virtual RelocationSection* asRelocationSection() = 0; - virtual bool hasRelocationSection() const = 0; - virtual RelocationSection* relocationSection(SymbolTable* symtab = 0) = 0; - virtual bool setMemSize(uint64_t s) = 0; - virtual uint64_t memSize() const = 0; - virtual bool setAlign(uint64_t a) = 0; - virtual uint64_t memAlign() const = 0; - }; - - class Relocation { - public: - virtual ~Relocation() { } - virtual RelocationSection* section() = 0; - virtual uint32_t type() = 0; - virtual uint32_t symbolIndex() = 0; - virtual Symbol* symbol() = 0; - virtual uint64_t offset() = 0; - 
virtual int64_t addend() = 0; - }; - - class RelocationSection : public virtual Section { - public: - virtual Relocation* addRelocation(uint32_t type, Symbol* symbol, uint64_t offset, int64_t addend) = 0; - virtual size_t relocationCount() const = 0; - virtual Relocation* relocation(size_t i) = 0; - virtual Section* targetSection() = 0; - }; - - class StringTable : public virtual Section { - public: - virtual const char* addString(const std::string& s) = 0; - virtual size_t addString1(const std::string& s) = 0; - virtual const char* getString(size_t ndx) = 0; - virtual size_t getStringIndex(const char* name) = 0; - }; - - class Symbol { - public: - virtual ~Symbol() { } - virtual uint32_t index() = 0; - virtual uint32_t type() = 0; - virtual uint32_t binding() = 0; - virtual uint64_t size() = 0; - virtual uint64_t value() = 0; - virtual unsigned char other() = 0; - virtual std::string name() = 0; - virtual Section* section() = 0; - virtual void setValue(uint64_t value) = 0; - virtual void setSize(uint64_t size) = 0; - }; - - class SymbolTable : public virtual Section { - public: - virtual Symbol* addSymbol(Section* section, const std::string& name, uint64_t value, uint64_t size, unsigned char type, unsigned char binding, unsigned char other = 0) = 0; - virtual size_t symbolCount() = 0; - virtual Symbol* symbol(size_t i) = 0; - }; - - class NoteSection : public virtual Section { - public: - virtual bool addNote(const std::string& name, uint32_t type, const void* desc = 0, uint32_t desc_size = 0) = 0; - virtual bool getNote(const std::string& name, uint32_t type, void** desc, uint32_t* desc_size) = 0; - }; - - class Image { - public: - virtual ~Image() { } - - virtual bool initNew(uint16_t machine, uint16_t type, uint8_t os_abi = 0, uint8_t abi_version = 0, uint32_t e_flags = 0) = 0; - virtual bool loadFromFile(const std::string& filename) = 0; - virtual bool saveToFile(const std::string& filename) = 0; - virtual bool initFromBuffer(const void* buffer, size_t size) = 
0; - virtual bool initAsBuffer(const void* buffer, size_t size) = 0; - virtual bool writeTo(const std::string& filename) = 0; - virtual bool copyToBuffer(void** buf, size_t* size = 0) = 0; // Copy to new buffer allocated with malloc - virtual bool copyToBuffer(void* buf, size_t size) = 0; // Copy to existing buffer of given size. - - virtual const char* data() = 0; - virtual uint64_t size() = 0; - - virtual uint16_t Machine() = 0; - virtual uint16_t Type() = 0; - - std::string output() { return out.str(); } - - virtual bool Freeze() = 0; - virtual bool Validate() = 0; - - virtual StringTable* shstrtab() = 0; - virtual StringTable* strtab() = 0; - virtual SymbolTable* symtab() = 0; - virtual SymbolTable* getSymtab(uint16_t index) = 0; - - virtual StringTable* addStringTable(const std::string& name) = 0; - virtual StringTable* getStringTable(uint16_t index) = 0; - - virtual SymbolTable* addSymbolTable(const std::string& name, StringTable* stab = 0) = 0; - - virtual size_t segmentCount() = 0; - virtual Segment* segment(size_t i) = 0; - virtual Segment* segmentByVAddr(uint64_t vaddr) = 0; - - virtual size_t sectionCount() = 0; - virtual Section* section(size_t i) = 0; - virtual Section* sectionByVAddr(uint64_t vaddr) = 0; - - virtual NoteSection* note() = 0; - virtual NoteSection* addNoteSection(const std::string& name) = 0; - - virtual Segment* initSegment(uint32_t type, uint32_t flags, uint64_t paddr = 0) = 0; - virtual bool addSegments() = 0; - - virtual Section* addSection(const std::string &name, - uint32_t type, - uint64_t flags = 0, - uint64_t entsize = 0, - Segment* segment = 0) = 0; - - virtual RelocationSection* relocationSection(Section* sec, SymbolTable* symtab = 0) = 0; - - protected: - std::ostringstream out; - }; - - Image* NewElf32Image(); - Image* NewElf64Image(); - - uint64_t ElfSize(const void* buffer); - - std::string GetNoteString(uint32_t s_size, const char* s); - - } -} - -#endif // AMD_ELF_IMAGE_HPP_ diff --git 
a/runtime/hsa-runtime/core/inc/amd_gpu_agent.h b/runtime/hsa-runtime/core/inc/amd_gpu_agent.h deleted file mode 100644 index 446e556f21..0000000000 --- a/runtime/hsa-runtime/core/inc/amd_gpu_agent.h +++ /dev/null @@ -1,354 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// -// The University of Illinois/NCSA -// Open Source License (NCSA) -// -// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. -// -// Developed by: -// -// AMD Research and AMD HSA Software Development -// -// Advanced Micro Devices, Inc. -// -// www.amd.com -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal with the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimers. -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimers in -// the documentation and/or other materials provided with the distribution. -// - Neither the names of Advanced Micro Devices, Inc, -// nor the names of its contributors may be used to endorse or promote -// products derived from this Software without specific prior written -// permission. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL -// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR -// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// DEALINGS WITH THE SOFTWARE. -// -//////////////////////////////////////////////////////////////////////////////// - -// AMD specific HSA backend. - -#ifndef HSA_RUNTIME_CORE_INC_AMD_GPU_AGENT_H_ -#define HSA_RUNTIME_CORE_INC_AMD_GPU_AGENT_H_ - -#include - -#include "hsakmt.h" - -#include "core/inc/runtime.h" -#include "core/inc/agent.h" -#include "core/inc/blit.h" -#include "core/inc/signal.h" -#include "core/util/small_heap.h" -#include "core/util/locks.h" - -namespace amd { -// @brief Contains scratch memory information. -struct ScratchInfo { - void* queue_base; - size_t size; - size_t size_per_thread; - ptrdiff_t queue_process_offset; -}; - -// @brief Interface to represent a GPU agent. -class GpuAgentInt : public core::Agent { - public: - // @brief Constructor - GpuAgentInt(uint32_t node_id) - : core::Agent(node_id, core::Agent::DeviceType::kAmdGpuDevice) {} - - // @brief Invoke the user provided callback for each region accessible by - // this agent. - // - // @param [in] include_peer If true, the callback will be also invoked on each - // peer memory region accessible by this agent. If false, only invoke the - // callback on memory region owned by this agent. - // @param [in] callback User provided callback function. - // @param [in] data User provided pointer as input for @p callback. - // - // @retval ::HSA_STATUS_SUCCESS if the callback function for each traversed - // region returns ::HSA_STATUS_SUCCESS. - virtual hsa_status_t VisitRegion(bool include_peer, - hsa_status_t (*callback)(hsa_region_t region, - void* data), - void* data) const = 0; - - // @brief Carve scratch memory from scratch pool. - // - // @param [out] scratch Structure to be populated with the carved memory - // information. 
- virtual void AcquireQueueScratch(ScratchInfo& scratch) = 0; - - // @brief Release scratch memory back to scratch pool. - // - // @param [in] base Address of scratch memory previously acquired with - // call to ::AcquireQueueScratch. - virtual void ReleaseQueueScratch(void* base) = 0; - - // @brief Translate the kernel start and end dispatch timestamp from agent - // domain to host domain. - // - // @param [in] signal Pointer to signal that provides the dispatch timing. - // @param [out] time Structure to be populated with the host domain value. - virtual void TranslateTime(core::Signal* signal, - hsa_amd_profiling_dispatch_time_t& time) = 0; - - // @brief Translate timestamp agent domain to host domain. - // - // @param [out] time Timestamp in agent domain. - virtual uint64_t TranslateTime(uint64_t tick) = 0; - - // @brief Sets the coherency type of this agent. - // - // @param [in] type New coherency type. - // - // @retval true The new coherency type is set successfuly. - virtual bool current_coherency_type(hsa_amd_coherency_type_t type) = 0; - - // @brief Returns the current coherency type of this agent. - // - // @retval Coherency type. - virtual hsa_amd_coherency_type_t current_coherency_type() const = 0; - - // @brief Query if agent represent Kaveri GPU. - // - // @retval true if agent is Kaveri GPU. - virtual bool is_kv_device() const = 0; - - // @brief Query the agent HSA profile. - // - // @retval HSA profile. - virtual hsa_profile_t profile() const = 0; -}; - -class GpuAgent : public GpuAgentInt { - public: - // @brief GPU agent constructor. - // - // @param [in] node Node id. Each CPU in different socket will get distinct - // id. - // @param [in] node_props Node property. - GpuAgent(HSAuint32 node, const HsaNodeProperties& node_props); - - // @brief GPU agent destructor. - ~GpuAgent(); - - // @brief Initialize DMA queue. - // - // @retval HSA_STATUS_SUCCESS DMA queue initialization is successful. 
- hsa_status_t InitDma(); - - uint16_t GetMicrocodeVersion() const; - - // @brief Assembles SP3 shader source into executable code. - // - // @param [in] src_sp3 SP3 shader source text representation. - // @param [in] func_name Name of the SP3 function to assemble. - // @param [out] code_buf Executable code buffer. - // @param [out] code_buf_size Size of executable code buffer in bytes. - void AssembleShader(const char* src_sp3, const char* func_name, - void*& code_buf, size_t& code_buf_size); - - // @brief Frees executable code created by AssembleShader. - // - // @param [in] code_buf Executable code buffer. - // @param [in] code_buf_size Size of executable code buffer in bytes. - void ReleaseShader(void* code_buf, size_t code_buf_size); - - // @brief Override from core::Agent. - hsa_status_t VisitRegion(bool include_peer, - hsa_status_t (*callback)(hsa_region_t region, - void* data), - void* data) const override; - - // @brief Override from core::Agent. - hsa_status_t IterateRegion(hsa_status_t (*callback)(hsa_region_t region, - void* data), - void* data) const override; - - // @brief Override from core::Agent. - hsa_status_t DmaCopy(void* dst, const void* src, size_t size) override; - - // @brief Override from core::Agent. - hsa_status_t DmaCopy(void* dst, const void* src, size_t size, - std::vector& dep_signals, - core::Signal& out_signal) override; - - // @brief Override from core::Agent. - hsa_status_t DmaFill(void* ptr, uint32_t value, size_t count) override; - - // @brief Override from core::Agent. - hsa_status_t GetInfo(hsa_agent_info_t attribute, void* value) const override; - - // @brief Override from core::Agent. - hsa_status_t QueueCreate(size_t size, hsa_queue_type_t queue_type, - core::HsaEventCallback event_callback, void* data, - uint32_t private_segment_size, - uint32_t group_segment_size, - core::Queue** queue) override; - - // @brief Override from amd::GpuAgentInt. 
- void AcquireQueueScratch(ScratchInfo& scratch) override; - - // @brief Override from amd::GpuAgentInt. - void ReleaseQueueScratch(void* base) override; - - // @brief Override from amd::GpuAgentInt. - void TranslateTime(core::Signal* signal, - hsa_amd_profiling_dispatch_time_t& time) override; - - // @brief Override from amd::GpuAgentInt. - uint64_t TranslateTime(uint64_t tick) override; - - // @brief Override from amd::GpuAgentInt. - bool current_coherency_type(hsa_amd_coherency_type_t type) override; - - // @brief Override from amd::GpuAgentInt. - hsa_amd_coherency_type_t current_coherency_type() const override { - return current_coherency_type_; - } - - // Getter & setters. - - // @brief Returns node property. - __forceinline const HsaNodeProperties& properties() const { - return properties_; - } - - // @brief Returns number of data caches. - __forceinline size_t num_cache() const { return cache_props_.size(); } - - // @brief Returns data cache property. - // - // @param [in] idx Cache level. - __forceinline const HsaCacheProperties& cache_prop(int idx) const { - return cache_props_[idx]; - } - - // @brief Override from core::Agent. - const std::vector& regions() const override { - return regions_; - } - - // @brief OVerride from core::Agent. - const core::Isa* isa() const override { return isa_; } - - // @brief Override from amd::GpuAgentInt. - __forceinline bool is_kv_device() const override { return is_kv_device_; } - - // @brief Override from amd::GpuAgentInt. - __forceinline hsa_profile_t profile() const override { return profile_; } - - protected: - static const uint32_t minAqlSize_ = 0x1000; // 4KB min - static const uint32_t maxAqlSize_ = 0x20000; // 8MB max - - // @brief Invoke the user provided callback for every region in @p regions. - // - // @param [in] regions Array of region object. - // @param [in] callback User provided callback function. - // @param [in] data User provided pointer as input for @p callback. 
- // - // @retval ::HSA_STATUS_SUCCESS if the callback function for each traversed - // region returns ::HSA_STATUS_SUCCESS. - hsa_status_t VisitRegion( - const std::vector& regions, - hsa_status_t (*callback)(hsa_region_t region, void* data), - void* data) const; - - // @brief Update ::t1_ tick count. - void SyncClocks(); - - // @brief Binds the second-level trap handler to this node. - void BindTrapHandler(); - - // @brief Node properties. - const HsaNodeProperties properties_; - - // @brief Current coherency type. - hsa_amd_coherency_type_t current_coherency_type_; - - // @brief Maximum number of queues that can be created. - uint32_t max_queues_; - - // @brief Object to manage scratch memory. - SmallHeap scratch_pool_; - - // @brief Default scratch size per queue. - size_t queue_scratch_len_; - - // @brief Default scratch size per work item. - size_t scratch_per_thread_; - - // @brief Blit object to handle memory copy/fill. - core::Blit* blit_; - - // @brief Mutex to protect the update to coherency type. - KernelMutex coherency_lock_; - - // @brief Mutex to protect access to scratch pool. - KernelMutex scratch_lock_; - - // @brief Mutex to protect access to ::t1_. - KernelMutex t1_lock_; - - // @brief GPU tick on initialization. - HsaClockCounters t0_; - - HsaClockCounters t1_; - - // @brief Array of GPU cache property. - std::vector cache_props_; - - // @brief Array of regions owned by this agent. - std::vector regions_; - - core::Isa* isa_; - - // @brief HSA profile. - hsa_profile_t profile_; - - bool is_kv_device_; - - void* trap_code_buf_; - - size_t trap_code_buf_size_; - - private: - // @brief Query the driver to get the region list owned by this agent. - void InitRegionList(); - - // @brief Reserve memory for scratch pool to be used by AQL queue of this - // agent. - void InitScratchPool(); - - // @brief Query the driver to get the cache properties. - void InitCacheList(); - - // @brief Alternative aperture base address. Only on KV. 
- uintptr_t ape1_base_; - - // @brief Alternative aperture size. Only on KV. - size_t ape1_size_; - - DISALLOW_COPY_AND_ASSIGN(GpuAgent); -}; - -} // namespace - -#endif // header guard diff --git a/runtime/hsa-runtime/core/inc/amd_hsa_code.hpp b/runtime/hsa-runtime/core/inc/amd_hsa_code.hpp deleted file mode 100644 index 8431b5963c..0000000000 --- a/runtime/hsa-runtime/core/inc/amd_hsa_code.hpp +++ /dev/null @@ -1,387 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// -// The University of Illinois/NCSA -// Open Source License (NCSA) -// -// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. -// -// Developed by: -// -// AMD Research and AMD HSA Software Development -// -// Advanced Micro Devices, Inc. -// -// www.amd.com -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal with the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimers. -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimers in -// the documentation and/or other materials provided with the distribution. -// - Neither the names of Advanced Micro Devices, Inc, -// nor the names of its contributors may be used to endorse or promote -// products derived from this Software without specific prior written -// permission. 
-// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR -// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// DEALINGS WITH THE SOFTWARE. -// -//////////////////////////////////////////////////////////////////////////////// - -#ifndef AMD_HSA_CODE_HPP_ -#define AMD_HSA_CODE_HPP_ - -#include "amd_elf_image.hpp" -#include "amd_hsa_elf.h" -#include "amd_hsa_kernel_code.h" -#include "hsa.h" -#include "hsa_ext_finalize.h" -#include -#include -#include -#include - -namespace amd { -namespace hsa { -namespace common { - -template -class Signed { -public: - static const uint64_t CT_SIGNATURE; - const uint64_t RT_SIGNATURE; - -protected: - Signed(): RT_SIGNATURE(signature) {} - virtual ~Signed() {} -}; - -template -const uint64_t Signed::CT_SIGNATURE = signature; - -bool IsAccessibleMemoryAddress(uint64_t address); - -template -size_t OffsetOf(member_type class_type::*member) -{ - return (char*)&((class_type*)nullptr->*member) - (char*)nullptr; -} - -template -class_type* ObjectAt(uint64_t address) -{ - if (!IsAccessibleMemoryAddress(address)) { - return nullptr; - } - - const uint64_t *rt_signature = - (const uint64_t*)(address + OffsetOf(&class_type::RT_SIGNATURE)); - if (nullptr == rt_signature) { - return nullptr; - } - if (class_type::CT_SIGNATURE != *rt_signature) { - return nullptr; - } - - return (class_type*)address; -} - -} - -namespace code { - - typedef amd::elf::Segment Segment; - typedef amd::elf::Section Section; - typedef amd::elf::RelocationSection RelocationSection; - typedef amd::elf::Relocation Relocation; - - class KernelSymbol; - class VariableSymbol; - - class Symbol { - protected: - 
amd::elf::Symbol* elfsym; - - public: - explicit Symbol(amd::elf::Symbol* elfsym_) - : elfsym(elfsym_) { } - virtual ~Symbol() { } - virtual bool IsKernelSymbol() const { return false; } - virtual KernelSymbol* AsKernelSymbol() { assert(false); return 0; } - virtual bool IsVariableSymbol() const { return false; } - virtual VariableSymbol* AsVariableSymbol() { assert(false); return 0; } - amd::elf::Symbol* elfSym() { return elfsym; } - std::string Name() const { return elfsym ? elfsym->name() : ""; } - Section* GetSection() { return elfsym->section(); } - virtual uint64_t SectionOffset() const { return elfsym->value(); } - virtual uint64_t VAddr() const { return elfsym->section()->addr() + elfsym->value(); } - uint32_t Index() const { return elfsym ? elfsym->index() : 0; } - bool IsDeclaration() const; - bool IsDefinition() const; - virtual bool IsAgent() const; - virtual hsa_symbol_kind_t Kind() const = 0; - hsa_symbol_linkage_t Linkage() const; - hsa_variable_allocation_t Allocation() const; - hsa_variable_segment_t Segment() const; - uint64_t Size() const; - uint32_t Size32() const; - uint32_t Alignment() const; - bool IsConst() const; - virtual hsa_status_t GetInfo(hsa_code_symbol_info_t attribute, void *value); - static hsa_code_symbol_t ToHandle(Symbol* sym); - static Symbol* FromHandle(hsa_code_symbol_t handle); - void setValue(uint64_t value) { elfsym->setValue(value); } - void setSize(uint32_t size) { elfsym->setSize(size); } - }; - - class KernelSymbol : public Symbol { - private: - uint32_t kernarg_segment_size, kernarg_segment_alignment; - uint32_t group_segment_size, private_segment_size; - bool is_dynamic_callstack; - - public: - explicit KernelSymbol(amd::elf::Symbol* elfsym_, const amd_kernel_code_t* akc); - bool IsKernelSymbol() const override { return true; } - KernelSymbol* AsKernelSymbol() override { return this; } - hsa_symbol_kind_t Kind() const override { return HSA_SYMBOL_KIND_KERNEL; } - hsa_status_t GetInfo(hsa_code_symbol_info_t attribute, 
void *value) override; - }; - - class VariableSymbol : public Symbol { - public: - explicit VariableSymbol(amd::elf::Symbol* elfsym_) - : Symbol(elfsym_) { } - bool IsVariableSymbol() const override { return true; } - VariableSymbol* AsVariableSymbol() override { return this; } - hsa_symbol_kind_t Kind() const override { return HSA_SYMBOL_KIND_VARIABLE; } - hsa_status_t GetInfo(hsa_code_symbol_info_t attribute, void *value) override; - }; - - class AmdHsaCode { - private: - std::ostringstream out; - std::unique_ptr img; - std::vector dataSegments; - std::vector dataSections; - std::vector relocationSections; - std::vector symbols; - bool combineDataSegments; - Segment* hsaSegments[AMDGPU_HSA_SEGMENT_LAST][2]; - Section* hsaSections[AMDGPU_HSA_SECTION_LAST]; - - amd::elf::Section* hsatext; - amd::elf::Section* imageInit; - amd::elf::Section* samplerInit; - amd::elf::Section* debugInfo; - amd::elf::Section* debugLine; - amd::elf::Section* debugAbbrev; - - bool PullElf(); - bool PullElfV1(); - bool PullElfV2(); - - void AddAmdNote(uint32_t type, const void* desc, uint32_t desc_size); - template - bool GetAmdNote(uint32_t type, S** desc) - { - uint32_t desc_size; - if (!img->note()->getNote("AMD", type, (void**) desc, &desc_size)) { - out << "Failed to find note, type: " << type << std::endl; - return false; - } - if (desc_size < sizeof(S)) { - out << "Note size mismatch, type: " << type << " size: " << desc_size << " expected at least " << sizeof(S) << std::endl; - return false; - } - return true; - } - - void PrintSegment(std::ostream& out, Segment* segment); - void PrintSection(std::ostream& out, Section* section); - void PrintRawData(std::ostream& out, Section* section); - void PrintRawData(std::ostream& out, const unsigned char *data, size_t size); - void PrintRelocationData(std::ostream& out, RelocationSection* section); - void PrintSymbol(std::ostream& out, Symbol* sym); - void PrintDisassembly(std::ostream& out, const unsigned char *isa, size_t size, uint32_t 
isa_offset = 0); - std::string MangleSymbolName(const std::string& module_name, const std::string symbol_name); - bool ElfImageError(); - - public: - bool HasHsaText() const { return hsatext != 0; } - amd::elf::Section* HsaText() { assert(hsatext); return hsatext; } - const amd::elf::Section* HsaText() const { assert(hsatext); return hsatext; } - amd::elf::SymbolTable* Symtab() { assert(img); return img->symtab(); } - uint16_t Machine() { return img->Machine(); } - - AmdHsaCode(bool combineDataSegments = true); - virtual ~AmdHsaCode(); - - std::string output() { return out.str(); } - bool LoadFromFile(const std::string& filename); - bool SaveToFile(const std::string& filename); - bool WriteToBuffer(void* buffer); - bool InitFromBuffer(const void* buffer, size_t size); - bool InitAsBuffer(const void* buffer, size_t size); - bool InitAsHandle(hsa_code_object_t code_handle); - bool InitNew(bool xnack = false); - bool Freeze(); - hsa_code_object_t GetHandle(); - const char* ElfData(); - uint64_t ElfSize(); - bool Validate(); - void Print(std::ostream& out); - void PrintNotes(std::ostream& out); - void PrintSegments(std::ostream& out); - void PrintSections(std::ostream& out); - void PrintSymbols(std::ostream& out); - void PrintMachineCode(std::ostream& out); - void PrintMachineCode(std::ostream& out, KernelSymbol* sym); - bool PrintToFile(const std::string& filename); - - void AddNoteCodeObjectVersion(uint32_t major, uint32_t minor); - bool GetNoteCodeObjectVersion(uint32_t* major, uint32_t* minor); - bool GetNoteCodeObjectVersion(std::string& version); - void AddNoteHsail(uint32_t hsail_major, uint32_t hsail_minor, hsa_profile_t profile, hsa_machine_model_t machine_model, hsa_default_float_rounding_mode_t rounding_mode); - bool GetNoteHsail(uint32_t* hsail_major, uint32_t* hsail_minor, hsa_profile_t* profile, hsa_machine_model_t* machine_model, hsa_default_float_rounding_mode_t* default_float_round); - void AddNoteIsa(const std::string& vendor_name, const std::string& 
architecture_name, uint32_t major, uint32_t minor, uint32_t stepping); - bool GetNoteIsa(std::string& vendor_name, std::string& architecture_name, uint32_t* major_version, uint32_t* minor_version, uint32_t* stepping); - bool GetNoteIsa(std::string& isaName); - void AddNoteProducer(uint32_t major, uint32_t minor, const std::string& producer); - bool GetNoteProducer(uint32_t* major, uint32_t* minor, std::string& producer_name); - void AddNoteProducerOptions(const std::string& options); - void AddNoteProducerOptions(int32_t call_convention, const hsa_ext_control_directives_t& user_directives, const std::string& user_options); - bool GetNoteProducerOptions(std::string& options); - - hsa_status_t GetInfo(hsa_code_object_info_t attribute, void *value); - hsa_status_t GetSymbol(const char *module_name, const char *symbol_name, hsa_code_symbol_t *sym); - hsa_status_t IterateSymbols(hsa_code_object_t code_object, - hsa_status_t (*callback)( - hsa_code_object_t code_object, - hsa_code_symbol_t symbol, - void* data), - void* data); - - void AddHsaTextData(const void* buffer, size_t size); - uint64_t NextKernelCodeOffset() const; - bool AddKernelCode(KernelSymbol* sym, const void* code, size_t size); - - Symbol* AddKernelDefinition(const std::string& name, const void* isa, size_t isa_size); - - size_t DataSegmentCount() { return dataSegments.size(); } - Segment* DataSegment(size_t i) { return dataSegments[i]; } - - size_t DataSectionCount() { return dataSections.size(); } - Section* DataSection(size_t i) { return dataSections[i]; } - - Section* AddEmptySection(); - Section* AddCodeSection(Segment* segment); - Section* AddDataSection(const std::string &name, - uint32_t type, - uint64_t flags, - Segment* segment); - - bool HasImageInitSection() const { return imageInit != 0; } - Section* ImageInitSection(); - void AddImageInitializer(Symbol* image, uint64_t destOffset, const amdgpu_hsa_image_descriptor_t& init); - void AddImageInitializer(Symbol* image, uint64_t destOffset, - 
amdgpu_hsa_metadata_kind16_t kind, - amdgpu_hsa_image_geometry8_t geometry, - amdgpu_hsa_image_channel_order8_t channel_order, amdgpu_hsa_image_channel_type8_t channel_type, - uint64_t width, uint64_t height, uint64_t depth, uint64_t array); - - - bool HasSamplerInitSection() const { return samplerInit != 0; } - amd::elf::Section* SamplerInitSection(); - amd::elf::Section* AddSamplerInit(); - void AddSamplerInitializer(Symbol* sampler, uint64_t destOffset, const amdgpu_hsa_sampler_descriptor_t& init); - void AddSamplerInitializer(Symbol* sampler, uint64_t destOffset, - amdgpu_hsa_sampler_coord8_t coord, - amdgpu_hsa_sampler_filter8_t filter, - amdgpu_hsa_sampler_addressing8_t addressing); - - void AddInitVarWithAddress(bool large, Symbol* dest, uint64_t destOffset, Symbol* addrOf, uint64_t addrAddend); - - void InitHsaSegment(amdgpu_hsa_elf_segment_t segment, bool writable); - bool AddHsaSegments(); - Segment* HsaSegment(amdgpu_hsa_elf_segment_t segment, bool writable); - - void InitHsaSectionSegment(amdgpu_hsa_elf_section_t section, bool combineSegments = true); - Section* HsaDataSection(amdgpu_hsa_elf_section_t section, bool combineSegments = true); - - Symbol* AddExecutableSymbol(const std::string &name, - unsigned char type, - unsigned char binding, - unsigned char other, - Section *section = 0); - - Symbol* AddVariableSymbol(const std::string &name, - unsigned char type, - unsigned char binding, - unsigned char other, - Section *section, - uint64_t value, - uint64_t size); - void AddSectionSymbols(); - - size_t RelocationSectionCount() { return relocationSections.size(); } - RelocationSection* GetRelocationSection(size_t i) { return relocationSections[i]; } - - size_t SymbolCount() { return symbols.size(); } - Symbol* GetSymbol(size_t i) { return symbols[i]; } - Symbol* GetSymbolByElfIndex(size_t index); - Symbol* FindSymbol(const std::string &n); - - void AddData(amdgpu_hsa_elf_section_t section, const void* data = 0, size_t size = 0); - - Section* 
DebugInfo(); - Section* DebugLine(); - Section* DebugAbbrev(); - - Section* AddHsaHlDebug(const std::string& name, const void* data, size_t size); - }; - - class AmdHsaCodeManager { - private: - typedef std::unordered_map CodeMap; - CodeMap codeMap; - - public: - AmdHsaCode* FromHandle(hsa_code_object_t handle); - bool Destroy(hsa_code_object_t handle); - }; - - class KernelSymbolV2 : public KernelSymbol { - private: - public: - explicit KernelSymbolV2(amd::elf::Symbol* elfsym_, const amd_kernel_code_t* akc); - bool IsAgent() const override { return true; } - uint64_t SectionOffset() const override { return elfsym->value() - elfsym->section()->addr(); } - uint64_t VAddr() const override { return elfsym->value(); } - }; - - class VariableSymbolV2 : public VariableSymbol { - private: - public: - explicit VariableSymbolV2(amd::elf::Symbol* elfsym_) : VariableSymbol(elfsym_) { } - bool IsAgent() const override { return false; } - uint64_t SectionOffset() const override { return elfsym->value() - elfsym->section()->addr(); } - uint64_t VAddr() const override { return elfsym->value(); } - }; -} -} -} - -#endif // AMD_HSA_CODE_HPP_ diff --git a/runtime/hsa-runtime/core/inc/amd_hsa_loader.hpp b/runtime/hsa-runtime/core/inc/amd_hsa_loader.hpp deleted file mode 100644 index 8e29df1496..0000000000 --- a/runtime/hsa-runtime/core/inc/amd_hsa_loader.hpp +++ /dev/null @@ -1,358 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// -// The University of Illinois/NCSA -// Open Source License (NCSA) -// -// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. -// -// Developed by: -// -// AMD Research and AMD HSA Software Development -// -// Advanced Micro Devices, Inc. 
-// -// www.amd.com -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal with the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimers. -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimers in -// the documentation and/or other materials provided with the distribution. -// - Neither the names of Advanced Micro Devices, Inc, -// nor the names of its contributors may be used to endorse or promote -// products derived from this Software without specific prior written -// permission. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR -// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// DEALINGS WITH THE SOFTWARE. -// -//////////////////////////////////////////////////////////////////////////////// - -#ifndef AMD_HSA_LOADER_HPP -#define AMD_HSA_LOADER_HPP - -#include -#include -#include "hsa.h" -#include "hsa_ext_image.h" -#include "amd_hsa_elf.h" -#include "amd_load_map.h" -#include -#include -#include - -/// @brief Major version of the AMD HSA Loader. Major versions are not backwards -/// compatible. 
-#define AMD_HSA_LOADER_VERSION_MAJOR 0 - -/// @brief Minor version of the AMD HSA Loader. Minor versions are backwards -/// compatible. -#define AMD_HSA_LOADER_VERSION_MINOR 5 - -/// @brief Descriptive version of the AMD HSA Loader. -#define AMD_HSA_LOADER_VERSION "AMD HSA Loader v0.05 (June 16, 2015)" - -enum hsa_ext_symbol_info_t { - HSA_EXT_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT_SIZE = 100, - HSA_EXT_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT_ALIGN = 101, -}; - -typedef uint32_t hsa_symbol_info32_t; -typedef hsa_executable_symbol_t hsa_symbol_t; -typedef hsa_executable_symbol_info_t hsa_symbol_info_t; - -namespace amd { -namespace hsa { -namespace loader { - -//===----------------------------------------------------------------------===// -// Context. // -//===----------------------------------------------------------------------===// - -class Context { -public: - virtual ~Context() {} - - virtual hsa_isa_t IsaFromName(const char *name) = 0; - - virtual bool IsaSupportedByAgent(hsa_agent_t agent, hsa_isa_t isa) = 0; - - virtual void* SegmentAlloc(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, size_t size, size_t align, bool zero) = 0; - - virtual bool SegmentCopy(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, void* dst, size_t offset, const void* src, size_t size) = 0; - - virtual void SegmentFree(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, void* seg, size_t size) = 0; - - virtual void* SegmentAddress(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, void* seg, size_t offset) = 0; - - virtual void* SegmentHostAddress(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, void* seg, size_t offset) = 0; - - virtual bool SegmentFreeze(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, void* seg, size_t size) = 0; - - virtual bool ImageExtensionSupported() = 0; - - virtual hsa_status_t ImageCreate( - hsa_agent_t agent, - hsa_access_permission_t image_permission, - const hsa_ext_image_descriptor_t *image_descriptor, - const void *image_data, - 
hsa_ext_image_t *image_handle) = 0; - - virtual hsa_status_t ImageDestroy( - hsa_agent_t agent, hsa_ext_image_t image_handle) = 0; - - virtual hsa_status_t SamplerCreate( - hsa_agent_t agent, - const hsa_ext_sampler_descriptor_t *sampler_descriptor, - hsa_ext_sampler_t *sampler_handle) = 0; - - virtual hsa_status_t SamplerDestroy( - hsa_agent_t agent, hsa_ext_sampler_t sampler_handle) = 0; - -protected: - Context() {} - -private: - Context(const Context &c); - Context& operator=(const Context &c); -}; - -//===----------------------------------------------------------------------===// -// Symbol. // -//===----------------------------------------------------------------------===// - -class Symbol { -public: - static hsa_symbol_t Handle(Symbol *symbol) { - hsa_symbol_t symbol_handle = - {reinterpret_cast(symbol)}; - return symbol_handle; - } - - static Symbol* Object(hsa_symbol_t symbol_handle) { - Symbol *symbol = - reinterpret_cast(symbol_handle.handle); - return symbol; - } - - virtual ~Symbol() {} - - virtual bool GetInfo(hsa_symbol_info32_t symbol_info, void *value) = 0; - -protected: - Symbol() {} - -private: - Symbol(const Symbol &s); - Symbol& operator=(const Symbol &s); -}; - -//===----------------------------------------------------------------------===// -// LoadedCodeObject. 
// -//===----------------------------------------------------------------------===// - -class LoadedCodeObject { -public: - static amd_loaded_code_object_t Handle(LoadedCodeObject *object) { - amd_loaded_code_object_t handle = - {reinterpret_cast(object)}; - return handle; - } - - static LoadedCodeObject* Object(amd_loaded_code_object_t handle) { - LoadedCodeObject *object = - reinterpret_cast(handle.handle); - return object; - } - - virtual ~LoadedCodeObject() {} - - virtual bool GetInfo(amd_loaded_code_object_info_t attribute, void *value) = 0; - - virtual hsa_status_t IterateLoadedSegments( - hsa_status_t (*callback)( - amd_loaded_segment_t loaded_segment, - void *data), - void *data) = 0; - -protected: - LoadedCodeObject() {} - -private: - LoadedCodeObject(const LoadedCodeObject&); - LoadedCodeObject& operator=(const LoadedCodeObject&); -}; - -//===----------------------------------------------------------------------===// -// LoadedSegment. // -//===----------------------------------------------------------------------===// - -class LoadedSegment { -public: - static amd_loaded_segment_t Handle(LoadedSegment *object) { - amd_loaded_segment_t handle = - {reinterpret_cast(object)}; - return handle; - } - - static LoadedSegment* Object(amd_loaded_segment_t handle) { - LoadedSegment *object = - reinterpret_cast(handle.handle); - return object; - } - - virtual ~LoadedSegment() {} - - virtual bool GetInfo(amd_loaded_segment_info_t attribute, void *value) = 0; - -protected: - LoadedSegment() {} - -private: - LoadedSegment(const LoadedSegment&); - LoadedSegment& operator=(const LoadedSegment&); -}; - -//===----------------------------------------------------------------------===// -// Executable. 
// -//===----------------------------------------------------------------------===// - -class Executable { -public: - static hsa_executable_t Handle(Executable *executable) { - hsa_executable_t executable_handle = - {reinterpret_cast(executable)}; - return executable_handle; - } - - static Executable* Object(hsa_executable_t executable_handle) { - Executable *executable = - reinterpret_cast(executable_handle.handle); - return executable; - } - - virtual ~Executable() {} - - virtual hsa_status_t GetInfo( - hsa_executable_info_t executable_info, void *value) = 0; - - virtual hsa_status_t DefineProgramExternalVariable( - const char *name, void *address) = 0; - - virtual hsa_status_t DefineAgentExternalVariable( - const char *name, - hsa_agent_t agent, - hsa_variable_segment_t segment, - void *address) = 0; - - virtual hsa_status_t LoadCodeObject( - hsa_agent_t agent, - hsa_code_object_t code_object, - const char *options, - amd_loaded_code_object_t *loaded_code_object = nullptr) = 0; - - virtual hsa_status_t LoadCodeObject( - hsa_agent_t agent, - hsa_code_object_t code_object, - size_t code_object_size, - const char *options, - amd_loaded_code_object_t *loaded_code_object = nullptr) = 0; - - virtual hsa_status_t Freeze(const char *options) = 0; - - virtual hsa_status_t Validate(uint32_t *result) = 0; - - virtual Symbol* GetSymbol( - const char *module_name, - const char *symbol_name, - hsa_agent_t agent, - int32_t call_convention) = 0; - - typedef hsa_status_t (*iterate_symbols_f)( - hsa_executable_t executable, - hsa_symbol_t symbol_handle, - void *data); - - virtual hsa_status_t IterateSymbols( - iterate_symbols_f callback, void *data) = 0; - - virtual hsa_status_t IterateLoadedCodeObjects( - hsa_status_t (*callback)( - amd_loaded_code_object_t loaded_code_object, - void *data), - void *data) = 0; - -protected: - Executable() {} - -private: - Executable(const Executable &e); - Executable& operator=(const Executable &e); - - static std::vector executables; - static 
std::mutex executables_mutex; -}; - -/// @class Loader -class Loader { -public: - /// @brief Destructor. - virtual ~Loader() {} - - /// @brief Creates AMD HSA Loader with specified @p context. - /// - /// @param[in] context Context. Must not be null. - /// - /// @returns AMD HSA Loader on success, null on failure. - static Loader* Create(Context* context); - - /// @brief Destroys AMD HSA Loader @p Loader_object. - /// - /// @param[in] loader AMD HSA Loader to destroy. Must not be null. - static void Destroy(Loader *loader); - - /// @returns Context associated with Loader. - virtual Context* GetContext() const = 0; - - /// @brief Creates empty AMD HSA Executable with specified @p profile, - /// @p options - virtual Executable* CreateExecutable(hsa_profile_t profile, const char *options) = 0; - - virtual void DestroyExecutable(Executable *executable) = 0; - - virtual hsa_status_t IterateExecutables( - hsa_status_t (*callback)( - hsa_executable_t executable, - void *data), - void *data) = 0; - -protected: - /// @brief Default constructor. - Loader() {} - -private: - /// @brief Copy constructor - not available. - Loader(const Loader&); - - /// @brief Assignment operator - not available. - Loader& operator=(const Loader&); -}; - - -} // namespace loader -} // namespace hsa -} // namespace amd - -#endif // AMD_HSA_LOADER_HPP diff --git a/runtime/hsa-runtime/core/inc/amd_load_map.h b/runtime/hsa-runtime/core/inc/amd_load_map.h deleted file mode 100644 index bd3f78c82d..0000000000 --- a/runtime/hsa-runtime/core/inc/amd_load_map.h +++ /dev/null @@ -1,174 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// -// The University of Illinois/NCSA -// Open Source License (NCSA) -// -// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. -// -// Developed by: -// -// AMD Research and AMD HSA Software Development -// -// Advanced Micro Devices, Inc. 
-// -// www.amd.com -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal with the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimers. -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimers in -// the documentation and/or other materials provided with the distribution. -// - Neither the names of Advanced Micro Devices, Inc, -// nor the names of its contributors may be used to endorse or promote -// products derived from this Software without specific prior written -// permission. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR -// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// DEALINGS WITH THE SOFTWARE. -// -//////////////////////////////////////////////////////////////////////////////// - -#ifndef AMD_LOAD_MAP_H -#define AMD_LOAD_MAP_H - -#include "hsa.h" - -#ifdef __cplusplus -extern "C" { -#endif // __cplusplus - -/// @todo. -enum { - AMD_EXTENSION_LOAD_MAP = 0x1002 -}; - -/// @todo. -typedef struct amd_loaded_code_object_s { - uint64_t handle; -} amd_loaded_code_object_t; - -/// @todo. 
-enum amd_loaded_code_object_info_t { - AMD_LOADED_CODE_OBJECT_INFO_ELF_IMAGE = 0, - AMD_LOADED_CODE_OBJECT_INFO_ELF_IMAGE_SIZE = 1 -}; - -/// @todo. -typedef struct amd_loaded_segment_s { - uint64_t handle; -} amd_loaded_segment_t; - -/// @todo. -enum amd_loaded_segment_info_t { - AMD_LOADED_SEGMENT_INFO_TYPE = 0, - AMD_LOADED_SEGMENT_INFO_ELF_BASE_ADDRESS = 1, - AMD_LOADED_SEGMENT_INFO_LOAD_BASE_ADDRESS = 2, - AMD_LOADED_SEGMENT_INFO_SIZE = 3 -}; - -/// @todo. -hsa_status_t amd_executable_load_code_object( - hsa_executable_t executable, - hsa_agent_t agent, - hsa_code_object_t code_object, - const char *options, - amd_loaded_code_object_t *loaded_code_object); - -/// @brief Invokes @p callback for each available executable in current -/// process. -hsa_status_t amd_iterate_executables( - hsa_status_t (*callback)( - hsa_executable_t executable, - void *data), - void *data); - -/// @brief Invokes @p callback for each loaded code object in specified -/// @p executable. -hsa_status_t amd_executable_iterate_loaded_code_objects( - hsa_executable_t executable, - hsa_status_t (*callback)( - amd_loaded_code_object_t loaded_code_object, - void *data), - void *data); - -/// @brief Retrieves current value of specified @p loaded_code_object's -/// @p attribute. -hsa_status_t amd_loaded_code_object_get_info( - amd_loaded_code_object_t loaded_code_object, - amd_loaded_code_object_info_t attribute, - void *value); - -/// @brief Invokes @p callback for each loaded segment in specified -/// @p loaded_code_object. -hsa_status_t amd_loaded_code_object_iterate_loaded_segments( - amd_loaded_code_object_t loaded_code_object, - hsa_status_t (*callback)( - amd_loaded_segment_t loaded_segment, - void *data), - void *data); - -/// @brief Retrieves current value of specified @p loaded_segment's -/// @p attribute. 
-hsa_status_t amd_loaded_segment_get_info( - amd_loaded_segment_t loaded_segment, - amd_loaded_segment_info_t attribute, - void *value); - -#define amd_load_map_1_00 - -typedef struct amd_load_map_1_00_pfn_s { - hsa_status_t (*amd_executable_load_code_object)( - hsa_executable_t executable, - hsa_agent_t agent, - hsa_code_object_t code_object, - const char *options, - amd_loaded_code_object_t *loaded_code_object); - - hsa_status_t (*amd_iterate_executables)( - hsa_status_t (*callback)( - hsa_executable_t executable, - void *data), - void *data); - - hsa_status_t (*amd_executable_iterate_loaded_code_objects)( - hsa_executable_t executable, - hsa_status_t (*callback)( - amd_loaded_code_object_t loaded_code_object, - void *data), - void *data); - - hsa_status_t (*amd_loaded_code_object_get_info)( - amd_loaded_code_object_t loaded_code_object, - amd_loaded_code_object_info_t attribute, - void *value); - - hsa_status_t (*amd_loaded_code_object_iterate_loaded_segments)( - amd_loaded_code_object_t loaded_code_object, - hsa_status_t (*callback)( - amd_loaded_segment_t loaded_segment, - void *data), - void *data); - - hsa_status_t (*amd_loaded_segment_get_info)( - amd_loaded_segment_t loaded_segment, - amd_loaded_segment_info_t attribute, - void *value); -} amd_load_map_1_00_pfn_t; - -#ifdef __cplusplus -} // extern "C" -#endif // __cplusplus - -#endif // AMD_LOAD_MAP_H diff --git a/runtime/hsa-runtime/core/inc/amd_loader_context.hpp b/runtime/hsa-runtime/core/inc/amd_loader_context.hpp deleted file mode 100644 index 19a6a5cfd9..0000000000 --- a/runtime/hsa-runtime/core/inc/amd_loader_context.hpp +++ /dev/null @@ -1,97 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// -// The University of Illinois/NCSA -// Open Source License (NCSA) -// -// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. -// -// Developed by: -// -// AMD Research and AMD HSA Software Development -// -// Advanced Micro Devices, Inc. 
-// -// www.amd.com -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal with the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimers. -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimers in -// the documentation and/or other materials provided with the distribution. -// - Neither the names of Advanced Micro Devices, Inc, -// nor the names of its contributors may be used to endorse or promote -// products derived from this Software without specific prior written -// permission. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR -// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// DEALINGS WITH THE SOFTWARE. 
-// -//////////////////////////////////////////////////////////////////////////////// - -#ifndef HSA_RUNTIME_CORE_INC_AMD_LOADER_CONTEXT_HPP -#define HSA_RUNTIME_CORE_INC_AMD_LOADER_CONTEXT_HPP - -#include "core/inc/amd_hsa_loader.hpp" - -namespace amd { - -class LoaderContext final: public hsa::loader::Context { -public: - LoaderContext(): hsa::loader::Context() {} - - ~LoaderContext() {} - - hsa_isa_t IsaFromName(const char *name) override; - - bool IsaSupportedByAgent(hsa_agent_t agent, hsa_isa_t code_object_isa) override; - - void* SegmentAlloc(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, size_t size, size_t align, bool zero) override; - - bool SegmentCopy(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, void* dst, size_t offset, const void* src, size_t size) override; - - void SegmentFree(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, void* seg, size_t size = 0) override; - - void* SegmentAddress(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, void* seg, size_t offset) override; - - void* SegmentHostAddress(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, void* seg, size_t offset) override; - - bool SegmentFreeze(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, void* seg, size_t size) override; - - bool ImageExtensionSupported(); - - hsa_status_t ImageCreate( - hsa_agent_t agent, - hsa_access_permission_t image_permission, - const hsa_ext_image_descriptor_t *image_descriptor, - const void *image_data, - hsa_ext_image_t *image_handle); - - hsa_status_t ImageDestroy(hsa_agent_t agent, hsa_ext_image_t image_handle); - - hsa_status_t SamplerCreate( - hsa_agent_t agent, - const hsa_ext_sampler_descriptor_t *sampler_descriptor, - hsa_ext_sampler_t *sampler_handle); - - hsa_status_t SamplerDestroy(hsa_agent_t agent, hsa_ext_sampler_t sampler_handle); - -private: - LoaderContext(const LoaderContext&); - LoaderContext& operator=(const LoaderContext&); -}; - -} // namespace amd - -#endif // HSA_RUNTIME_CORE_INC_AMD_LOADER_CONTEXT_HPP diff 
--git a/runtime/hsa-runtime/core/inc/amd_memory_region.h b/runtime/hsa-runtime/core/inc/amd_memory_region.h deleted file mode 100644 index fb3a6531e4..0000000000 --- a/runtime/hsa-runtime/core/inc/amd_memory_region.h +++ /dev/null @@ -1,191 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// -// The University of Illinois/NCSA -// Open Source License (NCSA) -// -// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. -// -// Developed by: -// -// AMD Research and AMD HSA Software Development -// -// Advanced Micro Devices, Inc. -// -// www.amd.com -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal with the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimers. -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimers in -// the documentation and/or other materials provided with the distribution. -// - Neither the names of Advanced Micro Devices, Inc, -// nor the names of its contributors may be used to endorse or promote -// products derived from this Software without specific prior written -// permission. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL -// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR -// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// DEALINGS WITH THE SOFTWARE. -// -//////////////////////////////////////////////////////////////////////////////// - -// AMD specific HSA backend. - -#ifndef HSA_RUNTIME_CORE_INC_AMD_MEMORY_REGION_H_ -#define HSA_RUNTIME_CORE_INC_AMD_MEMORY_REGION_H_ - -#include "hsakmt.h" - -#include "core/inc/agent.h" -#include "core/inc/memory_region.h" - -#include "inc/hsa_ext_amd.h" - -namespace amd { -class MemoryRegion : public core::MemoryRegion { - public: - /// @brief Convert this object into hsa_region_t. - static __forceinline hsa_region_t Convert(MemoryRegion* region) { - const hsa_region_t region_handle = { - static_cast(reinterpret_cast(region))}; - return region_handle; - } - - static __forceinline const hsa_region_t Convert(const MemoryRegion* region) { - const hsa_region_t region_handle = { - static_cast(reinterpret_cast(region))}; - return region_handle; - } - - /// @brief Convert hsa_region_t into amd::MemoryRegion *. - static __forceinline MemoryRegion* Convert(hsa_region_t region) { - return reinterpret_cast(region.handle); - } - - /// @brief Allocate agent accessible memory (system / local memory). - static void* AllocateKfdMemory(const HsaMemFlags& flag, HSAuint32 node_id, - size_t size); - - /// @brief Free agent accessible memory (system / local memory). - static void FreeKfdMemory(void* ptr, size_t size); - - static bool RegisterMemory(void* ptr, size_t size, size_t num_nodes, - const uint32_t* nodes); - - static void DeregisterMemory(void* ptr); - - /// @brief Pin memory. - static bool MakeKfdMemoryResident(size_t num_node, const uint32_t* nodes, - void* ptr, size_t size, - uint64_t* alternate_va, - HsaMemMapFlags map_flag); - - /// @brief Unpin memory. 
- static void MakeKfdMemoryUnresident(void* ptr); - - MemoryRegion(bool fine_grain, bool full_profile, core::Agent* owner, - const HsaMemoryProperties& mem_props); - - ~MemoryRegion(); - - hsa_status_t Allocate(size_t size, void** address) const; - - hsa_status_t Allocate(bool restrict_access, size_t size, - void** address) const; - - hsa_status_t Free(void* address, size_t size) const; - - hsa_status_t GetInfo(hsa_region_info_t attribute, void* value) const; - - hsa_status_t GetPoolInfo(hsa_amd_memory_pool_info_t attribute, - void* value) const; - - hsa_status_t GetAgentPoolInfo(const core::Agent& agent, - hsa_amd_agent_memory_pool_info_t attribute, - void* value) const; - - hsa_status_t AllowAccess(uint32_t num_agents, const hsa_agent_t* agents, - const void* ptr, size_t size) const; - - hsa_status_t CanMigrate(const MemoryRegion& dst, bool& result) const; - - hsa_status_t Migrate(uint32_t flag, const void* ptr) const; - - hsa_status_t Lock(uint32_t num_agents, const hsa_agent_t* agents, - void* host_ptr, size_t size, void** agent_ptr) const; - - hsa_status_t Unlock(void* host_ptr) const; - - HSAuint64 GetBaseAddress() const { return mem_props_.VirtualBaseAddress; } - - HSAuint64 GetPhysicalSize() const { return mem_props_.SizeInBytes; } - - HSAuint64 GetVirtualSize() const { return virtual_size_; } - - hsa_status_t AssignAgent(void* ptr, size_t size, const core::Agent& agent, - hsa_access_permission_t access) const; - - __forceinline bool IsLocalMemory() const { - return ((mem_props_.HeapType == HSA_HEAPTYPE_FRAME_BUFFER_PRIVATE) || - (mem_props_.HeapType == HSA_HEAPTYPE_FRAME_BUFFER_PUBLIC)); - } - - __forceinline bool IsPublic() const { - return (mem_props_.HeapType == HSA_HEAPTYPE_FRAME_BUFFER_PUBLIC); - } - - __forceinline bool IsSystem() const { - return mem_props_.HeapType == HSA_HEAPTYPE_SYSTEM; - } - - __forceinline bool IsLDS() const { - return mem_props_.HeapType == HSA_HEAPTYPE_GPU_LDS; - } - - __forceinline bool IsGDS() const { - return 
mem_props_.HeapType == HSA_HEAPTYPE_GPU_GDS; - } - - __forceinline bool IsScratch() const { - return mem_props_.HeapType == HSA_HEAPTYPE_GPU_SCRATCH; - } - - __forceinline bool IsSvm() const { - return mem_props_.HeapType == HSA_HEAPTYPE_DEVICE_SVM; - } - - __forceinline uint32_t BusWidth() const { - return static_cast(mem_props_.Width); - } - - __forceinline uint32_t MaxMemCloc() const { - return static_cast(mem_props_.MemoryClockMax); - } - - private: - const HsaMemoryProperties mem_props_; - - HsaMemFlags mem_flag_; - - HsaMemMapFlags map_flag_; - - size_t max_single_alloc_size_; - - HSAuint64 virtual_size_; - - static const size_t kPageSize_ = 4096; -}; -} // namespace - -#endif // header guard diff --git a/runtime/hsa-runtime/core/inc/amd_topology.h b/runtime/hsa-runtime/core/inc/amd_topology.h deleted file mode 100644 index 8e62679d14..0000000000 --- a/runtime/hsa-runtime/core/inc/amd_topology.h +++ /dev/null @@ -1,56 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// -// The University of Illinois/NCSA -// Open Source License (NCSA) -// -// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. -// -// Developed by: -// -// AMD Research and AMD HSA Software Development -// -// Advanced Micro Devices, Inc. -// -// www.amd.com -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal with the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimers. 
-// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimers in -// the documentation and/or other materials provided with the distribution. -// - Neither the names of Advanced Micro Devices, Inc, -// nor the names of its contributors may be used to endorse or promote -// products derived from this Software without specific prior written -// permission. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR -// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// DEALINGS WITH THE SOFTWARE. -// -//////////////////////////////////////////////////////////////////////////////// - -#ifndef HSA_RUNTIME_CORE_INC_AMD_TOPOLOGY_H_ -#define HSA_RUNTIME_CORE_INC_AMD_TOPOLOGY_H_ - -namespace amd { -/// @brief Initializes the runtime. -/// Should not be called directly, must be called only from Runtime::Acquire() -bool Load(); - -/// @brief Shutdown/cleanup of runtime. -/// Should not be called directly, must be called only from Runtime::Release() -bool Unload(); -} // namespace - -#endif // header guard diff --git a/runtime/hsa-runtime/core/inc/blit.h b/runtime/hsa-runtime/core/inc/blit.h deleted file mode 100644 index b3c94a25cd..0000000000 --- a/runtime/hsa-runtime/core/inc/blit.h +++ /dev/null @@ -1,108 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// -// The University of Illinois/NCSA -// Open Source License (NCSA) -// -// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. 
-// -// Developed by: -// -// AMD Research and AMD HSA Software Development -// -// Advanced Micro Devices, Inc. -// -// www.amd.com -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal with the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimers. -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimers in -// the documentation and/or other materials provided with the distribution. -// - Neither the names of Advanced Micro Devices, Inc, -// nor the names of its contributors may be used to endorse or promote -// products derived from this Software without specific prior written -// permission. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR -// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// DEALINGS WITH THE SOFTWARE. 
-// -//////////////////////////////////////////////////////////////////////////////// - -#ifndef HSA_RUNTIME_CORE_INC_BLIT_H_ -#define HSA_RUNTIME_CORE_INC_BLIT_H_ - -#include - -#include "core/inc/agent.h" - -namespace core { -class Blit { - public: - explicit Blit() {} - virtual ~Blit() {} - - /// @brief Initialize a blit object. - /// - /// @param agent Pointer to the agent that will execute the blit commands. - /// - /// @return hsa_status_t - virtual hsa_status_t Initialize(const core::Agent& agent) = 0; - - /// @brief Marks the blit object as invalid and uncouples its link with - /// the underlying compute device's control block. Use of blit object - /// once it has been release is illegal and any behavior is indeterminate - /// - /// @note: The call will block until all commands have executed. - /// - /// @return hsa_status_t - virtual hsa_status_t Destroy() = 0; - - /// @brief Submit a linear copy command to the the underlying compute device's - /// control block. The call is blocking until the command execution is - /// finished. - /// - /// @param dst Memory address of the copy destination. - /// @param src Memory address of the copy source. - /// @param size Size of the data to be copied. - virtual hsa_status_t SubmitLinearCopyCommand(void* dst, const void* src, - size_t size) = 0; - - /// @brief Submit a linear copy command to the the underlying compute device's - /// control block. The call is non blocking. The memory transfer will start - /// after all dependent signals are satisfied. After the transfer is - /// completed, the out signal will be decremented. - /// - /// @param dst Memory address of the copy destination. - /// @param src Memory address of the copy source. - /// @param size Size of the data to be copied. - /// @param dep_signals Arrays of dependent signal. - /// @param out_signal Output signal. 
- virtual hsa_status_t SubmitLinearCopyCommand( - void* dst, const void* src, size_t size, - std::vector& dep_signals, core::Signal& out_signal) = 0; - - /// @brief Submit a linear fill command to the the underlying compute device's - /// control block. The call is blocking until the command execution is - /// finished. - /// - /// @param ptr Memory address of the fill destination. - /// @param value Value to be set. - /// @param num Number of uint32_t element to be set to the value. - virtual hsa_status_t SubmitLinearFillCommand(void* ptr, uint32_t value, - size_t num) = 0; -}; -} // namespace core - -#endif // header guard diff --git a/runtime/hsa-runtime/core/inc/checked.h b/runtime/hsa-runtime/core/inc/checked.h deleted file mode 100644 index d0ad2ff6d0..0000000000 --- a/runtime/hsa-runtime/core/inc/checked.h +++ /dev/null @@ -1,75 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// -// The University of Illinois/NCSA -// Open Source License (NCSA) -// -// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. -// -// Developed by: -// -// AMD Research and AMD HSA Software Development -// -// Advanced Micro Devices, Inc. -// -// www.amd.com -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal with the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimers. 
-// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimers in -// the documentation and/or other materials provided with the distribution. -// - Neither the names of Advanced Micro Devices, Inc, -// nor the names of its contributors may be used to endorse or promote -// products derived from this Software without specific prior written -// permission. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR -// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// DEALINGS WITH THE SOFTWARE. -// -//////////////////////////////////////////////////////////////////////////////// - -#ifndef HSA_RUNTME_CORE_INC_CHECKED_H_ -#define HSA_RUNTME_CORE_INC_CHECKED_H_ - -#include "stdint.h" - -namespace core { - -/// @brief Base class for all classes whose validity can be checked using -/// IsValid() method. 
-template -class Checked { - public: - typedef Checked CheckedType; - - Checked() { object_ = uintptr_t(this) ^ uintptr_t(code); } - Checked(const Checked&) { object_ = uintptr_t(this) ^ uintptr_t(code); } - Checked(Checked&&) { object_ = uintptr_t(this) ^ uintptr_t(code); } - - virtual ~Checked() { object_ = NULL; } - - const Checked& operator=(Checked&& rhs) { return *this; } - const Checked& operator=(const Checked& rhs) { return *this; } - - bool IsValid() const { - return object_ == (uintptr_t(this) ^ uintptr_t(code)); - } - - private: - uintptr_t object_; -}; - -} // namespace core -#endif // header guard diff --git a/runtime/hsa-runtime/core/inc/default_signal.h b/runtime/hsa-runtime/core/inc/default_signal.h deleted file mode 100644 index f0f13eb06e..0000000000 --- a/runtime/hsa-runtime/core/inc/default_signal.h +++ /dev/null @@ -1,174 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// -// The University of Illinois/NCSA -// Open Source License (NCSA) -// -// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. -// -// Developed by: -// -// AMD Research and AMD HSA Software Development -// -// Advanced Micro Devices, Inc. -// -// www.amd.com -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal with the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimers. 
-// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimers in -// the documentation and/or other materials provided with the distribution. -// - Neither the names of Advanced Micro Devices, Inc, -// nor the names of its contributors may be used to endorse or promote -// products derived from this Software without specific prior written -// permission. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR -// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// DEALINGS WITH THE SOFTWARE. -// -//////////////////////////////////////////////////////////////////////////////// - -// HSA runtime C++ interface file. - -#ifndef HSA_RUNTME_CORE_INC_DEFAULT_SIGNAL_H_ -#define HSA_RUNTME_CORE_INC_DEFAULT_SIGNAL_H_ - -#include "core/inc/runtime.h" -#include "core/inc/signal.h" -#include "core/util/utils.h" - -namespace core { - -/// @brief Simple pure memory based signal. -/// @brief See base class Signal. -class DefaultSignal : public Signal { - public: - /// @brief Determines if a Signal* can be safely converted to DefaultSignal* - /// via static_cast. - static __forceinline bool IsType(Signal* ptr) { - return ptr->IsType(&rtti_id_); - } - - /// @brief See base class Signal. - explicit DefaultSignal(hsa_signal_value_t initial_value); - - /// @brief See base class Signal. - ~DefaultSignal(); - - // Below are various methods corresponding to the APIs, which load/store the - // signal value or modify the existing signal value automically and with - // specified memory ordering semantics. 
- - hsa_signal_value_t LoadRelaxed(); - - hsa_signal_value_t LoadAcquire(); - - void StoreRelaxed(hsa_signal_value_t value); - - void StoreRelease(hsa_signal_value_t value); - - hsa_signal_value_t WaitRelaxed(hsa_signal_condition_t condition, - hsa_signal_value_t compare_value, - uint64_t timeout, hsa_wait_state_t wait_hint); - - hsa_signal_value_t WaitAcquire(hsa_signal_condition_t condition, - hsa_signal_value_t compare_value, - uint64_t timeout, hsa_wait_state_t wait_hint); - - void AndRelaxed(hsa_signal_value_t value); - - void AndAcquire(hsa_signal_value_t value); - - void AndRelease(hsa_signal_value_t value); - - void AndAcqRel(hsa_signal_value_t value); - - void OrRelaxed(hsa_signal_value_t value); - - void OrAcquire(hsa_signal_value_t value); - - void OrRelease(hsa_signal_value_t value); - - void OrAcqRel(hsa_signal_value_t value); - - void XorRelaxed(hsa_signal_value_t value); - - void XorAcquire(hsa_signal_value_t value); - - void XorRelease(hsa_signal_value_t value); - - void XorAcqRel(hsa_signal_value_t value); - - void AddRelaxed(hsa_signal_value_t value); - - void AddAcquire(hsa_signal_value_t value); - - void AddRelease(hsa_signal_value_t value); - - void AddAcqRel(hsa_signal_value_t value); - - void SubRelaxed(hsa_signal_value_t value); - - void SubAcquire(hsa_signal_value_t value); - - void SubRelease(hsa_signal_value_t value); - - void SubAcqRel(hsa_signal_value_t value); - - hsa_signal_value_t ExchRelaxed(hsa_signal_value_t value); - - hsa_signal_value_t ExchAcquire(hsa_signal_value_t value); - - hsa_signal_value_t ExchRelease(hsa_signal_value_t value); - - hsa_signal_value_t ExchAcqRel(hsa_signal_value_t value); - - hsa_signal_value_t CasRelaxed(hsa_signal_value_t expected, - hsa_signal_value_t value); - - hsa_signal_value_t CasAcquire(hsa_signal_value_t expected, - hsa_signal_value_t value); - - hsa_signal_value_t CasRelease(hsa_signal_value_t expected, - hsa_signal_value_t value); - - hsa_signal_value_t CasAcqRel(hsa_signal_value_t expected, - 
hsa_signal_value_t value); - - /// @brief see the base class Signal - __forceinline hsa_signal_value_t* ValueLocation() const { - return (hsa_signal_value_t*)&signal_.value; - } - - /// @brief see the base class Signal - __forceinline HsaEvent* EopEvent() { return NULL; } - - /// @brief prevent throwing exceptions - void* operator new(size_t size) { return malloc(size); } - - /// @brief prevent throwing exceptions - void operator delete(void* ptr) { free(ptr); } - - protected: - bool _IsA(rtti_t id) const { return id == &rtti_id_; } - - private: - static int rtti_id_; - - DISALLOW_COPY_AND_ASSIGN(DefaultSignal); -}; - -} // namespace core -#endif // header guard diff --git a/runtime/hsa-runtime/core/inc/host_queue.h b/runtime/hsa-runtime/core/inc/host_queue.h deleted file mode 100644 index e3ad022f7e..0000000000 --- a/runtime/hsa-runtime/core/inc/host_queue.h +++ /dev/null @@ -1,167 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// -// The University of Illinois/NCSA -// Open Source License (NCSA) -// -// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. -// -// Developed by: -// -// AMD Research and AMD HSA Software Development -// -// Advanced Micro Devices, Inc. -// -// www.amd.com -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal with the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimers. 
-// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimers in -// the documentation and/or other materials provided with the distribution. -// - Neither the names of Advanced Micro Devices, Inc, -// nor the names of its contributors may be used to endorse or promote -// products derived from this Software without specific prior written -// permission. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR -// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// DEALINGS WITH THE SOFTWARE. -// -//////////////////////////////////////////////////////////////////////////////// - -#ifndef HSA_RUNTIME_CORE_INC_HOST_QUEUE_H_ -#define HSA_RUNTIME_CORE_INC_HOST_QUEUE_H_ - -#include "core/inc/memory_region.h" -#include "core/inc/queue.h" -#include "core/inc/runtime.h" -#include "core/inc/signal.h" - -namespace core { -class HostQueue : public Queue { - public: - HostQueue(hsa_region_t region, uint32_t ring_size, hsa_queue_type_t type, - uint32_t features, hsa_signal_t doorbell_signal); - - ~HostQueue(); - - hsa_status_t Inactivate() { return HSA_STATUS_SUCCESS; } - - uint64_t LoadReadIndexAcquire() { - return atomic::Load(&amd_queue_.read_dispatch_id, - std::memory_order_acquire); - } - - uint64_t LoadReadIndexRelaxed() { - return atomic::Load(&amd_queue_.read_dispatch_id, - std::memory_order_relaxed); - } - - uint64_t LoadWriteIndexAcquire() { - return atomic::Load(&amd_queue_.write_dispatch_id, - std::memory_order_acquire); - } - - uint64_t LoadWriteIndexRelaxed() { - return atomic::Load(&amd_queue_.write_dispatch_id, - 
std::memory_order_relaxed); - } - - void StoreReadIndexRelaxed(uint64_t value) { - atomic::Store(&amd_queue_.read_dispatch_id, value, - std::memory_order_relaxed); - } - - void StoreReadIndexRelease(uint64_t value) { - atomic::Store(&amd_queue_.read_dispatch_id, value, - std::memory_order_release); - } - - void StoreWriteIndexRelaxed(uint64_t value) { - atomic::Store(&amd_queue_.write_dispatch_id, value, - std::memory_order_relaxed); - } - - void StoreWriteIndexRelease(uint64_t value) { - atomic::Store(&amd_queue_.write_dispatch_id, value, - std::memory_order_release); - } - - uint64_t CasWriteIndexAcqRel(uint64_t expected, uint64_t value) { - return atomic::Cas(&amd_queue_.write_dispatch_id, value, expected, - std::memory_order_acq_rel); - } - - uint64_t CasWriteIndexAcquire(uint64_t expected, uint64_t value) { - return atomic::Cas(&amd_queue_.write_dispatch_id, value, expected, - std::memory_order_acquire); - } - - uint64_t CasWriteIndexRelaxed(uint64_t expected, uint64_t value) { - return atomic::Cas(&amd_queue_.write_dispatch_id, value, expected, - std::memory_order_relaxed); - } - - uint64_t CasWriteIndexRelease(uint64_t expected, uint64_t value) { - return atomic::Cas(&amd_queue_.write_dispatch_id, value, expected, - std::memory_order_release); - } - - uint64_t AddWriteIndexAcqRel(uint64_t value) { - return atomic::Add(&amd_queue_.write_dispatch_id, value, - std::memory_order_acq_rel); - } - - uint64_t AddWriteIndexAcquire(uint64_t value) { - return atomic::Add(&amd_queue_.write_dispatch_id, value, - std::memory_order_acquire); - } - - uint64_t AddWriteIndexRelaxed(uint64_t value) { - return atomic::Add(&amd_queue_.write_dispatch_id, value, - std::memory_order_relaxed); - } - - uint64_t AddWriteIndexRelease(uint64_t value) { - return atomic::Add(&amd_queue_.write_dispatch_id, value, - std::memory_order_release); - } - - hsa_status_t SetCUMasking(const uint32_t num_cu_mask_count, - const uint32_t* cu_mask) { - return HSA_STATUS_ERROR; - } - - bool active() 
const { return active_; } - - void* operator new(size_t size) { - return _aligned_malloc(size, HSA_QUEUE_ALIGN_BYTES); - } - - void* operator new(size_t size, void* ptr) { return ptr; } - - void operator delete(void* ptr) { _aligned_free(ptr); } - - void operator delete(void*, void*) {} - - private: - static const size_t kRingAlignment = 256; - const uint32_t size_; - bool active_; - void* ring_; - - DISALLOW_COPY_AND_ASSIGN(HostQueue); -}; -} // namespace core -#endif // header guard diff --git a/runtime/hsa-runtime/core/inc/hsa_api_trace_int.h b/runtime/hsa-runtime/core/inc/hsa_api_trace_int.h deleted file mode 100644 index e4aa194342..0000000000 --- a/runtime/hsa-runtime/core/inc/hsa_api_trace_int.h +++ /dev/null @@ -1,63 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// -// The University of Illinois/NCSA -// Open Source License (NCSA) -// -// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. -// -// Developed by: -// -// AMD Research and AMD HSA Software Development -// -// Advanced Micro Devices, Inc. -// -// www.amd.com -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal with the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimers. -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimers in -// the documentation and/or other materials provided with the distribution. 
-// - Neither the names of Advanced Micro Devices, Inc, -// nor the names of its contributors may be used to endorse or promote -// products derived from this Software without specific prior written -// permission. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR -// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// DEALINGS WITH THE SOFTWARE. -// -//////////////////////////////////////////////////////////////////////////////// - -#ifndef HSA_RUNTIME_CORE_INC_HSA_API_TRACE_INT_H -#define HSA_RUNTIME_CORE_INC_HSA_API_TRACE_INT_H - -#include "inc/hsa_api_trace.h" -#include "core/inc/hsa_internal.h" - -namespace core { -struct ApiTable { - ::ApiTable table; - ExtTable extension_backup; - - ApiTable(); - void Reset(); - void LinkExts(ExtTable* ptr); -}; - -extern ApiTable hsa_api_table_; -extern ApiTable hsa_internal_api_table_; -} - -#endif diff --git a/runtime/hsa-runtime/core/inc/hsa_ext_interface.h b/runtime/hsa-runtime/core/inc/hsa_ext_interface.h deleted file mode 100644 index f0692e1440..0000000000 --- a/runtime/hsa-runtime/core/inc/hsa_ext_interface.h +++ /dev/null @@ -1,80 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// -// The University of Illinois/NCSA -// Open Source License (NCSA) -// -// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. -// -// Developed by: -// -// AMD Research and AMD HSA Software Development -// -// Advanced Micro Devices, Inc. 
-// -// www.amd.com -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal with the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimers. -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimers in -// the documentation and/or other materials provided with the distribution. -// - Neither the names of Advanced Micro Devices, Inc, -// nor the names of its contributors may be used to endorse or promote -// products derived from this Software without specific prior written -// permission. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR -// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// DEALINGS WITH THE SOFTWARE. 
-// -//////////////////////////////////////////////////////////////////////////////// - -#ifndef HSA_RUNTME_CORE_INC_AMD_EXT_INTERFACE_H_ -#define HSA_RUNTME_CORE_INC_AMD_EXT_INTERFACE_H_ - -#include -#include - -#include "hsa_api_trace_int.h" - -#include "core/util/os.h" -#include "core/util/utils.h" - -namespace core { -struct ExtTableInternal : public ExtTable { - decltype(::hsa_amd_image_get_info_max_dim)* hsa_amd_image_get_info_max_dim_fn; - decltype(::hsa_amd_image_create)* hsa_amd_image_create_fn; -}; - -class ExtensionEntryPoints { - public: - ExtTableInternal table; - - ExtensionEntryPoints(); - - bool Load(std::string library_name); - void Unload(); - - private: - typedef void (*Load_t)(const ::ApiTable* table); - typedef void (*Unload_t)(); - - std::vector libs_; - - void InitTable(); - DISALLOW_COPY_AND_ASSIGN(ExtensionEntryPoints); -}; -} - -#endif diff --git a/runtime/hsa-runtime/core/inc/hsa_internal.h b/runtime/hsa-runtime/core/inc/hsa_internal.h deleted file mode 100644 index e1d3806425..0000000000 --- a/runtime/hsa-runtime/core/inc/hsa_internal.h +++ /dev/null @@ -1,347 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// -// The University of Illinois/NCSA -// Open Source License (NCSA) -// -// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. -// -// Developed by: -// -// AMD Research and AMD HSA Software Development -// -// Advanced Micro Devices, Inc. 
-// -// www.amd.com -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal with the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimers. -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimers in -// the documentation and/or other materials provided with the distribution. -// - Neither the names of Advanced Micro Devices, Inc, -// nor the names of its contributors may be used to endorse or promote -// products derived from this Software without specific prior written -// permission. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR -// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// DEALINGS WITH THE SOFTWARE. 
-// -//////////////////////////////////////////////////////////////////////////////// - -#ifndef HSA_RUNTIME_CORE_INC_HSA_INTERNAL_H -#define HSA_RUNTIME_CORE_INC_HSA_INTERNAL_H - -#include "inc/hsa.h" - -namespace HSA -{ - - // Define core namespace interfaces - copy of function declarations in hsa.h - hsa_status_t HSA_API hsa_init(); - hsa_status_t HSA_API hsa_shut_down(); - hsa_status_t HSA_API - hsa_system_get_info(hsa_system_info_t attribute, void *value); - hsa_status_t HSA_API - hsa_system_extension_supported(uint16_t extension, uint16_t version_major, - uint16_t version_minor, bool *result); - hsa_status_t HSA_API - hsa_system_get_extension_table(uint16_t extension, uint16_t version_major, - uint16_t version_minor, void *table); - hsa_status_t HSA_API - hsa_iterate_agents(hsa_status_t (*callback)(hsa_agent_t agent, void *data), - void *data); - hsa_status_t HSA_API hsa_agent_get_info(hsa_agent_t agent, - hsa_agent_info_t attribute, - void *value); - hsa_status_t HSA_API hsa_agent_get_exception_policies(hsa_agent_t agent, - hsa_profile_t profile, - uint16_t *mask); - hsa_status_t HSA_API - hsa_agent_extension_supported(uint16_t extension, hsa_agent_t agent, - uint16_t version_major, - uint16_t version_minor, bool *result); - hsa_status_t HSA_API - hsa_queue_create(hsa_agent_t agent, uint32_t size, hsa_queue_type_t type, - void (*callback)(hsa_status_t status, hsa_queue_t *source, - void *data), - void *data, uint32_t private_segment_size, - uint32_t group_segment_size, hsa_queue_t **queue); - hsa_status_t HSA_API - hsa_soft_queue_create(hsa_region_t region, uint32_t size, - hsa_queue_type_t type, uint32_t features, - hsa_signal_t completion_signal, hsa_queue_t **queue); - hsa_status_t HSA_API hsa_queue_destroy(hsa_queue_t *queue); - hsa_status_t HSA_API hsa_queue_inactivate(hsa_queue_t *queue); - uint64_t HSA_API hsa_queue_load_read_index_acquire(const hsa_queue_t *queue); - uint64_t HSA_API hsa_queue_load_read_index_relaxed(const hsa_queue_t *queue); - 
uint64_t HSA_API hsa_queue_load_write_index_acquire(const hsa_queue_t *queue); - uint64_t HSA_API hsa_queue_load_write_index_relaxed(const hsa_queue_t *queue); - void HSA_API hsa_queue_store_write_index_relaxed(const hsa_queue_t *queue, - uint64_t value); - void HSA_API hsa_queue_store_write_index_release(const hsa_queue_t *queue, - uint64_t value); - uint64_t HSA_API hsa_queue_cas_write_index_acq_rel(const hsa_queue_t *queue, - uint64_t expected, - uint64_t value); - uint64_t HSA_API hsa_queue_cas_write_index_acquire(const hsa_queue_t *queue, - uint64_t expected, - uint64_t value); - uint64_t HSA_API hsa_queue_cas_write_index_relaxed(const hsa_queue_t *queue, - uint64_t expected, - uint64_t value); - uint64_t HSA_API hsa_queue_cas_write_index_release(const hsa_queue_t *queue, - uint64_t expected, - uint64_t value); - uint64_t HSA_API - hsa_queue_add_write_index_acq_rel(const hsa_queue_t *queue, uint64_t value); - uint64_t HSA_API - hsa_queue_add_write_index_acquire(const hsa_queue_t *queue, uint64_t value); - uint64_t HSA_API - hsa_queue_add_write_index_relaxed(const hsa_queue_t *queue, uint64_t value); - uint64_t HSA_API - hsa_queue_add_write_index_release(const hsa_queue_t *queue, uint64_t value); - void HSA_API hsa_queue_store_read_index_relaxed(const hsa_queue_t *queue, - uint64_t value); - void HSA_API hsa_queue_store_read_index_release(const hsa_queue_t *queue, - uint64_t value); - hsa_status_t HSA_API hsa_agent_iterate_regions( - hsa_agent_t agent, - hsa_status_t (*callback)(hsa_region_t region, void *data), void *data); - hsa_status_t HSA_API hsa_region_get_info(hsa_region_t region, - hsa_region_info_t attribute, - void *value); - hsa_status_t HSA_API hsa_memory_register(void *address, size_t size); - hsa_status_t HSA_API hsa_memory_deregister(void *address, size_t size); - hsa_status_t HSA_API - hsa_memory_allocate(hsa_region_t region, size_t size, void **ptr); - hsa_status_t HSA_API hsa_memory_free(void *ptr); - hsa_status_t HSA_API hsa_memory_copy(void 
*dst, const void *src, size_t size); - hsa_status_t HSA_API hsa_memory_assign_agent(void *ptr, hsa_agent_t agent, - hsa_access_permission_t access); - hsa_status_t HSA_API - hsa_signal_create(hsa_signal_value_t initial_value, uint32_t num_consumers, - const hsa_agent_t *consumers, hsa_signal_t *signal); - hsa_status_t HSA_API hsa_signal_destroy(hsa_signal_t signal); - hsa_signal_value_t HSA_API hsa_signal_load_relaxed(hsa_signal_t signal); - hsa_signal_value_t HSA_API hsa_signal_load_acquire(hsa_signal_t signal); - void HSA_API - hsa_signal_store_relaxed(hsa_signal_t signal, hsa_signal_value_t value); - void HSA_API - hsa_signal_store_release(hsa_signal_t signal, hsa_signal_value_t value); - hsa_signal_value_t HSA_API - hsa_signal_wait_relaxed(hsa_signal_t signal, - hsa_signal_condition_t condition, - hsa_signal_value_t compare_value, - uint64_t timeout_hint, - hsa_wait_state_t wait_expectancy_hint); - hsa_signal_value_t HSA_API - hsa_signal_wait_acquire(hsa_signal_t signal, - hsa_signal_condition_t condition, - hsa_signal_value_t compare_value, - uint64_t timeout_hint, - hsa_wait_state_t wait_expectancy_hint); - void HSA_API - hsa_signal_and_relaxed(hsa_signal_t signal, hsa_signal_value_t value); - void HSA_API - hsa_signal_and_acquire(hsa_signal_t signal, hsa_signal_value_t value); - void HSA_API - hsa_signal_and_release(hsa_signal_t signal, hsa_signal_value_t value); - void HSA_API - hsa_signal_and_acq_rel(hsa_signal_t signal, hsa_signal_value_t value); - void HSA_API - hsa_signal_or_relaxed(hsa_signal_t signal, hsa_signal_value_t value); - void HSA_API - hsa_signal_or_acquire(hsa_signal_t signal, hsa_signal_value_t value); - void HSA_API - hsa_signal_or_release(hsa_signal_t signal, hsa_signal_value_t value); - void HSA_API - hsa_signal_or_acq_rel(hsa_signal_t signal, hsa_signal_value_t value); - void HSA_API - hsa_signal_xor_relaxed(hsa_signal_t signal, hsa_signal_value_t value); - void HSA_API - hsa_signal_xor_acquire(hsa_signal_t signal, hsa_signal_value_t 
value); - void HSA_API - hsa_signal_xor_release(hsa_signal_t signal, hsa_signal_value_t value); - void HSA_API - hsa_signal_xor_acq_rel(hsa_signal_t signal, hsa_signal_value_t value); - void HSA_API - hsa_signal_add_relaxed(hsa_signal_t signal, hsa_signal_value_t value); - void HSA_API - hsa_signal_add_acquire(hsa_signal_t signal, hsa_signal_value_t value); - void HSA_API - hsa_signal_add_release(hsa_signal_t signal, hsa_signal_value_t value); - void HSA_API - hsa_signal_add_acq_rel(hsa_signal_t signal, hsa_signal_value_t value); - void HSA_API - hsa_signal_subtract_relaxed(hsa_signal_t signal, hsa_signal_value_t value); - void HSA_API - hsa_signal_subtract_acquire(hsa_signal_t signal, hsa_signal_value_t value); - void HSA_API - hsa_signal_subtract_release(hsa_signal_t signal, hsa_signal_value_t value); - void HSA_API - hsa_signal_subtract_acq_rel(hsa_signal_t signal, hsa_signal_value_t value); - hsa_signal_value_t HSA_API - hsa_signal_exchange_relaxed(hsa_signal_t signal, hsa_signal_value_t value); - hsa_signal_value_t HSA_API - hsa_signal_exchange_acquire(hsa_signal_t signal, hsa_signal_value_t value); - hsa_signal_value_t HSA_API - hsa_signal_exchange_release(hsa_signal_t signal, hsa_signal_value_t value); - hsa_signal_value_t HSA_API - hsa_signal_exchange_acq_rel(hsa_signal_t signal, hsa_signal_value_t value); - hsa_signal_value_t HSA_API hsa_signal_cas_relaxed(hsa_signal_t signal, - hsa_signal_value_t expected, - hsa_signal_value_t value); - hsa_signal_value_t HSA_API hsa_signal_cas_acquire(hsa_signal_t signal, - hsa_signal_value_t expected, - hsa_signal_value_t value); - hsa_signal_value_t HSA_API hsa_signal_cas_release(hsa_signal_t signal, - hsa_signal_value_t expected, - hsa_signal_value_t value); - hsa_signal_value_t HSA_API hsa_signal_cas_acq_rel(hsa_signal_t signal, - hsa_signal_value_t expected, - hsa_signal_value_t value); - hsa_status_t hsa_isa_from_name( - const char *name, - hsa_isa_t *isa - ); - hsa_status_t HSA_API hsa_isa_get_info( - hsa_isa_t 
isa, - hsa_isa_info_t attribute, - uint32_t index, - void *value - ); - hsa_status_t hsa_isa_compatible( - hsa_isa_t code_object_isa, - hsa_isa_t agent_isa, - bool *result - ); - hsa_status_t HSA_API hsa_code_object_serialize( - hsa_code_object_t code_object, - hsa_status_t (*alloc_callback)( - size_t size, hsa_callback_data_t data, void **address - ), - hsa_callback_data_t callback_data, - const char *options, - void **serialized_code_object, - size_t *serialized_code_object_size - ); - hsa_status_t HSA_API hsa_code_object_deserialize( - void *serialized_code_object, - size_t serialized_code_object_size, - const char *options, - hsa_code_object_t *code_object - ); - hsa_status_t HSA_API hsa_code_object_destroy( - hsa_code_object_t code_object - ); - hsa_status_t HSA_API hsa_code_object_get_info( - hsa_code_object_t code_object, - hsa_code_object_info_t attribute, - void *value - ); - hsa_status_t HSA_API hsa_code_object_get_symbol( - hsa_code_object_t code_object, - const char *symbol_name, - hsa_code_symbol_t *symbol - ); - hsa_status_t HSA_API hsa_code_symbol_get_info( - hsa_code_symbol_t code_symbol, - hsa_code_symbol_info_t attribute, - void *value - ); - hsa_status_t HSA_API hsa_code_object_iterate_symbols( - hsa_code_object_t code_object, - hsa_status_t (*callback)( - hsa_code_object_t code_object, hsa_code_symbol_t symbol, void *data - ), - void *data - ); - hsa_status_t HSA_API hsa_executable_create( - hsa_profile_t profile, - hsa_executable_state_t executable_state, - const char *options, - hsa_executable_t *executable - ); - hsa_status_t HSA_API hsa_executable_destroy( - hsa_executable_t executable - ); - hsa_status_t HSA_API hsa_executable_load_code_object( - hsa_executable_t executable, - hsa_agent_t agent, - hsa_code_object_t code_object, - const char *options - ); - hsa_status_t HSA_API hsa_executable_freeze( - hsa_executable_t executable, - const char *options - ); - hsa_status_t HSA_API hsa_executable_get_info( - hsa_executable_t executable, - 
hsa_executable_info_t attribute, - void *value - ); - hsa_status_t HSA_API hsa_executable_global_variable_define( - hsa_executable_t executable, - const char *variable_name, - void *address - ); - hsa_status_t HSA_API hsa_executable_agent_global_variable_define( - hsa_executable_t executable, - hsa_agent_t agent, - const char *variable_name, - void *address - ); - hsa_status_t HSA_API hsa_executable_readonly_variable_define( - hsa_executable_t executable, - hsa_agent_t agent, - const char *variable_name, - void *address - ); - hsa_status_t HSA_API hsa_executable_validate( - hsa_executable_t executable, - uint32_t *result - ); - hsa_status_t HSA_API hsa_executable_get_symbol( - hsa_executable_t executable, - const char *module_name, - const char *symbol_name, - hsa_agent_t agent, - int32_t call_convention, - hsa_executable_symbol_t *symbol - ); - hsa_status_t HSA_API hsa_executable_symbol_get_info( - hsa_executable_symbol_t executable_symbol, - hsa_executable_symbol_info_t attribute, - void *value - ); - hsa_status_t HSA_API hsa_executable_iterate_symbols( - hsa_executable_t executable, - hsa_status_t (*callback)( - hsa_executable_t executable, hsa_executable_symbol_t symbol, void *data - ), - void *data - ); - hsa_status_t HSA_API - hsa_status_string(hsa_status_t status, const char **status_string); - -} - -#ifdef BUILDING_HSA_CORE_RUNTIME -//This using declaration is deliberate! -//We want unqualified name resolution to fail when building the runtime. This is a guard against accidental use of the intercept layer in the runtime. 
-using namespace HSA; -#endif - -#endif diff --git a/runtime/hsa-runtime/core/inc/hsa_table_interface.h b/runtime/hsa-runtime/core/inc/hsa_table_interface.h deleted file mode 100644 index 8769de8825..0000000000 --- a/runtime/hsa-runtime/core/inc/hsa_table_interface.h +++ /dev/null @@ -1,47 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// -// The University of Illinois/NCSA -// Open Source License (NCSA) -// -// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. -// -// Developed by: -// -// AMD Research and AMD HSA Software Development -// -// Advanced Micro Devices, Inc. -// -// www.amd.com -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal with the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimers. -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimers in -// the documentation and/or other materials provided with the distribution. -// - Neither the names of Advanced Micro Devices, Inc, -// nor the names of its contributors may be used to endorse or promote -// products derived from this Software without specific prior written -// permission. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL -// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR -// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// DEALINGS WITH THE SOFTWARE. -// -//////////////////////////////////////////////////////////////////////////////// - -#include "hsa_api_trace.h" - -void hsa_table_interface_init(const ApiTable* table); - -const ApiTable* hsa_table_interface_get_table(); diff --git a/runtime/hsa-runtime/core/inc/interrupt_signal.h b/runtime/hsa-runtime/core/inc/interrupt_signal.h deleted file mode 100644 index 19c2d59642..0000000000 --- a/runtime/hsa-runtime/core/inc/interrupt_signal.h +++ /dev/null @@ -1,206 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// -// The University of Illinois/NCSA -// Open Source License (NCSA) -// -// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. -// -// Developed by: -// -// AMD Research and AMD HSA Software Development -// -// Advanced Micro Devices, Inc. -// -// www.amd.com -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal with the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimers. -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimers in -// the documentation and/or other materials provided with the distribution. 
-// - Neither the names of Advanced Micro Devices, Inc, -// nor the names of its contributors may be used to endorse or promote -// products derived from this Software without specific prior written -// permission. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR -// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// DEALINGS WITH THE SOFTWARE. -// -//////////////////////////////////////////////////////////////////////////////// - -// HSA runtime C++ interface file. - -#ifndef HSA_RUNTME_CORE_INC_INTERRUPT_SIGNAL_H_ -#define HSA_RUNTME_CORE_INC_INTERRUPT_SIGNAL_H_ - -#include "hsakmt.h" - -#include "core/inc/runtime.h" -#include "core/inc/signal.h" -#include "core/util/utils.h" - -namespace core { - -/// @brief A Signal implementation using interrupts versus plain memory based. -/// Also see base class Signal. -/// -/// Breaks common/vendor separation - signals in general needs to be re-worked -/// at the foundation level to make sense in a multi-device system. -/// Supports only one waiter for now. -/// KFD changes are needed to support multiple waiters and have device -/// signaling. -class InterruptSignal : public Signal { - public: - static HsaEvent* CreateEvent(HSA_EVENTTYPE type, bool manual_reset); - static void DestroyEvent(HsaEvent* evt); - - /// @brief Determines if a Signal* can be safely converted to an - /// InterruptSignal* via static_cast. 
- static __forceinline bool IsType(Signal* ptr) { - return ptr->IsType(&rtti_id_); - } - - explicit InterruptSignal(hsa_signal_value_t initial_value, - HsaEvent* use_event = NULL); - - ~InterruptSignal(); - - // Below are various methods corresponding to the APIs, which load/store the - // signal value or modify the existing signal value automically and with - // specified memory ordering semantics. - - hsa_signal_value_t LoadRelaxed(); - - hsa_signal_value_t LoadAcquire(); - - void StoreRelaxed(hsa_signal_value_t value); - - void StoreRelease(hsa_signal_value_t value); - - hsa_signal_value_t WaitRelaxed(hsa_signal_condition_t condition, - hsa_signal_value_t compare_value, - uint64_t timeout, hsa_wait_state_t wait_hint); - - hsa_signal_value_t WaitAcquire(hsa_signal_condition_t condition, - hsa_signal_value_t compare_value, - uint64_t timeout, hsa_wait_state_t wait_hint); - - void AndRelaxed(hsa_signal_value_t value); - - void AndAcquire(hsa_signal_value_t value); - - void AndRelease(hsa_signal_value_t value); - - void AndAcqRel(hsa_signal_value_t value); - - void OrRelaxed(hsa_signal_value_t value); - - void OrAcquire(hsa_signal_value_t value); - - void OrRelease(hsa_signal_value_t value); - - void OrAcqRel(hsa_signal_value_t value); - - void XorRelaxed(hsa_signal_value_t value); - - void XorAcquire(hsa_signal_value_t value); - - void XorRelease(hsa_signal_value_t value); - - void XorAcqRel(hsa_signal_value_t value); - - void AddRelaxed(hsa_signal_value_t value); - - void AddAcquire(hsa_signal_value_t value); - - void AddRelease(hsa_signal_value_t value); - - void AddAcqRel(hsa_signal_value_t value); - - void SubRelaxed(hsa_signal_value_t value); - - void SubAcquire(hsa_signal_value_t value); - - void SubRelease(hsa_signal_value_t value); - - void SubAcqRel(hsa_signal_value_t value); - - hsa_signal_value_t ExchRelaxed(hsa_signal_value_t value); - - hsa_signal_value_t ExchAcquire(hsa_signal_value_t value); - - hsa_signal_value_t ExchRelease(hsa_signal_value_t 
value); - - hsa_signal_value_t ExchAcqRel(hsa_signal_value_t value); - - hsa_signal_value_t CasRelaxed(hsa_signal_value_t expected, - hsa_signal_value_t value); - - hsa_signal_value_t CasAcquire(hsa_signal_value_t expected, - hsa_signal_value_t value); - - hsa_signal_value_t CasRelease(hsa_signal_value_t expected, - hsa_signal_value_t value); - - hsa_signal_value_t CasAcqRel(hsa_signal_value_t expected, - hsa_signal_value_t value); - - /// @brief See base class Signal. - __forceinline hsa_signal_value_t* ValueLocation() const { - return (hsa_signal_value_t*)&signal_.value; - } - - /// @brief See base class Signal. - __forceinline HsaEvent* EopEvent() { return event_; } - - // TODO(bwicakso) : work around for SDMA async copy. Bypass waiting on EOP - // event because SDMA copy does not handle interrupt yet. - __forceinline void DisableWaitEvent() { wait_on_event_ = false; } - - /// @brief prevent throwing exceptions - void* operator new(size_t size) { return malloc(size); } - - /// @brief prevent throwing exceptions - void operator delete(void* ptr) { free(ptr); } - - protected: - bool _IsA(rtti_t id) const { return id == &rtti_id_; } - - private: - /// @variable KFD event on which the interrupt signal is based on. - HsaEvent* event_; - - /// @variable Indicates whether the signal should release the event when it - /// closes or not. - bool free_event_; - - // TODO(bwicakso) : work around for SDMA async copy. Bypass waiting on EOP - // event because SDMA copy does not handle interrupt yet. - bool wait_on_event_; - - /// Used to obtain a globally unique value (address) for rtti. - static int rtti_id_; - - /// @brief Notify driver of signal value change if necessary. 
- __forceinline void SetEvent() { - std::atomic_signal_fence(std::memory_order_seq_cst); - if (InWaiting()) hsaKmtSetEvent(event_); - } - - DISALLOW_COPY_AND_ASSIGN(InterruptSignal); -}; - -} // namespace core -#endif // header guard diff --git a/runtime/hsa-runtime/core/inc/isa.h b/runtime/hsa-runtime/core/inc/isa.h deleted file mode 100644 index 46cdc85a1d..0000000000 --- a/runtime/hsa-runtime/core/inc/isa.h +++ /dev/null @@ -1,164 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// -// The University of Illinois/NCSA -// Open Source License (NCSA) -// -// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. -// -// Developed by: -// -// AMD Research and AMD HSA Software Development -// -// Advanced Micro Devices, Inc. -// -// www.amd.com -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal with the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimers. -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimers in -// the documentation and/or other materials provided with the distribution. -// - Neither the names of Advanced Micro Devices, Inc, -// nor the names of its contributors may be used to endorse or promote -// products derived from this Software without specific prior written -// permission. 
-// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR -// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// DEALINGS WITH THE SOFTWARE. -// -//////////////////////////////////////////////////////////////////////////////// - -#ifndef HSA_RUNTIME_CORE_ISA_H_ -#define HSA_RUNTIME_CORE_ISA_H_ - -#include -#include -#include -#include -#include -#include "core/inc/amd_hsa_code.hpp" - -namespace core { - -// @class Isa -// @brief Instruction Set Architecture -class Isa final: public amd::hsa::common::Signed<0xB13594F2BD8F212D> { - public: - // @brief Isa's version type - typedef std::tuple Version; - - // @brief Default destructor - ~Isa() {} - - // @returns Handle equivalent of @p isa_object - static hsa_isa_t Handle(const Isa *isa_object) { - hsa_isa_t isa_handle = { reinterpret_cast(isa_object) }; - return isa_handle; - } - // @returns Object equivalend of @p isa_handle - static Isa *Object(const hsa_isa_t &isa_handle) { - Isa *isa_object = amd::hsa::common::ObjectAt(isa_handle.handle); - return isa_object; - } - - // @returns This Isa's version - const Version &version() const { - return version_; - } - - // @returns This Isa's vendor - std::string GetVendor() const { - return "AMD"; - } - // @returns This Isa's architecture - std::string GetArchitecture() const { - return "AMDGPU"; - } - // @returns This Isa's major version - int32_t GetMajorVersion() const { - return std::get<0>(version_); - } - // @returns This Isa's minor version - int32_t GetMinorVersion() const { - return std::get<1>(version_); - } - // @returns This Isa's stepping - int32_t GetStepping() const { - return std::get<2>(version_); - } - - 
// @returns True if this Isa is compatible with @p isa_object, false otherwise - bool IsCompatible(const Isa *isa_object) const { - assert(isa_object); - return version_ == isa_object->version_; - } - // @returns True if this Isa is compatible with @p isa_handle, false otherwise - bool IsCompatible(const hsa_isa_t &isa_handle) const { - assert(isa_handle.handle); - return IsCompatible(Object(isa_handle)); - } - // @brief Isa is always in valid state - bool IsValid() const { - return true; - } - - // @returns This Isa's full name - std::string GetFullName() const; - - // @brief Query value of requested @p attribute and record it in @p value - bool GetInfo(const hsa_isa_info_t &attribute, void *value) const; - - private: - // @brief Default constructor - Isa(): version_(Version(-1, -1, -1)) {} - - // @brief Construct from @p version - Isa(const Version &version): version_(version) {} - - // @brief Isa's version - Version version_; - - // @brief Isa's friends - friend class IsaRegistry; -}; // class Isa - -// @class IsaRegistry -// @brief Instruction Set Architecture Registry -class IsaRegistry final { - public: - // @returns Isa for requested @p full_name, null pointer if not supported - static const Isa *GetIsa(const std::string &full_name); - // @returns Isa for requested @p version, null pointer if not supported - static const Isa *GetIsa(const Isa::Version &version); - - private: - // @brief IsaRegistry's map type - typedef std::unordered_map IsaMap; - - // @brief Supported instruction set architectures - static const IsaMap supported_isas_; - - // @brief Default constructor - not available - IsaRegistry(); - // @brief Default destructor - not available - ~IsaRegistry(); - - // @returns Supported instruction set architectures - static const IsaMap GetSupportedIsas(); -}; // class IsaRegistry - -} // namespace core - -#endif // HSA_RUNTIME_CORE_ISA_HPP_ diff --git a/runtime/hsa-runtime/core/inc/memory_region.h b/runtime/hsa-runtime/core/inc/memory_region.h deleted 
file mode 100644 index ea37b6a2bc..0000000000 --- a/runtime/hsa-runtime/core/inc/memory_region.h +++ /dev/null @@ -1,109 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// -// The University of Illinois/NCSA -// Open Source License (NCSA) -// -// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. -// -// Developed by: -// -// AMD Research and AMD HSA Software Development -// -// Advanced Micro Devices, Inc. -// -// www.amd.com -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal with the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimers. -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimers in -// the documentation and/or other materials provided with the distribution. -// - Neither the names of Advanced Micro Devices, Inc, -// nor the names of its contributors may be used to endorse or promote -// products derived from this Software without specific prior written -// permission. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL -// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR -// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// DEALINGS WITH THE SOFTWARE. -// -//////////////////////////////////////////////////////////////////////////////// - -// HSA runtime C++ interface file. - -#ifndef HSA_RUNTME_CORE_INC_MEMORY_REGION_H_ -#define HSA_RUNTME_CORE_INC_MEMORY_REGION_H_ - -#include - -#include "core/inc/runtime.h" -#include "core/inc/agent.h" -#include "core/inc/checked.h" - -namespace core { -class Agent; - -class MemoryRegion : public Checked<0x9C961F19EE175BB3> { - public: - MemoryRegion(bool fine_grain, bool full_profile, core::Agent* owner) - : fine_grain_(fine_grain), full_profile_(full_profile), owner_(owner) { - assert(owner_ != NULL); - } - - virtual ~MemoryRegion() {} - - // Convert this object into hsa_region_t. - static __forceinline hsa_region_t Convert(MemoryRegion* region) { - const hsa_region_t region_handle = { - static_cast(reinterpret_cast(region))}; - return region_handle; - } - - static __forceinline const hsa_region_t Convert(const MemoryRegion* region) { - const hsa_region_t region_handle = { - static_cast(reinterpret_cast(region))}; - return region_handle; - } - - // Convert hsa_region_t into MemoryRegion *. - static __forceinline MemoryRegion* Convert(hsa_region_t region) { - return reinterpret_cast(region.handle); - } - - virtual hsa_status_t Allocate(size_t size, void** address) const = 0; - - virtual hsa_status_t Free(void* address, size_t size) const = 0; - - // Translate memory properties into HSA region attribute. 
- virtual hsa_status_t GetInfo(hsa_region_info_t attribute, - void* value) const = 0; - - virtual hsa_status_t AssignAgent(void* ptr, size_t size, const Agent& agent, - hsa_access_permission_t access) const = 0; - - __forceinline bool fine_grain() const { return fine_grain_; } - - __forceinline bool full_profile() const { return full_profile_; } - - __forceinline core::Agent* owner() const { return owner_; } - - private: - const bool fine_grain_; - const bool full_profile_; - - core::Agent* owner_; -}; -} // namespace core - -#endif // header guard diff --git a/runtime/hsa-runtime/core/inc/queue.h b/runtime/hsa-runtime/core/inc/queue.h deleted file mode 100644 index c1a56bded5..0000000000 --- a/runtime/hsa-runtime/core/inc/queue.h +++ /dev/null @@ -1,322 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// -// The University of Illinois/NCSA -// Open Source License (NCSA) -// -// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. -// -// Developed by: -// -// AMD Research and AMD HSA Software Development -// -// Advanced Micro Devices, Inc. -// -// www.amd.com -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal with the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimers. -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimers in -// the documentation and/or other materials provided with the distribution. 
-// - Neither the names of Advanced Micro Devices, Inc, -// nor the names of its contributors may be used to endorse or promote -// products derived from this Software without specific prior written -// permission. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR -// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// DEALINGS WITH THE SOFTWARE. -// -//////////////////////////////////////////////////////////////////////////////// - -// HSA runtime C++ interface file. - -#ifndef HSA_RUNTME_CORE_INC_COMMAND_QUEUE_H_ -#define HSA_RUNTME_CORE_INC_COMMAND_QUEUE_H_ -#include - -#include "core/common/shared.h" - -#include "core/inc/runtime.h" -#include "core/inc/checked.h" - -#include "core/util/utils.h" - -#include "inc/amd_hsa_queue.h" - -namespace core { -struct AqlPacket { - - union { - hsa_kernel_dispatch_packet_t dispatch; - hsa_barrier_and_packet_t barrier_and; - hsa_barrier_or_packet_t barrier_or; - hsa_agent_dispatch_packet_t agent; - }; - - uint8_t type() { - return ((dispatch.header >> HSA_PACKET_HEADER_TYPE) & - ((1 << HSA_PACKET_HEADER_WIDTH_TYPE) - 1)); - } - - bool IsValid() { - const uint8_t packet_type = dispatch.header >> HSA_PACKET_HEADER_TYPE; - return (packet_type > HSA_PACKET_TYPE_INVALID && - packet_type <= HSA_PACKET_TYPE_BARRIER_OR); - } - - std::string string() const { - std::stringstream string; - uint8_t type = ((dispatch.header >> HSA_PACKET_HEADER_TYPE) & - ((1 << HSA_PACKET_HEADER_WIDTH_TYPE) - 1)); - - const char* type_names[] = { - "HSA_PACKET_TYPE_VENDOR_SPECIFIC", "HSA_PACKET_TYPE_INVALID", - "HSA_PACKET_TYPE_KERNEL_DISPATCH", "HSA_PACKET_TYPE_BARRIER_AND", - 
"HSA_PACKET_TYPE_AGENT_DISPATCH", "HSA_PACKET_TYPE_BARRIER_OR"}; - - string << "type: " << type_names[type] - << "\nbarrier: " << ((dispatch.header >> HSA_PACKET_HEADER_BARRIER) & - ((1 << HSA_PACKET_HEADER_WIDTH_BARRIER) - 1)) - << "\nacquire: " - << ((dispatch.header >> HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE) & - ((1 << HSA_PACKET_HEADER_WIDTH_ACQUIRE_FENCE_SCOPE) - 1)) - << "\nrelease: " - << ((dispatch.header >> HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE) & - ((1 << HSA_PACKET_HEADER_WIDTH_RELEASE_FENCE_SCOPE) - 1)); - - if (type == HSA_PACKET_TYPE_KERNEL_DISPATCH) { - string << "\nDim: " << dispatch.setup - << "\nworkgroup_size: " << dispatch.workgroup_size_x << ", " - << dispatch.workgroup_size_y << ", " << dispatch.workgroup_size_z - << "\ngrid_size: " << dispatch.grid_size_x << ", " - << dispatch.grid_size_y << ", " << dispatch.grid_size_z - << "\nprivate_size: " << dispatch.private_segment_size - << "\ngroup_size: " << dispatch.group_segment_size - << "\nkernel_object: " << dispatch.kernel_object - << "\nkern_arg: " << dispatch.kernarg_address - << "\nsignal: " << dispatch.completion_signal.handle; - } - - if ((type == HSA_PACKET_TYPE_BARRIER_AND) || - (type == HSA_PACKET_TYPE_BARRIER_OR)) { - for (int i = 0; i < 5; i++) - string << "\ndep[" << i << "]: " << barrier_and.dep_signal[i].handle; - string << "\nsignal: " << barrier_and.completion_signal.handle; - } - - return string.str(); - } -}; - -class Queue; - -/// @brief Helper structure to simplify conversion of amd_queue_t and -/// core::Queue object. -struct SharedQueue { - amd_queue_t amd_queue; - Queue* core_queue; -}; - -/// @brief Class Queue which encapsulate user mode queues and -/// provides Api to access its Read, Write indices using Acquire, -/// Release and Relaxed semantics. -/* -Queue is intended to be an pure interface class and may be wrapped or replaced -by tools. -All funtions other than Convert and public_handle must be virtual. 
-*/ -class Queue : public Checked<0xFA3906A679F9DB49>, - public Shared { - public: - Queue() : Shared(), amd_queue_(shared_object()->amd_queue) { - if (!Shared::IsSharedObjectAllocationValid()) { - return; - } - - shared_object()->core_queue = this; - - public_handle_ = Convert(this); - } - - virtual ~Queue() {} - - /// @brief Returns the handle of Queue's public data type - /// - /// @param queue Pointer to an instance of Queue implementation object - /// - /// @return hsa_queue_t * Pointer to the public data type of a queue - static __forceinline hsa_queue_t* Convert(Queue* queue) { - return ((queue != NULL) && (queue->IsSharedObjectAllocationValid())) - ? &queue->amd_queue_.hsa_queue - : NULL; - } - - /// @brief Transform the public data type of a Queue's data type into an - // instance of it Queue class object - /// - /// @param queue Handle of public data type of a queue - /// - /// @return Queue * Pointer to the Queue's implementation object - static __forceinline Queue* Convert(const hsa_queue_t* queue) { - return (queue != NULL) - ? reinterpret_cast( - reinterpret_cast(queue) - - (reinterpret_cast( - &reinterpret_cast(1234) - ->amd_queue.hsa_queue) - - uintptr_t(1234)))->core_queue - : NULL; - } - - /// @brief Inactivate the queue object. 
Once inactivate a - /// queue cannot be used anymore and must be destroyed - /// - /// @return hsa_status_t Status of request - virtual hsa_status_t Inactivate() = 0; - - /// @brief Reads the Read Index of Queue using Acquire semantics - /// - /// @return uint64_t Value of Read index - virtual uint64_t LoadReadIndexAcquire() = 0; - - /// @brief Reads the Read Index of Queue using Relaxed semantics - /// - /// @return uint64_t Value of Read index - virtual uint64_t LoadReadIndexRelaxed() = 0; - - /// @brief Reads the Write Index of Queue using Acquire semantics - /// - /// @return uint64_t Value of Write index - virtual uint64_t LoadWriteIndexAcquire() = 0; - - /// Reads the Write Index of Queue using Relaxed semantics - /// - /// @return uint64_t Value of Write index - virtual uint64_t LoadWriteIndexRelaxed() = 0; - - /// @brief Updates the Read Index of Queue using Relaxed semantics - /// - /// @param value New value of Read index to update - virtual void StoreReadIndexRelaxed(uint64_t value) = 0; - - /// @brief Updates the Read Index of Queue using Release semantics - /// - /// @param value New value of Read index to update - virtual void StoreReadIndexRelease(uint64_t value) = 0; - - /// @brief Updates the Write Index of Queue using Relaxed semantics - /// - /// @param value New value of Write index to update - virtual void StoreWriteIndexRelaxed(uint64_t value) = 0; - - /// @brief Updates the Write Index of Queue using Release semantics - /// - /// @param value New value of Write index to update - virtual void StoreWriteIndexRelease(uint64_t value) = 0; - - /// @brief Compares and swaps Write index using Acquire and Release semantics - /// - /// @param expected Current value of write index - /// - /// @param value Value of new write index - /// - /// @return uint64_t Value of write index before the update - virtual uint64_t CasWriteIndexAcqRel(uint64_t expected, uint64_t value) = 0; - - /// @brief Compares and swaps Write index using Acquire semantics - /// - 
/// @param expected Current value of write index - /// - /// @param value Value of new write index - /// - /// @return uint64_t Value of write index before the update - virtual uint64_t CasWriteIndexAcquire(uint64_t expected, uint64_t value) = 0; - - /// @brief Compares and swaps Write index using Relaxed semantics - /// - /// @param expected Current value of write index - /// - /// @param value Value of new write index - /// - /// @return uint64_t Value of write index before the update - virtual uint64_t CasWriteIndexRelaxed(uint64_t expected, uint64_t value) = 0; - - /// @brief Compares and swaps Write index using Release semantics - /// - /// @param expected Current value of write index - /// - /// @param value Value of new write index - /// - /// @return uint64_t Value of write index before the update - virtual uint64_t CasWriteIndexRelease(uint64_t expected, uint64_t value) = 0; - - /// @brief Updates the Write index using Acquire and Release semantics - /// - /// @param value Value of new write index - /// - /// @return uint64_t Value of write index before the update - virtual uint64_t AddWriteIndexAcqRel(uint64_t value) = 0; - - /// @brief Updates the Write index using Acquire semantics - /// - /// @param value Value of new write index - /// - /// @return uint64_t Value of write index before the update - virtual uint64_t AddWriteIndexAcquire(uint64_t value) = 0; - - /// @brief Updates the Write index using Relaxed semantics - /// - /// @param value Value of new write index - /// - /// @return uint64_t Value of write index before the update - virtual uint64_t AddWriteIndexRelaxed(uint64_t value) = 0; - - /// @brief Updates the Write index using Release semantics - /// - /// @param value Value of new write index - /// - /// @return uint64_t Value of write index before the update - virtual uint64_t AddWriteIndexRelease(uint64_t value) = 0; - - /// @brief Set CU Masking - /// - /// @param num_cu_mask_count size of mask bit array - /// - /// @param cu_mask 
pointer to cu mask - /// - /// @return hsa_status_t - virtual hsa_status_t SetCUMasking(const uint32_t num_cu_mask_count, - const uint32_t* cu_mask) = 0; - - // Handle of AMD Queue struct - amd_queue_t& amd_queue_; - - hsa_queue_t* public_handle() const { return public_handle_; } - - protected: - static void set_public_handle(Queue* ptr, hsa_queue_t* handle) { - ptr->do_set_public_handle(handle); - } - virtual void do_set_public_handle(hsa_queue_t* handle) { - public_handle_ = handle; - } - hsa_queue_t* public_handle_; - - private: - DISALLOW_COPY_AND_ASSIGN(Queue); -}; -} - -#endif // header guard diff --git a/runtime/hsa-runtime/core/inc/registers.h b/runtime/hsa-runtime/core/inc/registers.h deleted file mode 100644 index d2bffb654d..0000000000 --- a/runtime/hsa-runtime/core/inc/registers.h +++ /dev/null @@ -1,204 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// -// The University of Illinois/NCSA -// Open Source License (NCSA) -// -// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. -// -// Developed by: -// -// AMD Research and AMD HSA Software Development -// -// Advanced Micro Devices, Inc. -// -// www.amd.com -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal with the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimers. 
-// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimers in -// the documentation and/or other materials provided with the distribution. -// - Neither the names of Advanced Micro Devices, Inc, -// nor the names of its contributors may be used to endorse or promote -// products derived from this Software without specific prior written -// permission. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR -// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// DEALINGS WITH THE SOFTWARE. -// -//////////////////////////////////////////////////////////////////////////////// - -// This file is used only for open source cmake builds, if we hardcode the -// register values in amd_aql_queue.cpp then this file won't be required. For -// now we are using this file where register details are spelled out in the -// structs/unions below. 
-#ifndef HSA_RUNTME_CORE_INC_REGISTERS_H_ -#define HSA_RUNTME_CORE_INC_REGISTERS_H_ - -typedef enum SQ_RSRC_BUF_TYPE { -SQ_RSRC_BUF = 0x00000000, -SQ_RSRC_BUF_RSVD_1 = 0x00000001, -SQ_RSRC_BUF_RSVD_2 = 0x00000002, -SQ_RSRC_BUF_RSVD_3 = 0x00000003, -} SQ_RSRC_BUF_TYPE; - -typedef enum BUF_DATA_FORMAT { -BUF_DATA_FORMAT_INVALID = 0x00000000, -BUF_DATA_FORMAT_8 = 0x00000001, -BUF_DATA_FORMAT_16 = 0x00000002, -BUF_DATA_FORMAT_8_8 = 0x00000003, -BUF_DATA_FORMAT_32 = 0x00000004, -BUF_DATA_FORMAT_16_16 = 0x00000005, -BUF_DATA_FORMAT_10_11_11 = 0x00000006, -BUF_DATA_FORMAT_11_11_10 = 0x00000007, -BUF_DATA_FORMAT_10_10_10_2 = 0x00000008, -BUF_DATA_FORMAT_2_10_10_10 = 0x00000009, -BUF_DATA_FORMAT_8_8_8_8 = 0x0000000a, -BUF_DATA_FORMAT_32_32 = 0x0000000b, -BUF_DATA_FORMAT_16_16_16_16 = 0x0000000c, -BUF_DATA_FORMAT_32_32_32 = 0x0000000d, -BUF_DATA_FORMAT_32_32_32_32 = 0x0000000e, -BUF_DATA_FORMAT_RESERVED_15 = 0x0000000f, -} BUF_DATA_FORMAT; - -typedef enum BUF_NUM_FORMAT { -BUF_NUM_FORMAT_UNORM = 0x00000000, -BUF_NUM_FORMAT_SNORM = 0x00000001, -BUF_NUM_FORMAT_USCALED = 0x00000002, -BUF_NUM_FORMAT_SSCALED = 0x00000003, -BUF_NUM_FORMAT_UINT = 0x00000004, -BUF_NUM_FORMAT_SINT = 0x00000005, -BUF_NUM_FORMAT_SNORM_OGL__SI__CI = 0x00000006, -BUF_NUM_FORMAT_RESERVED_6__VI = 0x00000006, -BUF_NUM_FORMAT_FLOAT = 0x00000007, -} BUF_NUM_FORMAT; - -typedef enum SQ_SEL_XYZW01 { -SQ_SEL_0 = 0x00000000, -SQ_SEL_1 = 0x00000001, -SQ_SEL_RESERVED_0 = 0x00000002, -SQ_SEL_RESERVED_1 = 0x00000003, -SQ_SEL_X = 0x00000004, -SQ_SEL_Y = 0x00000005, -SQ_SEL_Z = 0x00000006, -SQ_SEL_W = 0x00000007, -} SQ_SEL_XYZW01; - - union COMPUTE_TMPRING_SIZE { - struct { -#if defined(LITTLEENDIAN_CPU) - unsigned int WAVES : 12; - unsigned int WAVESIZE : 13; - unsigned int : 7; -#elif defined(BIGENDIAN_CPU) - unsigned int : 7; - unsigned int WAVESIZE : 13; - unsigned int WAVES : 12; -#endif - } bitfields, bits; - unsigned int u32All; - signed int i32All; - float f32All; - }; - - - union SQ_BUF_RSRC_WORD0 { - struct { 
-#if defined(LITTLEENDIAN_CPU) - unsigned int BASE_ADDRESS : 32; -#elif defined(BIGENDIAN_CPU) - unsigned int BASE_ADDRESS : 32; -#endif - } bitfields, bits; - unsigned int u32All; - signed int i32All; - float f32All; - }; - - - union SQ_BUF_RSRC_WORD1 { - struct { -#if defined(LITTLEENDIAN_CPU) - unsigned int BASE_ADDRESS_HI : 16; - unsigned int STRIDE : 14; - unsigned int CACHE_SWIZZLE : 1; - unsigned int SWIZZLE_ENABLE : 1; -#elif defined(BIGENDIAN_CPU) - unsigned int SWIZZLE_ENABLE : 1; - unsigned int CACHE_SWIZZLE : 1; - unsigned int STRIDE : 14; - unsigned int BASE_ADDRESS_HI : 16; -#endif - } bitfields, bits; - unsigned int u32All; - signed int i32All; - float f32All; - }; - - - union SQ_BUF_RSRC_WORD2 { - struct { -#if defined(LITTLEENDIAN_CPU) - unsigned int NUM_RECORDS : 32; -#elif defined(BIGENDIAN_CPU) - unsigned int NUM_RECORDS : 32; -#endif - } bitfields, bits; - unsigned int u32All; - signed int i32All; - float f32All; - }; - - - union SQ_BUF_RSRC_WORD3 { - struct { -#if defined(LITTLEENDIAN_CPU) - unsigned int DST_SEL_X : 3; - unsigned int DST_SEL_Y : 3; - unsigned int DST_SEL_Z : 3; - unsigned int DST_SEL_W : 3; - unsigned int NUM_FORMAT : 3; - unsigned int DATA_FORMAT : 4; - unsigned int ELEMENT_SIZE : 2; - unsigned int INDEX_STRIDE : 2; - unsigned int ADD_TID_ENABLE : 1; - unsigned int ATC__CI__VI : 1; - unsigned int HASH_ENABLE : 1; - unsigned int HEAP : 1; - unsigned int MTYPE__CI__VI : 3; - unsigned int TYPE : 2; -#elif defined(BIGENDIAN_CPU) - unsigned int TYPE : 2; - unsigned int MTYPE__CI__VI : 3; - unsigned int HEAP : 1; - unsigned int HASH_ENABLE : 1; - unsigned int ATC__CI__VI : 1; - unsigned int ADD_TID_ENABLE : 1; - unsigned int INDEX_STRIDE : 2; - unsigned int ELEMENT_SIZE : 2; - unsigned int DATA_FORMAT : 4; - unsigned int NUM_FORMAT : 3; - unsigned int DST_SEL_W : 3; - unsigned int DST_SEL_Z : 3; - unsigned int DST_SEL_Y : 3; - unsigned int DST_SEL_X : 3; -#endif - } bitfields, bits; - unsigned int u32All; - signed int i32All; - 
float f32All; - }; - -#endif // header guard diff --git a/runtime/hsa-runtime/core/inc/runtime.h b/runtime/hsa-runtime/core/inc/runtime.h deleted file mode 100644 index c59a6ee0d2..0000000000 --- a/runtime/hsa-runtime/core/inc/runtime.h +++ /dev/null @@ -1,498 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// -// The University of Illinois/NCSA -// Open Source License (NCSA) -// -// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. -// -// Developed by: -// -// AMD Research and AMD HSA Software Development -// -// Advanced Micro Devices, Inc. -// -// www.amd.com -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal with the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimers. -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimers in -// the documentation and/or other materials provided with the distribution. -// - Neither the names of Advanced Micro Devices, Inc, -// nor the names of its contributors may be used to endorse or promote -// products derived from this Software without specific prior written -// permission. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL -// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR -// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// DEALINGS WITH THE SOFTWARE. -// -//////////////////////////////////////////////////////////////////////////////// - -// HSA runtime C++ interface file. - -#ifndef HSA_RUNTME_CORE_INC_RUNTIME_H_ -#define HSA_RUNTME_CORE_INC_RUNTIME_H_ - -#include -#include - -#include "core/inc/hsa_ext_interface.h" -#include "core/inc/hsa_internal.h" - -#include "core/inc/agent.h" -#include "core/inc/memory_region.h" -#include "core/inc/signal.h" -#include "core/util/utils.h" -#include "core/util/locks.h" -#include "core/util/os.h" - -#include "core/inc/amd_loader_context.hpp" -#include "amd_hsa_code.hpp" - -//---------------------------------------------------------------------------// -// Constants // -//---------------------------------------------------------------------------// - -#define HSA_ARGUMENT_ALIGN_BYTES 16 -#define HSA_QUEUE_ALIGN_BYTES 64 -#define HSA_PACKET_ALIGN_BYTES 64 - -namespace core { -extern bool g_use_interrupt_wait; - -/// @brief Runtime class provides the following functions: -/// - open and close connection to kernel driver. -/// - load supported extension library (image and finalizer). -/// - load tools library. -/// - expose supported agents. -/// - allocate and free memory. -/// - memory copy and fill. -/// - grant access to memory (dgpu memory pool extension). -/// - maintain loader state. -/// - monitor asynchronous event from agent. -class Runtime { - public: - /// @brief Structure to describe connectivity between agents. - struct LinkInfo { - uint32_t num_hop; - hsa_amd_memory_pool_link_info_t info; - }; - - /// @brief Open connection to kernel driver and increment reference count. - /// @retval True if the connection to kernel driver is successfully opened. 
- static bool Acquire(); - - /// @brief Checks if connection to kernel driver is opened. - /// @retval True if the connection to kernel driver is opened. - static bool IsOpen(); - - // @brief Callback handler for VM fault access. - static bool VMFaultHandler(hsa_signal_value_t val, void* arg); - - /// @brief Singleton object of the runtime. - static Runtime* runtime_singleton_; - - /// @brief Decrement reference count and close connection to kernel driver. - /// @retval True if reference count is larger than 0. - bool Release(); - - /// @brief Insert agent into agent list ::agents_. - /// @param [in] agent Pointer to the agent object. - void RegisterAgent(Agent* agent); - - /// @brief Delete all agent objects from ::agents_. - void DestroyAgents(); - - /// @brief Set the number of links connecting the agents in the platform. - void SetLinkCount(size_t num_link); - - /// @brief Register link information connecting @p node_id_from and @p - /// node_id_to. - /// @param [in] node_id_from Node id of the source node. - /// @param [in] node_id_to Node id of the destination node. - /// @param [in] link_info The link information between source and destination - /// nodes. - void RegisterLinkInfo(uint32_t node_id_from, uint32_t node_id_to, - uint32_t num_hop, - hsa_amd_memory_pool_link_info_t& link_info); - - /// @brief Query link information between two nodes. - /// @param [in] node_id_from Node id of the source node. - /// @param [in] node_id_to Node id of the destination node. - /// @retval The link information between source and destination nodes. - const LinkInfo& GetLinkInfo(uint32_t node_id_from, uint32_t node_id_to); - - /// @brief Invoke the user provided call back for each agent in the agent - /// list. - /// - /// @param [in] callback User provided callback function. - /// @param [in] data User provided pointer as input for @p callback. - /// - /// @retval ::HSA_STATUS_SUCCESS if the callback function for each traversed - /// agent returns ::HSA_STATUS_SUCCESS. 
- hsa_status_t IterateAgent(hsa_status_t (*callback)(hsa_agent_t agent, - void* data), - void* data); - - /// @brief Allocate memory on a particular region. - /// - /// @param [in] region Pointer to region object. - /// @param [in] size Allocation size in bytes. - /// @param [out] address Pointer to store the allocation result. - /// - /// @retval ::HSA_STATUS_SUCCESS If allocation is successful. - hsa_status_t AllocateMemory(const MemoryRegion* region, size_t size, - void** address); - - /// @brief Allocate memory on a particular region with option to restrict - /// access to the owning agent. - /// - /// @param [in] restrict_access If true, the allocation result would only be - /// accessible to the agent(s) that own the region object. - /// @param [in] region Pointer to region object. - /// @param [in] size Allocation size in bytes. - /// @param [out] address Pointer to store the allocation result. - /// - /// @retval ::HSA_STATUS_SUCCESS If allocation is successful. - hsa_status_t AllocateMemory(bool restrict_access, const MemoryRegion* region, - size_t size, void** address); - - /// @brief Free memory previously allocated with AllocateMemory. - /// - /// @param [in] ptr Address of the memory to be freed. - /// - /// @retval ::HSA_STATUS_ERROR If @p ptr is not the address of previous - /// allocation via ::core::Runtime::AllocateMemory - /// @retval ::HSA_STATUS_SUCCESS if @p ptr is successfully released. - hsa_status_t FreeMemory(void* ptr); - - /// @brief Blocking memory copy from src to dst. - /// - /// @param [in] dst Memory address of the destination. - /// @param [in] src Memory address of the source. - /// @param [in] size Copy size in bytes. - /// - /// @retval ::HSA_STATUS_SUCCESS if memory copy is successful and completed. - hsa_status_t CopyMemory(void* dst, const void* src, size_t size); - - /// @brief Non-blocking memory copy from src to dst. 
- /// - /// @details The memory copy will be performed after all signals in - /// @p dep_signals have value of 0. On completion @p completion_signal - /// will be decremented. - /// - /// @param [in] dst Memory address of the destination. - /// @param [in] dst_agent Agent object associated with the destination. This - /// agent should be able to access the destination and source. - /// @param [in] src Memory address of the source. - /// @param [in] src_agent Agent object associated with the source. This - /// agent should be able to access the destination and source. - /// @param [in] size Copy size in bytes. - /// @param [in] dep_signals Array of signal dependency. - /// @param [in] completion_signal Completion signal object. - /// - /// @retval ::HSA_STATUS_SUCCESS if copy command has been submitted - /// successfully to the agent DMA queue. - hsa_status_t CopyMemory(void* dst, core::Agent& dst_agent, const void* src, - core::Agent& src_agent, size_t size, - std::vector& dep_signals, - core::Signal& completion_signal); - - /// @brief Fill the first @p count of uint32_t in ptr with value. - /// - /// @param [in] ptr Memory address to be filled. - /// @param [in] value The value/pattern that will be used to set @p ptr. - /// @param [in] count Number of uint32_t element to be set. - /// - /// @retval ::HSA_STATUS_SUCCESS if memory fill is successful and completed. - hsa_status_t FillMemory(void* ptr, uint32_t value, size_t count); - - /// @brief Set agents as the whitelist to access ptr. - /// - /// @param [in] num_agents The number of agent handles in @p agents array. - /// @param [in] agents Agent handle array. - /// @param [in] ptr Pointer of memory previously allocated via - /// core::Runtime::AllocateMemory. - /// - /// @retval ::HSA_STATUS_SUCCESS The whitelist has been configured - /// successfully and all agents in the @p agents could start accessing @p ptr. 
- hsa_status_t AllowAccess(uint32_t num_agents, const hsa_agent_t* agents, - const void* ptr); - - /// @brief Query system information. - /// - /// @param [in] attribute System info attribute to query. - /// @param [out] value Pointer to store the attribute value. - /// - /// @retval HSA_STATUS_SUCCESS The attribute is valid and the @p value is - /// set. - hsa_status_t GetSystemInfo(hsa_system_info_t attribute, void* value); - - /// @brief Query next available queue id. - /// - /// @retval Next available queue id. - uint32_t GetQueueId(); - - /// @brief Register a callback function @p handler that is associated with - /// @p signal to asynchronous event monitor thread. - /// - /// @param [in] signal Signal handle associated with @p handler. - /// @param [in] cond The condition to execute the @p handler. - /// @param [in] value The value to compare with @p signal value. If the - /// comparison satisfy @p cond, the @p handler will be called. - /// @param [in] arg Pointer to the argument that will be provided to @p - /// handler. - /// - /// @retval ::HSA_STATUS_SUCCESS Registration is successful. 
- hsa_status_t SetAsyncSignalHandler(hsa_signal_t signal, - hsa_signal_condition_t cond, - hsa_signal_value_t value, - hsa_amd_signal_handler handler, void* arg); - - hsa_status_t InteropMap(uint32_t num_agents, Agent** agents, - int interop_handle, uint32_t flags, size_t* size, - void** ptr, size_t* metadata_size, - const void** metadata); - - hsa_status_t InteropUnmap(void* ptr); - - const std::vector& cpu_agents() { return cpu_agents_; } - - const std::vector& gpu_agents() { return gpu_agents_; } - - - const std::vector& gpu_ids() { return gpu_ids_; } - - Agent* blit_agent() { return blit_agent_; } - - Agent* host_agent() { return host_agent_; } - - const std::vector& system_regions_fine() const { - return system_regions_fine_; - } - - const std::vector& system_regions_coarse() const { - return system_regions_coarse_; - } - - amd::hsa::loader::Loader* loader() { return loader_; } - - amd::LoaderContext* loader_context() { return &loader_context_; } - - amd::hsa::code::AmdHsaCodeManager* code_manager() { return &code_manager_; } - - std::function& system_allocator() { - return system_allocator_; - } - - std::function& system_deallocator() { - return system_deallocator_; - } - - ExtensionEntryPoints extensions_; - - protected: - static void AsyncEventsLoop(void*); - - struct AllocationRegion { - AllocationRegion() : region(NULL), assigned_agent_(NULL), size(0) {} - AllocationRegion(const MemoryRegion* region_arg, size_t size_arg) - : region(region_arg), assigned_agent_(NULL), size(size_arg) {} - - const MemoryRegion* region; - const Agent* assigned_agent_; - size_t size; - }; - - struct AsyncEventsControl { - AsyncEventsControl() : async_events_thread_(NULL) {} - void Shutdown(); - - hsa_signal_t wake; - os::Thread async_events_thread_; - KernelMutex lock; - bool exit; - }; - - struct AsyncEvents { - void PushBack(hsa_signal_t signal, hsa_signal_condition_t cond, - hsa_signal_value_t value, hsa_amd_signal_handler handler, - void* arg); - - void CopyIndex(size_t 
dst, size_t src); - - size_t Size(); - - void PopBack(); - - void Clear(); - - std::vector signal_; - std::vector cond_; - std::vector value_; - std::vector handler_; - std::vector arg_; - }; - - // Will be created before any user could call hsa_init but also could be - // destroyed before incorrectly written programs call hsa_shutdown. - static KernelMutex bootstrap_lock_; - - Runtime(); - - Runtime(const Runtime&); - - Runtime& operator=(const Runtime&); - - ~Runtime() {} - - /// @brief Open connection to kernel driver. - void Load(); - - /// @brief Close connection to kernel driver and cleanup resources. - void Unload(); - - /// @brief Dynamically load extension libraries (images, finalizer) and - /// call OnLoad method on each loaded library. - void LoadExtensions(); - - /// @brief Call OnUnload method on each extension library then close it. - void UnloadExtensions(); - - /// @brief Dynamically load tool libraries and call OnUnload method on each - /// loaded library. - void LoadTools(); - - /// @brief Call OnUnload method of each tool library. - void UnloadTools(); - - /// @brief Close tool libraries. - void CloseTools(); - - // @brief Binds virtual memory access fault handler to this node. - void BindVmFaultHandler(); - - /// @brief Blocking memory copy from src to dst. One of the src or dst - /// is user pointer. A particular setup need to be made if the DMA queue - /// for the memory copy belongs to a dGPU agent. E.g: pin the user pointer - /// before copying, or using a staging buffer. - /// - /// @param [in] dst Memory address of the destination. - /// @param [in] src Memory address of the source. - /// @param [in] size Copy size in bytes. - /// @param [in] dst_malloc If true, then @p dst is the user pointer. Otherwise - /// @p src is the user pointer. - /// - /// @retval ::HSA_STATUS_SUCCESS if memory copy is successful and completed. 
- hsa_status_t CopyMemoryHostAlloc(void* dst, const void* src, size_t size, - bool dst_malloc); - - /// @brief Get the index of ::link_matrix_. - /// @param [in] node_id_from Node id of the source node. - /// @param [in] node_id_to Node id of the destination node. - /// @retval Index in ::link_matrix_. - uint32_t GetIndexLinkInfo(uint32_t node_id_from, uint32_t node_id_to); - - // Mutex object to protect multithreaded access to ::Acquire and ::Release. - KernelMutex kernel_lock_; - - // Mutex object to protect multithreaded access to ::allocation_map_. - KernelMutex memory_lock_; - - // Array containing tools library handles. - std::vector tool_libs_; - - // Agent list containing all CPU agents in the platform. - std::vector cpu_agents_; - - // Agent list containing all compatible GPU agents in the platform. - std::vector gpu_agents_; - - // Agent list containing all compatible gpu agent ids in the platform. - std::vector gpu_ids_; - - // List of all fine grain system memory region in the platform. - std::vector system_regions_fine_; - - // List of all coarse grain system memory region in the platform. - std::vector system_regions_coarse_; - - // Matrix of IO link. - std::vector link_matrix_; - - // Loader instance. - amd::hsa::loader::Loader* loader_; - - // Loader context. - amd::LoaderContext loader_context_; - - // Code object manager. - amd::hsa::code::AmdHsaCodeManager code_manager_; - - // Contains the region, address, and size of previously allocated memory. - std::map allocation_map_; - - // Allocator using ::system_region_ - std::function system_allocator_; - - // Deallocator using ::system_region_ - std::function system_deallocator_; - - // Pointer to a host/cpu agent object. - Agent* host_agent_; - - // Pointer to DMA agent. - Agent* blit_agent_; - - AsyncEventsControl async_events_control_; - - AsyncEvents async_events_; - - AsyncEvents new_async_events_; - - // Queue id counter. - uint32_t queue_count_; - - // Starting address of SVM address space. 
- // On APU the cpu and gpu could access the area inside starting and end of - // the SVM address space. - // On dGPU, only the gpu is guaranteed to have access to the area inside the - // SVM address space, since it maybe backed by private gpu VRAM. - uintptr_t start_svm_address_; - - // End address of SVM address space. - // start_svm_address_ + size - uintptr_t end_svm_address_; - - // System clock frequency. - uint64_t sys_clock_freq_; - - // @brief AMD HSA event to monitor for virtual memory access fault. - HsaEvent* vm_fault_event_; - - // @brief HSA signal to contain the VM fault event. - Signal* vm_fault_signal_; - - // Holds reference count to runtime object. - volatile uint32_t ref_count_; - - // Frees runtime memory when the runtime library is unloaded if safe to do so. - // Failure to release the runtime indicates an incorrect application but is - // common (example: calls library routines at process exit). - friend class RuntimeCleanup; -}; - -} // namespace core -#endif // header guard diff --git a/runtime/hsa-runtime/core/inc/signal.h b/runtime/hsa-runtime/core/inc/signal.h deleted file mode 100644 index e6509421cc..0000000000 --- a/runtime/hsa-runtime/core/inc/signal.h +++ /dev/null @@ -1,269 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// -// The University of Illinois/NCSA -// Open Source License (NCSA) -// -// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. -// -// Developed by: -// -// AMD Research and AMD HSA Software Development -// -// Advanced Micro Devices, Inc. 
-// -// www.amd.com -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal with the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimers. -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimers in -// the documentation and/or other materials provided with the distribution. -// - Neither the names of Advanced Micro Devices, Inc, -// nor the names of its contributors may be used to endorse or promote -// products derived from this Software without specific prior written -// permission. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR -// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// DEALINGS WITH THE SOFTWARE. -// -//////////////////////////////////////////////////////////////////////////////// - -// HSA runtime C++ interface file. 
- -#ifndef HSA_RUNTME_CORE_INC_SIGNAL_H_ -#define HSA_RUNTME_CORE_INC_SIGNAL_H_ - -#include "hsakmt.h" - -#include "core/common/shared.h" - -#include "core/inc/runtime.h" -#include "core/inc/checked.h" - -#include "core/util/utils.h" - -#include "inc/amd_hsa_signal.h" - -namespace core { -class Signal; - -/// @brief Helper structure to simplify conversion of amd_signal_t and -/// core::Signal object. -struct SharedSignal { - amd_signal_t amd_signal; - Signal* core_signal; -}; - -/// @brief An abstract base class which helps implement the public hsa_signal_t -/// type (an opaque handle) and its associated APIs. At its core, signal uses -/// a 32 or 64 bit value. This value can be waitied on or signaled atomically -/// using specified memory ordering semantics. -class Signal : public Checked<0x71FCCA6A3D5D5276>, - public Shared { - public: - /// @brief Constructor initializes the signal with initial value. - explicit Signal(hsa_signal_value_t initial_value) - : Shared(), signal_(shared_object()->amd_signal) { - if (!Shared::IsSharedObjectAllocationValid()) { - invalid_ = true; - return; - } - - shared_object()->core_signal = this; - - signal_.kind = AMD_SIGNAL_KIND_INVALID; - signal_.value = initial_value; - invalid_ = false; - waiting_ = 0; - retained_ = 0; - } - - virtual ~Signal() { signal_.kind = AMD_SIGNAL_KIND_INVALID; } - - bool IsValid() const { - if (CheckedType::IsValid() && !invalid_) return true; - return false; - } - - /// @brief Converts from this implementation class to the public - /// hsa_signal_t type - an opaque handle. - static __forceinline hsa_signal_t Convert(Signal* signal) { - const uint64_t handle = - (signal != NULL && signal->IsValid()) - ? static_cast( - reinterpret_cast(&signal->signal_)) - : 0; - const hsa_signal_t signal_handle = {handle}; - return signal_handle; - } - - /// @brief Converts from this implementation class to the public - /// hsa_signal_t type - an opaque handle. 
- static __forceinline const hsa_signal_t Convert(const Signal* signal) { - const uint64_t handle = - (signal != NULL && signal->IsValid()) - ? static_cast( - reinterpret_cast(&signal->signal_)) - : 0; - const hsa_signal_t signal_handle = {handle}; - return signal_handle; - } - - /// @brief Converts from public hsa_signal_t type (an opaque handle) to - /// this implementation class object. - static __forceinline Signal* Convert(hsa_signal_t signal) { - return (signal.handle != 0) - ? reinterpret_cast( - static_cast(signal.handle) - - (reinterpret_cast( - &reinterpret_cast(1234)->amd_signal) - - uintptr_t(1234)))->core_signal - : NULL; - } - - // Below are various methods corresponding to the APIs, which load/store the - // signal value or modify the existing signal value automically and with - // specified memory ordering semantics. - virtual hsa_signal_value_t LoadRelaxed() = 0; - virtual hsa_signal_value_t LoadAcquire() = 0; - - virtual void StoreRelaxed(hsa_signal_value_t value) = 0; - virtual void StoreRelease(hsa_signal_value_t value) = 0; - - virtual hsa_signal_value_t WaitRelaxed(hsa_signal_condition_t condition, - hsa_signal_value_t compare_value, - uint64_t timeout, - hsa_wait_state_t wait_hint) = 0; - virtual hsa_signal_value_t WaitAcquire(hsa_signal_condition_t condition, - hsa_signal_value_t compare_value, - uint64_t timeout, - hsa_wait_state_t wait_hint) = 0; - - virtual void AndRelaxed(hsa_signal_value_t value) = 0; - virtual void AndAcquire(hsa_signal_value_t value) = 0; - virtual void AndRelease(hsa_signal_value_t value) = 0; - virtual void AndAcqRel(hsa_signal_value_t value) = 0; - - virtual void OrRelaxed(hsa_signal_value_t value) = 0; - virtual void OrAcquire(hsa_signal_value_t value) = 0; - virtual void OrRelease(hsa_signal_value_t value) = 0; - virtual void OrAcqRel(hsa_signal_value_t value) = 0; - - virtual void XorRelaxed(hsa_signal_value_t value) = 0; - virtual void XorAcquire(hsa_signal_value_t value) = 0; - virtual void 
XorRelease(hsa_signal_value_t value) = 0; - virtual void XorAcqRel(hsa_signal_value_t value) = 0; - - virtual void AddRelaxed(hsa_signal_value_t value) = 0; - virtual void AddAcquire(hsa_signal_value_t value) = 0; - virtual void AddRelease(hsa_signal_value_t value) = 0; - virtual void AddAcqRel(hsa_signal_value_t value) = 0; - - virtual void SubRelaxed(hsa_signal_value_t value) = 0; - virtual void SubAcquire(hsa_signal_value_t value) = 0; - virtual void SubRelease(hsa_signal_value_t value) = 0; - virtual void SubAcqRel(hsa_signal_value_t value) = 0; - - virtual hsa_signal_value_t ExchRelaxed(hsa_signal_value_t value) = 0; - virtual hsa_signal_value_t ExchAcquire(hsa_signal_value_t value) = 0; - virtual hsa_signal_value_t ExchRelease(hsa_signal_value_t value) = 0; - virtual hsa_signal_value_t ExchAcqRel(hsa_signal_value_t value) = 0; - - virtual hsa_signal_value_t CasRelaxed(hsa_signal_value_t expected, - hsa_signal_value_t value) = 0; - virtual hsa_signal_value_t CasAcquire(hsa_signal_value_t expected, - hsa_signal_value_t value) = 0; - virtual hsa_signal_value_t CasRelease(hsa_signal_value_t expected, - hsa_signal_value_t value) = 0; - virtual hsa_signal_value_t CasAcqRel(hsa_signal_value_t expected, - hsa_signal_value_t value) = 0; - - //------------------------- - // implementation specific - //------------------------- - typedef void* rtti_t; - - /// @brief Returns the address of the value. - virtual hsa_signal_value_t* ValueLocation() const = 0; - - /// @brief Applies only to InterrupEvent type, returns the event used to. - /// Returns NULL for DefaultEvent Type. - virtual HsaEvent* EopEvent() = 0; - - /// @brief Waits until any signal in the list satisfies its condition or - /// timeout is reached. - /// Returns the index of a satisfied signal. Returns -1 on timeout and - /// errors. 
- static uint32_t WaitAny(uint32_t signal_count, hsa_signal_t* hsa_signals, - hsa_signal_condition_t* conds, - hsa_signal_value_t* values, uint64_t timeout_hint, - hsa_wait_state_t wait_hint, - hsa_signal_value_t* satisfying_value); - - __forceinline bool IsType(rtti_t id) { return _IsA(id); } - - /// @brief Allows special case interaction with signal destruction cleanup. - void Retain() { atomic::Increment(&retained_); } - void Release() { atomic::Decrement(&retained_); } - - /// @brief Checks if signal is currently in use such that it should not be - /// deleted. - bool InUse() const { return (retained_ != 0) || (waiting_ != 0); } - - /// @brief Checks if signal is currently in use by a wait API. - bool InWaiting() const { return waiting_ != 0; } - - /// @brief Structure which defines key signal elements like type and value. - /// Address of this struct is used as a value for the opaque handle of type - /// hsa_signal_t provided to the public API. - amd_signal_t& signal_; - - protected: - /// @brief Simple RTTI type checking helper - /// Returns true if the object can be converted to the query type via - /// static_cast. - /// Do not use directly. Use IsType in the desired derived type instead. - virtual bool _IsA(rtti_t id) const = 0; - - /// @variable Indicates if signal is valid or not. - volatile bool invalid_; - - /// @variable Indicates number of runtime threads waiting on this signal. - /// Value of zero means no waits. 
- volatile uint32_t waiting_; - - volatile uint32_t retained_; - - private: - DISALLOW_COPY_AND_ASSIGN(Signal); -}; - -struct hsa_signal_handle { - hsa_signal_t signal; - - hsa_signal_handle() {} - hsa_signal_handle(hsa_signal_t Signal) { signal = Signal; } - operator hsa_signal_t() { return signal; } - Signal* operator->() { return core::Signal::Convert(signal); } -}; -static_assert( - sizeof(hsa_signal_handle) == sizeof(hsa_signal_t), - "hsa_signal_handle and hsa_signal_t must have identical binary layout."); -static_assert( - sizeof(hsa_signal_handle[2]) == sizeof(hsa_signal_t[2]), - "hsa_signal_handle and hsa_signal_t must have identical binary layout."); - -} // namespace core -#endif // header guard diff --git a/runtime/hsa-runtime/core/runtime/amd_aql_queue.cpp b/runtime/hsa-runtime/core/runtime/amd_aql_queue.cpp deleted file mode 100644 index 870410a69c..0000000000 --- a/runtime/hsa-runtime/core/runtime/amd_aql_queue.cpp +++ /dev/null @@ -1,856 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// -// The University of Illinois/NCSA -// Open Source License (NCSA) -// -// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. -// -// Developed by: -// -// AMD Research and AMD HSA Software Development -// -// Advanced Micro Devices, Inc. -// -// www.amd.com -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal with the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimers. 
-// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimers in -// the documentation and/or other materials provided with the distribution. -// - Neither the names of Advanced Micro Devices, Inc, -// nor the names of its contributors may be used to endorse or promote -// products derived from this Software without specific prior written -// permission. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR -// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// DEALINGS WITH THE SOFTWARE. -// -//////////////////////////////////////////////////////////////////////////////// - -#include "core/inc/amd_aql_queue.h" - -#ifdef __linux__ -#include -#include -#include -#include -#include -#endif - -#ifdef _WIN32 -#include -#endif - -#include -#include - -#include "core/inc/runtime.h" -#include "core/inc/amd_memory_region.h" -#include "core/inc/signal.h" -#include "core/inc/queue.h" -#include "core/util/utils.h" -#include "core/inc/registers.h" -#include "core/inc/interrupt_signal.h" - -namespace amd { -// Queue::amd_queue_ is cache-aligned for performance. -const uint32_t kAmdQueueAlignBytes = 0x40; - -HsaEvent* AqlQueue::queue_event_ = NULL; -volatile uint32_t AqlQueue::queue_count_ = 0; -KernelMutex AqlQueue::queue_lock_; -int AqlQueue::rtti_id_; - -void* AqlQueue::operator new(size_t size) { - // Align base to 64B to enforce amd_queue_ member alignment. 
- return _aligned_malloc(size, kAmdQueueAlignBytes); -} - -void AqlQueue::operator delete(void* ptr) { _aligned_free(ptr); } - -AqlQueue::AqlQueue(GpuAgent* agent, size_t req_size_pkts, HSAuint32 node_id, - ScratchInfo& scratch, core::HsaEventCallback callback, - void* err_data, bool is_kv) - : Queue(), - Signal(0), - ring_buf_(NULL), - ring_buf_alloc_bytes_(0), - queue_id_(HSA_QUEUEID(-1)), - valid_(false), - agent_(agent), - queue_scratch_(scratch), - errors_callback_(callback), - errors_data_(err_data), - is_kv_queue_(is_kv) { - if (!Queue::Shared::IsSharedObjectAllocationValid()) { - return; - } - - hsa_status_t stat = agent_->GetInfo(HSA_AGENT_INFO_PROFILE, &agent_profile_); - assert(stat == HSA_STATUS_SUCCESS); - - const core::Isa* isa = agent_->isa(); - - // When queue_full_workaround_ is set to 1, the ring buffer is internally - // doubled in size. Virtual addresses in the upper half of the ring allocation - // are mapped to the same set of pages backing the lower half. - // Values written to the HW doorbell are modulo the doubled size. - // This allows the HW to accept (doorbell == last_doorbell + queue_size). - // This workaround is required for GFXIP 7 and GFXIP 8 ASICs. - queue_full_workaround_ = - (isa->GetMajorVersion() == 7 || isa->GetMajorVersion() == 8) - ? 1 - : 0; - - // Identify doorbell semantics for this agent. - doorbell_type_ = agent->properties().Capability.ui32.DoorbellType; - - // Queue size is a function of several restrictions. - const uint32_t min_pkts = ComputeRingBufferMinPkts(); - const uint32_t max_pkts = ComputeRingBufferMaxPkts(); - - // Apply sizing constraints to the ring buffer. - uint32_t queue_size_pkts = uint32_t(req_size_pkts); - queue_size_pkts = Min(queue_size_pkts, max_pkts); - queue_size_pkts = Max(queue_size_pkts, min_pkts); - - uint32_t queue_size_bytes = queue_size_pkts * sizeof(core::AqlPacket); - if ((queue_size_bytes & (queue_size_bytes - 1)) != 0) return; - - // Allocate the AQL packet ring buffer. 
- AllocRegisteredRingBuffer(queue_size_pkts); - if (ring_buf_ == NULL) return; - MAKE_NAMED_SCOPE_GUARD(RingGuard, [&]() { FreeRegisteredRingBuffer(); }); - - // Fill the ring buffer with ALWAYS_RESERVED packet headers. - // Leave packet content uninitialized to help track errors. - for (uint32_t pkt_id = 0; pkt_id < queue_size_pkts; ++pkt_id) { - ((uint32_t*)ring_buf_)[16 * pkt_id] = HSA_PACKET_TYPE_INVALID; - } - - // Zero the amd_queue_ structure to clear RPTR/WPTR before queue attach. - memset(&amd_queue_, 0, sizeof(amd_queue_)); - - // Initialize and map a HW AQL queue. - HsaQueueResource queue_rsrc = {0}; - queue_rsrc.Queue_read_ptr_aql = (uint64_t*)&amd_queue_.read_dispatch_id; - queue_rsrc.Queue_write_ptr_aql = - (uint64_t*)&amd_queue_.max_legacy_doorbell_dispatch_id_plus_1; - - HSAKMT_STATUS kmt_status; - kmt_status = hsaKmtCreateQueue(node_id, HSA_QUEUE_COMPUTE_AQL, 100, - HSA_QUEUE_PRIORITY_NORMAL, ring_buf_, - ring_buf_alloc_bytes_, NULL, &queue_rsrc); - if (kmt_status != HSAKMT_STATUS_SUCCESS) return; - queue_id_ = queue_rsrc.QueueId; - MAKE_NAMED_SCOPE_GUARD(QueueGuard, [&]() { hsaKmtDestroyQueue(queue_id_); }); - - // Populate doorbell signal structure. - memset(&signal_, 0, sizeof(signal_)); - signal_.kind = AMD_SIGNAL_KIND_LEGACY_DOORBELL; - signal_.legacy_hardware_doorbell_ptr = - (volatile uint32_t*)queue_rsrc.Queue_DoorBell; - signal_.queue_ptr = &amd_queue_; - - // Populate amd_queue_ structure. 
- amd_queue_.hsa_queue.type = HSA_QUEUE_TYPE_MULTI; - amd_queue_.hsa_queue.features = HSA_QUEUE_FEATURE_KERNEL_DISPATCH; - amd_queue_.hsa_queue.base_address = ring_buf_; - amd_queue_.hsa_queue.doorbell_signal = Signal::Convert(this); - amd_queue_.hsa_queue.size = queue_size_pkts; - amd_queue_.hsa_queue.id = core::Runtime::runtime_singleton_->GetQueueId(); - amd_queue_.read_dispatch_id_field_base_byte_offset = uint32_t( - uintptr_t(&amd_queue_.read_dispatch_id) - uintptr_t(&amd_queue_)); - - const auto& props = agent->properties(); - amd_queue_.max_cu_id = (props.NumFComputeCores / props.NumSIMDPerCU) - 1; - amd_queue_.max_wave_id = props.MaxWavesPerSIMD - 1; - -#ifdef HSA_LARGE_MODEL - AMD_HSA_BITS_SET(amd_queue_.queue_properties, AMD_QUEUE_PROPERTIES_IS_PTR64, - 1); -#else - AMD_HSA_BITS_SET(amd_queue_.queue_properties, AMD_QUEUE_PROPERTIES_IS_PTR64, - 0); -#endif - - // Populate scratch resource descriptor in amd_queue_. - SQ_BUF_RSRC_WORD0 srd0; - SQ_BUF_RSRC_WORD1 srd1; - SQ_BUF_RSRC_WORD2 srd2; - SQ_BUF_RSRC_WORD3 srd3; - uintptr_t scratch_base = uintptr_t(queue_scratch_.queue_base); - uint32_t scratch_base_hi = 0; - -#ifdef HSA_LARGE_MODEL - scratch_base_hi = uint32_t(scratch_base >> 32); -#endif - - srd0.bits.BASE_ADDRESS = uint32_t(scratch_base); - srd1.bits.BASE_ADDRESS_HI = scratch_base_hi; - srd1.bits.STRIDE = 0; - srd1.bits.CACHE_SWIZZLE = 0; - srd1.bits.SWIZZLE_ENABLE = 1; - srd2.bits.NUM_RECORDS = uint32_t(queue_scratch_.size); - srd3.bits.DST_SEL_X = SQ_SEL_X; - srd3.bits.DST_SEL_Y = SQ_SEL_Y; - srd3.bits.DST_SEL_Z = SQ_SEL_Z; - srd3.bits.DST_SEL_W = SQ_SEL_W; - srd3.bits.NUM_FORMAT = BUF_NUM_FORMAT_UINT; - srd3.bits.DATA_FORMAT = BUF_DATA_FORMAT_32; - srd3.bits.ELEMENT_SIZE = 1; // 4 - srd3.bits.INDEX_STRIDE = 3; // 64 - srd3.bits.ADD_TID_ENABLE = 1; - srd3.bits.ATC__CI__VI = (agent_profile_ == HSA_PROFILE_FULL) ? 
1 : 0; - srd3.bits.HASH_ENABLE = 0; - srd3.bits.HEAP = 0; - srd3.bits.MTYPE__CI__VI = 0; - srd3.bits.TYPE = SQ_RSRC_BUF; - - amd_queue_.scratch_resource_descriptor[0] = srd0.u32All; - amd_queue_.scratch_resource_descriptor[1] = srd1.u32All; - amd_queue_.scratch_resource_descriptor[2] = srd2.u32All; - amd_queue_.scratch_resource_descriptor[3] = srd3.u32All; - - // Populate flat scratch parameters in amd_queue_. - amd_queue_.scratch_backing_memory_location = - queue_scratch_.queue_process_offset; - amd_queue_.scratch_backing_memory_byte_size = queue_scratch_.size; - amd_queue_.scratch_workitem_byte_size = - uint32_t(queue_scratch_.size_per_thread); - - // Set concurrent wavefront limits when scratch is being used. - COMPUTE_TMPRING_SIZE tmpring_size = {0}; - - if (queue_scratch_.size != 0) { - tmpring_size.bits.WAVES = - (queue_scratch_.size / queue_scratch_.size_per_thread / 64); - tmpring_size.bits.WAVESIZE = - (((64 * queue_scratch_.size_per_thread) + 1023) / 1024); - } - - amd_queue_.compute_tmpring_size = tmpring_size.u32All; - - // Set group and private memory apertures in amd_queue_. 
- auto& regions = agent->regions(); - - for (int i = 0; i < regions.size(); i++) { - const MemoryRegion* amdregion; - amdregion = static_cast(regions[i]); - uint64_t base = amdregion->GetBaseAddress(); - - if (amdregion->IsLDS()) { -#ifdef HSA_LARGE_MODEL - amd_queue_.group_segment_aperture_base_hi = - uint32_t(uintptr_t(base) >> 32); -#else - amd_queue_.group_segment_aperture_base_hi = uint32_t(base); -#endif - } - - if (amdregion->IsScratch()) { -#ifdef HSA_LARGE_MODEL - amd_queue_.private_segment_aperture_base_hi = - uint32_t(uintptr_t(base) >> 32); -#else - amd_queue_.private_segment_aperture_base_hi = uint32_t(base); -#endif - } - } - - assert(amd_queue_.group_segment_aperture_base_hi != NULL && - "No group region found."); - - if (os::GetEnvVar("HSA_CHECK_FLAT_SCRATCH") == "1") { - assert(amd_queue_.private_segment_aperture_base_hi != NULL && - "No private region found."); - } - - MAKE_NAMED_SCOPE_GUARD(EventGuard, [&]() { - ScopedAcquire _lock(&queue_lock_); - queue_count_--; - if (queue_count_ == 0) { - core::InterruptSignal::DestroyEvent(queue_event_); - queue_event_ = NULL; - } - }); - - MAKE_NAMED_SCOPE_GUARD(SignalGuard, [&]() { - HSA::hsa_signal_destroy(amd_queue_.queue_inactive_signal); - }); -#if defined(HSA_LARGE_MODEL) && defined(__linux__) - if (core::g_use_interrupt_wait) { - { - ScopedAcquire _lock(&queue_lock_); - queue_count_++; - if (queue_event_ == NULL) { - assert(queue_count_ == 1 && - "Inconsistency in queue event reference counting found.\n"); - - queue_event_ = - core::InterruptSignal::CreateEvent(HSA_EVENTTYPE_SIGNAL, false); - if (queue_event_ == NULL) return; - } - } - auto signal = new core::InterruptSignal(0, queue_event_); - amd_queue_.queue_inactive_signal = core::InterruptSignal::Convert(signal); - if (hsa_amd_signal_async_handler( - amd_queue_.queue_inactive_signal, HSA_SIGNAL_CONDITION_NE, 0, - DynamicScratchHandler, this) != HSA_STATUS_SUCCESS) - return; - } else { - EventGuard.Dismiss(); - SignalGuard.Dismiss(); - } -#else - 
EventGuard.Dismiss(); - SignalGuard.Dismiss(); -#endif - - valid_ = true; - active_ = 1; - - RingGuard.Dismiss(); - QueueGuard.Dismiss(); - EventGuard.Dismiss(); - SignalGuard.Dismiss(); -} - -AqlQueue::~AqlQueue() { - if (!IsValid()) { - return; - } - - if (active_ == 1) hsaKmtDestroyQueue(queue_id_); - - FreeRegisteredRingBuffer(); - agent_->ReleaseQueueScratch(queue_scratch_.queue_base); - HSA::hsa_signal_destroy(amd_queue_.queue_inactive_signal); -#if defined(HSA_LARGE_MODEL) && defined(__linux__) - if (core::g_use_interrupt_wait) { - ScopedAcquire lock(&queue_lock_); - queue_count_--; - if (queue_count_ == 0) { - core::InterruptSignal::DestroyEvent(queue_event_); - queue_event_ = NULL; - } - } -#endif -} - -uint64_t AqlQueue::LoadReadIndexAcquire() { - return atomic::Load(&amd_queue_.read_dispatch_id, std::memory_order_acquire); -} - -uint64_t AqlQueue::LoadReadIndexRelaxed() { - return atomic::Load(&amd_queue_.read_dispatch_id, std::memory_order_relaxed); -} - -uint64_t AqlQueue::LoadWriteIndexAcquire() { - return atomic::Load(&amd_queue_.write_dispatch_id, std::memory_order_acquire); -} - -uint64_t AqlQueue::LoadWriteIndexRelaxed() { - return atomic::Load(&amd_queue_.write_dispatch_id, std::memory_order_relaxed); -} - -void AqlQueue::StoreWriteIndexRelaxed(uint64_t value) { - atomic::Store(&amd_queue_.write_dispatch_id, value, - std::memory_order_relaxed); -} - -void AqlQueue::StoreWriteIndexRelease(uint64_t value) { - atomic::Store(&amd_queue_.write_dispatch_id, value, - std::memory_order_release); -} - -uint64_t AqlQueue::CasWriteIndexAcqRel(uint64_t expected, uint64_t value) { - return atomic::Cas(&amd_queue_.write_dispatch_id, value, expected, - std::memory_order_acq_rel); -} -uint64_t AqlQueue::CasWriteIndexAcquire(uint64_t expected, uint64_t value) { - return atomic::Cas(&amd_queue_.write_dispatch_id, value, expected, - std::memory_order_acquire); -} -uint64_t AqlQueue::CasWriteIndexRelaxed(uint64_t expected, uint64_t value) { - return 
atomic::Cas(&amd_queue_.write_dispatch_id, value, expected, - std::memory_order_relaxed); -} -uint64_t AqlQueue::CasWriteIndexRelease(uint64_t expected, uint64_t value) { - return atomic::Cas(&amd_queue_.write_dispatch_id, value, expected, - std::memory_order_release); -} - -uint64_t AqlQueue::AddWriteIndexAcqRel(uint64_t value) { - return atomic::Add(&amd_queue_.write_dispatch_id, value, - std::memory_order_acq_rel); -} - -uint64_t AqlQueue::AddWriteIndexAcquire(uint64_t value) { - return atomic::Add(&amd_queue_.write_dispatch_id, value, - std::memory_order_acquire); -} - -uint64_t AqlQueue::AddWriteIndexRelaxed(uint64_t value) { - return atomic::Add(&amd_queue_.write_dispatch_id, value, - std::memory_order_relaxed); -} - -uint64_t AqlQueue::AddWriteIndexRelease(uint64_t value) { - return atomic::Add(&amd_queue_.write_dispatch_id, value, - std::memory_order_release); -} - -void AqlQueue::StoreRelaxed(hsa_signal_value_t value) { - // Acquire spinlock protecting the legacy doorbell. - while (atomic::Cas(&amd_queue_.legacy_doorbell_lock, 1U, 0U, - std::memory_order_acquire) != 0) { - os::YieldThread(); - } - -#ifdef HSA_LARGE_MODEL - // AMD hardware convention expects the packet index to point beyond - // the last packet to be processed. Packet indices written to the - // max_legacy_doorbell_dispatch_id_plus_1 field must conform to this - // expectation, since this field is used as the HW-visible write index. - uint64_t legacy_dispatch_id = value + 1; -#else - // In the small machine model it is difficult to distinguish packet index - // wrap at 2^32 packets from a backwards doorbell. Instead, ignore the - // doorbell value and submit the write index instead. It is OK to issue - // a doorbell for packets in the INVALID or ALWAYS_RESERVED state. - // The HW will stall on these packets until they enter a valid state. 
- uint64_t legacy_dispatch_id = amd_queue_.write_dispatch_id; - - // The write index may extend more than a full queue of packets beyond - // the read index. The hardware can process at most a full queue of packets - // at a time. Clamp the write index appropriately. A doorbell for the - // remaining packets is guaranteed to be sent at a later time. - legacy_dispatch_id = - Min(legacy_dispatch_id, - uint64_t(amd_queue_.read_dispatch_id) + amd_queue_.hsa_queue.size); -#endif - - // Discard backwards and duplicate doorbells. - if (legacy_dispatch_id > amd_queue_.max_legacy_doorbell_dispatch_id_plus_1) { - // Record the most recent packet index used in a doorbell submission. - // This field will be interpreted as a write index upon HW queue connect. - // Must be visible to the HW before sending the doorbell to avoid a race. - atomic::Store(&amd_queue_.max_legacy_doorbell_dispatch_id_plus_1, - legacy_dispatch_id, std::memory_order_relaxed); - - // Write the dispatch id to the hardware MMIO doorbell. - if (doorbell_type_ == 0) { - // The legacy GFXIP 7 hardware doorbell expects: - // 1. Packet index wrapped to a point within the ring buffer - // 2. Packet index converted to DWORD count - uint64_t queue_size_mask = - ((1 + queue_full_workaround_) * amd_queue_.hsa_queue.size) - 1; - - *(volatile uint32_t*)signal_.legacy_hardware_doorbell_ptr = - uint32_t((legacy_dispatch_id & queue_size_mask) * - (sizeof(core::AqlPacket) / sizeof(uint32_t))); - } else if (doorbell_type_ == 1) { - *(volatile uint32_t*)signal_.legacy_hardware_doorbell_ptr = - uint32_t(legacy_dispatch_id); - } else { - assert(false && "Agent has unsupported doorbell semantics"); - } - } - - // Release spinlock protecting the legacy doorbell. 
- atomic::Store(&amd_queue_.legacy_doorbell_lock, 0U, - std::memory_order_release); -} - -void AqlQueue::StoreRelease(hsa_signal_value_t value) { - std::atomic_thread_fence(std::memory_order_release); - StoreRelaxed(value); -} - -uint32_t AqlQueue::ComputeRingBufferMinPkts() { - // From CP_HQD_PQ_CONTROL.QUEUE_SIZE specification: - // Size of the primary queue (PQ) will be: 2^(HQD_QUEUE_SIZE+1) DWs. - // Min Size is 7 (2^8 = 256 DWs) and max size is 29 (2^30 = 1 G-DW) - uint32_t min_bytes = 0x400; - - if (queue_full_workaround_ == 1) { -#ifdef __linux__ - // Double mapping requires one page of backing store. - min_bytes = Max(min_bytes, 0x1000U); -#endif -#ifdef _WIN32 - // Shared memory mapping is at system allocation granularity. - SYSTEM_INFO sys_info; - GetNativeSystemInfo(&sys_info); - min_bytes = Max(min_bytes, uint32_t(sys_info.dwAllocationGranularity)); -#endif - } - - return uint32_t(min_bytes / sizeof(core::AqlPacket)); -} - -uint32_t AqlQueue::ComputeRingBufferMaxPkts() { - // From CP_HQD_PQ_CONTROL.QUEUE_SIZE specification: - // Size of the primary queue (PQ) will be: 2^(HQD_QUEUE_SIZE+1) DWs. - // Min Size is 7 (2^8 = 256 DWs) and max size is 29 (2^30 = 1 G-DW) - uint64_t max_bytes = 0x100000000; - - if (queue_full_workaround_ == 1) { - // Double mapping halves maximum size. - max_bytes /= 2; - } - - return uint32_t(max_bytes / sizeof(core::AqlPacket)); -} - -void AqlQueue::AllocRegisteredRingBuffer(uint32_t queue_size_pkts) { - if (agent_profile_ == HSA_PROFILE_FULL) { - // Compute the physical and virtual size of the queue. - uint32_t ring_buf_phys_size_bytes = - uint32_t(queue_size_pkts * sizeof(core::AqlPacket)); - ring_buf_alloc_bytes_ = 2 * ring_buf_phys_size_bytes; - -#ifdef __linux__ - // Create a system-unique shared memory path for this thread. 
- char ring_buf_shm_path[16]; - pid_t sys_unique_tid = pid_t(syscall(__NR_gettid)); - sprintf(ring_buf_shm_path, "/%u", sys_unique_tid); - - int ring_buf_shm_fd = -1; - void* reserve_va = NULL; - - do { - // Create a shared memory object to back the ring buffer. - ring_buf_shm_fd = shm_open(ring_buf_shm_path, O_CREAT | O_RDWR | O_EXCL, - S_IRUSR | S_IWUSR); - if (ring_buf_shm_fd == -1) { - break; - } - if (posix_fallocate(ring_buf_shm_fd, 0, ring_buf_phys_size_bytes) != 0) - break; - - // Reserve a VA range twice the size of the physical backing store. - reserve_va = mmap(NULL, ring_buf_alloc_bytes_, PROT_NONE, - MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - assert(reserve_va != MAP_FAILED && "mmap failed"); - - // Remap the lower and upper halves of the VA range. - // Map both halves to the shared memory backing store. - // If the GPU device is KV, do not set PROT_EXEC flag. - void* ring_buf_lower_half = NULL; - void* ring_buf_upper_half = NULL; - if (is_kv_queue_) { - ring_buf_lower_half = - mmap(reserve_va, ring_buf_phys_size_bytes, PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_FIXED, ring_buf_shm_fd, 0); - assert(ring_buf_lower_half != MAP_FAILED && "mmap failed"); - - ring_buf_upper_half = - mmap((void*)(uintptr_t(reserve_va) + ring_buf_phys_size_bytes), - ring_buf_phys_size_bytes, PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_FIXED, ring_buf_shm_fd, 0); - assert(ring_buf_upper_half != MAP_FAILED && "mmap failed"); - } else { - ring_buf_lower_half = mmap(reserve_va, ring_buf_phys_size_bytes, - PROT_READ | PROT_WRITE | PROT_EXEC, - MAP_SHARED | MAP_FIXED, ring_buf_shm_fd, 0); - assert(ring_buf_lower_half != MAP_FAILED && "mmap failed"); - - ring_buf_upper_half = - mmap((void*)(uintptr_t(reserve_va) + ring_buf_phys_size_bytes), - ring_buf_phys_size_bytes, PROT_READ | PROT_WRITE | PROT_EXEC, - MAP_SHARED | MAP_FIXED, ring_buf_shm_fd, 0); - assert(ring_buf_upper_half != MAP_FAILED && "mmap failed"); - } - - // Release explicit reference to shared memory object. 
- shm_unlink(ring_buf_shm_path); - close(ring_buf_shm_fd); - - // Successfully created mapping. - ring_buf_ = ring_buf_lower_half; - return; - } while (false); - - // Resource cleanup on failure. - if (reserve_va) munmap(reserve_va, ring_buf_alloc_bytes_); - if (ring_buf_shm_fd != -1) { - shm_unlink(ring_buf_shm_path); - close(ring_buf_shm_fd); - } -#endif -#ifdef _WIN32 - HANDLE ring_buf_mapping = INVALID_HANDLE_VALUE; - void* ring_buf_lower_half = NULL; - void* ring_buf_upper_half = NULL; - - do { - // Create a page file mapping to back the ring buffer. - ring_buf_mapping = CreateFileMapping(INVALID_HANDLE_VALUE, NULL, - PAGE_EXECUTE_READWRITE | SEC_COMMIT, - 0, ring_buf_phys_size_bytes, NULL); - if (ring_buf_mapping == NULL) { - break; - } - - // Retry until obtaining an appropriate virtual address mapping. - for (int num_attempts = 0; num_attempts < 1000; ++num_attempts) { - // Find a virtual address range twice the size of the file mapping. - void* reserve_va = - VirtualAllocEx(GetCurrentProcess(), NULL, ring_buf_alloc_bytes_, - MEM_TOP_DOWN | MEM_RESERVE, PAGE_EXECUTE_READWRITE); - if (reserve_va == NULL) { - break; - } - VirtualFree(reserve_va, 0, MEM_RELEASE); - - // Map the ring buffer into the free virtual range. - // This may fail: another thread can allocate in this range. - ring_buf_lower_half = MapViewOfFileEx( - ring_buf_mapping, FILE_MAP_ALL_ACCESS | FILE_MAP_EXECUTE, 0, 0, - ring_buf_phys_size_bytes, reserve_va); - - if (ring_buf_lower_half == NULL) { - // Virtual range allocated by another thread, try again. - continue; - } - - ring_buf_upper_half = MapViewOfFileEx( - ring_buf_mapping, FILE_MAP_ALL_ACCESS | FILE_MAP_EXECUTE, 0, 0, - ring_buf_phys_size_bytes, - (void*)(uintptr_t(reserve_va) + ring_buf_phys_size_bytes)); - - if (ring_buf_upper_half == NULL) { - // Virtual range allocated by another thread, try again. - UnmapViewOfFile(ring_buf_lower_half); - continue; - } - - // Successfully created mapping. 
- ring_buf_ = ring_buf_lower_half; - break; - } - - if (ring_buf_ == NULL) { - break; - } - - // Release file mapping (reference counted by views). - CloseHandle(ring_buf_mapping); - - // Don't register the memory: causes a failure in the KFD. - // Instead use implicit registration to access the ring buffer. - return; - } while (false); - - // Resource cleanup on failure. - UnmapViewOfFile(ring_buf_upper_half); - UnmapViewOfFile(ring_buf_lower_half); - CloseHandle(ring_buf_mapping); -#endif - } else { - // Allocate storage for the ring buffer. - HsaMemFlags flags; - flags.Value = 0; - flags.ui32.HostAccess = 1; - flags.ui32.AtomicAccessPartial = 1; - flags.ui32.ExecuteAccess = 1; - flags.ui32.AQLQueueMemory = 1; - - ring_buf_alloc_bytes_ = AlignUp( - queue_size_pkts * static_cast(sizeof(core::AqlPacket)), 4096); - auto err = hsaKmtAllocMemory(agent_->node_id(), ring_buf_alloc_bytes_, - flags, (void**)&ring_buf_); - - if (err != HSAKMT_STATUS_SUCCESS) { - assert(false && "AQL queue memory allocation failure."); - return; - } - - HSAuint64 alternate_va; - err = hsaKmtMapMemoryToGPU(ring_buf_, ring_buf_alloc_bytes_, &alternate_va); - - if (err != HSAKMT_STATUS_SUCCESS) { - assert(false && "AQL queue memory map failure."); - hsaKmtFreeMemory(ring_buf_, ring_buf_alloc_bytes_); - ring_buf_ = NULL; - return; - } - - ring_buf_alloc_bytes_ = 2 * ring_buf_alloc_bytes_; - } -} - -void AqlQueue::FreeRegisteredRingBuffer() { - if (agent_profile_ == HSA_PROFILE_FULL) { -#ifdef __linux__ - munmap(ring_buf_, ring_buf_alloc_bytes_); -#endif -#ifdef _WIN32 - UnmapViewOfFile(ring_buf_); - UnmapViewOfFile( - (void*)(uintptr_t(ring_buf_) + (ring_buf_alloc_bytes_ / 2))); -#endif - } else { - hsaKmtUnmapMemoryToGPU(ring_buf_); - hsaKmtFreeMemory(ring_buf_, ring_buf_alloc_bytes_ / 2); - } - - ring_buf_ = NULL; - ring_buf_alloc_bytes_ = 0; -} - -hsa_status_t AqlQueue::Inactivate() { - int32_t active = atomic::Exchange((volatile int32_t*)&active_, 0); - if (active == 1) 
hsaKmtDestroyQueue(this->queue_id_); - return HSA_STATUS_SUCCESS; -} - -bool AqlQueue::DynamicScratchHandler(hsa_signal_value_t error_code, void* arg) { - AqlQueue* queue = (AqlQueue*)arg; - - if ((error_code & 1) == 1) { - // Insufficient scratch - recoverable - auto& scratch = queue->queue_scratch_; - - queue->agent_->ReleaseQueueScratch(scratch.queue_base); - - const core::AqlPacket& pkt = - ((core::AqlPacket*)queue->amd_queue_.hsa_queue - .base_address)[queue->amd_queue_.read_dispatch_id]; - - uint32_t scratch_request = pkt.dispatch.private_segment_size; - - scratch.size_per_thread = - Max(uint32_t(scratch.size_per_thread * 2), scratch_request); - // Align whole waves to 1KB. - scratch.size_per_thread = AlignUp(scratch.size_per_thread, 16); - scratch.size = scratch.size_per_thread * (queue->amd_queue_.max_cu_id + 1) * - 32 * 64; // TODO: replace constants. - - // printf("Growing scratch to %u - %u\n", uint32_t(scratch.size_per_thread), - // uint32_t(scratch.size)); - - queue->agent_->AcquireQueueScratch(scratch); - if (scratch.queue_base == NULL) { - // Out of scratch - promote error and invalidate queue - queue->Inactivate(); - if (queue->errors_callback_ != NULL) - queue->errors_callback_(HSA_STATUS_ERROR_OUT_OF_RESOURCES, - queue->public_handle(), queue->errors_data_); - return false; - } - - SQ_BUF_RSRC_WORD0 srd0; - SQ_BUF_RSRC_WORD2 srd2; - uintptr_t base = (uintptr_t)scratch.queue_base; - - srd0.u32All = queue->amd_queue_.scratch_resource_descriptor[0]; - srd2.u32All = queue->amd_queue_.scratch_resource_descriptor[2]; - - srd0.bits.BASE_ADDRESS = uint32_t(base); - srd2.bits.NUM_RECORDS = uint32_t(scratch.size); - - queue->amd_queue_.scratch_resource_descriptor[0] = srd0.u32All; - queue->amd_queue_.scratch_resource_descriptor[2] = srd2.u32All; - -#ifdef HSA_LARGE_MODEL - SQ_BUF_RSRC_WORD1 srd1; - srd1.u32All = queue->amd_queue_.scratch_resource_descriptor[1]; - srd1.bits.BASE_ADDRESS_HI = uint32_t(base >> 32); - 
queue->amd_queue_.scratch_resource_descriptor[1] = srd1.u32All; -#endif - - queue->amd_queue_.scratch_backing_memory_location = - scratch.queue_process_offset; - queue->amd_queue_.scratch_backing_memory_byte_size = scratch.size; - queue->amd_queue_.scratch_workitem_byte_size = - uint32_t(scratch.size_per_thread); - - COMPUTE_TMPRING_SIZE tmpring_size = {0}; - tmpring_size.bits.WAVES = (scratch.size / scratch.size_per_thread / 64); - tmpring_size.bits.WAVESIZE = - (((64 * scratch.size_per_thread) + 1023) / 1024); - queue->amd_queue_.compute_tmpring_size = tmpring_size.u32All; - - } else if ((error_code & 2) == 2) { // Invalid dim - queue->Inactivate(); - if (queue->errors_callback_ != NULL) - queue->errors_callback_(HSA_STATUS_ERROR_INCOMPATIBLE_ARGUMENTS, - queue->public_handle(), queue->errors_data_); - return false; - - } else if ((error_code & 4) == 4) { // Invalid group memory - queue->Inactivate(); - if (queue->errors_callback_ != NULL) - queue->errors_callback_(HSA_STATUS_ERROR_INVALID_ALLOCATION, - queue->public_handle(), queue->errors_data_); - return false; - - } else if ((error_code & 8) == 8) { // Invalid (or NULL) code - queue->Inactivate(); - if (queue->errors_callback_ != NULL) - queue->errors_callback_(HSA_STATUS_ERROR_INVALID_CODE_OBJECT, - queue->public_handle(), queue->errors_data_); - return false; - - } else if ((error_code & 32) == 32) { // Invalid format - queue->Inactivate(); - if (queue->errors_callback_ != NULL) - queue->errors_callback_(HSA_STATUS_ERROR_INVALID_PACKET_FORMAT, - queue->public_handle(), queue->errors_data_); - return false; - } else if ((error_code & 64) == 64) { // Group is too large - queue->Inactivate(); - if (queue->errors_callback_ != NULL) - queue->errors_callback_(HSA_STATUS_ERROR_INVALID_ARGUMENT, - queue->public_handle(), queue->errors_data_); - return false; - } else if ((error_code & 128) == 128) { // Out of VGPRs - queue->Inactivate(); - if (queue->errors_callback_ != NULL) - 
queue->errors_callback_(HSA_STATUS_ERROR_INVALID_ISA, - queue->public_handle(), queue->errors_data_); - return false; - } else if ((error_code & 0x80000000) == 0x80000000) { // Debug trap - queue->Inactivate(); - if (queue->errors_callback_ != NULL) - queue->errors_callback_(HSA_STATUS_ERROR_EXCEPTION, - queue->public_handle(), queue->errors_data_); - return false; - } else { - // Undefined code - queue->Inactivate(); - assert(false && "Undefined queue error code"); - if (queue->errors_callback_ != NULL) - queue->errors_callback_(HSA_STATUS_ERROR, queue->public_handle(), - queue->errors_data_); - return false; - } - - HSA::hsa_signal_store_relaxed(queue->amd_queue_.queue_inactive_signal, 0); - return true; -} - -hsa_status_t AqlQueue::SetCUMasking(const uint32_t num_cu_mask_count, - const uint32_t* cu_mask) { - HSAKMT_STATUS ret = hsaKmtSetQueueCUMask( - queue_id_, num_cu_mask_count, - reinterpret_cast(const_cast(cu_mask))); - return (HSAKMT_STATUS_SUCCESS == ret) ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR; -} -} // namespace amd diff --git a/runtime/hsa-runtime/core/runtime/amd_blit_kernel.cpp b/runtime/hsa-runtime/core/runtime/amd_blit_kernel.cpp deleted file mode 100644 index f654ea86e6..0000000000 --- a/runtime/hsa-runtime/core/runtime/amd_blit_kernel.cpp +++ /dev/null @@ -1,647 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// -// The University of Illinois/NCSA -// Open Source License (NCSA) -// -// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. -// -// Developed by: -// -// AMD Research and AMD HSA Software Development -// -// Advanced Micro Devices, Inc. 
-// -// www.amd.com -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal with the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimers. -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimers in -// the documentation and/or other materials provided with the distribution. -// - Neither the names of Advanced Micro Devices, Inc, -// nor the names of its contributors may be used to endorse or promote -// products derived from this Software without specific prior written -// permission. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR -// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// DEALINGS WITH THE SOFTWARE. 
-// -//////////////////////////////////////////////////////////////////////////////// - -#include "core/inc/amd_blit_kernel.h" - -#include -#include -#include -#include - -#if defined(_WIN32) || defined(_WIN64) -#define NOMINMAX -#include -#else -#include -#endif - -#include "core/inc/amd_blit_kernel_kv.h" -#include "core/inc/amd_blit_kernel_vi.h" -#include "core/inc/amd_gpu_agent.h" -#include "core/inc/hsa_internal.h" -#include "core/util/utils.h" - -namespace amd { -const uint32_t BlitKernel::kGroupSize = 256; -const size_t BlitKernel::kMaxCopyCount = AlignDown(UINT32_MAX, kGroupSize); -const size_t BlitKernel::kMaxFillCount = AlignDown(UINT32_MAX, kGroupSize); - -static const uint16_t kInvalidPacketHeader = HSA_PACKET_TYPE_INVALID; - -BlitKernel::BlitKernel() - : core::Blit(), - copy_code_handle_(0), - fill_code_handle_(0), - queue_(NULL), - cached_index_(0), - kernarg_(NULL), - kernarg_async_(NULL), - kernarg_async_mask_(0), - kernarg_async_counter_(0), - code_arg_buffer_(NULL) { - completion_signal_.handle = 0; -} - -BlitKernel::~BlitKernel() {} - -hsa_status_t BlitKernel::Initialize(const core::Agent& agent) { - hsa_agent_t agent_handle = agent.public_handle(); - - uint32_t features = 0; - hsa_status_t status = - HSA::hsa_agent_get_info(agent_handle, HSA_AGENT_INFO_FEATURE, &features); - if (status != HSA_STATUS_SUCCESS) { - return status; - } - - if ((features & HSA_AGENT_FEATURE_KERNEL_DISPATCH) == 0) { - return HSA_STATUS_ERROR; - } - - // Need queue buffer that can cover the max size of local memory. 
- const uint64_t kGpuVmVaSize = 1ULL << 40; - const uint32_t kRequiredQueueSize = NextPow2(static_cast( - std::ceil(static_cast(kGpuVmVaSize) / kMaxCopyCount))); - - uint32_t max_queue_size = 0; - status = HSA::hsa_agent_get_info(agent_handle, HSA_AGENT_INFO_QUEUE_MAX_SIZE, - &max_queue_size); - - if (HSA_STATUS_SUCCESS != status) { - return status; - } - - if (max_queue_size < kRequiredQueueSize) { - return HSA_STATUS_ERROR_OUT_OF_RESOURCES; - } - - status = - HSA::hsa_queue_create(agent_handle, kRequiredQueueSize, - HSA_QUEUE_TYPE_MULTI, NULL, NULL, 0, 0, &queue_); - - if (HSA_STATUS_SUCCESS != status) { - return status; - } - - queue_bitmask_ = queue_->size - 1; - - cached_index_ = 0; - - void* copy_raw_obj_mem = NULL; - size_t copy_akc_size = 0; - size_t copy_akc_offset = 0; - - void* copy_aligned_raw_obj_mem = NULL; - size_t copy_aligned_akc_size = 0; - size_t copy_aligned_akc_offset = 0; - - void* fill_raw_obj_mem = NULL; - size_t fill_akc_size = 0; - size_t fill_akc_offset = 0; - - switch (agent.isa()->GetMajorVersion()) { - case 7: - copy_raw_obj_mem = kVectorCopyKvObject; - copy_akc_size = HSA_VECTOR_COPY_KV_AKC_SIZE; - copy_akc_offset = HSA_VECTOR_COPY_KV_AKC_OFFSET; - - copy_aligned_raw_obj_mem = kVectorCopyAlignedKvObject; - copy_aligned_akc_size = HSA_VECTOR_COPY_ALIGNED_KV_AKC_SIZE; - copy_aligned_akc_offset = HSA_VECTOR_COPY_ALIGNED_KV_AKC_OFFSET; - - fill_raw_obj_mem = kFillMemoryKvObject; - fill_akc_size = HSA_FILL_MEMORY_KV_AKC_SIZE; - fill_akc_offset = HSA_FILL_MEMORY_KV_AKC_OFFSET; - break; - case 8: - copy_raw_obj_mem = kVectorCopyViObject; - copy_akc_size = HSA_VECTOR_COPY_VI_AKC_SIZE; - copy_akc_offset = HSA_VECTOR_COPY_VI_AKC_OFFSET; - - copy_aligned_raw_obj_mem = kVectorCopyAlignedViObject; - copy_aligned_akc_size = HSA_VECTOR_COPY_ALIGNED_VI_AKC_SIZE; - copy_aligned_akc_offset = HSA_VECTOR_COPY_ALIGNED_VI_AKC_OFFSET; - - fill_raw_obj_mem = kFillMemoryViObject; - fill_akc_size = HSA_FILL_MEMORY_VI_AKC_SIZE; - fill_akc_offset = 
HSA_FILL_MEMORY_VI_AKC_OFFSET; - break; - default: - assert(false && "Only gfx7 and gfx8 are supported"); - break; - } - - static const size_t kKernArgSize = - std::max(sizeof(KernelCopyArgs), sizeof(KernelFillArgs)); - const size_t total_alloc_size = AlignUp( - AlignUp(copy_akc_size, 256) + AlignUp(copy_aligned_akc_size, 256) + - AlignUp(fill_akc_size, 256) + AlignUp(kKernArgSize, 16), - 4096); - - amd_kernel_code_t *code_ptr = nullptr; - code_arg_buffer_ = core::Runtime::runtime_singleton_->system_allocator()( - total_alloc_size, 4096); - - char* akc_arg = reinterpret_cast(code_arg_buffer_); - memcpy(akc_arg, - reinterpret_cast(copy_raw_obj_mem) + copy_akc_offset, - copy_akc_size); - copy_code_handle_ = reinterpret_cast(akc_arg); - code_ptr = (amd_kernel_code_t*)(copy_code_handle_); - code_ptr->runtime_loader_kernel_symbol = 0; - akc_arg += copy_akc_size; - - akc_arg = AlignUp(akc_arg, 256); - memcpy(akc_arg, reinterpret_cast(copy_aligned_raw_obj_mem) + - copy_aligned_akc_offset, - copy_aligned_akc_size); - copy_aligned_code_handle_ = reinterpret_cast(akc_arg); - code_ptr = (amd_kernel_code_t*)(copy_aligned_code_handle_); - code_ptr->runtime_loader_kernel_symbol = 0; - akc_arg += copy_aligned_akc_size; - - akc_arg = AlignUp(akc_arg, 256); - memcpy(akc_arg, - reinterpret_cast(fill_raw_obj_mem) + fill_akc_offset, - fill_akc_size); - fill_code_handle_ = reinterpret_cast(akc_arg); - code_ptr = (amd_kernel_code_t*)(fill_code_handle_); - code_ptr->runtime_loader_kernel_symbol = 0; - akc_arg += fill_akc_size; - - akc_arg = AlignUp(akc_arg, 16); - kernarg_ = akc_arg; - - status = HSA::hsa_signal_create(1, 0, NULL, &completion_signal_); - if (HSA_STATUS_SUCCESS != status) { - return status; - } - - kernarg_async_ = reinterpret_cast( - core::Runtime::runtime_singleton_->system_allocator()( - kRequiredQueueSize * AlignUp(sizeof(KernelCopyArgs), 16), 16)); - - kernarg_async_mask_ = kRequiredQueueSize - 1; - - // TODO(bwicakso): remove this code when execute permission level 
is not - // mandatory. - if (((amd::GpuAgent&)agent).profile() == HSA_PROFILE_FULL) { -#if defined(_WIN32) || defined(_WIN64) -#define NOMINMAX - DWORD old_protect = 0; - const DWORD new_protect = PAGE_EXECUTE_READWRITE; - if (!VirtualProtect(code_arg_buffer_, total_alloc_size, new_protect, - &old_protect)) { - return HSA_STATUS_ERROR_OUT_OF_RESOURCES; - } -#else - if (0 != mprotect(code_arg_buffer_, total_alloc_size, - PROT_READ | PROT_WRITE | PROT_EXEC)) { - return HSA_STATUS_ERROR_OUT_OF_RESOURCES; - } -#endif - } - - return HSA_STATUS_SUCCESS; -} - -hsa_status_t BlitKernel::Destroy(void) { - std::lock_guard guard(lock_); - - if (queue_ != NULL) { - HSA::hsa_queue_destroy(queue_); - } - - if (kernarg_async_ != NULL) { - core::Runtime::runtime_singleton_->system_deallocator()(kernarg_async_); - } - - if (code_arg_buffer_ != NULL) { - core::Runtime::runtime_singleton_->system_deallocator()(code_arg_buffer_); - } - - if (completion_signal_.handle != 0) { - HSA::hsa_signal_destroy(completion_signal_); - } - - return HSA_STATUS_SUCCESS; -} - -static bool IsSystemMemory(void* address) { - static const uint64_t kLimitSystem = 1ULL << 48; - return (reinterpret_cast(address) < kLimitSystem); -} - -hsa_status_t BlitKernel::SubmitLinearCopyCommand(void* dst, const void* src, - size_t size) { - assert(copy_code_handle_ != 0); - - std::lock_guard guard(lock_); - - HSA::hsa_signal_store_relaxed(completion_signal_, 1); - - const size_t kAlignmentChar = 1; - const size_t kAlignmentUin32 = 4; - const size_t kAlignmentVec4 = 16; - const size_t copy_granule = - (IsMultipleOf(dst, kAlignmentVec4) && IsMultipleOf(src, kAlignmentVec4) && - IsMultipleOf(size, kAlignmentVec4)) - ? kAlignmentVec4 - : (IsMultipleOf(dst, kAlignmentUin32) && - IsMultipleOf(src, kAlignmentUin32) && - IsMultipleOf(size, kAlignmentUin32)) - ? 
kAlignmentUin32 - : kAlignmentChar; - - size = size / copy_granule; - - const uint32_t num_copy_packet = static_cast( - std::ceil(static_cast(size) / kMaxCopyCount)); - - // Reserve write index for copy + fence packet. - uint64_t write_index = AcquireWriteIndex(num_copy_packet); - - const uint32_t last_copy_index = num_copy_packet - 1; - size_t total_copy_count = 0; - for (uint32_t i = 0; i < num_copy_packet; ++i) { - // Setup arguments. - const uint32_t copy_count = static_cast( - std::min((size - total_copy_count), kMaxCopyCount)); - - void* cur_dst = static_cast(dst) + (total_copy_count * copy_granule); - const void* cur_src = - static_cast(src) + (total_copy_count * copy_granule); - - KernelCopyArgs* args = ObtainAsyncKernelCopyArg(); - assert(args != NULL); - assert(IsMultipleOf(args, 16)); - - args->src = cur_src; - args->dst = cur_dst; - args->size = copy_count; - args->use_vector = (copy_granule == kAlignmentVec4) ? 1 : 0; - - const uint32_t grid_size_x = - AlignUp(static_cast(copy_count), kGroupSize); - - // This assert to make sure kMaxCopySize is not changed to a number that - // could cause overflow to packet.grid_size_x. - assert(grid_size_x >= copy_count); - - hsa_signal_t signal = {(i == last_copy_index) ? completion_signal_.handle - : 0}; - PopulateQueue(write_index + i, ((copy_granule == kAlignmentChar) - ? copy_code_handle_ - : copy_aligned_code_handle_), - args, grid_size_x, signal); - - total_copy_count += copy_count; - } - - // Launch copy packet. - ReleaseWriteIndex(write_index, num_copy_packet); - - // Wait for the packet to finish. - if (HSA::hsa_signal_wait_acquire(completion_signal_, HSA_SIGNAL_CONDITION_LT, - 1, uint64_t(-1), - HSA_WAIT_STATE_ACTIVE) != 0) { - // Signal wait returned unexpected value. 
- return HSA_STATUS_ERROR; - } - - return HSA_STATUS_SUCCESS; -} - -hsa_status_t BlitKernel::SubmitLinearCopyCommand( - void* dst, const void* src, size_t size, - std::vector& dep_signals, core::Signal& out_signal) { - (copy_code_handle_ != 0); - const size_t kAlignmentChar = 1; - const size_t kAlignmentUin32 = 4; - const size_t kAlignmentVec4 = 16; - const size_t copy_granule = - (IsMultipleOf(dst, kAlignmentVec4) && IsMultipleOf(src, kAlignmentVec4) && - IsMultipleOf(size, kAlignmentVec4)) - ? kAlignmentVec4 - : (IsMultipleOf(dst, kAlignmentUin32) && - IsMultipleOf(src, kAlignmentUin32) && - IsMultipleOf(size, kAlignmentUin32)) - ? kAlignmentUin32 - : kAlignmentChar; - - size = size / copy_granule; - - const uint32_t num_copy_packet = static_cast( - std::ceil(static_cast(size) / kMaxCopyCount)); - - const uint32_t num_barrier_packet = - static_cast(std::ceil(dep_signals.size() / 5.0f)); - - // Reserve write index for copy + fence packet. - const uint32_t total_num_packet = num_barrier_packet + num_copy_packet; - - uint64_t write_index = AcquireWriteIndex(total_num_packet); - uint64_t write_index_temp = write_index; - - const uint16_t kBarrierPacketHeader = - (HSA_PACKET_TYPE_BARRIER_AND << HSA_PACKET_HEADER_TYPE) | - (1 << HSA_PACKET_HEADER_BARRIER) | - (HSA_FENCE_SCOPE_NONE << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE) | - (HSA_FENCE_SCOPE_AGENT << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE); - - hsa_barrier_and_packet_t barrier_packet = {0}; - barrier_packet.header = HSA_PACKET_TYPE_INVALID; - - hsa_barrier_and_packet_t* queue_buffer = - reinterpret_cast(queue_->base_address); - - const size_t dep_signal_count = dep_signals.size(); - for (size_t i = 0; i < dep_signal_count; ++i) { - const size_t idx = i % 5; - barrier_packet.dep_signal[idx] = core::Signal::Convert(dep_signals[i]); - if (i == (dep_signal_count - 1) || idx == 4) { - std::atomic_thread_fence(std::memory_order_acquire); - queue_buffer[(write_index)&queue_bitmask_] = barrier_packet; - 
std::atomic_thread_fence(std::memory_order_release); - queue_buffer[(write_index)&queue_bitmask_].header = kBarrierPacketHeader; - - ++write_index; - - memset(&barrier_packet, 0, sizeof(hsa_barrier_and_packet_t)); - barrier_packet.header = HSA_PACKET_TYPE_INVALID; - } - } - - const uint32_t last_copy_index = num_copy_packet - 1; - size_t total_copy_count = 0; - for (uint32_t i = 0; i < num_copy_packet; ++i) { - // Setup arguments. - const uint32_t copy_count = static_cast( - std::min((size - total_copy_count), kMaxCopyCount)); - - void* cur_dst = static_cast(dst) + (total_copy_count * copy_granule); - const void* cur_src = - static_cast(src) + (total_copy_count * copy_granule); - - KernelCopyArgs* args = ObtainAsyncKernelCopyArg(); - assert(args != NULL); - assert(IsMultipleOf(args, 16)); - - args->src = cur_src; - args->dst = cur_dst; - args->size = copy_count; - args->use_vector = (copy_granule == kAlignmentVec4) ? 1 : 0; - - const uint32_t grid_size_x = - AlignUp(static_cast(copy_count), kGroupSize); - - // This assert to make sure kMaxCopySize is not changed to a number that - // could cause overflow to packet.grid_size_x. - assert(grid_size_x >= copy_count); - - hsa_signal_t signal = {(i == last_copy_index) - ? (core::Signal::Convert(&out_signal)).handle - : 0}; - PopulateQueue(write_index, ((copy_granule == kAlignmentChar) - ? copy_code_handle_ - : copy_aligned_code_handle_), - args, grid_size_x, signal); - - ++write_index; - - total_copy_count += copy_count; - } - - // Launch copy packet. - ReleaseWriteIndex(write_index_temp, total_num_packet); - - return HSA_STATUS_SUCCESS; -} - -hsa_status_t BlitKernel::SubmitLinearFillCommand(void* ptr, uint32_t value, - size_t num) { - assert(fill_code_handle_ != 0); - - std::lock_guard guard(lock_); - - HSA::hsa_signal_store_relaxed(completion_signal_, 1); - - const uint32_t num_fill_packet = static_cast( - std::ceil(static_cast(num) / kMaxFillCount)); - - // Reserve write index for copy + fence packet. 
- uint64_t write_index = AcquireWriteIndex(num_fill_packet); - - KernelFillArgs* args = reinterpret_cast(kernarg_); - - if (args == NULL) { - return HSA_STATUS_ERROR_OUT_OF_RESOURCES; - } - - const uint32_t last_fill_index = num_fill_packet - 1; - size_t total_fill_count = 0; - for (uint32_t i = 0; i < num_fill_packet; ++i) { - assert(IsMultipleOf(&args[i], 16)); - - // Setup arguments. - const uint32_t fill_count = static_cast( - std::min((num - total_fill_count), kMaxFillCount)); - void* cur_ptr = static_cast(ptr) + total_fill_count; - - args[i].ptr = cur_ptr; - args[i].num = fill_count; - args[i].value = value; - - const uint32_t grid_size_x = - AlignUp(static_cast(fill_count), kGroupSize); - - // This assert to make sure kMaxFillCount is not changed to a number that - // could cause overflow to packet.grid_size_x. - assert(grid_size_x >= fill_count); - - hsa_signal_t signal = {(i == last_fill_index) ? completion_signal_.handle - : 0}; - PopulateQueue(write_index + i, fill_code_handle_, &args[i], grid_size_x, - signal); - - total_fill_count += fill_count; - } - - // Launch fill packet. - // Launch copy packet. - ReleaseWriteIndex(write_index, num_fill_packet); - - // Wait for the packet to finish. - if (HSA::hsa_signal_wait_acquire(completion_signal_, HSA_SIGNAL_CONDITION_LT, - 1, uint64_t(-1), - HSA_WAIT_STATE_ACTIVE) != 0) { - // Signal wait returned unexpected value. - return HSA_STATUS_ERROR; - } - - return HSA_STATUS_SUCCESS; -} - -uint64_t BlitKernel::AcquireWriteIndex(uint32_t num_packet) { - assert(queue_->size >= num_packet); - - uint64_t write_index = - HSA::hsa_queue_add_write_index_acq_rel(queue_, num_packet); - - while (true) { - // Wait until we have room in the queue; - const uint64_t read_index = HSA::hsa_queue_load_read_index_relaxed(queue_); - if ((write_index - read_index) < queue_->size) { - break; - } - } - - return write_index; -} - -void BlitKernel::ReleaseWriteIndex(uint64_t write_index, uint32_t num_packet) { - // Launch packet. 
- while (true) { - // Make sure that the address before ::current_offset is already released. - // Otherwise the packet processor may read invalid packets. - uint64_t expected_offset = write_index; - if (atomic::Cas(&cached_index_, write_index + num_packet, expected_offset, - std::memory_order_release) == expected_offset) { - // Update doorbel register with last packet id. - HSA::hsa_signal_store_release(queue_->doorbell_signal, - write_index + num_packet - 1); - break; - } - } -} - -hsa_status_t BlitKernel::FenceRelease(uint64_t write_index, - uint32_t num_copy_packet, - hsa_fence_scope_t fence) { - // This function is not thread safe. - - const uint16_t kBarrierPacketHeader = - (HSA_PACKET_TYPE_BARRIER_AND << HSA_PACKET_HEADER_TYPE) | - (1 << HSA_PACKET_HEADER_BARRIER) | - (HSA_FENCE_SCOPE_NONE << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE) | - (fence << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE); - - hsa_barrier_and_packet_t packet = {0}; - packet.header = kInvalidPacketHeader; - - HSA::hsa_signal_store_relaxed(completion_signal_, 1); - packet.completion_signal = completion_signal_; - - if (num_copy_packet == 0) { - assert(write_index == 0); - // Reserve write index. - write_index = AcquireWriteIndex(1); - } - - // Populate queue buffer with AQL packet. - hsa_barrier_and_packet_t* queue_buffer = - reinterpret_cast(queue_->base_address); - std::atomic_thread_fence(std::memory_order_acquire); - queue_buffer[(write_index + num_copy_packet) & queue_bitmask_] = packet; - std::atomic_thread_fence(std::memory_order_release); - queue_buffer[(write_index + num_copy_packet) & queue_bitmask_].header = - kBarrierPacketHeader; - - // Launch packet. - ReleaseWriteIndex(write_index, num_copy_packet + 1); - - // Wait for the packet to finish. - if (HSA::hsa_signal_wait_acquire(packet.completion_signal, - HSA_SIGNAL_CONDITION_LT, 1, uint64_t(-1), - HSA_WAIT_STATE_ACTIVE) != 0) { - // Signal wait returned unexpected value. 
- return HSA_STATUS_ERROR; - } - - return HSA_STATUS_SUCCESS; -} - -void BlitKernel::PopulateQueue(uint64_t index, uint64_t code_handle, void* args, - uint32_t grid_size_x, - hsa_signal_t completion_signal) { - assert(IsMultipleOf(args, 16)); - - hsa_kernel_dispatch_packet_t packet = {0}; - - static const uint16_t kDispatchPacketHeader = - (HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE) | - (((completion_signal.handle != 0) ? 1 : 0) << HSA_PACKET_HEADER_BARRIER) | - (HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE) | - (HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE); - - packet.header = kInvalidPacketHeader; - packet.kernel_object = code_handle; - packet.kernarg_address = args; - - // Setup working size. - const int kNumDimension = 1; - packet.setup = kNumDimension << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS; - packet.grid_size_x = AlignUp(static_cast(grid_size_x), kGroupSize); - packet.grid_size_y = packet.grid_size_z = 1; - packet.workgroup_size_x = kGroupSize; - packet.workgroup_size_y = packet.workgroup_size_z = 1; - - packet.completion_signal = completion_signal; - - // Populate queue buffer with AQL packet. 
- hsa_kernel_dispatch_packet_t* queue_buffer = - reinterpret_cast(queue_->base_address); - std::atomic_thread_fence(std::memory_order_acquire); - queue_buffer[index & queue_bitmask_] = packet; - std::atomic_thread_fence(std::memory_order_release); - queue_buffer[index & queue_bitmask_].header = kDispatchPacketHeader; -} - -BlitKernel::KernelCopyArgs* BlitKernel::ObtainAsyncKernelCopyArg() { - const uint32_t index = - atomic::Add(&kernarg_async_counter_, 1U, std::memory_order_acquire); - KernelCopyArgs* arg = &kernarg_async_[index & kernarg_async_mask_]; - assert(IsMultipleOf(arg, 16)); - return arg; -} - -} // namespace amd diff --git a/runtime/hsa-runtime/core/runtime/amd_blit_sdma.cpp b/runtime/hsa-runtime/core/runtime/amd_blit_sdma.cpp deleted file mode 100644 index b22da34cd1..0000000000 --- a/runtime/hsa-runtime/core/runtime/amd_blit_sdma.cpp +++ /dev/null @@ -1,858 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// -// The University of Illinois/NCSA -// Open Source License (NCSA) -// -// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. -// -// Developed by: -// -// AMD Research and AMD HSA Software Development -// -// Advanced Micro Devices, Inc. -// -// www.amd.com -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal with the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimers. 
-// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimers in -// the documentation and/or other materials provided with the distribution. -// - Neither the names of Advanced Micro Devices, Inc, -// nor the names of its contributors may be used to endorse or promote -// products derived from this Software without specific prior written -// permission. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR -// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// DEALINGS WITH THE SOFTWARE. -// -//////////////////////////////////////////////////////////////////////////////// - -#include "core/inc/amd_blit_sdma.h" - -#include -#include -#include -#include - -#include "core/inc/amd_gpu_agent.h" -#include "core/inc/runtime.h" -#include "core/inc/signal.h" - -namespace amd { -// SDMA packet for VI device. 
-// Reference: http://people.freedesktop.org/~agd5f/dma_packets.txt - -const unsigned int SDMA_OP_COPY = 1; -const unsigned int SDMA_OP_FENCE = 5; -const unsigned int SDMA_OP_POLL_REGMEM = 8; -const unsigned int SDMA_OP_ATOMIC = 10; -const unsigned int SDMA_OP_CONST_FILL = 11; -const unsigned int SDMA_SUBOP_COPY_LINEAR = 0; -const unsigned int SDMA_ATOMIC_ADD64 = 47; - -typedef struct SDMA_PKT_COPY_LINEAR_TAG { - union { - struct { - unsigned int op : 8; - unsigned int sub_op : 8; - unsigned int extra_info : 16; - }; - unsigned int DW_0_DATA; - } HEADER_UNION; - - union { - struct { - unsigned int count : 22; - unsigned int reserved_0 : 10; - }; - unsigned int DW_1_DATA; - } COUNT_UNION; - - union { - struct { - unsigned int reserved_0 : 16; - unsigned int dst_swap : 2; - unsigned int reserved_1 : 6; - unsigned int src_swap : 2; - unsigned int reserved_2 : 6; - }; - unsigned int DW_2_DATA; - } PARAMETER_UNION; - - union { - struct { - unsigned int src_addr_31_0 : 32; - }; - unsigned int DW_3_DATA; - } SRC_ADDR_LO_UNION; - - union { - struct { - unsigned int src_addr_63_32 : 32; - }; - unsigned int DW_4_DATA; - } SRC_ADDR_HI_UNION; - - union { - struct { - unsigned int dst_addr_31_0 : 32; - }; - unsigned int DW_5_DATA; - } DST_ADDR_LO_UNION; - - union { - struct { - unsigned int dst_addr_63_32 : 32; - }; - unsigned int DW_6_DATA; - } DST_ADDR_HI_UNION; -} SDMA_PKT_COPY_LINEAR; - -typedef struct SDMA_PKT_CONSTANT_FILL_TAG { - union { - struct { - unsigned int op : 8; - unsigned int sub_op : 8; - unsigned int sw : 2; - unsigned int reserved_0 : 12; - unsigned int fillsize : 2; - }; - unsigned int DW_0_DATA; - } HEADER_UNION; - - union { - struct { - unsigned int dst_addr_31_0 : 32; - }; - unsigned int DW_1_DATA; - } DST_ADDR_LO_UNION; - - union { - struct { - unsigned int dst_addr_63_32 : 32; - }; - unsigned int DW_2_DATA; - } DST_ADDR_HI_UNION; - - union { - struct { - unsigned int src_data_31_0 : 32; - }; - unsigned int DW_3_DATA; - } DATA_UNION; - - union { - 
struct { - unsigned int count : 22; - unsigned int reserved_0 : 10; - }; - unsigned int DW_4_DATA; - } COUNT_UNION; -} SDMA_PKT_CONSTANT_FILL; - -typedef struct SDMA_PKT_FENCE_TAG { - union { - struct { - unsigned int op : 8; - unsigned int sub_op : 8; - unsigned int reserved_0 : 16; - }; - unsigned int DW_0_DATA; - } HEADER_UNION; - - union { - struct { - unsigned int addr_31_0 : 32; - }; - unsigned int DW_1_DATA; - } ADDR_LO_UNION; - - union { - struct { - unsigned int addr_63_32 : 32; - }; - unsigned int DW_2_DATA; - } ADDR_HI_UNION; - - union { - struct { - unsigned int data : 32; - }; - unsigned int DW_3_DATA; - } DATA_UNION; -} SDMA_PKT_FENCE; - -typedef struct SDMA_PKT_POLL_REGMEM_TAG { - union { - struct { - unsigned int op : 8; - unsigned int sub_op : 8; - unsigned int reserved_0 : 10; - unsigned int hdp_flush : 1; - unsigned int reserved_1 : 1; - unsigned int func : 3; - unsigned int mem_poll : 1; - }; - unsigned int DW_0_DATA; - } HEADER_UNION; - - union { - struct { - unsigned int addr_31_0 : 32; - }; - unsigned int DW_1_DATA; - } ADDR_LO_UNION; - - union { - struct { - unsigned int addr_63_32 : 32; - }; - unsigned int DW_2_DATA; - } ADDR_HI_UNION; - - union { - struct { - unsigned int value : 32; - }; - unsigned int DW_3_DATA; - } VALUE_UNION; - - union { - struct { - unsigned int mask : 32; - }; - unsigned int DW_4_DATA; - } MASK_UNION; - - union { - struct { - unsigned int interval : 16; - unsigned int retry_count : 12; - unsigned int reserved_0 : 4; - }; - unsigned int DW_5_DATA; - } DW5_UNION; -} SDMA_PKT_POLL_REGMEM; - -typedef struct SDMA_PKT_ATOMIC_TAG { - union { - struct { - unsigned int op : 8; - unsigned int sub_op : 8; - unsigned int l : 1; - unsigned int reserved_0 : 8; - unsigned int operation : 7; - }; - unsigned int DW_0_DATA; - } HEADER_UNION; - - union { - struct { - unsigned int addr_31_0 : 32; - }; - unsigned int DW_1_DATA; - } ADDR_LO_UNION; - - union { - struct { - unsigned int addr_63_32 : 32; - }; - unsigned int DW_2_DATA; - } 
ADDR_HI_UNION; - - union { - struct { - unsigned int src_data_31_0 : 32; - }; - unsigned int DW_3_DATA; - } SRC_DATA_LO_UNION; - - union { - struct { - unsigned int src_data_63_32 : 32; - }; - unsigned int DW_4_DATA; - } SRC_DATA_HI_UNION; - - union { - struct { - unsigned int cmp_data_31_0 : 32; - }; - unsigned int DW_5_DATA; - } CMP_DATA_LO_UNION; - - union { - struct { - unsigned int cmp_data_63_32 : 32; - }; - unsigned int DW_6_DATA; - } CMP_DATA_HI_UNION; - - union { - struct { - unsigned int loop_interval : 13; - unsigned int reserved_0 : 19; - }; - unsigned int DW_7_DATA; - } LOOP_UNION; -} SDMA_PKT_ATOMIC; - -inline uint32_t ptrlow32(const void* p) { - return static_cast(reinterpret_cast(p)); -} - -inline uint32_t ptrhigh32(const void* p) { -#if defined(HSA_LARGE_MODEL) - return static_cast(reinterpret_cast(p) >> 32); -#else - return 0; -#endif -} - -BlitSdma::BlitSdma() - : core::Blit(), - queue_size_(0), - queue_start_addr_(NULL), - fence_base_addr_(NULL), - fence_pool_size_(0), - fence_pool_counter_(0), - cached_reserve_offset_(0), - cached_commit_offset_(0) { - std::memset(&queue_resource_, 0, sizeof(queue_resource_)); -} - -BlitSdma::~BlitSdma() {} - -hsa_status_t BlitSdma::Initialize(const core::Agent& agent) { - if (queue_start_addr_ != NULL && queue_size_ != 0) { - // Already initialized. 
- return HSA_STATUS_SUCCESS; - } - - if (agent.device_type() != core::Agent::kAmdGpuDevice) { - return HSA_STATUS_ERROR; - } - - linear_copy_command_size_ = sizeof(SDMA_PKT_COPY_LINEAR); - fill_command_size_ = sizeof(SDMA_PKT_CONSTANT_FILL); - fence_command_size_ = sizeof(SDMA_PKT_FENCE); - poll_command_size_ = sizeof(SDMA_PKT_POLL_REGMEM); - atomic_command_size_ = sizeof(SDMA_PKT_ATOMIC); - - const uint32_t sync_command_size = fence_command_size_; - const uint32_t max_num_copy_command = - std::floor((static_cast(queue_size_) - sync_command_size) / - linear_copy_command_size_); - const uint32_t max_num_fill_command = - std::floor((static_cast(queue_size_) - sync_command_size) / - fill_command_size_); - - max_single_linear_copy_size_ = 0x3fffe0; - max_total_linear_copy_size_ = static_cast( - std::min(static_cast(SIZE_MAX), - static_cast(max_num_copy_command) * - static_cast(max_single_linear_copy_size_))); - - max_single_fill_size_ = 1 << 22; - max_total_fill_size_ = static_cast( - std::min(static_cast(SIZE_MAX), - static_cast(max_num_fill_command) * - static_cast(max_single_fill_size_))); - - const amd::GpuAgent& amd_gpu_agent = static_cast(agent); - - if (amd_gpu_agent.isa()->version() != core::Isa::Version(8, 0, 3)) { - assert(false && "Only for Fiji currently"); - return HSA_STATUS_ERROR; - } - - // Allocate queue buffer. 
- const size_t kPageSize = 4096; - const size_t kSdmaQueueSize = 1024 * 1024; - - queue_size_ = kSdmaQueueSize; - - HsaMemFlags flags; - flags.Value = 0; - flags.ui32.HostAccess = 1; - flags.ui32.AtomicAccessPartial = 1; - flags.ui32.ExecuteAccess = 1; - - auto err = hsaKmtAllocMemory(amd_gpu_agent.node_id(), queue_size_, flags, - reinterpret_cast(&queue_start_addr_)); - - if (err != HSAKMT_STATUS_SUCCESS) { - assert(false && "SDMA queue memory allocation failure."); - return HSA_STATUS_ERROR_OUT_OF_RESOURCES; - } - - HSAuint64 alternate_va; - err = hsaKmtMapMemoryToGPU(queue_start_addr_, queue_size_, &alternate_va); - - if (err != HSAKMT_STATUS_SUCCESS) { - assert(false && "AQL queue memory map failure."); - Destroy(); - return HSA_STATUS_ERROR_OUT_OF_RESOURCES; - } - - std::memset(queue_start_addr_, 0, queue_size_); - - // Access kernel driver to initialize the queue control block - // This call binds user mode queue object to underlying compute - // device. - const GpuAgent& gpu_agent = reinterpret_cast(agent); - const HSA_QUEUE_TYPE kQueueType_ = HSA_QUEUE_SDMA; - if (HSAKMT_STATUS_SUCCESS != - hsaKmtCreateQueue(gpu_agent.node_id(), kQueueType_, 100, - HSA_QUEUE_PRIORITY_MAXIMUM, queue_start_addr_, - queue_size_, NULL, &queue_resource_)) { - Destroy(); - return HSA_STATUS_ERROR_OUT_OF_RESOURCES; - } - - cached_reserve_offset_ = *(queue_resource_.Queue_write_ptr); - cached_commit_offset_ = cached_reserve_offset_; - - fence_pool_size_ = - static_cast(std::ceil(kSdmaQueueSize / fence_command_size_)); - - fence_pool_mask_ = fence_pool_size_ - 1; - - fence_base_addr_ = reinterpret_cast( - core::Runtime::runtime_singleton_->system_allocator()( - fence_pool_size_ * sizeof(uint32_t), 256)); - - if (fence_base_addr_ == NULL) { - Destroy(); - return HSA_STATUS_ERROR_OUT_OF_RESOURCES; - } - - return HSA_STATUS_SUCCESS; -} - -hsa_status_t BlitSdma::Destroy(void) { - // Release all allocated resources and reset them to zero. 
- - if (queue_resource_.QueueId != 0) { - // Release queue resources from the kernel - auto err = hsaKmtDestroyQueue(queue_resource_.QueueId); - assert(err == HSAKMT_STATUS_SUCCESS); - memset(&queue_resource_, 0, sizeof(queue_resource_)); - } - - if (queue_start_addr_ != NULL && queue_size_ != 0) { - // Release queue buffer. - hsaKmtUnmapMemoryToGPU(queue_start_addr_); - hsaKmtFreeMemory(queue_start_addr_, queue_size_); - } - - if (fence_base_addr_ != NULL) { - core::Runtime::runtime_singleton_->system_deallocator()(fence_base_addr_); - } - - queue_size_ = 0; - queue_start_addr_ = NULL; - cached_reserve_offset_ = 0; - cached_commit_offset_ = 0; - - return HSA_STATUS_SUCCESS; -} - -hsa_status_t BlitSdma::SubmitLinearCopyCommand(void* dst, const void* src, - size_t size) { - if (size > max_total_linear_copy_size_) { - return HSA_STATUS_ERROR_OUT_OF_RESOURCES; - } - - // Break the copy into multiple copy operation incase the copy size exceeds - // the SDMA linear copy limit. - const uint32_t num_copy_command = static_cast( - std::ceil(static_cast(size) / max_single_linear_copy_size_)); - - const uint32_t total_copy_command_size = - num_copy_command * linear_copy_command_size_; - - const uint32_t total_command_size = - total_copy_command_size + fence_command_size_; - - const uint32_t kFenceValue = 2015; - uint32_t* fence_addr = ObtainFenceObject(); - *fence_addr = 0; - - char* command_addr = AcquireWriteAddress(total_command_size); - char* const command_addr_temp = command_addr; - - if (command_addr == NULL) { - return HSA_STATUS_ERROR_OUT_OF_RESOURCES; - } - - BuildCopyCommand(command_addr, num_copy_command, dst, src, size); - - command_addr += total_copy_command_size; - - BuildFenceCommand(command_addr, fence_addr, kFenceValue); - - ReleaseWriteAddress(command_addr_temp, total_command_size); - - WaitFence(fence_addr, kFenceValue); - - return HSA_STATUS_SUCCESS; -} - -hsa_status_t BlitSdma::SubmitLinearCopyCommand( - void* dst, const void* src, size_t size, - 
std::vector& dep_signals, core::Signal& out_signal) { - if (size > max_total_linear_copy_size_) { - return HSA_STATUS_ERROR_OUT_OF_RESOURCES; - } - - // The signal is 64 bit value, and poll checks for 32 bit value. So we - // need to use two poll operations per dependent signal. - const uint32_t num_poll_command = - static_cast(2 * dep_signals.size()); - const uint32_t total_poll_command_size = - (num_poll_command * poll_command_size_); - - // Break the copy into multiple copy operation incase the copy size exceeds - // the SDMA linear copy limit. - const uint32_t num_copy_command = static_cast( - std::ceil(static_cast(size) / max_single_linear_copy_size_)); - const uint32_t total_copy_command_size = - num_copy_command * linear_copy_command_size_; - - const uint32_t total_command_size = - total_poll_command_size + total_copy_command_size + atomic_command_size_ + - fence_command_size_; - - const uint32_t kFenceValue = 2015; - uint32_t* fence_addr = ObtainFenceObject(); - *fence_addr = 0; - - char* command_addr = AcquireWriteAddress(total_command_size); - char* const command_addr_temp = command_addr; - - if (command_addr == NULL) { - return HSA_STATUS_ERROR_OUT_OF_RESOURCES; - } - - for (size_t i = 0; i < dep_signals.size(); ++i) { - uint32_t* signal_addr = - reinterpret_cast(dep_signals[i]->ValueLocation()); - // Wait for the higher 64 bit to 0. - BuildPollCommand(command_addr, &signal_addr[1], 0); - command_addr += poll_command_size_; - // Then wait for the lower 64 bit to 0. - BuildPollCommand(command_addr, &signal_addr[0], 0); - command_addr += poll_command_size_; - } - - // Do the transfer after all polls are satisfied. - BuildCopyCommand(command_addr, num_copy_command, dst, src, size); - - command_addr += total_copy_command_size; - - // After transfer is completed, decrement the signal. 
- BuildAtomicDecrementCommand(command_addr, out_signal.ValueLocation()); - - command_addr += atomic_command_size_; - - BuildFenceCommand(command_addr, fence_addr, kFenceValue); - - ReleaseWriteAddress(command_addr_temp, total_command_size); - - return HSA_STATUS_SUCCESS; -} - -hsa_status_t BlitSdma::SubmitLinearFillCommand(void* ptr, uint32_t value, - size_t count) { - const size_t size = count * sizeof(uint32_t); - - if (size > max_total_fill_size_) { - return HSA_STATUS_ERROR_OUT_OF_RESOURCES; - } - - // Break the copy into multiple copy operation incase the copy size exceeds - // the SDMA linear copy limit. - const uint32_t num_fill_command = static_cast( - std::ceil(static_cast(size) / max_single_fill_size_)); - - const uint32_t total_fill_command_size = - num_fill_command * fill_command_size_; - - const uint32_t total_command_size = - total_fill_command_size + fence_command_size_; - - char* command_addr = AcquireWriteAddress(total_command_size); - char* const command_addr_temp = command_addr; - - if (command_addr == NULL) { - return HSA_STATUS_ERROR_OUT_OF_RESOURCES; - } - - const uint32_t fill_command_size = fill_command_size_; - size_t cur_size = 0; - for (uint32_t i = 0; i < num_fill_command; ++i) { - const uint32_t fill_size = static_cast( - std::min((size - cur_size), max_single_fill_size_)); - - void* cur_ptr = static_cast(ptr) + cur_size; - - SDMA_PKT_CONSTANT_FILL* packet_addr = - reinterpret_cast(command_addr); - - memset(packet_addr, 0, sizeof(SDMA_PKT_CONSTANT_FILL)); - - packet_addr->HEADER_UNION.op = SDMA_OP_CONST_FILL; - packet_addr->HEADER_UNION.fillsize = 2; // DW fill - - packet_addr->DST_ADDR_LO_UNION.dst_addr_31_0 = ptrlow32(cur_ptr); - packet_addr->DST_ADDR_HI_UNION.dst_addr_63_32 = ptrhigh32(cur_ptr); - - packet_addr->DATA_UNION.src_data_31_0 = value; - - packet_addr->COUNT_UNION.count = fill_size; - - command_addr += fill_command_size; - cur_size += fill_size; - } - - assert(cur_size == size); - - const uint32_t kFenceValue = 2015; - 
uint32_t* fence_addr = ObtainFenceObject(); - *fence_addr = 0; - - BuildFenceCommand(command_addr, fence_addr, kFenceValue); - - ReleaseWriteAddress(command_addr_temp, total_command_size); - - WaitFence(fence_addr, kFenceValue); - - return HSA_STATUS_SUCCESS; -} - -char* BlitSdma::AcquireWriteAddress(uint32_t cmd_size) { - if (cmd_size > queue_size_) { - return NULL; - } - - while (true) { - uint32_t curr_offset = - atomic::Load(&cached_reserve_offset_, std::memory_order_acquire); - const uint32_t end_offset = curr_offset + cmd_size; - - if (end_offset >= queue_size_) { - // Queue buffer is not enough to contain the new command. - - // The safe space for the new command is the start of the queue buffer to - // the last read address. - if (atomic::Load(queue_resource_.Queue_read_ptr, - std::memory_order_acquire) < cmd_size) { - // There is no safe space to use currently. - return NULL; - } - - WrapQueue(cmd_size); - - continue; - } - - if (atomic::Cas(&cached_reserve_offset_, end_offset, curr_offset, - std::memory_order_release) == curr_offset) { - return queue_start_addr_ + curr_offset; - } - } - - return NULL; -} - -void BlitSdma::UpdateWriteAndDoorbellRegister(uint32_t current_offset, - uint32_t new_offset) { - while (true) { - // Make sure that the address before ::current_offset is already released. - // Otherwise the CP may read invalid packets. - if (atomic::Load(&cached_commit_offset_, std::memory_order_acquire) == - current_offset) { - // Update write pointer and doorbel register. 
- atomic::Store(queue_resource_.Queue_write_ptr, new_offset); - atomic::Store(queue_resource_.Queue_DoorBell, new_offset); - atomic::Store(&cached_commit_offset_, new_offset, - std::memory_order_release); - break; - } - } -} - -void BlitSdma::ReleaseWriteAddress(char* cmd_addr, uint32_t cmd_size) { - assert(cmd_addr != NULL); - assert(cmd_addr >= queue_start_addr_); - - if (cmd_size > queue_size_) { - assert(false && "cmd_addr is outside the queue buffer range"); - return; - } - - // Update write register. - const uint32_t curent_offset = cmd_addr - queue_start_addr_; - const uint32_t new_offset = curent_offset + cmd_size; - UpdateWriteAndDoorbellRegister(curent_offset, new_offset); -} - -void BlitSdma::WrapQueue(uint32_t cmd_size) { - // Re-determine the offset into queue buffer where NOOP instructions - // should be written. - while (true) { - uint32_t curent_offset = - atomic::Load(&cached_reserve_offset_, std::memory_order_acquire); - const uint32_t end_offset = curent_offset + cmd_size; - if (end_offset < queue_size_) { - return; - } - - std::lock_guard guard(wrap_lock_); - - if (atomic::Cas(&cached_reserve_offset_, queue_size_ + 1, curent_offset, - std::memory_order_release) == curent_offset) { - // Wait till all reserved packets are commited. - while (atomic::Load(&cached_commit_offset_, std::memory_order_acquire) != - curent_offset) { - os::YieldThread(); - } - - // Fill the remainder of the queue with NOOP commands. - char* noop_address = queue_start_addr_ + curent_offset; - const size_t noop_commands_size = queue_size_ - curent_offset; - memset(noop_address, 0, noop_commands_size); - - // Update write and doorbell registers to execute NOOP instructions. - UpdateWriteAndDoorbellRegister(curent_offset, 0); - - // Wait till queue wrapped. - while (atomic::Load(queue_resource_.Queue_read_ptr, - std::memory_order_acquire) != 0) { - os::YieldThread(); - } - - // Open access to queue. 
- atomic::Store(&cached_reserve_offset_, 0U, std::memory_order_release); - } - } -} - -void BlitSdma::BuildFenceCommand(char* fence_command_addr, uint32_t* fence, - uint32_t fence_value) { - assert(fence_command_addr != NULL); - SDMA_PKT_FENCE* packet_addr = - reinterpret_cast(fence_command_addr); - - memset(packet_addr, 0, sizeof(SDMA_PKT_FENCE)); - - packet_addr->HEADER_UNION.op = SDMA_OP_FENCE; - - packet_addr->ADDR_LO_UNION.addr_31_0 = ptrlow32(fence); - - packet_addr->ADDR_HI_UNION.addr_63_32 = ptrhigh32(fence); - - packet_addr->DATA_UNION.data = fence_value; -} - -uint32_t* BlitSdma::ObtainFenceObject() { - const uint32_t fence_index = - atomic::Add(&fence_pool_counter_, 1U, std::memory_order_acquire); - uint32_t* fence_addr = &fence_base_addr_[fence_index & fence_pool_mask_]; - assert(IsMultipleOf(fence_addr, 4)); - return fence_addr; -} - -void BlitSdma::WaitFence(uint32_t* fence, uint32_t fence_value) { - int spin_count = 51; - while (atomic::Load(fence, std::memory_order_acquire) != fence_value) { - if (--spin_count > 0) { - continue; - } - os::YieldThread(); - } -} - -void BlitSdma::BuildCopyCommand(char* cmd_addr, uint32_t num_copy_command, - void* dst, const void* src, size_t size) { - size_t cur_size = 0; - for (uint32_t i = 0; i < num_copy_command; ++i) { - const uint32_t copy_size = static_cast( - std::min((size - cur_size), max_single_linear_copy_size_)); - - void* cur_dst = static_cast(dst) + cur_size; - const void* cur_src = static_cast(src) + cur_size; - - SDMA_PKT_COPY_LINEAR* packet_addr = - reinterpret_cast(cmd_addr); - - memset(packet_addr, 0, sizeof(SDMA_PKT_COPY_LINEAR)); - - packet_addr->HEADER_UNION.op = SDMA_OP_COPY; - packet_addr->HEADER_UNION.sub_op = SDMA_SUBOP_COPY_LINEAR; - - packet_addr->COUNT_UNION.count = copy_size; - - packet_addr->SRC_ADDR_LO_UNION.src_addr_31_0 = ptrlow32(cur_src); - packet_addr->SRC_ADDR_HI_UNION.src_addr_63_32 = ptrhigh32(cur_src); - - packet_addr->DST_ADDR_LO_UNION.dst_addr_31_0 = ptrlow32(cur_dst); - 
packet_addr->DST_ADDR_HI_UNION.dst_addr_63_32 = ptrhigh32(cur_dst); - - cmd_addr += linear_copy_command_size_; - cur_size += copy_size; - } - - assert(cur_size == size); -} - -void BlitSdma::BuildPollCommand(char* cmd_addr, void* addr, - uint32_t reference) { - SDMA_PKT_POLL_REGMEM* packet_addr = - reinterpret_cast(cmd_addr); - - memset(packet_addr, 0, sizeof(SDMA_PKT_POLL_REGMEM)); - - packet_addr->HEADER_UNION.op = SDMA_OP_POLL_REGMEM; - packet_addr->HEADER_UNION.mem_poll = 1; - packet_addr->HEADER_UNION.func = 0x3; // IsEqual. - packet_addr->ADDR_LO_UNION.addr_31_0 = ptrlow32(addr); - packet_addr->ADDR_HI_UNION.addr_63_32 = ptrhigh32(addr); - - packet_addr->VALUE_UNION.value = reference; - - packet_addr->MASK_UNION.mask = 0xffffffff; // Compare the whole content. - - packet_addr->DW5_UNION.interval = 0x04; - packet_addr->DW5_UNION.retry_count = 0xfff; // Retry forever. -} - -void BlitSdma::BuildAtomicDecrementCommand(char* cmd_addr, void* addr) { - SDMA_PKT_ATOMIC* packet_addr = reinterpret_cast(cmd_addr); - - memset(packet_addr, 0, sizeof(SDMA_PKT_ATOMIC)); - - packet_addr->HEADER_UNION.op = SDMA_OP_ATOMIC; - packet_addr->HEADER_UNION.operation = SDMA_ATOMIC_ADD64; - - packet_addr->ADDR_LO_UNION.addr_31_0 = ptrlow32(addr); - packet_addr->ADDR_HI_UNION.addr_63_32 = ptrhigh32(addr); - - packet_addr->SRC_DATA_LO_UNION.src_data_31_0 = 0xffffffff; - packet_addr->SRC_DATA_HI_UNION.src_data_63_32 = 0xffffffff; -} -} // namespace amd diff --git a/runtime/hsa-runtime/core/runtime/amd_cpu_agent.cpp b/runtime/hsa-runtime/core/runtime/amd_cpu_agent.cpp deleted file mode 100644 index fefa17e60e..0000000000 --- a/runtime/hsa-runtime/core/runtime/amd_cpu_agent.cpp +++ /dev/null @@ -1,329 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// -// The University of Illinois/NCSA -// Open Source License (NCSA) -// -// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. 
-// -// Developed by: -// -// AMD Research and AMD HSA Software Development -// -// Advanced Micro Devices, Inc. -// -// www.amd.com -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal with the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimers. -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimers in -// the documentation and/or other materials provided with the distribution. -// - Neither the names of Advanced Micro Devices, Inc, -// nor the names of its contributors may be used to endorse or promote -// products derived from this Software without specific prior written -// permission. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR -// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// DEALINGS WITH THE SOFTWARE. 
-// -//////////////////////////////////////////////////////////////////////////////// - -#include "core/inc/amd_cpu_agent.h" - -#include -#include - -#include "core/inc/amd_memory_region.h" -#include "core/inc/host_queue.h" - -#include "hsa_ext_image.h" - -namespace amd { -CpuAgent::CpuAgent(HSAuint32 node, const HsaNodeProperties& node_props) - : core::Agent(node, kAmdCpuDevice), properties_(node_props) { - InitRegionList(); - - InitCacheList(); -} - -CpuAgent::~CpuAgent() { - std::for_each(regions_.begin(), regions_.end(), DeleteObject()); - regions_.clear(); -} - -void CpuAgent::InitRegionList() { - const bool is_apu_node = (properties_.NumFComputeCores > 0); - - std::vector mem_props(properties_.NumMemoryBanks); - if (HSAKMT_STATUS_SUCCESS == - hsaKmtGetNodeMemoryProperties(node_id(), properties_.NumMemoryBanks, - &mem_props[0])) { - std::vector::iterator system_prop = - std::find_if(mem_props.begin(), mem_props.end(), - [](HsaMemoryProperties prop) -> bool { - return (prop.SizeInBytes > 0 && prop.HeapType == HSA_HEAPTYPE_SYSTEM); - }); - - if (system_prop != mem_props.end()) { - MemoryRegion* system_region_fine = - new MemoryRegion(true, is_apu_node, this, *system_prop); - - regions_.push_back(system_region_fine); - - if (!is_apu_node) { - MemoryRegion* system_region_coarse = - new MemoryRegion(false, is_apu_node, this, *system_prop); - - regions_.push_back(system_region_coarse); - } - } else { - HsaMemoryProperties system_props; - std::memset(&system_props, 0, sizeof(HsaMemoryProperties)); - - const uintptr_t system_base = os::GetUserModeVirtualMemoryBase(); - const size_t system_physical_size = os::GetUsablePhysicalHostMemorySize(); - assert(system_physical_size != 0); - - system_props.HeapType = HSA_HEAPTYPE_SYSTEM; - system_props.SizeInBytes = (HSAuint64)system_physical_size; - system_props.VirtualBaseAddress = (HSAuint64)(system_base); - - MemoryRegion* system_region = - new MemoryRegion(true, is_apu_node, this, system_props); - - 
regions_.push_back(system_region); - } - } -} - -void CpuAgent::InitCacheList() { - // Get CPU cache information. - cache_props_.resize(properties_.NumCaches); - if (HSAKMT_STATUS_SUCCESS != - hsaKmtGetNodeCacheProperties(node_id(), properties_.CComputeIdLo, - properties_.NumCaches, &cache_props_[0])) { - cache_props_.clear(); - } else { - // Only store CPU D-cache. - for (size_t cache_id = 0; cache_id < cache_props_.size(); ++cache_id) { - const HsaCacheType type = cache_props_[cache_id].CacheType; - if (type.ui32.CPU != 1 || type.ui32.Instruction == 1) { - cache_props_.erase(cache_props_.begin() + cache_id); - --cache_id; - } - } - } -} - -hsa_status_t CpuAgent::VisitRegion(bool include_peer, - hsa_status_t (*callback)(hsa_region_t region, - void* data), - void* data) const { - if (!include_peer) { - return VisitRegion(regions_, callback, data); - } - - // Expose all system regions in the system. - hsa_status_t stat = VisitRegion( - core::Runtime::runtime_singleton_->system_regions_fine(), callback, data); - if (stat != HSA_STATUS_SUCCESS) { - return stat; - } - - return VisitRegion(core::Runtime::runtime_singleton_->system_regions_coarse(), - callback, data); -} - -hsa_status_t CpuAgent::VisitRegion( - const std::vector& regions, - hsa_status_t (*callback)(hsa_region_t region, void* data), - void* data) const { - for (const core::MemoryRegion* region : regions) { - hsa_region_t region_handle = core::MemoryRegion::Convert(region); - hsa_status_t status = callback(region_handle, data); - if (status != HSA_STATUS_SUCCESS) { - return status; - } - } - - return HSA_STATUS_SUCCESS; -} - -hsa_status_t CpuAgent::IterateRegion( - hsa_status_t (*callback)(hsa_region_t region, void* data), - void* data) const { - return VisitRegion(true, callback, data); -} - -hsa_status_t CpuAgent::GetInfo(hsa_agent_info_t attribute, void* value) const { - const size_t kNameSize = 64; // agent, and vendor name size limit - - const size_t attribute_u = static_cast(attribute); - switch 
(attribute_u) { - case HSA_AGENT_INFO_NAME: - // TODO: hardcode for now, wait until SWDEV-88894 implemented - std::memset(value, 0, kNameSize); - std::memcpy(value, "CPU Device", sizeof("CPU Device")); - break; - case HSA_AGENT_INFO_VENDOR_NAME: - // TODO: hardcode for now, wait until SWDEV-88894 implemented - std::memset(value, 0, kNameSize); - std::memcpy(value, "CPU", sizeof("CPU")); - break; - case HSA_AGENT_INFO_FEATURE: - *((hsa_agent_feature_t*)value) = static_cast(0); - break; - case HSA_AGENT_INFO_MACHINE_MODEL: -#if defined(HSA_LARGE_MODEL) - *((hsa_machine_model_t*)value) = HSA_MACHINE_MODEL_LARGE; -#else - *((hsa_machine_model_t*)value) = HSA_MACHINE_MODEL_SMALL; -#endif - break; - case HSA_AGENT_INFO_BASE_PROFILE_DEFAULT_FLOAT_ROUNDING_MODES: - case HSA_AGENT_INFO_DEFAULT_FLOAT_ROUNDING_MODE: - // TODO: validate if this is true. - *((hsa_default_float_rounding_mode_t*)value) = - HSA_DEFAULT_FLOAT_ROUNDING_MODE_NEAR; - break; - case HSA_AGENT_INFO_FAST_F16_OPERATION: - // TODO: validate if this is trye. - *((bool*)value) = false; - break; - case HSA_AGENT_INFO_PROFILE: - *((hsa_profile_t*)value) = HSA_PROFILE_FULL; - break; - case HSA_AGENT_INFO_WAVEFRONT_SIZE: - *((uint32_t*)value) = 0; - break; - case HSA_AGENT_INFO_WORKGROUP_MAX_DIM: - std::memset(value, 0, sizeof(uint16_t) * 3); - break; - case HSA_AGENT_INFO_WORKGROUP_MAX_SIZE: - *((uint32_t*)value) = 0; - break; - case HSA_AGENT_INFO_GRID_MAX_DIM: - std::memset(value, 0, sizeof(hsa_dim3_t)); - break; - case HSA_AGENT_INFO_GRID_MAX_SIZE: - *((uint32_t*)value) = 0; - break; - case HSA_AGENT_INFO_FBARRIER_MAX_SIZE: - // TODO: ? 
- *((uint32_t*)value) = 0; - break; - case HSA_AGENT_INFO_QUEUES_MAX: - *((uint32_t*)value) = 0; - break; - case HSA_AGENT_INFO_QUEUE_MIN_SIZE: - *((uint32_t*)value) = 0; - break; - case HSA_AGENT_INFO_QUEUE_MAX_SIZE: - *((uint32_t*)value) = 0; - break; - case HSA_AGENT_INFO_QUEUE_TYPE: - *((hsa_queue_type_t*)value) = static_cast(0); - break; - case HSA_AGENT_INFO_NODE: - // TODO: associate with OS NUMA support (numactl / GetNumaProcessorNode). - *((uint32_t*)value) = node_id(); - break; - case HSA_AGENT_INFO_DEVICE: - *((hsa_device_type_t*)value) = HSA_DEVICE_TYPE_CPU; - break; - case HSA_AGENT_INFO_CACHE_SIZE: { - std::memset(value, 0, sizeof(uint32_t) * 4); - - assert(cache_props_.size() > 0 && "CPU cache info missing."); - const size_t num_cache = cache_props_.size(); - for (size_t i = 0; i < num_cache; ++i) { - const uint32_t line_level = cache_props_[i].CacheLevel; - ((uint32_t*)value)[line_level - 1] = cache_props_[i].CacheSize * 1024; - } - } break; - case HSA_AGENT_INFO_ISA: - ((hsa_isa_t*)value)->handle = 0; - break; - case HSA_AGENT_INFO_EXTENSIONS: - memset(value, 0, sizeof(uint8_t) * 128); - break; - case HSA_AGENT_INFO_VERSION_MAJOR: - *((uint16_t*)value) = 1; - break; - case HSA_AGENT_INFO_VERSION_MINOR: - *((uint16_t*)value) = 0; - break; - case HSA_EXT_AGENT_INFO_IMAGE_1D_MAX_ELEMENTS: - case HSA_EXT_AGENT_INFO_IMAGE_1DA_MAX_ELEMENTS: - case HSA_EXT_AGENT_INFO_IMAGE_1DB_MAX_ELEMENTS: - *((uint32_t*)value) = 0; - break; - case HSA_EXT_AGENT_INFO_IMAGE_2D_MAX_ELEMENTS: - case HSA_EXT_AGENT_INFO_IMAGE_2DA_MAX_ELEMENTS: - case HSA_EXT_AGENT_INFO_IMAGE_2DDEPTH_MAX_ELEMENTS: - case HSA_EXT_AGENT_INFO_IMAGE_2DADEPTH_MAX_ELEMENTS: - memset(value, 0, sizeof(uint32_t) * 2); - break; - case HSA_EXT_AGENT_INFO_IMAGE_3D_MAX_ELEMENTS: - memset(value, 0, sizeof(uint32_t) * 3); - break; - case HSA_EXT_AGENT_INFO_IMAGE_ARRAY_MAX_LAYERS: - *((uint32_t*)value) = 0; - break; - case HSA_EXT_AGENT_INFO_MAX_IMAGE_RD_HANDLES: - case 
HSA_EXT_AGENT_INFO_MAX_IMAGE_RORW_HANDLES: - case HSA_EXT_AGENT_INFO_MAX_SAMPLER_HANDLERS: - *((uint32_t*)value) = 0; - break; - case HSA_AMD_AGENT_INFO_CHIP_ID: - *((uint32_t*)value) = properties_.DeviceId; - break; - case HSA_AMD_AGENT_INFO_CACHELINE_SIZE: - // TODO: hardcode for now. - *((uint32_t*)value) = 64; - break; - case HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT: - *((uint32_t*)value) = properties_.NumCPUCores; - break; - case HSA_AMD_AGENT_INFO_MAX_CLOCK_FREQUENCY: - *((uint32_t*)value) = properties_.MaxEngineClockMhzCCompute; - break; - case HSA_AMD_AGENT_INFO_DRIVER_NODE_ID: - *((uint32_t*)value) = node_id(); - break; - case HSA_AMD_AGENT_INFO_MAX_ADDRESS_WATCH_POINTS: - *((uint32_t*)value) = static_cast( - 1 << properties_.Capability.ui32.WatchPointsTotalBits); - break; - case HSA_AMD_AGENT_INFO_BDFID: - *((uint32_t*)value) = static_cast(properties_.LocationId); - break; - default: - return HSA_STATUS_ERROR_INVALID_ARGUMENT; - break; - } - return HSA_STATUS_SUCCESS; -} - -hsa_status_t CpuAgent::QueueCreate(size_t size, hsa_queue_type_t queue_type, - core::HsaEventCallback event_callback, - void* data, uint32_t private_segment_size, - uint32_t group_segment_size, - core::Queue** queue) { - // No HW AQL packet processor on CPU device. - return HSA_STATUS_ERROR; -} - -} // namespace amd diff --git a/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp b/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp deleted file mode 100644 index 5962a0c1b2..0000000000 --- a/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp +++ /dev/null @@ -1,863 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// -// The University of Illinois/NCSA -// Open Source License (NCSA) -// -// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. -// -// Developed by: -// -// AMD Research and AMD HSA Software Development -// -// Advanced Micro Devices, Inc. 
-// -// www.amd.com -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal with the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimers. -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimers in -// the documentation and/or other materials provided with the distribution. -// - Neither the names of Advanced Micro Devices, Inc, -// nor the names of its contributors may be used to endorse or promote -// products derived from this Software without specific prior written -// permission. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR -// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// DEALINGS WITH THE SOFTWARE. 
-// -//////////////////////////////////////////////////////////////////////////////// - -#include "core/inc/amd_gpu_agent.h" - -#include -#include -#include -#include -#include - -#include "core/inc/amd_aql_queue.h" -#include "core/inc/amd_blit_kernel.h" -#include "core/inc/amd_blit_sdma.h" -#include "core/inc/amd_memory_region.h" -#include "core/inc/interrupt_signal.h" -#include "core/inc/isa.h" -#include "core/inc/runtime.h" - -#include "utils/sp3/sp3.h" - -#include "hsa_ext_image.h" - -// Size of scratch (private) segment pre-allocated per thread, in bytes. -#define DEFAULT_SCRATCH_BYTES_PER_THREAD 2048 - -namespace amd { -GpuAgent::GpuAgent(HSAuint32 node, const HsaNodeProperties& node_props) - : GpuAgentInt(node), - properties_(node_props), - current_coherency_type_(HSA_AMD_COHERENCY_TYPE_COHERENT), - blit_(NULL), - is_kv_device_(false), - trap_code_buf_(NULL), - trap_code_buf_size_(0), - ape1_base_(0), - ape1_size_(0) { - const bool is_apu_node = (properties_.NumCPUCores > 0); - profile_ = (is_apu_node) ? HSA_PROFILE_FULL : HSA_PROFILE_BASE; - - HSAKMT_STATUS err = hsaKmtGetClockCounters(node_id(), &t0_); - t1_ = t0_; - assert(err == HSAKMT_STATUS_SUCCESS && "hsaGetClockCounters error"); - - // Set instruction set architecture via node property, only on GPU device. - isa_ = (core::Isa*)core::IsaRegistry::GetIsa(core::Isa::Version( - node_props.EngineId.ui32.Major, node_props.EngineId.ui32.Minor, - node_props.EngineId.ui32.Stepping)); - // Check if the device is Kaveri, only on GPU device. - if (isa_->GetMajorVersion() == 7 && isa_->GetMinorVersion() == 0 && - isa_->GetStepping() == 0) { - is_kv_device_ = true; - } - - current_coherency_type((profile_ == HSA_PROFILE_FULL) - ? 
HSA_AMD_COHERENCY_TYPE_COHERENT - : HSA_AMD_COHERENCY_TYPE_NONCOHERENT); - - max_queues_ = - static_cast(atoi(os::GetEnvVar("HSA_MAX_QUEUES").c_str())); -#if !defined(HSA_LARGE_MODEL) || !defined(__linux__) - if (max_queues_ == 0) { - max_queues_ = 10; - } - max_queues_ = std::min(10U, max_queues_); -#else - if (max_queues_ == 0) { - max_queues_ = 128; - } - max_queues_ = std::min(128U, max_queues_); -#endif - - // Populate region list. - InitRegionList(); - - // Reserve memory for scratch. - InitScratchPool(); - - // Populate cache list. - InitCacheList(); - - // Bind the second-level trap handler to this node. - BindTrapHandler(); -} - -GpuAgent::~GpuAgent() { - if (blit_ != NULL) { - hsa_status_t status = blit_->Destroy(); - assert(status == HSA_STATUS_SUCCESS); - - delete blit_; - blit_ = NULL; - } - - if (ape1_base_ != 0) { - _aligned_free(reinterpret_cast(ape1_base_)); - } - - if (scratch_pool_.base() != NULL) { - hsaKmtFreeMemory(scratch_pool_.base(), scratch_pool_.size()); - } - - if (trap_code_buf_ != NULL) { - ReleaseShader(trap_code_buf_, trap_code_buf_size_); - } - - std::for_each(regions_.begin(), regions_.end(), DeleteObject()); - regions_.clear(); -} - -void GpuAgent::AssembleShader(const char* src_sp3, const char* func_name, - void*& code_buf, size_t& code_buf_size) { -#ifdef __linux__ // No VS builds of libsp3 available right now - // Assemble source string with libsp3. - sp3_context* sp3 = sp3_new(); - - switch (isa_->GetMajorVersion()) { - case 7: - sp3_setasic(sp3, "CI"); - break; - case 8: - sp3_setasic(sp3, "VI"); - break; - default: - assert(false && "SP3 assembly not supported on this agent"); - } - - sp3_parse_string(sp3, src_sp3); - sp3_shader* code_sp3_meta = sp3_compile(sp3, func_name); - - // Allocate a GPU-visible buffer for the trap shader. 
- HsaMemFlags code_buf_flags = {0}; - code_buf_flags.ui32.HostAccess = 1; - code_buf_flags.ui32.ExecuteAccess = 1; - code_buf_flags.ui32.NoSubstitute = 1; - - size_t code_size = code_sp3_meta->size * sizeof(uint32_t); - code_buf_size = AlignUp(code_size, 0x1000); - - HSAKMT_STATUS err = - hsaKmtAllocMemory(node_id(), code_buf_size, code_buf_flags, &code_buf); - assert(err == HSAKMT_STATUS_SUCCESS && "hsaKmtAllocMemory(Trap) failed"); - - err = hsaKmtMapMemoryToGPU(code_buf, code_buf_size, NULL); - assert(err == HSAKMT_STATUS_SUCCESS && "hsaKmtMapMemoryToGPU(Trap) failed"); - - // Copy trap handler code into the GPU-visible buffer. - memset(code_buf, 0, code_buf_size); - memcpy(code_buf, code_sp3_meta->data, code_size); - - // Release SP3 resources. - sp3_free_shader(code_sp3_meta); - sp3_close(sp3); -#endif -} - -void GpuAgent::ReleaseShader(void* code_buf, size_t code_buf_size) { - hsaKmtUnmapMemoryToGPU(code_buf); - hsaKmtFreeMemory(code_buf, code_buf_size); -} - -void GpuAgent::InitRegionList() { - const bool is_apu_node = (properties_.NumCPUCores > 0); - - std::vector mem_props(properties_.NumMemoryBanks); - if (HSAKMT_STATUS_SUCCESS == - hsaKmtGetNodeMemoryProperties(node_id(), properties_.NumMemoryBanks, - &mem_props[0])) { - for (uint32_t mem_idx = 0; mem_idx < properties_.NumMemoryBanks; - ++mem_idx) { - // Ignore the one(s) with unknown size. 
- if (mem_props[mem_idx].SizeInBytes == 0) { - continue; - } - - switch (mem_props[mem_idx].HeapType) { - case HSA_HEAPTYPE_FRAME_BUFFER_PRIVATE: - case HSA_HEAPTYPE_FRAME_BUFFER_PUBLIC: - if (!is_apu_node) { - mem_props[mem_idx].VirtualBaseAddress = 0; - } - case HSA_HEAPTYPE_GPU_LDS: - case HSA_HEAPTYPE_GPU_SCRATCH: - case HSA_HEAPTYPE_DEVICE_SVM: { - MemoryRegion* region = - new MemoryRegion(false, false, this, mem_props[mem_idx]); - - regions_.push_back(region); - break; - } - default: - continue; - } - } - } -} - -void GpuAgent::InitScratchPool() { - HsaMemFlags flags; - flags.Value = 0; - flags.ui32.Scratch = 1; - flags.ui32.HostAccess = 1; - - scratch_per_thread_ = atoi(os::GetEnvVar("HSA_SCRATCH_MEM").c_str()); - if (scratch_per_thread_ == 0) - scratch_per_thread_ = DEFAULT_SCRATCH_BYTES_PER_THREAD; - - // Scratch length is: waves/CU * threads/wave * queues * #CUs * - // scratch/thread - const uint32_t num_cu = - properties_.NumFComputeCores / properties_.NumSIMDPerCU; - queue_scratch_len_ = 0; - queue_scratch_len_ = AlignUp(32 * 64 * num_cu * scratch_per_thread_, 65536); - size_t max_scratch_len = queue_scratch_len_ * max_queues_; - -#if defined(HSA_LARGE_MODEL) && defined(__linux__) - // For 64-bit linux use max queues unless otherwise specified - if ((max_scratch_len == 0) || (max_scratch_len > 4294967296)) { - max_scratch_len = 4294967296; // 4GB apeture max - } -#endif - - void* scratch_base; - HSAKMT_STATUS err = - hsaKmtAllocMemory(node_id(), max_scratch_len, flags, &scratch_base); - assert(err == HSAKMT_STATUS_SUCCESS && "hsaKmtAllocMemory(Scratch) failed"); - assert(IsMultipleOf(scratch_base, 0x1000) && - "Scratch base is not page aligned!"); - - scratch_pool_. ~SmallHeap(); - if (HSAKMT_STATUS_SUCCESS == err) { - new (&scratch_pool_) SmallHeap(scratch_base, max_scratch_len); - } else { - new (&scratch_pool_) SmallHeap(NULL, 0); - } -} - -void GpuAgent::InitCacheList() { - // Get GPU cache information. 
- // Similar to getting CPU cache but here we use FComputeIdLo. - cache_props_.resize(properties_.NumCaches); - if (HSAKMT_STATUS_SUCCESS != - hsaKmtGetNodeCacheProperties(node_id(), properties_.FComputeIdLo, - properties_.NumCaches, &cache_props_[0])) { - cache_props_.clear(); - } else { - // Only store GPU D-cache. - for (size_t cache_id = 0; cache_id < cache_props_.size(); ++cache_id) { - const HsaCacheType type = cache_props_[cache_id].CacheType; - if (type.ui32.HSACU != 1 || type.ui32.Instruction == 1) { - cache_props_.erase(cache_props_.begin() + cache_id); - --cache_id; - } - } - } -} - -hsa_status_t GpuAgent::IterateRegion( - hsa_status_t (*callback)(hsa_region_t region, void* data), - void* data) const { - return VisitRegion(true, callback, data); -} - -hsa_status_t GpuAgent::VisitRegion(bool include_peer, - hsa_status_t (*callback)(hsa_region_t region, - void* data), - void* data) const { - if (include_peer) { - // Only expose system, local, and LDS memory of the blit agent. - if (this->node_id() == - core::Runtime::runtime_singleton_->blit_agent()->node_id()) { - hsa_status_t stat = VisitRegion(regions_, callback, data); - if (stat != HSA_STATUS_SUCCESS) { - return stat; - } - } - - // Also expose system regions accessible by this agent. - hsa_status_t stat = - VisitRegion(core::Runtime::runtime_singleton_->system_regions_fine(), - callback, data); - if (stat != HSA_STATUS_SUCCESS) { - return stat; - } - - return VisitRegion( - core::Runtime::runtime_singleton_->system_regions_coarse(), callback, - data); - } - - // Only expose system, local, and LDS memory of this agent. - return VisitRegion(regions_, callback, data); -} - -hsa_status_t GpuAgent::VisitRegion( - const std::vector& regions, - hsa_status_t (*callback)(hsa_region_t region, void* data), - void* data) const { - for (const core::MemoryRegion* region : regions) { - const amd::MemoryRegion* amd_region = - reinterpret_cast(region); - - // Only expose system, local, and LDS memory. 
- if (amd_region->IsSystem() || amd_region->IsLocalMemory() || - amd_region->IsLDS()) { - hsa_region_t region_handle = core::MemoryRegion::Convert(region); - hsa_status_t status = callback(region_handle, data); - if (status != HSA_STATUS_SUCCESS) { - return status; - } - } - } - - return HSA_STATUS_SUCCESS; -} - -hsa_status_t GpuAgent::InitDma() { - // Try create SDMA blit first. - std::string sdma_enable = os::GetEnvVar("HSA_ENABLE_SDMA"); - - if (sdma_enable != "0" && isa_->GetMajorVersion() == 8 && - isa_->GetMinorVersion() == 0 && isa_->GetStepping() == 3) { - blit_ = new BlitSdma(); - - if (blit_->Initialize(*this) == HSA_STATUS_SUCCESS) { - return HSA_STATUS_SUCCESS; - } - - // Fall back to blit kernel if SDMA is unavailable. - blit_->Destroy(); - delete blit_; - blit_ = NULL; - } - - assert(blit_ == NULL); - blit_ = new BlitKernel(); - - if (blit_->Initialize(*this) != HSA_STATUS_SUCCESS) { - blit_->Destroy(); - delete blit_; - blit_ = NULL; - - return HSA_STATUS_ERROR_OUT_OF_RESOURCES; - } - - return HSA_STATUS_SUCCESS; -} - -hsa_status_t GpuAgent::DmaCopy(void* dst, const void* src, size_t size) { - if (blit_ == NULL) { - return HSA_STATUS_ERROR_OUT_OF_RESOURCES; - } - - return blit_->SubmitLinearCopyCommand(dst, src, size); -} - -hsa_status_t GpuAgent::DmaCopy(void* dst, const void* src, size_t size, - std::vector& dep_signals, - core::Signal& out_signal) { - if (blit_ == NULL) { - return HSA_STATUS_ERROR_OUT_OF_RESOURCES; - } - - // TODO(bwicakso): temporarily disable wait on thunk event if the out_signal - // is an interrupt signal object. Remove this when SDMA handle interrupt - // packet properly. 
- if (out_signal.EopEvent() != NULL) { - static_cast(out_signal).DisableWaitEvent(); - } - - return blit_->SubmitLinearCopyCommand(dst, src, size, dep_signals, - out_signal); -} - -hsa_status_t GpuAgent::DmaFill(void* ptr, uint32_t value, size_t count) { - if (blit_ == NULL) { - return HSA_STATUS_ERROR_OUT_OF_RESOURCES; - } - - return blit_->SubmitLinearFillCommand(ptr, value, count); -} - -hsa_status_t GpuAgent::GetInfo(hsa_agent_info_t attribute, void* value) const { - const size_t kNameSize = 64; // agent, and vendor name size limit - - const core::ExtensionEntryPoints& extensions = - core::Runtime::runtime_singleton_->extensions_; - - hsa_agent_t agent = core::Agent::Convert(this); - - const size_t attribute_u = static_cast(attribute); - switch (attribute_u) { - case HSA_AGENT_INFO_NAME: - // TODO(bwicakso): hardcode for now. - std::memset(value, 0, kNameSize); - if (isa_->GetMajorVersion() == 7) { - std::memcpy(value, "Kaveri", sizeof("Kaveri")); - } else if (isa_->GetMajorVersion() == 8) { - if (isa_->GetMinorVersion() == 0 && isa_->GetStepping() == 2) { - std::memcpy(value, "Tonga", sizeof("Tonga")); - } else if (isa_->GetMinorVersion() == 0 && isa_->GetStepping() == 3) { - std::memcpy(value, "Fiji", sizeof("Fiji")); - } else { - std::memcpy(value, "Carrizo", sizeof("Carrizo")); - } - } else { - std::memcpy(value, "Unknown", sizeof("Unknown")); - } - break; - case HSA_AGENT_INFO_VENDOR_NAME: - std::memset(value, 0, kNameSize); - std::memcpy(value, "AMD", sizeof("AMD")); - break; - case HSA_AGENT_INFO_FEATURE: - *((hsa_agent_feature_t*)value) = HSA_AGENT_FEATURE_KERNEL_DISPATCH; - break; - case HSA_AGENT_INFO_MACHINE_MODEL: -#if defined(HSA_LARGE_MODEL) - *((hsa_machine_model_t*)value) = HSA_MACHINE_MODEL_LARGE; -#else - *((hsa_machine_model_t*)value) = HSA_MACHINE_MODEL_SMALL; -#endif - break; - case HSA_AGENT_INFO_BASE_PROFILE_DEFAULT_FLOAT_ROUNDING_MODES: - case HSA_AGENT_INFO_DEFAULT_FLOAT_ROUNDING_MODE: - *((hsa_default_float_rounding_mode_t*)value) = - 
HSA_DEFAULT_FLOAT_ROUNDING_MODE_NEAR; - break; - case HSA_AGENT_INFO_FAST_F16_OPERATION: - *((bool*)value) = false; - break; - case HSA_AGENT_INFO_PROFILE: - *((hsa_profile_t*)value) = profile_; - break; - case HSA_AGENT_INFO_WAVEFRONT_SIZE: - *((uint32_t*)value) = properties_.WaveFrontSize; - break; - case HSA_AGENT_INFO_WORKGROUP_MAX_DIM: { - // TODO: must be per-device - const uint16_t group_size[3] = {1024, 1024, 1024}; - std::memcpy(value, group_size, sizeof(group_size)); - } break; - case HSA_AGENT_INFO_WORKGROUP_MAX_SIZE: - // TODO: must be per-device - *((uint32_t*)value) = 1024; - break; - case HSA_AGENT_INFO_GRID_MAX_DIM: { - const hsa_dim3_t grid_size = {UINT32_MAX, UINT32_MAX, UINT32_MAX}; - std::memcpy(value, &grid_size, sizeof(hsa_dim3_t)); - } break; - case HSA_AGENT_INFO_GRID_MAX_SIZE: - *((uint32_t*)value) = UINT32_MAX; - break; - case HSA_AGENT_INFO_FBARRIER_MAX_SIZE: - // TODO: to confirm - *((uint32_t*)value) = 32; - break; - case HSA_AGENT_INFO_QUEUES_MAX: - *((uint32_t*)value) = max_queues_; - break; - case HSA_AGENT_INFO_QUEUE_MIN_SIZE: - *((uint32_t*)value) = minAqlSize_; - break; - case HSA_AGENT_INFO_QUEUE_MAX_SIZE: - *((uint32_t*)value) = maxAqlSize_; - break; - case HSA_AGENT_INFO_QUEUE_TYPE: - *((hsa_queue_type_t*)value) = HSA_QUEUE_TYPE_MULTI; - break; - case HSA_AGENT_INFO_NODE: - // TODO: associate with OS NUMA support (numactl / GetNumaProcessorNode). - *((uint32_t*)value) = node_id(); - break; - case HSA_AGENT_INFO_DEVICE: - *((hsa_device_type_t*)value) = HSA_DEVICE_TYPE_GPU; - break; - case HSA_AGENT_INFO_CACHE_SIZE: - std::memset(value, 0, sizeof(uint32_t) * 4); - // TODO: no GPU cache info from KFD. Hardcode for now. - // GCN whitepaper: L1 data cache is 16KB. 
- ((uint32_t*)value)[0] = 16 * 1024; - break; - case HSA_AGENT_INFO_ISA: - *((hsa_isa_t*)value) = core::Isa::Handle(isa_); - break; - case HSA_AGENT_INFO_EXTENSIONS: - memset(value, 0, sizeof(uint8_t) * 128); - - if (extensions.table.hsa_ext_program_finalize_fn != NULL) { - *((uint8_t*)value) = 1 << HSA_EXTENSION_FINALIZER; - } - - if (profile_ == HSA_PROFILE_FULL && - extensions.table.hsa_ext_image_create_fn != NULL) { - // TODO(bwicakso): only APU supports images currently. - *((uint8_t*)value) |= 1 << HSA_EXTENSION_IMAGES; - } - - *((uint8_t*)value) |= 1 << HSA_EXTENSION_AMD_PROFILER; - - break; - case HSA_AGENT_INFO_VERSION_MAJOR: - *((uint16_t*)value) = 1; - break; - case HSA_AGENT_INFO_VERSION_MINOR: - *((uint16_t*)value) = 0; - break; - case HSA_EXT_AGENT_INFO_IMAGE_1D_MAX_ELEMENTS: - case HSA_EXT_AGENT_INFO_IMAGE_1DA_MAX_ELEMENTS: - case HSA_EXT_AGENT_INFO_IMAGE_1DB_MAX_ELEMENTS: - case HSA_EXT_AGENT_INFO_IMAGE_2D_MAX_ELEMENTS: - case HSA_EXT_AGENT_INFO_IMAGE_2DA_MAX_ELEMENTS: - case HSA_EXT_AGENT_INFO_IMAGE_2DDEPTH_MAX_ELEMENTS: - case HSA_EXT_AGENT_INFO_IMAGE_2DADEPTH_MAX_ELEMENTS: - case HSA_EXT_AGENT_INFO_IMAGE_3D_MAX_ELEMENTS: - case HSA_EXT_AGENT_INFO_IMAGE_ARRAY_MAX_LAYERS: - return hsa_amd_image_get_info_max_dim(public_handle(), attribute, value); - case HSA_EXT_AGENT_INFO_MAX_IMAGE_RD_HANDLES: - // TODO: hardcode based on OCL constants. - *((uint32_t*)value) = 128; - break; - case HSA_EXT_AGENT_INFO_MAX_IMAGE_RORW_HANDLES: - // TODO: hardcode based on OCL constants. - *((uint32_t*)value) = 64; - break; - case HSA_EXT_AGENT_INFO_MAX_SAMPLER_HANDLERS: - // TODO: hardcode based on OCL constants. - *((uint32_t*)value) = 16; - case HSA_AMD_AGENT_INFO_CHIP_ID: - *((uint32_t*)value) = properties_.DeviceId; - break; - case HSA_AMD_AGENT_INFO_CACHELINE_SIZE: - // TODO: hardcode for now. - // GCN whitepaper: cache line size is 64 byte long. 
- *((uint32_t*)value) = 64; - break; - case HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT: - *((uint32_t*)value) = - (properties_.NumFComputeCores / properties_.NumSIMDPerCU); - break; - case HSA_AMD_AGENT_INFO_MAX_CLOCK_FREQUENCY: - *((uint32_t*)value) = properties_.MaxEngineClockMhzFCompute; - break; - case HSA_AMD_AGENT_INFO_DRIVER_NODE_ID: - *((uint32_t*)value) = node_id(); - break; - case HSA_AMD_AGENT_INFO_MAX_ADDRESS_WATCH_POINTS: - *((uint32_t*)value) = static_cast( - 1 << properties_.Capability.ui32.WatchPointsTotalBits); - break; - case HSA_AMD_AGENT_INFO_BDFID: - *((uint32_t*)value) = static_cast(properties_.LocationId); - break; - default: - return HSA_STATUS_ERROR_INVALID_ARGUMENT; - break; - } - return HSA_STATUS_SUCCESS; -} - -hsa_status_t GpuAgent::QueueCreate(size_t size, hsa_queue_type_t queue_type, - core::HsaEventCallback event_callback, - void* data, uint32_t private_segment_size, - uint32_t group_segment_size, - core::Queue** queue) { - // AQL queues must be a power of two in length. - if (!IsPowerOfTwo(size)) { - return HSA_STATUS_ERROR_INVALID_ARGUMENT; - } - - // Enforce max size - if (size > maxAqlSize_) { - return HSA_STATUS_ERROR_OUT_OF_RESOURCES; - } - - // Allocate scratch memory - ScratchInfo scratch; -#if defined(HSA_LARGE_MODEL) && defined(__linux__) - if (core::g_use_interrupt_wait) { - if (private_segment_size == UINT_MAX) { - private_segment_size = - (profile_ == HSA_PROFILE_BASE) ? 
0 : scratch_per_thread_; - } - - if (private_segment_size > 262128) { - return HSA_STATUS_ERROR_OUT_OF_RESOURCES; - } - - scratch.size_per_thread = AlignUp(private_segment_size, 16); - if (scratch.size_per_thread > 262128) { - return HSA_STATUS_ERROR_OUT_OF_RESOURCES; - } - - const uint32_t num_cu = - properties_.NumFComputeCores / properties_.NumSIMDPerCU; - scratch.size = scratch.size_per_thread * 32 * 64 * num_cu; - } else { - scratch.size = queue_scratch_len_; - scratch.size_per_thread = scratch_per_thread_; - } -#else - scratch.size = queue_scratch_len_; - scratch.size_per_thread = scratch_per_thread_; -#endif - scratch.queue_base = NULL; - if (scratch.size != 0) { - AcquireQueueScratch(scratch); - if (scratch.queue_base == NULL) { - return HSA_STATUS_ERROR_OUT_OF_RESOURCES; - } - } - - // Create an HW AQL queue - AqlQueue* hw_queue = new AqlQueue(this, size, node_id(), scratch, - event_callback, data, is_kv_device_); - if (hw_queue && hw_queue->IsValid()) { - // return queue - *queue = hw_queue; - return HSA_STATUS_SUCCESS; - } - // If reached here its always an ERROR. 
- delete hw_queue; - ReleaseQueueScratch(scratch.queue_base); - return HSA_STATUS_ERROR_OUT_OF_RESOURCES; -} - -void GpuAgent::AcquireQueueScratch(ScratchInfo& scratch) { - if (scratch.size == 0) { - scratch.size = queue_scratch_len_; - scratch.size_per_thread = scratch_per_thread_; - } - - ScopedAcquire lock(&scratch_lock_); - scratch.queue_base = scratch_pool_.alloc(scratch.size); - scratch.queue_process_offset = - uintptr_t(scratch.queue_base) - uintptr_t(scratch_pool_.base()); - - if ((scratch.queue_base != NULL) && (profile_ == HSA_PROFILE_BASE)) { - HSAuint64 alternate_va; - if (HSAKMT_STATUS_SUCCESS != - hsaKmtMapMemoryToGPU(scratch.queue_base, scratch.size, &alternate_va)) { - assert(false && "Map scratch subrange failed!"); - scratch_pool_.free(scratch.queue_base); - scratch.queue_base = NULL; - } - } -} - -void GpuAgent::ReleaseQueueScratch(void* base) { - if (base == NULL) { - return; - } - - ScopedAcquire lock(&scratch_lock_); - if (profile_ == HSA_PROFILE_BASE) { - if (HSAKMT_STATUS_SUCCESS != hsaKmtUnmapMemoryToGPU(base)) { - assert(false && "Unmap scratch subrange failed!"); - } - } - scratch_pool_.free(base); -} - -void GpuAgent::TranslateTime(core::Signal* signal, - hsa_amd_profiling_dispatch_time_t& time) { - // Ensure interpolation - ScopedAcquire lock(&t1_lock_); - if (t1_.GPUClockCounter < signal->signal_.end_ts) { - SyncClocks(); - } - - time.start = uint64_t( - (double(int64_t(t0_.SystemClockCounter - t1_.SystemClockCounter)) / - double(int64_t(t0_.GPUClockCounter - t1_.GPUClockCounter))) * - double(int64_t(signal->signal_.start_ts - t1_.GPUClockCounter)) + - double(t1_.SystemClockCounter)); - time.end = uint64_t( - (double(int64_t(t0_.SystemClockCounter - t1_.SystemClockCounter)) / - double(int64_t(t0_.GPUClockCounter - t1_.GPUClockCounter))) * - double(int64_t(signal->signal_.end_ts - t1_.GPUClockCounter)) + - double(t1_.SystemClockCounter)); -} - -uint64_t GpuAgent::TranslateTime(uint64_t tick) { - ScopedAcquire lock(&t1_lock_); - 
SyncClocks(); - - uint64_t system_tick = 0; - system_tick = uint64_t( - (double(int64_t(t0_.SystemClockCounter - t1_.SystemClockCounter)) / - double(int64_t(t0_.GPUClockCounter - t1_.GPUClockCounter))) * - double(int64_t(tick - t1_.GPUClockCounter)) + - double(t1_.SystemClockCounter)); - return system_tick; -} - -bool GpuAgent::current_coherency_type(hsa_amd_coherency_type_t type) { - if (!is_kv_device_) { - current_coherency_type_ = type; - return true; - } - - ScopedAcquire Lock(&coherency_lock_); - - if (ape1_base_ == 0 && ape1_size_ == 0) { - static const size_t kApe1Alignment = 64 * 1024; - ape1_size_ = kApe1Alignment; - ape1_base_ = reinterpret_cast( - _aligned_malloc(ape1_size_, kApe1Alignment)); - assert((ape1_base_ != 0) && ("APE1 allocation failed")); - } else if (type == current_coherency_type_) { - return true; - } - - HSA_CACHING_TYPE type0, type1; - if (type == HSA_AMD_COHERENCY_TYPE_COHERENT) { - type0 = HSA_CACHING_CACHED; - type1 = HSA_CACHING_NONCACHED; - } else { - type0 = HSA_CACHING_NONCACHED; - type1 = HSA_CACHING_CACHED; - } - - if (hsaKmtSetMemoryPolicy(node_id(), type0, type1, - reinterpret_cast(ape1_base_), - ape1_size_) != HSAKMT_STATUS_SUCCESS) { - return false; - } - current_coherency_type_ = type; - return true; -} - -uint16_t GpuAgent::GetMicrocodeVersion() const { - return (properties_.EngineId.ui32.uCode); -} - -void GpuAgent::SyncClocks() { - HSAKMT_STATUS err = hsaKmtGetClockCounters(node_id(), &t1_); - assert(err == HSAKMT_STATUS_SUCCESS && "hsaGetClockCounters error"); -} - -void GpuAgent::BindTrapHandler() { -#ifdef __linux__ // No raw string literal support in VS builds right now - const char* src_sp3 = R"( - var s_trap_info_lo = ttmp0 - var s_trap_info_hi = ttmp1 - var s_tmp0 = ttmp2 - var s_tmp1 = ttmp3 - var s_tmp2 = ttmp4 - var s_tmp3 = ttmp5 - - shader TrapHandler - type(CS) - - // Retrieve the queue inactive signal. 
- s_load_dwordx2 [s_tmp0, s_tmp1], s[0:1], 0xC0 - s_waitcnt lgkmcnt(0) - - // Mask all but one lane of the wavefront. - s_mov_b64 exec, 0x1 - - // Set queue signal value to unhandled exception error. - s_add_u32 s_tmp0, s_tmp0, 0x8 - s_addc_u32 s_tmp1, s_tmp1, 0x0 - v_mov_b32 v0, s_tmp0 - v_mov_b32 v1, s_tmp1 - v_mov_b32 v2, 0x80000000 - v_mov_b32 v3, 0x0 - flat_atomic_swap_x2 v[0:1], v[0:1], v[2:3] - s_waitcnt vmcnt(0) - - // Skip event if the signal was already set to unhandled exception. - v_cmp_eq_u64 vcc, v[0:1], v[2:3] - s_cbranch_vccnz L_SIGNAL_DONE - - // Check for a non-NULL signal event mailbox. - s_load_dwordx2 [s_tmp2, s_tmp3], [s_tmp0, s_tmp1], 0x8 - s_waitcnt lgkmcnt(0) - s_and_b64 [s_tmp2, s_tmp3], [s_tmp2, s_tmp3], [s_tmp2, s_tmp3] - s_cbranch_scc0 L_SIGNAL_DONE - - // Load the signal event value. - s_add_u32 s_tmp0, s_tmp0, 0x10 - s_addc_u32 s_tmp1, s_tmp1, 0x0 - s_load_dword s_tmp0, [s_tmp0, s_tmp1], 0x0 - s_waitcnt lgkmcnt(0) - - // Write the signal event value to the mailbox. - v_mov_b32 v0, s_tmp2 - v_mov_b32 v1, s_tmp3 - v_mov_b32 v2, s_tmp0 - flat_store_dword v[0:1], v2 - s_waitcnt vmcnt(0) - - // Send an interrupt to trigger event notification. - s_sendmsg sendmsg(MSG_INTERRUPT) - - L_SIGNAL_DONE: - // Halt wavefront and exit trap. - s_sethalt 1 - s_rfe_b64 [s_trap_info_lo, s_trap_info_hi] - end - )"; - - if (isa_->GetMajorVersion() == 7) { - // No trap handler support on Gfx7, soft error. - return; - } - - // Disable trap handler on Carrizo until KFD is fixed. - if (profile_ == HSA_PROFILE_FULL) { - return; - } - - // Assemble the trap handler source code. - AssembleShader(src_sp3, "TrapHandler", trap_code_buf_, trap_code_buf_size_); - - // Bind the trap handler to this node. 
- HSAKMT_STATUS err = hsaKmtSetTrapHandler(node_id(), trap_code_buf_, - trap_code_buf_size_, NULL, 0); - assert(err == HSAKMT_STATUS_SUCCESS && "hsaKmtSetTrapHandler() failed"); -#endif -} - -} // namespace diff --git a/runtime/hsa-runtime/core/runtime/amd_load_map.cpp b/runtime/hsa-runtime/core/runtime/amd_load_map.cpp deleted file mode 100644 index a3f9ac9c9a..0000000000 --- a/runtime/hsa-runtime/core/runtime/amd_load_map.cpp +++ /dev/null @@ -1,172 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// -// The University of Illinois/NCSA -// Open Source License (NCSA) -// -// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. -// -// Developed by: -// -// AMD Research and AMD HSA Software Development -// -// Advanced Micro Devices, Inc. -// -// www.amd.com -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal with the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimers. -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimers in -// the documentation and/or other materials provided with the distribution. -// - Neither the names of Advanced Micro Devices, Inc, -// nor the names of its contributors may be used to endorse or promote -// products derived from this Software without specific prior written -// permission. 
-// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR -// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// DEALINGS WITH THE SOFTWARE. -// -//////////////////////////////////////////////////////////////////////////////// - -#include -#include "core/inc/amd_hsa_loader.hpp" -#include "core/inc/amd_load_map.h" -#include "core/inc/runtime.h" - -using amd::hsa::loader::Executable; -using amd::hsa::loader::LoadedCodeObject; -using amd::hsa::loader::LoadedSegment; - -hsa_status_t amd_executable_load_code_object( - hsa_executable_t executable, - hsa_agent_t agent, - hsa_code_object_t code_object, - const char *options, - amd_loaded_code_object_t *loaded_code_object) -{ - if (!core::Runtime::runtime_singleton_->IsOpen()) { - return HSA_STATUS_ERROR_NOT_INITIALIZED; - } - if (nullptr == loaded_code_object) { - return HSA_STATUS_ERROR_INVALID_ARGUMENT; - } - - Executable *exec = Executable::Object(executable); - if (nullptr == exec) { - return HSA_STATUS_ERROR_INVALID_EXECUTABLE; - } - return exec->LoadCodeObject(agent, code_object, options, loaded_code_object); -} - -hsa_status_t amd_iterate_executables( - hsa_status_t (*callback)( - hsa_executable_t executable, - void *data), - void *data) -{ - if (!core::Runtime::runtime_singleton_->IsOpen()) { - return HSA_STATUS_ERROR_NOT_INITIALIZED; - } - if (nullptr == callback) { - return HSA_STATUS_ERROR_INVALID_ARGUMENT; - } - - return core::Runtime::runtime_singleton_->loader()->IterateExecutables(callback, data); -} - -hsa_status_t amd_executable_iterate_loaded_code_objects( - hsa_executable_t executable, - hsa_status_t (*callback)( - amd_loaded_code_object_t 
loaded_code_object, - void *data), - void *data) -{ - if (!core::Runtime::runtime_singleton_->IsOpen()) { - return HSA_STATUS_ERROR_NOT_INITIALIZED; - } - if (nullptr == callback) { - return HSA_STATUS_ERROR_INVALID_ARGUMENT; - } - - Executable *exec = Executable::Object(executable); - if (nullptr == exec) { - return HSA_STATUS_ERROR_INVALID_EXECUTABLE; - } - return exec->IterateLoadedCodeObjects(callback, data); -} - -hsa_status_t amd_loaded_code_object_get_info( - amd_loaded_code_object_t loaded_code_object, - amd_loaded_code_object_info_t attribute, - void *value) -{ - if (!core::Runtime::runtime_singleton_->IsOpen()) { - return HSA_STATUS_ERROR_NOT_INITIALIZED; - } - if (nullptr == value) { - return HSA_STATUS_ERROR_INVALID_ARGUMENT; - } - - LoadedCodeObject *obj = LoadedCodeObject::Object(loaded_code_object); - if (nullptr == obj) { - // \todo: new error code: AMD_STATUS_ERROR_INVALID_LOADED_CODE_OBJECT. - return HSA_STATUS_ERROR_INVALID_ARGUMENT; - } - return false == obj->GetInfo(attribute, value) ? - HSA_STATUS_ERROR_INVALID_ARGUMENT : HSA_STATUS_SUCCESS; -} - -hsa_status_t amd_loaded_code_object_iterate_loaded_segments( - amd_loaded_code_object_t loaded_code_object, - hsa_status_t (*callback)( - amd_loaded_segment_t loaded_segment, - void *data), - void *data) -{ - if (!core::Runtime::runtime_singleton_->IsOpen()) { - return HSA_STATUS_ERROR_NOT_INITIALIZED; - } - if (nullptr == callback) { - return HSA_STATUS_ERROR_INVALID_ARGUMENT; - } - - LoadedCodeObject *obj = LoadedCodeObject::Object(loaded_code_object); - if (nullptr == obj) { - // \todo: new error code: AMD_STATUS_ERROR_INVALID_LOADED_CODE_OBJECT. 
- return HSA_STATUS_ERROR_INVALID_ARGUMENT; - } - return obj->IterateLoadedSegments(callback, data); -} - -hsa_status_t amd_loaded_segment_get_info( - amd_loaded_segment_t loaded_segment, - amd_loaded_segment_info_t attribute, - void *value) -{ - if (!core::Runtime::runtime_singleton_->IsOpen()) { - return HSA_STATUS_ERROR_NOT_INITIALIZED; - } - if (nullptr == value) { - return HSA_STATUS_ERROR_INVALID_ARGUMENT; - } - - LoadedSegment *obj = LoadedSegment::Object(loaded_segment); - if (nullptr == obj) { - // \todo: new error code: AMD_STATUS_ERROR_INVALID_LOADED_SEGMENT. - return HSA_STATUS_ERROR_INVALID_ARGUMENT; - } - return false == obj->GetInfo(attribute, value) ? - HSA_STATUS_ERROR_INVALID_ARGUMENT : HSA_STATUS_SUCCESS; -} diff --git a/runtime/hsa-runtime/core/runtime/amd_loader_context.cpp b/runtime/hsa-runtime/core/runtime/amd_loader_context.cpp deleted file mode 100644 index ec7a91720b..0000000000 --- a/runtime/hsa-runtime/core/runtime/amd_loader_context.cpp +++ /dev/null @@ -1,588 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// -// The University of Illinois/NCSA -// Open Source License (NCSA) -// -// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. -// -// Developed by: -// -// AMD Research and AMD HSA Software Development -// -// Advanced Micro Devices, Inc. -// -// www.amd.com -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal with the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimers. 
-// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimers in -// the documentation and/or other materials provided with the distribution. -// - Neither the names of Advanced Micro Devices, Inc, -// nor the names of its contributors may be used to endorse or promote -// products derived from this Software without specific prior written -// permission. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR -// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// DEALINGS WITH THE SOFTWARE. -// -//////////////////////////////////////////////////////////////////////////////// - -#include "core/inc/amd_loader_context.hpp" - -#include -#include -#include - -#include "core/inc/amd_gpu_agent.h" -#include "core/inc/amd_memory_region.h" -#include "core/util/os.h" - -#include -#include -#include "core/inc/hsa_internal.h" -#include "core/util/utils.h" -#include "inc/hsa_ext_amd.h" - -#if defined(_WIN32) || defined(_WIN64) -#include -#else -#include -#endif - -namespace { - -bool IsLocalRegion(const core::MemoryRegion *region) -{ - const amd::MemoryRegion *amd_region = (amd::MemoryRegion*)region; - if (nullptr == amd_region || !amd_region->IsLocalMemory()) { - return false; - } - return true; -} - -bool IsDebuggerRegistered() -{ - return false; - // Leaving code commented as it will be used later on - // return (("1" == os::GetEnvVar("HSA_EMULATE_AQL")) && - // (0 != os::GetEnvVar("HSA_TOOLS_LIB").size())); -} - -class SegmentMemory { -public: - virtual ~SegmentMemory() {} - virtual void* Address(size_t offset = 0) const = 0; - virtual void* 
HostAddress(size_t offset = 0) const = 0; - virtual bool Allocated() const = 0; - virtual bool Allocate(size_t size, size_t align, bool zero) = 0; - virtual bool Copy(size_t offset, const void *src, size_t size) = 0; - virtual void Free() = 0; - virtual bool Freeze() = 0; - -protected: - SegmentMemory() {} - -private: - SegmentMemory(const SegmentMemory&); - SegmentMemory& operator=(const SegmentMemory&); -}; - -class MallocedMemory final: public SegmentMemory { -public: - MallocedMemory(): SegmentMemory(), ptr_(nullptr), size_(0) {} - ~MallocedMemory() {} - - void* Address(size_t offset = 0) const override - { assert(this->Allocated()); return (char*)ptr_ + offset; } - void* HostAddress(size_t offset = 0) const override - { assert(false); return nullptr; } - bool Allocated() const override - { return nullptr != ptr_; } - - bool Allocate(size_t size, size_t align, bool zero) override; - bool Copy(size_t offset, const void *src, size_t size) override; - void Free() override; - bool Freeze() override; - -private: - MallocedMemory(const MallocedMemory&); - MallocedMemory& operator=(const MallocedMemory&); - - void *ptr_; - size_t size_; -}; - -bool MallocedMemory::Allocate(size_t size, size_t align, bool zero) -{ - assert(!this->Allocated()); - assert(0 < size); - assert(0 < align && 0 == (align & (align - 1))); - ptr_ = _aligned_malloc(size, align); - if (nullptr == ptr_) { - return false; - } - if (HSA_STATUS_SUCCESS != HSA::hsa_memory_register(ptr_, size)) { - _aligned_free(ptr_); - ptr_ = nullptr; - return false; - } - if (zero) { - memset(ptr_, 0x0, size); - } - size_ = size; - return true; -} - -bool MallocedMemory::Copy(size_t offset, const void *src, size_t size) -{ - assert(this->Allocated()); - assert(nullptr != src); - assert(0 < size); - memcpy(this->Address(offset), src, size); - return true; -} - -void MallocedMemory::Free() -{ - assert(this->Allocated()); - HSA::hsa_memory_deregister(ptr_, size_); - _aligned_free(ptr_); - ptr_ = nullptr; - size_ = 0; -} 
- -bool MallocedMemory::Freeze() -{ - assert(this->Allocated()); - return true; -} - -class MappedMemory final: public SegmentMemory { -public: - MappedMemory(bool is_kv = false): SegmentMemory(), is_kv_(is_kv), ptr_(nullptr), size_(0) {} - ~MappedMemory() {} - - void* Address(size_t offset = 0) const override - { assert(this->Allocated()); return (char*)ptr_ + offset; } - void* HostAddress(size_t offset = 0) const override - { assert(false); return nullptr; } - bool Allocated() const override - { return nullptr != ptr_; } - - bool Allocate(size_t size, size_t align, bool zero) override; - bool Copy(size_t offset, const void *src, size_t size) override; - void Free() override; - bool Freeze() override; - -private: - MappedMemory(const MappedMemory&); - MappedMemory& operator=(const MappedMemory&); - - bool is_kv_; - void *ptr_; - size_t size_; -}; - -bool MappedMemory::Allocate(size_t size, size_t align, bool zero) -{ - assert(!this->Allocated()); - assert(0 < size); - assert(0 < align && 0 == (align & (align - 1))); -#if defined(_WIN32) || defined(_WIN64) - ptr_ = (void*)VirtualAlloc(nullptr, size, MEM_COMMIT | MEM_RESERVE, PAGE_EXECUTE_READWRITE); -#else - ptr_ = is_kv_ ? 
- mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0) : - mmap(nullptr, size, PROT_EXEC | PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); -#endif // _WIN32 || _WIN64 - if (nullptr == ptr_) { - return false; - } - assert(0 == ((uintptr_t)ptr_) % align); - if (HSA_STATUS_SUCCESS != HSA::hsa_memory_register(ptr_, size)) { -#if defined(_WIN32) || defined(_WIN64) - VirtualFree(ptr_, size, MEM_DECOMMIT); - VirtualFree(ptr_, 0, MEM_RELEASE); -#else - munmap(ptr_, size); -#endif // _WIN32 || _WIN64 - ptr_ = nullptr; - return false; - } - if (zero) { - memset(ptr_, 0x0, size); - } - size_ = size; - return true; -} - -bool MappedMemory::Copy(size_t offset, const void *src, size_t size) -{ - assert(this->Allocated()); - assert(nullptr != src); - assert(0 < size); - memcpy(this->Address(offset), src, size); - return true; -} - -void MappedMemory::Free() -{ - assert(this->Allocated()); - HSA::hsa_memory_deregister(ptr_, size_); -#if defined(_WIN32) || defined(_WIN64) - VirtualFree(ptr_, size_, MEM_DECOMMIT); - VirtualFree(ptr_, 0, MEM_RELEASE); -#else - munmap(ptr_, size_); -#endif // _WIN32 || _WIN64 - ptr_ = nullptr; - size_ = 0; -} - -bool MappedMemory::Freeze() -{ - assert(this->Allocated()); - return true; -} - -class RegionMemory final: public SegmentMemory { -public: - static hsa_region_t AgentLocal(hsa_agent_t agent); - static hsa_region_t System(); - - RegionMemory(hsa_region_t region): SegmentMemory(), region_(region), ptr_(nullptr), host_ptr_(nullptr), size_(0) {} - ~RegionMemory() {} - - void* Address(size_t offset = 0) const override - { assert(this->Allocated()); return (char*)ptr_ + offset; } - void* HostAddress(size_t offset = 0) const override - { assert(this->Allocated()); return (char*)host_ptr_ + offset; } - bool Allocated() const override - { return nullptr != ptr_; } - - bool Allocate(size_t size, size_t align, bool zero) override; - bool Copy(size_t offset, const void *src, size_t size) override; - void Free() 
override; - bool Freeze() override; - -private: - RegionMemory(const RegionMemory&); - RegionMemory& operator=(const RegionMemory&); - - hsa_region_t region_; - void *ptr_; - void *host_ptr_; - size_t size_; -}; - -hsa_region_t RegionMemory::AgentLocal(hsa_agent_t agent) -{ - hsa_region_t invalid_region; invalid_region.handle = 0; - amd::GpuAgent *amd_agent = (amd::GpuAgent*)core::Agent::Convert(agent); - if (nullptr == amd_agent) { - return invalid_region; - } - auto agent_local_region = std::find_if(amd_agent->regions().begin(), amd_agent->regions().end(), IsLocalRegion); - return agent_local_region == amd_agent->regions().end() ? - invalid_region : core::MemoryRegion::Convert(*agent_local_region); -} - -hsa_region_t RegionMemory::System() { - const core::MemoryRegion* default_system_region = - core::Runtime::runtime_singleton_->system_regions_fine()[0]; - - assert(default_system_region != NULL); - - return core::MemoryRegion::Convert(default_system_region); -} - -bool RegionMemory::Allocate(size_t size, size_t align, bool zero) -{ - assert(!this->Allocated()); - assert(0 < size); - assert(0 < align && 0 == (align & (align - 1))); - if (HSA_STATUS_SUCCESS != HSA::hsa_memory_allocate(region_, size, &ptr_)) { - ptr_ = nullptr; - return false; - } - assert(0 == ((uintptr_t)ptr_) % align); - if (HSA_STATUS_SUCCESS != HSA::hsa_memory_allocate(RegionMemory::System(), size, &host_ptr_)) { - HSA::hsa_memory_free(ptr_); - ptr_ = nullptr; - host_ptr_ = nullptr; - return false; - } - if (zero) { - memset(host_ptr_, 0x0, size); - } - size_ = size; - return true; -} - -bool RegionMemory::Copy(size_t offset, const void *src, size_t size) -{ - assert(this->Allocated() && nullptr != host_ptr_); - assert(nullptr != src); - assert(0 < size); - memcpy((char*)host_ptr_ + offset, src, size); - return true; -} - -void RegionMemory::Free() -{ - assert(this->Allocated()); - HSA::hsa_memory_free(ptr_); - if (nullptr != host_ptr_) { - HSA::hsa_memory_free(host_ptr_); - } - ptr_ = nullptr; 
- host_ptr_ = nullptr; - size_ = 0; -} - -bool RegionMemory::Freeze() { - assert(this->Allocated() && nullptr != host_ptr_); - - core::Agent* agent = reinterpret_cast( - core::MemoryRegion::Convert(region_))->owner(); - if (agent != NULL && agent->device_type() == core::Agent::kAmdGpuDevice) { - if (HSA_STATUS_SUCCESS != agent->DmaCopy(ptr_, host_ptr_, size_)) { - return false; - } - } else { - memcpy(ptr_, host_ptr_, size_); - } - - return true; -} - -} // namespace anonymous - -namespace amd { - -hsa_isa_t LoaderContext::IsaFromName(const char *name) { - assert(name); - - hsa_status_t hsa_status = HSA_STATUS_SUCCESS; - hsa_isa_t isa_handle; - isa_handle.handle = 0; - - hsa_status = HSA::hsa_isa_from_name(name, &isa_handle); - if (HSA_STATUS_SUCCESS != hsa_status) { - isa_handle.handle = 0; - return isa_handle; - } - - return isa_handle; -} - -bool LoaderContext::IsaSupportedByAgent(hsa_agent_t agent, - hsa_isa_t code_object_isa) { - assert(agent.handle); - - hsa_status_t hsa_status = HSA_STATUS_SUCCESS; - hsa_isa_t agent_isa; - agent_isa.handle = 0; - - hsa_status = HSA::hsa_agent_get_info(agent, HSA_AGENT_INFO_ISA, &agent_isa); - if (HSA_STATUS_SUCCESS != hsa_status) { - return false; - } - - bool result = false; - - hsa_status = HSA::hsa_isa_compatible(code_object_isa, agent_isa, &result); - if (HSA_STATUS_SUCCESS != hsa_status) { - return false; - } - - return result; -} - -void* LoaderContext::SegmentAlloc(amdgpu_hsa_elf_segment_t segment, - hsa_agent_t agent, - size_t size, - size_t align, - bool zero) -{ - assert(0 < size); - assert(0 < align && 0 == (align & (align - 1))); - hsa_profile_t agent_profile; - if (HSA_STATUS_SUCCESS != HSA::hsa_agent_get_info(agent, HSA_AGENT_INFO_PROFILE, &agent_profile)) { - return nullptr; - } - SegmentMemory *mem = nullptr; - switch (segment) { - case AMDGPU_HSA_SEGMENT_GLOBAL_AGENT: - case AMDGPU_HSA_SEGMENT_READONLY_AGENT: - switch (agent_profile) { - case HSA_PROFILE_BASE: - mem = new (std::nothrow) 
RegionMemory(RegionMemory::AgentLocal(agent)); - break; - case HSA_PROFILE_FULL: - mem = new (std::nothrow) RegionMemory(RegionMemory::System()); - break; - default: - assert(false); - } - break; - case AMDGPU_HSA_SEGMENT_GLOBAL_PROGRAM: - switch (agent_profile) { - case HSA_PROFILE_BASE: - mem = new (std::nothrow) RegionMemory(RegionMemory::System()); - break; - case HSA_PROFILE_FULL: - mem = new (std::nothrow) MallocedMemory(); - break; - default: - assert(false); - } - break; - case AMDGPU_HSA_SEGMENT_CODE_AGENT: - switch (agent_profile) { - case HSA_PROFILE_BASE: - mem = new (std::nothrow) RegionMemory(IsDebuggerRegistered() ? - RegionMemory::System() : - RegionMemory::AgentLocal(agent)); - break; - case HSA_PROFILE_FULL: - mem = new (std::nothrow) MappedMemory(((GpuAgentInt*)core::Agent::Convert(agent))->is_kv_device()); - break; - default: - assert(false); - } - break; - default: - assert(false); - } - if (nullptr == mem) { - return nullptr; - } - mem->Allocate(size, align, zero); - return mem; -} - -bool LoaderContext::SegmentCopy(amdgpu_hsa_elf_segment_t segment, // not used. - hsa_agent_t agent, // not used. - void* dst, - size_t offset, - const void* src, - size_t size) -{ - assert(nullptr != dst); - return ((SegmentMemory*)dst)->Copy(offset, src, size); -} - -void LoaderContext::SegmentFree(amdgpu_hsa_elf_segment_t segment, // not used. - hsa_agent_t agent, // not used. - void* seg, - size_t size) // not used. -{ - assert(nullptr != seg); - SegmentMemory *mem = (SegmentMemory*)seg; - mem->Free(); - delete mem; - mem = nullptr; -} - -void* LoaderContext::SegmentAddress(amdgpu_hsa_elf_segment_t segment, // not used. - hsa_agent_t agent, // not used. - void* seg, - size_t offset) -{ - assert(nullptr != seg); - return ((SegmentMemory*)seg)->Address(offset); -} - -void* LoaderContext::SegmentHostAddress(amdgpu_hsa_elf_segment_t segment, // not used. - hsa_agent_t agent, // not used. 
- void* seg, - size_t offset) -{ - assert(nullptr != seg); - return ((SegmentMemory*)seg)->HostAddress(offset); -} - -bool LoaderContext::SegmentFreeze(amdgpu_hsa_elf_segment_t segment, // not used. - hsa_agent_t agent, // not used. - void* seg, - size_t size) // not used. -{ - assert(nullptr != seg); - return ((SegmentMemory*)seg)->Freeze(); -} - -bool LoaderContext::ImageExtensionSupported() { - hsa_status_t hsa_status = HSA_STATUS_SUCCESS; - bool result = false; - - hsa_status = - HSA::hsa_system_extension_supported(HSA_EXTENSION_IMAGES, 1, 0, &result); - if (HSA_STATUS_SUCCESS != hsa_status) { - return false; - } - - return result; -} - -hsa_status_t LoaderContext::ImageCreate( - hsa_agent_t agent, hsa_access_permission_t image_permission, - const hsa_ext_image_descriptor_t *image_descriptor, const void *image_data, - hsa_ext_image_t *image_handle) { - assert(agent.handle); - assert(image_descriptor); - assert(image_data); - assert(image_handle); - - assert(ImageExtensionSupported()); - - return hsa_ext_image_create(agent, image_descriptor, image_data, - image_permission, image_handle); -} - -hsa_status_t LoaderContext::ImageDestroy(hsa_agent_t agent, - hsa_ext_image_t image_handle) { - assert(agent.handle); - assert(image_handle.handle); - - assert(ImageExtensionSupported()); - - return hsa_ext_image_destroy(agent, image_handle); -} - -hsa_status_t LoaderContext::SamplerCreate( - hsa_agent_t agent, const hsa_ext_sampler_descriptor_t *sampler_descriptor, - hsa_ext_sampler_t *sampler_handle) { - assert(agent.handle); - assert(sampler_descriptor); - assert(sampler_handle); - - assert(ImageExtensionSupported()); - - return hsa_ext_sampler_create(agent, sampler_descriptor, sampler_handle); -} - -hsa_status_t LoaderContext::SamplerDestroy(hsa_agent_t agent, - hsa_ext_sampler_t sampler_handle) { - assert(agent.handle); - assert(sampler_handle.handle); - - assert(ImageExtensionSupported()); - - return hsa_ext_sampler_destroy(agent, sampler_handle); -} - -} // 
namespace amd diff --git a/runtime/hsa-runtime/core/runtime/amd_memory_region.cpp b/runtime/hsa-runtime/core/runtime/amd_memory_region.cpp deleted file mode 100644 index b3b4179247..0000000000 --- a/runtime/hsa-runtime/core/runtime/amd_memory_region.cpp +++ /dev/null @@ -1,555 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// -// The University of Illinois/NCSA -// Open Source License (NCSA) -// -// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. -// -// Developed by: -// -// AMD Research and AMD HSA Software Development -// -// Advanced Micro Devices, Inc. -// -// www.amd.com -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal with the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimers. -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimers in -// the documentation and/or other materials provided with the distribution. -// - Neither the names of Advanced Micro Devices, Inc, -// nor the names of its contributors may be used to endorse or promote -// products derived from this Software without specific prior written -// permission. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL -// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR -// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// DEALINGS WITH THE SOFTWARE. -// -//////////////////////////////////////////////////////////////////////////////// - -#include "core/inc/amd_memory_region.h" - -#include - -#include "core/inc/runtime.h" -#include "core/inc/amd_cpu_agent.h" -#include "core/inc/amd_gpu_agent.h" -#include "core/util/utils.h" - -namespace amd { -void* MemoryRegion::AllocateKfdMemory(const HsaMemFlags& flag, - HSAuint32 node_id, size_t size) { - void* ret = NULL; - const HSAKMT_STATUS status = hsaKmtAllocMemory(node_id, size, flag, &ret); - return (status == HSAKMT_STATUS_SUCCESS) ? ret : NULL; -} - -void MemoryRegion::FreeKfdMemory(void* ptr, size_t size) { - if (ptr == NULL || size == 0) { - return; - } - - HSAKMT_STATUS status = hsaKmtFreeMemory(ptr, size); - assert(status == HSAKMT_STATUS_SUCCESS); -} - -bool MemoryRegion::RegisterMemory(void* ptr, size_t size, size_t num_nodes, - const uint32_t* nodes) { - assert(ptr != NULL); - assert(size != 0); - assert(num_nodes != 0); - assert(nodes != NULL); - - const HSAKMT_STATUS status = hsaKmtRegisterMemoryToNodes( - ptr, size, num_nodes, const_cast(nodes)); - return (status == HSAKMT_STATUS_SUCCESS); -} - -void MemoryRegion::DeregisterMemory(void* ptr) { hsaKmtDeregisterMemory(ptr); } - -bool MemoryRegion::MakeKfdMemoryResident(size_t num_node, const uint32_t* nodes, - void* ptr, size_t size, - uint64_t* alternate_va, - HsaMemMapFlags map_flag) { - assert(num_node > 0); - assert(nodes != NULL); - - *alternate_va = 0; - const HSAKMT_STATUS status = - hsaKmtMapMemoryToGPUNodes(ptr, size, alternate_va, map_flag, num_node, - const_cast(nodes)); - - return (status == HSAKMT_STATUS_SUCCESS); -} - -void MemoryRegion::MakeKfdMemoryUnresident(void* ptr) { - hsaKmtUnmapMemoryToGPU(ptr); -} - 
-MemoryRegion::MemoryRegion(bool fine_grain, bool full_profile, - core::Agent* owner, - const HsaMemoryProperties& mem_props) - : core::MemoryRegion(fine_grain, full_profile, owner), - mem_props_(mem_props), - max_single_alloc_size_(0), - virtual_size_(0) { - virtual_size_ = GetPhysicalSize(); - - mem_flag_.Value = 0; - map_flag_.Value = 0; - - static const HSAuint64 kGpuVmSize = (1ULL << 40); - - if (IsLocalMemory()) { - mem_flag_.ui32.PageSize = HSA_PAGE_SIZE_4KB; - mem_flag_.ui32.NoSubstitute = 1; - mem_flag_.ui32.HostAccess = - (mem_props_.HeapType == HSA_HEAPTYPE_FRAME_BUFFER_PRIVATE) ? 0 : 1; - mem_flag_.ui32.NonPaged = 1; - - map_flag_.ui32.PageSize = HSA_PAGE_SIZE_4KB; - - virtual_size_ = kGpuVmSize; - } else if (IsSystem()) { - mem_flag_.ui32.PageSize = HSA_PAGE_SIZE_4KB; - mem_flag_.ui32.NoSubstitute = 1; - mem_flag_.ui32.HostAccess = 1; - mem_flag_.ui32.CachePolicy = HSA_CACHING_CACHED; - - map_flag_.ui32.HostAccess = 1; - map_flag_.ui32.PageSize = HSA_PAGE_SIZE_4KB; - - virtual_size_ = - (full_profile) ? os::GetUserModeVirtualMemorySize() : kGpuVmSize; - } - - max_single_alloc_size_ = - AlignDown(static_cast(GetPhysicalSize()), kPageSize_); - - mem_flag_.ui32.CoarseGrain = (fine_grain) ? 
0 : 1; - - assert(GetVirtualSize() != 0); - assert(GetPhysicalSize() <= GetVirtualSize()); - assert(IsMultipleOf(max_single_alloc_size_, kPageSize_)); -} - -MemoryRegion::~MemoryRegion() {} - -hsa_status_t MemoryRegion::Allocate(size_t size, void** address) const { - return Allocate(false, size, address); -} - -hsa_status_t MemoryRegion::Allocate(bool restrict_access, size_t size, - void** address) const { - if (address == NULL) { - return HSA_STATUS_ERROR_INVALID_ARGUMENT; - } - - if (!IsSystem() && !IsLocalMemory()) { - return HSA_STATUS_ERROR_INVALID_ALLOCATION; - } - - if (size > max_single_alloc_size_) { - return HSA_STATUS_ERROR_INVALID_ALLOCATION; - } - - size = AlignUp(size, kPageSize_); - - *address = AllocateKfdMemory(mem_flag_, owner()->node_id(), size); - - if (*address != NULL) { - // Commit the memory. - // For system memory, on non-restricted allocation, map it to all GPUs. On - // restricted allocation, only CPU is allowed to access by default, so - // no need to map - // For local memory, only map it to the owning GPU. Mapping to other GPU, - // if the access is allowed, is performed on AllowAccess. - HsaMemMapFlags map_flag = map_flag_; - size_t map_node_count = 1; - const uint32_t owner_node_id = owner()->node_id(); - const uint32_t* map_node_id = &owner_node_id; - - if (IsSystem()) { - if (!restrict_access) { - // Map to all GPU agents. - map_node_count = core::Runtime::runtime_singleton_->gpu_ids().size(); - - if (map_node_count == 0) { - // No need to pin since no GPU in the platform. - return HSA_STATUS_SUCCESS; - } - - map_node_id = &core::Runtime::runtime_singleton_->gpu_ids()[0]; - } else { - // No need to pin it for CPU exclusive access. 
- return HSA_STATUS_SUCCESS; - } - } - - uint64_t alternate_va = 0; - const bool is_resident = MakeKfdMemoryResident( - map_node_count, map_node_id, *address, size, &alternate_va, map_flag); - - const bool require_pinning = - (!full_profile() || IsLocalMemory() || IsScratch()); - - if (require_pinning && !is_resident) { - FreeKfdMemory(*address, size); - *address = NULL; - return HSA_STATUS_ERROR_OUT_OF_RESOURCES; - } - - return HSA_STATUS_SUCCESS; - } - - return HSA_STATUS_ERROR_OUT_OF_RESOURCES; -} - -hsa_status_t MemoryRegion::Free(void* address, size_t size) const { - MakeKfdMemoryUnresident(address); - - FreeKfdMemory(address, size); - - return HSA_STATUS_SUCCESS; -} - -hsa_status_t MemoryRegion::GetInfo(hsa_region_info_t attribute, - void* value) const { - switch (attribute) { - case HSA_REGION_INFO_SEGMENT: - switch (mem_props_.HeapType) { - case HSA_HEAPTYPE_SYSTEM: - case HSA_HEAPTYPE_FRAME_BUFFER_PRIVATE: - case HSA_HEAPTYPE_FRAME_BUFFER_PUBLIC: - *((hsa_region_segment_t*)value) = HSA_REGION_SEGMENT_GLOBAL; - break; - case HSA_HEAPTYPE_GPU_LDS: - *((hsa_region_segment_t*)value) = HSA_REGION_SEGMENT_GROUP; - break; - default: - assert(false && "Memory region should only be global, group"); - break; - } - break; - case HSA_REGION_INFO_GLOBAL_FLAGS: - switch (mem_props_.HeapType) { - case HSA_HEAPTYPE_SYSTEM: - *((uint32_t*)value) = fine_grain() - ? (HSA_REGION_GLOBAL_FLAG_KERNARG | - HSA_REGION_GLOBAL_FLAG_FINE_GRAINED) - : HSA_REGION_GLOBAL_FLAG_COARSE_GRAINED; - break; - case HSA_HEAPTYPE_FRAME_BUFFER_PRIVATE: - case HSA_HEAPTYPE_FRAME_BUFFER_PUBLIC: - *((uint32_t*)value) = HSA_REGION_GLOBAL_FLAG_COARSE_GRAINED; - break; - default: - *((uint32_t*)value) = 0; - break; - } - break; - case HSA_REGION_INFO_SIZE: - switch (mem_props_.HeapType) { - case HSA_HEAPTYPE_FRAME_BUFFER_PRIVATE: - case HSA_HEAPTYPE_FRAME_BUFFER_PUBLIC: - *((size_t*)value) = static_cast(GetPhysicalSize()); - break; - default: - *((size_t*)value) = static_cast( - (full_profile()) ? 
GetVirtualSize() : GetPhysicalSize()); - break; - } - break; - case HSA_REGION_INFO_ALLOC_MAX_SIZE: - switch (mem_props_.HeapType) { - case HSA_HEAPTYPE_FRAME_BUFFER_PRIVATE: - case HSA_HEAPTYPE_FRAME_BUFFER_PUBLIC: - case HSA_HEAPTYPE_SYSTEM: - *((size_t*)value) = max_single_alloc_size_; - break; - default: - *((size_t*)value) = 0; - } - break; - case HSA_REGION_INFO_RUNTIME_ALLOC_ALLOWED: - switch (mem_props_.HeapType) { - case HSA_HEAPTYPE_SYSTEM: - case HSA_HEAPTYPE_FRAME_BUFFER_PRIVATE: - case HSA_HEAPTYPE_FRAME_BUFFER_PUBLIC: - *((bool*)value) = true; - break; - default: - *((bool*)value) = false; - break; - } - break; - case HSA_REGION_INFO_RUNTIME_ALLOC_GRANULE: - switch (mem_props_.HeapType) { - case HSA_HEAPTYPE_SYSTEM: - case HSA_HEAPTYPE_FRAME_BUFFER_PRIVATE: - case HSA_HEAPTYPE_FRAME_BUFFER_PUBLIC: - *((size_t*)value) = kPageSize_; - break; - default: - *((size_t*)value) = 0; - break; - } - break; - case HSA_REGION_INFO_RUNTIME_ALLOC_ALIGNMENT: - switch (mem_props_.HeapType) { - case HSA_HEAPTYPE_SYSTEM: - case HSA_HEAPTYPE_FRAME_BUFFER_PRIVATE: - case HSA_HEAPTYPE_FRAME_BUFFER_PUBLIC: - *((size_t*)value) = kPageSize_; - break; - default: - *((size_t*)value) = 0; - break; - } - break; - default: - switch ((hsa_amd_region_info_t)attribute) { - case HSA_AMD_REGION_INFO_HOST_ACCESSIBLE: - *((bool*)value) = - (mem_props_.HeapType == HSA_HEAPTYPE_SYSTEM) ? 
true : false; - break; - case HSA_AMD_REGION_INFO_BASE: - *((void**)value) = reinterpret_cast(GetBaseAddress()); - break; - case HSA_AMD_REGION_INFO_BUS_WIDTH: - *((uint32_t*)value) = BusWidth(); - break; - case HSA_AMD_REGION_INFO_MAX_CLOCK_FREQUENCY: - *((uint32_t*)value) = MaxMemCloc(); - break; - default: - return HSA_STATUS_ERROR_INVALID_ARGUMENT; - break; - } - break; - } - return HSA_STATUS_SUCCESS; -} - -hsa_status_t MemoryRegion::GetPoolInfo(hsa_amd_memory_pool_info_t attribute, - void* value) const { - switch (attribute) { - case HSA_AMD_MEMORY_POOL_INFO_SEGMENT: - case HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS: - case HSA_AMD_MEMORY_POOL_INFO_SIZE: - case HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED: - case HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE: - case HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALIGNMENT: - return GetInfo(static_cast(attribute), value); - break; - case HSA_AMD_MEMORY_POOL_INFO_ACCESSIBLE_BY_ALL: - *((bool*)value) = IsSystem() ? true : false; - break; - default: - return HSA_STATUS_ERROR_INVALID_ARGUMENT; - } - - return HSA_STATUS_SUCCESS; -} - -hsa_status_t MemoryRegion::GetAgentPoolInfo( - const core::Agent& agent, hsa_amd_agent_memory_pool_info_t attribute, - void* value) const { - const uint32_t node_id_from = agent.node_id(); - const uint32_t node_id_to = owner()->node_id(); - - const core::Runtime::LinkInfo link_info = - core::Runtime::runtime_singleton_->GetLinkInfo(node_id_from, node_id_to); - - switch (attribute) { - case HSA_AMD_AGENT_MEMORY_POOL_INFO_ACCESS: - /** - * --------------------------------------------------- - * | |CPU |GPU (owner)|GPU (peer) | - * --------------------------------------------------- - * |system memory |allowed |disallowed |disallowed | - * --------------------------------------------------- - * |fb private |never |allowed |never | - * --------------------------------------------------- - * |fb public |disallowed |allowed |disallowed | - * --------------------------------------------------- - * 
|others |never |allowed |never | - * --------------------------------------------------- - */ - *((hsa_amd_memory_pool_access_t*)value) = - (((IsSystem()) && - (agent.device_type() == core::Agent::kAmdCpuDevice)) || - (agent.node_id() == owner()->node_id())) - ? HSA_AMD_MEMORY_POOL_ACCESS_ALLOWED_BY_DEFAULT - : (IsSystem() || (IsPublic() && link_info.num_hop > 0)) - ? HSA_AMD_MEMORY_POOL_ACCESS_DISALLOWED_BY_DEFAULT - : HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED; - break; - case HSA_AMD_AGENT_MEMORY_POOL_INFO_NUM_LINK_HOPS: - *((uint32_t*)value) = link_info.num_hop; - case HSA_AMD_AGENT_MEMORY_POOL_INFO_LINK_INFO: - memset(value, 0, sizeof(hsa_amd_memory_pool_link_info_t)); - if (link_info.num_hop > 0) { - memcpy(value, &link_info.info, sizeof(hsa_amd_memory_pool_link_info_t)); - } - break; - default: - return HSA_STATUS_ERROR_INVALID_ARGUMENT; - } - return HSA_STATUS_SUCCESS; -} - -hsa_status_t MemoryRegion::AllowAccess(uint32_t num_agents, - const hsa_agent_t* agents, - const void* ptr, size_t size) const { - if (num_agents == 0 || agents == NULL || ptr == NULL || size == 0) { - return HSA_STATUS_ERROR_INVALID_ARGUMENT; - } - - if (!IsSystem() && !IsLocalMemory()) { - return HSA_STATUS_ERROR; - } - - bool cpu_in_list = false; - - std::vector whitelist_nodes; - for (uint32_t i = 0; i < num_agents; ++i) { - const core::Agent* agent = core::Agent::Convert(agents[i]); - if (agent == NULL || !agent->IsValid()) { - return HSA_STATUS_ERROR_INVALID_AGENT; - } - - if (agent->device_type() == core::Agent::kAmdGpuDevice) { - whitelist_nodes.push_back(agent->node_id()); - } else { - cpu_in_list = true; - } - } - - if (whitelist_nodes.size() == 0 && IsSystem()) { - assert(cpu_in_list); - // This is a system region and only CPU agents in the whitelist. - // No need to call map. - return HSA_STATUS_SUCCESS; - } - - // If this is a local memory region, the owning gpu always needs to be in - // the whitelist. 
- if (IsPublic() && - std::find(whitelist_nodes.begin(), whitelist_nodes.end(), - owner()->node_id()) == whitelist_nodes.end()) { - whitelist_nodes.push_back(owner()->node_id()); - } - - HsaMemMapFlags map_flag = map_flag_; - map_flag.ui32.HostAccess |= (cpu_in_list) ? 1 : 0; - - uint64_t alternate_va = 0; - return (amd::MemoryRegion::MakeKfdMemoryResident( - whitelist_nodes.size(), &whitelist_nodes[0], - const_cast(ptr), size, &alternate_va, map_flag)) - ? HSA_STATUS_SUCCESS - : HSA_STATUS_ERROR_OUT_OF_RESOURCES; -} - -hsa_status_t MemoryRegion::CanMigrate(const MemoryRegion& dst, - bool& result) const { - // TODO(bwicakso): not implemented yet. - result = false; - return HSA_STATUS_ERROR_OUT_OF_RESOURCES; -} - -hsa_status_t MemoryRegion::Migrate(uint32_t flag, const void* ptr) const { - // TODO(bwicakso): not implemented yet. - return HSA_STATUS_ERROR_OUT_OF_RESOURCES; -} - -hsa_status_t MemoryRegion::Lock(uint32_t num_agents, const hsa_agent_t* agents, - void* host_ptr, size_t size, - void** agent_ptr) const { - if (!IsSystem()) { - return HSA_STATUS_ERROR; - } - - if (full_profile()) { - // For APU, any host pointer is always accessible by the gpu. - *agent_ptr = host_ptr; - return HSA_STATUS_SUCCESS; - } - - std::vector whitelist_nodes; - if (num_agents == 0 || agents == NULL) { - // Map to all GPU agents. - whitelist_nodes = core::Runtime::runtime_singleton_->gpu_ids(); - } else { - for (int i = 0; i < num_agents; ++i) { - core::Agent* agent = core::Agent::Convert(agents[i]); - if (agent == NULL || !agent->IsValid()) { - return HSA_STATUS_ERROR_INVALID_AGENT; - } - - if (agent->device_type() == core::Agent::kAmdGpuDevice) { - whitelist_nodes.push_back(agent->node_id()); - } - } - } - - if (whitelist_nodes.size() == 0) { - // No GPU agents in the whitelist. So no need to register and map since the - // platform only has CPUs. - *agent_ptr = host_ptr; - return HSA_STATUS_SUCCESS; - } - - // Call kernel driver to register and pin the memory. 
- if (RegisterMemory(host_ptr, size, whitelist_nodes.size(), - &whitelist_nodes[0])) { - uint64_t alternate_va = 0; - if (MakeKfdMemoryResident(whitelist_nodes.size(), &whitelist_nodes[0], - host_ptr, size, &alternate_va, map_flag_)) { - assert(alternate_va != 0); - *agent_ptr = reinterpret_cast(alternate_va); - return HSA_STATUS_SUCCESS; - } - amd::MemoryRegion::DeregisterMemory(host_ptr); - return HSA_STATUS_ERROR_OUT_OF_RESOURCES; - } - - return HSA_STATUS_ERROR; -} - -hsa_status_t MemoryRegion::Unlock(void* host_ptr) const { - if (!IsSystem()) { - return HSA_STATUS_ERROR; - } - - if (full_profile()) { - return HSA_STATUS_SUCCESS; - } - - MakeKfdMemoryUnresident(host_ptr); - DeregisterMemory(host_ptr); - - return HSA_STATUS_SUCCESS; -} - -hsa_status_t MemoryRegion::AssignAgent(void* ptr, size_t size, - const core::Agent& agent, - hsa_access_permission_t access) const { - return HSA_STATUS_SUCCESS; -} - -} // namespace diff --git a/runtime/hsa-runtime/core/runtime/amd_topology.cpp b/runtime/hsa-runtime/core/runtime/amd_topology.cpp deleted file mode 100644 index 2e071f05ca..0000000000 --- a/runtime/hsa-runtime/core/runtime/amd_topology.cpp +++ /dev/null @@ -1,210 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// -// The University of Illinois/NCSA -// Open Source License (NCSA) -// -// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. -// -// Developed by: -// -// AMD Research and AMD HSA Software Development -// -// Advanced Micro Devices, Inc. 
-// -// www.amd.com -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal with the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimers. -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimers in -// the documentation and/or other materials provided with the distribution. -// - Neither the names of Advanced Micro Devices, Inc, -// nor the names of its contributors may be used to endorse or promote -// products derived from this Software without specific prior written -// permission. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR -// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// DEALINGS WITH THE SOFTWARE. 
-// -//////////////////////////////////////////////////////////////////////////////// - -#include "core/inc/amd_topology.h" - -#include -#include -#include - -#include "hsakmt.h" - -#include "core/inc/runtime.h" -#include "core/inc/amd_cpu_agent.h" -#include "core/inc/amd_gpu_agent.h" -#include "core/inc/amd_memory_region.h" -#include "core/util/utils.h" - -namespace amd { -// Minimum acceptable KFD version numbers -static const uint kKfdVersionMajor = 0; -static const uint kKfdVersionMinor = 99; - -CpuAgent* DiscoverCpu(HSAuint32 node_id, HsaNodeProperties& node_prop) { - if (node_prop.NumCPUCores == 0) { - return NULL; - } - - CpuAgent* cpu = new CpuAgent(node_id, node_prop); - core::Runtime::runtime_singleton_->RegisterAgent(cpu); - - return cpu; -} - -GpuAgent* DiscoverGpu(HSAuint32 node_id, HsaNodeProperties& node_prop) { - if (node_prop.NumFComputeCores == 0) { - return NULL; - } - - GpuAgent* gpu = new GpuAgent(node_id, node_prop); - core::Runtime::runtime_singleton_->RegisterAgent(gpu); - - if (HSA_STATUS_SUCCESS != gpu->InitDma()) { - assert(false && "Fail init blit"); - delete gpu; - gpu = NULL; - } - - return gpu; -} - -void RegisterLinkInfo(uint32_t node_id, uint32_t num_link) { - // Register connectivity links for this agent to the runtime. - if (num_link == 0) { - return; - } - - std::vector links(num_link); - if (HSAKMT_STATUS_SUCCESS != - hsaKmtGetNodeIoLinkProperties(node_id, num_link, &links[0])) { - return; - } - - for (HsaIoLinkProperties io_link : links) { - // Populate link info with thunk property. - hsa_amd_memory_pool_link_info_t link_info = {0}; - - if (io_link.Flags.ui32.Override == 1) { - if (io_link.Flags.ui32.NoPeerToPeerDMA == 1) { - // Ignore this link since peer to peer is not allowed. 
- continue; - } - link_info.atomic_support_32bit = (io_link.Flags.ui32.NoAtomics32bit == 0); - link_info.atomic_support_64bit = (io_link.Flags.ui32.NoAtomics64bit == 0); - link_info.coherent_support = (io_link.Flags.ui32.NonCoherent == 0); - } else { - // TODO(bwicakso): decipher HSA_IOLINKTYPE to fill out the atomic - // and coherent information. - } - - switch (io_link.IoLinkType) { - case HSA_IOLINKTYPE_HYPERTRANSPORT: - link_info.link_type = HSA_AMD_LINK_INFO_TYPE_HYPERTRANSPORT; - break; - case HSA_IOLINKTYPE_PCIEXPRESS: - link_info.link_type = HSA_AMD_LINK_INFO_TYPE_PCIE; - break; - case HSA_IOLINK_TYPE_QPI_1_1: - link_info.link_type = HSA_AMD_LINK_INFO_TYPE_QPI; - break; - case HSA_IOLINK_TYPE_INFINIBAND: - link_info.link_type = HSA_AMD_LINK_INFO_TYPE_INFINBAND; - break; - default: - break; - } - - link_info.max_bandwidth = io_link.MaximumBandwidth; - link_info.max_latency = io_link.MaximumLatency; - link_info.min_bandwidth = io_link.MinimumBandwidth; - link_info.min_latency = io_link.MinimumLatency; - - core::Runtime::runtime_singleton_->RegisterLinkInfo( - io_link.NodeFrom, io_link.NodeTo, io_link.Weight, link_info); - } -} - -/// @brief Calls Kfd thunk to get the snapshot of the topology of the system, -/// which includes associations between, node, devices, memory and caches. 
-void BuildTopology() { - HsaVersionInfo info; - if (hsaKmtGetVersion(&info) != HSAKMT_STATUS_SUCCESS) { - return; - } - - if (info.KernelInterfaceMajorVersion == kKfdVersionMajor && - info.KernelInterfaceMinorVersion < kKfdVersionMinor) { - return; - } - - // Disable KFD event support when using open source KFD - if (info.KernelInterfaceMajorVersion == 1 && - info.KernelInterfaceMinorVersion == 0) { - core::g_use_interrupt_wait = false; - } - - HsaSystemProperties props; - hsaKmtReleaseSystemProperties(); - - if (hsaKmtAcquireSystemProperties(&props) != HSAKMT_STATUS_SUCCESS) { - return; - } - - core::Runtime::runtime_singleton_->SetLinkCount(props.NumNodes); - - // Discover agents on every node in the platform. - for (HSAuint32 node_id = 0; node_id < props.NumNodes; node_id++) { - HsaNodeProperties node_prop = {0}; - if (hsaKmtGetNodeProperties(node_id, &node_prop) != HSAKMT_STATUS_SUCCESS) { - continue; - } - - const CpuAgent* cpu = DiscoverCpu(node_id, node_prop); - const GpuAgent* gpu = DiscoverGpu(node_id, node_prop); - - assert(!(cpu == NULL && gpu == NULL)); - - RegisterLinkInfo(node_id, node_prop.NumIOLinks); - } -} - -bool Load() { - // Open connection to kernel driver. - if (hsaKmtOpenKFD() != HSAKMT_STATUS_SUCCESS) { - return false; - } - - // Build topology table. - BuildTopology(); - - return true; -} - -bool Unload() { - hsaKmtReleaseSystemProperties(); - - // Close connection to kernel driver. - hsaKmtCloseKFD(); - - return true; -} -} // namespace amd diff --git a/runtime/hsa-runtime/core/runtime/default_signal.cpp b/runtime/hsa-runtime/core/runtime/default_signal.cpp deleted file mode 100644 index 9b81de360b..0000000000 --- a/runtime/hsa-runtime/core/runtime/default_signal.cpp +++ /dev/null @@ -1,275 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// -// The University of Illinois/NCSA -// Open Source License (NCSA) -// -// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. 
-// -// Developed by: -// -// AMD Research and AMD HSA Software Development -// -// Advanced Micro Devices, Inc. -// -// www.amd.com -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal with the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimers. -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimers in -// the documentation and/or other materials provided with the distribution. -// - Neither the names of Advanced Micro Devices, Inc, -// nor the names of its contributors may be used to endorse or promote -// products derived from this Software without specific prior written -// permission. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR -// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// DEALINGS WITH THE SOFTWARE. 
-// -//////////////////////////////////////////////////////////////////////////////// - -#include "core/inc/default_signal.h" -#include "core/util/timer.h" - -namespace core { - -int DefaultSignal::rtti_id_ = 0; - -DefaultSignal::DefaultSignal(hsa_signal_value_t initial_value) - : Signal(initial_value) { - signal_.kind = AMD_SIGNAL_KIND_USER; - signal_.event_mailbox_ptr = NULL; - HSA::hsa_memory_register(this, sizeof(DefaultSignal)); -} - -DefaultSignal::~DefaultSignal() { - invalid_ = true; - while (InUse()) - ; - HSA::hsa_memory_deregister(this, sizeof(DefaultSignal)); -} - -hsa_signal_value_t DefaultSignal::LoadRelaxed() { - return hsa_signal_value_t( - atomic::Load(&signal_.value, std::memory_order_relaxed)); -} - -hsa_signal_value_t DefaultSignal::LoadAcquire() { - return hsa_signal_value_t( - atomic::Load(&signal_.value, std::memory_order_acquire)); -} - -void DefaultSignal::StoreRelaxed(hsa_signal_value_t value) { - atomic::Store(&signal_.value, int64_t(value), std::memory_order_relaxed); -} - -void DefaultSignal::StoreRelease(hsa_signal_value_t value) { - atomic::Store(&signal_.value, int64_t(value), std::memory_order_release); -} - -hsa_signal_value_t DefaultSignal::WaitRelaxed(hsa_signal_condition_t condition, - hsa_signal_value_t compare_value, - uint64_t timeout, - hsa_wait_state_t wait_hint) { - atomic::Increment(&waiting_); - MAKE_SCOPE_GUARD([&]() { atomic::Decrement(&waiting_); }); - bool condition_met = false; - int64_t value; - - assert(!g_use_interrupt_wait && "Use of non-host signal in host signal wait API."); - - timer::fast_clock::time_point start_time, time; - start_time = timer::fast_clock::now(); - - uint64_t hsa_freq; - HSA::hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &hsa_freq); - const timer::fast_clock::duration fast_timeout = - timer::duration_from_seconds( - double(timeout) / double(hsa_freq)); - - while (true) { - if (invalid_) return 0; - - value = atomic::Load(&signal_.value, std::memory_order_relaxed); - - switch 
(condition) { - case HSA_SIGNAL_CONDITION_EQ: { - condition_met = (value == compare_value); - break; - } - case HSA_SIGNAL_CONDITION_NE: { - condition_met = (value != compare_value); - break; - } - case HSA_SIGNAL_CONDITION_GTE: { - condition_met = (value >= compare_value); - break; - } - case HSA_SIGNAL_CONDITION_LT: { - condition_met = (value < compare_value); - break; - } - default: - return 0; - } - if (condition_met) return hsa_signal_value_t(value); - - time = timer::fast_clock::now(); - if (time - start_time > fast_timeout) { - value = atomic::Load(&signal_.value, std::memory_order_relaxed); - return hsa_signal_value_t(value); - } - } -} - -hsa_signal_value_t DefaultSignal::WaitAcquire(hsa_signal_condition_t condition, - hsa_signal_value_t compare_value, - uint64_t timeout, - hsa_wait_state_t wait_hint) { - hsa_signal_value_t ret = - WaitRelaxed(condition, compare_value, timeout, wait_hint); - std::atomic_thread_fence(std::memory_order_acquire); - return ret; -} - -void DefaultSignal::AndRelaxed(hsa_signal_value_t value) { - atomic::And(&signal_.value, int64_t(value), std::memory_order_relaxed); -} - -void DefaultSignal::AndAcquire(hsa_signal_value_t value) { - atomic::And(&signal_.value, int64_t(value), std::memory_order_acquire); -} - -void DefaultSignal::AndRelease(hsa_signal_value_t value) { - atomic::And(&signal_.value, int64_t(value), std::memory_order_release); -} - -void DefaultSignal::AndAcqRel(hsa_signal_value_t value) { - atomic::And(&signal_.value, int64_t(value), std::memory_order_acq_rel); -} - -void DefaultSignal::OrRelaxed(hsa_signal_value_t value) { - atomic::Or(&signal_.value, int64_t(value), std::memory_order_relaxed); -} - -void DefaultSignal::OrAcquire(hsa_signal_value_t value) { - atomic::Or(&signal_.value, int64_t(value), std::memory_order_acquire); -} - -void DefaultSignal::OrRelease(hsa_signal_value_t value) { - atomic::Or(&signal_.value, int64_t(value), std::memory_order_release); -} - -void 
DefaultSignal::OrAcqRel(hsa_signal_value_t value) { - atomic::Or(&signal_.value, int64_t(value), std::memory_order_acq_rel); -} - -void DefaultSignal::XorRelaxed(hsa_signal_value_t value) { - atomic::Xor(&signal_.value, int64_t(value), std::memory_order_relaxed); -} - -void DefaultSignal::XorAcquire(hsa_signal_value_t value) { - atomic::Xor(&signal_.value, int64_t(value), std::memory_order_acquire); -} - -void DefaultSignal::XorRelease(hsa_signal_value_t value) { - atomic::Xor(&signal_.value, int64_t(value), std::memory_order_release); -} - -void DefaultSignal::XorAcqRel(hsa_signal_value_t value) { - atomic::Xor(&signal_.value, int64_t(value), std::memory_order_acq_rel); -} - -void DefaultSignal::AddRelaxed(hsa_signal_value_t value) { - atomic::Add(&signal_.value, int64_t(value), std::memory_order_relaxed); -} - -void DefaultSignal::AddAcquire(hsa_signal_value_t value) { - atomic::Add(&signal_.value, int64_t(value), std::memory_order_acquire); -} - -void DefaultSignal::AddRelease(hsa_signal_value_t value) { - atomic::Add(&signal_.value, int64_t(value), std::memory_order_release); -} - -void DefaultSignal::AddAcqRel(hsa_signal_value_t value) { - atomic::Add(&signal_.value, int64_t(value), std::memory_order_acq_rel); -} - -void DefaultSignal::SubRelaxed(hsa_signal_value_t value) { - atomic::Sub(&signal_.value, int64_t(value), std::memory_order_relaxed); -} - -void DefaultSignal::SubAcquire(hsa_signal_value_t value) { - atomic::Sub(&signal_.value, int64_t(value), std::memory_order_acquire); -} - -void DefaultSignal::SubRelease(hsa_signal_value_t value) { - atomic::Sub(&signal_.value, int64_t(value), std::memory_order_release); -} - -void DefaultSignal::SubAcqRel(hsa_signal_value_t value) { - atomic::Sub(&signal_.value, int64_t(value), std::memory_order_acq_rel); -} - -hsa_signal_value_t DefaultSignal::ExchRelaxed(hsa_signal_value_t value) { - return hsa_signal_value_t(atomic::Exchange(&signal_.value, int64_t(value), - std::memory_order_relaxed)); -} - 
-hsa_signal_value_t DefaultSignal::ExchAcquire(hsa_signal_value_t value) { - return hsa_signal_value_t(atomic::Exchange(&signal_.value, int64_t(value), - std::memory_order_acquire)); -} - -hsa_signal_value_t DefaultSignal::ExchRelease(hsa_signal_value_t value) { - return hsa_signal_value_t(atomic::Exchange(&signal_.value, int64_t(value), - std::memory_order_release)); -} - -hsa_signal_value_t DefaultSignal::ExchAcqRel(hsa_signal_value_t value) { - return hsa_signal_value_t(atomic::Exchange(&signal_.value, int64_t(value), - std::memory_order_acq_rel)); -} - -hsa_signal_value_t DefaultSignal::CasRelaxed(hsa_signal_value_t expected, - hsa_signal_value_t value) { - return hsa_signal_value_t(atomic::Cas(&signal_.value, int64_t(value), - int64_t(expected), - std::memory_order_relaxed)); -} - -hsa_signal_value_t DefaultSignal::CasAcquire(hsa_signal_value_t expected, - hsa_signal_value_t value) { - return hsa_signal_value_t(atomic::Cas(&signal_.value, int64_t(value), - int64_t(expected), - std::memory_order_acquire)); -} - -hsa_signal_value_t DefaultSignal::CasRelease(hsa_signal_value_t expected, - hsa_signal_value_t value) { - return hsa_signal_value_t(atomic::Cas(&signal_.value, int64_t(value), - int64_t(expected), - std::memory_order_release)); -} - -hsa_signal_value_t DefaultSignal::CasAcqRel(hsa_signal_value_t expected, - hsa_signal_value_t value) { - return hsa_signal_value_t(atomic::Cas(&signal_.value, int64_t(value), - int64_t(expected), - std::memory_order_acq_rel)); -} - -} // namespace core diff --git a/runtime/hsa-runtime/core/runtime/host_queue.cpp b/runtime/hsa-runtime/core/runtime/host_queue.cpp deleted file mode 100644 index 3803b6508c..0000000000 --- a/runtime/hsa-runtime/core/runtime/host_queue.cpp +++ /dev/null @@ -1,99 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// -// The University of Illinois/NCSA -// Open Source License (NCSA) -// -// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. 
All rights reserved. -// -// Developed by: -// -// AMD Research and AMD HSA Software Development -// -// Advanced Micro Devices, Inc. -// -// www.amd.com -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal with the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimers. -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimers in -// the documentation and/or other materials provided with the distribution. -// - Neither the names of Advanced Micro Devices, Inc, -// nor the names of its contributors may be used to endorse or promote -// products derived from this Software without specific prior written -// permission. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR -// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// DEALINGS WITH THE SOFTWARE. 
-// -//////////////////////////////////////////////////////////////////////////////// - -#include "core/inc/host_queue.h" - -#include "core/inc/runtime.h" -#include "core/util/utils.h" - -namespace core { -HostQueue::HostQueue(hsa_region_t region, uint32_t ring_size, - hsa_queue_type_t type, uint32_t features, - hsa_signal_t doorbell_signal) - : Queue(), - size_(ring_size), - active_(false) { - if (!Shared::IsSharedObjectAllocationValid()) { - return; - } - - HSA::hsa_memory_register(this, sizeof(HostQueue)); - - const size_t queue_buffer_size = size_ * sizeof(AqlPacket); - if (HSA_STATUS_SUCCESS != - HSA::hsa_memory_allocate(region, queue_buffer_size, &ring_)) { - return; - } - - assert(IsMultipleOf(ring_, kRingAlignment)); - assert(ring_ != NULL); - - amd_queue_.hsa_queue.base_address = ring_; - amd_queue_.hsa_queue.size = size_; - amd_queue_.hsa_queue.doorbell_signal = doorbell_signal; - amd_queue_.hsa_queue.id = Runtime::runtime_singleton_->GetQueueId(); - amd_queue_.hsa_queue.type = type; - amd_queue_.hsa_queue.features = features; -#ifdef HSA_LARGE_MODEL - AMD_HSA_BITS_SET( - amd_queue_.queue_properties, AMD_QUEUE_PROPERTIES_IS_PTR64, 1); -#else - AMD_HSA_BITS_SET( - amd_queue_.queue_properties, AMD_QUEUE_PROPERTIES_IS_PTR64, 0); -#endif - amd_queue_.write_dispatch_id = amd_queue_.read_dispatch_id = 0; - AMD_HSA_BITS_SET( - amd_queue_.queue_properties, AMD_QUEUE_PROPERTIES_ENABLE_PROFILING, 0); - - active_ = true; -} - -HostQueue::~HostQueue() { - if (!Shared::IsSharedObjectAllocationValid()) { - return; - } - - HSA::hsa_memory_free(ring_); - HSA::hsa_memory_deregister(this, sizeof(HostQueue)); -} - -} // namespace core diff --git a/runtime/hsa-runtime/core/runtime/hsa.cpp b/runtime/hsa-runtime/core/runtime/hsa.cpp deleted file mode 100644 index 7683eabb08..0000000000 --- a/runtime/hsa-runtime/core/runtime/hsa.cpp +++ /dev/null @@ -1,1710 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// -// The University of 
Illinois/NCSA -// Open Source License (NCSA) -// -// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. -// -// Developed by: -// -// AMD Research and AMD HSA Software Development -// -// Advanced Micro Devices, Inc. -// -// www.amd.com -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal with the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimers. -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimers in -// the documentation and/or other materials provided with the distribution. -// - Neither the names of Advanced Micro Devices, Inc, -// nor the names of its contributors may be used to endorse or promote -// products derived from this Software without specific prior written -// permission. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR -// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// DEALINGS WITH THE SOFTWARE. -// -//////////////////////////////////////////////////////////////////////////////// - -// HSA C to C++ interface implementation. -// This file does argument checking and conversion to C++. 
-#include -#include - -#include "core/inc/runtime.h" -#include "core/inc/agent.h" -#include "core/inc/host_queue.h" -#include "core/inc/isa.h" -#include "core/inc/memory_region.h" -#include "core/inc/queue.h" -#include "core/inc/signal.h" -#include "core/inc/default_signal.h" -#include "core/inc/interrupt_signal.h" -#include "core/inc/amd_load_map.h" -#include "core/inc/amd_loader_context.hpp" - -using namespace amd::hsa::code; - -template -struct ValidityError; -template <> -struct ValidityError { - enum { kValue = HSA_STATUS_ERROR_INVALID_SIGNAL }; -}; -template <> -struct ValidityError { - enum { kValue = HSA_STATUS_ERROR_INVALID_AGENT }; -}; -template <> -struct ValidityError { - enum { kValue = HSA_STATUS_ERROR_INVALID_REGION }; -}; -template <> -struct ValidityError { - enum { kValue = HSA_STATUS_ERROR_INVALID_QUEUE }; -}; -template <> -struct ValidityError { - enum { kValue = HSA_STATUS_ERROR_INVALID_ISA }; -}; -template -struct ValidityError { - enum { kValue = ValidityError::kValue }; -}; - -#define IS_BAD_PTR(ptr) \ - do { \ - if ((ptr) == NULL) return HSA_STATUS_ERROR_INVALID_ARGUMENT; \ - } while (false) -#define IS_BAD_PROFILE(profile) \ - do { \ - if (profile != HSA_PROFILE_BASE && \ - profile != HSA_PROFILE_FULL) { \ - return HSA_STATUS_ERROR_INVALID_ARGUMENT; \ - } \ - } while (false) -#define IS_VALID(ptr) \ - do { \ - if (((ptr) == NULL) || !((ptr)->IsValid())) \ - return hsa_status_t(ValidityError::kValue); \ - } while (false) -#define CHECK_ALLOC(ptr) \ - do { \ - if ((ptr) == NULL) return HSA_STATUS_ERROR_OUT_OF_RESOURCES; \ - } while (false) -#define IS_OPEN() \ - do { \ - if (!core::Runtime::runtime_singleton_->IsOpen()) \ - return HSA_STATUS_ERROR_NOT_INITIALIZED; \ - } while (false) - -template -static __forceinline bool IsValid(T* ptr) { - return (ptr == NULL) ? 
NULL : ptr->IsValid(); -} - -//----------------------------------------------------------------------------- -// Basic Checks -//----------------------------------------------------------------------------- -static_assert(sizeof(hsa_barrier_and_packet_t) == - sizeof(hsa_kernel_dispatch_packet_t), - "AQL packet definitions have wrong sizes!"); -static_assert(sizeof(hsa_barrier_and_packet_t) == - sizeof(hsa_agent_dispatch_packet_t), - "AQL packet definitions have wrong sizes!"); -static_assert(sizeof(hsa_barrier_and_packet_t) == 64, - "AQL packet definitions have wrong sizes!"); -static_assert(sizeof(hsa_barrier_and_packet_t) == - sizeof(hsa_barrier_or_packet_t), - "AQL packet definitions have wrong sizes!"); -#ifdef HSA_LARGE_MODEL -static_assert(sizeof(void*) == 8, "HSA_LARGE_MODEL is set incorrectly!"); -#else -static_assert(sizeof(void*) == 4, "HSA_LARGE_MODEL is set incorrectly!"); -#endif - -namespace HSA { - -//---------------------------------------------------------------------------// -// Init/Shutdown routines -//---------------------------------------------------------------------------// -hsa_status_t hsa_init() { - if (core::Runtime::runtime_singleton_->Acquire()) return HSA_STATUS_SUCCESS; - return HSA_STATUS_ERROR_REFCOUNT_OVERFLOW; -} - -hsa_status_t hsa_shut_down() { - IS_OPEN(); - if (core::Runtime::runtime_singleton_->Release()) return HSA_STATUS_SUCCESS; - return HSA_STATUS_ERROR_NOT_INITIALIZED; -} - -//---------------------------------------------------------------------------// -// System -//---------------------------------------------------------------------------// -hsa_status_t - hsa_system_get_info(hsa_system_info_t attribute, void* value) { - IS_OPEN(); - IS_BAD_PTR(value); - return core::Runtime::runtime_singleton_->GetSystemInfo(attribute, value); -} - -hsa_status_t - hsa_system_extension_supported(uint16_t extension, uint16_t version_major, - uint16_t version_minor, bool* result) { - IS_OPEN(); - - if ((extension > 
HSA_EXTENSION_AMD_PROFILER && - extension != AMD_EXTENSION_LOAD_MAP) || result == NULL) { - return HSA_STATUS_ERROR_INVALID_ARGUMENT; - } - - *result = false; - - uint16_t system_version_major = 0; - hsa_status_t status = core::Runtime::runtime_singleton_->GetSystemInfo( - HSA_SYSTEM_INFO_VERSION_MAJOR, &system_version_major); - assert(status == HSA_STATUS_SUCCESS); - - if (version_major <= system_version_major) { - uint16_t system_version_minor = 0; - status = core::Runtime::runtime_singleton_->GetSystemInfo( - HSA_SYSTEM_INFO_VERSION_MINOR, &system_version_minor); - assert(status == HSA_STATUS_SUCCESS); - - if (version_minor <= system_version_minor) { - *result = true; - } - } - - return HSA_STATUS_SUCCESS; -} - -hsa_status_t - hsa_system_get_extension_table(uint16_t extension, uint16_t version_major, - uint16_t version_minor, void* table) { - if (table == NULL) { - return HSA_STATUS_ERROR_INVALID_ARGUMENT; - } - - IS_OPEN(); - - bool supported = false; - hsa_status_t status = hsa_system_extension_supported( - extension, version_major, version_minor, &supported); - - if (HSA_STATUS_SUCCESS != status) { - return status; - } - - if (supported) { - ExtTable& runtime_ext_table = - core::Runtime::runtime_singleton_->extensions_.table; - - if (extension == HSA_EXTENSION_IMAGES) { - // Currently there is only version 1.00. 
- hsa_ext_images_1_00_pfn_t* ext_table = - reinterpret_cast(table); - ext_table->hsa_ext_image_clear = hsa_ext_image_clear; - ext_table->hsa_ext_image_copy = hsa_ext_image_copy; - ext_table->hsa_ext_image_create = hsa_ext_image_create; - ext_table->hsa_ext_image_data_get_info = hsa_ext_image_data_get_info; - ext_table->hsa_ext_image_destroy = hsa_ext_image_destroy; - ext_table->hsa_ext_image_export = hsa_ext_image_export; - ext_table->hsa_ext_image_get_capability = hsa_ext_image_get_capability; - ext_table->hsa_ext_image_import = hsa_ext_image_import; - ext_table->hsa_ext_sampler_create = hsa_ext_sampler_create; - ext_table->hsa_ext_sampler_destroy = hsa_ext_sampler_destroy; - - return HSA_STATUS_SUCCESS; - } else if (extension == HSA_EXTENSION_FINALIZER) { - // Currently there is only version 1.00. - hsa_ext_finalizer_1_00_pfn_s* ext_table = - reinterpret_cast(table); - ext_table->hsa_ext_program_add_module = hsa_ext_program_add_module; - ext_table->hsa_ext_program_create = hsa_ext_program_create; - ext_table->hsa_ext_program_destroy = hsa_ext_program_destroy; - ext_table->hsa_ext_program_finalize = hsa_ext_program_finalize; - ext_table->hsa_ext_program_get_info = hsa_ext_program_get_info; - ext_table->hsa_ext_program_iterate_modules = - hsa_ext_program_iterate_modules; - - return HSA_STATUS_SUCCESS; - } else if (extension == AMD_EXTENSION_LOAD_MAP) { - // Currently there is only version 1.00. 
- amd_load_map_1_00_pfn_t* amd_table = - reinterpret_cast(table); - amd_table->amd_executable_load_code_object = amd_executable_load_code_object; - amd_table->amd_iterate_executables = amd_iterate_executables; - amd_table->amd_executable_iterate_loaded_code_objects = amd_executable_iterate_loaded_code_objects; - amd_table->amd_loaded_code_object_get_info = amd_loaded_code_object_get_info; - amd_table->amd_loaded_code_object_iterate_loaded_segments = amd_loaded_code_object_iterate_loaded_segments; - amd_table->amd_loaded_segment_get_info = amd_loaded_segment_get_info; - } else { - // TODO: other extensions are not yet implemented. - return HSA_STATUS_ERROR; - } - } - - return HSA_STATUS_SUCCESS; -} - -//---------------------------------------------------------------------------// -// Agent -//---------------------------------------------------------------------------// -hsa_status_t - hsa_iterate_agents(hsa_status_t (*callback)(hsa_agent_t agent, void* data), - void* data) { - IS_OPEN(); - IS_BAD_PTR(callback); - return core::Runtime::runtime_singleton_->IterateAgent(callback, data); -} - -hsa_status_t hsa_agent_get_info(hsa_agent_t agent_handle, - hsa_agent_info_t attribute, - void* value) { - IS_OPEN(); - IS_BAD_PTR(value); - const core::Agent* agent = core::Agent::Convert(agent_handle); - IS_VALID(agent); - return agent->GetInfo(attribute, value); -} - -hsa_status_t hsa_agent_get_exception_policies(hsa_agent_t agent_handle, - hsa_profile_t profile, - uint16_t* mask) { - IS_OPEN(); - IS_BAD_PTR(mask); - IS_BAD_PROFILE(profile); - const core::Agent* agent = core::Agent::Convert(agent_handle); - IS_VALID(agent); - - // TODO: Fix me when exception policies are supported. 
- *mask = 0; - return HSA_STATUS_SUCCESS; -} - -hsa_status_t - hsa_agent_extension_supported(uint16_t extension, hsa_agent_t agent_handle, - uint16_t version_major, - uint16_t version_minor, bool* result) { - IS_OPEN(); - - if ((result == NULL) || (extension > HSA_EXTENSION_AMD_PROFILER)) { - return HSA_STATUS_ERROR_INVALID_ARGUMENT; - } - - *result = false; - - const core::Agent* agent = core::Agent::Convert(agent_handle); - IS_VALID(agent); - - if (agent->device_type() == core::Agent::kAmdGpuDevice) { - uint16_t agent_version_major = 0; - hsa_status_t status = - agent->GetInfo(HSA_AGENT_INFO_VERSION_MAJOR, &agent_version_major); - assert(status == HSA_STATUS_SUCCESS); - - if (version_major <= agent_version_major) { - uint16_t agent_version_minor = 0; - status = - agent->GetInfo(HSA_AGENT_INFO_VERSION_MINOR, &agent_version_minor); - assert(status == HSA_STATUS_SUCCESS); - - if (version_minor <= agent_version_minor) { - *result = true; - } - } - } - - return HSA_STATUS_SUCCESS; -} - -/// @brief Api to create a user mode queue. 
-/// -/// @param agent Hsa Agent which will execute Aql commands -/// -/// @param size Size of Queue in terms of Aql packet size -/// -/// @param type of Queue Single Writer or Multiple Writer -/// -/// @param callback Callback function to register in case Quee -/// encounters an error -/// -/// @param service_queue Pointer to a service queue -/// -/// @param queue Output parameter updated with a pointer to the -/// queue being created -/// -/// @return hsa_status -hsa_status_t hsa_queue_create( - hsa_agent_t agent_handle, uint32_t size, hsa_queue_type_t type, - void (*callback)(hsa_status_t status, hsa_queue_t* source, void* data), - void* data, uint32_t private_segment_size, uint32_t group_segment_size, - hsa_queue_t** queue) { - IS_OPEN(); - - if ((queue == NULL) || (size == 0) || (!IsPowerOfTwo(size)) || - (type < HSA_QUEUE_TYPE_MULTI) || (type > HSA_QUEUE_TYPE_SINGLE)) { - return HSA_STATUS_ERROR_INVALID_ARGUMENT; - } - - core::Agent* agent = core::Agent::Convert(agent_handle); - IS_VALID(agent); - - hsa_queue_type_t agent_queue_type = HSA_QUEUE_TYPE_MULTI; - hsa_status_t status = - agent->GetInfo(HSA_AGENT_INFO_QUEUE_TYPE, &agent_queue_type); - assert(HSA_STATUS_SUCCESS == status); - - if (agent_queue_type == HSA_QUEUE_TYPE_SINGLE && - type != HSA_QUEUE_TYPE_SINGLE) { - return HSA_STATUS_ERROR_INVALID_QUEUE_CREATION; - } - - // TODO: private_segment_size and group_segment_size. 
- core::Queue* cmd_queue = NULL; - status = agent->QueueCreate(size, type, callback, data, private_segment_size, - group_segment_size, &cmd_queue); - if (cmd_queue != NULL) { - *queue = core::Queue::Convert(cmd_queue); - if (*queue == NULL) { - delete cmd_queue; - return HSA_STATUS_ERROR_OUT_OF_RESOURCES; - } - } else { - *queue = NULL; - } - - return status; -} - -hsa_status_t hsa_soft_queue_create(hsa_region_t region, uint32_t size, - hsa_queue_type_t type, uint32_t features, - hsa_signal_t doorbell_signal, - hsa_queue_t** queue) { - IS_OPEN(); - - if ((queue == NULL) || (region.handle == 0) || - (doorbell_signal.handle == 0) || (size == 0) || (!IsPowerOfTwo(size)) || - (type < HSA_QUEUE_TYPE_MULTI) || (type > HSA_QUEUE_TYPE_SINGLE) || - (features == 0)) { - return HSA_STATUS_ERROR_INVALID_ARGUMENT; - } - - const core::MemoryRegion* mem_region = core::MemoryRegion::Convert(region); - IS_VALID(mem_region); - - const core::Signal* signal = core::Signal::Convert(doorbell_signal); - IS_VALID(signal); - - core::HostQueue* host_queue = - new core::HostQueue(region, size, type, features, doorbell_signal); - - if (!host_queue->active()) { - delete host_queue; - return HSA_STATUS_ERROR_OUT_OF_RESOURCES; - } - - *queue = core::Queue::Convert(host_queue); - - return HSA_STATUS_SUCCESS; -} - -/// @brief Api to destroy a user mode queue -/// -/// @param queue Pointer to the queue being destroyed -/// -/// @return hsa_status -hsa_status_t hsa_queue_destroy(hsa_queue_t* queue) { - IS_OPEN(); - IS_BAD_PTR(queue); - core::Queue* cmd_queue = core::Queue::Convert(queue); - IS_VALID(cmd_queue); - delete cmd_queue; - return HSA_STATUS_SUCCESS; -} - -/// @brief Api to inactivate a user mode queue -/// -/// @param queue Pointer to the queue being inactivated -/// -/// @return hsa_status -hsa_status_t hsa_queue_inactivate(hsa_queue_t* queue) { - IS_OPEN(); - IS_BAD_PTR(queue); - core::Queue* cmd_queue = core::Queue::Convert(queue); - IS_VALID(cmd_queue); - cmd_queue->Inactivate(); - 
return HSA_STATUS_SUCCESS; -} - -/// @brief Api to read the Read Index of Queue using Acquire semantics -/// -/// @param queue Pointer to the queue whose read index is being read -/// -/// @return uint64_t Value of Read index -uint64_t hsa_queue_load_read_index_acquire(const hsa_queue_t* queue) { - core::Queue* cmd_queue = core::Queue::Convert(queue); - assert(IsValid(cmd_queue)); - return cmd_queue->LoadReadIndexAcquire(); -} - -/// @brief Api to read the Read Index of Queue using Relaxed semantics -/// -/// @param queue Pointer to the queue whose read index is being read -/// -/// @return uint64_t Value of Read index -uint64_t hsa_queue_load_read_index_relaxed(const hsa_queue_t* queue) { - core::Queue* cmd_queue = core::Queue::Convert(queue); - assert(IsValid(cmd_queue)); - return cmd_queue->LoadReadIndexRelaxed(); -} - -/// @brief Api to read the Write Index of Queue using Acquire semantics -/// -/// @param queue Pointer to the queue whose write index is being read -/// -/// @return uint64_t Value of Write index -uint64_t hsa_queue_load_write_index_acquire(const hsa_queue_t* queue) { - core::Queue* cmd_queue = core::Queue::Convert(queue); - assert(IsValid(cmd_queue)); - return cmd_queue->LoadWriteIndexAcquire(); -} - -/// @brief Api to read the Write Index of Queue using Relaxed semantics -/// -/// @param queue Pointer to the queue whose write index is being read -/// -/// @return uint64_t Value of Write index -uint64_t hsa_queue_load_write_index_relaxed(const hsa_queue_t* queue) { - core::Queue* cmd_queue = core::Queue::Convert(queue); - assert(IsValid(cmd_queue)); - return cmd_queue->LoadWriteIndexAcquire(); -} - -/// @brief Api to store the Read Index of Queue using Relaxed semantics -/// -/// @param queue Pointer to the queue whose read index is being updated -/// -/// @param value Value of new read index -void hsa_queue_store_read_index_relaxed(const hsa_queue_t* queue, - uint64_t value) { - core::Queue* cmd_queue = core::Queue::Convert(queue); - 
assert(IsValid(cmd_queue)); - cmd_queue->StoreReadIndexRelaxed(value); -} - -/// @brief Api to store the Read Index of Queue using Release semantics -/// -/// @param queue Pointer to the queue whose read index is being updated -/// -/// @param value Value of new read index -void hsa_queue_store_read_index_release(const hsa_queue_t* queue, - uint64_t value) { - core::Queue* cmd_queue = core::Queue::Convert(queue); - assert(IsValid(cmd_queue)); - cmd_queue->StoreReadIndexRelease(value); -} - -/// @brief Api to store the Write Index of Queue using Relaxed semantics -/// -/// @param queue Pointer to the queue whose write index is being updated -/// -/// @param value Value of new write index -void hsa_queue_store_write_index_relaxed(const hsa_queue_t* queue, - uint64_t value) { - core::Queue* cmd_queue = core::Queue::Convert(queue); - assert(IsValid(cmd_queue)); - cmd_queue->StoreWriteIndexRelaxed(value); -} - -/// @brief Api to store the Write Index of Queue using Release semantics -/// -/// @param queue Pointer to the queue whose write index is being updated -/// -/// @param value Value of new write index -void hsa_queue_store_write_index_release(const hsa_queue_t* queue, - uint64_t value) { - core::Queue* cmd_queue = core::Queue::Convert(queue); - assert(IsValid(cmd_queue)); - cmd_queue->StoreWriteIndexRelease(value); -} - -/// @brief Api to compare and swap the Write Index of Queue using Acquire and -/// Release semantics -/// -/// @param queue Pointer to the queue whose write index is being updated -/// -/// @param expected Current value of write index -/// -/// @param value Value of new write index -/// -/// @return uint64_t Value of write index before the update -uint64_t hsa_queue_cas_write_index_acq_rel(const hsa_queue_t* queue, - uint64_t expected, - uint64_t value) { - core::Queue* cmd_queue = core::Queue::Convert(queue); - assert(IsValid(cmd_queue)); - return cmd_queue->CasWriteIndexAcqRel(expected, value); -} - -/// @brief Api to compare and swap the Write 
Index of Queue using Acquire -/// Semantics -/// -/// @param queue Pointer to the queue whose write index is being updated -/// -/// @param expected Current value of write index -/// -/// @param value Value of new write index -/// -/// @return uint64_t Value of write index before the update -uint64_t hsa_queue_cas_write_index_acquire(const hsa_queue_t* queue, - uint64_t expected, - uint64_t value) { - core::Queue* cmd_queue = core::Queue::Convert(queue); - assert(IsValid(cmd_queue)); - return cmd_queue->CasWriteIndexAcquire(expected, value); -} - -/// @brief Api to compare and swap the Write Index of Queue using Relaxed -/// Semantics -/// -/// @param queue Pointer to the queue whose write index is being updated -/// -/// @param expected Current value of write index -/// -/// @param value Value of new write index -/// -/// @return uint64_t Value of write index before the update -uint64_t hsa_queue_cas_write_index_relaxed(const hsa_queue_t* queue, - uint64_t expected, - uint64_t value) { - core::Queue* cmd_queue = core::Queue::Convert(queue); - assert(IsValid(cmd_queue)); - return cmd_queue->CasWriteIndexRelaxed(expected, value); -} - -/// @brief Api to compare and swap the Write Index of Queue using Release -/// Semantics -/// -/// @param queue Pointer to the queue whose write index is being updated -/// -/// @param expected Current value of write index -/// -/// @param value Value of new write index -/// -/// @return uint64_t Value of write index before the update -uint64_t hsa_queue_cas_write_index_release(const hsa_queue_t* queue, - uint64_t expected, - uint64_t value) { - core::Queue* cmd_queue = core::Queue::Convert(queue); - assert(IsValid(cmd_queue)); - return cmd_queue->CasWriteIndexRelease(expected, value); -} - -/// @brief Api to Add to the Write Index of Queue using Acquire and Release -/// Semantics -/// -/// @param queue Pointer to the queue whose write index is being updated -/// -/// @param value Value to add to write index -/// -/// @return uint64_t 
Value of write index before the update -uint64_t hsa_queue_add_write_index_acq_rel(const hsa_queue_t* queue, - uint64_t value) { - core::Queue* cmd_queue = core::Queue::Convert(queue); - assert(IsValid(cmd_queue)); - return cmd_queue->AddWriteIndexAcqRel(value); -} - -/// @brief Api to Add to the Write Index of Queue using Acquire Semantics -/// -/// @param queue Pointer to the queue whose write index is being updated -/// -/// @param value Value to add to write index -/// -/// @return uint64_t Value of write index before the update -uint64_t hsa_queue_add_write_index_acquire(const hsa_queue_t* queue, - uint64_t value) { - core::Queue* cmd_queue = core::Queue::Convert(queue); - assert(IsValid(cmd_queue)); - return cmd_queue->AddWriteIndexAcquire(value); -} - -/// @brief Api to Add to the Write Index of Queue using Relaxed Semantics -/// -/// @param queue Pointer to the queue whose write index is being updated -/// -/// @param value Value to add to write index -/// -/// @return uint64_t Value of write index before the update -uint64_t hsa_queue_add_write_index_relaxed(const hsa_queue_t* queue, - uint64_t value) { - core::Queue* cmd_queue = core::Queue::Convert(queue); - assert(IsValid(cmd_queue)); - return cmd_queue->AddWriteIndexRelaxed(value); -} - -/// @brief Api to Add to the Write Index of Queue using Release Semantics -/// -/// @param queue Pointer to the queue whose write index is being updated -/// -/// @param value Value to add to write index -/// -/// @return uint64_t Value of write index before the update -uint64_t hsa_queue_add_write_index_release(const hsa_queue_t* queue, - uint64_t value) { - core::Queue* cmd_queue = core::Queue::Convert(queue); - assert(IsValid(cmd_queue)); - return cmd_queue->AddWriteIndexRelease(value); -} - -//----------------------------------------------------------------------------- -// Memory -//----------------------------------------------------------------------------- -hsa_status_t hsa_agent_iterate_regions( - hsa_agent_t 
agent_handle, - hsa_status_t (*callback)(hsa_region_t region, void* data), void* data) { - IS_OPEN(); - IS_BAD_PTR(callback); - const core::Agent* agent = core::Agent::Convert(agent_handle); - IS_VALID(agent); - return agent->IterateRegion(callback, data); -} - -hsa_status_t hsa_region_get_info(hsa_region_t region, - hsa_region_info_t attribute, - void* value) { - IS_OPEN(); - IS_BAD_PTR(value); - - const core::MemoryRegion* mem_region = core::MemoryRegion::Convert(region); - IS_VALID(mem_region); - - return mem_region->GetInfo(attribute, value); -} - -hsa_status_t hsa_memory_register(void* address, size_t size) { - IS_OPEN(); - - if (size == 0 && address != NULL) { - return HSA_STATUS_ERROR_INVALID_ARGUMENT; - } - - return HSA_STATUS_SUCCESS; -} - -hsa_status_t hsa_memory_deregister(void* address, size_t size) { - IS_OPEN(); - - return HSA_STATUS_SUCCESS; -} - -hsa_status_t - hsa_memory_allocate(hsa_region_t region, size_t size, void** ptr) { - IS_OPEN(); - - if (size == 0 || ptr == NULL) { - return HSA_STATUS_ERROR_INVALID_ARGUMENT; - } - - const core::MemoryRegion* mem_region = core::MemoryRegion::Convert(region); - IS_VALID(mem_region); - - return core::Runtime::runtime_singleton_->AllocateMemory(mem_region, size, - ptr); -} - -hsa_status_t hsa_memory_free(void* ptr) { - IS_OPEN(); - - if (ptr == NULL) { - return HSA_STATUS_SUCCESS; - } - - return core::Runtime::runtime_singleton_->FreeMemory(ptr); -} - -hsa_status_t hsa_memory_assign_agent(void* ptr, - hsa_agent_t agent_handle, - hsa_access_permission_t access) { - IS_OPEN(); - - if ((ptr == NULL) || (access < HSA_ACCESS_PERMISSION_RO) || - (access > HSA_ACCESS_PERMISSION_RW)) { - return HSA_STATUS_ERROR_INVALID_ARGUMENT; - } - - const core::Agent* agent = core::Agent::Convert(agent_handle); - IS_VALID(agent); - - return HSA_STATUS_SUCCESS; -} - -hsa_status_t hsa_memory_copy(void* dst, const void* src, size_t size) { - IS_OPEN(); - - if (dst == NULL || src == NULL) { - return HSA_STATUS_ERROR_INVALID_ARGUMENT; 
- } - - if (size == 0) { - return HSA_STATUS_SUCCESS; - } - - return core::Runtime::runtime_singleton_->CopyMemory(dst, src, size); -} - -//----------------------------------------------------------------------------- -// Signals -//----------------------------------------------------------------------------- - -typedef struct { - bool operator()(const hsa_agent_t& lhs, const hsa_agent_t& rhs) const { - return lhs.handle < rhs.handle; - } -} AgentHandleCompare; - -hsa_status_t - hsa_signal_create(hsa_signal_value_t initial_value, uint32_t num_consumers, - const hsa_agent_t* consumers, hsa_signal_t* hsa_signal) { - IS_OPEN(); - IS_BAD_PTR(hsa_signal); - - core::Signal* ret; - - bool useshost = true; - - if (num_consumers > 0) { - IS_BAD_PTR(consumers); - - // Check for duplicates in consumers. - std::set consumer_set = - std::set(consumers, - consumers + num_consumers); - if (consumer_set.size() != num_consumers) { - return HSA_STATUS_ERROR_INVALID_ARGUMENT; - } - - useshost = - (consumer_set.find( - core::Runtime::runtime_singleton_->host_agent()->public_handle()) != - consumer_set.end()); - } - - if (core::g_use_interrupt_wait && useshost) { - ret = new core::InterruptSignal(initial_value); - } else { - ret = new core::DefaultSignal(initial_value); - } - CHECK_ALLOC(ret); - - *hsa_signal = core::Signal::Convert(ret); - - if (hsa_signal->handle == 0) { - delete ret; - return HSA_STATUS_ERROR_OUT_OF_RESOURCES; - } - - return HSA_STATUS_SUCCESS; -} - -hsa_status_t hsa_signal_destroy(hsa_signal_t hsa_signal) { - IS_OPEN(); - - if (hsa_signal.handle == 0) { - return HSA_STATUS_ERROR_INVALID_ARGUMENT; - } - - core::Signal* signal = core::Signal::Convert(hsa_signal); - IS_VALID(signal); - delete signal; - return HSA_STATUS_SUCCESS; -} - -hsa_signal_value_t hsa_signal_load_relaxed(hsa_signal_t hsa_signal) { - core::Signal* signal = core::Signal::Convert(hsa_signal); - assert(IsValid(signal)); - return signal->LoadRelaxed(); -} - -hsa_signal_value_t 
hsa_signal_load_acquire(hsa_signal_t hsa_signal) { - core::Signal* signal = core::Signal::Convert(hsa_signal); - assert(IsValid(signal)); - return signal->LoadAcquire(); -} - -void hsa_signal_store_relaxed(hsa_signal_t hsa_signal, - hsa_signal_value_t value) { - core::Signal* signal = core::Signal::Convert(hsa_signal); - assert(IsValid(signal)); - signal->StoreRelaxed(value); -} - -void hsa_signal_store_release(hsa_signal_t hsa_signal, - hsa_signal_value_t value) { - core::Signal* signal = core::Signal::Convert(hsa_signal); - assert(IsValid(signal)); - signal->StoreRelease(value); -} - -hsa_signal_value_t - hsa_signal_wait_relaxed(hsa_signal_t hsa_signal, - hsa_signal_condition_t condition, - hsa_signal_value_t compare_value, - uint64_t timeout_hint, - hsa_wait_state_t wait_state_hint) { - core::Signal* signal = core::Signal::Convert(hsa_signal); - assert(IsValid(signal)); - return signal->WaitRelaxed(condition, compare_value, timeout_hint, - wait_state_hint); -} - -hsa_signal_value_t - hsa_signal_wait_acquire(hsa_signal_t hsa_signal, - hsa_signal_condition_t condition, - hsa_signal_value_t compare_value, - uint64_t timeout_hint, - hsa_wait_state_t wait_state_hint) { - core::Signal* signal = core::Signal::Convert(hsa_signal); - assert(IsValid(signal)); - return signal->WaitAcquire(condition, compare_value, timeout_hint, - wait_state_hint); -} - -void - hsa_signal_and_relaxed(hsa_signal_t hsa_signal, hsa_signal_value_t value) { - core::Signal* signal = core::Signal::Convert(hsa_signal); - assert(IsValid(signal)); - signal->AndRelaxed(value); -} - -void - hsa_signal_and_acquire(hsa_signal_t hsa_signal, hsa_signal_value_t value) { - core::Signal* signal = core::Signal::Convert(hsa_signal); - assert(IsValid(signal)); - signal->AndAcquire(value); -} - -void - hsa_signal_and_release(hsa_signal_t hsa_signal, hsa_signal_value_t value) { - core::Signal* signal = core::Signal::Convert(hsa_signal); - assert(IsValid(signal)); - signal->AndRelease(value); -} - -void - 
hsa_signal_and_acq_rel(hsa_signal_t hsa_signal, hsa_signal_value_t value) { - core::Signal* signal = core::Signal::Convert(hsa_signal); - assert(IsValid(signal)); - signal->AndAcqRel(value); -} - -void - hsa_signal_or_relaxed(hsa_signal_t hsa_signal, hsa_signal_value_t value) { - core::Signal* signal = core::Signal::Convert(hsa_signal); - assert(IsValid(signal)); - signal->OrRelaxed(value); -} - -void - hsa_signal_or_acquire(hsa_signal_t hsa_signal, hsa_signal_value_t value) { - core::Signal* signal = core::Signal::Convert(hsa_signal); - assert(IsValid(signal)); - signal->OrAcquire(value); -} - -void - hsa_signal_or_release(hsa_signal_t hsa_signal, hsa_signal_value_t value) { - core::Signal* signal = core::Signal::Convert(hsa_signal); - assert(IsValid(signal)); - signal->OrRelease(value); -} - -void - hsa_signal_or_acq_rel(hsa_signal_t hsa_signal, hsa_signal_value_t value) { - core::Signal* signal = core::Signal::Convert(hsa_signal); - assert(IsValid(signal)); - signal->OrAcqRel(value); -} - -void - hsa_signal_xor_relaxed(hsa_signal_t hsa_signal, hsa_signal_value_t value) { - core::Signal* signal = core::Signal::Convert(hsa_signal); - assert(IsValid(signal)); - signal->XorRelaxed(value); -} - -void - hsa_signal_xor_acquire(hsa_signal_t hsa_signal, hsa_signal_value_t value) { - core::Signal* signal = core::Signal::Convert(hsa_signal); - assert(IsValid(signal)); - signal->XorAcquire(value); -} - -void - hsa_signal_xor_release(hsa_signal_t hsa_signal, hsa_signal_value_t value) { - core::Signal* signal = core::Signal::Convert(hsa_signal); - assert(IsValid(signal)); - signal->XorRelease(value); -} - -void - hsa_signal_xor_acq_rel(hsa_signal_t hsa_signal, hsa_signal_value_t value) { - core::Signal* signal = core::Signal::Convert(hsa_signal); - assert(IsValid(signal)); - signal->XorAcqRel(value); -} - -void - hsa_signal_add_relaxed(hsa_signal_t hsa_signal, hsa_signal_value_t value) { - core::Signal* signal = core::Signal::Convert(hsa_signal); - assert(IsValid(signal)); - 
return signal->AddRelaxed(value); -} - -void - hsa_signal_add_acquire(hsa_signal_t hsa_signal, hsa_signal_value_t value) { - core::Signal* signal = core::Signal::Convert(hsa_signal); - assert(IsValid(signal)); - signal->AddAcquire(value); -} - -void - hsa_signal_add_release(hsa_signal_t hsa_signal, hsa_signal_value_t value) { - core::Signal* signal = core::Signal::Convert(hsa_signal); - assert(IsValid(signal)); - signal->AddRelease(value); -} - -void - hsa_signal_add_acq_rel(hsa_signal_t hsa_signal, hsa_signal_value_t value) { - core::Signal* signal = core::Signal::Convert(hsa_signal); - assert(IsValid(signal)); - signal->AddAcqRel(value); -} - -void hsa_signal_subtract_relaxed(hsa_signal_t hsa_signal, - hsa_signal_value_t value) { - core::Signal* signal = core::Signal::Convert(hsa_signal); - assert(IsValid(signal)); - signal->SubRelaxed(value); -} - -void hsa_signal_subtract_acquire(hsa_signal_t hsa_signal, - hsa_signal_value_t value) { - core::Signal* signal = core::Signal::Convert(hsa_signal); - assert(IsValid(signal)); - signal->SubAcquire(value); -} - -void hsa_signal_subtract_release(hsa_signal_t hsa_signal, - hsa_signal_value_t value) { - core::Signal* signal = core::Signal::Convert(hsa_signal); - assert(IsValid(signal)); - signal->SubRelease(value); -} - -void hsa_signal_subtract_acq_rel(hsa_signal_t hsa_signal, - hsa_signal_value_t value) { - core::Signal* signal = core::Signal::Convert(hsa_signal); - assert(IsValid(signal)); - signal->SubAcqRel(value); -} - -hsa_signal_value_t - hsa_signal_exchange_relaxed(hsa_signal_t hsa_signal, - hsa_signal_value_t value) { - core::Signal* signal = core::Signal::Convert(hsa_signal); - assert(IsValid(signal)); - return signal->ExchRelaxed(value); -} - -hsa_signal_value_t - hsa_signal_exchange_acquire(hsa_signal_t hsa_signal, - hsa_signal_value_t value) { - core::Signal* signal = core::Signal::Convert(hsa_signal); - assert(IsValid(signal)); - return signal->ExchAcquire(value); -} - -hsa_signal_value_t - 
hsa_signal_exchange_release(hsa_signal_t hsa_signal, - hsa_signal_value_t value) { - core::Signal* signal = core::Signal::Convert(hsa_signal); - assert(IsValid(signal)); - return signal->ExchRelease(value); -} - -hsa_signal_value_t - hsa_signal_exchange_acq_rel(hsa_signal_t hsa_signal, - hsa_signal_value_t value) { - core::Signal* signal = core::Signal::Convert(hsa_signal); - assert(IsValid(signal)); - return signal->ExchAcqRel(value); -} - -hsa_signal_value_t hsa_signal_cas_relaxed(hsa_signal_t hsa_signal, - hsa_signal_value_t expected, - hsa_signal_value_t value) { - core::Signal* signal = core::Signal::Convert(hsa_signal); - assert(IsValid(signal)); - return signal->CasRelaxed(expected, value); -} - -hsa_signal_value_t hsa_signal_cas_acquire(hsa_signal_t hsa_signal, - hsa_signal_value_t expected, - hsa_signal_value_t value) { - core::Signal* signal = core::Signal::Convert(hsa_signal); - assert(IsValid(signal)); - return signal->CasAcquire(expected, value); -} - -hsa_signal_value_t hsa_signal_cas_release(hsa_signal_t hsa_signal, - hsa_signal_value_t expected, - hsa_signal_value_t value) { - core::Signal* signal = core::Signal::Convert(hsa_signal); - assert(IsValid(signal)); - return signal->CasRelease(expected, value); -} - -hsa_signal_value_t hsa_signal_cas_acq_rel(hsa_signal_t hsa_signal, - hsa_signal_value_t expected, - hsa_signal_value_t value) { - core::Signal* signal = core::Signal::Convert(hsa_signal); - assert(IsValid(signal)); - return signal->CasAcqRel(expected, value); -} - -//----------------------------------------------------------------------------- -// Isa -//----------------------------------------------------------------------------- - -hsa_status_t hsa_isa_from_name(const char* name, hsa_isa_t* isa) { - IS_OPEN(); - IS_BAD_PTR(name); - IS_BAD_PTR(isa); - - const core::Isa* isa_object = core::IsaRegistry::GetIsa(name); - if (!isa_object) { - return HSA_STATUS_ERROR_INVALID_ISA_NAME; - } - - *isa = core::Isa::Handle(isa_object); - return 
HSA_STATUS_SUCCESS; -} - -hsa_status_t hsa_isa_get_info(hsa_isa_t isa, hsa_isa_info_t attribute, - uint32_t index, void* value) { - IS_OPEN(); - IS_BAD_PTR(value); - - if (index != 0) { - return HSA_STATUS_ERROR_INVALID_INDEX; - } - - const core::Isa* isa_object = core::Isa::Object(isa); - IS_VALID(isa_object); - - return isa_object->GetInfo(attribute, value) ? - HSA_STATUS_SUCCESS : HSA_STATUS_ERROR_INVALID_ARGUMENT; -} - -hsa_status_t hsa_isa_compatible(hsa_isa_t code_object_isa, - hsa_isa_t agent_isa, bool* result) { - IS_OPEN(); - IS_BAD_PTR(result); - - const core::Isa* code_object_isa_object = core::Isa::Object(code_object_isa); - IS_VALID(code_object_isa_object); - - const core::Isa* agent_isa_object = core::Isa::Object(agent_isa); - IS_VALID(agent_isa_object); - - *result = code_object_isa_object->IsCompatible(agent_isa_object); - return HSA_STATUS_SUCCESS; -} - -//----------------------------------------------------------------------------- -// Code object. -//----------------------------------------------------------------------------- - -namespace { - -hsa_status_t IsCodeObjectAllocRegion(hsa_region_t region, void *data) -{ - assert(nullptr != data); - assert(0 == ((hsa_region_t*)data)->handle); - - hsa_status_t status = HSA_STATUS_SUCCESS; - bool alloc_allowed; - if (HSA_STATUS_SUCCESS != (status = HSA::hsa_region_get_info(region, HSA_REGION_INFO_RUNTIME_ALLOC_ALLOWED, &alloc_allowed))) { - return status; - } - if (true == alloc_allowed) { - ((hsa_region_t*)data)->handle = region.handle; - return HSA_STATUS_INFO_BREAK; - } - return HSA_STATUS_SUCCESS; -} - -hsa_status_t FindCodeObjectAllocRegionFromAgent(hsa_agent_t agent, void *data) -{ - assert(nullptr != data); - assert(0 == ((hsa_region_t*)data)->handle); - - hsa_status_t status = HSA_STATUS_SUCCESS; - hsa_device_type_t agent_type; - if (HSA_STATUS_SUCCESS != (status = HSA::hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &agent_type))) { - return status; - } - if (HSA_DEVICE_TYPE_CPU == agent_type) 
{ - return HSA::hsa_agent_iterate_regions(agent, IsCodeObjectAllocRegion, data); - } - return HSA_STATUS_SUCCESS; -} - -hsa_status_t FindCodeObjectAllocRegionFromSystem(void *data) -{ - assert(nullptr != data); - - ((hsa_region_t*)data)->handle = 0; - return HSA::hsa_iterate_agents(FindCodeObjectAllocRegionFromAgent, data); -} - -} // namespace anonymous - -hsa_status_t hsa_code_object_serialize( - hsa_code_object_t code_object, - hsa_status_t (*alloc_callback)(size_t size, hsa_callback_data_t data, - void** address), - hsa_callback_data_t callback_data, const char* options, - void** serialized_code_object, size_t* serialized_code_object_size) { - IS_OPEN(); - IS_BAD_PTR(alloc_callback); - IS_BAD_PTR(serialized_code_object); - IS_BAD_PTR(serialized_code_object_size); - - AmdHsaCode* code = core::Runtime::runtime_singleton_->code_manager()->FromHandle(code_object); - if (!code) { return HSA_STATUS_ERROR_INVALID_CODE_OBJECT; } - size_t elfmemsz = code->ElfSize(); - const char* elfmemrd = code->ElfData(); - - hsa_status_t hsc = alloc_callback(elfmemsz, - callback_data, - serialized_code_object); - if (HSA_STATUS_SUCCESS != hsc) { - return hsc; - } - - memcpy(*serialized_code_object, elfmemrd, elfmemsz); - *serialized_code_object_size = elfmemsz; - - return HSA_STATUS_SUCCESS; -} - -hsa_status_t - hsa_code_object_deserialize(void* serialized_code_object, - size_t serialized_code_object_size, - const char* options, - hsa_code_object_t* code_object) { - IS_OPEN(); - IS_BAD_PTR(serialized_code_object); - IS_BAD_PTR(code_object); - - if (!serialized_code_object_size) { - return HSA_STATUS_ERROR_INVALID_ARGUMENT; - } - - hsa_status_t status = HSA_STATUS_SUCCESS; - - // Find code object allocation region. 
- hsa_region_t code_object_alloc_region; - status = FindCodeObjectAllocRegionFromSystem(&code_object_alloc_region); - if (HSA_STATUS_SUCCESS != status && HSA_STATUS_INFO_BREAK != status) { - return status; - } - assert(0 != code_object_alloc_region.handle); - - // Allocate code object memory. - void *code_object_alloc_mem = nullptr; - status = HSA::hsa_memory_allocate(code_object_alloc_region, - serialized_code_object_size, - &code_object_alloc_mem); - if (HSA_STATUS_SUCCESS != status) { - return status; - } - assert(nullptr != code_object_alloc_mem); - - // Copy code object into allocated code object memory. - status = HSA::hsa_memory_copy(code_object_alloc_mem, - serialized_code_object, - serialized_code_object_size); - if (HSA_STATUS_SUCCESS != status) { - return status; - } - code_object->handle = (uint64_t) (uintptr_t) code_object_alloc_mem; - - return HSA_STATUS_SUCCESS; -} - -hsa_status_t hsa_code_object_destroy(hsa_code_object_t code_object) { - IS_OPEN(); - - void *elfmemrd = reinterpret_cast(code_object.handle); - if (!elfmemrd) { - return HSA_STATUS_ERROR_INVALID_CODE_OBJECT; - } - - if (!core::Runtime::runtime_singleton_->code_manager()->Destroy(code_object)) { - return HSA_STATUS_ERROR_INVALID_CODE_OBJECT; - } - - HSA::hsa_memory_free(elfmemrd); - - return HSA_STATUS_SUCCESS; -} - -hsa_status_t hsa_code_object_get_info(hsa_code_object_t code_object, - hsa_code_object_info_t attribute, - void* value) { - IS_OPEN(); - IS_BAD_PTR(value); - - AmdHsaCode* code = core::Runtime::runtime_singleton_->code_manager()->FromHandle(code_object); - if (!code) { - return HSA_STATUS_ERROR_INVALID_CODE_OBJECT; - } - switch (attribute) { - case HSA_CODE_OBJECT_INFO_ISA: { - // TODO: currently AmdHsaCode::GetInfo return string representation. - // Fix when compute capability is available in libamdhsacode. 
- char isa_name[64]; - hsa_status_t status = code->GetInfo(attribute, &isa_name); - if (status != HSA_STATUS_SUCCESS) { return status; } - if (HSA_STATUS_SUCCESS != HSA::hsa_isa_from_name(isa_name, (hsa_isa_t*)value)) { - return HSA_STATUS_ERROR_INVALID_CODE_OBJECT; - } - return HSA_STATUS_SUCCESS; - } - default: - return code->GetInfo(attribute, value); - } -} - -hsa_status_t hsa_code_object_get_symbol(hsa_code_object_t code_object, - const char *symbol_name, - hsa_code_symbol_t *symbol) { - IS_OPEN(); - IS_BAD_PTR(symbol_name); - IS_BAD_PTR(symbol); - - AmdHsaCode* code = core::Runtime::runtime_singleton_->code_manager()->FromHandle(code_object); - if (!code) { - return HSA_STATUS_ERROR_INVALID_CODE_OBJECT; - } - - // TODO(kzhuravl): module_name is NULL until spec is changed, waiting for - // Mario. - return code->GetSymbol(NULL, symbol_name, symbol); -} - -hsa_status_t hsa_code_symbol_get_info(hsa_code_symbol_t code_symbol, - hsa_code_symbol_info_t attribute, - void* value) { - IS_OPEN(); - IS_BAD_PTR(value); - - Symbol* sym = Symbol::FromHandle(code_symbol); - return sym->GetInfo(attribute, value); -} - -hsa_status_t hsa_code_object_iterate_symbols( - hsa_code_object_t code_object, - hsa_status_t (*callback)(hsa_code_object_t code_object, - hsa_code_symbol_t symbol, void* data), - void* data) { - IS_OPEN(); - IS_BAD_PTR(callback); - - AmdHsaCode* code = core::Runtime::runtime_singleton_->code_manager()->FromHandle(code_object); - if (!code) { - return HSA_STATUS_ERROR_INVALID_CODE_OBJECT; - } - - return code->IterateSymbols(code_object, callback, data); -} - -//----------------------------------------------------------------------------- -// Executable -//----------------------------------------------------------------------------- - -hsa_status_t - hsa_executable_create(hsa_profile_t profile, - hsa_executable_state_t executable_state, - const char* options, hsa_executable_t* executable) { - IS_OPEN(); - IS_BAD_PTR(executable); - - if (HSA_PROFILE_BASE != 
profile && HSA_PROFILE_FULL != profile) { - return HSA_STATUS_ERROR_INVALID_ARGUMENT; - } - if (HSA_EXECUTABLE_STATE_FROZEN != executable_state && - HSA_EXECUTABLE_STATE_UNFROZEN != executable_state) { - return HSA_STATUS_ERROR_INVALID_ARGUMENT; - } - - amd::hsa::loader::Executable *exec = core::Runtime::runtime_singleton_->loader()->CreateExecutable( - profile, options); - if (!exec) { - return HSA_STATUS_ERROR_OUT_OF_RESOURCES; - } - - // @todo(spec): why did we make it possible to create frozen executable? - if (HSA_EXECUTABLE_STATE_FROZEN == executable_state) { - exec->Freeze(NULL); - } - - *executable = amd::hsa::loader::Executable::Handle(exec); - return HSA_STATUS_SUCCESS; -} - -hsa_status_t hsa_executable_destroy(hsa_executable_t executable) { - IS_OPEN(); - - amd::hsa::loader::Executable *exec = amd::hsa::loader::Executable::Object(executable); - if (!exec) { - return HSA_STATUS_ERROR_INVALID_EXECUTABLE; - } - - core::Runtime::runtime_singleton_->loader()->DestroyExecutable(exec); - return HSA_STATUS_SUCCESS; -} - -hsa_status_t - hsa_executable_load_code_object(hsa_executable_t executable, - hsa_agent_t agent, - hsa_code_object_t code_object, - const char* options) { - amd_loaded_code_object_t loaded_code_object = {0}; - return amd_executable_load_code_object( - executable, agent, code_object, options, &loaded_code_object); -} - -hsa_status_t - hsa_executable_freeze(hsa_executable_t executable, const char* options) { - IS_OPEN(); - - amd::hsa::loader::Executable *exec = amd::hsa::loader::Executable::Object(executable); - if (!exec) { - return HSA_STATUS_ERROR_INVALID_EXECUTABLE; - } - - return exec->Freeze(options); -} - -hsa_status_t hsa_executable_get_info(hsa_executable_t executable, - hsa_executable_info_t attribute, - void* value) { - IS_OPEN(); - IS_BAD_PTR(value); - - amd::hsa::loader::Executable *exec = amd::hsa::loader::Executable::Object(executable); - if (!exec) { - return HSA_STATUS_ERROR_INVALID_EXECUTABLE; - } - - return 
exec->GetInfo(attribute, value); -} - -hsa_status_t - hsa_executable_global_variable_define(hsa_executable_t executable, - const char* variable_name, - void* address) { - IS_OPEN(); - IS_BAD_PTR(variable_name); - IS_BAD_PTR(address); - - amd::hsa::loader::Executable *exec = amd::hsa::loader::Executable::Object(executable); - if (!exec) { - return HSA_STATUS_ERROR_INVALID_EXECUTABLE; - } - - return exec->DefineProgramExternalVariable(variable_name, address); -} - -hsa_status_t - hsa_executable_agent_global_variable_define(hsa_executable_t executable, - hsa_agent_t agent, - const char* variable_name, - void* address) { - IS_OPEN(); - IS_BAD_PTR(variable_name); - IS_BAD_PTR(address); - - amd::hsa::loader::Executable *exec = amd::hsa::loader::Executable::Object(executable); - if (!exec) { - return HSA_STATUS_ERROR_INVALID_EXECUTABLE; - } - - return exec->DefineAgentExternalVariable( - variable_name, agent, HSA_VARIABLE_SEGMENT_GLOBAL, address); -} - -hsa_status_t - hsa_executable_readonly_variable_define(hsa_executable_t executable, - hsa_agent_t agent, - const char* variable_name, - void* address) { - IS_OPEN(); - IS_BAD_PTR(variable_name); - IS_BAD_PTR(address); - - amd::hsa::loader::Executable *exec = amd::hsa::loader::Executable::Object(executable); - if (!exec) { - return HSA_STATUS_ERROR_INVALID_EXECUTABLE; - } - - return exec->DefineAgentExternalVariable( - variable_name, agent, HSA_VARIABLE_SEGMENT_READONLY, address); -} - -hsa_status_t - hsa_executable_validate(hsa_executable_t executable, uint32_t* result) { - IS_OPEN(); - IS_BAD_PTR(result); - - amd::hsa::loader::Executable *exec = amd::hsa::loader::Executable::Object(executable); - if (!exec) { - return HSA_STATUS_ERROR_INVALID_EXECUTABLE; - } - - return exec->Validate(result); -} - -hsa_status_t - hsa_executable_get_symbol(hsa_executable_t executable, - const char* module_name, const char* symbol_name, - hsa_agent_t agent, int32_t call_convention, - hsa_executable_symbol_t* symbol) { - IS_OPEN(); - 
IS_BAD_PTR(symbol_name); - IS_BAD_PTR(symbol); - - amd::hsa::loader::Executable *exec = amd::hsa::loader::Executable::Object(executable); - if (!exec) { - return HSA_STATUS_ERROR_INVALID_EXECUTABLE; - } - - amd::hsa::loader::Symbol *sym = - exec->GetSymbol(module_name == NULL ? "" : module_name, symbol_name, agent, call_convention); - if (!sym) { - return HSA_STATUS_ERROR_INVALID_SYMBOL_NAME; - } - *symbol = amd::hsa::loader::Symbol::Handle(sym); - return HSA_STATUS_SUCCESS; -} - -hsa_status_t - hsa_executable_symbol_get_info(hsa_executable_symbol_t executable_symbol, - hsa_executable_symbol_info_t attribute, - void* value) { - IS_OPEN(); - IS_BAD_PTR(value); - - amd::hsa::loader::Symbol *sym = amd::hsa::loader::Symbol::Object(executable_symbol); - if (!sym) { - return HSA_STATUS_ERROR_INVALID_ARGUMENT; - } - return sym->GetInfo(attribute, value) ? - HSA_STATUS_SUCCESS : HSA_STATUS_ERROR_INVALID_ARGUMENT; -} - -hsa_status_t hsa_executable_iterate_symbols( - hsa_executable_t executable, - hsa_status_t (*callback)(hsa_executable_t executable, - hsa_executable_symbol_t symbol, void* data), - void* data) { - IS_OPEN(); - IS_BAD_PTR(callback); - - amd::hsa::loader::Executable *exec = amd::hsa::loader::Executable::Object(executable); - if (!exec) { - return HSA_STATUS_ERROR_INVALID_EXECUTABLE; - } - - return exec->IterateSymbols(callback, data); -} - -//----------------------------------------------------------------------------- -// Errors -//----------------------------------------------------------------------------- - -hsa_status_t - hsa_status_string(hsa_status_t status, const char** status_string) { - IS_OPEN(); - IS_BAD_PTR(status_string); - const size_t status_u = static_cast(status); - switch (status_u) { - case HSA_STATUS_SUCCESS: - *status_string = - "HSA_STATUS_SUCCESS: The function has been executed successfully."; - break; - case HSA_STATUS_INFO_BREAK: - *status_string = - "HSA_STATUS_INFO_BREAK: A traversal over a list of " - "elements has been interrupted 
by the application before " - "completing."; - break; - case HSA_STATUS_ERROR: - *status_string = "HSA_STATUS_ERROR: A generic error has occurred."; - break; - case HSA_STATUS_ERROR_INVALID_ARGUMENT: - *status_string = - "HSA_STATUS_ERROR_INVALID_ARGUMENT: One of the actual " - "arguments does not meet a precondition stated in the " - "documentation of the corresponding formal argument."; - break; - case HSA_STATUS_ERROR_INVALID_QUEUE_CREATION: - *status_string = - "HSA_STATUS_ERROR_INVALID_QUEUE_CREATION: The requested " - "queue creation is not valid."; - break; - case HSA_STATUS_ERROR_INVALID_ALLOCATION: - *status_string = - "HSA_STATUS_ERROR_INVALID_ALLOCATION: The requested " - "allocation is not valid."; - break; - case HSA_STATUS_ERROR_INVALID_AGENT: - *status_string = - "HSA_STATUS_ERROR_INVALID_AGENT: The agent is invalid."; - break; - case HSA_STATUS_ERROR_INVALID_REGION: - *status_string = - "HSA_STATUS_ERROR_INVALID_REGION: The memory region is invalid."; - break; - case HSA_STATUS_ERROR_INVALID_SIGNAL: - *status_string = - "HSA_STATUS_ERROR_INVALID_SIGNAL: The signal is invalid."; - break; - case HSA_STATUS_ERROR_INVALID_QUEUE: - *status_string = - "HSA_STATUS_ERROR_INVALID_QUEUE: The queue is invalid."; - break; - case HSA_STATUS_ERROR_OUT_OF_RESOURCES: - *status_string = - "HSA_STATUS_ERROR_OUT_OF_RESOURCES: The runtime failed to " - "allocate the necessary resources. 
This error may also " - "occur when the core runtime library needs to spawn " - "threads or create internal OS-specific events."; - break; - case HSA_STATUS_ERROR_INVALID_PACKET_FORMAT: - *status_string = - "HSA_STATUS_ERROR_INVALID_PACKET_FORMAT: The AQL packet " - "is malformed."; - break; - case HSA_STATUS_ERROR_RESOURCE_FREE: - *status_string = - "HSA_STATUS_ERROR_RESOURCE_FREE: An error has been " - "detected while releasing a resource."; - break; - case HSA_STATUS_ERROR_NOT_INITIALIZED: - *status_string = - "HSA_STATUS_ERROR_NOT_INITIALIZED: An API other than " - "hsa_init has been invoked while the reference count of " - "the HSA runtime is zero."; - break; - case HSA_STATUS_ERROR_REFCOUNT_OVERFLOW: - *status_string = - "HSA_STATUS_ERROR_REFCOUNT_OVERFLOW: The maximum " - "reference count for the object has been reached."; - break; - case HSA_STATUS_ERROR_INCOMPATIBLE_ARGUMENTS: - *status_string = - "HSA_STATUS_ERROR_INCOMPATIBLE_ARGUMENTS: The arguments passed to " - "a functions are not compatible."; - break; - case HSA_STATUS_ERROR_INVALID_INDEX: - *status_string = "The index is invalid."; - break; - case HSA_STATUS_ERROR_INVALID_ISA: - *status_string = "The instruction set architecture is invalid."; - break; - case HSA_STATUS_ERROR_INVALID_CODE_OBJECT: - *status_string = "The code object is invalid."; - break; - case HSA_STATUS_ERROR_INVALID_EXECUTABLE: - *status_string = "The executable is invalid."; - break; - case HSA_STATUS_ERROR_FROZEN_EXECUTABLE: - *status_string = "The executable is frozen."; - break; - case HSA_STATUS_ERROR_INVALID_SYMBOL_NAME: - *status_string = "There is no symbol with the given name."; - break; - case HSA_STATUS_ERROR_VARIABLE_ALREADY_DEFINED: - *status_string = "The variable is already defined."; - break; - case HSA_STATUS_ERROR_VARIABLE_UNDEFINED: - *status_string = "The variable is undefined."; - break; - case HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED: - *status_string = - "HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED: 
Image " - "format is not supported."; - break; - case HSA_EXT_STATUS_ERROR_IMAGE_SIZE_UNSUPPORTED: - *status_string = - "HSA_EXT_STATUS_ERROR_IMAGE_SIZE_UNSUPPORTED: Image size " - "is not supported."; - break; - case HSA_EXT_STATUS_ERROR_INVALID_PROGRAM: - *status_string = - "HSA_EXT_STATUS_ERROR_INVALID_PROGRAM: Invalid program"; - break; - case HSA_EXT_STATUS_ERROR_INVALID_MODULE: - *status_string = "HSA_EXT_STATUS_ERROR_INVALID_MODULE: Invalid module"; - break; - case HSA_EXT_STATUS_ERROR_INCOMPATIBLE_MODULE: - *status_string = - "HSA_EXT_STATUS_ERROR_INCOMPATIBLE_MODULE: Incompatible module"; - break; - case HSA_EXT_STATUS_ERROR_MODULE_ALREADY_INCLUDED: - *status_string = - "HSA_EXT_STATUS_ERROR_MODULE_ALREADY_INCLUDED: Module already " - "included"; - break; - case HSA_EXT_STATUS_ERROR_SYMBOL_MISMATCH: - *status_string = - "HSA_EXT_STATUS_ERROR_SYMBOL_MISMATCH: Symbol mismatch"; - break; - case HSA_EXT_STATUS_ERROR_FINALIZATION_FAILED: - *status_string = - "HSA_EXT_STATUS_ERROR_FINALIZATION_FAILED: Finalization failed"; - break; - case HSA_EXT_STATUS_ERROR_DIRECTIVE_MISMATCH: - *status_string = - "HSA_EXT_STATUS_ERROR_DIRECTIVE_MISMATCH: Directive mismatch"; - break; - default: - return HSA_STATUS_ERROR_INVALID_ARGUMENT; - } - return HSA_STATUS_SUCCESS; -} - -} // end of namespace HSA diff --git a/runtime/hsa-runtime/core/runtime/hsa_api_trace.cpp b/runtime/hsa-runtime/core/runtime/hsa_api_trace.cpp deleted file mode 100644 index 0eeab7448c..0000000000 --- a/runtime/hsa-runtime/core/runtime/hsa_api_trace.cpp +++ /dev/null @@ -1,191 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// -// The University of Illinois/NCSA -// Open Source License (NCSA) -// -// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. -// -// Developed by: -// -// AMD Research and AMD HSA Software Development -// -// Advanced Micro Devices, Inc. 
-// -// www.amd.com -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal with the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimers. -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimers in -// the documentation and/or other materials provided with the distribution. -// - Neither the names of Advanced Micro Devices, Inc, -// nor the names of its contributors may be used to endorse or promote -// products derived from this Software without specific prior written -// permission. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR -// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// DEALINGS WITH THE SOFTWARE. 
-// -//////////////////////////////////////////////////////////////////////////////// - -#include "core/inc/hsa_api_trace_int.h" -#include "core/inc/runtime.h" -#include "core/inc/hsa_table_interface.h" - -namespace core { - -ApiTable hsa_api_table_; -ApiTable hsa_internal_api_table_; - -ApiTable::ApiTable() { - table.std_exts_ = NULL; - Reset(); -} - -void ApiTable::LinkExts(ExtTable* ptr) { - assert(ptr != NULL && "Invalid extension table linked."); - extension_backup = *ptr; - table.std_exts_ = ptr; -} - -void ApiTable::Reset() { - table.hsa_init_fn = HSA::hsa_init; - table.hsa_shut_down_fn = HSA::hsa_shut_down; - table.hsa_system_get_info_fn = HSA::hsa_system_get_info; - table.hsa_system_extension_supported_fn = HSA::hsa_system_extension_supported; - table.hsa_system_get_extension_table_fn = HSA::hsa_system_get_extension_table; - table.hsa_iterate_agents_fn = HSA::hsa_iterate_agents; - table.hsa_agent_get_info_fn = HSA::hsa_agent_get_info; - table.hsa_agent_get_exception_policies_fn = - HSA::hsa_agent_get_exception_policies; - table.hsa_agent_extension_supported_fn = HSA::hsa_agent_extension_supported; - table.hsa_queue_create_fn = HSA::hsa_queue_create; - table.hsa_soft_queue_create_fn = HSA::hsa_soft_queue_create; - table.hsa_queue_destroy_fn = HSA::hsa_queue_destroy; - table.hsa_queue_inactivate_fn = HSA::hsa_queue_inactivate; - table.hsa_queue_load_read_index_acquire_fn = - HSA::hsa_queue_load_read_index_acquire; - table.hsa_queue_load_read_index_relaxed_fn = - HSA::hsa_queue_load_read_index_relaxed; - table.hsa_queue_load_write_index_acquire_fn = - HSA::hsa_queue_load_write_index_acquire; - table.hsa_queue_load_write_index_relaxed_fn = - HSA::hsa_queue_load_write_index_relaxed; - table.hsa_queue_store_write_index_relaxed_fn = - HSA::hsa_queue_store_write_index_relaxed; - table.hsa_queue_store_write_index_release_fn = - HSA::hsa_queue_store_write_index_release; - table.hsa_queue_cas_write_index_acq_rel_fn = - HSA::hsa_queue_cas_write_index_acq_rel; - 
table.hsa_queue_cas_write_index_acquire_fn = - HSA::hsa_queue_cas_write_index_acquire; - table.hsa_queue_cas_write_index_relaxed_fn = - HSA::hsa_queue_cas_write_index_relaxed; - table.hsa_queue_cas_write_index_release_fn = - HSA::hsa_queue_cas_write_index_release; - table.hsa_queue_add_write_index_acq_rel_fn = - HSA::hsa_queue_add_write_index_acq_rel; - table.hsa_queue_add_write_index_acquire_fn = - HSA::hsa_queue_add_write_index_acquire; - table.hsa_queue_add_write_index_relaxed_fn = - HSA::hsa_queue_add_write_index_relaxed; - table.hsa_queue_add_write_index_release_fn = - HSA::hsa_queue_add_write_index_release; - table.hsa_queue_store_read_index_relaxed_fn = - HSA::hsa_queue_store_read_index_relaxed; - table.hsa_queue_store_read_index_release_fn = - HSA::hsa_queue_store_read_index_release; - table.hsa_agent_iterate_regions_fn = HSA::hsa_agent_iterate_regions; - table.hsa_region_get_info_fn = HSA::hsa_region_get_info; - table.hsa_memory_register_fn = HSA::hsa_memory_register; - table.hsa_memory_deregister_fn = HSA::hsa_memory_deregister; - table.hsa_memory_allocate_fn = HSA::hsa_memory_allocate; - table.hsa_memory_free_fn = HSA::hsa_memory_free; - table.hsa_memory_copy_fn = HSA::hsa_memory_copy; - table.hsa_memory_assign_agent_fn = HSA::hsa_memory_assign_agent; - table.hsa_signal_create_fn = HSA::hsa_signal_create; - table.hsa_signal_destroy_fn = HSA::hsa_signal_destroy; - table.hsa_signal_load_relaxed_fn = HSA::hsa_signal_load_relaxed; - table.hsa_signal_load_acquire_fn = HSA::hsa_signal_load_acquire; - table.hsa_signal_store_relaxed_fn = HSA::hsa_signal_store_relaxed; - table.hsa_signal_store_release_fn = HSA::hsa_signal_store_release; - table.hsa_signal_wait_relaxed_fn = HSA::hsa_signal_wait_relaxed; - table.hsa_signal_wait_acquire_fn = HSA::hsa_signal_wait_acquire; - table.hsa_signal_and_relaxed_fn = HSA::hsa_signal_and_relaxed; - table.hsa_signal_and_acquire_fn = HSA::hsa_signal_and_acquire; - table.hsa_signal_and_release_fn = HSA::hsa_signal_and_release; - 
table.hsa_signal_and_acq_rel_fn = HSA::hsa_signal_and_acq_rel; - table.hsa_signal_or_relaxed_fn = HSA::hsa_signal_or_relaxed; - table.hsa_signal_or_acquire_fn = HSA::hsa_signal_or_acquire; - table.hsa_signal_or_release_fn = HSA::hsa_signal_or_release; - table.hsa_signal_or_acq_rel_fn = HSA::hsa_signal_or_acq_rel; - table.hsa_signal_xor_relaxed_fn = HSA::hsa_signal_xor_relaxed; - table.hsa_signal_xor_acquire_fn = HSA::hsa_signal_xor_acquire; - table.hsa_signal_xor_release_fn = HSA::hsa_signal_xor_release; - table.hsa_signal_xor_acq_rel_fn = HSA::hsa_signal_xor_acq_rel; - table.hsa_signal_exchange_relaxed_fn = HSA::hsa_signal_exchange_relaxed; - table.hsa_signal_exchange_acquire_fn = HSA::hsa_signal_exchange_acquire; - table.hsa_signal_exchange_release_fn = HSA::hsa_signal_exchange_release; - table.hsa_signal_exchange_acq_rel_fn = HSA::hsa_signal_exchange_acq_rel; - table.hsa_signal_add_relaxed_fn = HSA::hsa_signal_add_relaxed; - table.hsa_signal_add_acquire_fn = HSA::hsa_signal_add_acquire; - table.hsa_signal_add_release_fn = HSA::hsa_signal_add_release; - table.hsa_signal_add_acq_rel_fn = HSA::hsa_signal_add_acq_rel; - table.hsa_signal_subtract_relaxed_fn = HSA::hsa_signal_subtract_relaxed; - table.hsa_signal_subtract_acquire_fn = HSA::hsa_signal_subtract_acquire; - table.hsa_signal_subtract_release_fn = HSA::hsa_signal_subtract_release; - table.hsa_signal_subtract_acq_rel_fn = HSA::hsa_signal_subtract_acq_rel; - table.hsa_signal_cas_relaxed_fn = HSA::hsa_signal_cas_relaxed; - table.hsa_signal_cas_acquire_fn = HSA::hsa_signal_cas_acquire; - table.hsa_signal_cas_release_fn = HSA::hsa_signal_cas_release; - table.hsa_signal_cas_acq_rel_fn = HSA::hsa_signal_cas_acq_rel; - table.hsa_isa_from_name_fn = HSA::hsa_isa_from_name; - table.hsa_isa_get_info_fn = HSA::hsa_isa_get_info; - table.hsa_isa_compatible_fn = HSA::hsa_isa_compatible; - table.hsa_code_object_serialize_fn = HSA::hsa_code_object_serialize; - table.hsa_code_object_deserialize_fn = 
HSA::hsa_code_object_deserialize; - table.hsa_code_object_destroy_fn = HSA::hsa_code_object_destroy; - table.hsa_code_object_get_info_fn = HSA::hsa_code_object_get_info; - table.hsa_code_object_get_symbol_fn = HSA::hsa_code_object_get_symbol; - table.hsa_code_symbol_get_info_fn = HSA::hsa_code_symbol_get_info; - table.hsa_code_object_iterate_symbols_fn = - HSA::hsa_code_object_iterate_symbols; - table.hsa_executable_create_fn = HSA::hsa_executable_create; - table.hsa_executable_destroy_fn = HSA::hsa_executable_destroy; - table.hsa_executable_load_code_object_fn = - HSA::hsa_executable_load_code_object; - table.hsa_executable_freeze_fn = HSA::hsa_executable_freeze; - table.hsa_executable_get_info_fn = HSA::hsa_executable_get_info; - table.hsa_executable_global_variable_define_fn = - HSA::hsa_executable_global_variable_define; - table.hsa_executable_agent_global_variable_define_fn = - HSA::hsa_executable_agent_global_variable_define; - table.hsa_executable_readonly_variable_define_fn = - HSA::hsa_executable_readonly_variable_define; - table.hsa_executable_validate_fn = HSA::hsa_executable_validate; - table.hsa_executable_get_symbol_fn = HSA::hsa_executable_get_symbol; - table.hsa_executable_symbol_get_info_fn = HSA::hsa_executable_symbol_get_info; - table.hsa_executable_iterate_symbols_fn = HSA::hsa_executable_iterate_symbols; - table.hsa_status_string_fn = HSA::hsa_status_string; - - if (table.std_exts_ != NULL) *table.std_exts_ = extension_backup; -} - -class Init { - public: - Init() { hsa_table_interface_init(&hsa_api_table_.table); } -}; -static Init LinkAtLoad; -} diff --git a/runtime/hsa-runtime/core/runtime/hsa_ext_amd.cpp b/runtime/hsa-runtime/core/runtime/hsa_ext_amd.cpp deleted file mode 100644 index 9394c30062..0000000000 --- a/runtime/hsa-runtime/core/runtime/hsa_ext_amd.cpp +++ /dev/null @@ -1,555 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// -// The University of Illinois/NCSA -// Open Source License 
(NCSA) -// -// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. -// -// Developed by: -// -// AMD Research and AMD HSA Software Development -// -// Advanced Micro Devices, Inc. -// -// www.amd.com -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal with the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimers. -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimers in -// the documentation and/or other materials provided with the distribution. -// - Neither the names of Advanced Micro Devices, Inc, -// nor the names of its contributors may be used to endorse or promote -// products derived from this Software without specific prior written -// permission. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR -// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// DEALINGS WITH THE SOFTWARE. 
-// -//////////////////////////////////////////////////////////////////////////////// - -#include "hsakmt.h" - -#include "core/inc/runtime.h" -#include "core/inc/agent.h" -#include "core/inc/amd_cpu_agent.h" -#include "core/inc/amd_gpu_agent.h" -#include "core/inc/amd_memory_region.h" -#include "core/inc/signal.h" -#include "core/inc/interrupt_signal.h" - -template -struct ValidityError; -template <> -struct ValidityError { - enum { value = HSA_STATUS_ERROR_INVALID_SIGNAL }; -}; - -template <> -struct ValidityError { - enum { value = HSA_STATUS_ERROR_INVALID_AGENT }; -}; - -template <> -struct ValidityError { - enum { value = HSA_STATUS_ERROR_INVALID_REGION }; -}; - -template <> -struct ValidityError { - enum { value = HSA_STATUS_ERROR_INVALID_REGION }; -}; - -template <> -struct ValidityError { - enum { value = HSA_STATUS_ERROR_INVALID_QUEUE }; -}; - -template -struct ValidityError { - enum { value = ValidityError::value }; -}; - -#define IS_BAD_PTR(ptr) \ - do { \ - if ((ptr) == NULL) return HSA_STATUS_ERROR_INVALID_ARGUMENT; \ - } while (false) - -#define IS_VALID(ptr) \ - do { \ - if ((ptr) == NULL || !(ptr)->IsValid()) \ - return hsa_status_t(ValidityError::value); \ - } while (false) - -#define CHECK_ALLOC(ptr) \ - do { \ - if ((ptr) == NULL) return HSA_STATUS_ERROR_OUT_OF_RESOURCES; \ - } while (false) - -#define IS_OPEN() \ - do { \ - if (!core::Runtime::runtime_singleton_->IsOpen()) \ - return HSA_STATUS_ERROR_NOT_INITIALIZED; \ - } while (false) - -template -static __forceinline bool IsValid(T* ptr) { - return (ptr == NULL) ? 
NULL : ptr->IsValid(); -} - -hsa_status_t HSA_API - hsa_amd_coherency_get_type(hsa_agent_t agent_handle, - hsa_amd_coherency_type_t* type) { - IS_OPEN(); - - const core::Agent* agent = core::Agent::Convert(agent_handle); - - IS_VALID(agent); - - IS_BAD_PTR(type); - - if (agent->device_type() != core::Agent::kAmdGpuDevice) { - return HSA_STATUS_ERROR_INVALID_AGENT; - } - - const amd::GpuAgentInt* gpu_agent = - static_cast(agent); - - *type = gpu_agent->current_coherency_type(); - - return HSA_STATUS_SUCCESS; -} - -hsa_status_t HSA_API hsa_amd_coherency_set_type(hsa_agent_t agent_handle, - hsa_amd_coherency_type_t type) { - IS_OPEN(); - - core::Agent* agent = core::Agent::Convert(agent_handle); - - IS_VALID(agent); - - if (type < HSA_AMD_COHERENCY_TYPE_COHERENT || - type > HSA_AMD_COHERENCY_TYPE_NONCOHERENT) { - return HSA_STATUS_ERROR_INVALID_ARGUMENT; - } - - if (agent->device_type() != core::Agent::kAmdGpuDevice) { - return HSA_STATUS_ERROR_INVALID_AGENT; - } - - amd::GpuAgent* gpu_agent = static_cast(agent); - - if (!gpu_agent->current_coherency_type(type)) { - return HSA_STATUS_ERROR; - } - - return HSA_STATUS_SUCCESS; -} - -hsa_status_t HSA_API - hsa_amd_memory_fill(void* ptr, uint32_t value, size_t count) { - IS_OPEN(); - - if (ptr == NULL) { - return HSA_STATUS_ERROR_INVALID_ARGUMENT; - } - - if (count == 0) { - return HSA_STATUS_SUCCESS; - } - - return core::Runtime::runtime_singleton_->FillMemory(ptr, value, count); -} - -hsa_status_t HSA_API - hsa_amd_memory_async_copy(void* dst, hsa_agent_t dst_agent_handle, - const void* src, hsa_agent_t src_agent_handle, - size_t size, uint32_t num_dep_signals, - const hsa_signal_t* dep_signals, - hsa_signal_t completion_signal) { - if (dst == NULL || src == NULL) { - return HSA_STATUS_ERROR_INVALID_ARGUMENT; - } - - if ((num_dep_signals == 0 && dep_signals != NULL) || - (num_dep_signals > 0 && dep_signals == NULL)) { - return HSA_STATUS_ERROR_INVALID_ARGUMENT; - } - - core::Agent* dst_agent = 
core::Agent::Convert(dst_agent_handle); - IS_VALID(dst_agent); - - core::Agent* src_agent = core::Agent::Convert(src_agent_handle); - IS_VALID(src_agent); - - std::vector dep_signal_list(num_dep_signals); - if (num_dep_signals > 0) { - for (size_t i = 0; i < num_dep_signals; ++i) { - core::Signal* dep_signal_obj = core::Signal::Convert(dep_signals[i]); - IS_VALID(dep_signal_obj); - dep_signal_list[i] = dep_signal_obj; - } - } - - core::Signal* out_signal_obj = core::Signal::Convert(completion_signal); - IS_VALID(out_signal_obj); - - if (size > 0) { - return core::Runtime::runtime_singleton_->CopyMemory( - dst, *dst_agent, src, *src_agent, size, dep_signal_list, - *out_signal_obj); - } - - return HSA_STATUS_SUCCESS; -} - -hsa_status_t HSA_API - hsa_amd_profiling_set_profiler_enabled(hsa_queue_t* queue, int enable) { - IS_OPEN(); - - core::Queue* cmd_queue = core::Queue::Convert(queue); - - IS_VALID(cmd_queue); - - AMD_HSA_BITS_SET(cmd_queue->amd_queue_.queue_properties, - AMD_QUEUE_PROPERTIES_ENABLE_PROFILING, (enable != 0)); - - return HSA_STATUS_SUCCESS; -} - -hsa_status_t HSA_API hsa_amd_profiling_get_dispatch_time( - hsa_agent_t agent_handle, hsa_signal_t hsa_signal, - hsa_amd_profiling_dispatch_time_t* time) { - IS_OPEN(); - - IS_BAD_PTR(time); - - core::Agent* agent = core::Agent::Convert(agent_handle); - - IS_VALID(agent); - - core::Signal* signal = core::Signal::Convert(hsa_signal); - - IS_VALID(signal); - - if (agent->device_type() != core::Agent::kAmdGpuDevice) { - return HSA_STATUS_ERROR_INVALID_AGENT; - } - - amd::GpuAgentInt* gpu_agent = static_cast(agent); - - gpu_agent->TranslateTime(signal, *time); - - return HSA_STATUS_SUCCESS; -} - -hsa_status_t HSA_API - hsa_amd_profiling_convert_tick_to_system_domain(hsa_agent_t agent_handle, - uint64_t agent_tick, - uint64_t* system_tick) { - IS_OPEN(); - - IS_BAD_PTR(system_tick); - - core::Agent* agent = core::Agent::Convert(agent_handle); - - IS_VALID(agent); - - if (agent->device_type() != 
core::Agent::kAmdGpuDevice) { - return HSA_STATUS_ERROR_INVALID_AGENT; - } - - amd::GpuAgentInt* gpu_agent = static_cast(agent); - - *system_tick = gpu_agent->TranslateTime(agent_tick); - - return HSA_STATUS_SUCCESS; -} - -uint32_t HSA_API - hsa_amd_signal_wait_any(uint32_t signal_count, hsa_signal_t* hsa_signals, - hsa_signal_condition_t* conds, - hsa_signal_value_t* values, uint64_t timeout_hint, - hsa_wait_state_t wait_hint, - hsa_signal_value_t* satisfying_value) { - // Do not check for signal invalidation. Invalidation may occur during async - // signal handler loop and is not an error. - for (uint i = 0; i < signal_count; i++) - assert(hsa_signals[i].handle != 0 && - static_cast*>( - core::Signal::Convert(hsa_signals[i]))->IsValid() && - "Invalid signal."); - - return core::Signal::WaitAny(signal_count, hsa_signals, conds, values, - timeout_hint, wait_hint, satisfying_value); -} - -hsa_status_t HSA_API - hsa_amd_signal_async_handler(hsa_signal_t hsa_signal, - hsa_signal_condition_t cond, - hsa_signal_value_t value, - hsa_amd_signal_handler handler, void* arg) { - IS_OPEN(); - - core::Signal* signal = core::Signal::Convert(hsa_signal); - IS_VALID(signal); - IS_BAD_PTR(handler); - if (!core::InterruptSignal::IsType(signal)) - return HSA_STATUS_ERROR_INVALID_SIGNAL; - return core::Runtime::runtime_singleton_->SetAsyncSignalHandler( - hsa_signal, cond, value, handler, arg); -} - -hsa_status_t HSA_API - hsa_amd_async_function(void (*callback)(void* arg), void* arg) { - IS_OPEN(); - - IS_BAD_PTR(callback); - static const hsa_signal_t null_signal = {0}; - return core::Runtime::runtime_singleton_->SetAsyncSignalHandler( - null_signal, HSA_SIGNAL_CONDITION_EQ, 0, (hsa_amd_signal_handler)callback, - arg); -} - -hsa_status_t HSA_API hsa_amd_queue_cu_set_mask(const hsa_queue_t* queue, - uint32_t num_cu_mask_count, - const uint32_t* cu_mask) { - IS_OPEN(); - IS_BAD_PTR(cu_mask); - - core::Queue* cmd_queue = core::Queue::Convert(queue); - IS_VALID(cmd_queue); - return 
cmd_queue->SetCUMasking(num_cu_mask_count, cu_mask); -} - -hsa_status_t HSA_API hsa_amd_memory_lock(void* host_ptr, size_t size, - hsa_agent_t* agents, int num_agent, - void** agent_ptr) { - *agent_ptr = NULL; - - IS_OPEN(); - - if (size == 0 || host_ptr == NULL || agent_ptr == NULL) { - return HSA_STATUS_ERROR_INVALID_ARGUMENT; - } - - if ((agents != NULL && num_agent == 0) || - (agents == NULL && num_agent != 0)) { - return HSA_STATUS_ERROR_INVALID_ARGUMENT; - } - - const amd::MemoryRegion* system_region = - reinterpret_cast( - core::Runtime::runtime_singleton_->system_regions_fine()[0]); - - return system_region->Lock(num_agent, agents, host_ptr, size, agent_ptr); -} - -hsa_status_t HSA_API hsa_amd_memory_unlock(void* host_ptr) { - IS_OPEN(); - - const amd::MemoryRegion* system_region = - reinterpret_cast( - core::Runtime::runtime_singleton_->system_regions_fine()[0]); - - return system_region->Unlock(host_ptr); -} - -hsa_status_t HSA_API - hsa_amd_memory_pool_get_info(hsa_amd_memory_pool_t memory_pool, - hsa_amd_memory_pool_info_t attribute, - void* value) { - IS_OPEN(); - IS_BAD_PTR(value); - - hsa_region_t region = {memory_pool.handle}; - const amd::MemoryRegion* mem_region = amd::MemoryRegion::Convert(region); - if (mem_region == NULL) { - return (hsa_status_t)HSA_STATUS_ERROR_INVALID_MEMORY_POOL; - } - - return mem_region->GetPoolInfo(attribute, value); -} - -hsa_status_t HSA_API hsa_amd_agent_iterate_memory_pools( - hsa_agent_t agent_handle, - hsa_status_t (*callback)(hsa_amd_memory_pool_t memory_pool, void* data), - void* data) { - IS_OPEN(); - IS_BAD_PTR(callback); - const core::Agent* agent = core::Agent::Convert(agent_handle); - IS_VALID(agent); - - if (agent->device_type() == core::Agent::kAmdCpuDevice) { - return reinterpret_cast(agent)->VisitRegion( - false, reinterpret_cast(callback), - data); - } - - return reinterpret_cast(agent)->VisitRegion( - false, - reinterpret_cast( - callback), - data); -} - -hsa_status_t HSA_API - 
hsa_amd_memory_pool_allocate(hsa_amd_memory_pool_t memory_pool, size_t size, - uint32_t flags, void** ptr) { - IS_OPEN(); - - if (size == 0 || ptr == NULL) { - return HSA_STATUS_ERROR_INVALID_ARGUMENT; - } - - hsa_region_t region = {memory_pool.handle}; - const core::MemoryRegion* mem_region = core::MemoryRegion::Convert(region); - - if (mem_region == NULL || !mem_region->IsValid()) { - return (hsa_status_t)HSA_STATUS_ERROR_INVALID_MEMORY_POOL; - } - - return core::Runtime::runtime_singleton_->AllocateMemory(true, mem_region, - size, ptr); -} - -hsa_status_t HSA_API hsa_amd_memory_pool_free(void* ptr) { - return HSA::hsa_memory_free(ptr); -} - -hsa_status_t HSA_API - hsa_amd_agents_allow_access(uint32_t num_agents, const hsa_agent_t* agents, - const uint32_t* flags, const void* ptr) { - IS_OPEN(); - - if (num_agents == 0 || agents == NULL || flags != NULL || ptr == NULL) { - return HSA_STATUS_ERROR_INVALID_ARGUMENT; - } - - return core::Runtime::runtime_singleton_->AllowAccess(num_agents, agents, - ptr); -} - -hsa_status_t HSA_API - hsa_amd_memory_pool_can_migrate(hsa_amd_memory_pool_t src_memory_pool, - hsa_amd_memory_pool_t dst_memory_pool, - bool* result) { - IS_OPEN(); - - if (result == NULL) { - return HSA_STATUS_ERROR_INVALID_ARGUMENT; - } - - hsa_region_t src_region_handle = {src_memory_pool.handle}; - const amd::MemoryRegion* src_mem_region = - amd::MemoryRegion::Convert(src_region_handle); - - if (src_mem_region == NULL || !src_mem_region->IsValid()) { - return static_cast(HSA_STATUS_ERROR_INVALID_MEMORY_POOL); - } - - hsa_region_t dst_region_handle = {dst_memory_pool.handle}; - const amd::MemoryRegion* dst_mem_region = - amd::MemoryRegion::Convert(dst_region_handle); - - if (dst_mem_region == NULL || !dst_mem_region->IsValid()) { - return static_cast(HSA_STATUS_ERROR_INVALID_MEMORY_POOL); - } - - return src_mem_region->CanMigrate(*dst_mem_region, *result); -} - -hsa_status_t HSA_API hsa_amd_memory_migrate(const void* ptr, - hsa_amd_memory_pool_t 
memory_pool, - uint32_t flags) { - IS_OPEN(); - - if (ptr == NULL || flags != 0) { - return HSA_STATUS_ERROR_INVALID_ARGUMENT; - } - - hsa_region_t dst_region_handle = {memory_pool.handle}; - const amd::MemoryRegion* dst_mem_region = - amd::MemoryRegion::Convert(dst_region_handle); - - if (dst_mem_region == NULL || !dst_mem_region->IsValid()) { - return static_cast(HSA_STATUS_ERROR_INVALID_MEMORY_POOL); - } - - return dst_mem_region->Migrate(flags, ptr); -} - -hsa_status_t HSA_API hsa_amd_agent_memory_pool_get_info( - hsa_agent_t agent_handle, hsa_amd_memory_pool_t memory_pool, - hsa_amd_agent_memory_pool_info_t attribute, void* value) { - IS_OPEN(); - - if (value == NULL) { - return HSA_STATUS_ERROR_INVALID_ARGUMENT; - } - - const core::Agent* agent = core::Agent::Convert(agent_handle); - IS_VALID(agent); - - hsa_region_t region_handle = {memory_pool.handle}; - const amd::MemoryRegion* mem_region = - amd::MemoryRegion::Convert(region_handle); - - if (mem_region == NULL || !mem_region->IsValid()) { - return static_cast(HSA_STATUS_ERROR_INVALID_MEMORY_POOL); - } - - return mem_region->GetAgentPoolInfo(*agent, attribute, value); -} - -hsa_status_t hsa_amd_interop_map_buffer(uint32_t num_agents, - hsa_agent_t* agents, int interop_handle, - uint32_t flags, size_t* size, - void** ptr, size_t* metadata_size, - const void** metadata) { - IS_OPEN(); - IS_BAD_PTR(agents); - IS_BAD_PTR(size); - IS_BAD_PTR(ptr); - if (flags != 0) return HSA_STATUS_ERROR_INVALID_ARGUMENT; - if (num_agents == 0) return HSA_STATUS_ERROR_INVALID_ARGUMENT; - - core::Agent* short_agents[64]; - core::Agent** core_agents = short_agents; - if (num_agents > 64) { - core_agents = new core::Agent* [num_agents]; - if (core_agents == NULL) return HSA_STATUS_ERROR_OUT_OF_RESOURCES; - } - - for (int i = 0; i < num_agents; i++) { - core::Agent* device = core::Agent::Convert(agents[i]); - IS_VALID(device); - core_agents[i] = device; - } - - auto ret = core::Runtime::runtime_singleton_->InteropMap( - 
num_agents, core_agents, interop_handle, flags, size, ptr, metadata_size, - metadata); - - if (num_agents > 64) delete[] core_agents; - return ret; -} - -hsa_status_t hsa_amd_interop_unmap_buffer(void* ptr) { - IS_OPEN(); - if (ptr != NULL) core::Runtime::runtime_singleton_->InteropUnmap(ptr); - return HSA_STATUS_SUCCESS; -} diff --git a/runtime/hsa-runtime/core/runtime/hsa_ext_interface.cpp b/runtime/hsa-runtime/core/runtime/hsa_ext_interface.cpp deleted file mode 100644 index 268e98ebdf..0000000000 --- a/runtime/hsa-runtime/core/runtime/hsa_ext_interface.cpp +++ /dev/null @@ -1,530 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// -// The University of Illinois/NCSA -// Open Source License (NCSA) -// -// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. -// -// Developed by: -// -// AMD Research and AMD HSA Software Development -// -// Advanced Micro Devices, Inc. -// -// www.amd.com -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal with the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimers. -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimers in -// the documentation and/or other materials provided with the distribution. 
-// - Neither the names of Advanced Micro Devices, Inc, -// nor the names of its contributors may be used to endorse or promote -// products derived from this Software without specific prior written -// permission. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR -// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// DEALINGS WITH THE SOFTWARE. -// -//////////////////////////////////////////////////////////////////////////////// - -#include "core/inc/hsa_ext_interface.h" - -#include "core/inc/runtime.h" - -namespace core { -// Implementations for missing / unsupported extensions -template -static T0 hsa_ext_null() { - return HSA_STATUS_ERROR_NOT_INITIALIZED; -} -template -static T0 hsa_ext_null(T1) { - return HSA_STATUS_ERROR_NOT_INITIALIZED; -} -template -static T0 hsa_ext_null(T1, T2) { - return HSA_STATUS_ERROR_NOT_INITIALIZED; -} -template -static T0 hsa_ext_null(T1, T2, T3) { - return HSA_STATUS_ERROR_NOT_INITIALIZED; -} -template -static T0 hsa_ext_null(T1, T2, T3, T4) { - return HSA_STATUS_ERROR_NOT_INITIALIZED; -} -template -static T0 hsa_ext_null(T1, T2, T3, T4, T5) { - return HSA_STATUS_ERROR_NOT_INITIALIZED; -} -template -static T0 hsa_ext_null(T1, T2, T3, T4, T5, T6) { - return HSA_STATUS_ERROR_NOT_INITIALIZED; -} -template -static T0 hsa_ext_null(T1, T2, T3, T4, T5, T6, T7) { - return HSA_STATUS_ERROR_NOT_INITIALIZED; -} -template -static T0 hsa_ext_null(T1, T2, T3, T4, T5, T6, T7, T8) { - return HSA_STATUS_ERROR_NOT_INITIALIZED; -} -template -static T0 hsa_ext_null(T1, T2, T3, T4, T5, T6, T7, T8, T9) { - return HSA_STATUS_ERROR_NOT_INITIALIZED; -} -template -static T0 hsa_ext_null(T1, T2, 
T3, T4, T5, T6, T7, T8, T9, T10) { - return HSA_STATUS_ERROR_NOT_INITIALIZED; -} -template -static T0 hsa_ext_null(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11) { - return HSA_STATUS_ERROR_NOT_INITIALIZED; -} -template -static T0 hsa_ext_null(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12) { - return HSA_STATUS_ERROR_NOT_INITIALIZED; -} -template -static T0 hsa_ext_null(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13) { - return HSA_STATUS_ERROR_NOT_INITIALIZED; -} -template -static T0 hsa_ext_null(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, - T14) { - return HSA_STATUS_ERROR_NOT_INITIALIZED; -} -template -static T0 hsa_ext_null(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, - T14, T15) { - return HSA_STATUS_ERROR_NOT_INITIALIZED; -} -template -static T0 hsa_ext_null(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, - T14, T15, T16) { - return HSA_STATUS_ERROR_NOT_INITIALIZED; -} -template -static T0 hsa_ext_null(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, - T14, T15, T16, T17) { - return HSA_STATUS_ERROR_NOT_INITIALIZED; -} -template -static T0 hsa_ext_null(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, - T14, T15, T16, T17, T18) { - return HSA_STATUS_ERROR_NOT_INITIALIZED; -} -template -static T0 hsa_ext_null(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, - T14, T15, T16, T17, T18, T19) { - return HSA_STATUS_ERROR_NOT_INITIALIZED; -} -template -static T0 hsa_ext_null(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, - T14, T15, T16, T17, T18, T19, T20) { - return HSA_STATUS_ERROR_NOT_INITIALIZED; -} - -ExtensionEntryPoints::ExtensionEntryPoints() { InitTable(); } - -void ExtensionEntryPoints::InitTable() { - table.hsa_ext_program_create_fn = hsa_ext_null; - table.hsa_ext_program_destroy_fn = hsa_ext_null; - table.hsa_ext_program_add_module_fn = hsa_ext_null; - table.hsa_ext_program_iterate_modules_fn = hsa_ext_null; - table.hsa_ext_program_get_info_fn = hsa_ext_null; - 
table.hsa_ext_program_finalize_fn = hsa_ext_null; - table.hsa_ext_image_get_capability_fn = hsa_ext_null; - table.hsa_ext_image_data_get_info_fn = hsa_ext_null; - table.hsa_ext_image_create_fn = hsa_ext_null; - table.hsa_ext_image_import_fn = hsa_ext_null; - table.hsa_ext_image_export_fn = hsa_ext_null; - table.hsa_ext_image_copy_fn = hsa_ext_null; - table.hsa_ext_image_clear_fn = hsa_ext_null; - table.hsa_ext_image_destroy_fn = hsa_ext_null; - table.hsa_ext_sampler_create_fn = hsa_ext_null; - table.hsa_ext_sampler_destroy_fn = hsa_ext_null; - table.hsa_amd_image_get_info_max_dim_fn = hsa_ext_null; - table.hsa_amd_image_create_fn = hsa_ext_null; -} - -void ExtensionEntryPoints::Unload() { - for (int i = 0; i < libs_.size(); i++) { - void* ptr = os::GetExportAddress(libs_[i], "Unload"); - if (ptr) { - ((Unload_t)ptr)(); - } - } - // Due to valgrind bug, runtime cannot dlclose extensions see: - // http://valgrind.org/docs/manual/faq.html#faq.unhelpful - if (os::GetEnvVar("HSA_RUNNING_UNDER_VALGRIND") != "1") { - for (int i = 0; i < libs_.size(); i++) { - os::CloseLib(libs_[i]); - } - } - libs_.clear(); - InitTable(); -} - -bool ExtensionEntryPoints::Load(std::string library_name) { - os::LibHandle lib = os::LoadLib(library_name); - if (lib == NULL) { - return false; - } - libs_.push_back(lib); - - void* ptr; - - ptr = os::GetExportAddress(lib, "hsa_ext_program_create_impl"); - if (ptr != NULL) { - assert(table.hsa_ext_program_create_fn == - (decltype(::hsa_ext_program_create)*)hsa_ext_null && - "Duplicate load of extension import."); - table.hsa_ext_program_create_fn = (decltype(::hsa_ext_program_create)*)ptr; - } - - ptr = os::GetExportAddress(lib, "hsa_ext_program_destroy_impl"); - if (ptr != NULL) { - assert(table.hsa_ext_program_destroy_fn == - (decltype(::hsa_ext_program_destroy)*)hsa_ext_null && - "Duplicate load of extension import."); - table.hsa_ext_program_destroy_fn = - (decltype(::hsa_ext_program_destroy)*)ptr; - } - - ptr = os::GetExportAddress(lib, 
"hsa_ext_program_add_module_impl"); - if (ptr != NULL) { - assert(table.hsa_ext_program_add_module_fn == - (decltype(::hsa_ext_program_add_module)*)hsa_ext_null && - "Duplicate load of extension import."); - table.hsa_ext_program_add_module_fn = - (decltype(::hsa_ext_program_add_module)*)ptr; - } - - ptr = os::GetExportAddress(lib, "hsa_ext_program_iterate_modules_impl"); - if (ptr != NULL) { - assert(table.hsa_ext_program_iterate_modules_fn == - (decltype(::hsa_ext_program_iterate_modules)*)hsa_ext_null && - "Duplicate load of extension import."); - table.hsa_ext_program_iterate_modules_fn = - (decltype(::hsa_ext_program_iterate_modules)*)ptr; - } - - ptr = os::GetExportAddress(lib, "hsa_ext_program_get_info_impl"); - if (ptr != NULL) { - assert(table.hsa_ext_program_get_info_fn == - (decltype(::hsa_ext_program_get_info)*)hsa_ext_null && - "Duplicate load of extension import."); - table.hsa_ext_program_get_info_fn = - (decltype(::hsa_ext_program_get_info)*)ptr; - } - - ptr = os::GetExportAddress(lib, "hsa_ext_program_finalize_impl"); - if (ptr != NULL) { - assert(table.hsa_ext_program_finalize_fn == - (decltype(::hsa_ext_program_finalize)*)hsa_ext_null && - "Duplicate load of extension import."); - table.hsa_ext_program_finalize_fn = - (decltype(::hsa_ext_program_finalize)*)ptr; - } - - ptr = os::GetExportAddress(lib, "hsa_ext_image_get_capability_impl"); - if (ptr != NULL) { - assert(table.hsa_ext_image_get_capability_fn == - (decltype(::hsa_ext_image_get_capability)*)hsa_ext_null && - "Duplicate load of extension import."); - table.hsa_ext_image_get_capability_fn = - (decltype(::hsa_ext_image_get_capability)*)ptr; - } - - ptr = os::GetExportAddress(lib, "hsa_ext_image_data_get_info_impl"); - if (ptr != NULL) { - assert(table.hsa_ext_image_data_get_info_fn == - (decltype(::hsa_ext_image_data_get_info)*)hsa_ext_null && - "Duplicate load of extension import."); - table.hsa_ext_image_data_get_info_fn = - (decltype(::hsa_ext_image_data_get_info)*)ptr; - } - - ptr = 
os::GetExportAddress(lib, "hsa_ext_image_create_impl"); - if (ptr != NULL) { - assert(table.hsa_ext_image_create_fn == - (decltype(::hsa_ext_image_create)*)hsa_ext_null && - "Duplicate load of extension import."); - table.hsa_ext_image_create_fn = (decltype(::hsa_ext_image_create)*)ptr; - } - - ptr = os::GetExportAddress(lib, "hsa_ext_image_import_impl"); - if (ptr != NULL) { - assert(table.hsa_ext_image_import_fn == - (decltype(::hsa_ext_image_import)*)hsa_ext_null && - "Duplicate load of extension import."); - table.hsa_ext_image_import_fn = (decltype(::hsa_ext_image_import)*)ptr; - } - - ptr = os::GetExportAddress(lib, "hsa_ext_image_export_impl"); - if (ptr != NULL) { - assert(table.hsa_ext_image_export_fn == - (decltype(::hsa_ext_image_export)*)hsa_ext_null && - "Duplicate load of extension import."); - table.hsa_ext_image_export_fn = (decltype(::hsa_ext_image_export)*)ptr; - } - - ptr = os::GetExportAddress(lib, "hsa_ext_image_copy_impl"); - if (ptr != NULL) { - assert(table.hsa_ext_image_copy_fn == - (decltype(::hsa_ext_image_copy)*)hsa_ext_null && - "Duplicate load of extension import."); - table.hsa_ext_image_copy_fn = (decltype(::hsa_ext_image_copy)*)ptr; - } - - ptr = os::GetExportAddress(lib, "hsa_ext_image_clear_impl"); - if (ptr != NULL) { - assert(table.hsa_ext_image_clear_fn == - (decltype(::hsa_ext_image_clear)*)hsa_ext_null && - "Duplicate load of extension import."); - table.hsa_ext_image_clear_fn = (decltype(::hsa_ext_image_clear)*)ptr; - } - - ptr = os::GetExportAddress(lib, "hsa_ext_image_destroy_impl"); - if (ptr != NULL) { - assert(table.hsa_ext_image_destroy_fn == - (decltype(::hsa_ext_image_destroy)*)hsa_ext_null && - "Duplicate load of extension import."); - table.hsa_ext_image_destroy_fn = (decltype(::hsa_ext_image_destroy)*)ptr; - } - - ptr = os::GetExportAddress(lib, "hsa_ext_sampler_create_impl"); - if (ptr != NULL) { - assert(table.hsa_ext_sampler_create_fn == - (decltype(::hsa_ext_sampler_create)*)hsa_ext_null && - "Duplicate load 
of extension import."); - table.hsa_ext_sampler_create_fn = (decltype(::hsa_ext_sampler_create)*)ptr; - } - - ptr = os::GetExportAddress(lib, "hsa_ext_sampler_destroy_impl"); - if (ptr != NULL) { - assert(table.hsa_ext_sampler_destroy_fn == - (decltype(::hsa_ext_sampler_destroy)*)hsa_ext_null && - "Duplicate load of extension import."); - table.hsa_ext_sampler_destroy_fn = - (decltype(::hsa_ext_sampler_destroy)*)ptr; - } - - ptr = os::GetExportAddress(lib, "hsa_amd_image_get_info_max_dim_impl"); - if (ptr != NULL) { - assert(table.hsa_amd_image_get_info_max_dim_fn == - (decltype(::hsa_amd_image_get_info_max_dim)*)hsa_ext_null && - "Duplicate load of extension import."); - table.hsa_amd_image_get_info_max_dim_fn = - (decltype(::hsa_amd_image_get_info_max_dim)*)ptr; - } - - ptr = os::GetExportAddress(lib, "hsa_amd_image_create_impl"); - if (ptr != NULL) { - assert(table.hsa_amd_image_create_fn == - (decltype(::hsa_amd_image_create)*)hsa_ext_null && - "Duplicate load of extension import."); - table.hsa_amd_image_create_fn = - (decltype(::hsa_amd_image_create)*)ptr; - } - - core::hsa_internal_api_table_.extension_backup=table; - core::hsa_internal_api_table_.table.std_exts_=&core::hsa_internal_api_table_.extension_backup; - - ptr = os::GetExportAddress(lib, "Load"); - if (ptr != NULL) { - ((Load_t)ptr)(&core::hsa_internal_api_table_.table); - } - - return true; -} -} // namespace core - -//---------------------------------------------------------------------------// -// Exported extension stub functions -//---------------------------------------------------------------------------// - -hsa_status_t hsa_ext_program_create( - hsa_machine_model_t machine_model, hsa_profile_t profile, - hsa_default_float_rounding_mode_t default_float_rounding_mode, - const char* options, hsa_ext_program_t* program) { - return core::Runtime::runtime_singleton_->extensions_.table - .hsa_ext_program_create_fn(machine_model, profile, - default_float_rounding_mode, options, program); -} - 
-hsa_status_t hsa_ext_program_destroy(hsa_ext_program_t program) { - return core::Runtime::runtime_singleton_->extensions_.table - .hsa_ext_program_destroy_fn(program); -} - -hsa_status_t hsa_ext_program_add_module(hsa_ext_program_t program, - hsa_ext_module_t module) { - return core::Runtime::runtime_singleton_->extensions_.table - .hsa_ext_program_add_module_fn(program, module); -} - -hsa_status_t hsa_ext_program_iterate_modules( - hsa_ext_program_t program, - hsa_status_t (*callback)(hsa_ext_program_t program, hsa_ext_module_t module, - void* data), - void* data) { - return core::Runtime::runtime_singleton_->extensions_.table - .hsa_ext_program_iterate_modules_fn(program, callback, data); -} - -hsa_status_t hsa_ext_program_get_info(hsa_ext_program_t program, - hsa_ext_program_info_t attribute, - void* value) { - return core::Runtime::runtime_singleton_->extensions_.table - .hsa_ext_program_get_info_fn(program, attribute, value); -} - -hsa_status_t hsa_ext_program_finalize( - hsa_ext_program_t program, hsa_isa_t isa, int32_t call_convention, - hsa_ext_control_directives_t control_directives, const char* options, - hsa_code_object_type_t code_object_type, hsa_code_object_t* code_object) { - return core::Runtime::runtime_singleton_->extensions_.table - .hsa_ext_program_finalize_fn(program, isa, call_convention, - control_directives, options, - code_object_type, code_object); -} - -hsa_status_t hsa_ext_image_get_capability( - hsa_agent_t agent, hsa_ext_image_geometry_t geometry, - const hsa_ext_image_format_t* image_format, uint32_t* capability_mask) { - return core::Runtime::runtime_singleton_->extensions_.table - .hsa_ext_image_get_capability_fn(agent, geometry, image_format, - capability_mask); -} - -hsa_status_t hsa_ext_image_data_get_info( - hsa_agent_t agent, const hsa_ext_image_descriptor_t* image_descriptor, - hsa_access_permission_t access_permission, - hsa_ext_image_data_info_t* image_data_info) { - return 
core::Runtime::runtime_singleton_->extensions_.table - .hsa_ext_image_data_get_info_fn(agent, image_descriptor, - access_permission, image_data_info); -} - -hsa_status_t hsa_ext_image_create( - hsa_agent_t agent, const hsa_ext_image_descriptor_t* image_descriptor, - const void* image_data, hsa_access_permission_t access_permission, - hsa_ext_image_t* image) { - return core::Runtime::runtime_singleton_->extensions_.table - .hsa_ext_image_create_fn(agent, image_descriptor, image_data, - access_permission, image); -} - -hsa_status_t hsa_ext_image_import(hsa_agent_t agent, const void* src_memory, - size_t src_row_pitch, size_t src_slice_pitch, - hsa_ext_image_t dst_image, - const hsa_ext_image_region_t* image_region) { - return core::Runtime::runtime_singleton_->extensions_.table - .hsa_ext_image_import_fn(agent, src_memory, src_row_pitch, - src_slice_pitch, dst_image, image_region); -} - -hsa_status_t hsa_ext_image_export(hsa_agent_t agent, hsa_ext_image_t src_image, - void* dst_memory, size_t dst_row_pitch, - size_t dst_slice_pitch, - const hsa_ext_image_region_t* image_region) { - return core::Runtime::runtime_singleton_->extensions_.table - .hsa_ext_image_export_fn(agent, src_image, dst_memory, dst_row_pitch, - dst_slice_pitch, image_region); -} - -hsa_status_t hsa_ext_image_copy(hsa_agent_t agent, hsa_ext_image_t src_image, - const hsa_dim3_t* src_offset, - hsa_ext_image_t dst_image, - const hsa_dim3_t* dst_offset, - const hsa_dim3_t* range) { - return core::Runtime::runtime_singleton_->extensions_.table - .hsa_ext_image_copy_fn(agent, src_image, src_offset, dst_image, - dst_offset, range); -} - -hsa_status_t hsa_ext_image_clear(hsa_agent_t agent, hsa_ext_image_t image, - const void* data, - const hsa_ext_image_region_t* image_region) { - return core::Runtime::runtime_singleton_->extensions_.table - .hsa_ext_image_clear_fn(agent, image, data, image_region); -} - -hsa_status_t hsa_ext_image_destroy(hsa_agent_t agent, hsa_ext_image_t image) { - return 
core::Runtime::runtime_singleton_->extensions_.table - .hsa_ext_image_destroy_fn(agent, image); -} - -hsa_status_t hsa_ext_sampler_create( - hsa_agent_t agent, const hsa_ext_sampler_descriptor_t* sampler_descriptor, - hsa_ext_sampler_t* sampler) { - return core::Runtime::runtime_singleton_->extensions_.table - .hsa_ext_sampler_create_fn(agent, sampler_descriptor, sampler); -} - -hsa_status_t hsa_ext_sampler_destroy(hsa_agent_t agent, - hsa_ext_sampler_t sampler) { - return core::Runtime::runtime_singleton_->extensions_.table - .hsa_ext_sampler_destroy_fn(agent, sampler); -} - -//---------------------------------------------------------------------------// -// Stubs for internal extension functions -//---------------------------------------------------------------------------// - -hsa_status_t hsa_amd_image_get_info_max_dim(hsa_agent_t component, - hsa_agent_info_t attribute, - void* value) { - return core::Runtime::runtime_singleton_->extensions_.table - .hsa_amd_image_get_info_max_dim_fn(component, attribute, value); -} - -hsa_status_t hsa_amd_image_create( - hsa_agent_t agent, - const hsa_ext_image_descriptor_t *image_descriptor, - const hsa_amd_image_descriptor_t *image_layout, - const void *image_data, - hsa_access_permission_t access_permission, - hsa_ext_image_t *image) { - return core::Runtime::runtime_singleton_->extensions_.table - .hsa_amd_image_create_fn(agent, image_descriptor, image_layout, image_data, access_permission, image); -} diff --git a/runtime/hsa-runtime/core/runtime/interrupt_signal.cpp b/runtime/hsa-runtime/core/runtime/interrupt_signal.cpp deleted file mode 100644 index 67c95867dd..0000000000 --- a/runtime/hsa-runtime/core/runtime/interrupt_signal.cpp +++ /dev/null @@ -1,372 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// -// The University of Illinois/NCSA -// Open Source License (NCSA) -// -// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. 
-// -// Developed by: -// -// AMD Research and AMD HSA Software Development -// -// Advanced Micro Devices, Inc. -// -// www.amd.com -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal with the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimers. -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimers in -// the documentation and/or other materials provided with the distribution. -// - Neither the names of Advanced Micro Devices, Inc, -// nor the names of its contributors may be used to endorse or promote -// products derived from this Software without specific prior written -// permission. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR -// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// DEALINGS WITH THE SOFTWARE. 
-// -//////////////////////////////////////////////////////////////////////////////// - -#include "core/inc/interrupt_signal.h" -#include "core/util/timer.h" - -namespace core { - -HsaEvent* InterruptSignal::CreateEvent(HSA_EVENTTYPE type, bool manual_reset) { - HsaEventDescriptor event_descriptor; - event_descriptor.EventType = type; - event_descriptor.SyncVar.SyncVar.UserData = NULL; - event_descriptor.SyncVar.SyncVarSize = sizeof(hsa_signal_value_t); - event_descriptor.NodeId = 0; - - HsaEvent* ret = NULL; - if (HSAKMT_STATUS_SUCCESS == - hsaKmtCreateEvent(&event_descriptor, manual_reset, false, &ret)) { - if (type == HSA_EVENTTYPE_MEMORY) { - memset(&ret->EventData.EventData.MemoryAccessFault.Failure, 0, - sizeof(HsaAccessAttributeFailure)); - } - } - - return ret; -} - -int InterruptSignal::rtti_id_ = 0; - -void InterruptSignal::DestroyEvent(HsaEvent* evt) { hsaKmtDestroyEvent(evt); } - -InterruptSignal::InterruptSignal(hsa_signal_value_t initial_value, - HsaEvent* use_event) - : Signal(initial_value) { - if (use_event != NULL) { - event_ = use_event; - free_event_ = false; - } else { - event_ = CreateEvent(HSA_EVENTTYPE_SIGNAL, false); - free_event_ = true; - } - - if (event_ != NULL) { - signal_.event_id = event_->EventId; - signal_.event_mailbox_ptr = event_->EventData.HWData2; - } else { - signal_.event_id = 0; - signal_.event_mailbox_ptr = 0; - } - signal_.kind = AMD_SIGNAL_KIND_USER; - - wait_on_event_ = true; -} - -InterruptSignal::~InterruptSignal() { - invalid_ = true; - SetEvent(); - while (InUse()) - ; - if (free_event_) hsaKmtDestroyEvent(event_); -} - -hsa_signal_value_t InterruptSignal::LoadRelaxed() { - return hsa_signal_value_t( - atomic::Load(&signal_.value, std::memory_order_relaxed)); -} - -hsa_signal_value_t InterruptSignal::LoadAcquire() { - return hsa_signal_value_t( - atomic::Load(&signal_.value, std::memory_order_acquire)); -} - -void InterruptSignal::StoreRelaxed(hsa_signal_value_t value) { - wait_on_event_ = true; - 
atomic::Store(&signal_.value, int64_t(value), std::memory_order_relaxed); - SetEvent(); -} - -void InterruptSignal::StoreRelease(hsa_signal_value_t value) { - wait_on_event_ = true; - atomic::Store(&signal_.value, int64_t(value), std::memory_order_release); - SetEvent(); -} - -hsa_signal_value_t InterruptSignal::WaitRelaxed( - hsa_signal_condition_t condition, hsa_signal_value_t compare_value, - uint64_t timeout, hsa_wait_state_t wait_hint) { - uint32_t prior = atomic::Increment(&waiting_); - - // assert(prior == 0 && "Multiple waiters on interrupt signal!"); - // Allow only the first waiter to sleep (temporary, known to be bad). - if (prior != 0) wait_hint = HSA_WAIT_STATE_ACTIVE; - - MAKE_SCOPE_GUARD([&]() { atomic::Decrement(&waiting_); }); - - int64_t value; - - timer::fast_clock::time_point start_time = timer::fast_clock::now(); - - // Set a polling timeout value - // Exact time is not hugely important, it should just be a short while which - // is smaller than the thread scheduling quantum (usually around 16ms) - const timer::fast_clock::duration kMaxElapsed = std::chrono::milliseconds(5); - - uint64_t hsa_freq; - HSA::hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &hsa_freq); - const timer::fast_clock::duration fast_timeout = - timer::duration_from_seconds( - double(timeout) / double(hsa_freq)); - - bool condition_met = false; - while (true) { - if (invalid_) return 0; - - value = atomic::Load(&signal_.value, std::memory_order_relaxed); - - switch (condition) { - case HSA_SIGNAL_CONDITION_EQ: { - condition_met = (value == compare_value); - break; - } - case HSA_SIGNAL_CONDITION_NE: { - condition_met = (value != compare_value); - break; - } - case HSA_SIGNAL_CONDITION_GTE: { - condition_met = (value >= compare_value); - break; - } - case HSA_SIGNAL_CONDITION_LT: { - condition_met = (value < compare_value); - break; - } - default: - return 0; - } - if (condition_met) return hsa_signal_value_t(value); - - timer::fast_clock::time_point time = 
timer::fast_clock::now(); - if (time - start_time > kMaxElapsed) { - if (time - start_time > fast_timeout) { - value = atomic::Load(&signal_.value, std::memory_order_relaxed); - return hsa_signal_value_t(value); - } - if (wait_on_event_ && wait_hint != HSA_WAIT_STATE_ACTIVE) { - uint32_t wait_ms; - auto time_remaining = fast_timeout - (time - start_time); - if ((timeout == -1) || - (time_remaining > std::chrono::milliseconds(uint32_t(-1)))) - wait_ms = uint32_t(-1); - else - wait_ms = timer::duration_cast( - time_remaining).count(); - hsaKmtWaitOnEvent(event_, wait_ms); - } - } - } -} - -hsa_signal_value_t InterruptSignal::WaitAcquire( - hsa_signal_condition_t condition, hsa_signal_value_t compare_value, - uint64_t timeout, hsa_wait_state_t wait_hint) { - hsa_signal_value_t ret = - WaitRelaxed(condition, compare_value, timeout, wait_hint); - std::atomic_thread_fence(std::memory_order_acquire); - return ret; -} - -void InterruptSignal::AndRelaxed(hsa_signal_value_t value) { - atomic::And(&signal_.value, int64_t(value), std::memory_order_relaxed); - SetEvent(); -} - -void InterruptSignal::AndAcquire(hsa_signal_value_t value) { - atomic::And(&signal_.value, int64_t(value), std::memory_order_acquire); - SetEvent(); -} - -void InterruptSignal::AndRelease(hsa_signal_value_t value) { - atomic::And(&signal_.value, int64_t(value), std::memory_order_release); - SetEvent(); -} - -void InterruptSignal::AndAcqRel(hsa_signal_value_t value) { - atomic::And(&signal_.value, int64_t(value), std::memory_order_acq_rel); - SetEvent(); -} - -void InterruptSignal::OrRelaxed(hsa_signal_value_t value) { - atomic::Or(&signal_.value, int64_t(value), std::memory_order_relaxed); - SetEvent(); -} - -void InterruptSignal::OrAcquire(hsa_signal_value_t value) { - atomic::Or(&signal_.value, int64_t(value), std::memory_order_acquire); - SetEvent(); -} - -void InterruptSignal::OrRelease(hsa_signal_value_t value) { - atomic::Or(&signal_.value, int64_t(value), std::memory_order_release); - SetEvent(); 
-} - -void InterruptSignal::OrAcqRel(hsa_signal_value_t value) { - atomic::Or(&signal_.value, int64_t(value), std::memory_order_acq_rel); - SetEvent(); -} - -void InterruptSignal::XorRelaxed(hsa_signal_value_t value) { - atomic::Xor(&signal_.value, int64_t(value), std::memory_order_relaxed); - SetEvent(); -} - -void InterruptSignal::XorAcquire(hsa_signal_value_t value) { - atomic::Xor(&signal_.value, int64_t(value), std::memory_order_acquire); - SetEvent(); -} - -void InterruptSignal::XorRelease(hsa_signal_value_t value) { - atomic::Xor(&signal_.value, int64_t(value), std::memory_order_release); - SetEvent(); -} - -void InterruptSignal::XorAcqRel(hsa_signal_value_t value) { - atomic::Xor(&signal_.value, int64_t(value), std::memory_order_acq_rel); - SetEvent(); -} - -void InterruptSignal::AddRelaxed(hsa_signal_value_t value) { - atomic::Add(&signal_.value, int64_t(value), std::memory_order_relaxed); - SetEvent(); -} - -void InterruptSignal::AddAcquire(hsa_signal_value_t value) { - atomic::Add(&signal_.value, int64_t(value), std::memory_order_acquire); - SetEvent(); -} - -void InterruptSignal::AddRelease(hsa_signal_value_t value) { - atomic::Add(&signal_.value, int64_t(value), std::memory_order_release); - SetEvent(); -} - -void InterruptSignal::AddAcqRel(hsa_signal_value_t value) { - atomic::Add(&signal_.value, int64_t(value), std::memory_order_acq_rel); - SetEvent(); -} - -void InterruptSignal::SubRelaxed(hsa_signal_value_t value) { - atomic::Sub(&signal_.value, int64_t(value), std::memory_order_relaxed); - SetEvent(); -} - -void InterruptSignal::SubAcquire(hsa_signal_value_t value) { - atomic::Sub(&signal_.value, int64_t(value), std::memory_order_acquire); - SetEvent(); -} - -void InterruptSignal::SubRelease(hsa_signal_value_t value) { - atomic::Sub(&signal_.value, int64_t(value), std::memory_order_release); - SetEvent(); -} - -void InterruptSignal::SubAcqRel(hsa_signal_value_t value) { - atomic::Sub(&signal_.value, int64_t(value), std::memory_order_acq_rel); - 
SetEvent(); -} - -hsa_signal_value_t InterruptSignal::ExchRelaxed(hsa_signal_value_t value) { - hsa_signal_value_t ret = hsa_signal_value_t(atomic::Exchange( - &signal_.value, int64_t(value), std::memory_order_relaxed)); - SetEvent(); - return ret; -} - -hsa_signal_value_t InterruptSignal::ExchAcquire(hsa_signal_value_t value) { - hsa_signal_value_t ret = hsa_signal_value_t(atomic::Exchange( - &signal_.value, int64_t(value), std::memory_order_acquire)); - SetEvent(); - return ret; -} - -hsa_signal_value_t InterruptSignal::ExchRelease(hsa_signal_value_t value) { - hsa_signal_value_t ret = hsa_signal_value_t(atomic::Exchange( - &signal_.value, int64_t(value), std::memory_order_release)); - SetEvent(); - return ret; -} - -hsa_signal_value_t InterruptSignal::ExchAcqRel(hsa_signal_value_t value) { - hsa_signal_value_t ret = hsa_signal_value_t(atomic::Exchange( - &signal_.value, int64_t(value), std::memory_order_acq_rel)); - SetEvent(); - return ret; -} - -hsa_signal_value_t InterruptSignal::CasRelaxed(hsa_signal_value_t expected, - hsa_signal_value_t value) { - hsa_signal_value_t ret = hsa_signal_value_t( - atomic::Cas(&signal_.value, int64_t(value), int64_t(expected), - std::memory_order_relaxed)); - SetEvent(); - return ret; -} - -hsa_signal_value_t InterruptSignal::CasAcquire(hsa_signal_value_t expected, - hsa_signal_value_t value) { - hsa_signal_value_t ret = hsa_signal_value_t( - atomic::Cas(&signal_.value, int64_t(value), int64_t(expected), - std::memory_order_acquire)); - SetEvent(); - return ret; -} - -hsa_signal_value_t InterruptSignal::CasRelease(hsa_signal_value_t expected, - hsa_signal_value_t value) { - hsa_signal_value_t ret = hsa_signal_value_t( - atomic::Cas(&signal_.value, int64_t(value), int64_t(expected), - std::memory_order_release)); - SetEvent(); - return ret; -} - -hsa_signal_value_t InterruptSignal::CasAcqRel(hsa_signal_value_t expected, - hsa_signal_value_t value) { - hsa_signal_value_t ret = hsa_signal_value_t( - atomic::Cas(&signal_.value, 
int64_t(value), int64_t(expected), - std::memory_order_acq_rel)); - SetEvent(); - return ret; -} - -} // namespace core diff --git a/runtime/hsa-runtime/core/runtime/isa.cpp b/runtime/hsa-runtime/core/runtime/isa.cpp deleted file mode 100644 index 86891aaef8..0000000000 --- a/runtime/hsa-runtime/core/runtime/isa.cpp +++ /dev/null @@ -1,130 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// -// The University of Illinois/NCSA -// Open Source License (NCSA) -// -// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. -// -// Developed by: -// -// AMD Research and AMD HSA Software Development -// -// Advanced Micro Devices, Inc. -// -// www.amd.com -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal with the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimers. -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimers in -// the documentation and/or other materials provided with the distribution. -// - Neither the names of Advanced Micro Devices, Inc, -// nor the names of its contributors may be used to endorse or promote -// products derived from this Software without specific prior written -// permission. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL -// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR -// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// DEALINGS WITH THE SOFTWARE. -// -//////////////////////////////////////////////////////////////////////////////// - -#include "core/inc/isa.h" - -#include -#include - -namespace core { - -const IsaRegistry::IsaMap IsaRegistry::supported_isas_ = - IsaRegistry::GetSupportedIsas(); - -const Isa *IsaRegistry::GetIsa(const std::string &full_name) { - auto isareg_iter = supported_isas_.find(full_name); - return isareg_iter == supported_isas_.end() ? nullptr : &isareg_iter->second; -} - -const Isa *IsaRegistry::GetIsa(const Isa::Version &version) { - auto isareg_iter = supported_isas_.find(Isa(version).GetFullName()); - return isareg_iter == supported_isas_.end() ? nullptr : &isareg_iter->second; -} - -const IsaRegistry::IsaMap IsaRegistry::GetSupportedIsas() { -#define ISAREG_ENTRY_GEN(maj, min, stp) \ - Isa amd_amdgpu_##maj##min##stp; \ - amd_amdgpu_##maj##min##stp.version_ = Isa::Version(maj, min, stp); \ - supported_isas.insert( \ - std::make_pair( \ - amd_amdgpu_##maj##min##stp.GetFullName(), amd_amdgpu_##maj##min##stp)); \ - - IsaMap supported_isas; - - ISAREG_ENTRY_GEN(7, 0, 0) - ISAREG_ENTRY_GEN(7, 0, 1) - ISAREG_ENTRY_GEN(8, 0, 0) - ISAREG_ENTRY_GEN(8, 0, 1) - ISAREG_ENTRY_GEN(8, 0, 2) - ISAREG_ENTRY_GEN(8, 0, 3) - ISAREG_ENTRY_GEN(8, 1, 0) - ISAREG_ENTRY_GEN(9, 0, 0) - - return supported_isas; -} - -std::string Isa::GetFullName() const { - std::stringstream full_name; - full_name << GetVendor() << ":" << GetArchitecture() << ":" - << GetMajorVersion() << ":" << GetMinorVersion() << ":" - << GetStepping(); - return full_name.str(); -} - -bool Isa::GetInfo(const hsa_isa_info_t &attribute, void *value) const { - if (!value) { - return false; - } - - switch (attribute) { - case HSA_ISA_INFO_NAME_LENGTH: { - 
std::string full_name = GetFullName(); - *((uint32_t *)value) = static_cast(full_name.size()); - return true; - } - case HSA_ISA_INFO_NAME: { - std::string full_name = GetFullName(); - memcpy(value, full_name.c_str(), full_name.size()); - return true; - } - // @todo: following case needs to be removed - case HSA_ISA_INFO_CALL_CONVENTION_COUNT: { - *((uint32_t *)value) = 1; - return true; - } - // @todo: following case needs to be removed - case HSA_ISA_INFO_CALL_CONVENTION_INFO_WAVEFRONT_SIZE: { - *((uint32_t *)value) = 64; - return true; - } - // @todo: following needs to be removed - case HSA_ISA_INFO_CALL_CONVENTION_INFO_WAVEFRONTS_PER_COMPUTE_UNIT: { - *((uint32_t *)value) = 40; - return true; - } - default: { - return false; - } - } -} - -} // namespace core diff --git a/runtime/hsa-runtime/core/runtime/runtime.cpp b/runtime/hsa-runtime/core/runtime/runtime.cpp deleted file mode 100644 index 8ee17b2911..0000000000 --- a/runtime/hsa-runtime/core/runtime/runtime.cpp +++ /dev/null @@ -1,1010 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// -// The University of Illinois/NCSA -// Open Source License (NCSA) -// -// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. -// -// Developed by: -// -// AMD Research and AMD HSA Software Development -// -// Advanced Micro Devices, Inc. -// -// www.amd.com -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal with the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimers. 
-// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimers in -// the documentation and/or other materials provided with the distribution. -// - Neither the names of Advanced Micro Devices, Inc, -// nor the names of its contributors may be used to endorse or promote -// products derived from this Software without specific prior written -// permission. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR -// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// DEALINGS WITH THE SOFTWARE. -// -//////////////////////////////////////////////////////////////////////////////// - -#include "core/inc/runtime.h" - -#include -#include -#include -#include -#include -#include - -#include "core/common/shared.h" - -#include "core/inc/hsa_ext_interface.h" -#include "core/inc/amd_cpu_agent.h" -#include "core/inc/amd_gpu_agent.h" -#include "core/inc/amd_memory_region.h" -#include "core/inc/amd_topology.h" -#include "core/inc/signal.h" -#include "core/inc/interrupt_signal.h" - -#include "core/inc/hsa_api_trace_int.h" - -#define HSA_VERSION_MAJOR 1 -#define HSA_VERSION_MINOR 0 - -namespace core { -bool g_use_interrupt_wait = true; - -Runtime* Runtime::runtime_singleton_ = NULL; - -KernelMutex Runtime::bootstrap_lock_; - -static bool loaded = true; - -class RuntimeCleanup { - public: - ~RuntimeCleanup() { - if (!Runtime::IsOpen()) { - delete Runtime::runtime_singleton_; - } - - loaded = false; - } -}; - -static RuntimeCleanup cleanup_at_unload_; - -bool Runtime::Acquire() { - // Check to see if HSA has been cleaned up (process exit) - if (!loaded) 
return false; - - // Handle initialization races - ScopedAcquire boot(&bootstrap_lock_); - - if (runtime_singleton_ == NULL) { - runtime_singleton_ = new Runtime(); - } - - // Serialize with release - ScopedAcquire lock(&runtime_singleton_->kernel_lock_); - - if (runtime_singleton_->ref_count_ == INT32_MAX) { - return false; - } - - runtime_singleton_->ref_count_++; - - if (runtime_singleton_->ref_count_ == 1) { - runtime_singleton_->Load(); - } - - return true; -} - -bool Runtime::Release() { - ScopedAcquire lock(&kernel_lock_); - if (ref_count_ == 0) { - return false; - } - - if (ref_count_ == 1) { - // Release all registered memory, then unload backends - Unload(); - } - - ref_count_--; - - return true; -} - -bool Runtime::IsOpen() { - return (Runtime::runtime_singleton_ != NULL) && - (Runtime::runtime_singleton_->ref_count_ != 0); -} - -void Runtime::RegisterAgent(Agent* agent) { - if (agent->device_type() == Agent::DeviceType::kAmdCpuDevice) { - cpu_agents_.push_back(agent); - - // Add cpu regions to the system region list. - for (const core::MemoryRegion* region : agent->regions()) { - if (region->fine_grain()) { - system_regions_fine_.push_back(region); - } else { - system_regions_coarse_.push_back(region); - } - } - - assert(system_regions_fine_.size() > 0); - - // Init default fine grain system region allocator using fine grain - // system region of the first discovered CPU agent. - if (cpu_agents_.size() == 1) { - if (system_regions_fine_[0]->full_profile()) { - system_allocator_ = [](size_t size, size_t alignment) -> void * { - return _aligned_malloc(size, alignment); - }; - - system_deallocator_ = [](void* ptr) { _aligned_free(ptr); }; - } else { - // Might need memory pooling to cover allocation that - // requires less than 4096 bytes. 
- system_allocator_ = [&](size_t size, size_t alignment) -> void * { - assert(alignment <= 4096); - void* ptr = NULL; - return (HSA_STATUS_SUCCESS == - core::Runtime::runtime_singleton_->AllocateMemory( - system_regions_fine_[0], size, &ptr)) - ? ptr - : NULL; - }; - - system_deallocator_ = [](void* ptr) { - core::Runtime::runtime_singleton_->FreeMemory(ptr); - }; - } - - BaseShared::SetAllocateAndFree(system_allocator_, system_deallocator_); - } - - // Setup system clock frequency for the first time. - if (sys_clock_freq_ == 0) { - // Cache system clock frequency - HsaClockCounters clocks; - hsaKmtGetClockCounters(0, &clocks); - sys_clock_freq_ = clocks.SystemClockFrequencyHz; - host_agent_ = agent; - } - } else if (agent->device_type() == Agent::DeviceType::kAmdGpuDevice) { - gpu_agents_.push_back(agent); - - gpu_ids_.push_back(agent->node_id()); - - // Assign the first discovered gpu agent as blit agent that will provide - // DMA operation for hsa_memory_copy. - if (blit_agent_ == NULL) { - blit_agent_ = agent; - - // Query the start and end address of the SVM address space in this - // platform. - if (reinterpret_cast(blit_agent_)->profile() == - HSA_PROFILE_BASE) { - std::vector::const_iterator it = - std::find_if(blit_agent_->regions().begin(), - blit_agent_->regions().end(), - [](const core::MemoryRegion* region) { - return ( - reinterpret_cast(region)->IsSvm()); - }); - - assert(it != blit_agent_->regions().end()); - - const amd::MemoryRegion* svm_region = - reinterpret_cast(*it); - - start_svm_address_ = - static_cast(svm_region->GetBaseAddress()); - end_svm_address_ = start_svm_address_ + svm_region->GetPhysicalSize(); - - // Bind VM fault handler when we detect the first GPU agent. - // TODO(bwicakso): validate if it works on APU. 
- BindVmFaultHandler(); - } else { - start_svm_address_ = 0; - end_svm_address_ = os::GetUserModeVirtualMemoryBase() + - os::GetUserModeVirtualMemorySize(); - } - } - } -} - -void Runtime::DestroyAgents() { - std::for_each(gpu_agents_.begin(), gpu_agents_.end(), DeleteObject()); - gpu_agents_.clear(); - - gpu_ids_.clear(); - - std::for_each(cpu_agents_.begin(), cpu_agents_.end(), DeleteObject()); - cpu_agents_.clear(); - - blit_agent_ = NULL; - - system_regions_fine_.clear(); - system_regions_coarse_.clear(); -} - -void Runtime::SetLinkCount(size_t num_link) { - const size_t last_index = GetIndexLinkInfo(0, num_link); - link_matrix_.resize(last_index); - - memset(&link_matrix_[0], 0, - link_matrix_.size() * sizeof(hsa_amd_memory_pool_link_info_t)); -} - -void Runtime::RegisterLinkInfo(uint32_t node_id_from, uint32_t node_id_to, - uint32_t num_hop, - hsa_amd_memory_pool_link_info_t& link_info) { - const uint32_t idx = GetIndexLinkInfo(node_id_from, node_id_to); - link_matrix_[idx].num_hop = num_hop; - link_matrix_[idx].info = link_info; -} - -const Runtime::LinkInfo& Runtime::GetLinkInfo(uint32_t node_id_from, - uint32_t node_id_to) { - return link_matrix_[GetIndexLinkInfo(node_id_from, node_id_to)]; -} - -uint32_t Runtime::GetIndexLinkInfo(uint32_t node_id_from, uint32_t node_id_to) { - const uint32_t node_id_max = std::max(node_id_from, node_id_to) - 1; - const uint32_t node_id_min = std::min(node_id_from, node_id_to); - return ((node_id_max * (node_id_max + 1) / 2) + node_id_min); -} - -hsa_status_t Runtime::IterateAgent(hsa_status_t (*callback)(hsa_agent_t agent, - void* data), - void* data) { - if (!IsOpen()) { - return HSA_STATUS_ERROR_NOT_INITIALIZED; - } - - std::vector* agent_lists[2] = {&cpu_agents_, &gpu_agents_}; - for (std::vector* agent_list : agent_lists) { - for (size_t i = 0; i < agent_list->size(); ++i) { - hsa_agent_t agent = Agent::Convert(agent_list->at(i)); - hsa_status_t status = callback(agent, data); - - if (status != HSA_STATUS_SUCCESS) { - 
return status; - } - } - } - - return HSA_STATUS_SUCCESS; -} - -hsa_status_t Runtime::AllocateMemory(const MemoryRegion* region, size_t size, - void** ptr) { - return AllocateMemory(false, region, size, ptr); -} - -hsa_status_t Runtime::AllocateMemory(bool restrict_access, - const MemoryRegion* region, size_t size, - void** address) { - const amd::MemoryRegion* amd_region = - reinterpret_cast(region); - hsa_status_t status = amd_region->Allocate(restrict_access, size, address); - - // Track the allocation result so that it could be freed properly. - if (status == HSA_STATUS_SUCCESS) { - ScopedAcquire lock(&memory_lock_); - allocation_map_[*address] = AllocationRegion(region, size); - } - - return status; -} - -hsa_status_t Runtime::FreeMemory(void* ptr) { - if (ptr == NULL) { - return HSA_STATUS_SUCCESS; - } - - const MemoryRegion* region = NULL; - size_t size = 0; - { - ScopedAcquire lock(&memory_lock_); - - std::map::const_iterator it = - allocation_map_.find(ptr); - - if (it == allocation_map_.end()) { - assert(false && "Can't find address in allocation map"); - return HSA_STATUS_ERROR; - } - - region = it->second.region; - size = it->second.size; - - allocation_map_.erase(it); - } - - return region->Free(ptr, size); -} - -hsa_status_t Runtime::CopyMemory(void* dst, const void* src, size_t size) { - assert(dst != NULL && src != NULL && size != 0); - - bool is_src_system = false; - bool is_dst_system = false; - const uintptr_t src_uptr = reinterpret_cast(src); - const uintptr_t dst_uptr = reinterpret_cast(dst); - - if ((reinterpret_cast(blit_agent_)->profile() == - HSA_PROFILE_FULL)) { - is_src_system = (src_uptr < end_svm_address_); - is_dst_system = (dst_uptr < end_svm_address_); - } else { - is_src_system = - ((src_uptr < start_svm_address_) || (src_uptr >= end_svm_address_)); - is_dst_system = - ((dst_uptr < start_svm_address_) || (dst_uptr >= end_svm_address_)); - - if ((is_src_system && !is_dst_system) || - (!is_src_system && is_dst_system)) { - // Use 
staging buffer or pin if either src or dst is gpuvm and the other - // is system memory allocated via OS or C/C++ allocator. - return CopyMemoryHostAlloc(dst, src, size, is_dst_system); - } - } - - if (is_src_system && is_dst_system) { - memmove(dst, src, size); - return HSA_STATUS_SUCCESS; - } - - return blit_agent_->DmaCopy(dst, src, size); -} - -hsa_status_t Runtime::CopyMemoryHostAlloc(void* dst, const void* src, - size_t size, bool dst_malloc) { - void* usrptr = (dst_malloc) ? dst : const_cast(src); - void* agent_ptr = NULL; - - hsa_agent_t blit_agent = core::Agent::Convert(blit_agent_); - - const amd::MemoryRegion* system_region = - reinterpret_cast(system_regions_fine_[0]); - hsa_status_t stat = - system_region->Lock(1, &blit_agent, usrptr, size, &agent_ptr); - - if (stat != HSA_STATUS_SUCCESS) { - return stat; - } - - stat = blit_agent_->DmaCopy((dst_malloc) ? agent_ptr : dst, - (dst_malloc) ? src : agent_ptr, size); - - system_region->Unlock(usrptr); - - return stat; -} - -hsa_status_t Runtime::CopyMemory(void* dst, core::Agent& dst_agent, - const void* src, core::Agent& src_agent, - size_t size, - std::vector& dep_signals, - core::Signal& completion_signal) { - const bool dst_gpu = - (dst_agent.device_type() == core::Agent::DeviceType::kAmdGpuDevice); - const bool src_gpu = - (src_agent.device_type() == core::Agent::DeviceType::kAmdGpuDevice); - if (dst_gpu || src_gpu) { - core::Agent& copy_agent = (src_gpu) ? src_agent : dst_agent; - return copy_agent.DmaCopy(dst, src, size, dep_signals, completion_signal); - } - - // For cpu to cpu, fire and forget a copy thread. 
- std::thread([](void* dst, const void* src, size_t size, - std::vector dep_signals, - core::Signal* completion_signal) { - for (core::Signal* dep : dep_signals) { - dep->WaitRelaxed(HSA_SIGNAL_CONDITION_EQ, 0, UINT64_MAX, - HSA_WAIT_STATE_BLOCKED); - } - - memcpy(dst, src, size); - - completion_signal->SubRelease(1); - }, - dst, src, size, dep_signals, &completion_signal).detach(); - - return HSA_STATUS_SUCCESS; -} - -hsa_status_t Runtime::FillMemory(void* ptr, uint32_t value, size_t count) { - assert(blit_agent_ != NULL); - return blit_agent_->DmaFill(ptr, value, count); -} - -hsa_status_t Runtime::AllowAccess(uint32_t num_agents, - const hsa_agent_t* agents, const void* ptr) { - const amd::MemoryRegion* amd_region = NULL; - size_t alloc_size = 0; - - { - ScopedAcquire lock(&memory_lock_); - - std::map::const_iterator it = - allocation_map_.find(ptr); - - if (it == allocation_map_.end()) { - return HSA_STATUS_ERROR; - } - - amd_region = reinterpret_cast(it->second.region); - alloc_size = it->second.size; - } - - return amd_region->AllowAccess(num_agents, agents, ptr, alloc_size); -} - -hsa_status_t Runtime::GetSystemInfo(hsa_system_info_t attribute, void* value) { - switch (attribute) { - case HSA_SYSTEM_INFO_VERSION_MAJOR: - *((uint16_t*)value) = HSA_VERSION_MAJOR; - break; - case HSA_SYSTEM_INFO_VERSION_MINOR: - *((uint16_t*)value) = HSA_VERSION_MINOR; - break; - case HSA_SYSTEM_INFO_TIMESTAMP: { - HsaClockCounters clocks; - hsaKmtGetClockCounters(0, &clocks); - *((uint64_t*)value) = clocks.SystemClockCounter; - break; - } - case HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY: { - assert(sys_clock_freq_ != 0 && - "Use of HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY before HSA " - "initialization completes."); - *(uint64_t*)value = sys_clock_freq_; - break; - } - case HSA_SYSTEM_INFO_SIGNAL_MAX_WAIT: - *((uint64_t*)value) = 0xFFFFFFFFFFFFFFFF; - break; - case HSA_SYSTEM_INFO_ENDIANNESS: -#if defined(HSA_LITTLE_ENDIAN) - *((hsa_endianness_t*)value) = HSA_ENDIANNESS_LITTLE; -#else - 
*((hsa_endianness_t*)value) = HSA_ENDIANNESS_BIG; -#endif - break; - case HSA_SYSTEM_INFO_MACHINE_MODEL: -#if defined(HSA_LARGE_MODEL) - *((hsa_machine_model_t*)value) = HSA_MACHINE_MODEL_LARGE; -#else - *((hsa_machine_model_t*)value) = HSA_MACHINE_MODEL_SMALL; -#endif - break; - case HSA_SYSTEM_INFO_EXTENSIONS: - memset(value, 0, sizeof(uint8_t) * 128); - - if (extensions_.table.hsa_ext_program_finalize_fn != NULL) { - *((uint8_t*)value) = 1 << HSA_EXTENSION_FINALIZER; - } - - if (extensions_.table.hsa_ext_image_create_fn != NULL) { - *((uint8_t*)value) |= 1 << HSA_EXTENSION_IMAGES; - } - - *((uint8_t*)value) |= 1 << HSA_EXTENSION_AMD_PROFILER; - - break; - default: - return HSA_STATUS_ERROR_INVALID_ARGUMENT; - } - return HSA_STATUS_SUCCESS; -} - -uint32_t Runtime::GetQueueId() { return atomic::Increment(&queue_count_); } - -hsa_status_t Runtime::SetAsyncSignalHandler(hsa_signal_t signal, - hsa_signal_condition_t cond, - hsa_signal_value_t value, - hsa_amd_signal_handler handler, - void* arg) { - // Asyncronous signal handler is only supported when KFD events are on. - if (!core::g_use_interrupt_wait) return HSA_STATUS_ERROR_OUT_OF_RESOURCES; - - // Indicate that this signal is in use. 
- if (signal.handle != 0) hsa_signal_handle(signal)->Retain(); - - ScopedAcquire scope_lock(&async_events_control_.lock); - - // Lazy initializer - if (async_events_control_.async_events_thread_ == NULL) { - // Create monitoring thread control signal - auto err = HSA::hsa_signal_create(0, 0, NULL, &async_events_control_.wake); - if (err != HSA_STATUS_SUCCESS) { - assert(false && "Asyncronous events control signal creation error."); - return HSA_STATUS_ERROR_OUT_OF_RESOURCES; - } - async_events_.PushBack(async_events_control_.wake, HSA_SIGNAL_CONDITION_NE, - 0, NULL, NULL); - - // Start event monitoring thread - async_events_control_.exit = false; - async_events_control_.async_events_thread_ = - os::CreateThread(AsyncEventsLoop, NULL); - if (async_events_control_.async_events_thread_ == NULL) { - assert(false && "Asyncronous events thread creation error."); - return HSA_STATUS_ERROR_OUT_OF_RESOURCES; - } - } - - new_async_events_.PushBack(signal, cond, value, handler, arg); - - hsa_signal_handle(async_events_control_.wake)->StoreRelease(1); - - return HSA_STATUS_SUCCESS; -} - -hsa_status_t Runtime::InteropMap(uint32_t num_agents, Agent** agents, - int interop_handle, uint32_t flags, - size_t* size, void** ptr, - size_t* metadata_size, const void** metadata) { - HsaGraphicsResourceInfo info; - - HSAuint32 short_nodes[64]; - HSAuint32* nodes = short_nodes; - if (num_agents > 64) { - nodes = new HSAuint32[num_agents]; - if (nodes == NULL) return HSA_STATUS_ERROR_OUT_OF_RESOURCES; - } - MAKE_SCOPE_GUARD([&]() { - if (num_agents > 64) delete[] nodes; - }); - - for (int i = 0; i < num_agents; i++) - agents[i]->GetInfo((hsa_agent_info_t)HSA_AMD_AGENT_INFO_DRIVER_NODE_ID, - &nodes[i]); - - if (hsaKmtRegisterGraphicsHandleToNodes(interop_handle, &info, num_agents, - nodes) != HSAKMT_STATUS_SUCCESS) - return HSA_STATUS_ERROR; - - HSAuint64 altAddress; - HsaMemMapFlags map_flags; - map_flags.Value = 0; - map_flags.ui32.PageSize = HSA_PAGE_SIZE_64KB; - if 
(hsaKmtMapMemoryToGPUNodes(info.MemoryAddress, info.SizeInBytes, - &altAddress, map_flags, num_agents, - nodes) != HSAKMT_STATUS_SUCCESS) { - map_flags.ui32.PageSize = HSA_PAGE_SIZE_4KB; - if (hsaKmtMapMemoryToGPUNodes(info.MemoryAddress, info.SizeInBytes, - &altAddress, map_flags, num_agents, - nodes) != HSAKMT_STATUS_SUCCESS) - return HSA_STATUS_ERROR_OUT_OF_RESOURCES; - } - - if (metadata_size != NULL) *metadata_size = info.MetadataSizeInBytes; - if (metadata != NULL) *metadata = info.Metadata; - - *size = info.SizeInBytes; - *ptr = info.MemoryAddress; - - return HSA_STATUS_SUCCESS; -} - -hsa_status_t Runtime::InteropUnmap(void* ptr) -{ - if(hsaKmtUnmapMemoryToGPU(ptr)!=HSAKMT_STATUS_SUCCESS) - return HSA_STATUS_ERROR_INVALID_ARGUMENT; - if(hsaKmtDeregisterMemory(ptr)!=HSAKMT_STATUS_SUCCESS) - return HSA_STATUS_ERROR_INVALID_ARGUMENT; - return HSA_STATUS_SUCCESS; -} - -void Runtime::AsyncEventsLoop(void*) { - auto& async_events_control_ = runtime_singleton_->async_events_control_; - auto& async_events_ = runtime_singleton_->async_events_; - auto& new_async_events_ = runtime_singleton_->new_async_events_; - - while (!async_events_control_.exit) { - // Wait for a signal - hsa_signal_value_t value; - uint32_t index = hsa_amd_signal_wait_any( - uint32_t(async_events_.Size()), &async_events_.signal_[0], - &async_events_.cond_[0], &async_events_.value_[0], uint64_t(-1), - HSA_WAIT_STATE_BLOCKED, &value); - - // Reset the control signal - if (index == 0) { - hsa_signal_handle(async_events_control_.wake)->StoreRelaxed(0); - } else if (index != -1) { - // No error or timout occured, process the handler - assert(async_events_.handler_[index] != NULL); - bool keep = - async_events_.handler_[index](value, async_events_.arg_[index]); - if (!keep) { - hsa_signal_handle(async_events_.signal_[index])->Release(); - async_events_.CopyIndex(index, async_events_.Size() - 1); - async_events_.PopBack(); - } - } - - // Check for dead signals - index = 0; - while (index != 
async_events_.Size()) { - if (!hsa_signal_handle(async_events_.signal_[index])->IsValid()) { - hsa_signal_handle(async_events_.signal_[index])->Release(); - async_events_.CopyIndex(index, async_events_.Size() - 1); - async_events_.PopBack(); - continue; - } - index++; - } - - // Insert new signals and find plain functions - typedef std::pair func_arg_t; - std::vector functions; - { - ScopedAcquire scope_lock(&async_events_control_.lock); - for (size_t i = 0; i < new_async_events_.Size(); i++) { - if (new_async_events_.signal_[i].handle == 0) { - functions.push_back( - func_arg_t((void (*)(void*))new_async_events_.handler_[i], - new_async_events_.arg_[i])); - continue; - } - async_events_.PushBack( - new_async_events_.signal_[i], new_async_events_.cond_[i], - new_async_events_.value_[i], new_async_events_.handler_[i], - new_async_events_.arg_[i]); - } - new_async_events_.Clear(); - } - - // Call plain functions - for (size_t i = 0; i < functions.size(); i++) - functions[i].first(functions[i].second); - functions.clear(); - } - - // Release wait count of all pending signals - for (size_t i = 1; i < async_events_.Size(); i++) - hsa_signal_handle(async_events_.signal_[i])->Release(); - async_events_.Clear(); - - for (size_t i = 0; i < new_async_events_.Size(); i++) - hsa_signal_handle(new_async_events_.signal_[i])->Release(); - new_async_events_.Clear(); -} - -void Runtime::BindVmFaultHandler() { - if (core::g_use_interrupt_wait) { - // Create memory event with manual reset to avoid racing condition - // with driver in case of multiple concurrent VM faults. - vm_fault_event_ = - core::InterruptSignal::CreateEvent(HSA_EVENTTYPE_MEMORY, true); - - // Create an interrupt signal object to contain the memory event. - // This signal object will be registered with the async handler global - // thread. 
- vm_fault_signal_ = new core::InterruptSignal(0, vm_fault_event_); - - if (!vm_fault_signal_->IsValid() || vm_fault_signal_->EopEvent() == NULL) { - assert(false && "Failed on creating VM fault signal"); - return; - } - - SetAsyncSignalHandler(core::Signal::Convert(vm_fault_signal_), - HSA_SIGNAL_CONDITION_NE, 0, VMFaultHandler, - reinterpret_cast(vm_fault_signal_)); - } -} - -bool Runtime::VMFaultHandler(hsa_signal_value_t val, void* arg) { - core::InterruptSignal* vm_fault_signal = - reinterpret_cast(arg); - - assert(vm_fault_signal != NULL); - - if (vm_fault_signal == NULL) { - return false; - } - - std::string print_vm_message = os::GetEnvVar("HSA_ENABLE_VM_FAULT_MESSAGE"); - if (print_vm_message == "1") { - HsaEvent* vm_fault_event = vm_fault_signal->EopEvent(); - - const HsaMemoryAccessFault& fault = - vm_fault_event->EventData.EventData.MemoryAccessFault; - - std::string reason = ""; - if (fault.Failure.NotPresent == 1) { - reason += "Page not present or supervisor privilege"; - } else if (fault.Failure.ReadOnly == 1) { - reason += "Write access to a read-only page"; - } else if (fault.Failure.NoExecute == 1) { - reason += "Execute access to a page marked NX"; - } else if (fault.Failure.GpuAccess == 1) { - reason += "Host access only"; - } else if (fault.Failure.ECC == 1) { - reason += "ECC failure (if supported by HW)"; - } - - fprintf(stderr, - "Memory access fault by GPU node-%u on address %p%s. Reason: %s.\n", - fault.NodeId, reinterpret_cast(fault.VirtualAddress), - (fault.Failure.Imprecise == 1) ? "(may not be exact address)" : "", - reason.c_str()); - } else { - assert(false && "GPU memory access fault."); - } - - std::abort(); - - // No need to keep the signal because we are done. 
- return false; -} - -Runtime::Runtime() - : host_agent_(NULL), - blit_agent_(NULL), - queue_count_(0), - sys_clock_freq_(0), - vm_fault_event_(NULL), - vm_fault_signal_(NULL), - ref_count_(0) { - start_svm_address_ = 0; -#if defined(HSA_LARGE_MODEL) - end_svm_address_ = UINT64_MAX; -#else - end_svm_address_ = UINT32_MAX; -#endif -} - -void Runtime::Load() { - // Load interrupt enable option - std::string interrupt = os::GetEnvVar("HSA_ENABLE_INTERRUPT"); - g_use_interrupt_wait = (interrupt != "0"); - - if (!amd::Load()) { - return; - } - - loader_ = amd::hsa::loader::Loader::Create(&loader_context_); - - // Load extensions - LoadExtensions(); - - // Load tools libraries - LoadTools(); -} - -void Runtime::Unload() { - UnloadTools(); - UnloadExtensions(); - - amd::hsa::loader::Loader::Destroy(loader_); - loader_ = nullptr; - - async_events_control_.Shutdown(); - - delete vm_fault_signal_; - core::InterruptSignal::DestroyEvent(vm_fault_event_); - - DestroyAgents(); - - CloseTools(); - - amd::Unload(); -} - -void Runtime::LoadExtensions() { -// Load finalizer and extension library -#ifdef HSA_LARGE_MODEL - static const std::string kFinalizerLib[] = {"hsa-ext-finalize64.dll", - "libhsa-ext-finalize64.so.1"}; - static const std::string kImageLib[] = {"hsa-ext-image64.dll", - "libhsa-ext-image64.so.1"}; -#else - static const std::string kFinalizerLib[] = {"hsa-ext-finalize.dll", - "libhsa-ext-finalize.so.1"}; - static const std::string kImageLib[] = {"hsa-ext-image.dll", - "libhsa-ext-image.so.1"}; -#endif - extensions_.Load(kFinalizerLib[os_index(os::current_os)]); - extensions_.Load(kImageLib[os_index(os::current_os)]); -} - -void Runtime::UnloadExtensions() { extensions_.Unload(); } - -static std::vector parse_tool_names(std::string tool_names) { - std::vector names; - std::string name = ""; - bool quoted = false; - while (tool_names.size() != 0) { - auto index = tool_names.find_first_of(" \"\\"); - if (index == std::string::npos) { - name += tool_names; - break; - } 
- switch (tool_names[index]) { - case ' ': { - if (!quoted) { - name += tool_names.substr(0, index); - tool_names.erase(0, index + 1); - names.push_back(name); - name = ""; - } else { - name += tool_names.substr(0, index + 1); - tool_names.erase(0, index + 1); - } - break; - } - case '\"': { - if (quoted) { - quoted = false; - name += tool_names.substr(0, index); - tool_names.erase(0, index + 1); - names.push_back(name); - name = ""; - } else { - quoted = true; - tool_names.erase(0, index + 1); - } - break; - } - case '\\': { - if (tool_names.size() > index + 1) { - name += tool_names.substr(0, index) + tool_names[index + 1]; - tool_names.erase(0, index + 2); - } - break; - } - } // end switch - } // end while - - if (name != "") names.push_back(name); - return names; -} - -void Runtime::LoadTools() { - typedef bool (*tool_init_t)(::ApiTable*, uint64_t, uint64_t, - const char* const*); - typedef Agent* (*tool_wrap_t)(Agent*); - typedef void (*tool_add_t)(Runtime*); - - // Link extensions to API interception - hsa_api_table_.LinkExts(&extensions_.table); - - // Load tool libs - std::string tool_names = os::GetEnvVar("HSA_TOOLS_LIB"); - if (tool_names != "") { - std::vector names = parse_tool_names(tool_names); - std::vector failed; - for (int i = 0; i < names.size(); i++) { - os::LibHandle tool = os::LoadLib(names[i]); - - if (tool != NULL) { - tool_libs_.push_back(tool); - - tool_init_t ld; - ld = (tool_init_t)os::GetExportAddress(tool, "OnLoad"); - if (ld) { - if (!ld(&hsa_api_table_.table, 0, failed.size(), &failed[0])) { - failed.push_back(names[i].c_str()); - os::CloseLib(tool); - continue; - } - } - - tool_wrap_t wrap; - wrap = (tool_wrap_t)os::GetExportAddress(tool, "WrapAgent"); - if (wrap) { - std::vector* agent_lists[2] = {&cpu_agents_, - &gpu_agents_}; - for (std::vector* agent_list : agent_lists) { - for (size_t agent_idx = 0; agent_idx < agent_list->size(); - ++agent_idx) { - Agent* agent = wrap(agent_list->at(agent_idx)); - if (agent != NULL) { - 
assert(agent->IsValid() && - "Agent returned from WrapAgent is not valid"); - agent_list->at(agent_idx) = agent; - } - } - } - } - - tool_add_t add; - add = (tool_add_t)os::GetExportAddress(tool, "AddAgent"); - if (add) add(this); - } - } - } -} - -void Runtime::UnloadTools() { - typedef void (*tool_unload_t)(); - for (size_t i = tool_libs_.size(); i != 0; i--) { - tool_unload_t unld; - unld = (tool_unload_t)os::GetExportAddress(tool_libs_[i - 1], "OnUnload"); - if (unld) unld(); - } - - // Reset API table in case some tool doesn't cleanup properly - hsa_api_table_.Reset(); -} - -void Runtime::CloseTools() { - // Due to valgrind bug, runtime cannot dlclose extensions see: - // http://valgrind.org/docs/manual/faq.html#faq.unhelpful - if (os::GetEnvVar("HSA_RUNNING_UNDER_VALGRIND") != "1") { - for (int i = 0; i < tool_libs_.size(); i++) os::CloseLib(tool_libs_[i]); - } - tool_libs_.clear(); -} - -void Runtime::AsyncEventsControl::Shutdown() { - if (async_events_thread_ != NULL) { - exit = true; - hsa_signal_handle(wake)->StoreRelaxed(1); - os::WaitForThread(async_events_thread_); - os::CloseThread(async_events_thread_); - async_events_thread_ = NULL; - HSA::hsa_signal_destroy(wake); - } -} - -void Runtime::AsyncEvents::PushBack(hsa_signal_t signal, - hsa_signal_condition_t cond, - hsa_signal_value_t value, - hsa_amd_signal_handler handler, void* arg) { - signal_.push_back(signal); - cond_.push_back(cond); - value_.push_back(value); - handler_.push_back(handler); - arg_.push_back(arg); -} - -void Runtime::AsyncEvents::CopyIndex(size_t dst, size_t src) { - signal_[dst] = signal_[src]; - cond_[dst] = cond_[src]; - value_[dst] = value_[src]; - handler_[dst] = handler_[src]; - arg_[dst] = arg_[src]; -} - -size_t Runtime::AsyncEvents::Size() { return signal_.size(); } - -void Runtime::AsyncEvents::PopBack() { - signal_.pop_back(); - cond_.pop_back(); - value_.pop_back(); - handler_.pop_back(); - arg_.pop_back(); -} - -void Runtime::AsyncEvents::Clear() { - signal_.clear(); 
- cond_.clear(); - value_.clear(); - handler_.clear(); - arg_.clear(); -} - -} // namespace core diff --git a/runtime/hsa-runtime/core/runtime/signal.cpp b/runtime/hsa-runtime/core/runtime/signal.cpp deleted file mode 100644 index 0b417b60cf..0000000000 --- a/runtime/hsa-runtime/core/runtime/signal.cpp +++ /dev/null @@ -1,187 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// -// The University of Illinois/NCSA -// Open Source License (NCSA) -// -// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. -// -// Developed by: -// -// AMD Research and AMD HSA Software Development -// -// Advanced Micro Devices, Inc. -// -// www.amd.com -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal with the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimers. -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimers in -// the documentation and/or other materials provided with the distribution. -// - Neither the names of Advanced Micro Devices, Inc, -// nor the names of its contributors may be used to endorse or promote -// products derived from this Software without specific prior written -// permission. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL -// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR -// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// DEALINGS WITH THE SOFTWARE. -// -//////////////////////////////////////////////////////////////////////////////// - -#ifndef HSA_RUNTME_CORE_SIGNAL_CPP_ -#define HSA_RUNTME_CORE_SIGNAL_CPP_ - -#include "core/inc/signal.h" -#include "core/util/timer.h" -#include - -namespace core { - -uint32_t Signal::WaitAny(uint32_t signal_count, hsa_signal_t* hsa_signals, - hsa_signal_condition_t* conds, - hsa_signal_value_t* values, uint64_t timeout, - hsa_wait_state_t wait_hint, - hsa_signal_value_t* satisfying_value) { - hsa_signal_handle* signals = - reinterpret_cast(hsa_signals); - uint32_t prior = 0; - for (uint32_t i = 0; i < signal_count; i++) - prior = Max(prior, atomic::Increment(&signals[i]->waiting_)); - - MAKE_SCOPE_GUARD([&]() { - for (uint32_t i = 0; i < signal_count; i++) - atomic::Decrement(&signals[i]->waiting_); - }); - - // Allow only the first waiter to sleep (temporary, known to be bad). - if (prior != 0) wait_hint = HSA_WAIT_STATE_ACTIVE; - - // Ensure that all signals in the list can be slept on. 
- if (wait_hint != HSA_WAIT_STATE_ACTIVE) { - for (uint32_t i = 0; i < signal_count; i++) { - if (signals[i]->EopEvent() == NULL) { - wait_hint = HSA_WAIT_STATE_ACTIVE; - break; - } - } - } - - const uint32_t small_size = 10; - HsaEvent* short_evts[small_size]; - HsaEvent** evts = NULL; - uint32_t unique_evts = 0; - if (wait_hint != HSA_WAIT_STATE_ACTIVE) { - if (signal_count > small_size) - evts = new HsaEvent* [signal_count]; - else - evts = short_evts; - for (uint32_t i = 0; i < signal_count; i++) - evts[i] = signals[i]->EopEvent(); - std::sort(evts, evts + signal_count); - HsaEvent** end = std::unique(evts, evts + signal_count); - unique_evts = uint32_t(end - evts); - } - MAKE_SCOPE_GUARD([&]() { - if (signal_count > small_size) delete[] evts; - }); - - int64_t value; - - timer::fast_clock::time_point start_time = timer::fast_clock::now(); - - // Set a polling timeout value - // Exact time is not hugely important, it should just be a short while which - // is smaller than the thread scheduling quantum (usually around 16ms) - const timer::fast_clock::duration kMaxElapsed = std::chrono::milliseconds(5); - - // Convert timeout value into the fast_clock domain - uint64_t hsa_freq; - HSA::hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &hsa_freq); - const timer::fast_clock::duration fast_timeout = - timer::duration_from_seconds( - double(timeout) / double(hsa_freq)); - - bool condition_met = false; - while (true) { - for (uint32_t i = 0; i < signal_count; i++) { - if (signals[i]->invalid_) return uint32_t(-1); - - // Handling special event. 
- if (signals[i]->EopEvent() != NULL) { - const HSA_EVENTTYPE event_type = - signals[i]->EopEvent()->EventData.EventType; - if (event_type == HSA_EVENTTYPE_MEMORY) { - const HsaMemoryAccessFault& fault = - signals[i]->EopEvent()->EventData.EventData.MemoryAccessFault; - const uint32_t* failure = - reinterpret_cast(&fault.Failure); - if (*failure != 0) { - return i; - } - } - } - - value = - atomic::Load(&signals[i]->signal_.value, std::memory_order_relaxed); - - switch (conds[i]) { - case HSA_SIGNAL_CONDITION_EQ: { - condition_met = (value == values[i]); - break; - } - case HSA_SIGNAL_CONDITION_NE: { - condition_met = (value != values[i]); - break; - } - case HSA_SIGNAL_CONDITION_GTE: { - condition_met = (value >= values[i]); - break; - } - case HSA_SIGNAL_CONDITION_LT: { - condition_met = (value < values[i]); - break; - } - default: - return uint32_t(-1); - } - if (condition_met) { - if (satisfying_value != NULL) *satisfying_value = value; - return i; - } - } - - timer::fast_clock::time_point time = timer::fast_clock::now(); - if (time - start_time > kMaxElapsed) { - if (time - start_time > fast_timeout) { - return uint32_t(-1); - } - if (wait_hint != HSA_WAIT_STATE_ACTIVE) { - uint32_t wait_ms; - auto time_remaining = fast_timeout - (time - start_time); - if ((timeout == -1) || - (time_remaining > std::chrono::milliseconds(uint32_t(-1)))) - wait_ms = uint32_t(-1); - else - wait_ms = timer::duration_cast( - time_remaining).count(); - hsaKmtWaitOnMultipleEvents(evts, unique_evts, false, wait_ms); - } - } - } -} - -} // namespace core - -#endif // header guard diff --git a/runtime/hsa-runtime/core/util/atomic_helpers.h b/runtime/hsa-runtime/core/util/atomic_helpers.h deleted file mode 100644 index 1675c19da8..0000000000 --- a/runtime/hsa-runtime/core/util/atomic_helpers.h +++ /dev/null @@ -1,405 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// -// The University of Illinois/NCSA -// Open Source License (NCSA) -// -// 
Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. -// -// Developed by: -// -// AMD Research and AMD HSA Software Development -// -// Advanced Micro Devices, Inc. -// -// www.amd.com -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal with the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimers. -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimers in -// the documentation and/or other materials provided with the distribution. -// - Neither the names of Advanced Micro Devices, Inc, -// nor the names of its contributors may be used to endorse or promote -// products derived from this Software without specific prior written -// permission. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR -// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// DEALINGS WITH THE SOFTWARE. -// -//////////////////////////////////////////////////////////////////////////////// - -// Helpers to use non-atomic types with C++11 atomic operations. 
- -#ifndef HSA_RUNTIME_CORE_UTIL_ATOMIC_HELPERS_H_ -#define HSA_RUNTIME_CORE_UTIL_ATOMIC_HELPERS_H_ - -#include -#include "utils.h" - -/// @brief: Special assert used here to check each atomic variable for lock free -/// implementation. -/// ANY locked atomics are very likely incompatable with out-of-library -/// concurrent access (HW access for instance) -#define lockless_check(exp) assert(exp) - -namespace atomic { -/// @brief: Checks if type T is compatible with its atomic representation. -/// @param: ptr(Input), a pointer to type T for check. -/// @return: void. -template -static __forceinline void BasicCheck(const T* ptr) { - static_assert(sizeof(T) == sizeof(std::atomic), - "Type is size incompatible with its atomic representation!"); - lockless_check( - reinterpret_cast*>(ptr)->is_lock_free() && - "Atomic operation is not lock free! Use may conflict with peripheral HW " - "atomics!"); -}; - -/// @brief: function overloading, for more info, see previous one. -/// @param: ptr(Input), a pointer to a volatile type. -/// @return: void. -template -static __forceinline void BasicCheck(const volatile T* ptr) { - static_assert(sizeof(T) == sizeof(std::atomic), - "Type is size incompatible with its atomic representation!"); - lockless_check( - reinterpret_cast*>(ptr)->is_lock_free() && - "Atomic operation is not lock free! Use may conflict with peripheral HW " - "atomics!"); -}; - -/// @brief: Load value of type T atomically with specified memory order. -/// @param: ptr(Input), a pointer to type T. -/// @param: order(Input), memory order with atomic load, relaxed by default. -/// @return: T, loaded value. -template -static __forceinline T - Load(const T* ptr, std::memory_order order = std::memory_order_relaxed) { - BasicCheck(ptr); - const std::atomic* aptr = reinterpret_cast*>(ptr); - return aptr->load(order); -} - -/// @brief: function overloading, for more info, see previous one. -/// @param: ptr(Input), a pointer to volatile type T. 
-/// @param: order(Input), memory order with atomic load, relaxed by default. -/// @return: T, loaded value. -template -static __forceinline T - Load(const volatile T* ptr, - std::memory_order order = std::memory_order_relaxed) { - BasicCheck(ptr); - volatile const std::atomic* aptr = - reinterpret_cast*>(ptr); - return aptr->load(order); -} - -/// @brief: Store value of type T with specified memory order. -/// @param: ptr(Input), a pointer to instance which will be stored. -/// @param: val(Input), value to be stored. -/// @param: order(Input), memory order with atomic store, relaxed by default. -/// @return: void. -template -static __forceinline void Store( - T* ptr, T val, std::memory_order order = std::memory_order_relaxed) { - BasicCheck(ptr); - std::atomic* aptr = reinterpret_cast*>(ptr); - aptr->store(val, order); -} - -/// @brief: Function overloading, for more info, see previous one. -/// @param: ptr(Input), a pointer to volatile instance which will be stored. -/// @param: val(Input), value to be stored. -/// @param: order(Input), memory order with atomic store, relaxed by default. -/// @return: void. -template -static __forceinline void Store( - volatile T* ptr, T val, - std::memory_order order = std::memory_order_relaxed) { - BasicCheck(ptr); - volatile std::atomic* aptr = - reinterpret_cast*>(ptr); - aptr->store(val, order); -} - -/// @brief: Compare and swap value atomically with specified memory order. -/// @param: ptr(Input), a pointer to variable which is operated on. -/// @param: val(Input), value to be stored if condition is satisfied. -/// @param: expected(Input), value which is expected. -/// @param: order(Input), memory order with atomic operation. -/// @return: T, observed value of type T. 
-template -static __forceinline T - Cas(T* ptr, T val, T expected, - std::memory_order order = std::memory_order_relaxed) { - BasicCheck(ptr); - std::atomic* aptr = reinterpret_cast*>(ptr); - aptr->compare_exchange_strong(expected, val, order); - return expected; -} - -/// @brief: Function overloading, for more info, see previous one. -/// @param: ptr(Input), a pointer to volatile variable which is operated on. -/// @param: val(Input), value to be stored if condition is satisfied. -/// @param: expected(Input), value which is expected. -/// @param: order(Input), memory order which is relaxed by default. -/// @return: T, observed value of type T. -template -static __forceinline T - Cas(volatile T* ptr, T val, T expected, - std::memory_order order = std::memory_order_relaxed) { - BasicCheck(ptr); - volatile std::atomic* aptr = - reinterpret_cast*>(ptr); - aptr->compare_exchange_strong(expected, val, order); - return expected; -} - -/// @brief: Exchange the value atomically with specified memory order. -/// @param: ptr(Input), a pointer to variable which is operated on. -/// @param: val(Input), value to be stored. -/// @param: order(Input), memory order which is relaxed by default. -/// @return: T, the value prior to the exchange. -template -static __forceinline T - Exchange(T* ptr, T val, - std::memory_order order = std::memory_order_relaxed) { - BasicCheck(ptr); - std::atomic* aptr = reinterpret_cast*>(ptr); - return aptr->exchange(val, order); -} - -/// @brief: Function overloading, for more info, see previous one. -/// @param: ptr(Input), a pointer to variable which is operated on. -/// @param: val(Input), value to be stored. -/// @param: order(Input), memory order which is relaxed by default. -/// @return: T, the value prior to the exchange. 
-template -static __forceinline T - Exchange(volatile T* ptr, T val, - std::memory_order order = std::memory_order_relaxed) { - BasicCheck(ptr); - volatile std::atomic* aptr = - reinterpret_cast*>(ptr); - return aptr->exchange(val, order); -} - -/// @brief: Add value to variable atomically with specified memory order. -/// @param: ptr(Input), a pointer to variable which is operated on. -/// @param: val(Input), value to be added. -/// @param: order(Input), memory order which is relaxed by default. -/// @return: T, the value of the variable prior to the addition. -template -static __forceinline T - Add(T* ptr, T val, std::memory_order order = std::memory_order_relaxed) { - BasicCheck(ptr); - std::atomic* aptr = reinterpret_cast*>(ptr); - return aptr->fetch_add(val, order); -} - -/// @brief: Subtract value from the variable atomically with specified memory -/// order. -/// @param: ptr(Input), a pointer to variable which is operated on. -/// @param: val(Input), value to be subtraced. -/// @param: order(Input), memory order which is relaxed by default. -/// @return: T, value of the variable prior to the subtraction. -template -static __forceinline T - Sub(T* ptr, T val, std::memory_order order = std::memory_order_relaxed) { - BasicCheck(ptr); - std::atomic* aptr = reinterpret_cast*>(ptr); - return aptr->fetch_sub(val, order); -} - -/// @brief: Bit And operation on variable atomically with specified memory -/// order. -/// @param: ptr(Input), a pointer to variable which is operated on. -/// @param: val(Input), value which is ANDed with variable. -/// @param: order(Input), memory order which is relaxed by default. -/// @return: T, value of variable prior to the operation. -template -static __forceinline T - And(T* ptr, T val, std::memory_order order = std::memory_order_relaxed) { - BasicCheck(ptr); - std::atomic* aptr = reinterpret_cast*>(ptr); - return aptr->fetch_and(val, order); -} - -/// @brief: Bit Or operation on variable atomically with specified memory order. 
-/// @param: ptr(Input), a pointer to variable which is operated on. -/// @param: val(Input), value which is ORed with variable. -/// @param: order(Input), memory order which is relaxed by default. -/// @return: T, value of variable prior to the operation. -template -static __forceinline T - Or(T* ptr, T val, std::memory_order order = std::memory_order_relaxed) { - BasicCheck(ptr); - std::atomic* aptr = reinterpret_cast*>(ptr); - return aptr->fetch_or(val, order); -} - -/// @brief: Bit Xor operation on variable atomically with specified memory -/// order. -/// @param: ptr(Input), a pointer to variable which is operated on. -/// @param: val(Input), value which is XORed with variable. -/// @order: order(Input), memory order which is relaxed by default. -/// @return: T, valud of variable prior to the opertaion. -template -static __forceinline T - Xor(T* ptr, T val, std::memory_order order = std::memory_order_relaxed) { - BasicCheck(ptr); - std::atomic* aptr = reinterpret_cast*>(ptr); - return aptr->fetch_xor(val, order); -} - -/// @brief: Increase the value of variable atomically with specified memory -/// order. -/// @param: ptr(Input), a pointer to variable which is operated on. -/// @param: order(Input), memory order which is relaxed by default. -/// @return: T, value of variable prior to the operation. -template -static __forceinline T - Increment(T* ptr, std::memory_order order = std::memory_order_relaxed) { - BasicCheck(ptr); - std::atomic* aptr = reinterpret_cast*>(ptr); - return aptr->fetch_add(1, order); -} - -/// @brief: Decrease the value of the variable atomically with specified memory -/// order. -/// @param: ptr(Input), a pointer to variable which is operated on. -/// @param: order(Input), memory order which is relaxed by default. -/// @return: T, value of variable prior to the operation. 
-template -static __forceinline T - Decrement(T* ptr, std::memory_order order = std::memory_order_relaxed) { - BasicCheck(ptr); - std::atomic* aptr = reinterpret_cast*>(ptr); - return aptr->fetch_sub(1, order); -} - -/// @brief: Add value to variable atomically with specified memory order. -/// @param: ptr(Input), a pointer to volatile variable which is operated on. -/// @param: val(Input), value to be added. -/// @param: order(Input), memory order which is relaxed by default. -/// @return: T, the value of the variable prior to the addition. -template -static __forceinline T - Add(volatile T* ptr, T val, - std::memory_order order = std::memory_order_relaxed) { - BasicCheck(ptr); - volatile std::atomic* aptr = - reinterpret_cast*>(ptr); - return aptr->fetch_add(val, order); -} - -/// @brief: Subtract value from the variable atomically with specified memory -/// order. -/// @param: ptr(Input), a pointer to volatile variable which is operated on. -/// @param: val(Input), value to be subtraced. -/// @param: order(Input), memory order which is relaxed by default. -/// @return: T, value of the variable prior to the subtraction. -template -static __forceinline T - Sub(volatile T* ptr, T val, - std::memory_order order = std::memory_order_relaxed) { - BasicCheck(ptr); - volatile std::atomic* aptr = - reinterpret_cast*>(ptr); - return aptr->fetch_sub(val, order); -} - -/// @brief: Bit And operation on variable atomically with specified memory -/// order. -/// @param: ptr(Input), a pointer to volatile variable which is operated on. -/// @param: val(Input), value which is ANDed with variable. -/// @param: order(Input), memory order which is relaxed by default. -/// @return: T, value of variable prior to the operation. 
-template -static __forceinline T - And(volatile T* ptr, T val, - std::memory_order order = std::memory_order_relaxed) { - BasicCheck(ptr); - volatile std::atomic* aptr = - reinterpret_cast*>(ptr); - return aptr->fetch_and(val, order); -} - -/// @brief: Bit Or operation on variable atomically with specified memory order. -/// @param: ptr(Input), a pointer to volatile variable which is operated on. -/// @param: val(Input), value which is ORed with variable. -/// @param: order(Input), memory order which is relaxed by default. -/// @return: T, value of variable prior to the operation. -template -static __forceinline T Or(volatile T* ptr, T val, - std::memory_order order = std::memory_order_relaxed) { - BasicCheck(ptr); - volatile std::atomic* aptr = - reinterpret_cast*>(ptr); - return aptr->fetch_or(val, order); -} - -/// @brief: Bit Xor operation on variable atomically with specified memory -/// order. -/// @param: ptr(Input), a pointer to volatile variable which is operated on. -/// @param: val(Input), value which is XORed with variable. -/// @order: order(Input), memory order which is relaxed by default. -/// @return: T, valud of variable prior to the opertaion. -template -static __forceinline T - Xor(volatile T* ptr, T val, - std::memory_order order = std::memory_order_relaxed) { - BasicCheck(ptr); - volatile std::atomic* aptr = - reinterpret_cast*>(ptr); - return aptr->fetch_xor(val, order); -} - -/// @brief: Increase the value of variable atomically with specified memory -/// order. -/// @param: ptr(Input), a pointer to volatile variable which is operated on. -/// @param: order(Input), memory order which is relaxed by default. -/// @return: T, value of variable prior to the operation. 
-template -static __forceinline T - Increment(volatile T* ptr, - std::memory_order order = std::memory_order_relaxed) { - BasicCheck(ptr); - volatile std::atomic* aptr = - reinterpret_cast*>(ptr); - return aptr->fetch_add(1, order); -} - -/// @brief: Decrease the value of the variable atomically with specified memory -/// order. -/// @param: ptr(Input), a pointer to volatile variable which is operated on. -/// @param: order(Input), memory order which is relaxed by default. -/// @return: T, value of variable prior to the operation. -template -static __forceinline T - Decrement(volatile T* ptr, - std::memory_order order = std::memory_order_relaxed) { - BasicCheck(ptr); - volatile std::atomic* aptr = - reinterpret_cast*>(ptr); - return aptr->fetch_sub(1, order); -} -} - -// Remove special assert to avoid name polution -#undef lockless_check - -#endif // HSA_RUNTIME_CORE_UTIL_ATOMIC_HELPERS_H_ diff --git a/runtime/hsa-runtime/core/util/lnx/os_linux.cpp b/runtime/hsa-runtime/core/util/lnx/os_linux.cpp deleted file mode 100644 index c83c765144..0000000000 --- a/runtime/hsa-runtime/core/util/lnx/os_linux.cpp +++ /dev/null @@ -1,344 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// -// The University of Illinois/NCSA -// Open Source License (NCSA) -// -// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. -// -// Developed by: -// -// AMD Research and AMD HSA Software Development -// -// Advanced Micro Devices, Inc. 
-// -// www.amd.com -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal with the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimers. -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimers in -// the documentation and/or other materials provided with the distribution. -// - Neither the names of Advanced Micro Devices, Inc, -// nor the names of its contributors may be used to endorse or promote -// products derived from this Software without specific prior written -// permission. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR -// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// DEALINGS WITH THE SOFTWARE. 
-// -//////////////////////////////////////////////////////////////////////////////// - -#ifdef __linux__ -#include "core/util/os.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace os { - -static_assert(sizeof(LibHandle) == sizeof(void*), - "OS abstraction size mismatch"); -static_assert(sizeof(Mutex) == sizeof(pthread_mutex_t*), - "OS abstraction size mismatch"); -static_assert(sizeof(Thread) == sizeof(pthread_t), - "OS abstraction size mismatch"); - -LibHandle LoadLib(std::string filename) { - void* ret = dlopen(filename.c_str(), RTLD_LAZY); - return *(LibHandle*)&ret; -} - -void* GetExportAddress(LibHandle lib, std::string export_name) { - void* ret = dlsym(*(void**)&lib, export_name.c_str()); - - // dlsym searches the given library and all the library's load dependencies. - // Remaining code limits symbol lookup to only the library handle given. - // This lookup pattern matches Windows. - if (ret == NULL) return ret; - - link_map* map; - int err = dlinfo(*(void**)&lib, RTLD_DI_LINKMAP, &map); - assert(err != -1 && "dlinfo failed."); - - Dl_info info; - err = dladdr(ret, &info); - assert(err != 0 && "dladdr failed."); - - if (strcmp(info.dli_fname, map->l_name) == 0) return ret; - - return NULL; -} - -void CloseLib(LibHandle lib) { dlclose(*(void**)&lib); } - -Mutex CreateMutex() { - pthread_mutex_t* mutex = new pthread_mutex_t; - pthread_mutex_init(mutex, NULL); - return *(Mutex*)&mutex; -} - -bool TryAcquireMutex(Mutex lock) { - return pthread_mutex_trylock(*(pthread_mutex_t**)&lock) == 0; -} - -bool AcquireMutex(Mutex lock) { - return pthread_mutex_lock(*(pthread_mutex_t**)&lock) == 0; -} - -void ReleaseMutex(Mutex lock) { - pthread_mutex_unlock(*(pthread_mutex_t**)&lock); -} - -void DestroyMutex(Mutex lock) { - pthread_mutex_destroy(*(pthread_mutex_t**)&lock); - delete *(pthread_mutex_t**)&lock; -} - -void Sleep(int delay_in_millisec) { usleep(delay_in_millisec * 1000); } - -void YieldThread() { 
sched_yield(); } - -struct ThreadArgs { - void* entry_args; - ThreadEntry entry_function; -}; - -void* __stdcall ThreadTrampoline(void* arg) { - ThreadArgs* ar = (ThreadArgs*)arg; - ThreadEntry CallMe = ar->entry_function; - void* Data = ar->entry_args; - delete ar; - CallMe(Data); - return NULL; -} - -Thread CreateThread(ThreadEntry function, void* threadArgument, - uint stackSize) { - ThreadArgs* args = new ThreadArgs; - args->entry_args = threadArgument; - args->entry_function = function; - pthread_t thread; - pthread_attr_t attrib; - pthread_attr_init(&attrib); - if (stackSize != 0) pthread_attr_setstacksize(&attrib, stackSize); - bool success = - (pthread_create(&thread, &attrib, ThreadTrampoline, args) == 0); - pthread_attr_destroy(&attrib); - if (!success) { - pthread_join(thread, NULL); - return NULL; - } - return *(Thread*)&thread; -} - -void CloseThread(Thread thread) { pthread_detach(*(pthread_t*)&thread); } - -bool WaitForThread(Thread thread) { - return pthread_join(*(pthread_t*)&thread, NULL); -} - -bool WaitForAllThreads(Thread* threads, uint threadCount) { - for (uint i = 0; i < threadCount; i++) WaitForThread(threads[i]); - return true; -} - -void SetEnvVar(std::string env_var_name, std::string env_var_value) { - setenv(env_var_name.c_str(), env_var_value.c_str(), 1); -} - -std::string GetEnvVar(std::string env_var_name) { - char* buff; - buff = getenv(env_var_name.c_str()); - std::string ret; - if (buff) { - ret = buff; - } - return ret; -} - -size_t GetUserModeVirtualMemorySize() { -#ifdef _LP64 - // https://www.kernel.org/doc/Documentation/x86/x86_64/mm.txt : - // user space is 0000000000000000 - 00007fffffffffff (=47 bits) - return (size_t)(0x800000000000); -#else - return (size_t)(0xffffffff); // ~4GB -#endif -} - -size_t GetUsablePhysicalHostMemorySize() { - struct sysinfo info = {0}; - if (sysinfo(&info) != 0) { - return 0; - } - - const size_t physical_size = - static_cast(info.totalram * info.mem_unit); - return 
std::min(GetUserModeVirtualMemorySize(), physical_size); -} - -uintptr_t GetUserModeVirtualMemoryBase() { return (uintptr_t)0; } - -// Os event implementation -typedef struct EventDescriptor_ { - pthread_cond_t event; - pthread_mutex_t mutex; - bool state; - bool auto_reset; -} EventDescriptor; - -EventHandle CreateOsEvent(bool auto_reset, bool init_state) { - EventDescriptor* eventDescrp; - eventDescrp = (EventDescriptor*)malloc(sizeof(EventDescriptor)); - - pthread_mutex_init(&eventDescrp->mutex, NULL); - pthread_cond_init(&eventDescrp->event, NULL); - eventDescrp->auto_reset = auto_reset; - eventDescrp->state = init_state; - - EventHandle handle = reinterpret_cast(eventDescrp); - - return handle; -} - -int DestroyOsEvent(EventHandle event) { - if (event == NULL) { - return -1; - } - - EventDescriptor* eventDescrp = reinterpret_cast(event); - int ret_code = pthread_cond_destroy(&eventDescrp->event); - ret_code |= pthread_mutex_destroy(&eventDescrp->mutex); - free(eventDescrp); - return ret_code; -} - -int WaitForOsEvent(EventHandle event, unsigned int milli_seconds) { - if (event == NULL) { - return -1; - } - - EventDescriptor* eventDescrp = reinterpret_cast(event); - // Event wait time is 0 and state is non-signaled, return directly - if (milli_seconds == 0) { - int tmp_ret = pthread_mutex_trylock(&eventDescrp->mutex); - if (tmp_ret == EBUSY) { - // Timeout - return 1; - } - } - - int ret_code = 0; - pthread_mutex_lock(&eventDescrp->mutex); - if (!eventDescrp->state) { - if (milli_seconds == 0) { - ret_code = 1; - } else { - struct timespec ts; - struct timeval tp; - - ret_code = gettimeofday(&tp, NULL); - ts.tv_sec = tp.tv_sec; - ts.tv_nsec = tp.tv_usec * 1000; - - unsigned int sec = milli_seconds / 1000; - unsigned int mSec = milli_seconds % 1000; - - ts.tv_sec += sec; - ts.tv_nsec += mSec * 1000000; - - // More then one second, add 1 sec to the tv_sec elem - if (ts.tv_nsec > 1000000000) { - ts.tv_sec += 1; - ts.tv_nsec = ts.tv_nsec - 1000000000; - } - - 
ret_code = - pthread_cond_timedwait(&eventDescrp->event, &eventDescrp->mutex, &ts); - // Time out - if (ret_code == 110) { - ret_code = 0x14003; // 1 means time out in HSA - } - - if (ret_code == 0 && eventDescrp->auto_reset) { - eventDescrp->state = false; - } - } - } else if (eventDescrp->auto_reset) { - eventDescrp->state = false; - } - pthread_mutex_unlock(&eventDescrp->mutex); - - return ret_code; -} - -int SetOsEvent(EventHandle event) { - if (event == NULL) { - return -1; - } - - EventDescriptor* eventDescrp = reinterpret_cast(event); - int ret_code = 0; - ret_code = pthread_mutex_lock(&eventDescrp->mutex); - eventDescrp->state = true; - ret_code = pthread_mutex_unlock(&eventDescrp->mutex); - ret_code |= pthread_cond_signal(&eventDescrp->event); - - return ret_code; -} - -int ResetOsEvent(EventHandle event) { - if (event == NULL) { - return -1; - } - - EventDescriptor* eventDescrp = reinterpret_cast(event); - int ret_code = 0; - ret_code = pthread_mutex_lock(&eventDescrp->mutex); - eventDescrp->state = false; - ret_code = pthread_mutex_unlock(&eventDescrp->mutex); - - return ret_code; -} - -uint64_t ReadAccurateClock() { - timespec time; - int err = clock_gettime(CLOCK_MONOTONIC_RAW, &time); - assert(err == 0 && "clock_gettime(CLOCK_MONOTONIC_RAW,...) failed"); - return uint64_t(time.tv_sec) * 1000000000ull + uint64_t(time.tv_nsec); -} - -uint64_t AccurateClockFrequency() { - timespec time; - int err = clock_getres(CLOCK_MONOTONIC_RAW, &time); - assert(err == 0 && "clock_getres(CLOCK_MONOTONIC_RAW,...) failed"); - assert(time.tv_sec == 0 && - "clock_getres(CLOCK_MONOTONIC_RAW,...) returned very low frequency " - "(<1Hz)."); - assert(time.tv_nsec < 0xFFFFFFFF && - "clock_getres(CLOCK_MONOTONIC_RAW,...) 
returned very low frequency " - "(<1Hz)."); - return uint64_t(time.tv_nsec) * 1000000000ull; -} -} - -#endif diff --git a/runtime/hsa-runtime/core/util/locks.h b/runtime/hsa-runtime/core/util/locks.h deleted file mode 100644 index 6ea35f5685..0000000000 --- a/runtime/hsa-runtime/core/util/locks.h +++ /dev/null @@ -1,136 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// -// The University of Illinois/NCSA -// Open Source License (NCSA) -// -// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. -// -// Developed by: -// -// AMD Research and AMD HSA Software Development -// -// Advanced Micro Devices, Inc. -// -// www.amd.com -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal with the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimers. -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimers in -// the documentation and/or other materials provided with the distribution. -// - Neither the names of Advanced Micro Devices, Inc, -// nor the names of its contributors may be used to endorse or promote -// products derived from this Software without specific prior written -// permission. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL -// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR -// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// DEALINGS WITH THE SOFTWARE. -// -//////////////////////////////////////////////////////////////////////////////// - -// Library of syncronization primitives - to be added to as needed. - -#ifndef HSA_RUNTIME_CORE_UTIL_LOCKS_H_ -#define HSA_RUNTIME_CORE_UTIL_LOCKS_H_ - -#include "utils.h" -#include "os.h" - -/// @brief: A class behaves as a lock in a scope. When trying to enter into the -/// critical section, creat a object of this class. After the control path goes -/// out of the scope, it will release the lock automatically. -template -class ScopedAcquire { - public: - /// @brief: When constructing, acquire the lock. - /// @param: lock(Input), pointer to an existing lock. - explicit ScopedAcquire(LockType* lock) : lock_(lock) { lock_->Acquire(); } - - /// @brief: when destructing, release the lock. - ~ScopedAcquire() { lock_->Release(); } - - private: - LockType* lock_; - /// @brief: Disable copiable and assignable ability. - DISALLOW_COPY_AND_ASSIGN(ScopedAcquire); -}; - -/// @brief: a class represents a kernel mutex. -/// Uses the kernel's scheduler to keep the waiting thread from being scheduled -/// until the lock is released (Best for long waits, though anything using -/// a kernel object is a long wait). -class KernelMutex { - public: - KernelMutex() { lock_ = os::CreateMutex(); } - ~KernelMutex() { os::DestroyMutex(lock_); } - - bool Try() { return os::TryAcquireMutex(lock_); } - bool Acquire() { return os::AcquireMutex(lock_); } - void Release() { os::ReleaseMutex(lock_); } - - private: - os::Mutex lock_; - - /// @brief: Disable copiable and assignable ability. - DISALLOW_COPY_AND_ASSIGN(KernelMutex); -}; - -/// @brief: represents a spin lock. 
-/// For very short hold durations on the order of the thread scheduling -/// quanta or less. -class SpinMutex { - public: - SpinMutex() { lock_ = 0; } - - bool Try() { - int old = 0; - return lock_.compare_exchange_strong(old, 1); - } - bool Acquire() { - int old = 0; - while (!lock_.compare_exchange_strong(old, 1)) - { - old=0; - os::YieldThread(); - } - return true; - } - void Release() { lock_ = 0; } - - private: - std::atomic lock_; - - /// @brief: Disable copiable and assignable ability. - DISALLOW_COPY_AND_ASSIGN(SpinMutex); -}; - -class KernelEvent { - public: - KernelEvent() { evt_ = os::CreateOsEvent(true, true); } - ~KernelEvent() { os::DestroyOsEvent(evt_); } - - bool IsSet() { return os::WaitForOsEvent(evt_, 0)==0; } - bool WaitForSet() { return os::WaitForOsEvent(evt_, 0xFFFFFFFF)==0; } - void Set() { os::SetOsEvent(evt_); } - void Reset() { os::ResetOsEvent(evt_); } - - private: - os::EventHandle evt_; - - /// @brief: Disable copiable and assignable ability. - DISALLOW_COPY_AND_ASSIGN(KernelEvent); -}; - -#endif // HSA_RUNTIME_CORE_SUTIL_LOCKS_H_ diff --git a/runtime/hsa-runtime/core/util/os.h b/runtime/hsa-runtime/core/util/os.h deleted file mode 100644 index c3936e32da..0000000000 --- a/runtime/hsa-runtime/core/util/os.h +++ /dev/null @@ -1,216 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// -// The University of Illinois/NCSA -// Open Source License (NCSA) -// -// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. -// -// Developed by: -// -// AMD Research and AMD HSA Software Development -// -// Advanced Micro Devices, Inc. 
-// -// www.amd.com -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal with the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimers. -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimers in -// the documentation and/or other materials provided with the distribution. -// - Neither the names of Advanced Micro Devices, Inc, -// nor the names of its contributors may be used to endorse or promote -// products derived from this Software without specific prior written -// permission. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR -// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// DEALINGS WITH THE SOFTWARE. -// -//////////////////////////////////////////////////////////////////////////////// - -// Minimal operating system abstraction interfaces. 
- -#ifndef HSA_RUNTIME_CORE_UTIL_OS_H_ -#define HSA_RUNTIME_CORE_UTIL_OS_H_ - -#include -#include "utils.h" - -namespace os { -typedef void* LibHandle; -typedef void* Mutex; -typedef void* Thread; -typedef void* EventHandle; - -enum class os_t { OS_WIN = 0, OS_LINUX, COUNT }; -static __forceinline std::underlying_type::type os_index(os_t val) { - return std::underlying_type::type(val); -} - -#ifdef _WIN32 -static const os_t current_os = os_t::OS_WIN; -#elif __linux__ -static const os_t current_os = os_t::OS_LINUX; -#else -static_assert(false, "Operating System not detected!"); -#endif - -/// @brief: Loads dynamic library based on file name. Return value will be NULL -/// if failed. -/// @param: filename(Input), file name of the library. -/// @return: LibHandle. -LibHandle LoadLib(std::string filename); - -/// @brief: Gets the address of exported symbol. Return NULl if failed. -/// @param: lib(Input), library handle which exporting from. -/// @param: export_name(Input), the name of the exported symbol. -/// @return: void*. -void* GetExportAddress(LibHandle lib, std::string export_name); - -/// @brief: Unloads the dynamic library. -/// @param: lib(Input), library handle which will be unloaded. -void CloseLib(LibHandle lib); - -/// @brief: Creates a mutex, will return NULL if failed. -/// @param: void. -/// @return: Mutex. -Mutex CreateMutex(); - -/// @brief: Tries to acquire the mutex once, if successed, return true. -/// @param: lock(Input), handle to the mutex. -/// @return: bool. -bool TryAcquireMutex(Mutex lock); - -/// @brief: Aquires the mutex, if the mutex is locked, it will wait until it is -/// released. If the mutex is acquired successfully, it will return true. -/// @param: lock(Input), handle to the mutex. -/// @return: bool. -bool AcquireMutex(Mutex lock); - -/// @brief: Releases the mutex. -/// @param: lock(Input), handle to the mutex. -/// @return: void. -void ReleaseMutex(Mutex lock); - -/// @brief: Destroys the mutex. 
-/// @param: lock(Input), handle to the mutex. -/// @return: void. -void DestroyMutex(Mutex lock); - -/// @brief: Puts current thread to sleep. -/// @param: delayInMs(Input), time in millisecond for sleeping. -/// @return: void. -void Sleep(int delayInMs); - -/// @brief: Yields current thread. -/// @param: void. -/// @return: void. -void YieldThread(); - -typedef void (*ThreadEntry)(void*); - -/// @brief: Creates a thread will return NULL if failed. -/// @param: entry_function(Input), a pointer to the function which the thread -/// starts from. -/// @param: entry_argument(Input), a pointer to the argument of the thread -/// function. -/// @param: stack_size(Input), size of the thread's stack, 0 by default. -/// @return: Thread, a handle to thread created. -Thread CreateThread(ThreadEntry entry_function, void* entry_argument, - uint stack_size = 0); - -/// @brief: Destroys the thread. -/// @param: thread(Input), thread handle to what will be destroyed. -/// @return: void. -void CloseThread(Thread thread); - -/// @brief: Waits for specific thread to finish, if successed, return true. -/// @param: thread(Input), handle to waiting thread. -/// @return: bool. -bool WaitForThread(Thread thread); - -/// @brief: Waits for multiple threads to finish, if successed, return ture. -/// @param; threads(Input), a pointer to a list of thread handle. -/// @param: thread_count(Input), number of threads to be waited on. -/// @return: bool. -bool WaitForAllThreads(Thread* threads, uint thread_count); - -/// @brief: Sets the environment value. -/// @param: env_var_name(Input), name of the environment value. -/// @param: env_var_value(Input), value of the environment value.s -/// @return: void. -void SetEnvVar(std::string env_var_name, std::string env_var_value); - -/// @brief: Gets the value of environment value. -/// @param: env_var_name(Input), name of the environment value. -/// @return: std::string, value of the environment value, returned as string. 
-std::string GetEnvVar(std::string env_var_name); - -/// @brief: Gets the max virtual memory size accessible to the application. -/// @param: void. -/// @return: size_t, size of the accessible memory to the application. -size_t GetUserModeVirtualMemorySize(); - -/// @brief: Gets the max physical host system memory size. -/// @param: void. -/// @return: size_t, size of the physical host system memory. -size_t GetUsablePhysicalHostMemorySize(); - -/// @brief: Gets the virtual memory base address. It is hardcoded to 0. -/// @param: void. -/// @return: uintptr_t, always 0. -uintptr_t GetUserModeVirtualMemoryBase(); - -/// @brief os event api, create an event -/// @param: auto_reset whether an event can reset the status automatically -/// @param: init_state initial state of the event -/// @return: event handle -EventHandle CreateOsEvent(bool auto_reset, bool init_state); - -/// @brief os event api, destroy an event -/// @param: event handle -/// @return: whether destroy is correct -int DestroyOsEvent(EventHandle event); - -/// @brief os event api, wait on event -/// @param: event Event handle -/// @param: milli_seconds wait time -/// @return: Indicate success or timeout -int WaitForOsEvent(EventHandle event, unsigned int milli_seconds); - -/// @brief os event api, set event state -/// @param: event Event handle -/// @return: Whether event set is correct -int SetOsEvent(EventHandle event); - -/// @brief os event api, reset event state -/// @param: event Event handle -/// @return: Whether event reset is correct -int ResetOsEvent(EventHandle event); - -/// @brief reads a clock which is deemed to be accurate for elapsed time -/// measurements, though not necessarilly fast to query -/// @return clock counter value -uint64_t ReadAccurateClock(); - -/// @brief retrieves the frequency in Hz of the unit used in ReadAccurateClock. 
-/// It does not necessarilly reflect the resolution of the clock, but is the -/// value needed to convert a difference in the clock's counter value to elapsed -/// seconds. This frequency does not change at runtime. -/// @return returns the frequency -uint64_t AccurateClockFrequency(); -} - -#endif // HSA_RUNTIME_CORE_UTIL_OS_H_ diff --git a/runtime/hsa-runtime/core/util/small_heap.cpp b/runtime/hsa-runtime/core/util/small_heap.cpp deleted file mode 100644 index 82e4909360..0000000000 --- a/runtime/hsa-runtime/core/util/small_heap.cpp +++ /dev/null @@ -1,174 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// -// The University of Illinois/NCSA -// Open Source License (NCSA) -// -// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. -// -// Developed by: -// -// AMD Research and AMD HSA Software Development -// -// Advanced Micro Devices, Inc. -// -// www.amd.com -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal with the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimers. -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimers in -// the documentation and/or other materials provided with the distribution. -// - Neither the names of Advanced Micro Devices, Inc, -// nor the names of its contributors may be used to endorse or promote -// products derived from this Software without specific prior written -// permission. 
-// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR -// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// DEALINGS WITH THE SOFTWARE. -// -//////////////////////////////////////////////////////////////////////////////// - -#include "small_heap.h" - -SmallHeap::memory_t::iterator SmallHeap::merge( - SmallHeap::memory_t::iterator& keep, - SmallHeap::memory_t::iterator& destroy) { - assert((char*)keep->first + keep->second.len == (char*)destroy->first && - "Invalid merge"); - assert(keep->second.isfree() && "Merge with allocated block"); - assert(destroy->second.isfree() && "Merge with allocated block"); - - keep->second.len += destroy->second.len; - keep->second.next_free = destroy->second.next_free; - if (!destroy->second.islastfree()) - memory[destroy->second.next_free].prior_free = keep->first; - - memory.erase(destroy); - return keep; -} - -void SmallHeap::free(void* ptr) { - if (ptr == NULL) return; - - auto iterator = memory.find(ptr); - - // Check for illegal free - if (iterator == memory.end()) { - assert(false && "Illegal free."); - return; - } - - const auto start_guard = memory.find(0); - const auto end_guard = memory.find((void*)0xFFFFFFFFFFFFFFFFull); - - // Return memory to total and link node into free list - total_free += iterator->second.len; - if (first_free < iterator->first) { - auto before = iterator; - before--; - while (before != start_guard && !before->second.isfree()) before--; - assert(before->second.next_free > iterator->first && - "Inconsistency in small heap."); - iterator->second.prior_free = before->first; - iterator->second.next_free = before->second.next_free; - 
before->second.next_free = iterator->first; - if (!iterator->second.islastfree()) - memory[iterator->second.next_free].prior_free = iterator->first; - } else { - iterator->second.setfirstfree(); - iterator->second.next_free = first_free; - first_free = iterator->first; - if (!iterator->second.islastfree()) - memory[iterator->second.next_free].prior_free = iterator->first; - } - - // Attempt compaction - auto before = iterator; - before--; - if (before != start_guard) { - if (before->second.isfree()) { - iterator = merge(before, iterator); - } - } - - auto after = iterator; - after++; - if (after != end_guard) { - if (after->second.isfree()) { - iterator = merge(iterator, after); - } - } -} - -void* SmallHeap::alloc(size_t bytes) { - // Is enough memory available? - if ((bytes > total_free) || (bytes == 0)) return NULL; - - memory_t::iterator current; - memory_t::iterator prior; - - // Walk the free list and allocate at first fitting location - prior = current = memory.find(first_free); - while (true) { - if (bytes <= current->second.len) { - // Decrement from total - total_free -= bytes; - - // Is allocation an exact fit? 
- if (bytes == current->second.len) { - if (prior == current) { - first_free = current->second.next_free; - if (!current->second.islastfree()) - memory[current->second.next_free].setfirstfree(); - } else { - prior->second.next_free = current->second.next_free; - if (!current->second.islastfree()) - memory[current->second.next_free].prior_free = prior->first; - } - current->second.next_free = NULL; - return current->first; - } else { - // Split current node - void* remaining = (char*)current->first + bytes; - Node& node = memory[remaining]; - node.next_free = current->second.next_free; - node.prior_free = current->second.prior_free; - node.len = current->second.len - bytes; - current->second.len = bytes; - - if (prior == current) { - first_free = remaining; - node.setfirstfree(); - } else { - prior->second.next_free = remaining; - node.prior_free = prior->first; - } - if (!node.islastfree()) memory[node.next_free].prior_free = remaining; - - current->second.next_free = NULL; - return current->first; - } - } - - // End of free list? - if (current->second.islastfree()) break; - - prior = current; - current = memory.find(current->second.next_free); - } - - // Can't service the request due to fragmentation - return NULL; -} diff --git a/runtime/hsa-runtime/core/util/small_heap.h b/runtime/hsa-runtime/core/util/small_heap.h deleted file mode 100644 index 0da5ac280f..0000000000 --- a/runtime/hsa-runtime/core/util/small_heap.h +++ /dev/null @@ -1,114 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// -// The University of Illinois/NCSA -// Open Source License (NCSA) -// -// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. -// -// Developed by: -// -// AMD Research and AMD HSA Software Development -// -// Advanced Micro Devices, Inc. 
-// -// www.amd.com -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal with the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimers. -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimers in -// the documentation and/or other materials provided with the distribution. -// - Neither the names of Advanced Micro Devices, Inc, -// nor the names of its contributors may be used to endorse or promote -// products derived from this Software without specific prior written -// permission. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR -// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// DEALINGS WITH THE SOFTWARE. -// -//////////////////////////////////////////////////////////////////////////////// - -// A simple first fit memory allocator with eager compaction. For use with few -// items (where list iteration is faster than trees). -// Not thread safe! 
- -#ifndef HSA_RUNTME_CORE_UTIL_SMALL_HEAP_H_ -#define HSA_RUNTME_CORE_UTIL_SMALL_HEAP_H_ - -#include "utils.h" - -#include - -class SmallHeap { - public: - class Node { - public: - size_t len; - void* next_free; - void* prior_free; - static const intptr_t END = -1; - - __forceinline bool isfree() const { return next_free != NULL; } - __forceinline bool islastfree() const { return intptr_t(next_free) == END; } - __forceinline bool isfirstfree() const { - return intptr_t(prior_free) == END; - } - __forceinline void setlastfree() { - *reinterpret_cast(&next_free) = END; - } - __forceinline void setfirstfree() { - *reinterpret_cast(&prior_free) = END; - } - }; - - private: - SmallHeap(const SmallHeap& rhs); - SmallHeap& operator=(const SmallHeap& rhs); - - void* const pool; - const size_t length; - - size_t total_free; - void* first_free; - std::map memory; - - typedef decltype(memory) memory_t; - memory_t::iterator merge(memory_t::iterator& keep, - memory_t::iterator& destroy); - - public: - SmallHeap() : pool(NULL), length(0), total_free(0) {} - SmallHeap(void* base, size_t length) - : pool(base), length(length), total_free(length) { - first_free = pool; - - Node& node = memory[first_free]; - node.len = length; - node.setlastfree(); - node.setfirstfree(); - - memory[0].len = 0; - memory[(void*)0xFFFFFFFFFFFFFFFFull].len = 0; - } - - void* alloc(size_t bytes); - void free(void* ptr); - - void* base() const { return pool; } - size_t size() const { return length; } - size_t remaining() const { return total_free; } -}; - -#endif diff --git a/runtime/hsa-runtime/core/util/timer.cpp b/runtime/hsa-runtime/core/util/timer.cpp deleted file mode 100644 index 1fa275b49b..0000000000 --- a/runtime/hsa-runtime/core/util/timer.cpp +++ /dev/null @@ -1,105 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// -// The University of Illinois/NCSA -// Open Source License (NCSA) -// -// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. 
All rights reserved. -// -// Developed by: -// -// AMD Research and AMD HSA Software Development -// -// Advanced Micro Devices, Inc. -// -// www.amd.com -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal with the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimers. -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimers in -// the documentation and/or other materials provided with the distribution. -// - Neither the names of Advanced Micro Devices, Inc, -// nor the names of its contributors may be used to endorse or promote -// products derived from this Software without specific prior written -// permission. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR -// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// DEALINGS WITH THE SOFTWARE. 
-// -//////////////////////////////////////////////////////////////////////////////// - -#include "core/util/timer.h" - -namespace timer { - -accurate_clock::init::init() { - freq = os::AccurateClockFrequency(); - accurate_clock::period_ns = 1e9 / double(freq); -} - -// Calibrates the fast clock using the accurate clock. -fast_clock::init::init() { - typedef accurate_clock clock; - clock::duration delay(std::chrono::milliseconds(1)); - - // calibrate clock - fast_clock::raw_rep min = 0; - clock::duration elapsed = clock::duration::max(); - - do { - for (int t = 0; t < 10; t++) { - fast_clock::raw_rep r1, r2; - clock::time_point t0, t1, t2, t3; - - t0 = clock::now(); - std::atomic_signal_fence(std::memory_order_acq_rel); - r1 = fast_clock::raw_now(); - std::atomic_signal_fence(std::memory_order_acq_rel); - t1 = clock::now(); - std::atomic_signal_fence(std::memory_order_acq_rel); - - do { - t2 = clock::now(); - } while (t2 - t1 < delay); - - std::atomic_signal_fence(std::memory_order_acq_rel); - r2 = fast_clock::raw_now(); - std::atomic_signal_fence(std::memory_order_acq_rel); - t3 = clock::now(); - - // If elapsed time is shorter than last recorded time and both the start - // and end times are confirmed correlated then record the clock readings. 
- // This protects against inaccuracy due to thread switching - if ((t3 - t1 < elapsed) && ((t1 - t0) * 10 < (t2 - t1)) && - ((t3 - t2) * 10 < (t2 - t1))) { - elapsed = t3 - t1; - min = r2 - r1; - } - } - delay += delay; - } while (min < 1000); - - fast_clock::freq = double(min) / duration_in_seconds(elapsed); - fast_clock::period_ps = 1e12 / fast_clock::freq; -} - -double accurate_clock::period_ns; -accurate_clock::raw_frequency accurate_clock::freq; -accurate_clock::init accurate_clock::accurate_clock_init; - -double fast_clock::period_ps; -fast_clock::raw_frequency fast_clock::freq; -fast_clock::init fast_clock::fast_clock_init; -} diff --git a/runtime/hsa-runtime/core/util/timer.h b/runtime/hsa-runtime/core/util/timer.h deleted file mode 100644 index bec1d2c178..0000000000 --- a/runtime/hsa-runtime/core/util/timer.h +++ /dev/null @@ -1,162 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// -// The University of Illinois/NCSA -// Open Source License (NCSA) -// -// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. -// -// Developed by: -// -// AMD Research and AMD HSA Software Development -// -// Advanced Micro Devices, Inc. -// -// www.amd.com -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal with the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimers. 
-// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimers in -// the documentation and/or other materials provided with the distribution. -// - Neither the names of Advanced Micro Devices, Inc, -// nor the names of its contributors may be used to endorse or promote -// products derived from this Software without specific prior written -// permission. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR -// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// DEALINGS WITH THE SOFTWARE. -// -//////////////////////////////////////////////////////////////////////////////// - -#ifndef HSA_RUNTIME_CORE_UTIL_TIMER_H_ -#define HSA_RUNTIME_CORE_UTIL_TIMER_H_ - -#include "core/util/utils.h" -#include "core/util/os.h" -#include - -#include - -namespace timer { - -// Needed to patch around a mixed arithmetic bug in MSVC's duration_cast as of -// VS 2013. 
-template -struct wide_type { - typedef double type; -}; -template <> -struct wide_type { - typedef uintmax_t type; -}; -template <> -struct wide_type { - typedef intmax_t type; -}; - -template -static __forceinline To - duration_cast(const std::chrono::duration& d) { - typedef typename wide_type::value, - std::is_signed::value>::type wide; - typedef std::chrono::duration unit_convert_t; - - unit_convert_t temp = std::chrono::duration_cast(d); - return To(static_cast(temp.count())); -} -// End patch - -template -static __forceinline double duration_in_seconds( - std::chrono::duration delta) { - typedef std::chrono::duration> seconds; - return seconds(delta).count(); -} - -template -static __forceinline rep duration_from_seconds(double delta) { - typedef std::chrono::duration> seconds; - return std::chrono::duration_cast(seconds(delta)); -} - -// Provices a C++11 standard clock interface to the os::AccurateClock functions -class accurate_clock { - public: - typedef double rep; - typedef std::nano period; - typedef std::chrono::duration duration; - typedef std::chrono::time_point time_point; - - static const bool is_steady = true; - - static __forceinline time_point now() { - return time_point(duration(raw_now() * period_ns)); - } - - // These two extra APIs and types let us use clocks without conversion to the - // arbitrary period unit - typedef uint64_t raw_rep; - typedef uint64_t raw_frequency; - - static __forceinline raw_rep raw_now() { return os::ReadAccurateClock(); } - static __forceinline raw_frequency raw_freq() { return freq; } - - private: - static double period_ns; - static raw_frequency freq; - - class init { - public: - init(); - }; - static init accurate_clock_init; -}; - -// Provices a C++11 standard clock interface to the lowest latency approximate -// clock -class fast_clock { - public: - typedef double rep; - typedef std::pico period; - typedef std::chrono::duration duration; - typedef std::chrono::time_point time_point; - - static const bool 
is_steady = true; - - static __forceinline time_point now() { - return time_point(duration(raw_now() * period_ps)); - } - - // These two extra APIs and types let us use clocks without conversion to the - // arbitrary period unit - typedef uint64_t raw_rep; - typedef double raw_frequency; - - static __forceinline raw_rep raw_now() { return __rdtsc(); } - static __forceinline raw_frequency raw_freq() { return freq; } - - private: - static double period_ps; - static raw_frequency freq; - - class init { - public: - init(); - }; - static init fast_clock_init; -}; -} - -#endif diff --git a/runtime/hsa-runtime/core/util/utils.h b/runtime/hsa-runtime/core/util/utils.h deleted file mode 100644 index 7b3b7ad101..0000000000 --- a/runtime/hsa-runtime/core/util/utils.h +++ /dev/null @@ -1,267 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// -// The University of Illinois/NCSA -// Open Source License (NCSA) -// -// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. -// -// Developed by: -// -// AMD Research and AMD HSA Software Development -// -// Advanced Micro Devices, Inc. -// -// www.amd.com -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal with the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimers. -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimers in -// the documentation and/or other materials provided with the distribution. 
-// - Neither the names of Advanced Micro Devices, Inc, -// nor the names of its contributors may be used to endorse or promote -// products derived from this Software without specific prior written -// permission. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR -// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// DEALINGS WITH THE SOFTWARE. -// -//////////////////////////////////////////////////////////////////////////////// - -// Generally useful utility functions - -#ifndef HSA_RUNTIME_CORE_UTIL_UTILS_H_ -#define HSA_RUNTIME_CORE_UTIL_UTILS_H_ - -#include "stdint.h" -#include "stddef.h" -#include "stdlib.h" -#include - -typedef unsigned int uint; -typedef uint64_t uint64; - -#if defined(__GNUC__) -#include "mm_malloc.h" -#if defined(__i386__) || defined(__x86_64__) -#include -#else -#error \ - "Processor or compiler not identified. " \ - "Need to provide a lightweight approximate clock interface via function uint64_t __rdtsc() or adapt timer.h to your platform." 
-#endif - -#define __forceinline __inline__ __attribute__((always_inline)) -static __forceinline void __debugbreak() { __builtin_trap(); } -#define __declspec(x) __attribute__((x)) -#undef __stdcall -#define __stdcall // __attribute__((__stdcall__)) -#define __ALIGNED__(x) __attribute__((aligned(x))) - -static __forceinline void* _aligned_malloc(size_t size, size_t alignment) { - return _mm_malloc(size, alignment); -} -static __forceinline void _aligned_free(void* ptr) { return _mm_free(ptr); } -#elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) -#include "intrin.h" -#define __ALIGNED__(x) __declspec(align(x)) -#if (_MSC_VER < 1800) -static __forceinline unsigned long long int strtoull(const char* str, - char** endptr, int base) { - return static_cast(_strtoui64(str, endptr, base)); -} -#endif -#else -#error "Compiler and/or processor not identified." -#endif - -#define STRING2(x) #x -#define STRING(x) STRING2(x) - -#define PASTE2(x, y) x##y -#define PASTE(x, y) PASTE2(x, y) - -// A macro to disallow the copy and move constructor and operator= functions -// This should be used in the private: declarations for a class -#define DISALLOW_COPY_AND_ASSIGN(TypeName) \ - TypeName(const TypeName&); \ - TypeName(TypeName&&); \ - void operator=(const TypeName&); \ - void operator=(TypeName&&); - -template -class ScopeGuard { - public: - explicit __forceinline ScopeGuard(const lambda& release) - : release_(release), dismiss_(false) {} - - ScopeGuard(ScopeGuard& rhs) { *this = rhs; } - - __forceinline ~ScopeGuard() { - if (!dismiss_) release_(); - } - __forceinline ScopeGuard& operator=(ScopeGuard& rhs) { - dismiss_ = rhs.dismiss_; - release_ = rhs.release_; - rhs.dismiss_ = true; - } - __forceinline void Dismiss() { dismiss_ = true; } - - private: - lambda release_; - bool dismiss_; -}; - -template -static __forceinline ScopeGuard MakeScopeGuard(lambda rel) { - return ScopeGuard(rel); -} - -#define MAKE_SCOPE_GUARD_HELPER(lname, sname, ...) 
\ - auto lname = __VA_ARGS__; \ - ScopeGuard sname(lname); -#define MAKE_SCOPE_GUARD(...) \ - MAKE_SCOPE_GUARD_HELPER(PASTE(scopeGuardLambda, __COUNTER__), \ - PASTE(scopeGuard, __COUNTER__), __VA_ARGS__) -#define MAKE_NAMED_SCOPE_GUARD(name, ...) \ - MAKE_SCOPE_GUARD_HELPER(PASTE(scopeGuardLambda, __COUNTER__), name, \ - __VA_ARGS__) - -/// @brief: Finds out the min one of two inputs, input must support ">" -/// operator. -/// @param: a(Input), a reference to type T. -/// @param: b(Input), a reference to type T. -/// @return: T. -template -static __forceinline T Min(const T& a, const T& b) { - return (a > b) ? b : a; -} - -/// @brief: Find out the max one of two inputs, input must support ">" operator. -/// @param: a(Input), a reference to type T. -/// @param: b(Input), a reference to type T. -/// @return: T. -template -static __forceinline T Max(const T& a, const T& b) { - return (b > a) ? b : a; -} - -/// @brief: Free the memory space which is newed previously. -/// @param: ptr(Input), a pointer to memory space. Can't be NULL. -/// @return: void. -struct DeleteObject { - template - void operator()(const T* ptr) const { - delete ptr; - } -}; - -/// @brief: Checks if a value is power of two, if it is, return true. Be careful -/// when passing 0. -/// @param: val(Input), the data to be checked. -/// @return: bool. -template -static __forceinline bool IsPowerOfTwo(T val) { - return (val & (val - 1)) == 0; -} - -/// @brief: Calculates the floor value aligned based on parameter of alignment. -/// If value is at the boundary of alignment, it is unchanged. -/// @param: value(Input), value to be calculated. -/// @param: alignment(Input), alignment value. -/// @return: T. -template -static __forceinline T AlignDown(T value, size_t alignment) { - assert(IsPowerOfTwo(alignment)); - return (T)(value & ~(alignment - 1)); -} - -/// @brief: Same as previous one, but first parameter becomes pointer, for more -/// info, see the previous desciption. 
-/// @param: value(Input), pointer to type T. -/// @param: alignment(Input), alignment value. -/// @return: T*, pointer to type T. -template -static __forceinline T* AlignDown(T* value, size_t alignment) { - return (T*)AlignDown((intptr_t)value, alignment); -} - -/// @brief: Calculates the ceiling value aligned based on parameter of -/// alignment. -/// If value is at the boundary of alignment, it is unchanged. -/// @param: value(Input), value to be calculated. -/// @param: alignment(Input), alignment value. -/// @param: T. -template -static __forceinline T AlignUp(T value, size_t alignment) { - return AlignDown((T)(value + alignment - 1), alignment); -} - -/// @brief: Same as previous one, but first parameter becomes pointer, for more -/// info, see the previous desciption. -/// @param: value(Input), pointer to type T. -/// @param: alignment(Input), alignment value. -/// @return: T*, pointer to type T. -template -static __forceinline T* AlignUp(T* value, size_t alignment) { - return (T*)AlignDown((intptr_t)((uint8_t*)value + alignment - 1), alignment); -} - -/// @brief: Checks if the input value is at the boundary of alignment, if it is, -/// @return true. -/// @param: value(Input), value to be checked. -/// @param: alignment(Input), alignment value. -/// @return: bool. -template -static __forceinline bool IsMultipleOf(T value, size_t alignment) { - return (AlignUp(value, alignment) == value); -} - -/// @brief: Same as previous one, but first parameter becomes pointer, for more -/// info, see the previous desciption. -/// @param: value(Input), pointer to type T. -/// @param: alignment(Input), alignment value. -/// @return: bool. 
-template -static __forceinline bool IsMultipleOf(T* value, size_t alignment) { - return (AlignUp(value, alignment) == value); -} - -static __forceinline uint32_t NextPow2(uint32_t value) { - if (value == 0) return 1; - uint32_t v = value - 1; - v |= v >> 1; - v |= v >> 2; - v |= v >> 4; - v |= v >> 8; - v |= v >> 16; - return v + 1; -} - -static __forceinline uint64_t NextPow2(uint64_t value) { - if (value == 0) return 1; - uint64_t v = value - 1; - v |= v >> 1; - v |= v >> 2; - v |= v >> 4; - v |= v >> 8; - v |= v >> 16; - v |= v >> 32; - return v + 1; -} - -#include "atomic_helpers.h" - -#endif // HSA_RUNTIME_CORE_UTIL_UTIIS_H_ diff --git a/runtime/hsa-runtime/inc/Brig.h b/runtime/hsa-runtime/inc/Brig.h deleted file mode 100644 index 1e441b3251..0000000000 --- a/runtime/hsa-runtime/inc/Brig.h +++ /dev/null @@ -1,1530 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// -// The University of Illinois/NCSA -// Open Source License (NCSA) -// -// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. -// -// Developed by: -// -// AMD Research and AMD HSA Software Development -// -// Advanced Micro Devices, Inc. -// -// www.amd.com -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal with the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimers. 
-// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimers in -// the documentation and/or other materials provided with the distribution. -// - Neither the names of Advanced Micro Devices, Inc, -// nor the names of its contributors may be used to endorse or promote -// products derived from this Software without specific prior written -// permission. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR -// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// DEALINGS WITH THE SOFTWARE. -// -//////////////////////////////////////////////////////////////////////////////// - -//.ignore{ - -#ifndef INCLUDED_BRIG_H -#define INCLUDED_BRIG_H - -#include - -enum BrigAuxDefs { - MAX_OPERANDS_NUM = 6 -}; - -//} - -typedef uint32_t BrigVersion32_t; - -enum BrigVersion { - - //.nowrap - //.nodump - //.nollvm - - BRIG_VERSION_HSAIL_MAJOR = 1, - BRIG_VERSION_HSAIL_MINOR = 0, - BRIG_VERSION_BRIG_MAJOR = 1, - BRIG_VERSION_BRIG_MINOR = 0 -}; - -typedef uint8_t BrigAlignment8_t; //.defValue=BRIG_ALIGNMENT_NONE - -typedef uint8_t BrigAllocation8_t; //.defValue=BRIG_ALLOCATION_NONE - -typedef uint8_t BrigAluModifier8_t; - -typedef uint8_t BrigAtomicOperation8_t; - -typedef uint32_t BrigCodeOffset32_t; //.defValue=0 //.wtype=ItemRef - -typedef uint8_t BrigCompareOperation8_t; - -typedef uint16_t BrigControlDirective16_t; - -typedef uint32_t BrigDataOffset32_t; - -typedef BrigDataOffset32_t BrigDataOffsetCodeList32_t; //.wtype=ListRef //.defValue=0 - -typedef BrigDataOffset32_t BrigDataOffsetOperandList32_t; //.wtype=ListRef //.defValue=0 - -typedef 
BrigDataOffset32_t BrigDataOffsetString32_t; //.wtype=StrRef //.defValue=0 - -typedef uint8_t BrigExecutableModifier8_t; - -typedef uint8_t BrigImageChannelOrder8_t; //.defValue=BRIG_CHANNEL_ORDER_UNKNOWN - -typedef uint8_t BrigImageChannelType8_t; //.defValue=BRIG_CHANNEL_TYPE_UNKNOWN - -typedef uint8_t BrigImageGeometry8_t; //.defValue=BRIG_GEOMETRY_UNKNOWN - -typedef uint8_t BrigImageQuery8_t; - -typedef uint16_t BrigKind16_t; - -typedef uint8_t BrigLinkage8_t; //.defValue=BRIG_LINKAGE_NONE - -typedef uint8_t BrigMachineModel8_t; //.defValue=BRIG_MACHINE_LARGE - -typedef uint8_t BrigMemoryModifier8_t; - -typedef uint8_t BrigMemoryOrder8_t; //.defValue=BRIG_MEMORY_ORDER_RELAXED - -typedef uint8_t BrigMemoryScope8_t; //.defValue=BRIG_MEMORY_SCOPE_SYSTEM - -typedef uint16_t BrigOpcode16_t; - -typedef uint32_t BrigOperandOffset32_t; //.defValue=0 //.wtype=ItemRef - -typedef uint8_t BrigPack8_t; //.defValue=BRIG_PACK_NONE - -typedef uint8_t BrigProfile8_t; //.defValue=BRIG_PROFILE_FULL - -typedef uint16_t BrigRegisterKind16_t; - -typedef uint8_t BrigRound8_t; //.defValue=BRIG_ROUND_NONE - -typedef uint8_t BrigSamplerAddressing8_t; //.defValue=BRIG_ADDRESSING_CLAMP_TO_EDGE - -typedef uint8_t BrigSamplerCoordNormalization8_t; - -typedef uint8_t BrigSamplerFilter8_t; - -typedef uint8_t BrigSamplerQuery8_t; - -typedef uint32_t BrigSectionIndex32_t; - -typedef uint8_t BrigSegCvtModifier8_t; - -typedef uint8_t BrigSegment8_t; //.defValue=BRIG_SEGMENT_NONE - -typedef uint32_t BrigStringOffset32_t; //.defValue=0 //.wtype=StrRef - -typedef uint16_t BrigType16_t; - -typedef uint8_t BrigVariableModifier8_t; - -typedef uint8_t BrigWidth8_t; - -typedef uint32_t BrigExceptions32_t; - -enum BrigKind { - - //.wname={ s/^BRIG_KIND//; MACRO2Name($_) } - //.mnemo=$wname{ $wname } - // - //.sizeof=$wname{ "sizeof(".$structs->{"Brig".$wname}->{rawbrig}.")" } - //.sizeof_switch //.sizeof_proto="int size_of_brig_record(unsigned arg)" //.sizeof_default="return -1" - // - //.isBodyOnly={ 
"false" } - //.isBodyOnly_switch //.isBodyOnly_proto="bool isBodyOnly(Directive d)" //.isBodyOnly_arg="d.kind()" - //.isBodyOnly_default="assert(false); return false" - // - //.isToplevelOnly={ "false" } - //.isToplevelOnly_switch //.isToplevelOnly_proto="bool isToplevelOnly(Directive d)" //.isToplevelOnly_arg="d.kind()" - //.isToplevelOnly_default="assert(false); return false" - - BRIG_KIND_NONE = 0x0000, //.skip - - BRIG_KIND_DIRECTIVE_BEGIN = 0x1000, //.skip - BRIG_KIND_DIRECTIVE_ARG_BLOCK_END = 0x1000, //.isBodyOnly=true - BRIG_KIND_DIRECTIVE_ARG_BLOCK_START = 0x1001, //.isBodyOnly=true - BRIG_KIND_DIRECTIVE_COMMENT = 0x1002, - BRIG_KIND_DIRECTIVE_CONTROL = 0x1003, //.isBodyOnly=true - BRIG_KIND_DIRECTIVE_EXTENSION = 0x1004, //.isToplevelOnly=true - BRIG_KIND_DIRECTIVE_FBARRIER = 0x1005, - BRIG_KIND_DIRECTIVE_FUNCTION = 0x1006, //.isToplevelOnly=true - BRIG_KIND_DIRECTIVE_INDIRECT_FUNCTION = 0x1007, //.isToplevelOnly=true - BRIG_KIND_DIRECTIVE_KERNEL = 0x1008, //.isToplevelOnly=true - BRIG_KIND_DIRECTIVE_LABEL = 0x1009, //.isBodyOnly=true - BRIG_KIND_DIRECTIVE_LOC = 0x100a, - BRIG_KIND_DIRECTIVE_MODULE = 0x100b, //.isToplevelOnly=true - BRIG_KIND_DIRECTIVE_PRAGMA = 0x100c, - BRIG_KIND_DIRECTIVE_SIGNATURE = 0x100d, //.isToplevelOnly=true - BRIG_KIND_DIRECTIVE_VARIABLE = 0x100e, - BRIG_KIND_DIRECTIVE_END = 0x100f, //.skip - - BRIG_KIND_INST_BEGIN = 0x2000, //.skip - BRIG_KIND_INST_ADDR = 0x2000, - BRIG_KIND_INST_ATOMIC = 0x2001, - BRIG_KIND_INST_BASIC = 0x2002, - BRIG_KIND_INST_BR = 0x2003, - BRIG_KIND_INST_CMP = 0x2004, - BRIG_KIND_INST_CVT = 0x2005, - BRIG_KIND_INST_IMAGE = 0x2006, - BRIG_KIND_INST_LANE = 0x2007, - BRIG_KIND_INST_MEM = 0x2008, - BRIG_KIND_INST_MEM_FENCE = 0x2009, - BRIG_KIND_INST_MOD = 0x200a, - BRIG_KIND_INST_QUERY_IMAGE = 0x200b, - BRIG_KIND_INST_QUERY_SAMPLER = 0x200c, - BRIG_KIND_INST_QUEUE = 0x200d, - BRIG_KIND_INST_SEG = 0x200e, - BRIG_KIND_INST_SEG_CVT = 0x200f, - BRIG_KIND_INST_SIGNAL = 0x2010, - BRIG_KIND_INST_SOURCE_TYPE = 0x2011, - 
BRIG_KIND_INST_END = 0x2012, //.skip - - BRIG_KIND_OPERAND_BEGIN = 0x3000, //.skip - BRIG_KIND_OPERAND_ADDRESS = 0x3000, - BRIG_KIND_OPERAND_ALIGN = 0x3001, - BRIG_KIND_OPERAND_CODE_LIST = 0x3002, - BRIG_KIND_OPERAND_CODE_REF = 0x3003, - BRIG_KIND_OPERAND_CONSTANT_BYTES = 0x3004, - BRIG_KIND_OPERAND_RESERVED = 0x3005, //.skip - BRIG_KIND_OPERAND_CONSTANT_IMAGE = 0x3006, - BRIG_KIND_OPERAND_CONSTANT_OPERAND_LIST = 0x3007, - BRIG_KIND_OPERAND_CONSTANT_SAMPLER = 0x3008, - BRIG_KIND_OPERAND_OPERAND_LIST = 0x3009, - BRIG_KIND_OPERAND_REGISTER = 0x300a, - BRIG_KIND_OPERAND_STRING = 0x300b, - BRIG_KIND_OPERAND_WAVESIZE = 0x300c, - BRIG_KIND_OPERAND_END = 0x300d //.skip -}; - -enum BrigAlignment { - - //.mnemo={ s/^BRIG_ALIGNMENT_//; lc } - //.mnemo_proto="const char* align2str(unsigned arg)" - // - //.bytes={ /(\d+)/ ? $1 : undef } - //.bytes_switch //.bytes_proto="unsigned align2num(unsigned arg)" //.bytes_default="assert(false); return -1" - // - //.rbytes=$bytes{ $bytes } - //.rbytes_switch //.rbytes_reverse //.rbytes_proto="BrigAlignment num2align(uint64_t arg)" - //.rbytes_default="return BRIG_ALIGNMENT_LAST" - // - //.print=$bytes{ $bytes>1 ? 
"_align($bytes)" : "" } - - BRIG_ALIGNMENT_NONE = 0, //.no_mnemo - BRIG_ALIGNMENT_1 = 1, //.mnemo="" - BRIG_ALIGNMENT_2 = 2, - BRIG_ALIGNMENT_4 = 3, - BRIG_ALIGNMENT_8 = 4, - BRIG_ALIGNMENT_16 = 5, - BRIG_ALIGNMENT_32 = 6, - BRIG_ALIGNMENT_64 = 7, - BRIG_ALIGNMENT_128 = 8, - BRIG_ALIGNMENT_256 = 9, - - BRIG_ALIGNMENT_LAST, //.skip - BRIG_ALIGNMENT_MAX = BRIG_ALIGNMENT_LAST - 1 //.skip -}; - -enum BrigAllocation { - - //.mnemo={ s/^BRIG_ALLOCATION_//;lc } - //.mnemo_token=EAllocKind - - BRIG_ALLOCATION_NONE = 0, //.mnemo="" - BRIG_ALLOCATION_PROGRAM = 1, - BRIG_ALLOCATION_AGENT = 2, - BRIG_ALLOCATION_AUTOMATIC = 3 -}; - -enum BrigAluModifierMask { - BRIG_ALU_FTZ = 1 -}; - -enum BrigAtomicOperation { - - //.tdcaption="Atomic Operations" - // - //.mnemo={ s/^BRIG_ATOMIC_//;lc } - //.mnemo_token=_EMAtomicOp - //.mnemo_context=EInstModifierInstAtomicContext - // - //.print=$mnemo{ "_$mnemo" } - - BRIG_ATOMIC_ADD = 0, - BRIG_ATOMIC_AND = 1, - BRIG_ATOMIC_CAS = 2, - BRIG_ATOMIC_EXCH = 3, - BRIG_ATOMIC_LD = 4, - BRIG_ATOMIC_MAX = 5, - BRIG_ATOMIC_MIN = 6, - BRIG_ATOMIC_OR = 7, - BRIG_ATOMIC_ST = 8, - BRIG_ATOMIC_SUB = 9, - BRIG_ATOMIC_WRAPDEC = 10, - BRIG_ATOMIC_WRAPINC = 11, - BRIG_ATOMIC_XOR = 12, - BRIG_ATOMIC_WAIT_EQ = 13, - BRIG_ATOMIC_WAIT_NE = 14, - BRIG_ATOMIC_WAIT_LT = 15, - BRIG_ATOMIC_WAIT_GTE = 16, - BRIG_ATOMIC_WAITTIMEOUT_EQ = 17, - BRIG_ATOMIC_WAITTIMEOUT_NE = 18, - BRIG_ATOMIC_WAITTIMEOUT_LT = 19, - BRIG_ATOMIC_WAITTIMEOUT_GTE = 20 -}; - -enum BrigCompareOperation { - - //.tdcaption="Comparison Operators" - // - //.mnemo={ s/^BRIG_COMPARE_//;lc } - //.mnemo_token=_EMCompare - // - //.print=$mnemo{ "_$mnemo" } - - BRIG_COMPARE_EQ = 0, - BRIG_COMPARE_NE = 1, - BRIG_COMPARE_LT = 2, - BRIG_COMPARE_LE = 3, - BRIG_COMPARE_GT = 4, - BRIG_COMPARE_GE = 5, - BRIG_COMPARE_EQU = 6, - BRIG_COMPARE_NEU = 7, - BRIG_COMPARE_LTU = 8, - BRIG_COMPARE_LEU = 9, - BRIG_COMPARE_GTU = 10, - BRIG_COMPARE_GEU = 11, - BRIG_COMPARE_NUM = 12, - BRIG_COMPARE_NAN = 13, - BRIG_COMPARE_SEQ 
= 14, - BRIG_COMPARE_SNE = 15, - BRIG_COMPARE_SLT = 16, - BRIG_COMPARE_SLE = 17, - BRIG_COMPARE_SGT = 18, - BRIG_COMPARE_SGE = 19, - BRIG_COMPARE_SGEU = 20, - BRIG_COMPARE_SEQU = 21, - BRIG_COMPARE_SNEU = 22, - BRIG_COMPARE_SLTU = 23, - BRIG_COMPARE_SLEU = 24, - BRIG_COMPARE_SNUM = 25, - BRIG_COMPARE_SNAN = 26, - BRIG_COMPARE_SGTU = 27 -}; - -enum BrigControlDirective { - - //.mnemo={ s/^BRIG_CONTROL_//;lc } - //.mnemo_token=EControl - // - //.print=$mnemo{ $mnemo } - - BRIG_CONTROL_NONE = 0, //.skip - BRIG_CONTROL_ENABLEBREAKEXCEPTIONS = 1, - BRIG_CONTROL_ENABLEDETECTEXCEPTIONS = 2, - BRIG_CONTROL_MAXDYNAMICGROUPSIZE = 3, - BRIG_CONTROL_MAXFLATGRIDSIZE = 4, - BRIG_CONTROL_MAXFLATWORKGROUPSIZE = 5, - BRIG_CONTROL_REQUIREDDIM = 6, - BRIG_CONTROL_REQUIREDGRIDSIZE = 7, - BRIG_CONTROL_REQUIREDWORKGROUPSIZE = 8, - BRIG_CONTROL_REQUIRENOPARTIALWORKGROUPS = 9 -}; - -enum BrigExecutableModifierMask { - //.nodump - BRIG_EXECUTABLE_DEFINITION = 1 -}; - -enum BrigImageChannelOrder { - - //.mnemo={ s/^BRIG_CHANNEL_ORDER_?//;lc } - //.mnemo_token=EImageOrder - //.mnemo_context=EImageOrderContext - // - //.print=$mnemo{ $mnemo } - - BRIG_CHANNEL_ORDER_A = 0, - BRIG_CHANNEL_ORDER_R = 1, - BRIG_CHANNEL_ORDER_RX = 2, - BRIG_CHANNEL_ORDER_RG = 3, - BRIG_CHANNEL_ORDER_RGX = 4, - BRIG_CHANNEL_ORDER_RA = 5, - BRIG_CHANNEL_ORDER_RGB = 6, - BRIG_CHANNEL_ORDER_RGBX = 7, - BRIG_CHANNEL_ORDER_RGBA = 8, - BRIG_CHANNEL_ORDER_BGRA = 9, - BRIG_CHANNEL_ORDER_ARGB = 10, - BRIG_CHANNEL_ORDER_ABGR = 11, - BRIG_CHANNEL_ORDER_SRGB = 12, - BRIG_CHANNEL_ORDER_SRGBX = 13, - BRIG_CHANNEL_ORDER_SRGBA = 14, - BRIG_CHANNEL_ORDER_SBGRA = 15, - BRIG_CHANNEL_ORDER_INTENSITY = 16, - BRIG_CHANNEL_ORDER_LUMINANCE = 17, - BRIG_CHANNEL_ORDER_DEPTH = 18, - BRIG_CHANNEL_ORDER_DEPTH_STENCIL = 19, - - // used internally - BRIG_CHANNEL_ORDER_UNKNOWN, //.mnemo="" // used when no order is specified - - BRIG_CHANNEL_ORDER_FIRST_USER_DEFINED = 128 //.skip - -}; - -enum BrigImageChannelType { - - //.mnemo={ 
s/^BRIG_CHANNEL_TYPE_//;lc } - //.mnemo_token=EImageFormat - // - //.print=$mnemo{ $mnemo } - - BRIG_CHANNEL_TYPE_SNORM_INT8 = 0, - BRIG_CHANNEL_TYPE_SNORM_INT16 = 1, - BRIG_CHANNEL_TYPE_UNORM_INT8 = 2, - BRIG_CHANNEL_TYPE_UNORM_INT16 = 3, - BRIG_CHANNEL_TYPE_UNORM_INT24 = 4, - BRIG_CHANNEL_TYPE_UNORM_SHORT_555 = 5, - BRIG_CHANNEL_TYPE_UNORM_SHORT_565 = 6, - BRIG_CHANNEL_TYPE_UNORM_INT_101010 = 7, - BRIG_CHANNEL_TYPE_SIGNED_INT8 = 8, - BRIG_CHANNEL_TYPE_SIGNED_INT16 = 9, - BRIG_CHANNEL_TYPE_SIGNED_INT32 = 10, - BRIG_CHANNEL_TYPE_UNSIGNED_INT8 = 11, - BRIG_CHANNEL_TYPE_UNSIGNED_INT16 = 12, - BRIG_CHANNEL_TYPE_UNSIGNED_INT32 = 13, - BRIG_CHANNEL_TYPE_HALF_FLOAT = 14, - BRIG_CHANNEL_TYPE_FLOAT = 15, - - // used internally - BRIG_CHANNEL_TYPE_UNKNOWN, //.mnemo="" - - BRIG_CHANNEL_TYPE_FIRST_USER_DEFINED = 128 //.skip -}; - -enum BrigImageGeometry { - - //.tdcaption="Geometry" - // - //.mnemo={ s/^BRIG_GEOMETRY_//;lc } - //.mnemo_token=EImageGeometry - // - //.dim={/_([0-9]+D)(A)?/ ? $1+(defined $2?1:0) : undef} - //.dim_switch //.dim_proto="unsigned getBrigGeometryDim(unsigned geo)" //.dim_arg="geo" - //.dim_default="assert(0); return 0" - // - //.depth={/DEPTH$/?"true":"false"} - //.depth_switch //.depth_proto="bool isBrigGeometryDepth(unsigned geo)" //.depth_arg="geo" - //.depth_default="return false" - - BRIG_GEOMETRY_1D = 0, - BRIG_GEOMETRY_2D = 1, - BRIG_GEOMETRY_3D = 2, - BRIG_GEOMETRY_1DA = 3, - BRIG_GEOMETRY_2DA = 4, - BRIG_GEOMETRY_1DB = 5, - BRIG_GEOMETRY_2DDEPTH = 6, - BRIG_GEOMETRY_2DADEPTH = 7, - - // used internally - BRIG_GEOMETRY_UNKNOWN, //.mnemo="" - - BRIG_GEOMETRY_FIRST_USER_DEFINED = 128 //.skip -}; - -enum BrigImageQuery { - - //.mnemo={ s/^BRIG_IMAGE_QUERY_//;lc } - // - //.print=$mnemo{ $mnemo } - - BRIG_IMAGE_QUERY_WIDTH = 0, - BRIG_IMAGE_QUERY_HEIGHT = 1, - BRIG_IMAGE_QUERY_DEPTH = 2, - BRIG_IMAGE_QUERY_ARRAY = 3, - BRIG_IMAGE_QUERY_CHANNELORDER = 4, - BRIG_IMAGE_QUERY_CHANNELTYPE = 5, - - BRIG_IMAGE_QUERY_FIRST_USER_DEFINED = 6 //.skip -}; - 
-enum BrigLinkage { - - //.mnemo={ s/^BRIG_LINKAGE_//;s/NONE//;lc } - - BRIG_LINKAGE_NONE = 0, - BRIG_LINKAGE_PROGRAM = 1, - BRIG_LINKAGE_MODULE = 2, - BRIG_LINKAGE_FUNCTION = 3, - BRIG_LINKAGE_ARG = 4 -}; - -enum BrigMachineModel { - - //.mnemo={ s/^BRIG_MACHINE_//; '$'.lc } - //.mnemo_token=ETargetMachine - // - //.print=$mnemo{ $mnemo } - - BRIG_MACHINE_SMALL = 0, - BRIG_MACHINE_LARGE = 1, - - BRIG_MACHINE_UNDEF = 2 //.skip -}; - -enum BrigMemoryModifierMask { - BRIG_MEMORY_CONST = 1 -}; - -enum BrigMemoryOrder { - - //.mnemo={ s/^BRIG_MEMORY_ORDER_//; lc } - //.mnemo_token=_EMMemoryOrder - // - //.print=$mnemo{ "_$mnemo" } - - BRIG_MEMORY_ORDER_NONE = 0, //.mnemo="" - BRIG_MEMORY_ORDER_RELAXED = 1, //.mnemo=rlx - BRIG_MEMORY_ORDER_SC_ACQUIRE = 2, //.mnemo=scacq - BRIG_MEMORY_ORDER_SC_RELEASE = 3, //.mnemo=screl - BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE = 4, //.mnemo=scar - - BRIG_MEMORY_ORDER_LAST = 5 //.skip -}; - -enum BrigMemoryScope { - - //.mnemo={ s/^BRIG_MEMORY_SCOPE_//; lc } - //.mnemo_token=_EMMemoryScope - // - //.print=$mnemo{ $mnemo } - - BRIG_MEMORY_SCOPE_NONE = 0, //.mnemo="" - BRIG_MEMORY_SCOPE_WORKITEM = 1, //.mnemo="" - BRIG_MEMORY_SCOPE_WAVEFRONT = 2, //.mnemo=wave - BRIG_MEMORY_SCOPE_WORKGROUP = 3, //.mnemo=wg - BRIG_MEMORY_SCOPE_AGENT = 4, //.mnemo=agent - BRIG_MEMORY_SCOPE_SYSTEM = 5, //.mnemo=system - - BRIG_MEMORY_SCOPE_LAST = 6 //.skip -}; - -enum BrigOpcode { - - //.tdcaption="Instruction Opcodes" - // - //.k={ "BASIC" } - //.pscode=$k{ MACRO2Name("_".$k) } - //.opcodeparser=$pscode{ return $pscode && "parseMnemo$pscode" } - //.opcodeparser_incfile=ParserUtilities - //.opcodeparser_switch //.opcodeparser_proto="OpcodeParser getCoreOpcodeParser(BrigOpcode16_t arg)" //.opcodeparser_default="return parseMnemoBasic" - // - //.psopnd={undef} - //.opndparser=$psopnd{ return $psopnd && "&Parser::parse$psopnd" } - //.opndparser_incfile=ParserUtilities - //.opndparser_switch //.opndparser_proto="Parser::OperandParser 
Parser::getCoreOperandParser(BrigOpcode16_t arg)" //.opndparser_default="return &Parser::parseOperands" - // - //.mnemo={ s/^BRIG_OPCODE_//; lc } - //.mnemo_scanner=Instructions //.mnemo_token=EInstruction - //.mnemo_context=EDefaultContext - // - //.vecOpndIndex={undef} - //.vecOpndIndex_switch //.vecOpndIndex_proto="int getCoreVXIndex(BrigOpcode16_t arg)" //.vecOpndIndex_default="return -1" - // - //.numdst={undef} - //.numdst_switch //.numdst_proto="int getCoreDstOperandsNum(BrigOpcode16_t arg)" //.numdst_default="return 1" - // - //.print=$mnemo{ $mnemo } - - BRIG_OPCODE_NOP = 0, //.k=NOP - BRIG_OPCODE_ABS = 1, //.k=BASIC_OR_MOD - BRIG_OPCODE_ADD = 2, //.k=BASIC_OR_MOD - BRIG_OPCODE_BORROW = 3, - BRIG_OPCODE_CARRY = 4, - BRIG_OPCODE_CEIL = 5, //.k=BASIC_OR_MOD - BRIG_OPCODE_COPYSIGN = 6, //.k=BASIC_OR_MOD - BRIG_OPCODE_DIV = 7, //.k=BASIC_OR_MOD - BRIG_OPCODE_FLOOR = 8, //.k=BASIC_OR_MOD - BRIG_OPCODE_FMA = 9, //.k=BASIC_OR_MOD - BRIG_OPCODE_FRACT = 10, //.k=BASIC_OR_MOD - BRIG_OPCODE_MAD = 11, //.k=BASIC_OR_MOD - BRIG_OPCODE_MAX = 12, //.k=BASIC_OR_MOD - BRIG_OPCODE_MIN = 13, //.k=BASIC_OR_MOD - BRIG_OPCODE_MUL = 14, //.k=BASIC_OR_MOD - BRIG_OPCODE_MULHI = 15, //.k=BASIC_OR_MOD - BRIG_OPCODE_NEG = 16, //.k=BASIC_OR_MOD - BRIG_OPCODE_REM = 17, - BRIG_OPCODE_RINT = 18, //.k=BASIC_OR_MOD - BRIG_OPCODE_SQRT = 19, //.k=BASIC_OR_MOD - BRIG_OPCODE_SUB = 20, //.k=BASIC_OR_MOD - BRIG_OPCODE_TRUNC = 21, //.k=BASIC_OR_MOD - BRIG_OPCODE_MAD24 = 22, - BRIG_OPCODE_MAD24HI = 23, - BRIG_OPCODE_MUL24 = 24, - BRIG_OPCODE_MUL24HI = 25, - BRIG_OPCODE_SHL = 26, - BRIG_OPCODE_SHR = 27, - BRIG_OPCODE_AND = 28, - BRIG_OPCODE_NOT = 29, - BRIG_OPCODE_OR = 30, - BRIG_OPCODE_POPCOUNT = 31, //.k=SOURCE_TYPE - BRIG_OPCODE_XOR = 32, - BRIG_OPCODE_BITEXTRACT = 33, - BRIG_OPCODE_BITINSERT = 34, - BRIG_OPCODE_BITMASK = 35, - BRIG_OPCODE_BITREV = 36, - BRIG_OPCODE_BITSELECT = 37, - BRIG_OPCODE_FIRSTBIT = 38, //.k=SOURCE_TYPE - BRIG_OPCODE_LASTBIT = 39, //.k=SOURCE_TYPE - BRIG_OPCODE_COMBINE = 
40, //.k=SOURCE_TYPE //.vecOpndIndex=1 - BRIG_OPCODE_EXPAND = 41, //.k=SOURCE_TYPE //.vecOpndIndex=0 - BRIG_OPCODE_LDA = 42, //.k=ADDR - BRIG_OPCODE_MOV = 43, - BRIG_OPCODE_SHUFFLE = 44, - BRIG_OPCODE_UNPACKHI = 45, - BRIG_OPCODE_UNPACKLO = 46, - BRIG_OPCODE_PACK = 47, //.k=SOURCE_TYPE - BRIG_OPCODE_UNPACK = 48, //.k=SOURCE_TYPE - BRIG_OPCODE_CMOV = 49, - BRIG_OPCODE_CLASS = 50, //.k=SOURCE_TYPE - BRIG_OPCODE_NCOS = 51, - BRIG_OPCODE_NEXP2 = 52, - BRIG_OPCODE_NFMA = 53, - BRIG_OPCODE_NLOG2 = 54, - BRIG_OPCODE_NRCP = 55, - BRIG_OPCODE_NRSQRT = 56, - BRIG_OPCODE_NSIN = 57, - BRIG_OPCODE_NSQRT = 58, - BRIG_OPCODE_BITALIGN = 59, - BRIG_OPCODE_BYTEALIGN = 60, - BRIG_OPCODE_PACKCVT = 61, //.k=SOURCE_TYPE - BRIG_OPCODE_UNPACKCVT = 62, //.k=SOURCE_TYPE - BRIG_OPCODE_LERP = 63, - BRIG_OPCODE_SAD = 64, //.k=SOURCE_TYPE - BRIG_OPCODE_SADHI = 65, //.k=SOURCE_TYPE - BRIG_OPCODE_SEGMENTP = 66, //.k=SEG_CVT - BRIG_OPCODE_FTOS = 67, //.k=SEG_CVT - BRIG_OPCODE_STOF = 68, //.k=SEG_CVT - BRIG_OPCODE_CMP = 69, //.k=CMP - BRIG_OPCODE_CVT = 70, //.k=CVT - BRIG_OPCODE_LD = 71, //.k=MEM //.vecOpndIndex=0 - BRIG_OPCODE_ST = 72, //.k=MEM //.vecOpndIndex=0 //.numdst=0 - BRIG_OPCODE_ATOMIC = 73, //.k=ATOMIC - BRIG_OPCODE_ATOMICNORET = 74, //.k=ATOMIC //.numdst=0 - BRIG_OPCODE_SIGNAL = 75, //.k=SIGNAL - BRIG_OPCODE_SIGNALNORET = 76, //.k=SIGNAL //.numdst=0 - BRIG_OPCODE_MEMFENCE = 77, //.k=MEM_FENCE //.numdst=0 - BRIG_OPCODE_RDIMAGE = 78, //.skip // NB: handled by IMAGE extension - BRIG_OPCODE_LDIMAGE = 79, //.skip // NB: handled by IMAGE extension - BRIG_OPCODE_STIMAGE = 80, //.skip // NB: handled by IMAGE extension - BRIG_OPCODE_IMAGEFENCE = 81, //.skip // NB: handled by IMAGE extension - BRIG_OPCODE_QUERYIMAGE = 82, //.skip // NB: handled by IMAGE extension - BRIG_OPCODE_QUERYSAMPLER = 83, //.skip // NB: handled by IMAGE extension - BRIG_OPCODE_CBR = 84, //.k=BR //.numdst=0 - BRIG_OPCODE_BR = 85, //.k=BR //.numdst=0 - BRIG_OPCODE_SBR = 86, //.k=BR //.numdst=0 //.psopnd=SbrOperands - 
BRIG_OPCODE_BARRIER = 87, //.k=BR //.numdst=0 - BRIG_OPCODE_WAVEBARRIER = 88, //.k=BR //.numdst=0 - BRIG_OPCODE_ARRIVEFBAR = 89, //.k=BR //.numdst=0 - BRIG_OPCODE_INITFBAR = 90, //.k=BASIC_NO_TYPE //.numdst=0 - BRIG_OPCODE_JOINFBAR = 91, //.k=BR //.numdst=0 - BRIG_OPCODE_LEAVEFBAR = 92, //.k=BR //.numdst=0 - BRIG_OPCODE_RELEASEFBAR = 93, //.k=BASIC_NO_TYPE //.numdst=0 - BRIG_OPCODE_WAITFBAR = 94, //.k=BR //.numdst=0 - BRIG_OPCODE_LDF = 95, - BRIG_OPCODE_ACTIVELANECOUNT = 96, //.k=LANE - BRIG_OPCODE_ACTIVELANEID = 97, //.k=LANE - BRIG_OPCODE_ACTIVELANEMASK = 98, //.k=LANE //.vecOpndIndex=0 - BRIG_OPCODE_ACTIVELANEPERMUTE = 99, //.k=LANE - BRIG_OPCODE_CALL = 100, //.k=BR //.psopnd=CallOperands //.numdst=0 - BRIG_OPCODE_SCALL = 101, //.k=BR //.psopnd=CallOperands //.numdst=0 - BRIG_OPCODE_ICALL = 102, //.k=BR //.psopnd=CallOperands //.numdst=0 - BRIG_OPCODE_RET = 103, //.k=BASIC_NO_TYPE - BRIG_OPCODE_ALLOCA = 104, //.k=MEM - BRIG_OPCODE_CURRENTWORKGROUPSIZE = 105, - BRIG_OPCODE_CURRENTWORKITEMFLATID = 106, - BRIG_OPCODE_DIM = 107, - BRIG_OPCODE_GRIDGROUPS = 108, - BRIG_OPCODE_GRIDSIZE = 109, - BRIG_OPCODE_PACKETCOMPLETIONSIG = 110, - BRIG_OPCODE_PACKETID = 111, - BRIG_OPCODE_WORKGROUPID = 112, - BRIG_OPCODE_WORKGROUPSIZE = 113, - BRIG_OPCODE_WORKITEMABSID = 114, - BRIG_OPCODE_WORKITEMFLATABSID = 115, - BRIG_OPCODE_WORKITEMFLATID = 116, - BRIG_OPCODE_WORKITEMID = 117, - BRIG_OPCODE_CLEARDETECTEXCEPT = 118, //.numdst=0 - BRIG_OPCODE_GETDETECTEXCEPT = 119, - BRIG_OPCODE_SETDETECTEXCEPT = 120, //.numdst=0 - BRIG_OPCODE_ADDQUEUEWRITEINDEX = 121, //.k=QUEUE - BRIG_OPCODE_CASQUEUEWRITEINDEX = 122, //.k=QUEUE - BRIG_OPCODE_LDQUEUEREADINDEX = 123, //.k=QUEUE - BRIG_OPCODE_LDQUEUEWRITEINDEX = 124, //.k=QUEUE - BRIG_OPCODE_STQUEUEREADINDEX = 125, //.k=QUEUE //.numdst=0 - BRIG_OPCODE_STQUEUEWRITEINDEX = 126, //.k=QUEUE //.numdst=0 - BRIG_OPCODE_CLOCK = 127, - BRIG_OPCODE_CUID = 128, - BRIG_OPCODE_DEBUGTRAP = 129, //.numdst=0 - BRIG_OPCODE_GROUPBASEPTR = 130, - 
BRIG_OPCODE_KERNARGBASEPTR = 131, - BRIG_OPCODE_LANEID = 132, - BRIG_OPCODE_MAXCUID = 133, - BRIG_OPCODE_MAXWAVEID = 134, - BRIG_OPCODE_NULLPTR = 135, //.k=SEG - BRIG_OPCODE_WAVEID = 136, - BRIG_OPCODE_FIRST_USER_DEFINED = 32768, //.skip -}; - -enum BrigPack { - - //.tdcaption="Packing" - // - //.mnemo={ s/^BRIG_PACK_//;s/SAT$/_sat/;lc } - //.mnemo_token=_EMPacking - // - //.print=$mnemo{ "_$mnemo" } - - BRIG_PACK_NONE = 0, //.mnemo="" - BRIG_PACK_PP = 1, - BRIG_PACK_PS = 2, - BRIG_PACK_SP = 3, - BRIG_PACK_SS = 4, - BRIG_PACK_S = 5, - BRIG_PACK_P = 6, - BRIG_PACK_PPSAT = 7, - BRIG_PACK_PSSAT = 8, - BRIG_PACK_SPSAT = 9, - BRIG_PACK_SSSAT = 10, - BRIG_PACK_SSAT = 11, - BRIG_PACK_PSAT = 12 -}; - -enum BrigProfile { - - //.mnemo={ s/^BRIG_PROFILE_//;'$'.lc } - //.mnemo_token=ETargetProfile - // - //.print=$mnemo{ $mnemo } - - BRIG_PROFILE_BASE = 0, - BRIG_PROFILE_FULL = 1, - - BRIG_PROFILE_UNDEF = 2 //.skip -}; - -enum BrigRegisterKind { - - //.mnemo={ s/^BRIG_REGISTER_KIND_//;'$'.lc(substr($_,0,1)) } - // - //.bits={ } - //.bits_switch //.bits_proto="unsigned getRegBits(BrigRegisterKind16_t arg)" //.bits_default="return (unsigned)-1" - // - //.nollvm - - BRIG_REGISTER_KIND_CONTROL = 0, //.bits=1 - BRIG_REGISTER_KIND_SINGLE = 1, //.bits=32 - BRIG_REGISTER_KIND_DOUBLE = 2, //.bits=64 - BRIG_REGISTER_KIND_QUAD = 3 //.bits=128 -}; - -enum BrigRound { - - //.mnemo={} - //.mnemo_fn=round2str //.mnemo_token=_EMRound - // - //.sat={/_SAT$/? "true" : "false"} - //.sat_switch //.sat_proto="bool isSatRounding(unsigned rounding)" //.sat_arg="rounding" - //.sat_default="return false" - // - //.sig={/_SIGNALING_/? "true" : "false"} - //.sig_switch //.sig_proto="bool isSignalingRounding(unsigned rounding)" //.sig_arg="rounding" - //.sig_default="return false" - // - //.int={/_INTEGER_/? "true" : "false"} - //.int_switch //.int_proto="bool isIntRounding(unsigned rounding)" //.int_arg="rounding" - //.int_default="return false" - // - //.flt={/_FLOAT_/? 
"true" : "false"} - //.flt_switch //.flt_proto="bool isFloatRounding(unsigned rounding)" //.flt_arg="rounding" - //.flt_default="return false" - // - //.print=$mnemo{ "_$mnemo" } - - BRIG_ROUND_NONE = 0, //.no_mnemo - BRIG_ROUND_FLOAT_DEFAULT = 1, //.no_mnemo - BRIG_ROUND_FLOAT_NEAR_EVEN = 2, //.mnemo=near - BRIG_ROUND_FLOAT_ZERO = 3, //.mnemo=zero - BRIG_ROUND_FLOAT_PLUS_INFINITY = 4, //.mnemo=up - BRIG_ROUND_FLOAT_MINUS_INFINITY = 5, //.mnemo=down - BRIG_ROUND_INTEGER_NEAR_EVEN = 6, //.mnemo=neari - BRIG_ROUND_INTEGER_ZERO = 7, //.mnemo=zeroi - BRIG_ROUND_INTEGER_PLUS_INFINITY = 8, //.mnemo=upi - BRIG_ROUND_INTEGER_MINUS_INFINITY = 9, //.mnemo=downi - BRIG_ROUND_INTEGER_NEAR_EVEN_SAT = 10, //.mnemo=neari_sat - BRIG_ROUND_INTEGER_ZERO_SAT = 11, //.mnemo=zeroi_sat - BRIG_ROUND_INTEGER_PLUS_INFINITY_SAT = 12, //.mnemo=upi_sat - BRIG_ROUND_INTEGER_MINUS_INFINITY_SAT = 13, //.mnemo=downi_sat - BRIG_ROUND_INTEGER_SIGNALING_NEAR_EVEN = 14, //.mnemo=sneari - BRIG_ROUND_INTEGER_SIGNALING_ZERO = 15, //.mnemo=szeroi - BRIG_ROUND_INTEGER_SIGNALING_PLUS_INFINITY = 16, //.mnemo=supi - BRIG_ROUND_INTEGER_SIGNALING_MINUS_INFINITY = 17, //.mnemo=sdowni - BRIG_ROUND_INTEGER_SIGNALING_NEAR_EVEN_SAT = 18, //.mnemo=sneari_sat - BRIG_ROUND_INTEGER_SIGNALING_ZERO_SAT = 19, //.mnemo=szeroi_sat - BRIG_ROUND_INTEGER_SIGNALING_PLUS_INFINITY_SAT = 20, //.mnemo=supi_sat - BRIG_ROUND_INTEGER_SIGNALING_MINUS_INFINITY_SAT = 21 //.mnemo=sdowni_sat -}; - -enum BrigSamplerAddressing { - - //.mnemo={ s/^BRIG_ADDRESSING_//;lc } - //.mnemo_token=ESamplerAddressingMode - - BRIG_ADDRESSING_UNDEFINED = 0, - BRIG_ADDRESSING_CLAMP_TO_EDGE = 1, - BRIG_ADDRESSING_CLAMP_TO_BORDER = 2, - BRIG_ADDRESSING_REPEAT = 3, - BRIG_ADDRESSING_MIRRORED_REPEAT = 4, - - BRIG_ADDRESSING_FIRST_USER_DEFINED = 128 //.skip -}; - -enum BrigSamplerCoordNormalization { - - //.mnemo={ s/^BRIG_COORD_//;lc } - //.mnemo_token=ESamplerCoord - // - //.print=$mnemo{ $mnemo } - - BRIG_COORD_UNNORMALIZED = 0, - BRIG_COORD_NORMALIZED = 1 
-}; - -enum BrigSamplerFilter { - - //.mnemo={ s/^BRIG_FILTER_//;lc } - // - //.print=$mnemo{ $mnemo } - - BRIG_FILTER_NEAREST = 0, - BRIG_FILTER_LINEAR = 1, - - BRIG_FILTER_FIRST_USER_DEFINED = 128 //.skip -}; - -enum BrigSamplerQuery { - - //.mnemo={ s/^BRIG_SAMPLER_QUERY_//;lc } - //.mnemo_token=_EMSamplerQuery - // - //.print=$mnemo{ $mnemo } - - BRIG_SAMPLER_QUERY_ADDRESSING = 0, - BRIG_SAMPLER_QUERY_COORD = 1, - BRIG_SAMPLER_QUERY_FILTER = 2 -}; - -enum BrigSectionIndex { - - //.nollvm - // - //.mnemo={ s/^BRIG_SECTION_INDEX_/HSA_/;lc } - - BRIG_SECTION_INDEX_DATA = 0, - BRIG_SECTION_INDEX_CODE = 1, - BRIG_SECTION_INDEX_OPERAND = 2, - BRIG_SECTION_INDEX_BEGIN_IMPLEMENTATION_DEFINED = 3, - - // used internally - BRIG_SECTION_INDEX_IMPLEMENTATION_DEFINED = BRIG_SECTION_INDEX_BEGIN_IMPLEMENTATION_DEFINED //.skip -}; - -enum BrigSegCvtModifierMask { - BRIG_SEG_CVT_NONULL = 1 //.mnemo="nonull" //.print="_nonull" -}; - -enum BrigSegment { - - //.mnemo={ s/^BRIG_SEGMENT_//;lc} - //.mnemo_token=_EMSegment - //.mnemo_context=EInstModifierContext - // - //.print=$mnemo{ $mnemo ? 
"_$mnemo" : "" } - - BRIG_SEGMENT_NONE = 0, //.mnemo="" - BRIG_SEGMENT_FLAT = 1, //.mnemo="" - BRIG_SEGMENT_GLOBAL = 2, - BRIG_SEGMENT_READONLY = 3, - BRIG_SEGMENT_KERNARG = 4, - BRIG_SEGMENT_GROUP = 5, - BRIG_SEGMENT_PRIVATE = 6, - BRIG_SEGMENT_SPILL = 7, - BRIG_SEGMENT_ARG = 8, - - BRIG_SEGMENT_FIRST_USER_DEFINED = 128 //.skip -}; - -enum BrigPackedTypeBits { - - //.nodump - // - //.nollvm - - BRIG_TYPE_BASE_SIZE = 5, - BRIG_TYPE_PACK_SIZE = 2, - BRIG_TYPE_ARRAY_SIZE = 1, - - BRIG_TYPE_BASE_SHIFT = 0, - BRIG_TYPE_PACK_SHIFT = BRIG_TYPE_BASE_SHIFT + BRIG_TYPE_BASE_SIZE, - BRIG_TYPE_ARRAY_SHIFT = BRIG_TYPE_PACK_SHIFT + BRIG_TYPE_PACK_SIZE, - - BRIG_TYPE_BASE_MASK = ((1 << BRIG_TYPE_BASE_SIZE) - 1) << BRIG_TYPE_BASE_SHIFT, - BRIG_TYPE_PACK_MASK = ((1 << BRIG_TYPE_PACK_SIZE) - 1) << BRIG_TYPE_PACK_SHIFT, - BRIG_TYPE_ARRAY_MASK = ((1 << BRIG_TYPE_ARRAY_SIZE) - 1) << BRIG_TYPE_ARRAY_SHIFT, - - BRIG_TYPE_PACK_NONE = 0 << BRIG_TYPE_PACK_SHIFT, - BRIG_TYPE_PACK_32 = 1 << BRIG_TYPE_PACK_SHIFT, - BRIG_TYPE_PACK_64 = 2 << BRIG_TYPE_PACK_SHIFT, - BRIG_TYPE_PACK_128 = 3 << BRIG_TYPE_PACK_SHIFT, - - BRIG_TYPE_ARRAY = 1 << BRIG_TYPE_ARRAY_SHIFT -}; - -enum BrigType { - - //.numBits={ /ARRAY$/ ? undef : /([0-9]+)X([0-9]+)/ ? $1*$2 : /([0-9]+)/ ? $1 : undef } - //.numBits_switch //.numBits_proto="unsigned getBrigTypeNumBits(unsigned arg)" //.numBits_default="assert(0); return 0" - //.numBytes=$numBits{ $numBits > 1 ? $numBits/8 : undef } - //.numBytes_switch //.numBytes_proto="unsigned getBrigTypeNumBytes(unsigned arg)" //.numBytes_default="assert(0); return 0" - // - //.mnemo={ s/^BRIG_TYPE_//;lc } - //.mnemo_token=_EMType - // - //.array={/ARRAY$/?"true":"false"} - //.array_switch //.array_proto="bool isArrayType(unsigned type)" //.array_arg="type" - //.array_default="return false" - // - //.a2e={/(.*)_ARRAY$/? 
$1 : "BRIG_TYPE_NONE"} - //.a2e_switch //.a2e_proto="unsigned arrayType2elementType(unsigned type)" //.a2e_arg="type" - //.a2e_default="return BRIG_TYPE_NONE" - // - //.e2a={/_ARRAY$/? "BRIG_TYPE_NONE" : /_NONE$/ ? "BRIG_TYPE_NONE" : /_B1$/ ? "BRIG_TYPE_NONE" : $_ . "_ARRAY"} - //.e2a_switch //.e2a_proto="unsigned elementType2arrayType(unsigned type)" //.e2a_arg="type" - //.e2a_default="return BRIG_TYPE_NONE" - // - //.t2s={s/^BRIG_TYPE_//;lc s/_ARRAY$/[]/;lc} - //.t2s_switch //.t2s_proto="const char* type2name(unsigned type)" //.t2s_arg="type" - //.t2s_default="return NULL" - // - //.dispatch_switch //.dispatch_incfile=TemplateUtilities - //.dispatch_proto="template\nRetType dispatchByType_gen(unsigned type, Visitor& v)" - //.dispatch={ /ARRAY$/ ? "v.visitNone(type)" : /^BRIG_TYPE_([BUSF]|SIG)[0-9]+/ ? "v.template visit< BrigTypeTraits<$_> >()" : "v.visitNone(type)" } - //.dispatch_arg="type" //.dispatch_default="return v.visitNone(type)" - // - //- .tdname=BrigType - // - //.print=$mnemo{ "_$mnemo" } - - BRIG_TYPE_NONE = 0, //.mnemo="" //.print="" - BRIG_TYPE_U8 = 1, //.ctype=uint8_t - BRIG_TYPE_U16 = 2, //.ctype=uint16_t - BRIG_TYPE_U32 = 3, //.ctype=uint32_t - BRIG_TYPE_U64 = 4, //.ctype=uint64_t - BRIG_TYPE_S8 = 5, //.ctype=int8_t - BRIG_TYPE_S16 = 6, //.ctype=int16_t - BRIG_TYPE_S32 = 7, //.ctype=int32_t - BRIG_TYPE_S64 = 8, //.ctype=int64_t - BRIG_TYPE_F16 = 9, //.ctype=f16_t - BRIG_TYPE_F32 = 10, //.ctype=float - BRIG_TYPE_F64 = 11, //.ctype=double - BRIG_TYPE_B1 = 12, //.ctype=bool //.numBytes=1 - BRIG_TYPE_B8 = 13, //.ctype=uint8_t - BRIG_TYPE_B16 = 14, //.ctype=uint16_t - BRIG_TYPE_B32 = 15, //.ctype=uint32_t - BRIG_TYPE_B64 = 16, //.ctype=uint64_t - BRIG_TYPE_B128 = 17, //.ctype=b128_t - BRIG_TYPE_SAMP = 18, //.mnemo=samp //.numBits=64 - BRIG_TYPE_ROIMG = 19, //.mnemo=roimg //.numBits=64 - BRIG_TYPE_WOIMG = 20, //.mnemo=woimg //.numBits=64 - BRIG_TYPE_RWIMG = 21, //.mnemo=rwimg //.numBits=64 - BRIG_TYPE_SIG32 = 22, //.mnemo=sig32 //.numBits=64 - 
BRIG_TYPE_SIG64 = 23, //.mnemo=sig64 //.numBits=64 - - BRIG_TYPE_U8X4 = BRIG_TYPE_U8 | BRIG_TYPE_PACK_32, //.ctype=uint8_t - BRIG_TYPE_U8X8 = BRIG_TYPE_U8 | BRIG_TYPE_PACK_64, //.ctype=uint8_t - BRIG_TYPE_U8X16 = BRIG_TYPE_U8 | BRIG_TYPE_PACK_128, //.ctype=uint8_t - BRIG_TYPE_U16X2 = BRIG_TYPE_U16 | BRIG_TYPE_PACK_32, //.ctype=uint16_t - BRIG_TYPE_U16X4 = BRIG_TYPE_U16 | BRIG_TYPE_PACK_64, //.ctype=uint16_t - BRIG_TYPE_U16X8 = BRIG_TYPE_U16 | BRIG_TYPE_PACK_128, //.ctype=uint16_t - BRIG_TYPE_U32X2 = BRIG_TYPE_U32 | BRIG_TYPE_PACK_64, //.ctype=uint32_t - BRIG_TYPE_U32X4 = BRIG_TYPE_U32 | BRIG_TYPE_PACK_128, //.ctype=uint32_t - BRIG_TYPE_U64X2 = BRIG_TYPE_U64 | BRIG_TYPE_PACK_128, //.ctype=uint64_t - BRIG_TYPE_S8X4 = BRIG_TYPE_S8 | BRIG_TYPE_PACK_32, //.ctype=int8_t - BRIG_TYPE_S8X8 = BRIG_TYPE_S8 | BRIG_TYPE_PACK_64, //.ctype=int8_t - BRIG_TYPE_S8X16 = BRIG_TYPE_S8 | BRIG_TYPE_PACK_128, //.ctype=int8_t - BRIG_TYPE_S16X2 = BRIG_TYPE_S16 | BRIG_TYPE_PACK_32, //.ctype=int16_t - BRIG_TYPE_S16X4 = BRIG_TYPE_S16 | BRIG_TYPE_PACK_64, //.ctype=int16_t - BRIG_TYPE_S16X8 = BRIG_TYPE_S16 | BRIG_TYPE_PACK_128, //.ctype=int16_t - BRIG_TYPE_S32X2 = BRIG_TYPE_S32 | BRIG_TYPE_PACK_64, //.ctype=int32_t - BRIG_TYPE_S32X4 = BRIG_TYPE_S32 | BRIG_TYPE_PACK_128, //.ctype=int32_t - BRIG_TYPE_S64X2 = BRIG_TYPE_S64 | BRIG_TYPE_PACK_128, //.ctype=int64_t - BRIG_TYPE_F16X2 = BRIG_TYPE_F16 | BRIG_TYPE_PACK_32, //.ctype=f16_t - BRIG_TYPE_F16X4 = BRIG_TYPE_F16 | BRIG_TYPE_PACK_64, //.ctype=f16_t - BRIG_TYPE_F16X8 = BRIG_TYPE_F16 | BRIG_TYPE_PACK_128, //.ctype=f16_t - BRIG_TYPE_F32X2 = BRIG_TYPE_F32 | BRIG_TYPE_PACK_64, //.ctype=float - BRIG_TYPE_F32X4 = BRIG_TYPE_F32 | BRIG_TYPE_PACK_128, //.ctype=float - BRIG_TYPE_F64X2 = BRIG_TYPE_F64 | BRIG_TYPE_PACK_128, //.ctype=double - - BRIG_TYPE_U8_ARRAY = BRIG_TYPE_U8 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" - BRIG_TYPE_U16_ARRAY = BRIG_TYPE_U16 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" - BRIG_TYPE_U32_ARRAY = BRIG_TYPE_U32 | BRIG_TYPE_ARRAY, 
//.mnemo="" //.print="" - BRIG_TYPE_U64_ARRAY = BRIG_TYPE_U64 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" - BRIG_TYPE_S8_ARRAY = BRIG_TYPE_S8 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" - BRIG_TYPE_S16_ARRAY = BRIG_TYPE_S16 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" - BRIG_TYPE_S32_ARRAY = BRIG_TYPE_S32 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" - BRIG_TYPE_S64_ARRAY = BRIG_TYPE_S64 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" - BRIG_TYPE_F16_ARRAY = BRIG_TYPE_F16 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" - BRIG_TYPE_F32_ARRAY = BRIG_TYPE_F32 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" - BRIG_TYPE_F64_ARRAY = BRIG_TYPE_F64 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" - BRIG_TYPE_B8_ARRAY = BRIG_TYPE_B8 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" - BRIG_TYPE_B16_ARRAY = BRIG_TYPE_B16 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" - BRIG_TYPE_B32_ARRAY = BRIG_TYPE_B32 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" - BRIG_TYPE_B64_ARRAY = BRIG_TYPE_B64 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" - BRIG_TYPE_B128_ARRAY = BRIG_TYPE_B128 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" - BRIG_TYPE_SAMP_ARRAY = BRIG_TYPE_SAMP | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" - BRIG_TYPE_ROIMG_ARRAY = BRIG_TYPE_ROIMG | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" - BRIG_TYPE_WOIMG_ARRAY = BRIG_TYPE_WOIMG | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" - BRIG_TYPE_RWIMG_ARRAY = BRIG_TYPE_RWIMG | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" - BRIG_TYPE_SIG32_ARRAY = BRIG_TYPE_SIG32 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" - BRIG_TYPE_SIG64_ARRAY = BRIG_TYPE_SIG64 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" - BRIG_TYPE_U8X4_ARRAY = BRIG_TYPE_U8X4 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" - BRIG_TYPE_U8X8_ARRAY = BRIG_TYPE_U8X8 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" - BRIG_TYPE_U8X16_ARRAY = BRIG_TYPE_U8X16 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" - BRIG_TYPE_U16X2_ARRAY = BRIG_TYPE_U16X2 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" - BRIG_TYPE_U16X4_ARRAY = BRIG_TYPE_U16X4 | BRIG_TYPE_ARRAY, //.mnemo="" 
//.print="" - BRIG_TYPE_U16X8_ARRAY = BRIG_TYPE_U16X8 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" - BRIG_TYPE_U32X2_ARRAY = BRIG_TYPE_U32X2 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" - BRIG_TYPE_U32X4_ARRAY = BRIG_TYPE_U32X4 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" - BRIG_TYPE_U64X2_ARRAY = BRIG_TYPE_U64X2 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" - BRIG_TYPE_S8X4_ARRAY = BRIG_TYPE_S8X4 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" - BRIG_TYPE_S8X8_ARRAY = BRIG_TYPE_S8X8 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" - BRIG_TYPE_S8X16_ARRAY = BRIG_TYPE_S8X16 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" - BRIG_TYPE_S16X2_ARRAY = BRIG_TYPE_S16X2 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" - BRIG_TYPE_S16X4_ARRAY = BRIG_TYPE_S16X4 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" - BRIG_TYPE_S16X8_ARRAY = BRIG_TYPE_S16X8 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" - BRIG_TYPE_S32X2_ARRAY = BRIG_TYPE_S32X2 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" - BRIG_TYPE_S32X4_ARRAY = BRIG_TYPE_S32X4 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" - BRIG_TYPE_S64X2_ARRAY = BRIG_TYPE_S64X2 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" - BRIG_TYPE_F16X2_ARRAY = BRIG_TYPE_F16X2 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" - BRIG_TYPE_F16X4_ARRAY = BRIG_TYPE_F16X4 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" - BRIG_TYPE_F16X8_ARRAY = BRIG_TYPE_F16X8 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" - BRIG_TYPE_F32X2_ARRAY = BRIG_TYPE_F32X2 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" - BRIG_TYPE_F32X4_ARRAY = BRIG_TYPE_F32X4 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" - BRIG_TYPE_F64X2_ARRAY = BRIG_TYPE_F64X2 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" - - // Used internally - BRIG_TYPE_INVALID = (unsigned) -1 //.skip -}; - -enum BrigVariableModifierMask { - - //.nodump - - BRIG_VARIABLE_DEFINITION = 1, - BRIG_VARIABLE_CONST = 2 -}; - -enum BrigWidth { - - //.print={ s/^BRIG_WIDTH_//; "_width($_)" } - - BRIG_WIDTH_NONE = 0, - BRIG_WIDTH_1 = 1, - BRIG_WIDTH_2 = 2, - BRIG_WIDTH_4 = 3, - BRIG_WIDTH_8 = 4, - BRIG_WIDTH_16 = 
5, - BRIG_WIDTH_32 = 6, - BRIG_WIDTH_64 = 7, - BRIG_WIDTH_128 = 8, - BRIG_WIDTH_256 = 9, - BRIG_WIDTH_512 = 10, - BRIG_WIDTH_1024 = 11, - BRIG_WIDTH_2048 = 12, - BRIG_WIDTH_4096 = 13, - BRIG_WIDTH_8192 = 14, - BRIG_WIDTH_16384 = 15, - BRIG_WIDTH_32768 = 16, - BRIG_WIDTH_65536 = 17, - BRIG_WIDTH_131072 = 18, - BRIG_WIDTH_262144 = 19, - BRIG_WIDTH_524288 = 20, - BRIG_WIDTH_1048576 = 21, - BRIG_WIDTH_2097152 = 22, - BRIG_WIDTH_4194304 = 23, - BRIG_WIDTH_8388608 = 24, - BRIG_WIDTH_16777216 = 25, - BRIG_WIDTH_33554432 = 26, - BRIG_WIDTH_67108864 = 27, - BRIG_WIDTH_134217728 = 28, - BRIG_WIDTH_268435456 = 29, - BRIG_WIDTH_536870912 = 30, - BRIG_WIDTH_1073741824 = 31, - BRIG_WIDTH_2147483648 = 32, - BRIG_WIDTH_WAVESIZE = 33, - BRIG_WIDTH_ALL = 34, - - BRIG_WIDTH_LAST //.skip -}; - -struct BrigUInt64 { //.isroot //.standalone - uint32_t lo; //.defValue=0 - uint32_t hi; //.defValue=0 - - //+hcode KLASS& operator=(uint64_t rhs); - //+hcode operator uint64_t(); - //+implcode inline KLASS& KLASS::operator=(uint64_t rhs) { lo() = (uint32_t)rhs; hi() = (uint32_t)(rhs >> 32); return *this; } - //+implcode inline KLASS::operator uint64_t() { return ((uint64_t)hi()) << 32 | lo(); } -}; - -struct BrigAluModifier { //.isroot //.standalone - BrigAluModifier8_t allBits; //.defValue=0 - //^^ bool ftz; //.wtype=BitValRef<0> -}; - -struct BrigBase { //.nowrap - uint16_t byteCount; - BrigKind16_t kind; -}; - -//.alias Code:Base { //.generic //.isroot //.section=BRIG_SECTION_INDEX_CODE }; -//.alias Directive:Code { //.generic }; -//.alias Operand:Base { //.generic //.isroot //.section=BRIG_SECTION_INDEX_OPERAND }; - -struct BrigData { - //.nowrap - uint32_t byteCount; - uint8_t bytes[1]; -}; - -struct BrigExecutableModifier { //.isroot //.standalone - BrigExecutableModifier8_t allBits; //.defValue=0 - //^^ bool isDefinition; //.wtype=BitValRef<0> -}; - -struct BrigMemoryModifier { //.isroot //.standalone - BrigMemoryModifier8_t allBits; //.defValue=0 - //^^ bool isConst; 
//.wtype=BitValRef<0> -}; - -struct BrigSegCvtModifier { //.isroot //.standalone - BrigSegCvtModifier8_t allBits; //.defValue=0 - //^^ bool isNoNull; //.wtype=BitValRef<0> -}; - -struct BrigVariableModifier { //.isroot //.standalone - BrigVariableModifier8_t allBits; //.defValue=0 - - //^^ bool isDefinition; //.wtype=BitValRef<0> - //^^ bool isConst; //.wtype=BitValRef<1> -}; - -struct BrigDirectiveArgBlockEnd { - BrigBase base; -}; - -struct BrigDirectiveArgBlockStart { - BrigBase base; -}; - -struct BrigDirectiveComment { - BrigBase base; - BrigDataOffsetString32_t name; -}; - -struct BrigDirectiveControl { - BrigBase base; - BrigControlDirective16_t control; - uint16_t reserved; //.defValue=0 - BrigDataOffsetOperandList32_t operands; -}; - -struct BrigDirectiveExecutable { //.generic - BrigBase base; - BrigDataOffsetString32_t name; - uint16_t outArgCount; //.defValue=0 - uint16_t inArgCount; //.defValue=0 - BrigCodeOffset32_t firstInArg; - BrigCodeOffset32_t firstCodeBlockEntry; - BrigCodeOffset32_t nextModuleEntry; - BrigExecutableModifier modifier; //.acc=subItem //.wtype=ExecutableModifier - BrigLinkage8_t linkage; - uint16_t reserved; //.defValue=0 -}; - -//.alias DirectiveKernel:DirectiveExecutable { }; -//.alias DirectiveFunction:DirectiveExecutable { }; -//.alias DirectiveSignature:DirectiveExecutable { }; -//.alias DirectiveIndirectFunction:DirectiveExecutable { }; - -struct BrigDirectiveExtension { - BrigBase base; - BrigDataOffsetString32_t name; -}; - -struct BrigDirectiveFbarrier { - BrigBase base; - BrigDataOffsetString32_t name; - BrigVariableModifier modifier; //.acc=subItem //.wtype=VariableModifier - BrigLinkage8_t linkage; - uint16_t reserved; //.defValue=0 -}; - -struct BrigDirectiveLabel { - BrigBase base; - BrigDataOffsetString32_t name; -}; - -struct BrigDirectiveLoc { - BrigBase base; - BrigDataOffsetString32_t filename; - uint32_t line; - uint32_t column; //.defValue=1 -}; - -struct BrigDirectiveNone { //.enum=BRIG_KIND_NONE - BrigBase 
base; -}; - -struct BrigDirectivePragma { - BrigBase base; - BrigDataOffsetOperandList32_t operands; -}; - -struct BrigDirectiveVariable { - BrigBase base; - BrigDataOffsetString32_t name; - BrigOperandOffset32_t init; - BrigType16_t type; - - //+hcode bool isArray(); - //+implcode inline bool KLASS::isArray() { return isArrayType(type()); } - - //+hcode unsigned elementType(); - //+implcode inline unsigned KLASS::elementType() { return isArray()? arrayType2elementType(type()) : type(); } - - BrigSegment8_t segment; - BrigAlignment8_t align; - BrigUInt64 dim; //.acc=subItem //.wtype=UInt64 - BrigVariableModifier modifier; //.acc=subItem //.wtype=VariableModifier - BrigLinkage8_t linkage; - BrigAllocation8_t allocation; - uint8_t reserved; //.defValue=0 -}; - -struct BrigDirectiveModule { - BrigBase base; - BrigDataOffsetString32_t name; - BrigVersion32_t hsailMajor; //.wtype=ValRef - BrigVersion32_t hsailMinor; //.wtype=ValRef - BrigProfile8_t profile; - BrigMachineModel8_t machineModel; - BrigRound8_t defaultFloatRound; - uint8_t reserved; //.defValue=0 -}; - -struct BrigInstBase { //.wname=Inst //.generic //.parent=BrigCode - BrigBase base; - BrigOpcode16_t opcode; - BrigType16_t type; - BrigDataOffsetOperandList32_t operands; - - //+hcode Operand operand(int index); - //+implcode inline Operand KLASS::operand(int index) { return operands()[index]; } -}; - -struct BrigInstAddr { - BrigInstBase base; - BrigSegment8_t segment; - uint8_t reserved[3]; //.defValue=0 -}; - -struct BrigInstAtomic { - BrigInstBase base; - BrigSegment8_t segment; - BrigMemoryOrder8_t memoryOrder; - BrigMemoryScope8_t memoryScope; - BrigAtomicOperation8_t atomicOperation; - uint8_t equivClass; - uint8_t reserved[3]; //.defValue=0 -}; - -struct BrigInstBasic { - BrigInstBase base; -}; - -struct BrigInstBr { - BrigInstBase base; - BrigWidth8_t width; - uint8_t reserved[3]; //.defValue=0 -}; - -struct BrigInstCmp { - BrigInstBase base; - BrigType16_t sourceType; - BrigAluModifier modifier; 
//.acc=subItem //.wtype=AluModifier - BrigCompareOperation8_t compare; - BrigPack8_t pack; - uint8_t reserved[3]; //.defValue=0 -}; - -struct BrigInstCvt { - BrigInstBase base; - BrigType16_t sourceType; - BrigAluModifier modifier; //.acc=subItem //.wtype=AluModifier - BrigRound8_t round; -}; - -struct BrigInstImage { - BrigInstBase base; - BrigType16_t imageType; - BrigType16_t coordType; - BrigImageGeometry8_t geometry; - uint8_t equivClass; - uint16_t reserved; //.defValue=0 -}; - -struct BrigInstLane { - BrigInstBase base; - BrigType16_t sourceType; - BrigWidth8_t width; - uint8_t reserved; //.defValue=0 -}; - -struct BrigInstMem { - BrigInstBase base; - BrigSegment8_t segment; - BrigAlignment8_t align; - uint8_t equivClass; - BrigWidth8_t width; - BrigMemoryModifier modifier; //.acc=subItem //.wtype=MemoryModifier - uint8_t reserved[3]; //.defValue=0 -}; - -struct BrigInstMemFence { - BrigInstBase base; - BrigMemoryOrder8_t memoryOrder; - BrigMemoryScope8_t globalSegmentMemoryScope; - BrigMemoryScope8_t groupSegmentMemoryScope; - BrigMemoryScope8_t imageSegmentMemoryScope; -}; - -struct BrigInstMod { - BrigInstBase base; - BrigAluModifier modifier; //.acc=subItem //.wtype=AluModifier - BrigRound8_t round; - BrigPack8_t pack; - uint8_t reserved; //.defValue=0 -}; - -struct BrigInstQueryImage { - BrigInstBase base; - BrigType16_t imageType; - BrigImageGeometry8_t geometry; - BrigImageQuery8_t imageQuery; -}; - -struct BrigInstQuerySampler { - BrigInstBase base; - BrigSamplerQuery8_t samplerQuery; - uint8_t reserved[3]; //.defValue=0 -}; - -struct BrigInstQueue { - BrigInstBase base; - BrigSegment8_t segment; - BrigMemoryOrder8_t memoryOrder; - uint16_t reserved; //.defValue=0 -}; - -struct BrigInstSeg { - BrigInstBase base; - BrigSegment8_t segment; - uint8_t reserved[3]; //.defValue=0 -}; - -struct BrigInstSegCvt { - BrigInstBase base; - BrigType16_t sourceType; - BrigSegment8_t segment; - BrigSegCvtModifier modifier; //.acc=subItem //.wtype=SegCvtModifier -}; 
- -struct BrigInstSignal { - BrigInstBase base; - BrigType16_t signalType; - BrigMemoryOrder8_t memoryOrder; - BrigAtomicOperation8_t signalOperation; -}; - -struct BrigInstSourceType { - BrigInstBase base; - BrigType16_t sourceType; - uint16_t reserved; //.defValue=0 -}; - -struct BrigOperandAddress { - BrigBase base; - BrigCodeOffset32_t symbol; //.wtype=ItemRef - BrigOperandOffset32_t reg; //.wtype=ItemRef - BrigUInt64 offset; //.acc=subItem //.wtype=UInt64 -}; - -struct BrigOperandAlign { - BrigBase base; - BrigAlignment8_t align; - uint8_t reserved[3]; //.defValue=0 -}; - -struct BrigOperandCodeList { - BrigBase base; - BrigDataOffsetCodeList32_t elements; - - //+hcode unsigned elementCount(); - //+implcode inline unsigned KLASS::elementCount() { return elements().size(); } - //+hcode Code elements(int index); - //+implcode inline Code KLASS::elements(int index) { return elements()[index]; } -}; - -struct BrigOperandCodeRef { - BrigBase base; - BrigCodeOffset32_t ref; -}; - -struct BrigOperandConstantBytes { - BrigBase base; - BrigType16_t type; //.defValue=0 - uint16_t reserved; //.defValue=0 - BrigDataOffsetString32_t bytes; -}; - -struct BrigOperandConstantOperandList { - BrigBase base; - BrigType16_t type; - uint16_t reserved; //.defValue=0 - BrigDataOffsetOperandList32_t elements; - - //+hcode unsigned elementCount(); - //+implcode inline unsigned KLASS::elementCount() { return elements().size(); } - //+hcode Operand elements(int index); - //+implcode inline Operand KLASS::elements(int index) { return elements()[index]; } -}; - -struct BrigOperandConstantImage { - BrigBase base; - BrigType16_t type; - BrigImageGeometry8_t geometry; - BrigImageChannelOrder8_t channelOrder; - BrigImageChannelType8_t channelType; - uint8_t reserved[3]; //.defValue=0 - BrigUInt64 width; //.acc=subItem //.wtype=UInt64 - BrigUInt64 height; //.acc=subItem //.wtype=UInt64 - BrigUInt64 depth; //.acc=subItem //.wtype=UInt64 - BrigUInt64 array; //.acc=subItem //.wtype=UInt64 -}; - 
-struct BrigOperandOperandList { - BrigBase base; - BrigDataOffsetOperandList32_t elements; - - //+hcode unsigned elementCount(); - //+implcode inline unsigned KLASS::elementCount() { return elements().size(); } - //+hcode Operand elements(int index); - //+implcode inline Operand KLASS::elements(int index) { return elements()[index]; } -}; - -struct BrigOperandRegister { - BrigBase base; - BrigRegisterKind16_t regKind; - uint16_t regNum; -}; - -struct BrigOperandConstantSampler { - BrigBase base; - BrigType16_t type; - BrigSamplerCoordNormalization8_t coord; - BrigSamplerFilter8_t filter; - BrigSamplerAddressing8_t addressing; - uint8_t reserved[3]; //.defValue=0 -}; - -struct BrigOperandString { - BrigBase base; - BrigDataOffsetString32_t string; -}; - -struct BrigOperandWavesize { - BrigBase base; -}; - -//.ignore{ - -enum BrigExceptionsMask { - BRIG_EXCEPTIONS_INVALID_OPERATION = 1 << 0, - BRIG_EXCEPTIONS_DIVIDE_BY_ZERO = 1 << 1, - BRIG_EXCEPTIONS_OVERFLOW = 1 << 2, - BRIG_EXCEPTIONS_UNDERFLOW = 1 << 3, - BRIG_EXCEPTIONS_INEXACT = 1 << 4, - - BRIG_EXCEPTIONS_FIRST_USER_DEFINED = 1 << 16 -}; - -struct BrigSectionHeader { - uint64_t byteCount; - uint32_t headerByteCount; - uint32_t nameLength; - uint8_t name[1]; -}; - -#define MODULE_IDENTIFICATION_LENGTH (8) - -struct BrigModuleHeader { - char identification[MODULE_IDENTIFICATION_LENGTH]; - BrigVersion32_t brigMajor; - BrigVersion32_t brigMinor; - uint64_t byteCount; - uint8_t hash[64]; - uint32_t reserved; - uint32_t sectionCount; - uint64_t sectionIndex; -}; - -typedef BrigModuleHeader* BrigModule_t; - -#endif // defined(INCLUDED_BRIG_H) -//} diff --git a/runtime/hsa-runtime/inc/amd_hsa_common.h b/runtime/hsa-runtime/inc/amd_hsa_common.h deleted file mode 100644 index 92aba8ed2e..0000000000 --- a/runtime/hsa-runtime/inc/amd_hsa_common.h +++ /dev/null @@ -1,91 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// -// The University of Illinois/NCSA -// Open Source License 
(NCSA) -// -// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. -// -// Developed by: -// -// AMD Research and AMD HSA Software Development -// -// Advanced Micro Devices, Inc. -// -// www.amd.com -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal with the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimers. -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimers in -// the documentation and/or other materials provided with the distribution. -// - Neither the names of Advanced Micro Devices, Inc, -// nor the names of its contributors may be used to endorse or promote -// products derived from this Software without specific prior written -// permission. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR -// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// DEALINGS WITH THE SOFTWARE. 
-// -//////////////////////////////////////////////////////////////////////////////// - -// The following set of header files provides definitions for AMD GPU -// Architecture: -// - amd_hsa_common.h -// - amd_hsa_elf.h -// - amd_hsa_kernel_code.h -// - amd_hsa_queue.h -// - amd_hsa_signal.h -// -// Refer to "HSA Application Binary Interface: AMD GPU Architecture" for more -// information. - -#ifndef AMD_HSA_COMMON_H -#define AMD_HSA_COMMON_H - -#include -#include - -// Descriptive version of the HSA Application Binary Interface. -#define AMD_HSA_ABI_VERSION "AMD GPU Architecture v0.35 (June 25, 2015)" - -// Alignment attribute that specifies a minimum alignment (in bytes) for -// variables of the specified type. -#if defined(__GNUC__) -# define __ALIGNED__(x) __attribute__((aligned(x))) -#elif defined(_MSC_VER) -# define __ALIGNED__(x) __declspec(align(x)) -#elif defined(RC_INVOKED) -# define __ALIGNED__(x) -#else -# error -#endif - -// Creates enumeration entries for packed types. Enumeration entries include -// bit shift amount, bit width, and bit mask. -#define AMD_HSA_BITS_CREATE_ENUM_ENTRIES(name, shift, width) \ - name ## _SHIFT = (shift), \ - name ## _WIDTH = (width), \ - name = (((1 << (width)) - 1) << (shift)) \ - -// Gets bits for specified mask from specified src packed instance. -#define AMD_HSA_BITS_GET(src, mask) \ - ((src & mask) >> mask ## _SHIFT) \ - -// Sets val bits for specified mask in specified dst packed instance. 
-#define AMD_HSA_BITS_SET(dst, mask, val) \ - dst &= (~(1 << mask ## _SHIFT) & ~mask); \ - dst |= (((val) << mask ## _SHIFT) & mask) \ - -#endif // AMD_HSA_COMMON_H diff --git a/runtime/hsa-runtime/inc/amd_hsa_elf.h b/runtime/hsa-runtime/inc/amd_hsa_elf.h deleted file mode 100644 index 941aeeb389..0000000000 --- a/runtime/hsa-runtime/inc/amd_hsa_elf.h +++ /dev/null @@ -1,295 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// -// The University of Illinois/NCSA -// Open Source License (NCSA) -// -// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. -// -// Developed by: -// -// AMD Research and AMD HSA Software Development -// -// Advanced Micro Devices, Inc. -// -// www.amd.com -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal with the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimers. -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimers in -// the documentation and/or other materials provided with the distribution. -// - Neither the names of Advanced Micro Devices, Inc, -// nor the names of its contributors may be used to endorse or promote -// products derived from this Software without specific prior written -// permission. 
-// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR -// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// DEALINGS WITH THE SOFTWARE. -// -//////////////////////////////////////////////////////////////////////////////// - -#ifndef AMD_HSA_ELF_H -#define AMD_HSA_ELF_H - -#include "amd_hsa_common.h" - -// ELF Header Enumeration Values. -#define EM_AMDGPU 224 -#define ELFOSABI_AMDGPU_HSA 64 -#define ELFABIVERSION_AMDGPU_HSA 0 -#define EF_AMDGPU_XNACK 0x00000001 -#define EF_AMDGPU_TRAP_HANDLER 0x00000002 - -// ELF Section Header Flag Enumeration Values. -#define SHF_AMDGPU_HSA_GLOBAL (0x00100000 & SHF_MASKOS) -#define SHF_AMDGPU_HSA_READONLY (0x00200000 & SHF_MASKOS) -#define SHF_AMDGPU_HSA_CODE (0x00400000 & SHF_MASKOS) -#define SHF_AMDGPU_HSA_AGENT (0x00800000 & SHF_MASKOS) - -// -typedef enum { - AMDGPU_HSA_SEGMENT_GLOBAL_PROGRAM = 0, - AMDGPU_HSA_SEGMENT_GLOBAL_AGENT = 1, - AMDGPU_HSA_SEGMENT_READONLY_AGENT = 2, - AMDGPU_HSA_SEGMENT_CODE_AGENT = 3, - AMDGPU_HSA_SEGMENT_LAST, -} amdgpu_hsa_elf_segment_t; - -// ELF Program Header Type Enumeration Values. -#define PT_AMDGPU_HSA_LOAD_GLOBAL_PROGRAM (PT_LOOS + AMDGPU_HSA_SEGMENT_GLOBAL_PROGRAM) -#define PT_AMDGPU_HSA_LOAD_GLOBAL_AGENT (PT_LOOS + AMDGPU_HSA_SEGMENT_GLOBAL_AGENT) -#define PT_AMDGPU_HSA_LOAD_READONLY_AGENT (PT_LOOS + AMDGPU_HSA_SEGMENT_READONLY_AGENT) -#define PT_AMDGPU_HSA_LOAD_CODE_AGENT (PT_LOOS + AMDGPU_HSA_SEGMENT_CODE_AGENT) - -// ELF Symbol Type Enumeration Values. 
-#define STT_AMDGPU_HSA_KERNEL (STT_LOOS + 0) -#define STT_AMDGPU_HSA_INDIRECT_FUNCTION (STT_LOOS + 1) -#define STT_AMDGPU_HSA_METADATA (STT_LOOS + 2) - -// ELF Symbol Binding Enumeration Values. -#define STB_AMDGPU_HSA_EXTERNAL (STB_LOOS + 0) - -// ELF Symbol Other Information Creation/Retrieval. -#define ELF64_ST_AMDGPU_ALLOCATION(o) (((o) >> 2) & 0x3) -#define ELF64_ST_AMDGPU_FLAGS(o) ((o) >> 4) -#define ELF64_ST_AMDGPU_OTHER(f, a, v) (((f) << 4) + (((a) & 0x3) << 2) + ((v) & 0x3)) - -typedef enum { - AMDGPU_HSA_SYMBOL_ALLOCATION_DEFAULT = 0, - AMDGPU_HSA_SYMBOL_ALLOCATION_GLOBAL_PROGRAM = 1, - AMDGPU_HSA_SYMBOL_ALLOCATION_GLOBAL_AGENT = 2, - AMDGPU_HSA_SYMBOL_ALLOCATION_READONLY_AGENT = 3, - AMDGPU_HSA_SYMBOL_ALLOCATION_LAST, -} amdgpu_hsa_symbol_allocation_t; - -// ELF Symbol Allocation Enumeration Values. -#define STA_AMDGPU_HSA_DEFAULT AMDGPU_HSA_SYMBOL_ALLOCATION_DEFAULT -#define STA_AMDGPU_HSA_GLOBAL_PROGRAM AMDGPU_HSA_SYMBOL_ALLOCATION_GLOBAL_PROGRAM -#define STA_AMDGPU_HSA_GLOBAL_AGENT AMDGPU_HSA_SYMBOL_ALLOCATION_GLOBAL_AGENT -#define STA_AMDGPU_HSA_READONLY_AGENT AMDGPU_HSA_SYMBOL_ALLOCATION_READONLY_AGENT - -typedef enum { - AMDGPU_HSA_SYMBOL_FLAG_DEFAULT = 0, - AMDGPU_HSA_SYMBOL_FLAG_CONST = 1, - AMDGPU_HSA_SYMBOL_FLAG_LAST, -} amdgpu_hsa_symbol_flag_t; - -// ELF Symbol Flag Enumeration Values. -#define STF_AMDGPU_HSA_CONST AMDGPU_HSA_SYMBOL_FLAG_CONST - -// AMD GPU Relocation Type Enumeration Values. -#define R_AMDGPU_NONE 0 -#define R_AMDGPU_32_LOW 1 -#define R_AMDGPU_32_HIGH 2 -#define R_AMDGPU_64 3 -#define R_AMDGPU_INIT_SAMPLER 4 -#define R_AMDGPU_INIT_IMAGE 5 - -// AMD GPU Note Type Enumeration Values. -#define NT_AMDGPU_HSA_CODE_OBJECT_VERSION 1 -#define NT_AMDGPU_HSA_HSAIL 2 -#define NT_AMDGPU_HSA_ISA 3 -#define NT_AMDGPU_HSA_PRODUCER 4 -#define NT_AMDGPU_HSA_PRODUCER_OPTIONS 5 -#define NT_AMDGPU_HSA_EXTENSION 6 -#define NT_AMDGPU_HSA_HLDEBUG_DEBUG 101 -#define NT_AMDGPU_HSA_HLDEBUG_TARGET 102 - -// AMD GPU Metadata Kind Enumeration Values. 
-typedef uint16_t amdgpu_hsa_metadata_kind16_t; -typedef enum { - AMDGPU_HSA_METADATA_KIND_NONE = 0, - AMDGPU_HSA_METADATA_KIND_INIT_SAMP = 1, - AMDGPU_HSA_METADATA_KIND_INIT_ROIMG = 2, - AMDGPU_HSA_METADATA_KIND_INIT_WOIMG = 3, - AMDGPU_HSA_METADATA_KIND_INIT_RWIMG = 4 -} amdgpu_hsa_metadata_kind_t; - -// AMD GPU Sampler Coordinate Normalization Enumeration Values. -typedef uint8_t amdgpu_hsa_sampler_coord8_t; -typedef enum { - AMDGPU_HSA_SAMPLER_COORD_UNNORMALIZED = 0, - AMDGPU_HSA_SAMPLER_COORD_NORMALIZED = 1 -} amdgpu_hsa_sampler_coord_t; - -// AMD GPU Sampler Filter Enumeration Values. -typedef uint8_t amdgpu_hsa_sampler_filter8_t; -typedef enum { - AMDGPU_HSA_SAMPLER_FILTER_NEAREST = 0, - AMDGPU_HSA_SAMPLER_FILTER_LINEAR = 1 -} amdgpu_hsa_sampler_filter_t; - -// AMD GPU Sampler Addressing Enumeration Values. -typedef uint8_t amdgpu_hsa_sampler_addressing8_t; -typedef enum { - AMDGPU_HSA_SAMPLER_ADDRESSING_UNDEFINED = 0, - AMDGPU_HSA_SAMPLER_ADDRESSING_CLAMP_TO_EDGE = 1, - AMDGPU_HSA_SAMPLER_ADDRESSING_CLAMP_TO_BORDER = 2, - AMDGPU_HSA_SAMPLER_ADDRESSING_REPEAT = 3, - AMDGPU_HSA_SAMPLER_ADDRESSING_MIRRORED_REPEAT = 4 -} amdgpu_hsa_sampler_addressing_t; - -// AMD GPU Sampler Descriptor. -typedef struct amdgpu_hsa_sampler_descriptor_s { - uint16_t size; - amdgpu_hsa_metadata_kind16_t kind; - amdgpu_hsa_sampler_coord8_t coord; - amdgpu_hsa_sampler_filter8_t filter; - amdgpu_hsa_sampler_addressing8_t addressing; - uint8_t reserved1; -} amdgpu_hsa_sampler_descriptor_t; - -// AMD GPU Image Geometry Enumeration Values. -typedef uint8_t amdgpu_hsa_image_geometry8_t; -typedef enum { - AMDGPU_HSA_IMAGE_GEOMETRY_1D = 0, - AMDGPU_HSA_IMAGE_GEOMETRY_2D = 1, - AMDGPU_HSA_IMAGE_GEOMETRY_3D = 2, - AMDGPU_HSA_IMAGE_GEOMETRY_1DA = 3, - AMDGPU_HSA_IMAGE_GEOMETRY_2DA = 4, - AMDGPU_HSA_IMAGE_GEOMETRY_1DB = 5, - AMDGPU_HSA_IMAGE_GEOMETRY_2DDEPTH = 6, - AMDGPU_HSA_IMAGE_GEOMETRY_2DADEPTH = 7 -} amdgpu_hsa_image_geometry_t; - -// AMD GPU Image Channel Order Enumeration Values. 
-typedef uint8_t amdgpu_hsa_image_channel_order8_t; -typedef enum { - AMDGPU_HSA_IMAGE_CHANNEL_ORDER_A = 0, - AMDGPU_HSA_IMAGE_CHANNEL_ORDER_R = 1, - AMDGPU_HSA_IMAGE_CHANNEL_ORDER_RX = 2, - AMDGPU_HSA_IMAGE_CHANNEL_ORDER_RG = 3, - AMDGPU_HSA_IMAGE_CHANNEL_ORDER_RGX = 4, - AMDGPU_HSA_IMAGE_CHANNEL_ORDER_RA = 5, - AMDGPU_HSA_IMAGE_CHANNEL_ORDER_RGB = 6, - AMDGPU_HSA_IMAGE_CHANNEL_ORDER_RGBX = 7, - AMDGPU_HSA_IMAGE_CHANNEL_ORDER_RGBA = 8, - AMDGPU_HSA_IMAGE_CHANNEL_ORDER_BGRA = 9, - AMDGPU_HSA_IMAGE_CHANNEL_ORDER_ARGB = 10, - AMDGPU_HSA_IMAGE_CHANNEL_ORDER_ABGR = 11, - AMDGPU_HSA_IMAGE_CHANNEL_ORDER_SRGB = 12, - AMDGPU_HSA_IMAGE_CHANNEL_ORDER_SRGBX = 13, - AMDGPU_HSA_IMAGE_CHANNEL_ORDER_SRGBA = 14, - AMDGPU_HSA_IMAGE_CHANNEL_ORDER_SBGRA = 15, - AMDGPU_HSA_IMAGE_CHANNEL_ORDER_INTENSITY = 16, - AMDGPU_HSA_IMAGE_CHANNEL_ORDER_LUMINANCE = 17, - AMDGPU_HSA_IMAGE_CHANNEL_ORDER_DEPTH = 18, - AMDGPU_HSA_IMAGE_CHANNEL_ORDER_DEPTH_STENCIL = 19 -} amdgpu_hsa_image_channel_order_t; - -// AMD GPU Image Channel Type Enumeration Values. -typedef uint8_t amdgpu_hsa_image_channel_type8_t; -typedef enum { - AMDGPU_HSA_IMAGE_CHANNEL_TYPE_SNORM_INT8 = 0, - AMDGPU_HSA_IMAGE_CHANNEL_TYPE_SNORM_INT16 = 1, - AMDGPU_HSA_IMAGE_CHANNEL_TYPE_UNORM_INT8 = 2, - AMDGPU_HSA_IMAGE_CHANNEL_TYPE_UNORM_INT16 = 3, - AMDGPU_HSA_IMAGE_CHANNEL_TYPE_UNORM_INT24 = 4, - AMDGPU_HSA_IMAGE_CHANNEL_TYPE_SHORT_555 = 5, - AMDGPU_HSA_IMAGE_CHANNEL_TYPE_SHORT_565 = 6, - AMDGPU_HSA_IMAGE_CHANNEL_TYPE_INT_101010 = 7, - AMDGPU_HSA_IMAGE_CHANNEL_TYPE_SIGNED_INT8 = 8, - AMDGPU_HSA_IMAGE_CHANNEL_TYPE_SIGNED_INT16 = 9, - AMDGPU_HSA_IMAGE_CHANNEL_TYPE_SIGNED_INT32 = 10, - AMDGPU_HSA_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8 = 11, - AMDGPU_HSA_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 = 12, - AMDGPU_HSA_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32 = 13, - AMDGPU_HSA_IMAGE_CHANNEL_TYPE_HALF_FLOAT = 14, - AMDGPU_HSA_IMAGE_CHANNEL_TYPE_FLOAT = 15 -} amdgpu_hsa_image_channel_type_t; - -// AMD GPU Image Descriptor. 
-typedef struct amdgpu_hsa_image_descriptor_s { - uint16_t size; - amdgpu_hsa_metadata_kind16_t kind; - amdgpu_hsa_image_geometry8_t geometry; - amdgpu_hsa_image_channel_order8_t channel_order; - amdgpu_hsa_image_channel_type8_t channel_type; - uint8_t reserved1; - uint64_t width; - uint64_t height; - uint64_t depth; - uint64_t array; -} amdgpu_hsa_image_descriptor_t; - -typedef struct amdgpu_hsa_note_code_object_version_s { - uint32_t major_version; - uint32_t minor_version; -} amdgpu_hsa_note_code_object_version_t; - -typedef struct amdgpu_hsa_note_hsail_s { - uint32_t hsail_major_version; - uint32_t hsail_minor_version; - uint8_t profile; - uint8_t machine_model; - uint8_t default_float_round; -} amdgpu_hsa_note_hsail_t; - -typedef struct amdgpu_hsa_note_isa_s { - uint16_t vendor_name_size; - uint16_t architecture_name_size; - uint32_t major; - uint32_t minor; - uint32_t stepping; - char vendor_and_architecture_name[1]; -} amdgpu_hsa_note_isa_t; - -typedef struct amdgpu_hsa_note_producer_s { - uint16_t producer_name_size; - uint16_t reserved; - uint32_t producer_major_version; - uint32_t producer_minor_version; - char producer_name[1]; -} amdgpu_hsa_note_producer_t; - -typedef struct amdgpu_hsa_note_producer_options_s { - uint16_t producer_options_size; - char producer_options[1]; -} amdgpu_hsa_note_producer_options_t; - -typedef enum { - AMDGPU_HSA_RODATA_GLOBAL_PROGRAM = 0, - AMDGPU_HSA_RODATA_GLOBAL_AGENT, - AMDGPU_HSA_RODATA_READONLY_AGENT, - AMDGPU_HSA_DATA_GLOBAL_PROGRAM, - AMDGPU_HSA_DATA_GLOBAL_AGENT, - AMDGPU_HSA_DATA_READONLY_AGENT, - AMDGPU_HSA_BSS_GLOBAL_PROGRAM, - AMDGPU_HSA_BSS_GLOBAL_AGENT, - AMDGPU_HSA_BSS_READONLY_AGENT, - AMDGPU_HSA_SECTION_LAST, -} amdgpu_hsa_elf_section_t; - -#endif // AMD_HSA_ELF_H diff --git a/runtime/hsa-runtime/inc/amd_hsa_kernel_code.h b/runtime/hsa-runtime/inc/amd_hsa_kernel_code.h deleted file mode 100644 index 12f096b432..0000000000 --- a/runtime/hsa-runtime/inc/amd_hsa_kernel_code.h +++ /dev/null @@ -1,271 +0,0 @@ 
-//////////////////////////////////////////////////////////////////////////////// -// -// The University of Illinois/NCSA -// Open Source License (NCSA) -// -// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. -// -// Developed by: -// -// AMD Research and AMD HSA Software Development -// -// Advanced Micro Devices, Inc. -// -// www.amd.com -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal with the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimers. -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimers in -// the documentation and/or other materials provided with the distribution. -// - Neither the names of Advanced Micro Devices, Inc, -// nor the names of its contributors may be used to endorse or promote -// products derived from this Software without specific prior written -// permission. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR -// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// DEALINGS WITH THE SOFTWARE. 
-// -//////////////////////////////////////////////////////////////////////////////// - -#ifndef AMD_HSA_KERNEL_CODE_H -#define AMD_HSA_KERNEL_CODE_H - -#include "amd_hsa_common.h" -#include "hsa.h" - -// AMD Kernel Code Version Enumeration Values. -typedef uint32_t amd_kernel_code_version32_t; -enum amd_kernel_code_version_t { - AMD_KERNEL_CODE_VERSION_MAJOR = 1, - AMD_KERNEL_CODE_VERSION_MINOR = 1 -}; - -// AMD Machine Kind Enumeration Values. -typedef uint16_t amd_machine_kind16_t; -enum amd_machine_kind_t { - AMD_MACHINE_KIND_UNDEFINED = 0, - AMD_MACHINE_KIND_AMDGPU = 1 -}; - -// AMD Machine Version. -typedef uint16_t amd_machine_version16_t; - -// AMD Float Round Mode Enumeration Values. -enum amd_float_round_mode_t { - AMD_FLOAT_ROUND_MODE_NEAREST_EVEN = 0, - AMD_FLOAT_ROUND_MODE_PLUS_INFINITY = 1, - AMD_FLOAT_ROUND_MODE_MINUS_INFINITY = 2, - AMD_FLOAT_ROUND_MODE_ZERO = 3 -}; - -// AMD Float Denorm Mode Enumeration Values. -enum amd_float_denorm_mode_t { - AMD_FLOAT_DENORM_MODE_FLUSH_SOURCE_OUTPUT = 0, - AMD_FLOAT_DENORM_MODE_FLUSH_OUTPUT = 1, - AMD_FLOAT_DENORM_MODE_FLUSH_SOURCE = 2, - AMD_FLOAT_DENORM_MODE_NO_FLUSH = 3 -}; - -// AMD Compute Program Resource Register One. 
-typedef uint32_t amd_compute_pgm_rsrc_one32_t; -enum amd_compute_pgm_rsrc_one_t { - AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_GRANULATED_WORKITEM_VGPR_COUNT, 0, 6), - AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_GRANULATED_WAVEFRONT_SGPR_COUNT, 6, 4), - AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_PRIORITY, 10, 2), - AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_FLOAT_ROUND_MODE_32, 12, 2), - AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_FLOAT_ROUND_MODE_16_64, 14, 2), - AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_FLOAT_DENORM_MODE_32, 16, 2), - AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_FLOAT_DENORM_MODE_16_64, 18, 2), - AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_PRIV, 20, 1), - AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_ENABLE_DX10_CLAMP, 21, 1), - AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_DEBUG_MODE, 22, 1), - AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_ENABLE_IEEE_MODE, 23, 1), - AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_BULKY, 24, 1), - AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_CDBG_USER, 25, 1), - AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_RESERVED1, 26, 6) -}; - -// AMD System VGPR Workitem ID Enumeration Values. -enum amd_system_vgpr_workitem_id_t { - AMD_SYSTEM_VGPR_WORKITEM_ID_X = 0, - AMD_SYSTEM_VGPR_WORKITEM_ID_X_Y = 1, - AMD_SYSTEM_VGPR_WORKITEM_ID_X_Y_Z = 2, - AMD_SYSTEM_VGPR_WORKITEM_ID_UNDEFINED = 3 -}; - -// AMD Compute Program Resource Register Two. 
-typedef uint32_t amd_compute_pgm_rsrc_two32_t; -enum amd_compute_pgm_rsrc_two_t { - AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_SGPR_PRIVATE_SEGMENT_WAVE_BYTE_OFFSET, 0, 1), - AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_USER_SGPR_COUNT, 1, 5), - AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_TRAP_HANDLER, 6, 1), - AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_SGPR_WORKGROUP_ID_X, 7, 1), - AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_SGPR_WORKGROUP_ID_Y, 8, 1), - AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_SGPR_WORKGROUP_ID_Z, 9, 1), - AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_SGPR_WORKGROUP_INFO, 10, 1), - AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_VGPR_WORKITEM_ID, 11, 2), - AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_ADDRESS_WATCH, 13, 1), - AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_MEMORY_VIOLATION, 14, 1), - AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_GRANULATED_LDS_SIZE, 15, 9), - AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, 24, 1), - AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 25, 1), - AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, 26, 1), - AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 27, 1), - AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 28, 1), - AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 29, 1), - AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_INT_DIVISION_BY_ZERO, 30, 1), - AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_RESERVED1, 31, 1) -}; - -// AMD Element 
Byte Size Enumeration Values. -enum amd_element_byte_size_t { - AMD_ELEMENT_BYTE_SIZE_2 = 0, - AMD_ELEMENT_BYTE_SIZE_4 = 1, - AMD_ELEMENT_BYTE_SIZE_8 = 2, - AMD_ELEMENT_BYTE_SIZE_16 = 3 -}; - -// AMD Kernel Code Properties. -typedef uint32_t amd_kernel_code_properties32_t; -enum amd_kernel_code_properties_t { - AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 0, 1), - AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_DISPATCH_PTR, 1, 1), - AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_QUEUE_PTR, 2, 1), - AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 3, 1), - AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_DISPATCH_ID, 4, 1), - AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_FLAT_SCRATCH_INIT, 5, 1), - AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 6, 1), - AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X, 7, 1), - AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y, 8, 1), - AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z, 9, 1), - AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_RESERVED1, 10, 6), - AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_ORDERED_APPEND_GDS, 16, 1), - AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_PRIVATE_ELEMENT_SIZE, 17, 2), - AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_IS_PTR64, 19, 1), - AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_IS_DYNAMIC_CALLSTACK, 20, 1), - AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_IS_DEBUG_ENABLED, 21, 1), - AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_IS_XNACK_ENABLED, 22, 1), - 
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_RESERVED2, 23, 9) -}; - -// AMD Power Of Two Enumeration Values. -typedef uint8_t amd_powertwo8_t; -enum amd_powertwo_t { - AMD_POWERTWO_1 = 0, - AMD_POWERTWO_2 = 1, - AMD_POWERTWO_4 = 2, - AMD_POWERTWO_8 = 3, - AMD_POWERTWO_16 = 4, - AMD_POWERTWO_32 = 5, - AMD_POWERTWO_64 = 6, - AMD_POWERTWO_128 = 7, - AMD_POWERTWO_256 = 8 -}; - -// AMD Enabled Control Directive Enumeration Values. -typedef uint64_t amd_enabled_control_directive64_t; -enum amd_enabled_control_directive_t { - AMD_ENABLED_CONTROL_DIRECTIVE_ENABLE_BREAK_EXCEPTIONS = 1, - AMD_ENABLED_CONTROL_DIRECTIVE_ENABLE_DETECT_EXCEPTIONS = 2, - AMD_ENABLED_CONTROL_DIRECTIVE_MAX_DYNAMIC_GROUP_SIZE = 4, - AMD_ENABLED_CONTROL_DIRECTIVE_MAX_FLAT_GRID_SIZE = 8, - AMD_ENABLED_CONTROL_DIRECTIVE_MAX_FLAT_WORKGROUP_SIZE = 16, - AMD_ENABLED_CONTROL_DIRECTIVE_REQUIRED_DIM = 32, - AMD_ENABLED_CONTROL_DIRECTIVE_REQUIRED_GRID_SIZE = 64, - AMD_ENABLED_CONTROL_DIRECTIVE_REQUIRED_WORKGROUP_SIZE = 128, - AMD_ENABLED_CONTROL_DIRECTIVE_REQUIRE_NO_PARTIAL_WORKGROUPS = 256 -}; - -// AMD Exception Kind Enumeration Values. -typedef uint16_t amd_exception_kind16_t; -enum amd_exception_kind_t { - AMD_EXCEPTION_KIND_INVALID_OPERATION = 1, - AMD_EXCEPTION_KIND_DIVISION_BY_ZERO = 2, - AMD_EXCEPTION_KIND_OVERFLOW = 4, - AMD_EXCEPTION_KIND_UNDERFLOW = 8, - AMD_EXCEPTION_KIND_INEXACT = 16 -}; - -// AMD Control Directives. 
-#define AMD_CONTROL_DIRECTIVES_ALIGN_BYTES 64 -#define AMD_CONTROL_DIRECTIVES_ALIGN __ALIGNED__(AMD_CONTROL_DIRECTIVES_ALIGN_BYTES) -typedef AMD_CONTROL_DIRECTIVES_ALIGN struct amd_control_directives_s { - amd_enabled_control_directive64_t enabled_control_directives; - uint16_t enable_break_exceptions; - uint16_t enable_detect_exceptions; - uint32_t max_dynamic_group_size; - uint64_t max_flat_grid_size; - uint32_t max_flat_workgroup_size; - uint8_t required_dim; - uint8_t reserved1[3]; - uint64_t required_grid_size[3]; - uint32_t required_workgroup_size[3]; - uint8_t reserved2[60]; -} amd_control_directives_t; - -// AMD Kernel Code. -#define AMD_ISA_ALIGN_BYTES 256 -#define AMD_KERNEL_CODE_ALIGN_BYTES 64 -#define AMD_KERNEL_CODE_ALIGN __ALIGNED__(AMD_KERNEL_CODE_ALIGN_BYTES) -typedef AMD_KERNEL_CODE_ALIGN struct amd_kernel_code_s { - amd_kernel_code_version32_t amd_kernel_code_version_major; - amd_kernel_code_version32_t amd_kernel_code_version_minor; - amd_machine_kind16_t amd_machine_kind; - amd_machine_version16_t amd_machine_version_major; - amd_machine_version16_t amd_machine_version_minor; - amd_machine_version16_t amd_machine_version_stepping; - int64_t kernel_code_entry_byte_offset; - int64_t kernel_code_prefetch_byte_offset; - uint64_t kernel_code_prefetch_byte_size; - uint64_t max_scratch_backing_memory_byte_size; - amd_compute_pgm_rsrc_one32_t compute_pgm_rsrc1; - amd_compute_pgm_rsrc_two32_t compute_pgm_rsrc2; - amd_kernel_code_properties32_t kernel_code_properties; - uint32_t workitem_private_segment_byte_size; - uint32_t workgroup_group_segment_byte_size; - uint32_t gds_segment_byte_size; - uint64_t kernarg_segment_byte_size; - uint32_t workgroup_fbarrier_count; - uint16_t wavefront_sgpr_count; - uint16_t workitem_vgpr_count; - uint16_t reserved_vgpr_first; - uint16_t reserved_vgpr_count; - uint16_t reserved_sgpr_first; - uint16_t reserved_sgpr_count; - uint16_t debug_wavefront_private_segment_offset_sgpr; - uint16_t 
debug_private_segment_buffer_sgpr; - amd_powertwo8_t kernarg_segment_alignment; - amd_powertwo8_t group_segment_alignment; - amd_powertwo8_t private_segment_alignment; - amd_powertwo8_t wavefront_size; - int32_t call_convention; - uint8_t reserved1[12]; - uint64_t runtime_loader_kernel_symbol; - amd_control_directives_t control_directives; -} amd_kernel_code_t; - -// TODO: this struct should be completely gone once debugger designs/implements -// Debugger APIs. -typedef struct amd_runtime_loader_debug_info_s { - const void* elf_raw; - size_t elf_size; - const char *kernel_name; - const void *owning_segment; - hsa_profile_t profile; - uint64_t gpuva; -} amd_runtime_loader_debug_info_t; - -#endif // AMD_HSA_KERNEL_CODE_H diff --git a/runtime/hsa-runtime/inc/amd_hsa_queue.h b/runtime/hsa-runtime/inc/amd_hsa_queue.h deleted file mode 100644 index b37bb53f36..0000000000 --- a/runtime/hsa-runtime/inc/amd_hsa_queue.h +++ /dev/null @@ -1,86 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// -// The University of Illinois/NCSA -// Open Source License (NCSA) -// -// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. -// -// Developed by: -// -// AMD Research and AMD HSA Software Development -// -// Advanced Micro Devices, Inc. -// -// www.amd.com -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal with the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimers. 
-// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimers in -// the documentation and/or other materials provided with the distribution. -// - Neither the names of Advanced Micro Devices, Inc, -// nor the names of its contributors may be used to endorse or promote -// products derived from this Software without specific prior written -// permission. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR -// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// DEALINGS WITH THE SOFTWARE. -// -//////////////////////////////////////////////////////////////////////////////// - -#ifndef AMD_HSA_QUEUE_H -#define AMD_HSA_QUEUE_H - -#include "amd_hsa_common.h" -#include "hsa.h" - -// AMD Queue Properties. -typedef uint32_t amd_queue_properties32_t; -enum amd_queue_properties_t { - AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_QUEUE_PROPERTIES_ENABLE_TRAP_HANDLER, 0, 1), - AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_QUEUE_PROPERTIES_IS_PTR64, 1, 1), - AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_QUEUE_PROPERTIES_ENABLE_TRAP_HANDLER_DEBUG_SGPRS, 2, 1), - AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_QUEUE_PROPERTIES_ENABLE_PROFILING, 3, 1), - AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_QUEUE_PROPERTIES_RESERVED1, 4, 28) -}; - -// AMD Queue. 
-#define AMD_QUEUE_ALIGN_BYTES 64 -#define AMD_QUEUE_ALIGN __ALIGNED__(AMD_QUEUE_ALIGN_BYTES) -typedef struct AMD_QUEUE_ALIGN amd_queue_s { - hsa_queue_t hsa_queue; - uint32_t reserved1[4]; - volatile uint64_t write_dispatch_id; - uint32_t group_segment_aperture_base_hi; - uint32_t private_segment_aperture_base_hi; - uint32_t max_cu_id; - uint32_t max_wave_id; - volatile uint64_t max_legacy_doorbell_dispatch_id_plus_1; - volatile uint32_t legacy_doorbell_lock; - uint32_t reserved2[9]; - volatile uint64_t read_dispatch_id; - uint32_t read_dispatch_id_field_base_byte_offset; - uint32_t compute_tmpring_size; - uint32_t scratch_resource_descriptor[4]; - uint64_t scratch_backing_memory_location; - uint64_t scratch_backing_memory_byte_size; - uint32_t scratch_workitem_byte_size; - amd_queue_properties32_t queue_properties; - uint32_t reserved3[2]; - hsa_signal_t queue_inactive_signal; - uint32_t reserved4[14]; -} amd_queue_t; - -#endif // AMD_HSA_QUEUE_H diff --git a/runtime/hsa-runtime/inc/amd_hsa_signal.h b/runtime/hsa-runtime/inc/amd_hsa_signal.h deleted file mode 100644 index deefc8f025..0000000000 --- a/runtime/hsa-runtime/inc/amd_hsa_signal.h +++ /dev/null @@ -1,80 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// -// The University of Illinois/NCSA -// Open Source License (NCSA) -// -// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. -// -// Developed by: -// -// AMD Research and AMD HSA Software Development -// -// Advanced Micro Devices, Inc. 
-// -// www.amd.com -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal with the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimers. -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimers in -// the documentation and/or other materials provided with the distribution. -// - Neither the names of Advanced Micro Devices, Inc, -// nor the names of its contributors may be used to endorse or promote -// products derived from this Software without specific prior written -// permission. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR -// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// DEALINGS WITH THE SOFTWARE. -// -//////////////////////////////////////////////////////////////////////////////// - -#ifndef AMD_HSA_SIGNAL_H -#define AMD_HSA_SIGNAL_H - -#include "amd_hsa_common.h" -#include "amd_hsa_queue.h" - -// AMD Signal Kind Enumeration Values. 
-typedef int64_t amd_signal_kind64_t; -enum amd_signal_kind_t { - AMD_SIGNAL_KIND_INVALID = 0, - AMD_SIGNAL_KIND_USER = 1, - AMD_SIGNAL_KIND_DOORBELL = -1, - AMD_SIGNAL_KIND_LEGACY_DOORBELL = -2 -}; - -// AMD Signal. -#define AMD_SIGNAL_ALIGN_BYTES 64 -#define AMD_SIGNAL_ALIGN __ALIGNED__(AMD_SIGNAL_ALIGN_BYTES) -typedef struct AMD_SIGNAL_ALIGN amd_signal_s { - amd_signal_kind64_t kind; - union { - volatile int64_t value; - volatile uint32_t* legacy_hardware_doorbell_ptr; - volatile uint64_t* hardware_doorbell_ptr; - }; - uint64_t event_mailbox_ptr; - uint32_t event_id; - uint32_t reserved1; - uint64_t start_ts; - uint64_t end_ts; - union { - amd_queue_t* queue_ptr; - uint64_t reserved2; - }; - uint32_t reserved3[2]; -} amd_signal_t; - -#endif // AMD_HSA_SIGNAL_H diff --git a/runtime/hsa-runtime/inc/hsa.h b/runtime/hsa-runtime/inc/hsa.h deleted file mode 100644 index 159ef07d24..0000000000 --- a/runtime/hsa-runtime/inc/hsa.h +++ /dev/null @@ -1,3728 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// -// The University of Illinois/NCSA -// Open Source License (NCSA) -// -// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. -// -// Developed by: -// -// AMD Research and AMD HSA Software Development -// -// Advanced Micro Devices, Inc. -// -// www.amd.com -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal with the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimers. 
-// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimers in -// the documentation and/or other materials provided with the distribution. -// - Neither the names of Advanced Micro Devices, Inc, -// nor the names of its contributors may be used to endorse or promote -// products derived from this Software without specific prior written -// permission. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR -// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// DEALINGS WITH THE SOFTWARE. -// -//////////////////////////////////////////////////////////////////////////////// - -#ifndef HSA_RUNTIME_INC_HSA_H_ -#define HSA_RUNTIME_INC_HSA_H_ - -#include /* size_t */ -#include /* uintXX_t */ -#ifndef __cplusplus -#include -#endif /* __cplusplus */ - -// Placeholder for calling convention and import/export macros -#ifndef HSA_CALL -#define HSA_CALL -#endif - -#ifndef HSA_EXPORT_DECORATOR -#ifdef __GNUC__ -#define HSA_EXPORT_DECORATOR __attribute__ ((visibility ("default"))) -#else -#define HSA_EXPORT_DECORATOR -#endif -#endif - -#define HSA_API_EXPORT HSA_EXPORT_DECORATOR HSA_CALL -#define HSA_API_IMPORT HSA_CALL - -#if !defined(HSA_API) && defined(HSA_EXPORT) -#define HSA_API HSA_API_EXPORT -#else -#define HSA_API HSA_API_IMPORT -#endif - -// Detect and set large model builds. 
-#undef HSA_LARGE_MODEL -#if defined(__LP64__) || defined(_M_X64) -#define HSA_LARGE_MODEL -#endif - -// Try to detect CPU endianness -#if !defined(LITTLEENDIAN_CPU) && !defined(BIGENDIAN_CPU) -#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || \ - defined(_M_X64) -#define LITTLEENDIAN_CPU -#endif -#endif - -#undef HSA_LITTLE_ENDIAN -#if defined(LITTLEENDIAN_CPU) -#define HSA_LITTLE_ENDIAN -#elif defined(BIGENDIAN_CPU) -#else -#error "BIGENDIAN_CPU or LITTLEENDIAN_CPU must be defined" -#endif - -#ifdef __cplusplus -extern "C" { -#endif /* __cplusplus */ - -/** \defgroup status Runtime Notifications - * @{ - */ - -/** - * @brief Status codes. - */ -typedef enum { - /** - * The function has been executed successfully. - */ - HSA_STATUS_SUCCESS = 0x0, - /** - * A traversal over a list of elements has been interrupted by the - * application before completing. - */ - HSA_STATUS_INFO_BREAK = 0x1, - /** - * A generic error has occurred. - */ - HSA_STATUS_ERROR = 0x1000, - /** - * One of the actual arguments does not meet a precondition stated in the - * documentation of the corresponding formal argument. - */ - HSA_STATUS_ERROR_INVALID_ARGUMENT = 0x1001, - /** - * The requested queue creation is not valid. - */ - HSA_STATUS_ERROR_INVALID_QUEUE_CREATION = 0x1002, - /** - * The requested allocation is not valid. - */ - HSA_STATUS_ERROR_INVALID_ALLOCATION = 0x1003, - /** - * The agent is invalid. - */ - HSA_STATUS_ERROR_INVALID_AGENT = 0x1004, - /** - * The memory region is invalid. - */ - HSA_STATUS_ERROR_INVALID_REGION = 0x1005, - /** - * The signal is invalid. - */ - HSA_STATUS_ERROR_INVALID_SIGNAL = 0x1006, - /** - * The queue is invalid. - */ - HSA_STATUS_ERROR_INVALID_QUEUE = 0x1007, - /** - * The HSA runtime failed to allocate the necessary resources. This error - * may also occur when the HSA runtime needs to spawn threads or create - * internal OS-specific events. 
- */ - HSA_STATUS_ERROR_OUT_OF_RESOURCES = 0x1008, - /** - * The AQL packet is malformed. - */ - HSA_STATUS_ERROR_INVALID_PACKET_FORMAT = 0x1009, - /** - * An error has been detected while releasing a resource. - */ - HSA_STATUS_ERROR_RESOURCE_FREE = 0x100A, - /** - * An API other than ::hsa_init has been invoked while the reference count - * of the HSA runtime is 0. - */ - HSA_STATUS_ERROR_NOT_INITIALIZED = 0x100B, - /** - * The maximum reference count for the object has been reached. - */ - HSA_STATUS_ERROR_REFCOUNT_OVERFLOW = 0x100C, - /** - * The arguments passed to a functions are not compatible. - */ - HSA_STATUS_ERROR_INCOMPATIBLE_ARGUMENTS = 0x100D, - /** - * The index is invalid. - */ - HSA_STATUS_ERROR_INVALID_INDEX = 0x100E, - /** - * The instruction set architecture is invalid. - */ - HSA_STATUS_ERROR_INVALID_ISA = 0x100F, - /** - * The instruction set architecture name is invalid. - */ - HSA_STATUS_ERROR_INVALID_ISA_NAME = 0x1017, - /** - * The code object is invalid. - */ - HSA_STATUS_ERROR_INVALID_CODE_OBJECT = 0x1010, - /** - * The executable is invalid. - */ - HSA_STATUS_ERROR_INVALID_EXECUTABLE = 0x1011, - /** - * The executable is frozen. - */ - HSA_STATUS_ERROR_FROZEN_EXECUTABLE = 0x1012, - /** - * There is no symbol with the given name. - */ - HSA_STATUS_ERROR_INVALID_SYMBOL_NAME = 0x1013, - /** - * The variable is already defined. - */ - HSA_STATUS_ERROR_VARIABLE_ALREADY_DEFINED = 0x1014, - /** - * The variable is undefined. - */ - HSA_STATUS_ERROR_VARIABLE_UNDEFINED = 0x1015, - /** - * An HSAIL operation resulted on a hardware exception. - */ - HSA_STATUS_ERROR_EXCEPTION = 0x1016 -} hsa_status_t; - -/** - * @brief Query additional information about a status code. - * - * @param[in] status Status code. - * - * @param[out] status_string A NUL-terminated string that describes the error - * status. - * - * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. 
- * - * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been - * initialized. - * - * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p status is an invalid - * status code, or @p status_string is NULL. - */ -hsa_status_t HSA_API - hsa_status_string(hsa_status_t status, const char **status_string); - -/** @} */ - -/** \defgroup common Common Definitions - * @{ - */ - -/** - * @brief Three-dimensional coordinate. - */ -typedef struct hsa_dim3_s { - /** - * X dimension. - */ - uint32_t x; - - /** - * Y dimension. - */ - uint32_t y; - - /** - * Z dimension. - */ - uint32_t z; -} hsa_dim3_t; - -/** - * @brief Access permissions. - */ -typedef enum { - /** - * Read-only access. - */ - HSA_ACCESS_PERMISSION_RO = 1, - /** - * Write-only access. - */ - HSA_ACCESS_PERMISSION_WO = 2, - /** - * Read and write access. - */ - HSA_ACCESS_PERMISSION_RW = 3 -} hsa_access_permission_t; - -/** @} **/ - -/** \defgroup initshutdown Initialization and Shut Down - * @{ - */ - -/** - * @brief Initialize the HSA runtime. - * - * @details Initializes the HSA runtime if it is not already initialized, and - * increases the reference counter associated with the HSA runtime for the - * current process. Invocation of any HSA function other than ::hsa_init results - * in undefined behavior if the current HSA runtime reference counter is less - * than one. - * - * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. - * - * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES There is failure to allocate - * the resources required by the implementation. - * - * @retval ::HSA_STATUS_ERROR_REFCOUNT_OVERFLOW The HSA runtime reference - * count reaches INT32_MAX. - */ -hsa_status_t HSA_API hsa_init(); - -/** - * @brief Shut down the HSA runtime. - * - * @details Decreases the reference count of the HSA runtime instance. 
When the - * reference count reaches 0, the HSA runtime is no longer considered valid - * but the application might call ::hsa_init to initialize the HSA runtime - * again. - * - * Once the reference count of the HSA runtime reaches 0, all the resources - * associated with it (queues, signals, agent information, etc.) are - * considered invalid and any attempt to reference them in subsequent API calls - * results in undefined behavior. When the reference count reaches 0, the HSA - * runtime may release resources associated with it. - * - * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. - * - * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been - * initialized. - * - */ -hsa_status_t HSA_API hsa_shut_down(); - -/** @} **/ - -/** \defgroup agentinfo System and Agent Information - * @{ - */ - -/** - * @brief Endianness. A convention used to interpret the bytes making up a data - * word. - */ -typedef enum { - /** - * The least significant byte is stored in the smallest address. - */ - HSA_ENDIANNESS_LITTLE = 0, - /** - * The most significant byte is stored in the smallest address. - */ - HSA_ENDIANNESS_BIG = 1 -} hsa_endianness_t; - -/** - * @brief Machine model. A machine model determines the size of certain data - * types in HSA runtime and an agent. - */ -typedef enum { - /** - * Small machine model. Addresses use 32 bits. - */ - HSA_MACHINE_MODEL_SMALL = 0, - /** - * Large machine model. Addresses use 64 bits. - */ - HSA_MACHINE_MODEL_LARGE = 1 -} hsa_machine_model_t; - -/** - * @brief Profile. A profile indicates a particular level of feature - * support. For example, in the base profile the application must use the HSA - * runtime allocator to reserve Shared Virtual Memory, while in the full profile - * any host pointer can be shared across all the agents. - */ -typedef enum { - /** - * Base profile. - */ - HSA_PROFILE_BASE = 0, - /** - * Full profile. 
- */ - HSA_PROFILE_FULL = 1 -} hsa_profile_t; - -/** - * @brief System attributes. - */ -typedef enum { - /** - * Major version of the HSA runtime specification supported by the - * implementation. The type of this attribute is uint16_t. - */ - HSA_SYSTEM_INFO_VERSION_MAJOR = 0, - /** - * Minor version of the HSA runtime specification supported by the - * implementation. The type of this attribute is uint16_t. - */ - HSA_SYSTEM_INFO_VERSION_MINOR = 1, - /** - * Current timestamp. The value of this attribute monotonically increases at a - * constant rate. The type of this attribute is uint64_t. - */ - HSA_SYSTEM_INFO_TIMESTAMP = 2, - /** - * Timestamp value increase rate, in Hz. The timestamp (clock) frequency is - * in the range 1-400MHz. The type of this attribute is uint64_t. - */ - HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY = 3, - /** - * Maximum duration of a signal wait operation. Expressed as a count based on - * the timestamp frequency. The type of this attribute is uint64_t. - */ - HSA_SYSTEM_INFO_SIGNAL_MAX_WAIT = 4, - /** - * Endianness of the system. The type of this attribute us ::hsa_endianness_t. - */ - HSA_SYSTEM_INFO_ENDIANNESS = 5, - /** - * Machine model supported by the HSA runtime. The type of this attribute is - * ::hsa_machine_model_t. - */ - HSA_SYSTEM_INFO_MACHINE_MODEL = 6, - /** - * Bit-mask indicating which extensions are supported by the - * implementation. An extension with an ID of @p i is supported if the bit at - * position @p i is set. The type of this attribute is uint8_t[128]. - */ - HSA_SYSTEM_INFO_EXTENSIONS = 7 -} hsa_system_info_t; - -/** - * @brief Get the current value of a system attribute. - * - * @param[in] attribute Attribute to query. - * - * @param[out] value Pointer to an application-allocated buffer where to store - * the value of the attribute. If the buffer passed by the application is not - * large enough to hold the value of @p attribute, the behavior is undefined. 
- * - * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. - * - * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been - * initialized. - * - * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p attribute is an invalid - * system attribute, or @p value is NULL. - */ -hsa_status_t HSA_API - hsa_system_get_info(hsa_system_info_t attribute, void *value); - -/** - * @brief HSA extensions. - */ -typedef enum { - /** - * Finalizer extension. - */ - HSA_EXTENSION_FINALIZER = 0, - /** - * Images extension. - */ - HSA_EXTENSION_IMAGES = 1, - HSA_EXTENSION_AMD_PROFILER = 2 -} hsa_extension_t; - -/** - * @brief Query if a given version of an extension is supported by the HSA - * implementation. - * - * @param[in] extension Extension identifier. - * - * @param[in] version_major Major version number. - * - * @param[in] version_minor Minor version number. - * - * @param[out] result Pointer to a memory location where the HSA runtime stores - * the result of the check. The result is true if the specified version of the - * extension is supported, and false otherwise. - * - * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. - * - * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been - * initialized. - * - * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p extension is not a valid - * extension, or @p result is NULL. - */ -hsa_status_t HSA_API - hsa_system_extension_supported(uint16_t extension, uint16_t version_major, - uint16_t version_minor, bool *result); - -/** - * @brief Retrieve the function pointers corresponding to a given version of an - * extension. Portable applications are expected to invoke the extension API - * using the returned function pointers - * - * @details The application is responsible for verifying that the given version - * of the extension is supported by the HSA implementation (see - * ::hsa_system_extension_supported). 
If the given combination of extension, - * major version, and minor version is not supported by the implementation, the - * behavior is undefined. - * - * @param[in] extension Extension identifier. - * - * @param[in] version_major Major version number for which to retrieve the - * function pointer table. - * - * @param[in] version_minor Minor version number for which to retrieve the - * function pointer table. - * - * @param[out] table Pointer to an application-allocated function pointer table - * that is populated by the HSA runtime. Must not be NULL. The memory associated - * with table can be reused or freed after the function returns. - * - * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. - * - * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been - * initialized. - * - * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p extension is not a valid - * extension, or @p table is NULL. - */ -hsa_status_t HSA_API - hsa_system_get_extension_table(uint16_t extension, uint16_t version_major, - uint16_t version_minor, void *table); - -/** - * @brief Opaque handle representing an agent, a device that participates in - * the HSA memory model. An agent can submit AQL packets for execution, and - * may also accept AQL packets for execution (agent dispatch packets or kernel - * dispatch packets launching HSAIL-derived binaries). - */ -typedef struct hsa_agent_s { - /** - * Opaque handle. - */ - uint64_t handle; -} hsa_agent_t; - -/** - * @brief Agent features. - */ -typedef enum { - /** - * The agent supports AQL packets of kernel dispatch type. If this - * feature is enabled, the agent is also a kernel agent. - */ - HSA_AGENT_FEATURE_KERNEL_DISPATCH = 1, - /** - * The agent supports AQL packets of agent dispatch type. - */ - HSA_AGENT_FEATURE_AGENT_DISPATCH = 2 -} hsa_agent_feature_t; - -/** - * @brief Hardware device type. - */ -typedef enum { - /** - * CPU device. - */ - HSA_DEVICE_TYPE_CPU = 0, - /** - * GPU device. 
- */ - HSA_DEVICE_TYPE_GPU = 1, - /** - * DSP device. - */ - HSA_DEVICE_TYPE_DSP = 2 -} hsa_device_type_t; - -/** - * @brief Default floating-point rounding mode. - */ -typedef enum { - /** - * Use a default floating-point rounding mode specified elsewhere. - */ - HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT = 0, - /** - * Operations that specify the default floating-point mode are rounded to zero - * by default. - */ - HSA_DEFAULT_FLOAT_ROUNDING_MODE_ZERO = 1, - /** - * Operations that specify the default floating-point mode are rounded to the - * nearest representable number and that ties should be broken by selecting - * the value with an even least significant bit. - */ - HSA_DEFAULT_FLOAT_ROUNDING_MODE_NEAR = 2 -} hsa_default_float_rounding_mode_t; - -/** - * @brief Agent attributes. - */ -typedef enum { - /** - * Agent name. The type of this attribute is a NUL-terminated char[64]. If - * the name of the agent uses less than 63 characters, the rest of the - * array must be filled with NULs. - */ - HSA_AGENT_INFO_NAME = 0, - /** - * Name of vendor. The type of this attribute is a NUL-terminated char[64]. If - * the name of the vendor uses less than 63 characters, the rest of the array - * must be filled with NULs. - */ - HSA_AGENT_INFO_VENDOR_NAME = 1, - /** - * Agent capability. The type of this attribute is ::hsa_agent_feature_t. - */ - HSA_AGENT_INFO_FEATURE = 2, - /** - * Machine model supported by the agent. The type of this attribute is - * ::hsa_machine_model_t. - */ - HSA_AGENT_INFO_MACHINE_MODEL = 3, - /** - * Profile supported by the agent. The type of this attribute is - * ::hsa_profile_t. - */ - HSA_AGENT_INFO_PROFILE = 4, - /** - * Default floating-point rounding mode. The type of this attribute is - * ::hsa_default_float_rounding_mode_t, but the value - * ::HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT is not allowed. 
- */ - HSA_AGENT_INFO_DEFAULT_FLOAT_ROUNDING_MODE = 5, - /** - * Default floating-point rounding modes supported by the agent in the Base - * profile. The type of this attribute is a mask of - * ::hsa_default_float_rounding_mode_t. The default floating-point rounding - * mode (::HSA_AGENT_INFO_DEFAULT_FLOAT_ROUNDING_MODE) bit must not be set. - */ - HSA_AGENT_INFO_BASE_PROFILE_DEFAULT_FLOAT_ROUNDING_MODES = 23, - /** - * Flag indicating that the f16 HSAIL operation is at least as fast as the - * f32 operation in the current agent. The value of this attribute is - * undefined if the agent is not a kernel agent. The type of this - * attribute is bool. - */ - HSA_AGENT_INFO_FAST_F16_OPERATION = 24, - /** - * Number of work-items in a wavefront. Must be a power of 2 in the range - * [1,256]. The value of this attribute is undefined if the agent is not - * a kernel agent. The type of this attribute is uint32_t. - */ - HSA_AGENT_INFO_WAVEFRONT_SIZE = 6, - /** - * Maximum number of work-items of each dimension of a work-group. Each - * maximum must be greater than 0. No maximum can exceed the value of - * ::HSA_AGENT_INFO_WORKGROUP_MAX_SIZE. The value of this attribute is - * undefined if the agent is not a kernel agent. The type of this - * attribute is uint16_t[3]. - */ - HSA_AGENT_INFO_WORKGROUP_MAX_DIM = 7, - /** - * Maximum total number of work-items in a work-group. The value of this - * attribute is undefined if the agent is not a kernel agent. The type - * of this attribute is uint32_t. - */ - HSA_AGENT_INFO_WORKGROUP_MAX_SIZE = 8, - /** - * Maximum number of work-items of each dimension of a grid. Each maximum must - * be greater than 0, and must not be smaller than the corresponding value in - * ::HSA_AGENT_INFO_WORKGROUP_MAX_DIM. No maximum can exceed the value of - * ::HSA_AGENT_INFO_GRID_MAX_SIZE. The value of this attribute is undefined if - * the agent is not a kernel agent. The type of this attribute is - * ::hsa_dim3_t. 
- */ - HSA_AGENT_INFO_GRID_MAX_DIM = 9, - /** - * Maximum total number of work-items in a grid. The value of this attribute - * is undefined if the agent is not a kernel agent. The type of this - * attribute is uint32_t. - */ - HSA_AGENT_INFO_GRID_MAX_SIZE = 10, - /** - * Maximum number of fbarriers per work-group. Must be at least 32. The value - * of this attribute is undefined if the agent is not a kernel agent. The - * type of this attribute is uint32_t. - */ - HSA_AGENT_INFO_FBARRIER_MAX_SIZE = 11, - /** - * Maximum number of queues that can be active (created but not destroyed) at - * one time in the agent. The type of this attribute is uint32_t. - */ - HSA_AGENT_INFO_QUEUES_MAX = 12, - /** - * Minimum number of packets that a queue created in the agent - * can hold. Must be a power of 2 greater than 0. Must not exceed - * the value of ::HSA_AGENT_INFO_QUEUE_MAX_SIZE. The type of this - * attribute is uint32_t. - */ - HSA_AGENT_INFO_QUEUE_MIN_SIZE = 13, - /** - * Maximum number of packets that a queue created in the agent can - * hold. Must be a power of 2 greater than 0. The type of this attribute - * is uint32_t. - */ - HSA_AGENT_INFO_QUEUE_MAX_SIZE = 14, - /** - * Type of a queue created in the agent. The type of this attribute is - * ::hsa_queue_type_t. - */ - HSA_AGENT_INFO_QUEUE_TYPE = 15, - /** - * Identifier of the NUMA node associated with the agent. The type of this - * attribute is uint32_t. - */ - HSA_AGENT_INFO_NODE = 16, - /** - * Type of hardware device associated with the agent. The type of this - * attribute is ::hsa_device_type_t. - */ - HSA_AGENT_INFO_DEVICE = 17, - /** - * Array of data cache sizes (L1..L4). Each size is expressed in bytes. A size - * of 0 for a particular level indicates that there is no cache information - * for that level. The type of this attribute is uint32_t[4]. - */ - HSA_AGENT_INFO_CACHE_SIZE = 18, - /** - * Instruction set architecture of the agent. The type of this attribute - * is ::hsa_isa_t. 
- */ - HSA_AGENT_INFO_ISA = 19, - /** - * Bit-mask indicating which extensions are supported by the agent. An - * extension with an ID of @p i is supported if the bit at position @p i is - * set. The type of this attribute is uint8_t[128]. - */ - HSA_AGENT_INFO_EXTENSIONS = 20, - /** - * Major version of the HSA runtime specification supported by the - * agent. The type of this attribute is uint16_t. - */ - HSA_AGENT_INFO_VERSION_MAJOR = 21, - /** - * Minor version of the HSA runtime specification supported by the - * agent. The type of this attribute is uint16_t. - */ - HSA_AGENT_INFO_VERSION_MINOR = 22 -} hsa_agent_info_t; - -/** - * @brief Get the current value of an attribute for a given agent. - * - * @param[in] agent A valid agent. - * - * @param[in] attribute Attribute to query. - * - * @param[out] value Pointer to an application-allocated buffer where to store - * the value of the attribute. If the buffer passed by the application is not - * large enough to hold the value of @p attribute, the behavior is undefined. - * - * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. - * - * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been - * initialized. - * - * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid. - * - * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p attribute is an invalid - * agent attribute, or @p value is NULL. - */ -hsa_status_t HSA_API hsa_agent_get_info(hsa_agent_t agent, - hsa_agent_info_t attribute, - void *value); - -/** - * @brief Iterate over the available agents, and invoke an - * application-defined callback on every iteration. - * - * @param[in] callback Callback to be invoked once per agent. The HSA - * runtime passes two arguments to the callback, the agent and the - * application data. If @p callback returns a status other than - * ::HSA_STATUS_SUCCESS for a particular iteration, the traversal stops and - * ::hsa_iterate_agents returns that status value. 
- * - * @param[in] data Application data that is passed to @p callback on every - * iteration. May be NULL. - * - * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. - * - * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been - * initialized. - * - * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p callback is NULL. - */ -hsa_status_t HSA_API - hsa_iterate_agents(hsa_status_t (*callback)(hsa_agent_t agent, void *data), - void *data); - -/* - -// If we do not know the size of an attribute, we need to query it first -// Note: this API will not be in the spec unless needed -hsa_status_t HSA_API hsa_agent_get_info_size( - hsa_agent_t agent, - hsa_agent_info_t attribute, - size_t* size); - -// Set the value of an agents attribute -// Note: this API will not be in the spec unless needed -hsa_status_t HSA_API hsa_agent_set_info( - hsa_agent_t agent, - hsa_agent_info_t attribute, - void* value); - -*/ - -/** - * @brief Exception policies applied in the presence of hardware exceptions. - */ -typedef enum { - /** - * If a hardware exception is detected, a work-item signals an exception. - */ - HSA_EXCEPTION_POLICY_BREAK = 1, - /** - * If a hardware exception is detected, a hardware status bit is set. - */ - HSA_EXCEPTION_POLICY_DETECT = 2 -} hsa_exception_policy_t; - -/** - * @brief Retrieve the exception policy support for a given combination of - * agent and profile - * - * @param[in] agent Agent. - * - * @param[in] profile Profile. - * - * @param[out] mask Pointer to a memory location where the HSA runtime stores a - * mask of ::hsa_exception_policy_t values. Must not be NULL. - * - * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. - * - * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been - * initialized. - * - * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid. - * - * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p profile is not a valid - * profile, or @p mask is NULL. 
- * - */ -hsa_status_t HSA_API hsa_agent_get_exception_policies(hsa_agent_t agent, - hsa_profile_t profile, - uint16_t *mask); - -/** - * @brief Query if a given version of an extension is supported by an agent - * - * @param[in] extension Extension identifier. - * - * @param[in] agent Agent. - * - * @param[in] version_major Major version number. - * - * @param[in] version_minor Minor version number. - * - * @param[out] result Pointer to a memory location where the HSA runtime stores - * the result of the check. The result is true if the specified version of the - * extension is supported, and false otherwise. - * - * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. - * - * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been - * initialized. - * - * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid. - * - * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p extension is not a valid - * extension, or @p result is NULL. - */ -hsa_status_t HSA_API - hsa_agent_extension_supported(uint16_t extension, hsa_agent_t agent, - uint16_t version_major, - uint16_t version_minor, bool *result); - -/** @} */ - -/** \defgroup signals Signals - * @{ - */ - -/** - * @brief Signal handle. - */ -typedef struct hsa_signal_s { - /** - * Opaque handle. The value 0 is reserved. - */ - uint64_t handle; -} hsa_signal_t; - -/** - * @brief Signal value. The value occupies 32 bits in small machine mode, and 64 - * bits in large machine mode. - */ -#ifdef HSA_LARGE_MODEL -typedef int64_t hsa_signal_value_t; -#else -typedef int32_t hsa_signal_value_t; -#endif - -/** - * @brief Create a signal. - * - * @param[in] initial_value Initial value of the signal. - * - * @param[in] num_consumers Size of @p consumers. A value of 0 indicates that - * any agent might wait on the signal. - * - * @param[in] consumers List of agents that might consume (wait on) the - * signal. 
If @p num_consumers is 0, this argument is ignored; otherwise, the - * HSA runtime might use the list to optimize the handling of the signal - * object. If an agent not listed in @p consumers waits on the returned - * signal, the behavior is undefined. The memory associated with @p consumers - * can be reused or freed after the function returns. - * - * @param[out] signal Pointer to a memory location where the HSA runtime will - * store the newly created signal handle. - * - * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. - * - * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been - * initialized. - * - * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES There is failure to allocate the - * resources required by the implementation. - * - * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p signal is NULL, @p - * num_consumers is greater than 0 but @p consumers is NULL, or @p consumers - * contains duplicates. - */ -hsa_status_t HSA_API - hsa_signal_create(hsa_signal_value_t initial_value, uint32_t num_consumers, - const hsa_agent_t *consumers, hsa_signal_t *signal); - -/** - * @brief Destroy a signal previous created by ::hsa_signal_create. - * - * @param[in] signal Signal. - * - * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. - * - * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been - * initialized. - * - * @retval ::HSA_STATUS_ERROR_INVALID_SIGNAL @p signal is invalid. - * - * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT The handle in @p signal is 0. - */ -hsa_status_t HSA_API hsa_signal_destroy(hsa_signal_t signal); - -/** - * @brief Atomically read the current value of a signal. - * - * @param[in] signal Signal. - * - * @return Value of the signal. 
- */ -hsa_signal_value_t HSA_API hsa_signal_load_acquire(hsa_signal_t signal); - -/** - * @copydoc hsa_signal_load_acquire - */ -hsa_signal_value_t HSA_API hsa_signal_load_relaxed(hsa_signal_t signal); - -/** - * @brief Atomically set the value of a signal. - * - * @details If the value of the signal is changed, all the agents waiting - * on @p signal for which @p value satisfies their wait condition are awakened. - * - * @param[in] signal Signal. - * - * @param[in] value New signal value. - */ -void HSA_API - hsa_signal_store_relaxed(hsa_signal_t signal, hsa_signal_value_t value); - -/** - * @copydoc hsa_signal_store_relaxed - */ -void HSA_API - hsa_signal_store_release(hsa_signal_t signal, hsa_signal_value_t value); - -/** - * @brief Atomically set the value of a signal and return its previous value. - * - * @details If the value of the signal is changed, all the agents waiting - * on @p signal for which @p value satisfies their wait condition are awakened. - * - * @param[in] signal Signal. If @p signal is a queue doorbell signal, the - * behavior is undefined. - * - * @param[in] value New value. - * - * @return Value of the signal prior to the exchange. - * - */ -hsa_signal_value_t HSA_API - hsa_signal_exchange_acq_rel(hsa_signal_t signal, hsa_signal_value_t value); - -/** - * @copydoc hsa_signal_exchange_acq_rel - */ -hsa_signal_value_t HSA_API - hsa_signal_exchange_acquire(hsa_signal_t signal, hsa_signal_value_t value); - -/** - * @copydoc hsa_signal_exchange_acq_rel - */ -hsa_signal_value_t HSA_API - hsa_signal_exchange_relaxed(hsa_signal_t signal, hsa_signal_value_t value); - -/** - * @copydoc hsa_signal_exchange_acq_rel - */ -hsa_signal_value_t HSA_API - hsa_signal_exchange_release(hsa_signal_t signal, hsa_signal_value_t value); - -/** - * @brief Atomically set the value of a signal if the observed value is equal to - * the expected value. The observed value is returned regardless of whether the - * replacement was done. 
- * - * @details If the value of the signal is changed, all the agents waiting - * on @p signal for which @p value satisfies their wait condition are awakened. - * - * @param[in] signal Signal. If @p signal is a queue - * doorbell signal, the behavior is undefined. - * - * @param[in] expected Value to compare with. - * - * @param[in] value New value. - * - * @return Observed value of the signal. - * - */ -hsa_signal_value_t HSA_API hsa_signal_cas_acq_rel(hsa_signal_t signal, - hsa_signal_value_t expected, - hsa_signal_value_t value); - -/** - * @copydoc hsa_signal_cas_acq_rel - */ -hsa_signal_value_t HSA_API hsa_signal_cas_acquire(hsa_signal_t signal, - hsa_signal_value_t expected, - hsa_signal_value_t value); - -/** - * @copydoc hsa_signal_cas_acq_rel - */ -hsa_signal_value_t HSA_API hsa_signal_cas_relaxed(hsa_signal_t signal, - hsa_signal_value_t expected, - hsa_signal_value_t value); - -/** - * @copydoc hsa_signal_cas_acq_rel - */ -hsa_signal_value_t HSA_API hsa_signal_cas_release(hsa_signal_t signal, - hsa_signal_value_t expected, - hsa_signal_value_t value); - -/** - * @brief Atomically increment the value of a signal by a given amount. - * - * @details If the value of the signal is changed, all the agents waiting on - * @p signal for which @p value satisfies their wait condition are awakened. - * - * @param[in] signal Signal. If @p signal is a queue doorbell signal, the - * behavior is undefined. - * - * @param[in] value Value to add to the value of the signal. 
- * - */ -void HSA_API - hsa_signal_add_acq_rel(hsa_signal_t signal, hsa_signal_value_t value); - -/** - * @copydoc hsa_signal_add_acq_rel - */ -void HSA_API - hsa_signal_add_acquire(hsa_signal_t signal, hsa_signal_value_t value); - -/** - * @copydoc hsa_signal_add_acq_rel - */ -void HSA_API - hsa_signal_add_relaxed(hsa_signal_t signal, hsa_signal_value_t value); - -/** - * @copydoc hsa_signal_add_acq_rel - */ -void HSA_API - hsa_signal_add_release(hsa_signal_t signal, hsa_signal_value_t value); - -/** - * @brief Atomically decrement the value of a signal by a given amount. - * - * @details If the value of the signal is changed, all the agents waiting on - * @p signal for which @p value satisfies their wait condition are awakened. - * - * @param[in] signal Signal. If @p signal is a queue doorbell signal, the - * behavior is undefined. - * - * @param[in] value Value to subtract from the value of the signal. - * - */ -void HSA_API - hsa_signal_subtract_acq_rel(hsa_signal_t signal, hsa_signal_value_t value); - -/** - * @copydoc hsa_signal_subtract_acq_rel - */ -void HSA_API - hsa_signal_subtract_acquire(hsa_signal_t signal, hsa_signal_value_t value); - -/** - * @copydoc hsa_signal_subtract_acq_rel - */ -void HSA_API - hsa_signal_subtract_relaxed(hsa_signal_t signal, hsa_signal_value_t value); - -/** - * @copydoc hsa_signal_subtract_acq_rel - */ -void HSA_API - hsa_signal_subtract_release(hsa_signal_t signal, hsa_signal_value_t value); - -/** - * @brief Atomically perform a bitwise AND operation between the value of a - * signal and a given value. - * - * @details If the value of the signal is changed, all the agents waiting on - * @p signal for which @p value satisfies their wait condition are awakened. - * - * @param[in] signal Signal. If @p signal is a queue doorbell signal, the - * behavior is undefined. - * - * @param[in] value Value to AND with the value of the signal. 
- * - */ -void HSA_API - hsa_signal_and_acq_rel(hsa_signal_t signal, hsa_signal_value_t value); - -/** - * @copydoc hsa_signal_and_acq_rel - */ -void HSA_API - hsa_signal_and_acquire(hsa_signal_t signal, hsa_signal_value_t value); - -/** - * @copydoc hsa_signal_and_acq_rel - */ -void HSA_API - hsa_signal_and_relaxed(hsa_signal_t signal, hsa_signal_value_t value); - -/** - * @copydoc hsa_signal_and_acq_rel - */ -void HSA_API - hsa_signal_and_release(hsa_signal_t signal, hsa_signal_value_t value); - -/** - * @brief Atomically perform a bitwise OR operation between the value of a - * signal and a given value. - * - * @details If the value of the signal is changed, all the agents waiting on - * @p signal for which @p value satisfies their wait condition are awakened. - * - * @param[in] signal Signal. If @p signal is a queue doorbell signal, the - * behavior is undefined. - * - * @param[in] value Value to OR with the value of the signal. - */ -void HSA_API - hsa_signal_or_acq_rel(hsa_signal_t signal, hsa_signal_value_t value); - -/** - * @copydoc hsa_signal_or_acq_rel - */ -void HSA_API - hsa_signal_or_acquire(hsa_signal_t signal, hsa_signal_value_t value); - -/** - * @copydoc hsa_signal_or_acq_rel - */ -void HSA_API - hsa_signal_or_relaxed(hsa_signal_t signal, hsa_signal_value_t value); - -/** - * @copydoc hsa_signal_or_acq_rel - */ -void HSA_API - hsa_signal_or_release(hsa_signal_t signal, hsa_signal_value_t value); - -/** - * @brief Atomically perform a bitwise XOR operation between the value of a - * signal and a given value. - * - * @details If the value of the signal is changed, all the agents waiting on - * @p signal for which @p value satisfies their wait condition are awakened. - * - * @param[in] signal Signal. If @p signal is a queue doorbell signal, the - * behavior is undefined. - * - * @param[in] value Value to XOR with the value of the signal. 
- * - */ -void HSA_API - hsa_signal_xor_acq_rel(hsa_signal_t signal, hsa_signal_value_t value); - -/** - * @copydoc hsa_signal_xor_acq_rel - */ -void HSA_API - hsa_signal_xor_acquire(hsa_signal_t signal, hsa_signal_value_t value); - -/** - * @copydoc hsa_signal_xor_acq_rel - */ -void HSA_API - hsa_signal_xor_relaxed(hsa_signal_t signal, hsa_signal_value_t value); - -/** - * @copydoc hsa_signal_xor_acq_rel - */ -void HSA_API - hsa_signal_xor_release(hsa_signal_t signal, hsa_signal_value_t value); - -/** - * @brief Wait condition operator. - */ -typedef enum { - /** - * The two operands are equal. - */ - HSA_SIGNAL_CONDITION_EQ = 0, - /** - * The two operands are not equal. - */ - HSA_SIGNAL_CONDITION_NE = 1, - /** - * The first operand is less than the second operand. - */ - HSA_SIGNAL_CONDITION_LT = 2, - /** - * The first operand is greater than or equal to the second operand. - */ - HSA_SIGNAL_CONDITION_GTE = 3 -} hsa_signal_condition_t; - -/** - * @brief State of the application thread during a signal wait. - */ -typedef enum { - /** - * The application thread may be rescheduled while waiting on the signal. - */ - HSA_WAIT_STATE_BLOCKED = 0, - /** - * The application thread stays active while waiting on a signal. - */ - HSA_WAIT_STATE_ACTIVE = 1 -} hsa_wait_state_t; - -/** - * @brief Wait until a signal value satisfies a specified condition, or a - * certain amount of time has elapsed. - * - * @details A wait operation can spuriously resume at any time sooner than the - * timeout (for example, due to system or other external factors) even when the - * condition has not been met. - * - * The function is guaranteed to return if the signal value satisfies the - * condition at some point in time during the wait, but the value returned to - * the application might not satisfy the condition. The application must ensure - * that signals are used in such way that wait wakeup conditions are not - * invalidated before dependent threads have woken up. 
- * - * When the wait operation internally loads the value of the passed signal, it - * uses the memory order indicated in the function name. - * - * @param[in] signal Signal. - * - * @param[in] condition Condition used to compare the signal value with @p - * compare_value. - * - * @param[in] compare_value Value to compare with. - * - * @param[in] timeout_hint Maximum duration of the wait. Specified in the same - * unit as the system timestamp. The operation might block for a shorter or - * longer time even if the condition is not met. A value of UINT64_MAX indicates - * no maximum. - * - * @param[in] wait_state_hint Hint used by the application to indicate the - * preferred waiting state. The actual waiting state is ultimately decided by - * HSA runtime and may not match the provided hint. A value of - * ::HSA_WAIT_STATE_ACTIVE may improve the latency of response to a signal - * update by avoiding rescheduling overhead. - * - * @return Observed value of the signal, which might not satisfy the specified - * condition. - * - */ -hsa_signal_value_t HSA_API - hsa_signal_wait_acquire(hsa_signal_t signal, - hsa_signal_condition_t condition, - hsa_signal_value_t compare_value, - uint64_t timeout_hint, - hsa_wait_state_t wait_state_hint); - -/** - * @copydoc hsa_signal_wait_acquire - */ -hsa_signal_value_t HSA_API - hsa_signal_wait_relaxed(hsa_signal_t signal, - hsa_signal_condition_t condition, - hsa_signal_value_t compare_value, - uint64_t timeout_hint, - hsa_wait_state_t wait_state_hint); - -/** @} */ - -/** \defgroup memory Memory - * @{ - */ - -/** - * @brief A memory region represents a block of virtual memory with certain - * properties. For example, the HSA runtime represents fine-grained memory in - * the global segment using a region. A region might be associated with more - * than one agent. - */ -typedef struct hsa_region_s { - /** - * Opaque handle. 
- */ - uint64_t handle; -} hsa_region_t; - -/** @} */ - -/** \defgroup queue Queues - * @{ - */ - -/** - * @brief Queue type. Intended to be used for dynamic queue protocol - * determination. - */ -typedef enum { - /** - * Queue supports multiple producers. - */ - HSA_QUEUE_TYPE_MULTI = 0, - /** - * Queue only supports a single producer. - */ - HSA_QUEUE_TYPE_SINGLE = 1 -} hsa_queue_type_t; - -/** - * @brief Queue features. - */ -typedef enum { - /** - * Queue supports kernel dispatch packets. - */ - HSA_QUEUE_FEATURE_KERNEL_DISPATCH = 1, - - /** - * Queue supports agent dispatch packets. - */ - HSA_QUEUE_FEATURE_AGENT_DISPATCH = 2 -} hsa_queue_feature_t; - -/** - * @brief User mode queue. - * - * @details The queue structure is read-only and allocated by the HSA runtime, - * but agents can directly modify the contents of the buffer pointed by @a - * base_address, or use HSA runtime APIs to access the doorbell signal. - * - */ -typedef struct hsa_queue_s { - /** - * Queue type. - */ - hsa_queue_type_t type; - - /** - * Queue features mask. This is a bit-field of ::hsa_queue_feature_t - * values. Applications should ignore any unknown set bits. - */ - uint32_t features; - -#ifdef HSA_LARGE_MODEL - void *base_address; -#elif defined HSA_LITTLE_ENDIAN - /** - * Starting address of the HSA runtime-allocated buffer used to store the AQL - * packets. Must be aligned to the size of an AQL packet. - */ - void *base_address; - /** - * Reserved. Must be 0. - */ - uint32_t reserved0; -#else - uint32_t reserved0; - void *base_address; -#endif - - /** - * Signal object used by the application to indicate the ID of a packet that - * is ready to be processed. The HSA runtime manages the doorbell signal. If - * the application tries to replace or destroy this signal, the behavior is - * undefined. - * - * If @a type is ::HSA_QUEUE_TYPE_SINGLE the doorbell signal value must be - * updated in a monotonically increasing fashion. 
If @a type is - * ::HSA_QUEUE_TYPE_MULTI, the doorbell signal value can be updated with any - * value. - */ - hsa_signal_t doorbell_signal; - - /** - * Maximum number of packets the queue can hold. Must be a power of 2. - */ - uint32_t size; - /** - * Reserved. Must be 0. - */ - uint32_t reserved1; - /** - * Queue identifier, which is unique over the lifetime of the application. - */ - uint64_t id; - -} hsa_queue_t; - -/** - * @brief Create a user mode queue. - * - * @details The HSA runtime creates the queue structure, the underlying packet - * buffer, the completion signal, and the write and read indexes. The initial - * value of the write and read indexes is 0. The type of every packet in the - * buffer is initialized to ::HSA_PACKET_TYPE_INVALID. - * - * The application should only rely on the error code returned to determine if - * the queue is valid. - * - * @param[in] agent Agent where to create the queue. - * - * @param[in] size Number of packets the queue is expected to - * hold. Must be a power of 2 between 1 and the value of - * ::HSA_AGENT_INFO_QUEUE_MAX_SIZE in @p agent. The size of the newly - * created queue is the maximum of @p size and the value of - * ::HSA_AGENT_INFO_QUEUE_MIN_SIZE in @p agent. - * - * @param[in] type Type of the queue. If the value of - * ::HSA_AGENT_INFO_QUEUE_TYPE in @p agent is ::HSA_QUEUE_TYPE_SINGLE, then @p - * type must also be ::HSA_QUEUE_TYPE_SINGLE. - * - * @param[in] callback Callback invoked by the HSA runtime for every - * asynchronous event related to the newly created queue. May be NULL. The HSA - * runtime passes three arguments to the callback: a code identifying the event - * that triggered the invocation, a pointer to the queue where the event - * originated, and the application data. - * - * @param[in] data Application data that is passed to @p callback on every - * iteration. May be NULL. 
- * - * @param[in] private_segment_size Hint indicating the maximum - * expected private segment usage per work-item, in bytes. There may - * be performance degradation if the application places a kernel - * dispatch packet in the queue and the corresponding private segment - * usage exceeds @p private_segment_size. If the application does not - * want to specify any particular value for this argument, @p - * private_segment_size must be UINT32_MAX. If the queue does not - * support kernel dispatch packets, this argument is ignored. - * - * @param[in] group_segment_size Hint indicating the maximum expected - * group segment usage per work-group, in bytes. There may be - * performance degradation if the application places a kernel dispatch - * packet in the queue and the corresponding group segment usage - * exceeds @p group_segment_size. If the application does not want to - * specify any particular value for this argument, @p - * group_segment_size must be UINT32_MAX. If the queue does not - * support kernel dispatch packets, this argument is ignored. - * - * @param[out] queue Memory location where the HSA runtime stores a pointer to - * the newly created queue. - * - * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. - * - * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been - * initialized. - * - * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES There is failure to allocate - * the resources required by the implementation. - * - * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid. - * - * @retval ::HSA_STATUS_ERROR_INVALID_QUEUE_CREATION @p agent does not - * support queues of the given type. - * - * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p size is not a power of two, - * @p size is 0, @p type is an invalid queue type, or @p queue is NULL. 
- * - */ -hsa_status_t HSA_API - hsa_queue_create(hsa_agent_t agent, uint32_t size, hsa_queue_type_t type, - void (*callback)(hsa_status_t status, hsa_queue_t *source, - void *data), - void *data, uint32_t private_segment_size, - uint32_t group_segment_size, hsa_queue_t **queue); - -/** - * @brief Create a queue for which the application or a kernel is responsible - * for processing the AQL packets. - * - * @details The application can use this function to create queues where AQL - * packets are not parsed by the packet processor associated with an agent, - * but rather by a unit of execution running on that agent (for example, a - * thread in the host application). - * - * The application is responsible for ensuring that all the producers and - * consumers of the resulting queue can access the provided doorbell signal - * and memory region. The application is also responsible for ensuring that the - * unit of execution processing the queue packets supports the indicated - * features (AQL packet types). - * - * When the queue is created, the HSA runtime allocates the packet buffer using - * @p region, and the write and read indexes. The initial value of the write and - * read indexes is 0, and the type of every packet in the buffer is initialized - * to ::HSA_PACKET_TYPE_INVALID. The value of the @e size, @e type, @e features, - * and @e doorbell_signal fields in the returned queue match the values passed - * by the application. - * - * @param[in] region Memory region that the HSA runtime should use to allocate - * the AQL packet buffer and any other queue metadata. - * - * @param[in] size Number of packets the queue is expected to hold. Must be a - * power of 2 greater than 0. - * - * @param[in] type Queue type. - * - * @param[in] features Supported queue features. This is a bit-field of - * ::hsa_queue_feature_t values. - * - * @param[in] doorbell_signal Doorbell signal that the HSA runtime must - * associate with the returned queue. 
The signal handle must not be 0. - * - * @param[out] queue Memory location where the HSA runtime stores a pointer to - * the newly created queue. The application should not rely on the value - * returned for this argument but only in the status code to determine if the - * queue is valid. Must not be NULL. - * - * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. - * - * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been - * initialized. - * - * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES There is failure to allocate - * the resources required by the implementation. - * - * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p size is not a power of two, @p - * size is 0, @p type is an invalid queue type, the doorbell signal handle is - * 0, or @p queue is NULL. - * - */ -hsa_status_t HSA_API - hsa_soft_queue_create(hsa_region_t region, uint32_t size, - hsa_queue_type_t type, uint32_t features, - hsa_signal_t doorbell_signal, hsa_queue_t **queue); - -/** - * @brief Destroy a user mode queue. - * - * @details When a queue is destroyed, the state of the AQL packets that have - * not been yet fully processed (their completion phase has not finished) - * becomes undefined. It is the responsibility of the application to ensure that - * all pending queue operations are finished if their results are required. - * - * The resources allocated by the HSA runtime during queue creation (queue - * structure, ring buffer, doorbell signal) are released. The queue should not - * be accessed after being destroyed. - * - * @param[in] queue Pointer to a queue created using ::hsa_queue_create. - * - * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. - * - * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been - * initialized. - * - * @retval ::HSA_STATUS_ERROR_INVALID_QUEUE The queue is invalid. - * - * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p queue is NULL. 
- */ -hsa_status_t HSA_API hsa_queue_destroy(hsa_queue_t *queue); - -/** - * @brief Inactivate a queue. - * - * @details Inactivating the queue aborts any pending executions and prevent any - * new packets from being processed. Any more packets written to the queue once - * it is inactivated will be ignored by the packet processor. - * - * @param[in] queue Pointer to a queue. - * - * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. - * - * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been - * initialized. - * - * @retval ::HSA_STATUS_ERROR_INVALID_QUEUE The queue is invalid. - * - * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p queue is NULL. - */ -hsa_status_t HSA_API hsa_queue_inactivate(hsa_queue_t *queue); - -/** - * @brief Atomically load the read index of a queue. - * - * @param[in] queue Pointer to a queue. - * - * @return Read index of the queue pointed by @p queue. - */ -uint64_t HSA_API hsa_queue_load_read_index_acquire(const hsa_queue_t *queue); - -/** - * @copydoc hsa_queue_load_read_index_acquire - */ -uint64_t HSA_API hsa_queue_load_read_index_relaxed(const hsa_queue_t *queue); - -/** - * @brief Atomically load the write index of a queue. - * - * @param[in] queue Pointer to a queue. - * - * @return Write index of the queue pointed by @p queue. - */ -uint64_t HSA_API hsa_queue_load_write_index_acquire(const hsa_queue_t *queue); - -/** - * @copydoc hsa_queue_load_write_index_acquire - */ -uint64_t HSA_API hsa_queue_load_write_index_relaxed(const hsa_queue_t *queue); - -/** - * @brief Atomically set the write index of a queue. - * - * @param[in] queue Pointer to a queue. - * - * @param[in] value Value to assign to the write index. 
- * - */ -void HSA_API hsa_queue_store_write_index_relaxed(const hsa_queue_t *queue, - uint64_t value); - -/** - * @copydoc hsa_queue_store_write_index_relaxed - */ -void HSA_API hsa_queue_store_write_index_release(const hsa_queue_t *queue, - uint64_t value); - -/** - * @brief Atomically set the write index of a queue if the observed value is - * equal to the expected value. The application can inspect the returned value - * to determine if the replacement was done. - * - * @param[in] queue Pointer to a queue. - * - * @param[in] expected Expected value. - * - * @param[in] value Value to assign to the write index if @p expected matches - * the observed write index. Must be greater than @p expected. - * - * @return Previous value of the write index. - */ -uint64_t HSA_API hsa_queue_cas_write_index_acq_rel(const hsa_queue_t *queue, - uint64_t expected, - uint64_t value); - -/** - * @copydoc hsa_queue_cas_write_index_acq_rel - */ -uint64_t HSA_API hsa_queue_cas_write_index_acquire(const hsa_queue_t *queue, - uint64_t expected, - uint64_t value); - -/** - * @copydoc hsa_queue_cas_write_index_acq_rel - */ -uint64_t HSA_API hsa_queue_cas_write_index_relaxed(const hsa_queue_t *queue, - uint64_t expected, - uint64_t value); - -/** - * @copydoc hsa_queue_cas_write_index_acq_rel - */ -uint64_t HSA_API hsa_queue_cas_write_index_release(const hsa_queue_t *queue, - uint64_t expected, - uint64_t value); - -/** - * @brief Atomically increment the write index of a queue by an offset. - * - * @param[in] queue Pointer to a queue. - * - * @param[in] value Value to add to the write index. - * - * @return Previous value of the write index. 
- */ -uint64_t HSA_API - hsa_queue_add_write_index_acq_rel(const hsa_queue_t *queue, uint64_t value); - -/** - * @copydoc hsa_queue_add_write_index_acq_rel - */ -uint64_t HSA_API - hsa_queue_add_write_index_acquire(const hsa_queue_t *queue, uint64_t value); - -/** - * @copydoc hsa_queue_add_write_index_acq_rel - */ -uint64_t HSA_API - hsa_queue_add_write_index_relaxed(const hsa_queue_t *queue, uint64_t value); - -/** - * @copydoc hsa_queue_add_write_index_acq_rel - */ -uint64_t HSA_API - hsa_queue_add_write_index_release(const hsa_queue_t *queue, uint64_t value); - -/** - * @brief Atomically set the read index of a queue. - * - * @details Modifications of the read index are not allowed and result in - * undefined behavior if the queue is associated with an agent for which - * only the corresponding packet processor is permitted to update the read - * index. - * - * @param[in] queue Pointer to a queue. - * - * @param[in] value Value to assign to the read index. - * - */ -void HSA_API hsa_queue_store_read_index_relaxed(const hsa_queue_t *queue, - uint64_t value); - -/** - * @copydoc hsa_queue_store_read_index_relaxed - */ -void HSA_API hsa_queue_store_read_index_release(const hsa_queue_t *queue, - uint64_t value); -/** @} */ - -/** \defgroup aql Architected Queuing Language - * @{ - */ - -/** - * @brief Packet type. - */ -typedef enum { - /** - * Vendor-specific packet. - */ - HSA_PACKET_TYPE_VENDOR_SPECIFIC = 0, - /** - * The packet has been processed in the past, but has not been reassigned to - * the packet processor. A packet processor must not process a packet of this - * type. All queues support this packet type. - */ - HSA_PACKET_TYPE_INVALID = 1, - /** - * Packet used by agents for dispatching jobs to kernel agents. Not all - * queues support packets of this type (see ::hsa_queue_feature_t). 
- */ - HSA_PACKET_TYPE_KERNEL_DISPATCH = 2, - /** - * Packet used by agents to delay processing of subsequent packets, and to - * express complex dependencies between multiple packets. All queues support - * this packet type. - */ - HSA_PACKET_TYPE_BARRIER_AND = 3, - /** - * Packet used by agents for dispatching jobs to agents. Not all - * queues support packets of this type (see ::hsa_queue_feature_t). - */ - HSA_PACKET_TYPE_AGENT_DISPATCH = 4, - /** - * Packet used by agents to delay processing of subsequent packets, and to - * express complex dependencies between multiple packets. All queues support - * this packet type. - */ - HSA_PACKET_TYPE_BARRIER_OR = 5 -} hsa_packet_type_t; - -/** - * @brief Scope of the memory fence operation associated with a packet. - */ -typedef enum { - /** - * No scope (no fence is applied). The packet relies on external fences to - * ensure visibility of memory updates. - */ - HSA_FENCE_SCOPE_NONE = 0, - /** - * The fence is applied with agent scope for the global segment. - */ - HSA_FENCE_SCOPE_AGENT = 1, - /** - * The fence is applied across both agent and system scope for the global - * segment. - */ - HSA_FENCE_SCOPE_SYSTEM = 2 -} hsa_fence_scope_t; - -/** - * @brief Sub-fields of the @a header field that is present in any AQL - * packet. The offset (with respect to the address of @a header) of a sub-field - * is identical to its enumeration constant. The width of each sub-field is - * determined by the corresponding value in ::hsa_packet_header_width_t. The - * offset and the width are expressed in bits. - */ -typedef enum { - /** - * Packet type. The value of this sub-field must be one of - * ::hsa_packet_type_t. If the type is ::HSA_PACKET_TYPE_VENDOR_SPECIFIC, the - * packet layout is vendor-specific. - */ - HSA_PACKET_HEADER_TYPE = 0, - /** - * Barrier bit. If the barrier bit is set, the processing of the current - * packet only launches when all preceding packets (within the same queue) are - * complete. 
- */ - HSA_PACKET_HEADER_BARRIER = 8, - /** - * Acquire fence scope. The value of this sub-field determines the scope and - * type of the memory fence operation applied before the packet enters the - * active phase. An acquire fence ensures that any subsequent global segment - * or image loads by any unit of execution that belongs to a dispatch that has - * not yet entered the active phase on any queue of the same kernel agent, - * sees any data previously released at the scopes specified by the acquire - * fence. The value of this sub-field must be one of ::hsa_fence_scope_t. - */ - HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE = 9, - /** - * Release fence scope, The value of this sub-field determines the scope and - * type of the memory fence operation applied after kernel completion but - * before the packet is completed. A release fence makes any global segment or - * image data that was stored by any unit of execution that belonged to a - * dispatch that has completed the active phase on any queue of the same - * kernel agent visible in all the scopes specified by the release fence. The - * value of this sub-field must be one of ::hsa_fence_scope_t. - */ - HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE = 11 -} hsa_packet_header_t; - -/** - * @brief Width (in bits) of the sub-fields in ::hsa_packet_header_t. - */ -typedef enum { - HSA_PACKET_HEADER_WIDTH_TYPE = 8, - HSA_PACKET_HEADER_WIDTH_BARRIER = 1, - HSA_PACKET_HEADER_WIDTH_ACQUIRE_FENCE_SCOPE = 2, - HSA_PACKET_HEADER_WIDTH_RELEASE_FENCE_SCOPE = 2 -} hsa_packet_header_width_t; - -/** - * @brief Sub-fields of the kernel dispatch packet @a setup field. The offset - * (with respect to the address of @a setup) of a sub-field is identical to its - * enumeration constant. The width of each sub-field is determined by the - * corresponding value in ::hsa_kernel_dispatch_packet_setup_width_t. The - * offset and the width are expressed in bits. - */ -typedef enum { - /** - * Number of dimensions of the grid. 
Valid values are 1, 2, or 3. - * - */ - HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS = 0 -} hsa_kernel_dispatch_packet_setup_t; - -/** - * @brief Width (in bits) of the sub-fields in - * ::hsa_kernel_dispatch_packet_setup_t. - */ -typedef enum { - HSA_KERNEL_DISPATCH_PACKET_SETUP_WIDTH_DIMENSIONS = 2 -} hsa_kernel_dispatch_packet_setup_width_t; - -/** - * @brief AQL kernel dispatch packet - */ -typedef struct hsa_kernel_dispatch_packet_s { - /** - * Packet header. Used to configure multiple packet parameters such as the - * packet type. The parameters are described by ::hsa_packet_header_t. - */ - uint16_t header; - - /** - * Dispatch setup parameters. Used to configure kernel dispatch parameters - * such as the number of dimensions in the grid. The parameters are described - * by ::hsa_kernel_dispatch_packet_setup_t. - */ - uint16_t setup; - - /** - * X dimension of work-group, in work-items. Must be greater than 0. - */ - uint16_t workgroup_size_x; - - /** - * Y dimension of work-group, in work-items. Must be greater than - * 0. If the grid has 1 dimension, the only valid value is 1. - */ - uint16_t workgroup_size_y; - - /** - * Z dimension of work-group, in work-items. Must be greater than - * 0. If the grid has 1 or 2 dimensions, the only valid value is 1. - */ - uint16_t workgroup_size_z; - - /** - * Reserved. Must be 0. - */ - uint16_t reserved0; - - /** - * X dimension of grid, in work-items. Must be greater than 0. Must - * not be smaller than @a workgroup_size_x. - */ - uint32_t grid_size_x; - - /** - * Y dimension of grid, in work-items. Must be greater than 0. If the grid has - * 1 dimension, the only valid value is 1. Must not be smaller than @a - * workgroup_size_y. - */ - uint32_t grid_size_y; - - /** - * Z dimension of grid, in work-items. Must be greater than 0. If the grid has - * 1 or 2 dimensions, the only valid value is 1. Must not be smaller than @a - * workgroup_size_z. 
- */ - uint32_t grid_size_z; - - /** - * Size in bytes of private memory allocation request (per work-item). - */ - uint32_t private_segment_size; - - /** - * Size in bytes of group memory allocation request (per work-group). Must not - * be less than the sum of the group memory used by the kernel (and the - * functions it calls directly or indirectly) and the dynamically allocated - * group segment variables. - */ - uint32_t group_segment_size; - - /** - * Opaque handle to a code object that includes an implementation-defined - * executable code for the kernel. - */ - uint64_t kernel_object; - -#ifdef HSA_LARGE_MODEL - void *kernarg_address; -#elif defined HSA_LITTLE_ENDIAN - /** - * Pointer to a buffer containing the kernel arguments. May be NULL. - * - * The buffer must be allocated using ::hsa_memory_allocate, and must not be - * modified once the kernel dispatch packet is enqueued until the dispatch has - * completed execution. - */ - void *kernarg_address; - /** - * Reserved. Must be 0. - */ - uint32_t reserved1; -#else - uint32_t reserved1; - void *kernarg_address; -#endif - - /** - * Reserved. Must be 0. - */ - uint64_t reserved2; - - /** - * Signal used to indicate completion of the job. The application can use the - * special signal handle 0 to indicate that no signal is used. - */ - hsa_signal_t completion_signal; - -} hsa_kernel_dispatch_packet_t; - -/** - * @brief Agent dispatch packet. - */ -typedef struct hsa_agent_dispatch_packet_s { - /** - * Packet header. Used to configure multiple packet parameters such as the - * packet type. The parameters are described by ::hsa_packet_header_t. - */ - uint16_t header; - - /** - * Application-defined function to be performed by the destination agent. - */ - uint16_t type; - - /** - * Reserved. Must be 0. - */ - uint32_t reserved0; - -#ifdef HSA_LARGE_MODEL - void *return_address; -#elif defined HSA_LITTLE_ENDIAN - /** - * Address where to store the function return values, if any. 
- */ - void *return_address; - /** - * Reserved. Must be 0. - */ - uint32_t reserved1; -#else - uint32_t reserved1; - void *return_address; -#endif - - /** - * Function arguments. - */ - uint64_t arg[4]; - - /** - * Reserved. Must be 0. - */ - uint64_t reserved2; - - /** - * Signal used to indicate completion of the job. The application can use the - * special signal handle 0 to indicate that no signal is used. - */ - hsa_signal_t completion_signal; - -} hsa_agent_dispatch_packet_t; - -/** - * @brief Barrier-AND packet. - */ -typedef struct hsa_barrier_and_packet_s { - /** - * Packet header. Used to configure multiple packet parameters such as the - * packet type. The parameters are described by ::hsa_packet_header_t. - */ - uint16_t header; - - /** - * Reserved. Must be 0. - */ - uint16_t reserved0; - - /** - * Reserved. Must be 0. - */ - uint32_t reserved1; - - /** - * Array of dependent signal objects. Signals with a handle value of 0 are - * allowed and are interpreted by the packet processor as satisfied - * dependencies. - */ - hsa_signal_t dep_signal[5]; - - /** - * Reserved. Must be 0. - */ - uint64_t reserved2; - - /** - * Signal used to indicate completion of the job. The application can use the - * special signal handle 0 to indicate that no signal is used. - */ - hsa_signal_t completion_signal; - -} hsa_barrier_and_packet_t; - -/** - * @brief Barrier-OR packet. - */ -typedef struct hsa_barrier_or_packet_s { - /** - * Packet header. Used to configure multiple packet parameters such as the - * packet type. The parameters are described by ::hsa_packet_header_t. - */ - uint16_t header; - - /** - * Reserved. Must be 0. - */ - uint16_t reserved0; - - /** - * Reserved. Must be 0. - */ - uint32_t reserved1; - - /** - * Array of dependent signal objects. Signals with a handle value of 0 are - * allowed and are interpreted by the packet processor as dependencies not - * satisfied. - */ - hsa_signal_t dep_signal[5]; - - /** - * Reserved. Must be 0. 
- */ - uint64_t reserved2; - - /** - * Signal used to indicate completion of the job. The application can use the - * special signal handle 0 to indicate that no signal is used. - */ - hsa_signal_t completion_signal; - -} hsa_barrier_or_packet_t; - -/** @} */ - -/** \addtogroup memory Memory - * @{ - */ - -/** - * @brief Memory segments associated with a region. - */ -typedef enum { - /** - * Global segment. Used to hold data that is shared by all agents. - */ - HSA_REGION_SEGMENT_GLOBAL = 0, - /** - * Read-only segment. Used to hold data that remains constant during the - * execution of a kernel. - */ - HSA_REGION_SEGMENT_READONLY = 1, - /** - * Private segment. Used to hold data that is local to a single work-item. - */ - HSA_REGION_SEGMENT_PRIVATE = 2, - /** - * Group segment. Used to hold data that is shared by the work-items of a - * work-group. - */ - HSA_REGION_SEGMENT_GROUP = 3 -} hsa_region_segment_t; - -/** - * @brief Global region flags. - */ -typedef enum { - /** - * The application can use memory in the region to store kernel arguments, and - * provide the values for the kernarg segment of a kernel dispatch. If this - * flag is set, then ::HSA_REGION_GLOBAL_FLAG_FINE_GRAINED must be set. - */ - HSA_REGION_GLOBAL_FLAG_KERNARG = 1, - /** - * Updates to memory in this region are immediately visible to all the - * agents under the terms of the HSA memory model. If this - * flag is set, then ::HSA_REGION_GLOBAL_FLAG_COARSE_GRAINED must not be set. - */ - HSA_REGION_GLOBAL_FLAG_FINE_GRAINED = 2, - /** - * Updates to memory in this region can be performed by a single agent at - * a time. If a different agent in the system is allowed to access the - * region, the application must explicitely invoke ::hsa_memory_assign_agent - * in order to transfer ownership to that agent for a particular buffer. - */ - HSA_REGION_GLOBAL_FLAG_COARSE_GRAINED = 4 -} hsa_region_global_flag_t; - -/** - * @brief Attributes of a memory region. 
- */ -typedef enum { - /** - * Segment where memory in the region can be used. The type of this - * attribute is ::hsa_region_segment_t. - */ - HSA_REGION_INFO_SEGMENT = 0, - /** - * Flag mask. The value of this attribute is undefined if the value of - * ::HSA_REGION_INFO_SEGMENT is not ::HSA_REGION_SEGMENT_GLOBAL. The type of - * this attribute is uint32_t, a bit-field of ::hsa_region_global_flag_t - * values. - */ - HSA_REGION_INFO_GLOBAL_FLAGS = 1, - /** - * Size of this region, in bytes. The type of this attribute is size_t. - */ - HSA_REGION_INFO_SIZE = 2, - /** - * Maximum allocation size in this region, in bytes. Must not exceed the value - * of ::HSA_REGION_INFO_SIZE. The type of this attribute is size_t. - * - * If the region is in the global or readonly segments, this is the maximum - * size that the application can pass to ::hsa_memory_allocate. If the region - * is in the group segment, this is the maximum size (per work-group) that can - * be requested for a given kernel dispatch. If the region is in the private - * segment, this is the maximum size (per work-item) that can be request for a - * specific kernel dispatch. - */ - HSA_REGION_INFO_ALLOC_MAX_SIZE = 4, - /** - * Indicates whether memory in this region can be allocated using - * ::hsa_memory_allocate. The type of this attribute is bool. - * - * The value of this flag is always false for regions in the group and private - * segments. - */ - HSA_REGION_INFO_RUNTIME_ALLOC_ALLOWED = 5, - /** - * Allocation granularity of buffers allocated by ::hsa_memory_allocate in - * this region. The size of a buffer allocated in this region is a multiple of - * the value of this attribute. The value of this attribute is only defined if - * ::HSA_REGION_INFO_RUNTIME_ALLOC_ALLOWED is true for this region. The type - * of this attribute is size_t. - */ - HSA_REGION_INFO_RUNTIME_ALLOC_GRANULE = 6, - /** - * Alignment of buffers allocated by ::hsa_memory_allocate in this region. 
The - * value of this attribute is only defined if - * ::HSA_REGION_INFO_RUNTIME_ALLOC_ALLOWED is true for this region, and must - * be a power of 2. The type of this attribute is size_t. - */ - HSA_REGION_INFO_RUNTIME_ALLOC_ALIGNMENT = 7 -} hsa_region_info_t; - -/** - * @brief Get the current value of an attribute of a region. - * - * @param[in] region A valid region. - * - * @param[in] attribute Attribute to query. - * - * @param[out] value Pointer to a application-allocated buffer where to store - * the value of the attribute. If the buffer passed by the application is not - * large enough to hold the value of @p attribute, the behavior is undefined. - * - * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. - * - * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been - * initialized. - * - * @retval ::HSA_STATUS_ERROR_INVALID_REGION The region is invalid. - * - * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p attribute is an invalid - * region attribute, or @p value is NULL. - */ -hsa_status_t HSA_API hsa_region_get_info(hsa_region_t region, - hsa_region_info_t attribute, - void *value); - -/** - * @brief Iterate over the memory regions associated with a given agent, and - * invoke an application-defined callback on every iteration. - * - * @param[in] agent A valid agent. - * - * @param[in] callback Callback to be invoked once per region that is - * accessible from the agent. The HSA runtime passes two arguments to the - * callback, the region and the application data. If @p callback returns a - * status other than ::HSA_STATUS_SUCCESS for a particular iteration, the - * traversal stops and ::hsa_agent_iterate_regions returns that status value. - * - * @param[in] data Application data that is passed to @p callback on every - * iteration. May be NULL. - * - * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. 
- * - * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been - * initialized. - * - * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid. - * - * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p callback is NULL. - */ -hsa_status_t HSA_API hsa_agent_iterate_regions( - hsa_agent_t agent, - hsa_status_t (*callback)(hsa_region_t region, void *data), void *data); - -/** - * @brief Allocate a block of memory in a given region. - * - * @param[in] region Region where to allocate memory from. The region must have - * the ::HSA_REGION_INFO_RUNTIME_ALLOC_ALLOWED flag set. - * - * @param[in] size Allocation size, in bytes. Must not be zero. This value is - * rounded up to the nearest multiple of ::HSA_REGION_INFO_RUNTIME_ALLOC_GRANULE - * in @p region. - * - * @param[out] ptr Pointer to the location where to store the base address of - * the allocated block. The returned base address is aligned to the value of - * ::HSA_REGION_INFO_RUNTIME_ALLOC_ALIGNMENT in @p region. If the allocation - * fails, the returned value is undefined. - * - * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. - * - * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been - * initialized. - * - * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES No memory is available. - * - * @retval ::HSA_STATUS_ERROR_INVALID_REGION The region is invalid. - * - * @retval ::HSA_STATUS_ERROR_INVALID_ALLOCATION The host is not allowed to - * allocate memory in @p region, or @p size is greater than the value of - * HSA_REGION_INFO_ALLOC_MAX_SIZE in @p region. - * - * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p ptr is NULL, or @p size is 0. - */ -hsa_status_t HSA_API - hsa_memory_allocate(hsa_region_t region, size_t size, void **ptr); - -/** - * @brief Deallocate a block of memory previously allocated using - * ::hsa_memory_allocate. - * - * @param[in] ptr Pointer to a memory block. 
If @p ptr does not match a value - * previously returned by ::hsa_memory_allocate, the behavior is undefined. - * - * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. - * - * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been - * initialized. - */ -hsa_status_t HSA_API hsa_memory_free(void *ptr); - -/** - * @brief Copy a block of memory. - * - * @param[out] dst Buffer where the content is to be copied. - * - * @param[in] src A valid pointer to the source of data to be copied. - * - * @param[in] size Number of bytes to copy. If @p size is 0, no copy is - * performed and the function returns success. Copying a number of bytes larger - * than the size of the buffers pointed by @p dst or @p src results in undefined - * behavior. - * - * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. - * - * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been - * initialized. - * - * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT The source or destination - * pointers are NULL. - */ -hsa_status_t HSA_API hsa_memory_copy(void *dst, const void *src, size_t size); - -/** - * @brief Change the ownership of a global, coarse-grained buffer. - * - * @details The contents of a coarse-grained buffer are visible to an agent - * only after ownership has been explicitely transferred to that agent. Once the - * operation completes, the previous owner cannot longer access the data in the - * buffer. - * - * An implementation of the HSA runtime is allowed, but not required, to change - * the physical location of the buffer when ownership is transferred to a - * different agent. In general the application must not assume this - * behavior. The virtual location (address) of the passed buffer is never - * modified. - * - * @param[in] ptr Base address of a global buffer. The pointer should match an - * address previously returned by ::hsa_memory_allocate. 
The size of the buffer - * affected by the ownership change is identical to the size of that previous - * allocation. If @p ptr points to a fine-grained global buffer, no operation is - * performed and the function returns success. If @p ptr does not point to - * global memory, the behavior is undefined. - * - * @param[in] agent Agent that becomes the owner of the buffer. The - * application is responsible for ensuring that @p agent has access to the - * region that contains the buffer. It is allowed to change ownership to an - * agent that is already the owner of the buffer, with the same or different - * access permissions. - * - * @param[in] access Access permissions requested for the new owner. - * - * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. - * - * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been - * initialized. - * - * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid. - * - * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime is unable to - * acquire the resources required by the operation. - * - * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p ptr is NULL, or @p access is - * not a valid access value. - */ -hsa_status_t HSA_API hsa_memory_assign_agent(void *ptr, hsa_agent_t agent, - hsa_access_permission_t access); - -/** - * - * @brief Register a global, fine-grained buffer. - * - * @details Registering a buffer serves as an indication to the HSA runtime that - * the memory might be accessed from a kernel agent other than the - * host. Registration is a performance hint that allows the HSA runtime - * implementation to know which buffers will be accessed by some of the kernel - * agents ahead of time. - * - * Registration is only recommended for buffers in the global segment that have - * not been allocated using the HSA allocator (::hsa_memory_allocate), but an OS - * allocator instead. - * - * Registrations should not overlap. - * - * @param[in] ptr A buffer in global memory. 
If a NULL pointer is passed, no - * operation is performed. - * - * @param[in] size Requested registration size in bytes. A size of 0 is - * only allowed if @p ptr is NULL. - * - * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. - * - * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been - * initialized. - * - * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES There is a failure in - * allocating the necessary resources. - * - * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p size is 0 but @p ptr - * is not NULL. - */ -hsa_status_t HSA_API hsa_memory_register(void *ptr, size_t size); - -/** - * - * @brief Deregister memory previously registered using ::hsa_memory_register. - * - * @details If the memory interval being deregistered does not match a previous - * registration (start and end addresses), the behavior is undefined. - * - * @param[in] ptr A pointer to the base of the buffer to be deregistered. If - * a NULL pointer is passed, no operation is performed. - * - * @param[in] size Size of the buffer to be deregistered. - * - * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. - * - * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been - * initialized. - * - */ -hsa_status_t HSA_API hsa_memory_deregister(void *ptr, size_t size); - -/** @} */ - -/** \defgroup symbol-attributes Symbol Attributes - * @{ - */ - -/** - * @brief Symbol type. - */ -typedef enum { - /** - * Variable. - */ - HSA_SYMBOL_KIND_VARIABLE = 0, - /** - * Kernel. - */ - HSA_SYMBOL_KIND_KERNEL = 1, - /** - * Indirect function. - */ - HSA_SYMBOL_KIND_INDIRECT_FUNCTION = 2 -} hsa_symbol_kind_t; - -/** - * @brief Allocation type of a variable. - */ -typedef enum { - /** - * Agent allocation. - */ - HSA_VARIABLE_ALLOCATION_AGENT = 0, - /** - * Program allocation. - */ - HSA_VARIABLE_ALLOCATION_PROGRAM = 1 -} hsa_variable_allocation_t; - -/** - * @brief Linkage type of a symbol. 
- */ -typedef enum { - /** - * Module linkage. - */ - HSA_SYMBOL_LINKAGE_MODULE = 0, - /** - * Program linkage. - */ - HSA_SYMBOL_LINKAGE_PROGRAM = 1 -} hsa_symbol_linkage_t; - -/** - * @brief Memory segment associated with a variable. - */ -typedef enum { - /** - * Global memory segment. - */ - HSA_VARIABLE_SEGMENT_GLOBAL = 0, - /** - * Readonly memory segment. - */ - HSA_VARIABLE_SEGMENT_READONLY = 1 -} hsa_variable_segment_t; - -/** @} */ - -/** \defgroup code-object Code Object - * @{ - */ - -/** - * @brief Instruction set architecture. - */ -typedef struct hsa_isa_s { - /** - * Opaque handle. - */ - uint64_t handle; -} hsa_isa_t; - -/** - * @brief Retrieve a reference to an ISA handle out of a symbolic name. - * - * @param[in] name Vendor-specific name associated with a particular instruction - * set architecture. Must be a NUL-terminated string. - * - * @param[out] isa Memory location where the HSA runtime stores the ISA handle - * corresponding to the given name. Must not be NULL. - * - * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. - * - * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p name is NULL, or @p isa is - * NULL. - * - * @retval ::HSA_STATUS_ERROR_INVALID_ISA_NAME The given name does not - * correspond to any instruction set architecture. - */ -hsa_status_t HSA_API hsa_isa_from_name( - const char* name, - hsa_isa_t* isa); - -/** - * @brief Instruction set architecture attributes. - */ -typedef enum { - /** - * The length of the ISA name. The type of this attribute is uint32_t. - */ - HSA_ISA_INFO_NAME_LENGTH = 0, - /** - * Human-readable description. The type of this attribute is character array - * with the length equal to the value of ::HSA_ISA_INFO_NAME_LENGTH attribute. - */ - HSA_ISA_INFO_NAME = 1, - /** - * Number of call conventions supported by the instruction set architecture. - * The type of this attribute is uint32_t. 
- */ - HSA_ISA_INFO_CALL_CONVENTION_COUNT = 2, - /** - * Number of work-items in a wavefront for a given call convention. Must be a - * power of 2 in the range [1,256]. The type of this attribute is uint32_t. - */ - HSA_ISA_INFO_CALL_CONVENTION_INFO_WAVEFRONT_SIZE = 3, - /** - * Number of wavefronts per compute unit for a given call convention. In - * practice, other factors (for example, the amount of group memory used by a - * work-group) may further limit the number of wavefronts per compute - * unit. The type of this attribute is uint32_t. - */ - HSA_ISA_INFO_CALL_CONVENTION_INFO_WAVEFRONTS_PER_COMPUTE_UNIT = 4 -} hsa_isa_info_t; - -/** - * @brief Get the current value of an attribute for a given instruction set - * architecture (ISA). - * - * @param[in] isa A valid instruction set architecture. - * - * @param[in] attribute Attribute to query. - * - * @param[in] index Call convention index. Used only for call convention - * attributes, otherwise ignored. Must have a value between 0 (inclusive) and - * the value of the attribute ::HSA_ISA_INFO_CALL_CONVENTION_COUNT (not - * inclusive) in @p isa. - * - * @param[out] value Pointer to an application-allocated buffer where to store - * the value of the attribute. If the buffer passed by the application is not - * large enough to hold the value of @p attribute, the behavior is undefined. - * - * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. - * - * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been - * initialized. - * - * @retval ::HSA_STATUS_ERROR_INVALID_ISA The instruction set architecture is - * invalid. - * - * @retval ::HSA_STATUS_ERROR_INVALID_INDEX @p index out of range. - * - * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p attribute is an invalid - * instruction set architecture attribute, or @p value is NULL. 
- */ -hsa_status_t HSA_API hsa_isa_get_info( - hsa_isa_t isa, - hsa_isa_info_t attribute, - uint32_t index, - void* value); - -/** - * @brief Check if the instruction set architecture of a code object can be - * executed on an agent associated with another architecture. - * - * @param[in] code_object_isa Instruction set architecture associated with a - * code object. - * - * @param[in] agent_isa Instruction set architecture associated with an agent. - * - * @param[out] result Pointer to a memory location where the HSA runtime stores - * the result of the check. If the two architectures are compatible, the result - * is true; if they are incompatible, the result is false. - * - * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. - * - * @retval ::HSA_STATUS_ERROR_INVALID_ISA @p code_object_isa or @p agent_isa are - * invalid. - * - * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p result is NULL. - */ -hsa_status_t HSA_API hsa_isa_compatible( - hsa_isa_t code_object_isa, - hsa_isa_t agent_isa, - bool* result); - -/** - * @brief An opaque handle to a code object, which contains ISA for finalized - * kernels and indirect functions together with information about the - * global/readonly segment variables they reference. - */ -typedef struct hsa_code_object_s { - /** - * Opaque handle. - */ - uint64_t handle; -} hsa_code_object_t; - -/** - * @brief Opaque handle to application data that is passed to the serialization - * and deserialization functions. - */ -typedef struct hsa_callback_data_s { - /** - * Opaque handle. - */ - uint64_t handle; -} hsa_callback_data_t; - -/** - * @brief Serialize a code object. Can be used for offline finalization, - * install-time finalization, disk code caching, etc. - * - * @param[in] code_object Code object. - * - * @param[in] alloc_callback Callback function for memory allocation. Must not - * be NULL. 
The HSA runtime passes three arguments to the callback: the - * allocation size, the application data, and a pointer to a memory location - * where the application stores the allocation result. The HSA runtime invokes - * @p alloc_callback once to allocate a buffer that contains the serialized - * version of @p code_object. If the callback returns a status code other than - * ::HSA_STATUS_SUCCESS, this function returns the same code. - * - * @param[in] callback_data Application data that is passed to @p - * alloc_callback. May be NULL. - * - * @param[in] options Vendor-specific options. May be NULL. - * - * @param[out] serialized_code_object Memory location where the HSA runtime - * stores a pointer to the serialized code object. Must not be NULL. - * - * @param[out] serialized_code_object_size Memory location where the HSA runtime - * stores the size (in bytes) of @p serialized_code_object. The returned value - * matches the allocation size passed by the HSA runtime to @p - * alloc_callback. Must not be NULL. - * - * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. - * - * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been - * initialized. - * - * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES There is a failure to allocate - * resources required for the operation. - * - * @retval ::HSA_STATUS_ERROR_INVALID_CODE_OBJECT @p code_object is invalid. - * - * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p alloc_callback, @p - * serialized_code_object, or @p serialized_code_object_size are NULL. - */ -hsa_status_t HSA_API hsa_code_object_serialize( - hsa_code_object_t code_object, - hsa_status_t (*alloc_callback)(size_t size, hsa_callback_data_t data, void **address), - hsa_callback_data_t callback_data, - const char *options, - void **serialized_code_object, - size_t *serialized_code_object_size); - -/** - * @brief Deserialize a code object. - * - * @param[in] serialized_code_object A serialized code object. Must not be NULL. 
- * - * @param[in] serialized_code_object_size The size (in bytes) of @p - * serialized_code_object. Must not be 0. - * - * @param[in] options Vendor-specific options. May be NULL. - * - * @param[out] code_object Memory location where the HSA runtime stores the - * deserialized code object. - * - * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. - * - * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been - * initialized. - * - * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES There is a failure to allocate - * resources required for the operation. - * - * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p serialized_code_object, or @p - * code_object are NULL. @p serialized_code_object_size is 0. - */ -hsa_status_t HSA_API hsa_code_object_deserialize( - void *serialized_code_object, - size_t serialized_code_object_size, - const char *options, - hsa_code_object_t *code_object); - -/** - * @brief Destroy a code object. - * - * @details The lifetime of a code object must exceed that of any executable - * where it has been loaded. If an executable that loaded @p code_object has not - * been destroyed, the behavior is undefined. - * - * @param[in] code_object Code object. The handle becomes invalid after it has - * been destroyed. - * - * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. - * - * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been - * initialized. - * - * @retval ::HSA_STATUS_ERROR_INVALID_CODE_OBJECT @p code_object is invalid. - */ -hsa_status_t HSA_API hsa_code_object_destroy( - hsa_code_object_t code_object); - -/** - * @brief Code object type. - */ -typedef enum { - /** - * Produces code object that contains ISA for all kernels and indirect - * functions in HSA source. - */ - HSA_CODE_OBJECT_TYPE_PROGRAM = 0 -} hsa_code_object_type_t; - -/** - * @brief Code object attributes. - */ -typedef enum { - /** - * The version of the code object. 
The type of this attribute is a - * NUL-terminated char[64]. If the version of the code object uses less than - * 63 characters, the rest of the array must be filled with NULs. - */ - HSA_CODE_OBJECT_INFO_VERSION = 0, - /** - * Type of code object. The type of this attribute is - * ::hsa_code_object_type_t. - */ - HSA_CODE_OBJECT_INFO_TYPE = 1, - /** - * Instruction set architecture this code object is produced for. The type of - * this attribute is ::hsa_isa_t. - */ - HSA_CODE_OBJECT_INFO_ISA = 2, - /** - * Machine model this code object is produced for. The type of this attribute - * is ::hsa_machine_model_t. - */ - HSA_CODE_OBJECT_INFO_MACHINE_MODEL = 3, - /** - * Profile this code object is produced for. The type of this attribute is - * ::hsa_profile_t. - */ - HSA_CODE_OBJECT_INFO_PROFILE = 4, - /** - * Default floating-point rounding mode used when the code object is - * produced. The type of this attribute is - * ::hsa_default_float_rounding_mode_t. - */ - HSA_CODE_OBJECT_INFO_DEFAULT_FLOAT_ROUNDING_MODE = 5 -} hsa_code_object_info_t; - -/** - * @brief Get the current value of an attribute for a given code object. - * - * @param[in] code_object Code object. - * - * @param[in] attribute Attribute to query. - * - * @param[out] value Pointer to an application-allocated buffer where to store - * the value of the attribute. If the buffer passed by the application is not - * large enough to hold the value of @p attribute, the behavior is undefined. - * - * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. - * - * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been - * initialized. - * - * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p attribute is an invalid - * code object attribute, or @p value is NULL. - * - * @retval ::HSA_STATUS_ERROR_INVALID_CODE_OBJECT @p code_object is invalid. 
- */ -hsa_status_t HSA_API hsa_code_object_get_info( - hsa_code_object_t code_object, - hsa_code_object_info_t attribute, - void *value); - -/** - * @brief Code object symbol. - */ -typedef struct hsa_code_symbol_s { - /** - * Opaque handle. - */ - uint64_t handle; -} hsa_code_symbol_t; - -/** - * @brief Get the symbol handle within a code object for a given a symbol name. - * - * @param[in] code_object Code object. - * - * @param[in] symbol_name Symbol name. - * - * @param[out] symbol Memory location where the HSA runtime stores the symbol - * handle. - * - * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. - * - * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been - * initialized. - * - * @retval ::HSA_STATUS_ERROR_INVALID_CODE_OBJECT @p code_object is invalid. - * - * @retval ::HSA_STATUS_ERROR_INVALID_SYMBOL_NAME There is no symbol with a name - * that matches @p symbol_name. - * - * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p symbol_name is NULL, or - * @p symbol is NULL. - */ -hsa_status_t HSA_API hsa_code_object_get_symbol( - hsa_code_object_t code_object, - const char *symbol_name, - hsa_code_symbol_t *symbol); - -/** - * @brief Code object symbol attributes. - */ -typedef enum { - /** - * The type of the symbol. The type of this attribute is ::hsa_symbol_kind_t. - */ - HSA_CODE_SYMBOL_INFO_TYPE = 0, - /** - * The length of the symbol name. The type of this attribute is uint32_t. - */ - HSA_CODE_SYMBOL_INFO_NAME_LENGTH = 1, - /** - * The name of the symbol. The type of this attribute is character array with - * the length equal to the value of ::HSA_CODE_SYMBOL_INFO_NAME_LENGTH - * attribute - */ - HSA_CODE_SYMBOL_INFO_NAME = 2, - /** - * The length of the module name to which this symbol belongs if this symbol - * has module linkage, otherwise 0 is returned. The type of this attribute is - * uint32_t. 
- */ - HSA_CODE_SYMBOL_INFO_MODULE_NAME_LENGTH = 3, - /** - * The module name to which this symbol belongs if this symbol has module - * linkage, otherwise empty string is returned. The type of this attribute is - * character array with the length equal to the value of - * ::HSA_CODE_SYMBOL_INFO_MODULE_NAME_LENGTH attribute. - */ - HSA_CODE_SYMBOL_INFO_MODULE_NAME = 4, - /** - * The linkage kind of the symbol. The type of this attribute is - * ::hsa_symbol_linkage_t. - */ - HSA_CODE_SYMBOL_INFO_LINKAGE = 5, - /** - * Indicates whether the symbol corresponds to a definition. The type of this - * attribute is bool. - */ - HSA_CODE_SYMBOL_INFO_IS_DEFINITION = 17, - /** - * The allocation kind of the variable. The value of this attribute is - * undefined if the symbol is not a variable. The type of this attribute is - * ::hsa_variable_allocation_t. - */ - HSA_CODE_SYMBOL_INFO_VARIABLE_ALLOCATION = 6, - /** - * The segment kind of the variable. The value of this attribute is - * undefined if the symbol is not a variable. The type of this attribute is - * ::hsa_variable_segment_t. - */ - HSA_CODE_SYMBOL_INFO_VARIABLE_SEGMENT = 7, - /** - * Alignment of the variable. The value of this attribute is undefined if the - * symbol is not a variable. The type of this attribute is uint32_t. - */ - HSA_CODE_SYMBOL_INFO_VARIABLE_ALIGNMENT = 8, - /** - * Size of the variable. The value of this attribute is undefined if the - * symbol is not a variable. The type of this attribute is uint32_t. - * - * A size of 0 is returned if the variable is an external variable and has an - * unknown dimension. - */ - HSA_CODE_SYMBOL_INFO_VARIABLE_SIZE = 9, - /** - * Indicates whether the variable is constant. The value of this attribute is - * undefined if the symbol is not a variable. The type of this attribute is - * bool. - */ - HSA_CODE_SYMBOL_INFO_VARIABLE_IS_CONST = 10, - /** - * Size of kernarg segment memory that is required to hold the values of the - * kernel arguments, in bytes. 
The value of this attribute is undefined if the - * symbol is not a kernel. The type of this attribute is uint32_t. - */ - HSA_CODE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE = 11, - /** - * Alignment (in bytes) of the buffer used to pass arguments to the kernel, - * which is the maximum of 16 and the maximum alignment of any of the kernel - * arguments. The value of this attribute is undefined if the symbol is not a - * kernel. The type of this attribute is uint32_t. - */ - HSA_CODE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_ALIGNMENT = 12, - /** - * Size of static group segment memory required by the kernel (per - * work-group), in bytes. The value of this attribute is undefined - * if the symbol is not a kernel. The type of this attribute is uint32_t. - * - * The reported amount does not include any dynamically allocated group - * segment memory that may be requested by the application when a kernel is - * dispatched. - */ - HSA_CODE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE = 13, - /** - * Size of static private, spill, and arg segment memory required by - * this kernel (per work-item), in bytes. The value of this attribute is - * undefined if the symbol is not a kernel. The type of this attribute is - * uint32_t. - * - * If the value of ::HSA_CODE_SYMBOL_INFO_KERNEL_DYNAMIC_CALLSTACK is true, - * the kernel may use more private memory than the reported value, and the - * application must add the dynamic call stack usage to @a - * private_segment_size when populating a kernel dispatch packet. - */ - HSA_CODE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE = 14, - /** - * Dynamic callstack flag. The value of this attribute is undefined if the - * symbol is not a kernel. The type of this attribute is bool. - * - * If this flag is set (the value is true), the kernel uses a dynamically - * sized call stack. This can happen if recursive calls, calls to indirect - * functions, or the HSAIL alloca instruction are present in the kernel. 
- */ - HSA_CODE_SYMBOL_INFO_KERNEL_DYNAMIC_CALLSTACK = 15, - /** - * Call convention of the indirect function. The value of this attribute is - * undefined if the symbol is not an indirect function. The type of this - * attribute is uint32_t. - */ - HSA_CODE_SYMBOL_INFO_INDIRECT_FUNCTION_CALL_CONVENTION = 16 -} hsa_code_symbol_info_t; - -/** - * @brief Get the current value of an attribute for a given code symbol. - * - * @param[in] code_symbol Code symbol. - * - * @param[in] attribute Attribute to query. - * - * @param[out] value Pointer to an application-allocated buffer where to store - * the value of the attribute. If the buffer passed by the application is not - * large enough to hold the value of @p attribute, the behavior is undefined. - * - * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. - * - * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been - * initialized. - * - * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p attribute is an invalid - * code symbol attribute, or @p value is NULL. - */ -hsa_status_t HSA_API hsa_code_symbol_get_info( - hsa_code_symbol_t code_symbol, - hsa_code_symbol_info_t attribute, - void *value); - -/** - * @brief Iterate over the symbols in a code object, and invoke an - * application-defined callback on every iteration. - * - * @param[in] code_object Code object. - * - * @param[in] callback Callback to be invoked once per code object symbol. The - * HSA runtime passes three arguments to the callback: the code object, a - * symbol, and the application data. If @p callback returns a status other than - * ::HSA_STATUS_SUCCESS for a particular iteration, the traversal stops and - * ::hsa_code_object_iterate_symbols returns that status value. - * - * @param[in] data Application data that is passed to @p callback on every - * iteration. May be NULL. - * - * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. 
- * - * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been - * initialized. - * - * @retval ::HSA_STATUS_ERROR_INVALID_CODE_OBJECT @p code_object is invalid. - * - * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p callback is NULL. - */ -hsa_status_t HSA_API hsa_code_object_iterate_symbols( - hsa_code_object_t code_object, - hsa_status_t (*callback)(hsa_code_object_t code_object, hsa_code_symbol_t symbol, void* data), - void* data); - -/** @} */ - -/** \defgroup executable Executable - * @{ - */ - -/** - * @brief An opaque handle to an executable, which contains ISA for finalized - * kernels and indirect functions together with the allocated global/readonly - * segment variables they reference. - */ -typedef struct hsa_executable_s { - /** - * Opaque handle. - */ - uint64_t handle; -} hsa_executable_t; - -/** - * @brief Executable state. - */ -typedef enum { - /** - * Executable state, which allows the user to load code objects and define - * external variables. Variable addresses, kernel code handles, and - * indirect function code handles are not available in query operations until - * the executable is frozen (zero always returned). - */ - HSA_EXECUTABLE_STATE_UNFROZEN = 0, - /** - * Executable state, which allows the user to query variable addresses, - * kernel code handles, and indirect function code handles using query - * operation. Loading new code objects, as well as defining external variables - * is not allowed in this state. - */ - HSA_EXECUTABLE_STATE_FROZEN = 1 -} hsa_executable_state_t; - -/** - * @brief Create an empty executable. - * - * @param[in] profile Profile used in the executable. - * - * @param[in] executable_state Executable state. If the state is - * ::HSA_EXECUTABLE_STATE_FROZEN, the resulting executable is useless because no - * code objects can be loaded, and no variables can be defined. - * - * @param[in] options Vendor-specific options. May be NULL. 
- * - * @param[out] executable Memory location where the HSA runtime stores newly - * created executable handle. - * - * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. - * - * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been - * initialized. - * - * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES There is a failure to allocate - * resources required for the operation. - * - * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p profile is invalid, or - * @p executable is NULL. - */ -hsa_status_t HSA_API hsa_executable_create( - hsa_profile_t profile, - hsa_executable_state_t executable_state, - const char *options, - hsa_executable_t *executable); - -/** - * @brief Destroy an executable. - * - * @details Executable handle becomes invalid after the executable has been - * destroyed. Code object handles that were loaded into this executable are - * still valid after the executable has been destroyed, and can be used as - * intended. Resources allocated outside and associated with this executable - * (such as external global/readonly variables) can be released after the - * executable has been destroyed. - * - * Executable should not be destroyed while kernels are in flight. - * - * @param[in] executable Executable. - * - * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. - * - * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been - * initialized. - * - * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE The executable is invalid. - */ -hsa_status_t HSA_API hsa_executable_destroy( - hsa_executable_t executable); - -/** - * @brief Load code object into the executable. - * - * @details Every global/readonly variable that is external must be defined - * using define set of operations before loading code objects. Internal - * global/readonly variable is allocated once the code object, that is being - * loaded, references this variable and this variable is not allocated. 
- * - * Any module linkage declaration must have been defined either by a define - * variable or by loading a code object that has a symbol with module linkage - * definition. - * - * @param[in] executable Executable. - * - * @param[in] agent Agent to load code object for. The agent must support the - * default floating-point rounding mode used by @p code_object. - * - * @param[in] code_object Code object to load. The lifetime of the code object - * must exceed that of the executable: if @p code_object is destroyed before @p - * executable, the behavior is undefined. - * - * @param[in] options Vendor-specific options. May be NULL. - * - * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. - * - * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been - * initialized. - * - * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES There is a failure to allocate - * resources required for the operation. - * - * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE The executable is invalid. - * - * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid. - * - * @retval ::HSA_STATUS_ERROR_INVALID_CODE_OBJECT @p code_object is invalid. - * - * @retval ::HSA_STATUS_ERROR_INCOMPATIBLE_ARGUMENTS @p agent is not compatible - * with @p code_object (for example, @p agent does not support the default - * floating-point rounding mode specified by @p code_object), or @p code_object - * is not compatible with @p executable (for example, @p code_object and @p - * executable have different machine models or profiles). - * - * @retval ::HSA_STATUS_ERROR_FROZEN_EXECUTABLE @p executable is frozen. - */ -hsa_status_t HSA_API hsa_executable_load_code_object( - hsa_executable_t executable, - hsa_agent_t agent, - hsa_code_object_t code_object, - const char *options); - -/** - * @brief Freeze the executable. 
- * - * @details No modifications to executable can be made after freezing: no - * code objects can be loaded to the executable, no external variables can - * be defined. Freezing the executable does not prevent querying executable's - * attributes. - * - * @param[in] executable Executable. - * - * @param[in] options Vendor-specific options. May be NULL. - * - * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. - * - * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been - * initialized. - * - * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE The executable is invalid. - * - * @retval ::HSA_STATUS_ERROR_VARIABLE_UNDEFINED One or more variable is - * undefined in the executable. - * - * @retval ::HSA_STATUS_ERROR_FROZEN_EXECUTABLE @p executable is already frozen. - */ -hsa_status_t HSA_API hsa_executable_freeze( - hsa_executable_t executable, - const char *options); - -/** - * @brief Executable attributes. - */ -typedef enum { - /** - * Profile this executable is created for. The type of this attribute is - * ::hsa_profile_t. - */ - HSA_EXECUTABLE_INFO_PROFILE = 1, - /** - * Executable state. The type of this attribute is ::hsa_executable_state_t. - */ - HSA_EXECUTABLE_INFO_STATE = 2 -} hsa_executable_info_t; - -/** - * @brief Get the current value of an attribute for a given executable. - * - * @param[in] executable Executable. - * - * @param[in] attribute Attribute to query. - * - * @param[out] value Pointer to an application-allocated buffer where to store - * the value of the attribute. If the buffer passed by the application is not - * large enough to hold the value of @p attribute, the behavior is undefined. - * - * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. - * - * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been - * initialized. - * - * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE The executable is invalid. 
- * - * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p attribute is an invalid - * executable attribute, or @p value is NULL. - */ -hsa_status_t HSA_API hsa_executable_get_info( - hsa_executable_t executable, - hsa_executable_info_t attribute, - void *value); - -/** - * @brief Define an external global variable with program allocation. - * - * @details This function allows the application to provide the definition - * of a variable in the global segment memory with program allocation. The - * variable must be defined before loading a code object into an executable. - * In addition, code objects loaded must not define the variable. - * - * @param[in] executable Executable. - * - * @param[in] variable_name Name of the variable. - * - * @param[in] address Address where the variable is defined. The buffer pointed - * by @p address is owned by the application, and cannot be deallocated before - * @p executable is destroyed. - * - * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. - * - * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been - * initialized. - * - * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES There is a failure to allocate - * resources required for the operation. - * - * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p variable_name is NULL. - * - * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE The executable is invalid. - * - * @retval ::HSA_STATUS_ERROR_VARIABLE_ALREADY_DEFINED The variable is - * already defined. - * - * @retval ::HSA_STATUS_ERROR_INVALID_SYMBOL_NAME There is no variable with the - * @p variable_name. - * - * @retval ::HSA_STATUS_ERROR_FROZEN_EXECUTABLE @p executable is frozen. - */ -hsa_status_t HSA_API hsa_executable_global_variable_define( - hsa_executable_t executable, - const char *variable_name, - void *address); - -/** - * @brief Define an external global variable with agent allocation. 
- * - * @details This function allows the application to provide the definition - * of a variable in the global segment memory with agent allocation. The - * variable must be defined before loading a code object into an executable. - * In addition, code objects loaded must not define the variable. - * - * @param[in] executable Executable. - * - * @param[in] agent Agent for which the variable is being defined. - * - * @param[in] variable_name Name of the variable. - * - * @param[in] address Address where the variable is defined. The buffer pointed - * by @p address is owned by the application, and cannot be deallocated before - * @p executable is destroyed. - * - * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. - * - * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been - * initialized. - * - * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES There is a failure to allocate - * resources required for the operation. - * - * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p variable_name is NULL. - * - * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE The executable is invalid. - * - * @retval ::HSA_STATUS_ERROR_INVALID_AGENT @p agent is invalid. - * - * @retval ::HSA_STATUS_ERROR_VARIABLE_ALREADY_DEFINED The variable is - * already defined. - * - * @retval ::HSA_STATUS_ERROR_INVALID_SYMBOL_NAME There is no variable with the - * @p variable_name. - * - * @retval ::HSA_STATUS_ERROR_FROZEN_EXECUTABLE @p executable is frozen. - */ -hsa_status_t HSA_API hsa_executable_agent_global_variable_define( - hsa_executable_t executable, - hsa_agent_t agent, - const char *variable_name, - void *address); - -/** - * @brief Define an external readonly variable. - * - * @details This function allows the application to provide the definition - * of a variable in the readonly segment memory. The variable must be defined - * before loading a code object into an executable. In addition, code objects - * loaded must not define the variable. 
- * - * @param[in] executable Executable. - * - * @param[in] agent Agent for which the variable is being defined. - * - * @param[in] variable_name Name of the variable. - * - * @param[in] address Address where the variable is defined. The buffer pointed - * by @p address is owned by the application, and cannot be deallocated before - * @p executable is destroyed. - * - * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. - * - * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been - * initialized. - * - * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES There is a failure to allocate - * resources required for the operation. - * - * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p variable_name is NULL. - * - * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE Executable is invalid. - * - * @retval ::HSA_STATUS_ERROR_INVALID_AGENT @p agent is invalid. - * - * @retval ::HSA_STATUS_ERROR_VARIABLE_ALREADY_DEFINED The variable is - * already defined. - * - * @retval ::HSA_STATUS_ERROR_INVALID_SYMBOL_NAME There is no variable with the - * @p variable_name. - * - * @retval ::HSA_STATUS_ERROR_FROZEN_EXECUTABLE @p executable is frozen. - */ -hsa_status_t HSA_API hsa_executable_readonly_variable_define( - hsa_executable_t executable, - hsa_agent_t agent, - const char *variable_name, - void *address); - -/** - * @brief Validate executable. Checks that all code objects have matching - * machine model, profile, and default floating-point rounding mode. Checks that - * all declarations have definitions. Checks declaration-definition - * compatibility (see HSA Programming Reference Manual for compatibility rules). - * - * @param[in] executable Executable. - * - * @param[out] result Memory location where the HSA runtime stores the - * validation result. If the executable is valid, the result is 0. - * - * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. 
- * - * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been - * initialized. - * - * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE @p executable is invalid. - * - * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p result is NULL. - */ -hsa_status_t HSA_API hsa_executable_validate( - hsa_executable_t executable, - uint32_t* result); - -/** - * @brief Executable symbol. - */ -typedef struct hsa_executable_symbol_s { - /** - * Opaque handle. - */ - uint64_t handle; -} hsa_executable_symbol_t; - -/** - * @brief Get the symbol handle for a given a symbol name. - * - * @param[in] executable Executable. - * - * @param[in] module_name Module name. Must be NULL if the symbol has - * program linkage. - * - * @param[in] symbol_name Symbol name. - * - * @param[in] agent Agent associated with the symbol. If the symbol is - * independent of any agent (for example, a variable with program - * allocation), this argument is ignored. - * - * @param[in] call_convention Call convention associated with the symbol. If the - * symbol does not correspond to an indirect function, this argument is ignored. - * - * @param[out] symbol Memory location where the HSA runtime stores the symbol - * handle. - * - * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. - * - * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been - * initialized. - * - * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE The executable is invalid. - * - * @retval ::HSA_STATUS_ERROR_INVALID_SYMBOL_NAME There is no symbol with a name - * that matches @p symbol_name. - * - * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p symbol_name is NULL, or - * @p symbol is NULL. - */ -hsa_status_t HSA_API hsa_executable_get_symbol( - hsa_executable_t executable, - const char *module_name, - const char *symbol_name, - hsa_agent_t agent, - int32_t call_convention, - hsa_executable_symbol_t *symbol); - -/** - * @brief Executable symbol attributes. 
- */ -typedef enum { - /** - * The kind of the symbol. The type of this attribute is ::hsa_symbol_kind_t. - */ - HSA_EXECUTABLE_SYMBOL_INFO_TYPE = 0, - /** - * The length of the symbol name. The type of this attribute is uint32_t. - */ - HSA_EXECUTABLE_SYMBOL_INFO_NAME_LENGTH = 1, - /** - * The name of the symbol. The type of this attribute is character array with - * the length equal to the value of ::HSA_EXECUTABLE_SYMBOL_INFO_NAME_LENGTH - * attribute - */ - HSA_EXECUTABLE_SYMBOL_INFO_NAME = 2, - /** - * The length of the module name to which this symbol belongs if this symbol - * has module linkage, otherwise 0 is returned. The type of this attribute is - * uint32_t. - */ - HSA_EXECUTABLE_SYMBOL_INFO_MODULE_NAME_LENGTH = 3, - /** - * The module name to which this symbol belongs if this symbol has module - * linkage, otherwise empty string is returned. The type of this attribute is - * character array with the length equal to the value of - * ::HSA_EXECUTABLE_SYMBOL_INFO_MODULE_NAME_LENGTH attribute. - */ - HSA_EXECUTABLE_SYMBOL_INFO_MODULE_NAME = 4, - /** - * Agent associated with this symbol. If the symbol is a variable, the - * value of this attribute is only defined if - * ::HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ALLOCATION is - * ::HSA_VARIABLE_ALLOCATION_AGENT. The type of this attribute is hsa_agent_t. - */ - HSA_EXECUTABLE_SYMBOL_INFO_AGENT = 20, - /** - * The address of the variable. The value of this attribute is undefined if - * the symbol is not a variable. The type of this attribute is uint64_t. - * - * If executable's state is ::HSA_EXECUTABLE_STATE_UNFROZEN, then 0 is - * returned. - */ - HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ADDRESS = 21, - /** - * The linkage kind of the symbol. The type of this attribute is - * ::hsa_symbol_linkage_t. - */ - HSA_EXECUTABLE_SYMBOL_INFO_LINKAGE = 5, - /** - * Indicates whether the symbol corresponds to a definition. The type of this - * attribute is bool. 
- */ - HSA_EXECUTABLE_SYMBOL_INFO_IS_DEFINITION = 17, - /** - * The allocation kind of the variable. The value of this attribute is - * undefined if the symbol is not a variable. The type of this attribute is - * ::hsa_variable_allocation_t. - */ - HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ALLOCATION = 6, - /** - * The segment kind of the variable. The value of this attribute is undefined - * if the symbol is not a variable. The type of this attribute is - * ::hsa_variable_segment_t. - */ - HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_SEGMENT = 7, - /** - * Alignment of the variable. The value of this attribute is undefined if - * the symbol is not a variable. The type of this attribute is uint32_t. - */ - HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ALIGNMENT = 8, - /** - * Size of the variable. The value of this attribute is undefined if - * the symbol is not a variable. The type of this attribute is uint32_t. - * - * A value of 0 is returned if the variable is an external variable and has an - * unknown dimension. - */ - HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_SIZE = 9, - /** - * Indicates whether the variable is constant. The value of this attribute is - * undefined if the symbol is not a variable. The type of this attribute is - * bool. - */ - HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_IS_CONST = 10, - /** - * Kernel object handle, used in the kernel dispatch packet. The value of this - * attribute is undefined if the symbol is not a kernel. The type of this - * attribute is uint64_t. - * - * If the state of the executable is ::HSA_EXECUTABLE_STATE_UNFROZEN, then 0 - * is returned. - */ - HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT = 22, - /** - * Size of kernarg segment memory that is required to hold the values of the - * kernel arguments, in bytes. The value of this attribute is undefined if the - * symbol is not a kernel. The type of this attribute is uint32_t. 
- */ - HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE = 11, - /** - * Alignment (in bytes) of the buffer used to pass arguments to the kernel, - * which is the maximum of 16 and the maximum alignment of any of the kernel - * arguments. The value of this attribute is undefined if the symbol is not a - * kernel. The type of this attribute is uint32_t. - */ - HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_ALIGNMENT = 12, - /** - * Size of static group segment memory required by the kernel (per - * work-group), in bytes. The value of this attribute is undefined - * if the symbol is not a kernel. The type of this attribute is uint32_t. - * - * The reported amount does not include any dynamically allocated group - * segment memory that may be requested by the application when a kernel is - * dispatched. - */ - HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE = 13, - /** - * Size of static private, spill, and arg segment memory required by - * this kernel (per work-item), in bytes. The value of this attribute is - * undefined if the symbol is not a kernel. The type of this attribute is - * uint32_t. - * - * If the value of ::HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_DYNAMIC_CALLSTACK is - * true, the kernel may use more private memory than the reported value, and - * the application must add the dynamic call stack usage to @a - * private_segment_size when populating a kernel dispatch packet. - */ - HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE = 14, - /** - * Dynamic callstack flag. The value of this attribute is undefined if the - * symbol is not a kernel. The type of this attribute is bool. - * - * If this flag is set (the value is true), the kernel uses a dynamically - * sized call stack. This can happen if recursive calls, calls to indirect - * functions, or the HSAIL alloca instruction are present in the kernel. - */ - HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_DYNAMIC_CALLSTACK = 15, - /** - * Indirect function object handle. 
The value of this attribute is undefined - * if the symbol is not an indirect function, or the associated agent does - * not support the Full Profile. The type of this attribute depends on the - * machine model: if machine model is small, then the type is uint32_t, if - * machine model is large, then the type is uint64_t. - * - * If the state of the executable is ::HSA_EXECUTABLE_STATE_UNFROZEN, then 0 - * is returned. - */ - HSA_EXECUTABLE_SYMBOL_INFO_INDIRECT_FUNCTION_OBJECT = 23, - /** - * Call convention of the indirect function. The value of this attribute is - * undefined if the symbol is not an indirect function, or the associated - * agent does not support the Full Profile. The type of this attribute is - * uint32_t. - */ - HSA_EXECUTABLE_SYMBOL_INFO_INDIRECT_FUNCTION_CALL_CONVENTION = 16 -} hsa_executable_symbol_info_t; - -/** - * @brief Get the current value of an attribute for a given executable symbol. - * - * @param[in] executable_symbol Executable symbol. - * - * @param[in] attribute Attribute to query. - * - * @param[out] value Pointer to an application-allocated buffer where to store - * the value of the attribute. If the buffer passed by the application is not - * large enough to hold the value of @p attribute, the behavior is undefined. - * - * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. - * - * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been - * initialized. - * - * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p attribute is an invalid - * executable symbol attribute, or @p value is NULL. - */ -hsa_status_t HSA_API hsa_executable_symbol_get_info( - hsa_executable_symbol_t executable_symbol, - hsa_executable_symbol_info_t attribute, - void *value); - -/** - * @brief Iterate over the symbols in a executable, and invoke an - * application-defined callback on every iteration. - * - * @param[in] executable Executable. - * - * @param[in] callback Callback to be invoked once per executable symbol. 
The - * HSA runtime passes three arguments to the callback: the executable, a symbol, - * and the application data. If @p callback returns a status other than - * ::HSA_STATUS_SUCCESS for a particular iteration, the traversal stops and - * ::hsa_executable_iterate_symbols returns that status value. - * - * @param[in] data Application data that is passed to @p callback on every - * iteration. May be NULL. - * - * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. - * - * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been - * initialized. - * - * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE Th executable is invalid. - * - * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p callback is NULL. - */ -hsa_status_t HSA_API hsa_executable_iterate_symbols( - hsa_executable_t executable, - hsa_status_t (*callback)(hsa_executable_t executable, hsa_executable_symbol_t symbol, void* data), - void* data); - -/** @} */ - -#ifdef __cplusplus -} // end extern "C" block -#endif - -#endif // header guard diff --git a/runtime/hsa-runtime/inc/hsa_api_trace.h b/runtime/hsa-runtime/inc/hsa_api_trace.h deleted file mode 100644 index 5bfba0c1cd..0000000000 --- a/runtime/hsa-runtime/inc/hsa_api_trace.h +++ /dev/null @@ -1,177 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// -// The University of Illinois/NCSA -// Open Source License (NCSA) -// -// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. -// -// Developed by: -// -// AMD Research and AMD HSA Software Development -// -// Advanced Micro Devices, Inc. 
-// -// www.amd.com -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal with the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimers. -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimers in -// the documentation and/or other materials provided with the distribution. -// - Neither the names of Advanced Micro Devices, Inc, -// nor the names of its contributors may be used to endorse or promote -// products derived from this Software without specific prior written -// permission. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR -// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// DEALINGS WITH THE SOFTWARE. 
-// -//////////////////////////////////////////////////////////////////////////////// - -#ifndef HSA_RUNTIME_INC_HSA_API_TRACE_H -#define HSA_RUNTIME_INC_HSA_API_TRACE_H - -#include "hsa.h" -#ifdef AMD_INTERNAL_BUILD -#include "hsa_ext_image.h" -#include "hsa_ext_amd.h" -#include "hsa_ext_finalize.h" -#else -#include "inc/hsa_ext_image.h" -#include "inc/hsa_ext_amd.h" -#include "inc/hsa_ext_finalize.h" -#endif - -struct ExtTable { - decltype(hsa_ext_program_create)* hsa_ext_program_create_fn; - decltype(hsa_ext_program_destroy)* hsa_ext_program_destroy_fn; - decltype(hsa_ext_program_add_module)* hsa_ext_program_add_module_fn; - decltype(hsa_ext_program_iterate_modules)* hsa_ext_program_iterate_modules_fn; - decltype(hsa_ext_program_get_info)* hsa_ext_program_get_info_fn; - decltype(hsa_ext_program_finalize)* hsa_ext_program_finalize_fn; - decltype(hsa_ext_image_get_capability)* hsa_ext_image_get_capability_fn; - decltype(hsa_ext_image_data_get_info)* hsa_ext_image_data_get_info_fn; - decltype(hsa_ext_image_create)* hsa_ext_image_create_fn; - decltype(hsa_ext_image_import)* hsa_ext_image_import_fn; - decltype(hsa_ext_image_export)* hsa_ext_image_export_fn; - decltype(hsa_ext_image_copy)* hsa_ext_image_copy_fn; - decltype(hsa_ext_image_clear)* hsa_ext_image_clear_fn; - decltype(hsa_ext_image_destroy)* hsa_ext_image_destroy_fn; - decltype(hsa_ext_sampler_create)* hsa_ext_sampler_create_fn; - decltype(hsa_ext_sampler_destroy)* hsa_ext_sampler_destroy_fn; -}; - -struct ApiTable { - decltype(hsa_init)* hsa_init_fn; - decltype(hsa_shut_down)* hsa_shut_down_fn; - decltype(hsa_system_get_info)* hsa_system_get_info_fn; - decltype(hsa_system_extension_supported)* hsa_system_extension_supported_fn; - decltype(hsa_system_get_extension_table)* hsa_system_get_extension_table_fn; - decltype(hsa_iterate_agents)* hsa_iterate_agents_fn; - decltype(hsa_agent_get_info)* hsa_agent_get_info_fn; - decltype(hsa_queue_create)* hsa_queue_create_fn; - decltype(hsa_soft_queue_create)* 
hsa_soft_queue_create_fn; - decltype(hsa_queue_destroy)* hsa_queue_destroy_fn; - decltype(hsa_queue_inactivate)* hsa_queue_inactivate_fn; - decltype(hsa_queue_load_read_index_acquire)* hsa_queue_load_read_index_acquire_fn; - decltype(hsa_queue_load_read_index_relaxed)* hsa_queue_load_read_index_relaxed_fn; - decltype(hsa_queue_load_write_index_acquire)* hsa_queue_load_write_index_acquire_fn; - decltype(hsa_queue_load_write_index_relaxed)* hsa_queue_load_write_index_relaxed_fn; - decltype(hsa_queue_store_write_index_relaxed)* hsa_queue_store_write_index_relaxed_fn; - decltype(hsa_queue_store_write_index_release)* hsa_queue_store_write_index_release_fn; - decltype(hsa_queue_cas_write_index_acq_rel)* hsa_queue_cas_write_index_acq_rel_fn; - decltype(hsa_queue_cas_write_index_acquire)* hsa_queue_cas_write_index_acquire_fn; - decltype(hsa_queue_cas_write_index_relaxed)* hsa_queue_cas_write_index_relaxed_fn; - decltype(hsa_queue_cas_write_index_release)* hsa_queue_cas_write_index_release_fn; - decltype(hsa_queue_add_write_index_acq_rel)* hsa_queue_add_write_index_acq_rel_fn; - decltype(hsa_queue_add_write_index_acquire)* hsa_queue_add_write_index_acquire_fn; - decltype(hsa_queue_add_write_index_relaxed)* hsa_queue_add_write_index_relaxed_fn; - decltype(hsa_queue_add_write_index_release)* hsa_queue_add_write_index_release_fn; - decltype(hsa_queue_store_read_index_relaxed)* hsa_queue_store_read_index_relaxed_fn; - decltype(hsa_queue_store_read_index_release)* hsa_queue_store_read_index_release_fn; - decltype(hsa_agent_iterate_regions)* hsa_agent_iterate_regions_fn; - decltype(hsa_region_get_info)* hsa_region_get_info_fn; - decltype(hsa_agent_get_exception_policies)* hsa_agent_get_exception_policies_fn; - decltype(hsa_agent_extension_supported)* hsa_agent_extension_supported_fn; - decltype(hsa_memory_register)* hsa_memory_register_fn; - decltype(hsa_memory_deregister)* hsa_memory_deregister_fn; - decltype(hsa_memory_allocate)* hsa_memory_allocate_fn; - 
decltype(hsa_memory_free)* hsa_memory_free_fn; - decltype(hsa_memory_copy)* hsa_memory_copy_fn; - decltype(hsa_memory_assign_agent)* hsa_memory_assign_agent_fn; - decltype(hsa_signal_create)* hsa_signal_create_fn; - decltype(hsa_signal_destroy)* hsa_signal_destroy_fn; - decltype(hsa_signal_load_relaxed)* hsa_signal_load_relaxed_fn; - decltype(hsa_signal_load_acquire)* hsa_signal_load_acquire_fn; - decltype(hsa_signal_store_relaxed)* hsa_signal_store_relaxed_fn; - decltype(hsa_signal_store_release)* hsa_signal_store_release_fn; - decltype(hsa_signal_wait_relaxed)* hsa_signal_wait_relaxed_fn; - decltype(hsa_signal_wait_acquire)* hsa_signal_wait_acquire_fn; - decltype(hsa_signal_and_relaxed)* hsa_signal_and_relaxed_fn; - decltype(hsa_signal_and_acquire)* hsa_signal_and_acquire_fn; - decltype(hsa_signal_and_release)* hsa_signal_and_release_fn; - decltype(hsa_signal_and_acq_rel)* hsa_signal_and_acq_rel_fn; - decltype(hsa_signal_or_relaxed)* hsa_signal_or_relaxed_fn; - decltype(hsa_signal_or_acquire)* hsa_signal_or_acquire_fn; - decltype(hsa_signal_or_release)* hsa_signal_or_release_fn; - decltype(hsa_signal_or_acq_rel)* hsa_signal_or_acq_rel_fn; - decltype(hsa_signal_xor_relaxed)* hsa_signal_xor_relaxed_fn; - decltype(hsa_signal_xor_acquire)* hsa_signal_xor_acquire_fn; - decltype(hsa_signal_xor_release)* hsa_signal_xor_release_fn; - decltype(hsa_signal_xor_acq_rel)* hsa_signal_xor_acq_rel_fn; - decltype(hsa_signal_exchange_relaxed)* hsa_signal_exchange_relaxed_fn; - decltype(hsa_signal_exchange_acquire)* hsa_signal_exchange_acquire_fn; - decltype(hsa_signal_exchange_release)* hsa_signal_exchange_release_fn; - decltype(hsa_signal_exchange_acq_rel)* hsa_signal_exchange_acq_rel_fn; - decltype(hsa_signal_add_relaxed)* hsa_signal_add_relaxed_fn; - decltype(hsa_signal_add_acquire)* hsa_signal_add_acquire_fn; - decltype(hsa_signal_add_release)* hsa_signal_add_release_fn; - decltype(hsa_signal_add_acq_rel)* hsa_signal_add_acq_rel_fn; - decltype(hsa_signal_subtract_relaxed)* 
hsa_signal_subtract_relaxed_fn; - decltype(hsa_signal_subtract_acquire)* hsa_signal_subtract_acquire_fn; - decltype(hsa_signal_subtract_release)* hsa_signal_subtract_release_fn; - decltype(hsa_signal_subtract_acq_rel)* hsa_signal_subtract_acq_rel_fn; - decltype(hsa_signal_cas_relaxed)* hsa_signal_cas_relaxed_fn; - decltype(hsa_signal_cas_acquire)* hsa_signal_cas_acquire_fn; - decltype(hsa_signal_cas_release)* hsa_signal_cas_release_fn; - decltype(hsa_signal_cas_acq_rel)* hsa_signal_cas_acq_rel_fn; - decltype(hsa_isa_from_name)* hsa_isa_from_name_fn; - decltype(hsa_isa_get_info)* hsa_isa_get_info_fn; - decltype(hsa_isa_compatible)* hsa_isa_compatible_fn; - decltype(hsa_code_object_serialize)* hsa_code_object_serialize_fn; - decltype(hsa_code_object_deserialize)* hsa_code_object_deserialize_fn; - decltype(hsa_code_object_destroy)* hsa_code_object_destroy_fn; - decltype(hsa_code_object_get_info)* hsa_code_object_get_info_fn; - decltype(hsa_code_object_get_symbol)* hsa_code_object_get_symbol_fn; - decltype(hsa_code_symbol_get_info)* hsa_code_symbol_get_info_fn; - decltype(hsa_code_object_iterate_symbols)* hsa_code_object_iterate_symbols_fn; - decltype(hsa_executable_create)* hsa_executable_create_fn; - decltype(hsa_executable_destroy)* hsa_executable_destroy_fn; - decltype(hsa_executable_load_code_object)* hsa_executable_load_code_object_fn; - decltype(hsa_executable_freeze)* hsa_executable_freeze_fn; - decltype(hsa_executable_get_info)* hsa_executable_get_info_fn; - decltype(hsa_executable_global_variable_define)* hsa_executable_global_variable_define_fn; - decltype(hsa_executable_agent_global_variable_define)* hsa_executable_agent_global_variable_define_fn; - decltype(hsa_executable_readonly_variable_define)* hsa_executable_readonly_variable_define_fn; - decltype(hsa_executable_validate)* hsa_executable_validate_fn; - decltype(hsa_executable_get_symbol)* hsa_executable_get_symbol_fn; - decltype(hsa_executable_symbol_get_info)* hsa_executable_symbol_get_info_fn; - 
decltype(hsa_executable_iterate_symbols)* hsa_executable_iterate_symbols_fn; - decltype(hsa_status_string)* hsa_status_string_fn; - - ExtTable* std_exts_; -}; - -#endif diff --git a/runtime/hsa-runtime/inc/hsa_ext_amd.h b/runtime/hsa-runtime/inc/hsa_ext_amd.h deleted file mode 100644 index bb32b05a0d..0000000000 --- a/runtime/hsa-runtime/inc/hsa_ext_amd.h +++ /dev/null @@ -1,1183 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// -// The University of Illinois/NCSA -// Open Source License (NCSA) -// -// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. -// -// Developed by: -// -// AMD Research and AMD HSA Software Development -// -// Advanced Micro Devices, Inc. -// -// www.amd.com -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal with the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimers. -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimers in -// the documentation and/or other materials provided with the distribution. -// - Neither the names of Advanced Micro Devices, Inc, -// nor the names of its contributors may be used to endorse or promote -// products derived from this Software without specific prior written -// permission. 
-// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR -// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// DEALINGS WITH THE SOFTWARE. -// -//////////////////////////////////////////////////////////////////////////////// - -// HSA AMD extension. - -#ifndef HSA_RUNTIME_EXT_AMD_H_ -#define HSA_RUNTIME_EXT_AMD_H_ - -#include "hsa.h" -#include "hsa_ext_image.h" - -#ifdef __cplusplus -extern "C" { -#endif - -/** - * @brief Enumeration constants added to ::hsa_status_t. - * - * @remark Additions to hsa_status_t - */ -enum { - /** - * The memory pool is invalid. - */ - HSA_STATUS_ERROR_INVALID_MEMORY_POOL = 40 -}; - -/** - * @brief Agent attributes. - */ -typedef enum hsa_amd_agent_info_s { - /** - * Chip identifier. The type of this attribute is uint32_t. - */ - HSA_AMD_AGENT_INFO_CHIP_ID = 0xA000, - /** - * Size of a cacheline in bytes. The type of this attribute is uint32_t. - */ - HSA_AMD_AGENT_INFO_CACHELINE_SIZE = 0xA001, - /** - * The number of compute unit available in the agent. The type of this - * attribute is uint32_t. - */ - HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT = 0xA002, - /** - * The maximum clock frequency of the agent in MHz. The type of this - * attribute is uint32_t. - */ - HSA_AMD_AGENT_INFO_MAX_CLOCK_FREQUENCY = 0xA003, - /** - * Internal driver node identifier. The type of this attribute is uint32_t. - */ - HSA_AMD_AGENT_INFO_DRIVER_NODE_ID = 0xA004, - /** - * Max number of watch points on memory address ranges to generate exception - * events when the watched addresses are accessed. 
- */ - HSA_AMD_AGENT_INFO_MAX_ADDRESS_WATCH_POINTS = 0xA005, - /** - * Agent BDF_ID, named LocationID in thunk. The type of this attribute is - * uint16_t. - */ - HSA_AMD_AGENT_INFO_BDFID = 0xA006 -} hsa_amd_agent_info_t; - -/** - * @brief Region attributes. - */ -typedef enum hsa_amd_region_info_s { - /** - * Determine if host can access the region. The type of this attribute - * is bool. - */ - HSA_AMD_REGION_INFO_HOST_ACCESSIBLE = 0xA000, - /** - * Base address of the region in flat address space. - */ - HSA_AMD_REGION_INFO_BASE = 0xA001, - /** - * Memory Interface width, the return value type is uint32_t - */ - HSA_AMD_REGION_INFO_BUS_WIDTH = 0xA002, - /** - * Max Memory Clock, the return value type is uint32_t - */ - HSA_AMD_REGION_INFO_MAX_CLOCK_FREQUENCY = 0xA003 -} hsa_amd_region_info_t; - -/** - * @brief Coherency attributes of fine grain region. - */ -typedef enum hsa_amd_coherency_type_s { - /** - * Coherent region. - */ - HSA_AMD_COHERENCY_TYPE_COHERENT = 0, - /** - * Non coherent region. - */ - HSA_AMD_COHERENCY_TYPE_NONCOHERENT = 1 -} hsa_amd_coherency_type_t; - -/** - * @brief Get the coherency type of the fine grain region of an agent. - * - * @param[in] agent A valid agent. - * - * @param[out] type Pointer to a memory location where the HSA runtime will - * store the coherency type of the fine grain region. - * - * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. - * - * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been - * initialized. - * - * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid. - * - * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p type is NULL. - */ -hsa_status_t HSA_API hsa_amd_coherency_get_type(hsa_agent_t agent, - hsa_amd_coherency_type_t* type); - -/** - * @brief Set the coherency type of the fine grain region of an agent. - * Deprecated. This is supported on KV platforms. For backward compatibility - * other platforms will spuriously succeed. 
- * - * @param[in] agent A valid agent. - * - * @param[in] type The coherency type to be set. - * - * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. - * - * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been - * initialized. - * - * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid. - * - * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p type is invalid. - */ -hsa_status_t HSA_API hsa_amd_coherency_set_type(hsa_agent_t agent, - hsa_amd_coherency_type_t type); - -/** - * @brief Structure containing profiling dispatch time information. - * - * Times are reported as ticks in the domain of the HSA system clock. - * The HSA system clock tick and frequency is obtained via hsa_system_get_info. - */ -typedef struct hsa_amd_profiling_dispatch_time_s { - /** - * Dispatch packet processing start time. - */ - uint64_t start; - /** - * Dispatch packet completion time. - */ - uint64_t end; -} hsa_amd_profiling_dispatch_time_t; - -/** - * @brief Enable or disable profiling capability of a queue. - * - * @param[in] queue A valid queue. - * - * @param[in] enable 1 to enable profiling. 0 to disable profiling. - * - * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. - * - * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been - * initialized. - * - * @retval ::HSA_STATUS_ERROR_INVALID_QUEUE The queue is invalid. - * - * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p queue is NULL. - */ -hsa_status_t HSA_API - hsa_amd_profiling_set_profiler_enabled(hsa_queue_t* queue, int enable); - -/** - * @brief Retrieve packet processing time stamps. - * - * @param[in] agent The agent with which the signal was last used. For instance, - * if the profiled dispatch packet is dispatched on to queue Q, which was - * created on agent A, then this parameter must be A. - * - * @param[in] signal A signal used as the completion signal of the dispatch - * packet to retrieve time stamps from. 
This dispatch packet must have been - * issued to a queue with profiling enabled and have already completed. Also - * the signal must not have yet been used in any other packet following the - * completion of the profiled dispatch packet. - * - * @param[out] time Packet processing timestamps in the HSA system clock - * domain. - * - * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. - * - * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been - * initialized. - * - * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid. - * - * @retval ::HSA_STATUS_ERROR_INVALID_SIGNAL The signal is invalid. - * - * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p time is NULL. - */ -hsa_status_t HSA_API hsa_amd_profiling_get_dispatch_time( - hsa_agent_t agent, hsa_signal_t signal, - hsa_amd_profiling_dispatch_time_t* time); - -/** - * @brief Computes the frequency ratio and offset between the agent clock and - * HSA system clock and converts the agent’s tick to HSA system domain tick. - * - * @param[in] agent The agent used to retrieve the agent_tick. It is user's - * responsibility to make sure the tick number is from this agent, otherwise, - * the behavior is undefined. - * - * @param[in] agent_tick The tick count retrieved from the specified @p agent. - * - * @param[out] system_tick The translated HSA system domain clock counter tick. - * - * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. - * - * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been - * initialized. - * - * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid. - * - * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p system_tick is NULL; - */ -hsa_status_t HSA_API - hsa_amd_profiling_convert_tick_to_system_domain(hsa_agent_t agent, - uint64_t agent_tick, - uint64_t* system_tick); - -/** - * @brief Asyncronous signal handler function type. 
- * - * @details Type definition of callback function to be used with - * hsa_amd_signal_async_handler. This callback is invoked if the associated - * signal and condition are met. The callback receives the value of the signal - * which satisfied the associated wait condition and a user provided value. If - * the callback returns true then the callback will be called again if the - * associated signal and condition are satisfied again. If the callback returns - * false then it will not be called again. - * - * @param[in] value Contains the value of the signal observed by - * hsa_amd_signal_async_handler which caused the signal handler to be invoked. - * - * @param[in] arg Contains the user provided value given when the signal handler - * was registered with hsa_amd_signal_async_handler - * - * @retval true resumes monitoring the signal with this handler (as if calling - * hsa_amd_signal_async_handler again with identical parameters) - * - * @retval false stops monitoring the signal with this handler (handler will - * not be called again for this signal) - * - */ -typedef bool (*hsa_amd_signal_handler)(hsa_signal_value_t value, void* arg); - -/** - * @brief Register asynchronous signal handler function. - * - * @details Allows registering a callback function and user provided value with - * a signal and wait condition. The callback will be invoked if the associated - * signal and wait condition are satisfied. Callbacks will be invoked serially - * but in an arbitrary order so callbacks should be independent of each other. - * After being invoked a callback may continue to wait for its associated signal - * and condition and, possibly, be invoked again. Or the callback may stop - * waiting. If the callback returns true then it will continue waiting and may - * be called again. If false then the callback will not wait again and will not - * be called again for the associated signal and condition. 
It is possible to - * register the same callback multiple times with the same or different signals - * and/or conditions. Each registration of the callback will be treated entirely - * independently. - * - * @param[in] signal hsa signal to be asynchronously monitored - * - * @param[in] cond condition value to monitor for - * - * @param[in] value signal value used in condition expression - * - * @param[in] handler asynchronous signal handler invoked when signal's - * condition is met - * - * @param[in] arg user provided value which is provided to handler when handler - * is invoked - * - * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. - * - * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been - * initialized. - * - * @retval ::HSA_STATUS_ERROR_INVALID_SIGNAL signal is not a valid hsa_signal_t - * - * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT handler is invalid (NULL) - * - * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime is out of - * resources or blocking signals are not supported by the HSA driver component. - * - */ -hsa_status_t HSA_API - hsa_amd_signal_async_handler(hsa_signal_t signal, - hsa_signal_condition_t cond, - hsa_signal_value_t value, - hsa_amd_signal_handler handler, void* arg); - -/** - * @brief Call a function asynchronously - * - * @details Provides access to the runtime's asynchronous event handling thread - * for general asynchronous functions. Functions queued this way are executed - * in the same manner as if they were a signal handler who's signal is - * satisfied. - * - * @param[in] callback asynchronous function to be invoked - * - * @param[in] arg user provided value which is provided to handler when handler - * is invoked - * - * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. - * - * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been - * initialized. 
- * - * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT handler is invalid (NULL) - * - * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime is out of - * resources or blocking signals are not supported by the HSA driver component. - * - */ -hsa_status_t HSA_API - hsa_amd_async_function(void (*callback)(void* arg), void* arg); - -/** - * @brief Wait for any signal-condition pair to be satisfied. - * - * @details Allows waiting for any of several signal and conditions pairs to be - * satisfied. The function returns the index into the list of signals of the - * first satisfying signal-condition pair. The value of the satisfying signal’s - * value is returned in satisfying_value unless satisfying_value is NULL. This - * function provides only relaxed memory semantics. - */ -uint32_t HSA_API - hsa_amd_signal_wait_any(uint32_t signal_count, hsa_signal_t* signals, - hsa_signal_condition_t* conds, - hsa_signal_value_t* values, uint64_t timeout_hint, - hsa_wait_state_t wait_hint, - hsa_signal_value_t* satisfying_value); - -/** - * @brief Query image limits. - * - * @param[in] agent A valid agent. - * - * @param[in] attribute HSA image info attribute to query. - * - * @param[out] value Pointer to an application-allocated buffer where to store - * the value of the attribute. If the buffer passed by the application is not - * large enough to hold the value of @p attribute, the behavior is undefined. - * - * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. - * - * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been - * initialized. - * - * @retval ::HSA_STATUS_ERROR_INVALID_QUEUE @p value is NULL or @p attribute < - * HSA_EXT_AGENT_INFO_IMAGE_1D_MAX_ELEMENTS or @p attribute > - * HSA_EXT_AGENT_INFO_IMAGE_ARRAY_MAX_LAYERS. 
- * - */ -hsa_status_t HSA_API hsa_amd_image_get_info_max_dim(hsa_agent_t agent, - hsa_agent_info_t attribute, - void* value); - -/** - * @brief Set a CU affinity to specific queues within the process, this function - * call is "atomic". - * - * @param[in] queue A pointer to HSA queue. - * - * @param[in] num_cu_mask_count Size of CUMask bit array passed in. - * - * @param[in] cu_mask Bit-vector representing the CU mask. - * - * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. - * - * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been - * initialized. - * - * @retval ::HSA_STATUS_ERROR_INVALID_QUEUE @p queue is NULL or invalid. - * - * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p num_cu_mask_count is not - * multiple of 32 or @p cu_mask is NULL. - * - * @retval ::HSA_STATUS_ERROR failed to call thunk api - * - */ -hsa_status_t HSA_API hsa_amd_queue_cu_set_mask(const hsa_queue_t* queue, - uint32_t num_cu_mask_count, - const uint32_t* cu_mask); - -/** - * @brief Memory segments associated with a memory pool. - */ -typedef enum { - /** - * Global segment. Used to hold data that is shared by all agents. - */ - HSA_AMD_SEGMENT_GLOBAL = 0, - /** - * Read-only segment. Used to hold data that remains constant during the - * execution of a kernel. - */ - HSA_AMD_SEGMENT_READONLY = 1, - /** - * Private segment. Used to hold data that is local to a single work-item. - */ - HSA_AMD_SEGMENT_PRIVATE = 2, - /** - * Group segment. Used to hold data that is shared by the work-items of a - * work-group. - */ - HSA_AMD_SEGMENT_GROUP = 3, -} hsa_amd_segment_t; - -/** - * @brief A memory pool represents physical storage on an agent. - */ -typedef struct hsa_amd_memory_pool_s { - /** - * Opaque handle. 
- */ - uint64_t handle; -} hsa_amd_memory_pool_t; - -typedef enum hsa_amd_memory_pool_global_flag_s { - /** - * The application can use allocations in the memory pool to store kernel - * arguments, and provide the values for the kernarg segment of - * a kernel dispatch. - */ - HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT = 1, - /** - * Updates to memory in this pool conform to HSA memory consistency model. - * If this flag is set, then ::HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_COARSE_GRAINED - * must not be set. - */ - HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_FINE_GRAINED = 2, - /** - * Writes to memory in this pool can be performed by a single agent at a time. - */ - HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_COARSE_GRAINED = 4 -} hsa_amd_memory_pool_global_flag_t; - -/** - * @brief Memory pool features. - */ -typedef enum { - /** - * Segment where the memory pool resides. The type of this attribute is - * ::hsa_amd_segment_t. - */ - HSA_AMD_MEMORY_POOL_INFO_SEGMENT = 0, - /** - * Flag mask. The value of this attribute is undefined if the value of - * ::HSA_AMD_MEMORY_POOL_INFO_SEGMENT is not ::HSA_AMD_SEGMENT_GLOBAL. The type - * of - * this attribute is uint32_t, a bit-field of - * ::hsa_amd_memory_pool_global_flag_t - * values. - */ - HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS = 1, - /** - * Size of this pool, in bytes. The type of this attribute is size_t. - */ - HSA_AMD_MEMORY_POOL_INFO_SIZE = 2, - /** - * Indicates whether memory in this pool can be allocated using - * ::hsa_amd_memory_pool_allocate. The type of this attribute is bool. - * - * The value of this flag is always false for memory pools in the group and - * private segments. - */ - HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED = 5, - /** - * Allocation granularity of buffers allocated by - * ::hsa_amd_memory_pool_allocate - * in this memory pool. The size of a buffer allocated in this pool is a - * multiple of the value of this attribute. 
The value of this attribute is - * only defined if ::HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED is true for - * this pool. The type of this attribute is size_t. - */ - HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE = 6, - /** - * Alignment of buffers allocated by ::hsa_amd_memory_pool_allocate in this - * pool. The value of this attribute is only defined if - * ::HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED is true for this pool, and - * must be a power of 2. The type of this attribute is size_t. - */ - HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALIGNMENT = 7, - /** - * This memory_pool can be made directly accessible by all the agents in the - * system (::hsa_amd_agent_memory_pool_get_info returns - * ::HSA_AMD_MEMORY_POOL_ACCESS_ALLOWED_BY_DEFAULT for all agents). The type of - * this attribute is bool. - */ - HSA_AMD_MEMORY_POOL_INFO_ACCESSIBLE_BY_ALL = 15, -} hsa_amd_memory_pool_info_t; - -/** - * @brief Get the current value of an attribute of a memory pool. - * - * @param[in] memory_pool A valid memory pool. - * - * @param[in] attribute Attribute to query. - * - * @param[out] value Pointer to a application-allocated buffer where to store - * the value of the attribute. If the buffer passed by the application is not - * large enough to hold the value of @p attribute, the behavior is undefined. - * - * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. - * - */ -hsa_status_t HSA_API - hsa_amd_memory_pool_get_info(hsa_amd_memory_pool_t memory_pool, - hsa_amd_memory_pool_info_t attribute, - void* value); - -/** - * @brief Iterate over the memory pools associated with a given agent, and - * invoke an application-defined callback on every iteration. - * - * @details An agent can directly access buffers located in some memory pool, or - * be enabled to access them by the application (see ::hsa_amd_agents_allow_access), - * yet that memory pool may not be returned by this function for that given - * agent. 
- * - * A memory pool of fine-grained type must be associated only with the host. - * - * @param[in] agent A valid agent. - * - * @param[in] callback Callback to be invoked on the same thread that called - * ::hsa_amd_agent_iterate_memory_pools, serially, once per memory pool that is - * associated with the agent. The HSA runtime passes two arguments to the - * callback: the memory pool, and the application data. If @p callback - * returns a status other than ::HSA_STATUS_SUCCESS for a particular iteration, - * the traversal stops and ::hsa_amd_agent_iterate_memory_pools returns that status - * value. - * - * @param[in] data Application data that is passed to @p callback on every - * iteration. May be NULL. - * - * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. - * - * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been - * initialized. - * - * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid. - * - * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p callback is NULL. - */ -hsa_status_t HSA_API hsa_amd_agent_iterate_memory_pools( - hsa_agent_t agent, - hsa_status_t (*callback)(hsa_amd_memory_pool_t memory_pool, void* data), - void* data); - -/** - * @brief Allocate a block of memory (or buffer) in the specified pool. - * - * @param[in] memory_pool Memory pool where to allocate memory from. The memory - * pool must have the ::HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED flag set. - * - * @param[in] size Allocation size, in bytes. Must not be zero. This value is - * rounded up to the nearest multiple of - * ::HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE in @p memory_pool. - * - * @param[in] flags A bit-field that is used to specify allocation - * directives. Must be 0. - * - * @param[out] ptr Pointer to the location where to store the base virtual - * address of - * the allocated block. 
The returned base address is aligned to the value of - * ::HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALIGNMENT in @p memory_pool. If the - * allocation fails, the returned value is undefined. - * - * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. - * - * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been - * initialized. - * - * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES No memory is available. - * - * @retval ::HSA_STATUS_ERROR_INVALID_MEMORY_POOL The memory pool is invalid. - * - * @retval ::HSA_STATUS_ERROR_INVALID_ALLOCATION The host is not allowed to - * allocate memory in @p memory_pool, or @p size is greater than the value of - * HSA_AMD_MEMORY_POOL_INFO_ALLOC_MAX_SIZE in @p memory_pool. - * - * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p ptr is NULL, or @p size is 0. - * - */ -hsa_status_t HSA_API - hsa_amd_memory_pool_allocate(hsa_amd_memory_pool_t memory_pool, size_t size, - uint32_t flags, void** ptr); - -/** - * @brief Deallocate a block of memory previously allocated using - * ::hsa_amd_memory_pool_allocate. - * - * @param[in] ptr Pointer to a memory block. If @p ptr does not match a value - * previously returned by ::hsa_amd_memory_pool_allocate, the behavior is undefined. - * - * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. - * - * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been - * initialized. - * - */ -hsa_status_t HSA_API hsa_amd_memory_pool_free(void* ptr); - -/** - * @brief Asynchronously copy a block of memory from the location pointed to by - * @p src on the @p src_agent to the memory block pointed to by @p dst on the @p - * dst_agent. - * Because the DMA engines used may not be in the same coherency domain, the caller must ensure - * that buffers are system-level coherent. 
In general this requires the sending device to have - * released the buffer to system scope prior to executing the copy API and the receiving device - * must execute a system scope acquire fence prior to use of the destination buffer. - * - * @param[out] dst Buffer where the content is to be copied. - * - * @param[in] dst_agent Agent associated with the @p dst. The agent must be able to directly - * access both the source and destination buffers in their current locations. - * - * @param[in] src A valid pointer to the source of data to be copied. The source - * buffer must not overlap with the destination buffer, otherwise the copy will succeed - * but contents of @p dst is undefined. - * - * @param[in] src_agent Agent associated with the @p src. The agent must be able to directly - * access both the source and destination buffers in their current locations. - * - * @param[in] size Number of bytes to copy. If @p size is 0, no copy is - * performed and the function returns success. Copying a number of bytes larger - * than the size of the buffers pointed by @p dst or @p src results in undefined - * behavior. - * - * @param[in] num_dep_signals Number of dependent signals. Can be 0. - * - * @param[in] dep_signals List of signals that must be waited on before the copy - * operation starts. The copy will start after every signal has been observed with - * the value 0. The dependent signal should not include completion signal from hsa_amd_memory_async_copy - * operation to be issued in future as that can result in a deadlock. If @p num_dep_signals is 0, this - * argument is ignored. - * - * @param[in] completion_signal Signal used to indicate completion of the copy - * operation. When the copy operation is finished, the value of the signal is - * decremented. The runtime indicates that an error has occurred during the copy - * operation by setting the value of the completion signal to a negative - * number. The signal handle must not be 0. 
- * - * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. The - * application is responsible for checking for asynchronous error conditions - * (see the description of @p completion_signal). - * - * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been - * initialized. - * - * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid. - * - * @retval ::HSA_STATUS_ERROR_INVALID_SIGNAL @p completion_signal is invalid. - * - * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT The source or destination - * pointers are NULL, or the completion signal is 0. - */ -hsa_status_t HSA_API - hsa_amd_memory_async_copy(void* dst, hsa_agent_t dst_agent, const void* src, - hsa_agent_t src_agent, size_t size, - uint32_t num_dep_signals, - const hsa_signal_t* dep_signals, - hsa_signal_t completion_signal); - -/** - * @brief Type of accesses to a memory pool from a given agent. - */ -typedef enum { - /** - * The agent cannot directly access any buffer in the memory pool. - */ - HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED = 0, - /** - * The agent can directly access a buffer located in the pool; the application - * does not need to invoke ::hsa_amd_agents_allow_access. - */ - HSA_AMD_MEMORY_POOL_ACCESS_ALLOWED_BY_DEFAULT = 1, - /** - * The agent can directly access a buffer located in the pool, but only if the - * application has previously requested access to that buffer using - * ::hsa_amd_agents_allow_access. - */ - HSA_AMD_MEMORY_POOL_ACCESS_DISALLOWED_BY_DEFAULT = 2 -} hsa_amd_memory_pool_access_t; - -/** - * @brief Properties of the relationship between an agent a memory pool. - */ -typedef enum { - /** - * Hyper-transport bus type. - */ - HSA_AMD_LINK_INFO_TYPE_HYPERTRANSPORT = 0, - - /** - * QPI bus type. - */ - HSA_AMD_LINK_INFO_TYPE_QPI = 1, - - /** - * PCIe bus type. - */ - HSA_AMD_LINK_INFO_TYPE_PCIE = 2, - - /** - * Infiniband bus type. 
- */ - HSA_AMD_LINK_INFO_TYPE_INFINBAND = 3 - -} hsa_amd_link_info_type_t; - -/** - * @brief Link properties when accessing the memory pool from the specified - * agent. - */ -typedef struct hsa_amd_memory_pool_link_info_s { - /** - * Minimum transfer latency (rounded to ns). - */ - uint32_t min_latency; - - /** - * Maximum transfer latency (rounded to ns). - */ - uint32_t max_latency; - - /** - * Minimum link interface bandwidth in MB/s. - */ - uint32_t min_bandwidth; - - /** - * Maximum link interface bandwidth in MB/s. - */ - uint32_t max_bandwidth; - - /** - * Support for 32-bit atomic transactions. - */ - bool atomic_support_32bit; - - /** - * Support for 64-bit atomic transactions. - */ - bool atomic_support_64bit; - - /** - * Support for cache coherent transactions. - */ - bool coherent_support; - - /** - * The type of bus/link. - */ - hsa_amd_link_info_type_t link_type; - -} hsa_amd_memory_pool_link_info_t; - -/** - * @brief Properties of the relationship between an agent a memory pool. - */ -typedef enum { - /** - * Access to buffers located in the memory pool. The type of this attribute - * is ::hsa_amd_memory_pool_access_t. - * - * An agent can always directly access buffers currently located in a memory - * pool that is associated (the memory_pool is one of the values returned by - * ::hsa_amd_agent_iterate_memory_pools on the agent) with that agent. If the - * buffer is currently located in a memory pool that is not associated with - * the agent, and the value returned by this function for the given - * combination of agent and memory pool is not - * HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED, the application still needs to invoke - * ::hsa_amd_agents_allow_access in order to gain direct access to the buffer. - * - * If the given agent can directly access buffers the pool, the result is not - * HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED. 
If the memory pool is associated with - * the agent, or it is of fined-grained type, the result must not be - * HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED. If the memory pool is not associated - * with the agent, and does not reside in the global segment, the result must - * be HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED. - */ - HSA_AMD_AGENT_MEMORY_POOL_INFO_ACCESS = 0, - - /** - * Number of links to hop when accessing the memory pool from the specified - * agent. The type of this attribute is uint32_t. - */ - HSA_AMD_AGENT_MEMORY_POOL_INFO_NUM_LINK_HOPS = 1, - - /** - * Details of each link hop when accessing the memory pool starting from the - * specified agent. The type of this attribute is an array size of - * HSA_AMD_AGENT_MEMORY_POOL_INFO_NUM_LINK_HOPS with each element containing - * ::hsa_amd_memory_pool_link_info_t. - */ - HSA_AMD_AGENT_MEMORY_POOL_INFO_LINK_INFO = 2 - -} hsa_amd_agent_memory_pool_info_t; - -/** - * @brief Get the current value of an attribute of the relationship between an - * agent and a memory pool. - * - * @param[in] agent Agent. - * - * @param[in] memory_pool Memory pool. - * - * @param[in] attribute Attribute to query. - * - * @param[out] value Pointer to a application-allocated buffer where to store - * the value of the attribute. If the buffer passed by the application is not - * large enough to hold the value of @p attribute, the behavior is undefined. - * - * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. - * - */ -hsa_status_t HSA_API hsa_amd_agent_memory_pool_get_info( - hsa_agent_t agent, hsa_amd_memory_pool_t memory_pool, - hsa_amd_agent_memory_pool_info_t attribute, void* value); - -/** - * @brief Enable direct access to a buffer from a given set of agents. - * - * @details - * - * Upon return, only the listed agents and the agent associated with the - * buffer's memory pool have direct access to the @p ptr. 
- * - * Any agent that has access to the buffer before and after the call to - * ::hsa_amd_agents_allow_access will also have access while - * ::hsa_amd_agents_allow_access is in progress. - * - * The caller is responsible for ensuring that each agent in the list - * must be able to access the memory pool containing @p ptr - * (using ::hsa_amd_agent_memory_pool_get_info with ::HSA_AMD_AGENT_MEMORY_POOL_INFO_ACCESS attribute), - * otherwise error code is returned. - * - * @param[in] num_agents Size of @p agents. - * - * @param[in] agents List of agents. If @p num_agents is 0, this argument is - * ignored. - * - * @param[in] flags A list of bit-field that is used to specify access - * information in a per-agent basis. The size of this list must match that of @p - * agents. Must be NULL. - * - * @param[in] ptr A buffer previously allocated using ::hsa_amd_memory_pool_allocate. - * - * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. - * - * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been - * initialized. - * - * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p num_agents is 0, or @p agents - * is NULL, @p flags is NULL, or attempting to enable access to agent(s) because - * @p ptr is allocated from an inaccessible pool. - * - */ -hsa_status_t HSA_API - hsa_amd_agents_allow_access(uint32_t num_agents, const hsa_agent_t* agents, - const uint32_t* flags, const void* ptr); - -/** - * @brief Query if buffers currently located in some memory pool can be - * relocated to a destination memory pool. - * - * @details If the returned value is non-zero, a migration of a buffer to @p - * dst_memory_pool using ::hsa_amd_memory_migrate may nevertheless fail due to - * resource limitations. - * - * @param[in] src_memory_pool Source memory pool. - * - * @param[in] dst_memory_pool Destination memory pool. - * - * @param[out] result Pointer to a memory location where the result of the query - * is stored. Must not be NULL. 
If buffers currently located in @p - * src_memory_pool can be relocated to @p dst_memory_pool, the result is - * true. - * - * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. - * - * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been - * initialized. - * - * @retval ::HSA_STATUS_ERROR_INVALID_MEMORY_POOL One of the memory pools is - * invalid. - * - * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p result is NULL. - */ -hsa_status_t HSA_API - hsa_amd_memory_pool_can_migrate(hsa_amd_memory_pool_t src_memory_pool, - hsa_amd_memory_pool_t dst_memory_pool, - bool* result); - -/** - * @brief Relocate a buffer to a new memory pool. - * - * @details When a buffer is migrated, its virtual address remains the same but - * its physical contents are moved to the indicated memory pool. - * - * After migration, only the agent associated with the destination pool will have access. - * - * The caller is also responsible for ensuring that the allocation in the - * source memory pool where the buffer is currently located can be migrated to the - * specified destination memory pool (using ::hsa_amd_memory_pool_can_migrate returns a value of true - * for the source and destination memory pools), otherwise behavior is undefined. - * - * The caller must ensure that the buffer is not accessed while it is migrated. - * - * @param[in] ptr Buffer to be relocated. The buffer must have been released to system - * prior to call this API. The buffer will be released to system upon completion. - * - * @param[in] memory_pool Memory pool where to place the buffer. - * - * @param[in] flags A bit-field that is used to specify migration - * information. Must be zero. - * - * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. - * - * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been - * initialized. - * - * @retval ::HSA_STATUS_ERROR_INVALID_MEMORY_POOL The destination memory pool is - * invalid. 
- * - * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES There is a failure in - * allocating the necessary resources. - * - * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p flags is not 0. - */ -hsa_status_t HSA_API hsa_amd_memory_migrate(const void* ptr, - hsa_amd_memory_pool_t memory_pool, - uint32_t flags); - -/** - * - * @brief Pin a host pointer allocated by C/C++ or OS allocator (i.e. ordinary system DRAM) and return a new - * pointer accessible by the @p agents. If the @p host_ptr overlaps with previously locked - * memory, then the overlap area is kept locked (i.e multiple mappings are permitted). In this case, - * the same input @p host_ptr may give different locked @p agent_ptr and when it does, they - * are not necessarily coherent (i.e. accessing either @p agent_ptr is not equivalent). - * - * @param[in] host_ptr A buffer allocated by C/C++ or OS allocator. - * - * @param[in] size The size to be locked. - * - * @param[in] agents Array of agent handle to gain access to the @p host_ptr. - * If this parameter is NULL and the @p num_agent is 0, all agents - * in the platform will gain access to the @p host_ptr. - * - * @param[out] agent_ptr Pointer to the location where to store the new address. - * - * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. - * - * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been - * initialized. - * - * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES There is a failure in - * allocating the necessary resources. - * - * @retval ::HSA_STATUS_ERROR_INVALID_AGENT One or more agent in @p agents is - * invalid. - * - * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p size is 0 or @p host_ptr or - * @p agent_ptr is NULL or @p agents not NULL but @p num_agent is 0 or @p agents - * is NULL but @p num_agent is not 0. 
- */ - -hsa_status_t HSA_API hsa_amd_memory_lock(void* host_ptr, size_t size, - hsa_agent_t* agents, int num_agent, - void** agent_ptr); - -/** - * - * @brief Unpin the host pointer previously pinned via ::hsa_amd_memory_lock. - * - * @details The behavior is undefined if the host pointer being unpinned does not - * match previous pinned address or if the host pointer was already deallocated. - * - * @param[in] host_ptr A buffer allocated by C/C++ or OS allocator that was - * pinned previously via ::hsa_amd_memory_lock. - * - * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. - * - * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been - * initialized. - */ -hsa_status_t HSA_API hsa_amd_memory_unlock(void* host_ptr); - -/** - * @brief Sets the first @p num of uint32_t of the block of memory pointed by - * @p ptr to the specified @p value. - * - * @param[in] ptr Pointer to the block of memory to fill. - * - * @param[in] value Value to be set. - * - * @param[in] count Number of uint32_t element to be set to the value. - * - * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. - * - * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been - * initialized. - * - * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p ptr is NULL or - * not 4 bytes aligned - * - */ -hsa_status_t HSA_API - hsa_amd_memory_fill(void* ptr, uint32_t value, size_t count); - -/** - * @brief Maps an interop object into the HSA flat address space and establishes - * memory residency. The metadata pointer is valid during the lifetime of the - * map (until hsa_amd_interop_unmap_buffer is called). - * Multiple calls to hsa_amd_interop_map_buffer with the same interop_handle - * result in multiple mappings with potentially different addresses and - * different metadata pointers. Concurrent operations on these addresses are - * not coherent. 
Memory must be fenced to system scope to ensure consistency, - * between mappings and with any views of this buffer in the originating - * software stack. - * - * @param[in] num_agents Number of agents which require access to the memory - * - * @param[in] agents List of accessing agents. - * - * @param[in] interop_handle Handle of interop buffer (dmabuf handle in Linux) - * - * @param [in] flags Reserved, must be 0 - * - * @param[out] size Size in bytes of the mapped object - * - * @param[out] ptr Base address of the mapped object - * - * @param[out] metadata_size Size of metadata in bytes, may be NULL - * - * @param[out] metadata Pointer to metadata, may be NULL - * - * @retval HSA_STATUS_SUCCESS if successfully mapped - * - * @retval HSA_STATUS_ERROR_NOT_INITIALIZED if HSA is not initialized - * - * @retval HSA_STATUS_ERROR_OUT_OF_RESOURCES if there is a failure in allocating - * necessary resources - * - * @retval HSA_STATUS_ERROR_INVALID_ARGUMENT all other errors - */ -hsa_status_t HSA_API hsa_amd_interop_map_buffer(uint32_t num_agents, - hsa_agent_t* agents, - int interop_handle, - uint32_t flags, - size_t* size, - void** ptr, - size_t* metadata_size, - const void** metadata); - -/** - * @brief Removes a previously mapped interop object from HSA's flat address space. - * Ends lifetime for the mapping's associated metadata pointer. - */ -hsa_status_t HSA_API hsa_amd_interop_unmap_buffer(void* ptr); - -/** - * @brief Encodes an opaque vendor specific image format. The length of data - * depends on the underlying format. This structure must not be copied as its - * true length can not be determined. - */ -typedef struct hsa_amd_image_descriptor_s { - /* - Version number of the descriptor - */ - uint32_t version; - - /* - Vendor and device PCI IDs for the format as VENDOR_ID<<16|DEVICE_ID. - */ - uint32_t deviceID; - - /* - Start of vendor specific data. 
- */ - uint32_t data[0]; -} hsa_amd_image_descriptor_t; - -/** - * @brief Creates an image from an opaque vendor specific image format. - * Does not modify data at image_data. Intended initially for - * accessing interop images. - * - * @param agent[in] Agent on which to create the image - * - * @param[in] image_descriptor[in] Vendor specific image format - * - * @param[in] image_data Pointer to image backing store - * - * @param[in] access_permission Access permissions for the image object - * - * @param[out] image Created image object. - * - * @retval HSA_STATUS_SUCCESS Image created successfully - * - * @retval HSA_STATUS_ERROR_NOT_INITIALIZED if HSA is not initialized - * - * @retval HSA_STATUS_ERROR_OUT_OF_RESOURCES if there is a failure in allocating - * necessary resources - * - * @retval HSA_STATUS_ERROR_INVALID_ARGUMENT Bad or mismatched descriptor, - * null image_data, or mismatched access_permission. - */ -hsa_status_t HSA_API hsa_amd_image_create( - hsa_agent_t agent, - const hsa_ext_image_descriptor_t *image_descriptor, - const hsa_amd_image_descriptor_t *image_layout, - const void *image_data, - hsa_access_permission_t access_permission, - hsa_ext_image_t *image -); - -#ifdef __cplusplus -} // end extern "C" block -#endif - -#endif // header guard diff --git a/runtime/hsa-runtime/inc/hsa_ext_finalize.h b/runtime/hsa-runtime/inc/hsa_ext_finalize.h deleted file mode 100644 index 1aeb92d0bb..0000000000 --- a/runtime/hsa-runtime/inc/hsa_ext_finalize.h +++ /dev/null @@ -1,531 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// -// The University of Illinois/NCSA -// Open Source License (NCSA) -// -// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. -// -// Developed by: -// -// AMD Research and AMD HSA Software Development -// -// Advanced Micro Devices, Inc. 
-// -// www.amd.com -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal with the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimers. -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimers in -// the documentation and/or other materials provided with the distribution. -// - Neither the names of Advanced Micro Devices, Inc, -// nor the names of its contributors may be used to endorse or promote -// products derived from this Software without specific prior written -// permission. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR -// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// DEALINGS WITH THE SOFTWARE. 
-// -//////////////////////////////////////////////////////////////////////////////// - -#ifndef HSA_RUNTIME_INC_HSA_EXT_FINALIZE_H_ -#define HSA_RUNTIME_INC_HSA_EXT_FINALIZE_H_ - -#include "hsa.h" - -#undef HSA_API -#ifdef HSA_EXPORT_FINALIZER -#define HSA_API HSA_API_EXPORT -#else -#define HSA_API HSA_API_IMPORT -#endif - -#ifdef __cplusplus -extern "C" { -#endif // __cplusplus - -struct BrigModuleHeader; -typedef struct BrigModuleHeader* BrigModule_t; - -/** \defgroup ext-alt-finalizer-extensions Finalization Extensions - * @{ - */ - -/** - * @brief Enumeration constants added to ::hsa_status_t by this extension. - */ -enum { - /** - * The HSAIL program is invalid. - */ - HSA_EXT_STATUS_ERROR_INVALID_PROGRAM = 0x2000, - /** - * The HSAIL module is invalid. - */ - HSA_EXT_STATUS_ERROR_INVALID_MODULE = 0x2001, - /** - * Machine model or profile of the HSAIL module do not match the machine model - * or profile of the HSAIL program. - */ - HSA_EXT_STATUS_ERROR_INCOMPATIBLE_MODULE = 0x2002, - /** - * The HSAIL module is already a part of the HSAIL program. - */ - HSA_EXT_STATUS_ERROR_MODULE_ALREADY_INCLUDED = 0x2003, - /** - * Compatibility mismatch between symbol declaration and symbol definition. - */ - HSA_EXT_STATUS_ERROR_SYMBOL_MISMATCH = 0x2004, - /** - * The finalization encountered an error while finalizing a kernel or - * indirect function. - */ - HSA_EXT_STATUS_ERROR_FINALIZATION_FAILED = 0x2005, - /** - * Mismatch between a directive in the control directive structure and in - * the HSAIL kernel. - */ - HSA_EXT_STATUS_ERROR_DIRECTIVE_MISMATCH = 0x2006 -}; - -/** @} */ - -/** \defgroup ext-alt-finalizer-program Finalization Program - * @{ - */ - -/** - * @brief HSAIL (BRIG) module. The HSA Programmer's Reference Manual contains - * the definition of the BrigModule_t type. 
- */ -typedef BrigModule_t hsa_ext_module_t; - -/** - * @brief An opaque handle to a HSAIL program, which groups a set of HSAIL - * modules that collectively define functions and variables used by kernels and - * indirect functions. - */ -typedef struct hsa_ext_program_s { - /** - * Opaque handle. - */ - uint64_t handle; -} hsa_ext_program_t; - -/** - * @brief Create an empty HSAIL program. - * - * @param[in] machine_model Machine model used in the HSAIL program. - * - * @param[in] profile Profile used in the HSAIL program. - * - * @param[in] default_float_rounding_mode Default float rounding mode used in - * the HSAIL program. - * - * @param[in] options Vendor-specific options. May be NULL. - * - * @param[out] program Memory location where the HSA runtime stores the newly - * created HSAIL program handle. - * - * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. - * - * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been - * initialized. - * - * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES There is a failure to allocate - * resources required for the operation. - * - * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p machine_model is invalid, - * @p profile is invalid, @p default_float_rounding_mode is invalid, or - * @p program is NULL. - */ -hsa_status_t HSA_API hsa_ext_program_create( - hsa_machine_model_t machine_model, - hsa_profile_t profile, - hsa_default_float_rounding_mode_t default_float_rounding_mode, - const char *options, - hsa_ext_program_t *program); - -/** - * @brief Destroy a HSAIL program. - * - * @details The HSAIL program handle becomes invalid after it has been - * destroyed. Code object handles produced by ::hsa_ext_program_finalize are - * still valid after the HSAIL program has been destroyed, and can be used as - * intended. 
Resources allocated outside and associated with the HSAIL program - * (such as HSAIL modules that are added to the HSAIL program) can be released - * after the finalization program has been destroyed. - * - * @param[in] program HSAIL program. - * - * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. - * - * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been - * initialized. - * - * @retval ::HSA_EXT_STATUS_ERROR_INVALID_PROGRAM The HSAIL program is - * invalid. - */ -hsa_status_t HSA_API hsa_ext_program_destroy( - hsa_ext_program_t program); - -/** - * @brief Add a HSAIL module to an existing HSAIL program. - * - * @details The HSA runtime does not perform a deep copy of the HSAIL module - * upon addition. Instead, it stores a pointer to the HSAIL module. The - * ownership of the HSAIL module belongs to the application, which must ensure - * that @p module is not released before destroying the HSAIL program. - * - * The HSAIL module is successfully added to the HSAIL program if @p module is - * valid, if all the declarations and definitions for the same symbol are - * compatible, and if @p module specify machine model and profile that matches - * the HSAIL program. - * - * @param[in] program HSAIL program. - * - * @param[in] module HSAIL module. The application can add the same HSAIL module - * to @p program at most once. The HSAIL module must specify the same machine - * model and profile as @p program. If the floating-mode rounding mode of @p - * module is not default, then it should match that of @p program. - * - * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. - * - * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been - * initialized. - * - * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES There is a failure to allocate - * resources required for the operation. - * - * @retval ::HSA_EXT_STATUS_ERROR_INVALID_PROGRAM The HSAIL program is invalid. 
- * - * @retval ::HSA_EXT_STATUS_ERROR_INVALID_MODULE The HSAIL module is invalid. - * - * @retval ::HSA_EXT_STATUS_ERROR_INCOMPATIBLE_MODULE The machine model of @p - * module does not match machine model of @p program, or the profile of @p - * module does not match profile of @p program. - * - * @retval ::HSA_EXT_STATUS_ERROR_MODULE_ALREADY_INCLUDED The HSAIL module is - * already a part of the HSAIL program. - * - * @retval ::HSA_EXT_STATUS_ERROR_SYMBOL_MISMATCH Symbol declaration and symbol - * definition compatibility mismatch. See the symbol compatibility rules in the - * HSA Programming Reference Manual. - */ -hsa_status_t HSA_API hsa_ext_program_add_module( - hsa_ext_program_t program, - hsa_ext_module_t module); - -/** - * @brief Iterate over the HSAIL modules in a program, and invoke an - * application-defined callback on every iteration. - * - * @param[in] program HSAIL program. - * - * @param[in] callback Callback to be invoked once per HSAIL module in the - * program. The HSA runtime passes three arguments to the callback: the program, - * a HSAIL module, and the application data. If @p callback returns a status - * other than ::HSA_STATUS_SUCCESS for a particular iteration, the traversal - * stops and ::hsa_ext_program_iterate_modules returns that status value. - * - * @param[in] data Application data that is passed to @p callback on every - * iteration. May be NULL. - * - * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. - * - * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been - * initialized. - * - * @retval ::HSA_EXT_STATUS_ERROR_INVALID_PROGRAM The program is invalid. - * - * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p callback is NULL. - */ -hsa_status_t HSA_API hsa_ext_program_iterate_modules( - hsa_ext_program_t program, - hsa_status_t (*callback)(hsa_ext_program_t program, hsa_ext_module_t module, - void* data), - void* data); - -/** - * @brief HSAIL program attributes. 
- */ -typedef enum { - /** - * Machine model specified when the HSAIL program was created. The type - * of this attribute is ::hsa_machine_model_t. - */ - HSA_EXT_PROGRAM_INFO_MACHINE_MODEL = 0, - /** - * Profile specified when the HSAIL program was created. The type of - * this attribute is ::hsa_profile_t. - */ - HSA_EXT_PROGRAM_INFO_PROFILE = 1, - /** - * Default float rounding mode specified when the HSAIL program was - * created. The type of this attribute is ::hsa_default_float_rounding_mode_t. - */ - HSA_EXT_PROGRAM_INFO_DEFAULT_FLOAT_ROUNDING_MODE = 2 -} hsa_ext_program_info_t; - -/** - * @brief Get the current value of an attribute for a given HSAIL program. - * - * @param[in] program HSAIL program. - * - * @param[in] attribute Attribute to query. - * - * @param[out] value Pointer to an application-allocated buffer where to store - * the value of the attribute. If the buffer passed by the application is not - * large enough to hold the value of @p attribute, the behaviour is undefined. - * - * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. - * - * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been - * initialized. - * - * @retval ::HSA_EXT_STATUS_ERROR_INVALID_PROGRAM The HSAIL program is invalid. - * - * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p attribute is an invalid - * HSAIL program attribute, or @p value is NULL. - */ -hsa_status_t HSA_API hsa_ext_program_get_info( - hsa_ext_program_t program, - hsa_ext_program_info_t attribute, - void *value); - -/** - * @brief Finalizer-determined call convention. - */ -typedef enum { - /** - * Finalizer-determined call convention. - */ - HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO = -1 -} hsa_ext_finalizer_call_convention_t; - -/** - * @brief Control directives specify low-level information about the - * finalization process. - */ -typedef struct hsa_ext_control_directives_s { - /** - * Bitset indicating which control directives are enabled. 
The bit assigned to - * a control directive is determined by the corresponding value in - * BrigControlDirective. - * - * If a control directive is disabled, its corresponding field value (if any) - * must be 0. Control directives that are only present or absent (such as - * partial workgroups) have no corresponding field as the presence of the bit - * in this mask is sufficient. - */ - uint64_t control_directives_mask; - /** - * Bitset of HSAIL exceptions that must have the BREAK policy enabled. The bit - * assigned to an HSAIL exception is determined by the corresponding value - * in BrigExceptionsMask. If the kernel contains a enablebreakexceptions - * control directive, the finalizer uses the union of the two masks. - */ - uint16_t break_exceptions_mask; - /** - * Bitset of HSAIL exceptions that must have the DETECT policy enabled. The - * bit assigned to an HSAIL exception is determined by the corresponding value - * in BrigExceptionsMask. If the kernel contains a enabledetectexceptions - * control directive, the finalizer uses the union of the two masks. - */ - uint16_t detect_exceptions_mask; - /** - * Maximum size (in bytes) of dynamic group memory that will be allocated by - * the application for any dispatch of the kernel. If the kernel contains a - * maxdynamicsize control directive, the two values should match. - */ - uint32_t max_dynamic_group_size; - /** - * Maximum number of grid work-items that will be used by the application to - * launch the kernel. If the kernel contains a maxflatgridsize control - * directive, the value of @a max_flat_grid_size must not be greater than the - * value of the directive, and takes precedence. - * - * The value specified for maximum absolute grid size must be greater than or - * equal to the product of the values specified by @a required_grid_size. - * - * If the bit at position BRIG_CONTROL_MAXFLATGRIDSIZE is set in @a - * control_directives_mask, this field must be greater than 0. 
- */ - uint64_t max_flat_grid_size; - /** - * Maximum number of work-group work-items that will be used by the - * application to launch the kernel. If the kernel contains a - * maxflatworkgroupsize control directive, the value of @a - * max_flat_workgroup_size must not be greater than the value of the - * directive, and takes precedence. - * - * The value specified for maximum absolute grid size must be greater than or - * equal to the product of the values specified by @a required_workgroup_size. - * - * If the bit at position BRIG_CONTROL_MAXFLATWORKGROUPSIZE is set in @a - * control_directives_mask, this field must be greater than 0. - */ - uint32_t max_flat_workgroup_size; - /** - * Reserved. Must be 0. - */ - uint32_t reserved1; - /** - * Grid size that will be used by the application in any dispatch of the - * kernel. If the kernel contains a requiredgridsize control directive, the - * dimensions should match. - * - * The specified grid size must be consistent with @a required_workgroup_size - * and @a required_dim. Also, the product of the three dimensions must not - * exceed @a max_flat_grid_size. Note that the listed invariants must hold - * only if all the corresponding control directives are enabled. - * - * If the bit at position BRIG_CONTROL_REQUIREDGRIDSIZE is set in @a - * control_directives_mask, the three dimension values must be greater than 0. - */ - uint64_t required_grid_size[3]; - /** - * Work-group size that will be used by the application in any dispatch of the - * kernel. If the kernel contains a requiredworkgroupsize control directive, - * the dimensions should match. - * - * The specified work-group size must be consistent with @a required_grid_size - * and @a required_dim. Also, the product of the three dimensions must not - * exceed @a max_flat_workgroup_size. Note that the listed invariants must - * hold only if all the corresponding control directives are enabled. 
- * - * If the bit at position BRIG_CONTROL_REQUIREDWORKGROUPSIZE is set in @a - * control_directives_mask, the three dimension values must be greater than 0. - */ - hsa_dim3_t required_workgroup_size; - /** - * Number of dimensions that will be used by the application to launch the - * kernel. If the kernel contains a requireddim control directive, the two - * values should match. - * - * The specified dimensions must be consistent with @a required_grid_size and - * @a required_workgroup_size. This invariant must hold only if all the - * corresponding control directives are enabled. - * - * If the bit at position BRIG_CONTROL_REQUIREDDIM is set in @a - * control_directives_mask, this field must be 1, 2, or 3. - */ - uint8_t required_dim; - /** - * Reserved. Must be 0. - */ - uint8_t reserved2[75]; -} hsa_ext_control_directives_t; - -/** - * @brief Finalize an HSAIL program for a given instruction set architecture. - * - * @details Finalize all of the kernels and indirect functions that belong to - * the same HSAIL program for a specific instruction set architecture (ISA). The - * transitive closure of all functions specified by call or scall must be - * defined. Kernels and indirect functions that are being finalized must be - * defined. Kernels and indirect functions that are referenced in kernels and - * indirect functions being finalized may or may not be defined, but must be - * declared. All the global/readonly segment variables that are referenced in - * kernels and indirect functions being finalized may or may not be defined, but - * must be declared. - * - * @param[in] program HSAIL program. - * - * @param[in] isa Instruction set architecture to finalize for. - * - * @param[in] call_convention A call convention used in a finalization. Must - * have a value between ::HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO (inclusive) - * and the value of the attribute ::HSA_ISA_INFO_CALL_CONVENTION_COUNT in @p - * isa (not inclusive). 
- * - * @param[in] control_directives Low-level control directives that influence - * the finalization process. - * - * @param[in] options Vendor-specific options. May be NULL. - * - * @param[in] code_object_type Type of code object to produce. - * - * @param[out] code_object Code object generated by the Finalizer, which - * contains the machine code for the kernels and indirect functions in the HSAIL - * program. The code object is independent of the HSAIL module that was used to - * generate it. - * - * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. - * - * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been - * initialized. - * - * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES There is a failure to allocate - * resources required for the operation. - * - * @retval ::HSA_EXT_STATUS_ERROR_INVALID_PROGRAM The HSAIL program is - * invalid. - * - * @retval ::HSA_STATUS_ERROR_INVALID_ISA @p isa is invalid. - * - * @retval ::HSA_EXT_STATUS_ERROR_DIRECTIVE_MISMATCH The directive in - * the control directive structure and in the HSAIL kernel mismatch, or if the - * same directive is used with a different value in one of the functions used by - * this kernel. - * - * @retval ::HSA_EXT_STATUS_ERROR_FINALIZATION_FAILED The Finalizer - * encountered an error while compiling a kernel or an indirect function. 
- */ -hsa_status_t HSA_API hsa_ext_program_finalize( - hsa_ext_program_t program, - hsa_isa_t isa, - int32_t call_convention, - hsa_ext_control_directives_t control_directives, - const char *options, - hsa_code_object_type_t code_object_type, - hsa_code_object_t *code_object); - -/** @} */ - -#define hsa_ext_finalizer_1_00 - -typedef struct hsa_ext_finalizer_1_00_pfn_s { - hsa_status_t (*hsa_ext_program_create)( - hsa_machine_model_t machine_model, hsa_profile_t profile, - hsa_default_float_rounding_mode_t default_float_rounding_mode, - const char *options, hsa_ext_program_t *program); - - hsa_status_t (*hsa_ext_program_destroy)(hsa_ext_program_t program); - - hsa_status_t (*hsa_ext_program_add_module)(hsa_ext_program_t program, - hsa_ext_module_t module); - - hsa_status_t (*hsa_ext_program_iterate_modules)( - hsa_ext_program_t program, - hsa_status_t (*callback)(hsa_ext_program_t program, - hsa_ext_module_t module, void *data), - void *data); - - hsa_status_t (*hsa_ext_program_get_info)( - hsa_ext_program_t program, hsa_ext_program_info_t attribute, - void *value); - - hsa_status_t (*hsa_ext_program_finalize)( - hsa_ext_program_t program, hsa_isa_t isa, int32_t call_convention, - hsa_ext_control_directives_t control_directives, const char *options, - hsa_code_object_type_t code_object_type, hsa_code_object_t *code_object); -} hsa_ext_finalizer_1_00_pfn_t; - -#ifdef __cplusplus -} // extern "C" block -#endif // __cplusplus - -#endif // HSA_RUNTIME_INC_HSA_EXT_FINALIZE_H_ diff --git a/runtime/hsa-runtime/inc/hsa_ext_image.h b/runtime/hsa-runtime/inc/hsa_ext_image.h deleted file mode 100644 index 4bc9999e46..0000000000 --- a/runtime/hsa-runtime/inc/hsa_ext_image.h +++ /dev/null @@ -1,964 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// -// The University of Illinois/NCSA -// Open Source License (NCSA) -// -// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. 
-// -// Developed by: -// -// AMD Research and AMD HSA Software Development -// -// Advanced Micro Devices, Inc. -// -// www.amd.com -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal with the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimers. -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimers in -// the documentation and/or other materials provided with the distribution. -// - Neither the names of Advanced Micro Devices, Inc, -// nor the names of its contributors may be used to endorse or promote -// products derived from this Software without specific prior written -// permission. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR -// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// DEALINGS WITH THE SOFTWARE. 
-// -//////////////////////////////////////////////////////////////////////////////// - -#ifndef HSA_EXT_IMAGE_H -#define HSA_EXT_IMAGE_H - -#include "hsa.h" - -#undef HSA_API -#ifdef HSA_EXPORT_IMAGES -#define HSA_API HSA_API_EXPORT -#else -#define HSA_API HSA_API_IMPORT -#endif - -#ifdef __cplusplus -extern "C" { -#endif /*__cplusplus*/ - -/** \defgroup ext-images Images and Samplers - * @{ - */ - -/** - * @brief Image handle, populated by ::hsa_ext_image_create. Images - * handles are only unique within an agent, not across agents. - * - */ -typedef struct hsa_ext_image_s { - /** - * Opaque handle. - */ - uint64_t handle; - -} hsa_ext_image_t; - -/** - * @brief Geometry associated with the HSA image (image dimensions allowed in - * HSA). The enumeration values match the BRIG type BrigImageGeometry. - */ -typedef enum { - /** - * One-dimensional image addressed by width coordinate. - */ - HSA_EXT_IMAGE_GEOMETRY_1D = 0, - - /** - * Two-dimensional image addressed by width and height coordinates. - */ - HSA_EXT_IMAGE_GEOMETRY_2D = 1, - - /** - * Three-dimensional image addressed by width, height, and depth coordinates. - */ - HSA_EXT_IMAGE_GEOMETRY_3D = 2, - - /** - * Array of one-dimensional images with the same size and format. 1D arrays - * are addressed by index and width coordinate. - */ - HSA_EXT_IMAGE_GEOMETRY_1DA = 3, - - /** - * Array of two-dimensional images with the same size and format. 2D arrays - * are addressed by index and width and height coordinates. - */ - HSA_EXT_IMAGE_GEOMETRY_2DA = 4, - - /** - * One-dimensional image interpreted as a buffer with specific restrictions. - */ - HSA_EXT_IMAGE_GEOMETRY_1DB = 5, - - /** - * Two-dimensional depth image addressed by width and height coordinates. - */ - HSA_EXT_IMAGE_GEOMETRY_2DDEPTH = 6, - - /** - * Array of two-dimensional depth images with the same size and format. 2D - * arrays are addressed by index and width and height coordinates. 
- */ - HSA_EXT_IMAGE_GEOMETRY_2DADEPTH = 7 -} hsa_ext_image_geometry_t; - -/** - * @brief Channel type associated with the elements of an image. See the Image - * section in the HSA Programming Reference Manual for definitions on each - * component type. The enumeration values match the BRIG type - * BrigImageChannelType. - */ -typedef enum { - HSA_EXT_IMAGE_CHANNEL_TYPE_SNORM_INT8 = 0, - HSA_EXT_IMAGE_CHANNEL_TYPE_SNORM_INT16 = 1, - HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_INT8 = 2, - HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_INT16 = 3, - HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_INT24 = 4, - HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555 = 5, - HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565 = 6, - HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_SHORT_101010 = 7, - HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT8 = 8, - HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT16 = 9, - HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT32 = 10, - HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8 = 11, - HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 = 12, - HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32 = 13, - HSA_EXT_IMAGE_CHANNEL_TYPE_HALF_FLOAT = 14, - HSA_EXT_IMAGE_CHANNEL_TYPE_FLOAT = 15 -} hsa_ext_image_channel_type_t; - -/** - * - * @brief Channel order associated with the elements of an image. See the - * Image section in the HSA Programming Reference Manual for definitions on each - * component order. The enumeration values match the BRIG type - * BrigImageChannelOrder. 
- */ -typedef enum { - HSA_EXT_IMAGE_CHANNEL_ORDER_A = 0, - HSA_EXT_IMAGE_CHANNEL_ORDER_R = 1, - HSA_EXT_IMAGE_CHANNEL_ORDER_RX = 2, - HSA_EXT_IMAGE_CHANNEL_ORDER_RG = 3, - HSA_EXT_IMAGE_CHANNEL_ORDER_RGX = 4, - HSA_EXT_IMAGE_CHANNEL_ORDER_RA = 5, - HSA_EXT_IMAGE_CHANNEL_ORDER_RGB = 6, - HSA_EXT_IMAGE_CHANNEL_ORDER_RGBX = 7, - HSA_EXT_IMAGE_CHANNEL_ORDER_RGBA = 8, - HSA_EXT_IMAGE_CHANNEL_ORDER_BGRA = 9, - HSA_EXT_IMAGE_CHANNEL_ORDER_ARGB = 10, - HSA_EXT_IMAGE_CHANNEL_ORDER_ABGR = 11, - HSA_EXT_IMAGE_CHANNEL_ORDER_SRGB = 12, - HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBX = 13, - HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBA = 14, - HSA_EXT_IMAGE_CHANNEL_ORDER_SBGRA = 15, - HSA_EXT_IMAGE_CHANNEL_ORDER_INTENSITY = 16, - HSA_EXT_IMAGE_CHANNEL_ORDER_LUMINANCE = 17, - HSA_EXT_IMAGE_CHANNEL_ORDER_DEPTH = 18, - HSA_EXT_IMAGE_CHANNEL_ORDER_DEPTH_STENCIL = 19 -} hsa_ext_image_channel_order_t; - -/** - * @brief Image format. - */ -typedef struct hsa_ext_image_format_s { - /** - * Channel type. - */ - hsa_ext_image_channel_type_t channel_type; - - /** - * Channel order. - */ - hsa_ext_image_channel_order_t channel_order; -} hsa_ext_image_format_t; - -/** - * @brief Implementation-independent image descriptor. - */ -typedef struct hsa_ext_image_descriptor_s { - /** - * Image geometry. - */ - hsa_ext_image_geometry_t geometry; - /** - * Width of the image, in components. - */ - size_t width; - /** - * Height of the image, in components. Only defined if the geometry is 2D or - * higher. - */ - size_t height; - /** - * Depth of the image, in components. Only defined if @a geometry is - * ::HSA_EXT_IMAGE_GEOMETRY_3D. A depth of 0 is same as a depth of 1. - */ - size_t depth; - /** - * Number of images in the image array. Only defined if @a geometry is - * ::HSA_EXT_IMAGE_GEOMETRY_1DA, ::HSA_EXT_IMAGE_GEOMETRY_2DA, or - * HSA_EXT_IMAGE_GEOMETRY_2DADEPTH. - */ - size_t array_size; - /** - * Image format. 
- */ - hsa_ext_image_format_t format; -} hsa_ext_image_descriptor_t; - -/** - * @brief Image capability. - */ -typedef enum { - /** - * Images of this geometry and format are not supported in the agent. - */ - HSA_EXT_IMAGE_CAPABILITY_NOT_SUPPORTED = 0x0, - /** - * Read-only images of this geometry and format are supported by the - * agent. - */ - HSA_EXT_IMAGE_CAPABILITY_READ_ONLY = 0x1, - /** - * Write-only images of this geometry and format are supported by the - * agent. - */ - HSA_EXT_IMAGE_CAPABILITY_WRITE_ONLY = 0x2, - /** - * Read-write images of this geometry and format are supported by the - * agent. - */ - HSA_EXT_IMAGE_CAPABILITY_READ_WRITE = 0x4, - /** - * Images of this geometry and format can be accessed from read-modify-write - * operations in the agent. - */ - HSA_EXT_IMAGE_CAPABILITY_READ_MODIFY_WRITE = 0x8, - /** - * Images of this geometry and format are guaranteed to have a consistent - * data layout regardless of how they are accessed by the associated - * agent. - */ - HSA_EXT_IMAGE_CAPABILITY_ACCESS_INVARIANT_DATA_LAYOUT = 0x10 -} hsa_ext_image_capability_t; - -/** - * @brief Retrieve the supported image capabilities for a given combination of - * agent, image format and geometry. - * - * @param[in] agent Agent to be associated with the image. - * - * @param[in] geometry Geometry. - * - * @param[in] image_format Pointer to an image format. Must not be NULL. - * - * @param[out] capability_mask Pointer to a memory location where the HSA - * runtime stores a bit-mask of supported image capability - * (::hsa_ext_image_capability_t) values. Must not be NULL. - * - * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. - * - * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been - * initialized. - * - * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid. 
- * - * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p geometry is not a valid image - * geometry value, @p image_format is NULL, or @p capability_mask is NULL. - */ -hsa_status_t HSA_API - hsa_ext_image_get_capability(hsa_agent_t agent, - hsa_ext_image_geometry_t geometry, - const hsa_ext_image_format_t *image_format, - uint32_t *capability_mask); - -/** - * @brief Agent-specific image size and alignment requirements, populated by - * ::hsa_ext_image_data_get_info. - */ -typedef struct hsa_ext_image_data_info_s { - /** - * Image data size, in bytes. - */ - size_t size; - - /** - * Image data alignment, in bytes. - */ - size_t alignment; - -} hsa_ext_image_data_info_t; - -/** - * @brief Retrieve the image data requirements for a given combination of image - * descriptor, access permission, and agent. - * - * @details The optimal image data size and alignment requirements may vary - * depending on the image attributes specified in @p image_descriptor. Also, - * different implementation of the HSA runtime may return different requirements - * for the same input values. - * - * The implementation must return the same image data requirements for different - * access permissions with exactly the same image descriptor as long as - * ::hsa_ext_image_get_capability reports - * ::HSA_EXT_IMAGE_CAPABILITY_ACCESS_INVARIANT_DATA_LAYOUT for the geometry - * and image format contained in the image descriptor. - * - * @param[in] agent Agent to be associated with the image. - * - * @param[in] image_descriptor Pointer to an image descriptor. Must not be NULL. - * - * @param[in] access_permission Image access mode for @a agent. - * - * @param[out] image_data_info Memory location where the runtime stores the - * size and alignment requirements. Must not be NULL. - * - * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. - * - * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been - * initialized. 
- * - * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid. - * - * @retval ::HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED The agent does - * not support the image format specified by the descriptor. - * - * @retval ::HSA_EXT_STATUS_ERROR_IMAGE_SIZE_UNSUPPORTED The agent does - * not support the image dimensions specified by the format descriptor. - * - * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p image_descriptor is NULL, @p - * access_permission is not a valid access permission value, or @p - * image_data_info is NULL. - */ -hsa_status_t HSA_API hsa_ext_image_data_get_info( - hsa_agent_t agent, const hsa_ext_image_descriptor_t *image_descriptor, - hsa_access_permission_t access_permission, - hsa_ext_image_data_info_t *image_data_info); - -/** - * @brief Creates a agent-defined image handle from an - * implementation-independent image descriptor and a agent-specific image - * data. - * - * @details Image created with different access permissions but the same image - * descriptor can share the same image data if - * ::HSA_EXT_IMAGE_CAPABILITY_ACCESS_INVARIANT_DATA_LAYOUT is reported by - * ::hsa_ext_image_get_capability for the image format specified in the image - * descriptor. Images with a s-form channel order can share the same image data - * with other images that have the corresponding non-s-form channel order, - * provided the rest of their image descriptors are identical. - * - * If necessary, an application can use image operations (import, export, copy, - * clear) to prepare the image for the intended use regardless of the access - * permissions. - * - * @param[in] agent agent to be associated with the image. - * - * @param[in] image_descriptor Pointer to an image descriptor. Must not be NULL. - * - * @param[in] image_data Image data buffer that must have been allocated - * according to the size and alignment requirements dictated by - * ::hsa_ext_image_data_get_info. Must not be NULL. 
- * - * Any previous memory contents are preserved upon creation. The application is - * responsible for ensuring that the lifetime of the image data exceeds that of - * all the associated images. - * - * @param[in] access_permission Access permission of the image by the - * agent. The access permission defines how the agent expects to use the - * image and must match the corresponding HSAIL image handle type. The agent - * must support the image format specified in @p image_descriptor for the given - * permission. - * - * @param[out] image Pointer to a memory location where the HSA runtime stores - * the newly created image handle. Must not be NULL. - * - * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. - * - * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been - * initialized. - * - * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid. - * - * @retval ::HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED The agent does - * not have the capability to support the image format contained in the image - * descriptor using the specified access permission. - * - * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime cannot create the - * image because it is out of resources (for example, the agent does not - * support the creation of more image handles with the given access permission). - * - * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p image_descriptor is NULL, @p - * image_data is NULL, @p access_permission is not a valid access permission - * value, or @p image is NULL. - */ -hsa_status_t HSA_API - hsa_ext_image_create(hsa_agent_t agent, - const hsa_ext_image_descriptor_t *image_descriptor, - const void *image_data, - hsa_access_permission_t access_permission, - hsa_ext_image_t *image); - -/** - * @brief Destroy an image previously created using ::hsa_ext_image_create. - * - * @details Destroying the image handle does not free the associated image data, - * or modify its contents. 
The application should not destroy an image while - * there are references to it queued for execution or currently being used in a - * kernel. - * - * @param[in] agent Agent associated with the image. - * - * @param[in] image Image. - * - * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. - * - * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been - * initialized. - * - * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid. - */ -hsa_status_t HSA_API - hsa_ext_image_destroy(hsa_agent_t agent, hsa_ext_image_t image); - -/** - * @brief Copies a portion of one image (the source) to another image (the - * destination). - * - * @details The source and destination image formats should match, except if the - * channel type of one of the images is the standard form of the channel type of - * the other image. For example, it is allowed to copy a source image with a - * channel type of HSA_EXT_IMAGE_CHANNEL_ORDER_SRGB to a destination image with - * a channel type of HSA_EXT_IMAGE_CHANNEL_ORDER_RGB. - * - * The source and destination images do not have to be of the same geometry and - * appropriate scaling is performed by the HSA runtime. It is possible to copy - * subregions between any combinations of source and destination types, provided - * that the dimensions of the subregions are the same. For example, it is - * allowed to copy a rectangular region from a 2D image to a slice of a 3D - * image. - * - * If the source and destination image data overlap, or the combination of - * offset and range references an out-out-bounds element in any of the images, - * the behavior is undefined. - * - * @param[in] agent Agent associated with both images. - * - * @param[in] src_image Source image. The agent associated with the source - * image must be identical to that of the destination image. - * - * @param[in] src_offset Pointer to the offset within the source image where to - * copy the data from. Must not be NULL. 
- * - * @param[in] dst_image Destination image. - * - * @param[in] dst_offset Pointer to the offset within the destination - * image where to copy the data. Must not be NULL. - * - * @param[in] range Dimensions of the image portion to be copied. The HSA - * runtime computes the size of the image data to be copied using this - * argument. Must not be NULL. - * - * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. - * - * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been - * initialized. - * - * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid. - * - * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p src_offset is - * NULL, @p dst_offset is NULL, or @p range is NULL. - */ -hsa_status_t HSA_API - hsa_ext_image_copy(hsa_agent_t agent, hsa_ext_image_t src_image, - const hsa_dim3_t *src_offset, hsa_ext_image_t dst_image, - const hsa_dim3_t *dst_offset, const hsa_dim3_t *range); - -/** - * @brief Image region. - */ -typedef struct hsa_ext_image_region_s { - /** - * Offset within an image (in coordinates). - */ - hsa_dim3_t offset; - - /** - * Dimensions of the image range (in coordinates). The x, y, and z dimensions - * correspond to width, height, and depth respectively. - */ - hsa_dim3_t range; -} hsa_ext_image_region_t; - -/** - * @brief Import a linearly organized image data from memory directly to an - * image handle. - * - * @details This operation updates the image data referenced by the image handle - * from the source memory. The size of the data imported from memory is - * implicitly derived from the image region. - * - * If @p src_row_pitch is smaller than the destination region width (in bytes), - * then @p src_row_pitch = region width. - * - * If @p src_slice_pitch is smaller than the destination region width * region - * height (in bytes), then @p src_slice_pitch = region width * region height. - * - * It is the application's responsibility to avoid out of bounds memory access. 
- * - * None of the source memory or image data memory in the previously created - * ::hsa_ext_image_create image handle can overlap. Overlapping of any - * of the source and destination memory within the import operation produces - * undefined results. - * - * @param[in] agent Agent associated with the image. - * - * @param[in] src_memory Source memory. Must not be NULL. - * - * @param[in] src_row_pitch Number of bytes in one row of the source memory. - * - * @param[in] src_slice_pitch Number of bytes in one slice of the source memory. - * - * @param[in] dst_image Destination image. - * - * @param[in] image_region Pointer to the image region to be updated. Must not - * be NULL. - * - * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. - * - * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been - * initialized. - * - * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid. - * - * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p src_memory is NULL, or @p - * image_region is NULL. - * - */ -hsa_status_t HSA_API - hsa_ext_image_import(hsa_agent_t agent, const void *src_memory, - size_t src_row_pitch, size_t src_slice_pitch, - hsa_ext_image_t dst_image, - const hsa_ext_image_region_t *image_region); - -/** - * @brief Export the image data to linearly organized memory. - * - * @details The operation updates the destination memory with the image data of - * @p src_image. The size of the data exported to memory is implicitly derived - * from the image region. - * - * If @p dst_row_pitch is smaller than the source region width (in bytes), then - * @p dst_row_pitch = region width. - * - * If @p dst_slice_pitch is smaller than the source region width * region height - * (in bytes), then @p dst_slice_pitch = region width * region height. - * - * It is the application's responsibility to avoid out of bounds memory access. 
- * - * None of the destination memory or image data memory in the previously created - * ::hsa_ext_image_create image handle can overlap. Overlapping of any of - * the source and destination memory within the export operation produces - * undefined results. - * - * @param[in] agent Agent associated with the image. - * - * @param[in] src_image Source image. - * - * @param[in] dst_memory Destination memory. Must not be NULL. - * - * @param[in] dst_row_pitch Number of bytes in one row of the destination - * memory. - * - * @param[in] dst_slice_pitch Number of bytes in one slice of the destination - * memory. - * - * @param[in] image_region Pointer to the image region to be exported. Must not - * be NULL. - * - * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. - * - * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been - * initialized. - * - * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid. - * - * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p dst_memory is NULL, or @p - * image_region is NULL. - */ -hsa_status_t HSA_API - hsa_ext_image_export(hsa_agent_t agent, hsa_ext_image_t src_image, - void *dst_memory, size_t dst_row_pitch, - size_t dst_slice_pitch, - const hsa_ext_image_region_t *image_region); - -/** - * @brief Clear an image to the specified value. - * - * @details Clearing an image does not perform any format conversion and the - * provided clear data is directly stored regardless of the image format. The - * lowest bits of the data (number of bits depending on the image component - * type) stored in the cleared image are based on the image component order. - * - * The number of elements in @p data should match the number of access - * components for the channel order of @p image, as determined by the HSA - * Programmer's Reference Manual. 
A single element is required for - * HSA_EXT_IMAGE_CHANNEL_ORDER_DEPTH and - * HSA_EXT_IMAGE_CHANNEL_ORDER_DEPTH_STENCIL, while any other channel order - * requires 4 elements. - * - * Each element in @p data is a 32-bit value. The type of each element - * should match the access type associated with the channel type of @p image, - * as determined by the HSA Programmer's Reference Manual: - * - HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT8, - * HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT16, and - * HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT32 map to int32_t. - * - HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8, - * HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16, and - * HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32 map to uint32_t. - * - Any other channel type maps to a 32-bit float. - * - * @param[in] agent Agent associated with the image. - * - * @param[in] image Image to be cleared. - * - * @param[in] data Clear value array. Specifying a clear value outside of the - * range that can be represented by an image format results in undefined - * behavior. Must not be NULL. - * - * @param[in] image_region Pointer to the image region to clear. Must not be - * NULL. If the region references an out-out-bounds element, the behavior is - * undefined. - * - * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. - * - * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been - * initialized. - * - * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid. - * - * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p data is NULL, or @p - * image_region is NULL. - */ -hsa_status_t HSA_API - hsa_ext_image_clear(hsa_agent_t agent, hsa_ext_image_t image, - const void *data, - const hsa_ext_image_region_t *image_region); - -/** - * @brief Sampler handle. Samplers are populated by - * ::hsa_ext_sampler_create. Sampler handles are only unique within an - * agent, not across agents. - */ -typedef struct hsa_ext_sampler_s { - /** - * Opaque handle. 
- */ - uint64_t handle; -} hsa_ext_sampler_t; - -/** - * @brief Sampler address modes. The sampler address mode describes the - * processing of out-of-range image coordinates. The values match the BRIG - * type BrigSamplerAddressing. - */ -typedef enum { - /** - * Out-of-range coordinates are not handled. - */ - HSA_EXT_SAMPLER_ADDRESSING_MODE_UNDEFINED = 0, - - /** - * Clamp out-of-range coordinates to the image edge. - */ - HSA_EXT_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE = 1, - - /** - * Clamp out-of-range coordinates to the image border. - */ - HSA_EXT_SAMPLER_ADDRESSING_MODE_CLAMP_TO_BORDER = 2, - - /** - * Wrap out-of-range coordinates back into the valid coordinate range. - */ - HSA_EXT_SAMPLER_ADDRESSING_MODE_REPEAT = 3, - - /** - * Mirror out-of-range coordinates back into the valid coordinate range. - */ - HSA_EXT_SAMPLER_ADDRESSING_MODE_MIRRORED_REPEAT = 4 - -} hsa_ext_sampler_addressing_mode_t; - -/** - * @brief Sampler coordinate modes. The enumeration values match the BRIG - * BRIG_SAMPLER_COORD bit in BrigSamplerModifier. - */ -typedef enum { - /** - * Coordinates are all in the range of 0 to (dimension-1). - */ - HSA_EXT_SAMPLER_COORDINATE_MODE_UNNORMALIZED = 0, - - /** - * Coordinates are all in the range of 0.0 to 1.0. - */ - HSA_EXT_SAMPLER_COORDINATE_MODE_NORMALIZED = 1 - -} hsa_ext_sampler_coordinate_mode_t; - -/** - * @brief Sampler filter modes. The enumeration values match the BRIG type - * BrigSamplerFilter. - */ -typedef enum { - /** - * Filter to the image element nearest (in Manhattan distance) to the - * specified coordinate. - */ - HSA_EXT_SAMPLER_FILTER_MODE_NEAREST = 0, - - /** - * Filter to the image element calculated by combining the elements in a 2x2 - * square block or 2x2x2 cube block around the specified coordinate. The - * elements are combined using linear interpolation. - */ - HSA_EXT_SAMPLER_FILTER_MODE_LINEAR = 1 - -} hsa_ext_sampler_filter_mode_t; - -/** - * @brief Implementation-independent sampler descriptor. 
- */ -typedef struct hsa_ext_sampler_descriptor_s { - /** - * Sampler coordinate mode describes the normalization of image coordinates. - */ - hsa_ext_sampler_coordinate_mode_t coordinate_mode; - - /** - * Sampler filter type describes the type of sampling performed. - */ - hsa_ext_sampler_filter_mode_t filter_mode; - - /** - * Sampler address mode describes the processing of out-of-range image - * coordinates. - */ - hsa_ext_sampler_addressing_mode_t address_mode; - -} hsa_ext_sampler_descriptor_t; - -/** - * @brief Create a kernel agent defined sampler handle for a given combination - * of a (agent-independent) sampler descriptor and agent. - * - * @param[in] agent Agent to be associated with the sampler. - * - * @param[in] sampler_descriptor Pointer to a sampler descriptor. Must not be - * NULL. - * - * @param[out] sampler Memory location where the HSA runtime stores the newly - * created sampler handle. Must not be NULL. - * - * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. - * - * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been - * initialized. - * - * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid. - * - * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The agent cannot create the - * specified handle because it is out of resources. - * - * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p sampler_descriptor is NULL, or - * @p sampler is NULL. - */ -hsa_status_t HSA_API hsa_ext_sampler_create( - hsa_agent_t agent, const hsa_ext_sampler_descriptor_t *sampler_descriptor, - hsa_ext_sampler_t *sampler); - -/** - * @brief Destroy a sampler previously created using ::hsa_ext_sampler_create. - * - * @param[in] agent Agent associated with the sampler. - * - * @param[in] sampler Sampler. The sampler handle should not be destroyed while - * there are references to it queued for execution or currently being used in a - * dispatch. - * - * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. 
- * - * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been - * initialized. - * - * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid. - */ -hsa_status_t HSA_API - hsa_ext_sampler_destroy(hsa_agent_t agent, hsa_ext_sampler_t sampler); - -/** - * @brief Enumeration constants added to ::hsa_status_t by this extension. - */ -enum { - /** - * Image format is not supported. - */ - HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED = 0x3000, - /** - * Image size is not supported. - */ - HSA_EXT_STATUS_ERROR_IMAGE_SIZE_UNSUPPORTED = 0x3001 -}; - -/** - * @brief Enumeration constants added to ::hsa_agent_info_t by this - * extension. The value of any of these attributes is undefined if the - * agent is not a kernel agent, or the implementation does not support images. - */ -enum { - /** - * Maximum number of elements in 1D images. Must be at most 16384. The type - * of this attribute is uint32_t. - */ - HSA_EXT_AGENT_INFO_IMAGE_1D_MAX_ELEMENTS = 0x3000, - /** - * Maximum number of elements in 1DA images. Must be at most 16384. The type - * of this attribute is uint32_t. - */ - HSA_EXT_AGENT_INFO_IMAGE_1DA_MAX_ELEMENTS = 0x3001, - /** - * Maximum number of elements in 1DB images. Must be at most 65536. The type - * of this attribute is uint32_t. - */ - HSA_EXT_AGENT_INFO_IMAGE_1DB_MAX_ELEMENTS = 0x3002, - /** - * Maximum dimensions (width, height) of 2D images, in image elements. The X - * and Y maximums must be at most 16384. The type of this attribute is - * uint32_t[2]. - */ - HSA_EXT_AGENT_INFO_IMAGE_2D_MAX_ELEMENTS = 0x3003, - /** - * Maximum dimensions (width, height) of 2DA images, in image elements. The X - * and Y maximums must be at most 16384. The type of this attribute is - * uint32_t[2]. - */ - HSA_EXT_AGENT_INFO_IMAGE_2DA_MAX_ELEMENTS = 0x3004, - /** - * Maximum dimensions (width, height) of 2DDEPTH images, in image - * elements. The X and Y maximums must be at most 16384. The type of this - * attribute is uint32_t[2]. 
- */ - HSA_EXT_AGENT_INFO_IMAGE_2DDEPTH_MAX_ELEMENTS = 0x3005, - /** - * Maximum dimensions (width, height) of 2DADEPTH images, in image - * elements. The X and Y maximums must be at most 16384. The type of this - * attribute is uint32_t[2]. - */ - HSA_EXT_AGENT_INFO_IMAGE_2DADEPTH_MAX_ELEMENTS = 0x3006, - /** - * Maximum dimensions (width, height, depth) of 3D images, in image - * elements. The maximum along any dimension cannot exceed 2048. The type of - * this attribute is uint32_t[3]. - */ - HSA_EXT_AGENT_INFO_IMAGE_3D_MAX_ELEMENTS = 0x3007, - /** - * Maximum number of image layers in a image array. Must not exceed 2048. The - * type of this attribute is uint32_t. - */ - HSA_EXT_AGENT_INFO_IMAGE_ARRAY_MAX_LAYERS = 0x3008, - /** - * Maximum number of read-only image handles that can be created at any one - * time. Must be at least 128. The type of this attribute is uint32_t. - */ - HSA_EXT_AGENT_INFO_MAX_IMAGE_RD_HANDLES = 0x3009, - /** - * Maximum number of write-only and read-write image handles (combined) that - * can be created at any one time. Must be at least 64. The type of this - * attribute is uint32_t. - */ - HSA_EXT_AGENT_INFO_MAX_IMAGE_RORW_HANDLES = 0x300A, - /** - * Maximum number of sampler handlers that can be created at any one - * time. Must be at least 16. The type of this attribute is uint32_t. 
- */ - HSA_EXT_AGENT_INFO_MAX_SAMPLER_HANDLERS = 0x300B -}; - -/** @} */ - -#define hsa_ext_images_1_00 - -typedef struct hsa_ext_images_1_00_pfn_s { - hsa_status_t (*hsa_ext_image_get_capability)( - hsa_agent_t agent, hsa_ext_image_geometry_t geometry, - const hsa_ext_image_format_t *image_format, uint32_t *capability_mask); - - hsa_status_t (*hsa_ext_image_data_get_info)( - hsa_agent_t agent, const hsa_ext_image_descriptor_t *image_descriptor, - hsa_access_permission_t access_permission, - hsa_ext_image_data_info_t *image_data_info); - - hsa_status_t (*hsa_ext_image_create)( - hsa_agent_t agent, const hsa_ext_image_descriptor_t *image_descriptor, - const void *image_data, hsa_access_permission_t access_permission, - hsa_ext_image_t *image); - - hsa_status_t (*hsa_ext_image_destroy)(hsa_agent_t agent, - hsa_ext_image_t image); - - hsa_status_t (*hsa_ext_image_copy)(hsa_agent_t agent, - hsa_ext_image_t src_image, - const hsa_dim3_t *src_offset, - hsa_ext_image_t dst_image, - const hsa_dim3_t *dst_offset, - const hsa_dim3_t *range); - - hsa_status_t (*hsa_ext_image_import)( - hsa_agent_t agent, const void *src_memory, size_t src_row_pitch, - size_t src_slice_pitch, hsa_ext_image_t dst_image, - const hsa_ext_image_region_t *image_region); - - hsa_status_t (*hsa_ext_image_export)( - hsa_agent_t agent, hsa_ext_image_t src_image, void *dst_memory, - size_t dst_row_pitch, size_t dst_slice_pitch, - const hsa_ext_image_region_t *image_region); - - hsa_status_t (*hsa_ext_image_clear)( - hsa_agent_t agent, hsa_ext_image_t image, const void *data, - const hsa_ext_image_region_t *image_region); - - hsa_status_t (*hsa_ext_sampler_create)( - hsa_agent_t agent, const hsa_ext_sampler_descriptor_t *sampler_descriptor, - hsa_ext_sampler_t *sampler); - - hsa_status_t (*hsa_ext_sampler_destroy)(hsa_agent_t agent, - hsa_ext_sampler_t sampler); - -} hsa_ext_images_1_00_pfn_t; - -#ifdef __cplusplus -} // end extern "C" block -#endif /*__cplusplus*/ - -#endif diff --git 
a/runtime/hsa-runtime/utils/sp3/sp3-asic.h b/runtime/hsa-runtime/utils/sp3/sp3-asic.h deleted file mode 100644 index 5696ba53c4..0000000000 --- a/runtime/hsa-runtime/utils/sp3/sp3-asic.h +++ /dev/null @@ -1,181 +0,0 @@ -//===================================================================== -// Copyright 2016 (c), Advanced Micro Devices, Inc. All rights reserved. -// -/// \author AMD Developer Tools Team -/// \file -/// -//===================================================================== - -#ifndef SP3_ASIC_H -#define SP3_ASIC_H - - -#include "sp3-int.h" -#include "sp3-vm.h" - - -#ifdef __cplusplus -extern "C" { -#endif - - -// ASIC types - - -enum asic_backend { - ASIC_BACKEND_SI, - ASIC_BACKEND_CI, - ASIC_BACKEND_GFX8, - ASIC_BACKEND_GFX81, - ASIC_MAX_BACKEND, // Must be the last entry -}; - - -enum asic_cap_id { - ASIC_THREAD_SIZE = 1, - ASIC_FED_INSTRUCTIONS = 2, - ASIC_LEGACY_LOG = 3, - ASIC_LARGE_DS_READ = 4, - ASIC_32BANK_LDS = 5, -}; - - -struct asic_info { - const char *name; - enum asic_backend backend; // which backend to use - int asic_thread_size; // number of threads in a wave - int asic_fed_instructions; // FED instructions are available - int asic_legacy_log; // Legacy EXP and LOG opcodes are available - int asic_large_ds_read; // Large DS read opcodes (96b and 128b) are available - int asic_32bank_lds; // Full 32 bank lds P1LL_F16 INTERP instruction available -}; - - -struct sp3_asic_state { - struct sp3_asic_aluop { - int pos; // original position in code - int op, na, nc; // na = number of args, nc = number of consts in args - int lds, offset; // lds = is an LDS_IDX_OP subop, offset = LDS offset - unsigned dst; - unsigned arg[3]; - unsigned lit[3]; // float literals are no longer float at this point - unsigned flags; - int scalar; - } bundle [5]; - unsigned lds_lit[2], lds_mask[2]; - int nbundle; - int reorder; - int last_reorder, last_po[5]; - int nscalar; // number of nominally-scalar opcodes in bundle - int barrier_after; // require 
barrier after this clause - - // sp3-r6xx - int asic; - struct da_reloc { - unsigned addr, ref; - struct da_reloc *next; - } *da_relocs; - struct cf_reloc **instrels; - struct cf_reloc *labels; - int sinstrels; - int slabels; - char unk_name[16]; -}; -#define A S->ap - - -extern struct asic_info asics[]; -#define ASICNAME asics[A->asic].name -#define ASIC asics[A->asic] -void set_asic(Sp, int asic); -int find_asic(const char *name); - - -// opcode tables - -void sp3_unbuild_tables(void); -void sp3_si_unbuild_tables(void); -void sp3_ci_unbuild_tables(void); -void sp3_gfx8_unbuild_tables(void); - -void sp3_build_tables(void); -void sp3_si_build_tables(void); -void sp3_ci_build_tables(void); -void sp3_gfx8_build_tables(void); - - - - -// helper functions - - -#define FMT_FMT 0x00000000 -#define FMT_COMP 0x00010000 -#define FMT_ENDIAN 0x00020000 -#define FMT_NUM 0x00030000 -#define FMT_SRF 0x00040000 -#define FMT_MASK 0xFFFF0000 -#define FMT_IMASK 0x0000FFFF - -void mark_sgpr(Sp, unsigned); -void mark_vgpr(Sp, unsigned); -void mark_global(Sp, unsigned); -void mark_ctemp(Sp, unsigned); -int is_mod_bool(Sp, pnode *, const char *); -int get_mod_bool(Sp, pnode *, const char *); -int get_mod_int(Sp, pnode *, int, int); -int get_mod_int32(Sp, pnode *); -int par_cmask(Sp, pnode *); -unsigned reg_csel(Sp, unsigned , int); -unsigned reg_msel(Sp, unsigned *, int); - -const char *spec_sel_to_name(Sp, int sel); -const char *sp3_fmt_to_name(Sp, int cls, int val); -const char *sp3_si_fmt_to_name(Sp, int cls, int val); -const char *sp3_ci_fmt_to_name(Sp, int cls, int val); -const char *sp3_gfx8_fmt_to_name(Sp, int cls, int val); - -void add_reloc_label(Sp, int li, int blame); -void add_reloc_inst(Sp, int ii, int blame); -void add_reloc_cf(Sp, int offs); - -int grouping_for_group_size(Sp, int group_size); - -//JENNICA - this block of name_tree will go away, replace -//with backend specific. 
- -enum nametree_enum { - NAMETREE_OPCODES, - NAMETREE_OPCODES_0ARG, - NAMETREE_OPCODES_CALL, - NAMETREE_VTX_FMTS, - NAMETREE_SPEC_SELS, - NAMETREE_SPEC_VEC_SELS, - NAMETREE_SGPR_NAME_SELS, - NAMETREE_CONSTS, - NAMETREE_DEPRECATED, -}; - -struct name_tree **get_name_tree(struct sp3_state *S, enum nametree_enum whichtree); - -extern struct name_tree *opcodes_0arg; -extern struct name_tree *opcodes_call; -extern struct name_tree *vtx_fmts; -extern struct name_tree *spec_sels; -extern struct name_tree *spec_vec_sels; -extern struct name_tree *sgpr_name_sels; -extern struct name_tree *consts; -extern struct name_tree *deprecated; - -extern struct name_tree *asic_names; -struct asic_caps{const char *name; int id;}; -extern struct asic_caps asiccaps[]; -extern struct name_tree *asic_caps; //JENNICA - this may need to go away. - -void update_sgpr_names(Sp); - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/runtime/hsa-runtime/utils/sp3/sp3-int.h b/runtime/hsa-runtime/utils/sp3/sp3-int.h deleted file mode 100644 index a66550b1b7..0000000000 --- a/runtime/hsa-runtime/utils/sp3/sp3-int.h +++ /dev/null @@ -1,553 +0,0 @@ -//===================================================================== -// Copyright 2016 (c), Advanced Micro Devices, Inc. All rights reserved. 
-// -/// \author AMD Developer Tools Team -/// \file -/// -//===================================================================== - -#ifndef SP3_INT_H -#define SP3_INT_H - -#include "sp3.h" - - -#ifdef _MSC_VER -#ifndef strdup -#define strdup _strdup -#endif -#ifndef stricmp -#define stricmp _stricmp -#endif -#ifndef strcasecmp -#define strcasecmp _stricmp -#endif -#pragma warning(disable:4090 4204 4245 4296 4389 4701 4702) -#endif - -#ifdef __cplusplus -extern "C" { -#endif - -struct sp3_state; -#define Sp struct sp3_state *S - -// clause types - -#define CT_NONE 0 -#define CT_SHADER 1 - -// parse tree - -#define P_NUM 0 // integer -#define P_FLT 1 // float -#define P_STR 2 // string -#define P_REG 3 // register component(s) -#define P_RANGE 4 // closed range -#define P_RANGEL 5 // right-open range -#define P_SLICE 6 // array concatenation (used for slices) -#define P_RCAST 7 // integer -> register cast -#define P_LIST 8 // list (internal to the parser only) -#define P_VAR 9 // variable (with name) -#define P_VARE 10 // variable-element (result of lvalue slice) -#define P_CL 11 // clause -#define P_CLI 12 // clause instructions -#define P_WHILE 13 // while loop -#define P_REPEAT 14 // repeat-until loop -#define P_IF 15 // if or if-else -#define P_CFOR 16 // C-style for loop -#define P_FOR 17 // vector for loop -#define P_RET 18 // return from function -#define P_CSLICE 19 // componentwise slice -#define P_UREF 20 // unresolved reference -#define P_FREF 21 // resolved reference -#define P_CALL 22 // function call -#define P_PRINT 23 // print to stdout -#define P_PAR 24 // function parameters -#define P_NF 25 // native function -#define P_OMOD 27 // opcode modifier -#define P_OMODS 28 // opcode modifiers -#define P_OPARS 29 // opcode parameters -#define P_OP 30 // opcode -#define P_SWIZ0 31 // register swizzles with N components wrapped -#define P_SWIZ1 32 // -"- -#define P_SWIZ2 33 // -"- -#define P_SWIZ3 34 // -"- -#define P_SWIZ4 35 // -"- -#define P_VTXFMT 36 
// vertex formats -#define P_LABEL 37 // unique identifier of a label -#define P_LINIT 38 // generate label identifiers -#define P_MARK 39 // mark a label -#define P_OPCALL 40 // opcode that does a clause instantiation on par0 -#define P_ASIC 41 // ASIC model -#define P_ASICCAP 42 // ASIC capability -#define P_NCLOS 43 // create closure -#define P_CLOS 44 // closure -#define P_SH 45 // compiled shader - -#define P_NOT 0x100 -#define P_BNOT 0x101 -#define P_NEG 0x102 -#define P_MUL 0x103 -#define P_DIV 0x104 -#define P_MOD 0x105 -#define P_ADD 0x106 -#define P_SUB 0x107 -#define P_SHL 0x108 -#define P_SHR 0x109 -#define P_SAR 0x10A -#define P_LT 0x10B -#define P_GT 0x10C -#define P_LEQ 0x10D -#define P_GEQ 0x10E -#define P_EQ 0x10F -#define P_NEQ 0x110 -#define P_BAND 0x111 -#define P_BOR 0x112 -#define P_BXOR 0x113 -#define P_AND 0x114 -#define P_OR 0x115 -#define P_XOR 0x116 -#define P_SEL 0x117 -#define P_XDEC 0x118 -#define P_XINC 0x119 -#define P_DECX 0x11A -#define P_INCX 0x11B -#define P_ASGN 0x11C -#define P_IND 0x11D -#define P_NOP 0x11E -#define P_VSUM 0x11F -#define P_VPROD 0x120 -#define P_VBOR 0x121 -#define P_VBAND 0x122 -#define P_VBXOR 0x123 -#define P_VOR 0x124 -#define P_VAND 0x125 -#define P_VXOR 0x126 -#define P_VMIN 0x127 -#define P_VMAX 0x128 -#define P_CADD 0x129 -#define P_CSUB 0x12A -#define P_CMUL 0x12B -#define P_CDIV 0x12C -#define P_CSHL 0x12D -#define P_CSHR 0x12E -#define P_CSAR 0x12F -#define P_CBAND 0x130 -#define P_CBOR 0x131 -#define P_CBXOR 0x132 -#define P_CAND 0x133 -#define P_COR 0x134 -#define P_CXOR 0x135 -#define P_CMIN 0x136 -#define P_CMAX 0x137 -#define P_MIN 0x138 -#define P_MAX 0x139 -#define P_PROBE 0x13A -#define P_BITS 0x13B - -// register types -#define R_VGPR 0x00000 -#define R_OFF 0x04000 -#define R_SNAME 0x06000 -#define R_INTERP 0x08000 -#define R_SPEC 0x0A000 -#define R_SGPR 0x0C000 -#define R_EXPBUF 0x0E000 -#define R_TMASK 0x1E000 - -// magic values for R_SPEC -#define R_P_CL 3 // used internally only (inline 
literal) -#define R_P_CI_L 0xDB // used internally only -#define R_P_LDX_L 0xDB // any LDS inline -#define R_P_LDS_L 0xDF // direct LDS inline -#define R_P_LDS_H 0xE0 -#define R_P_LDX_H 0xE0 -#define R_P_CI_S 0xF3 // end of new R8xx constants -#define R_P_CI_H 0xFC -#define R_P_NOTLAST 0xFF// notlast operand for export - -// magic values for R_SNAME -#define R_S_SCRATCH 1 -#define R_S_PSVS_STATE 2 -#define R_S_SO_WRITE_INDEX 3 -#define R_S_SO_BASE_OFFSET0 4 -#define R_S_SO_BASE_OFFSET1 5 -#define R_S_SO_BASE_OFFSET2 6 -#define R_S_SO_BASE_OFFSET3 7 -#define R_S_OFFCHIP_LDS 8 -#define R_S_IS_OFFCHIP 9 -#define R_S_RING_OFFSET 10 -#define R_S_GS_WAVE_ID 11 -#define R_S_TG_SIZE 12 -#define R_S_TF_BASE 13 -#define R_S_TGID_X 14 -#define R_S_TGID_Y 15 -#define R_S_TGID_Z 16 -#define R_S_WAVE_CNT 17 -#define R_S_GLOBAL_WAVE_ID 18 - -// register components -#define R_CMASK 0x1C00 -#define R_CSHIFT 10 -#define R_CX 0x0000 -#define R_CY 0x0400 -#define R_CZ 0x0800 -#define R_CW 0x0C00 -#define R_CS 0x1000 // used to identify scalar elements -#define R_CN 0x1800 - -#define R_IMASK 0x03FF - -// source transforms -#define R_NEG 0x80000 -#define R_ABS 0x100000 -#define R_SEXT 0x200000 - -// subencodings for export targets - -#define R_E_TMASK 0x0380 -#define R_E_MRT 0x0000 -#define R_E_Z 0x0080 -#define R_E_POS 0x0100 -#define R_E_PARAM 0x0180 -#define R_E_ATTR 0x0280 -#define R_E_NULL 0x0300 - -#define R_E_IMASK 0x007F - -// subencodings for interp - -#define R_I_TMASK 0x0380 -#define R_I_P10 0x0000 -#define R_I_P20 0x0080 -#define R_I_P0 0x0100 - -// function parameters -#define F_CANY 0x00000000 -#define F_CNUM 0x01000000 -#define F_CREG 0x02000000 -#define F_CTMP 0x03000000 -#define F_CFPTR 0x04000000 -#define F_CINT 0x05000000 -#define F_CMASK 0x07000000 -#define F_OPT 0x40000000 -#define F_VEC 0x80000000 - -typedef struct pnode { - struct pnode *gc_next; - int gc_mark; - int type; - int et; // error reporting tag - int ni; // number of items - union pnode_item { - int 
num; // integer - float flt; // float - char *str; // string - struct pnode *ptr; // tree item - struct { - struct pnode *v; - int e; - } ve; // variable-element pair - struct { - int p; - char *n; - } var; // variable (stack offset, name) - struct sp3_shader *sh; - unsigned int reg; // register components - struct pnode *(* nf)(Sp, struct pnode **); // native function - } i[1]; -} pnode; - -pnode *p_str(Sp, char *s); // wrap a string -pnode *p_float(Sp, float f); // wrap a float -pnode *p_num(Sp, int i); // wrap an integer -pnode *p_vec(Sp, int type, int len); // create a vector -pnode *p_list(Sp, pnode *list, pnode *item); // append item to P_LIST -pnode *p_list_rev(Sp, pnode *list); // reverse the order of the list -pnode *p_tree(Sp, int type, int nitems, ...); // create a tree node -pnode *p_l2t(Sp, int type, pnode *list); // list to tree -pnode *p_l2v(Sp, int type, pnode *list); // list to vector -pnode *p_x2x(Sp, int type, pnode *p); // cast to type -pnode *p_clause(Sp, int vstk, int lstk, pnode *parlist, pnode *instlist, int type); -pnode *p_reg(Sp, int type, int idx); // wrap a register -pnode *p_swizzle(Sp, char *str); // parse a swizzle string -pnode *p_lv2rv(Sp, pnode *lval); // lvalue to rvalue -pnode *p_newlabel(Sp, pnode *t, int tag); // define new label -pnode *p_label(Sp, int cnt); // fill with label IDs -pnode *p_clone(Sp, pnode *src); - -void print_node(pnode *); // print to stdout - -void mark_gc_storage(Sp); // mark all internal storage of sp3 for gc - -// functions provided by machine driver -int is_opcode(struct sp3_state *S, const char *name); // is an opcode (any) -int is_opcode_0arg(struct sp3_state *S, const char *name); // is an opcode (0-argument) -int is_opcode_call(struct sp3_state *S, const char *name); // is a call op (1st argument is a closure) -void sp3_gen_opcode(Sp, const char *op, pnode *par, pnode *mod); -void sp3_si_gen_opcode(Sp, const char *op, pnode *par, pnode *mod); -void sp3_ci_gen_opcode(Sp, const char *op, pnode *par, 
pnode *mod); -void sp3_gfx8_gen_opcode(Sp, const char *op, pnode *par, pnode *mod); -pnode *machine_const(Sp, char *name); // if a machine const, parse it (else NULL) -void mark_label(Sp, int li); // "label:" -pnode *asic_getcap(Sp, int id); // get ASIC capability #id -void mach_cleanup(Sp); // initialize generator state - -// name trees - -#define NT_SEARCH 0 -#define NT_ADD 1 -#define NT_ADD_ONLY 2 -#define NT_ADD_STRDUP 4 -struct name_tree { - const char *name; - int tag; - int add; - struct name_tree *l, *r; -}; - -struct name_tree *name_tree_operation(struct name_tree **t, const char *name, int tag, int add); -void name_tree_delete(struct name_tree **t); - -// symbol table - -void f_decl(Sp, char *, pnode *); -pnode *f_ref(Sp, char *); -void f_check(Sp); -pnode *f_call(Sp, const char *); - -void f_decl_native(Sp, int, char *, pnode *(*)(Sp, pnode **), int, ...); - -// parse-time variable stack - -void vs_decl(Sp, const char *, int tag); -int vs_lookup(Sp, const char *, pnode **, int); -char *vs_getname(pnode *); - -void vs_enter_func(Sp); -int vs_leave_func(Sp, int *); // returns number of stack allocations & - // (through param) number of lstack allocs -void vs_enter_block(Sp); -void vs_leave_block(Sp); - -int vs_get_topmax(Sp); // returns number of stack allocation for top level - -// runtime variable stack - -void rv_set(Sp, pnode *, pnode *); -pnode *rv_get(Sp, pnode *); -void rv_alloc(Sp, int); -void rv_setpar(Sp, int, pnode *); -int rv_enter(Sp, int); -void rv_leave(Sp, int); - -int rl_enter(Sp, int); -void rl_leave(Sp, int); - -void rv_leave_native(Sp); -pnode **rv_getpar_native(Sp); - -// all-in-one variable setter - -void rv_set_by_name(Sp, const char *, pnode *); - -// growable binary buffer - -typedef struct grow_buf { - int n, size; - unsigned i[1]; -} grow_buf; - -grow_buf *gb_alloc(int); -grow_buf *gb_append(grow_buf *, int, unsigned *); -grow_buf *gb_add(grow_buf *, unsigned); -grow_buf *gb_reg(grow_buf *, unsigned, unsigned); - -// clause 
contents - -struct clause_info { - unsigned base; - grow_buf *data; - int type; -}; - -void start_clause(Sp, int); -void cb_emit(Sp, unsigned *, int); -int cb_ptr(Sp); -void cb_patch(Sp, int, int, unsigned); - -int remap_clauses(Sp); - -struct sp3_shader *gen_output(Sp); -void convert_relocs(Sp); -void perform_relocs(Sp); - -pnode *shader_clos(Sp, pnode *); // call this to get a binary shader from closure -pnode *shader_name(Sp, const char *); // call this to get a binary shader from name - -void set_const(Sp, int idx, unsigned val); -int find_const(Sp, unsigned val); - -void set_kbuf(Sp, int kbuf, int idx, unsigned val); - -const char *asic_name(Sp); -int asic_id(Sp); -int asic_capbyname(int, const char *); -int asic_capbyid(int, int); - -// register stream packer -int sp3_guess_shader_type(struct sp3_state *S, struct sp3_shader *sh); -int sp3_si_guess_shader_type(struct sp3_shader *sh); -int sp3_ci_guess_shader_type(struct sp3_shader *sh); -int sp3_gfx8_guess_shader_type(struct sp3_shader *sh); -void sp3_pack_reg_stream(Sp, int type, struct sp3_shader *sh); -void sp3_si_pack_reg_stream(Sp, int type, struct sp3_shader *sh); -void sp3_ci_pack_reg_stream(Sp, int type, struct sp3_shader *sh); -void sp3_gfx8_pack_reg_stream(Sp, int type, struct sp3_shader *sh); -void unpack_reg_stream(Sp, struct sp3_shader *sh); - -// instances - -int new_instance(Sp, pnode *, int); -void eval_instances(Sp); -int get_instance_clause(Sp, int); -int get_instance_type(Sp, int); - -// error reporting - -void et_parse_mode(Sp, int); -int et_get_id(Sp); -#ifdef _MSC_VER -__declspec(noreturn) -#endif -void et_error(Sp, char *, char *, ...) -#ifdef __GNUC__ -__attribute__ ((__noreturn__)) -__attribute__ ((format(printf, 3, 4))) -#endif -; -void et_warning(Sp, char *, char *, ...) 
-#ifdef __GNUC__ -__attribute__ ((format(printf, 3, 4))) -#endif -; -void et_blame(Sp, pnode *); -void et_blame_et(Sp, int); -void et_print(Sp, pnode *); -int et_get_blame(Sp); - -// text buffer for disasm -void bprintf(Sp, char *, ...) -#ifdef __GNUC__ -__attribute__ ((format(printf, 2, 3))) -#endif -; -void bcmt(Sp, const char *cmt, const char *start, const char *line, const char *end); -void btab(Sp, int); -char *bget(Sp); - -// state structure -struct sp3_state { - // flex - void *scanner; - void *yystate; - - char *yyfile; - int yyline; - - // sp3-gc - struct sp3_gc_state *gc; - - // asic private - struct sp3_asic_state *ap; - - // sp3-eval - int retflag; - pnode *retval; - - // sp3-int - struct sp3_shader config; - - int clause_id; // counts up during evaluation - int clause_type; - struct clause_info *clauses; - int nclauses, sclauses; - - int memsize, ctsizes[4]; - int in_shader; - - char *disasm_text; - int disasm_column; - int disasm_len, disasm_maxlen; - - sp3_vma *comment_map; - void *comment_ctx; - sp3_comment_cb comment_top, comment_right; - - unsigned const_buf[1024]; - int const_vld[1024], const_vld_range; - - unsigned *kval[16]; - int knum[16]; - - struct et_record { - const char *file; - int line; - } *et_names; - int et_node; - int et_parsing; - int net_names, set_names; - - char *fname_last; - struct name_tree *fnames; - struct fsym { - char *name; - pnode *func; - struct fref *refs; - struct fsym *l, *r; - } *fsymbols; - int func_id; // counts up during parsing - - struct instance { - int type; - int clause_id; - pnode *call; - } *instances; - int ninstances, sinstances; - - struct vstack { - char *name; - int tag; - int vs_sp, vs_level; - struct vstack *next; - } *var_stack, *lbl_stack; - int vs_max, vs_sp, vs_top, vs_topmax; - int ls_max, ls_sp; - - pnode **rl_stack; - int rl_sp, rl_ss, rl_base, rl_id, rl_size; - - pnode **rv_stack; - int rv_sp, rv_ss, rv_base, rv_size; - - int werror, wcount; - const char *err_hdr; - - unsigned 
entry_point_table_size; - unsigned entry_point_table_alloc_size; - sp3_vmaddr *entry_point_table; -}; -struct sp3_state *sp3_new_state(void); -void sp3_asic_attach_state(Sp); -void sp3_new_parser(Sp); -void sp3_free_parser(Sp); -void sp3_free_state(Sp); - -void reg_natives(Sp); - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/runtime/hsa-runtime/utils/sp3/sp3-type.h b/runtime/hsa-runtime/utils/sp3/sp3-type.h deleted file mode 100644 index 160dc945ed..0000000000 --- a/runtime/hsa-runtime/utils/sp3/sp3-type.h +++ /dev/null @@ -1,137 +0,0 @@ -//===================================================================== -// Copyright 2016 (c), Advanced Micro Devices, Inc. All rights reserved. -// -/// \author AMD Developer Tools Team -/// \file -/// -//===================================================================== - -#ifndef SP3_TYPE_H -#define SP3_TYPE_H - -#ifdef __cplusplus -extern "C" { -#endif - -/// @file sp3-type.h -/// @brief sp3 types - -enum sp3_shtype { - SP3_SHTYPE_NONE = -1, - SP3_SHTYPE_PS = 0, - SP3_SHTYPE_VS = 1, - SP3_SHTYPE_GS = 2, - SP3_SHTYPE_ES = 3, - SP3_SHTYPE_HS = 4, - SP3_SHTYPE_LS = 5, - SP3_SHTYPE_CS = 6, -}; - -enum sp3_count { - SP3_NUM_MRT = 8, - SP3_NUM_STRM = 4, -}; - -enum sp3_flag { - SP3DIS_NO_STATE = 0x01, - SP3DIS_NO_BINARY = 0x02, - SP3DIS_COMMENTS = 0x04, - SP3DIS_NO_GPR_COUNT = 0x08, - SP3DIS_FORCEVALID = 0x10, - SP3DIS_NO_ASIC = 0x20, -}; - -/// @brief Shader context. Contains no user-visible fields. -struct sp3_context; - -/// @brief Storage entry for register streams. -struct sp3_reg { - unsigned index; ///< One of the mm* values from chip_enum.h. - unsigned value; -}; - -/// @brief Wrapped shader metadata. -/// -/// After generation, shaders are encapsulated in sp3_shader structures. -/// -/// Those structures contain the shader binary, its register stream, -/// constants and constant buffers and metadata needed for SC compatibility. -struct sp3_shader { - int type; ///< One of the SHTYPE_* constants. 
- int asic_int; ///< Internal ASIC index. Do not use. - const char *asic; ///< ASIC name as a string ("RV870" etc). - unsigned size; ///< Size of the compiled shader, in 32-bit words. - unsigned nsgprs; ///< Number of scalar GPRs used. - unsigned nvgprs; ///< Number of vector GPRs used. - unsigned trap_present; - unsigned user_sgpr_count; - unsigned scratch_en; - unsigned dispatch_draw_en; - unsigned so_en; - unsigned so_base0_en; - unsigned so_base1_en; - unsigned so_base2_en; - unsigned so_base3_en; - unsigned oc_lds_en; - unsigned tg_size_en; - unsigned tidig_comp_cnt; ///< Number of components(-1) enabled for thread id in group - unsigned tgid_x_en; - unsigned tgid_y_en; - unsigned tgid_z_en; - unsigned wave_cnt_en; - unsigned sgpr_scratch; - unsigned sgpr_psvs_state; - unsigned sgpr_so_write_index; - unsigned sgpr_so_base_offset0; - unsigned sgpr_so_base_offset1; - unsigned sgpr_so_base_offset2; - unsigned sgpr_so_base_offset3; - unsigned sgpr_offchip_lds; - unsigned sgpr_is_offchip; - unsigned sgpr_ring_offset; - unsigned sgpr_gs_wave_id; - unsigned sgpr_global_wave_id; - unsigned sgpr_tg_size; - unsigned sgpr_tgid_x; - unsigned sgpr_tgid_y; - unsigned sgpr_tgid_z; - unsigned sgpr_tf_base; - unsigned sgpr_wave_cnt; - unsigned pc_exports; ///< Range of parameters exported (if VS). - unsigned pos_export; ///< Shader executes a position export (if VS). - unsigned cb_exports; ///< Range of MRTs exported (if PS). - unsigned mrtz_export_format; ///< Export format of the mrtz export. - unsigned z_export; ///< Shader executes a Z export (if PS). - unsigned pops_en; ///< Shader is POPS (PS) - unsigned load_collision_waveid; ///< Shader sets load collision waveid (if PS). - unsigned stencil_test_export; ///< Shader exports stencil (if PS). - unsigned stencil_op_export; ///< Shader exports stencil (if PS). - unsigned kill_used; ///< Shader executes ALU KILL operations. - unsigned cb_masks[SP3_NUM_MRT]; ///< Component masks for each MRT exported (if PS). 
- unsigned emit_used; ///< EMIT opcodes used (if GS). - unsigned covmask_export; ///< Shader exports coverage mask (if PS). - unsigned mask_export; ///< Shader exports mask (if PS). - unsigned strm_used[SP3_NUM_STRM]; ///< Streamout operations used (map). - unsigned scratch_used; ///< Scratch SMX exports used. - unsigned scratch_itemsize; ///< Scratch ring item size. - unsigned reduction_used; ///< Reduction SMX exports used. - unsigned ring_used; ///< ESGS/GSVS ring SMX exports used. - unsigned ring_itemsize; ///< ESGS/GSVS ring item size (for ES/GS respectively). - unsigned vertex_size[4]; ///< GSVS ring vertex size (for GS). - unsigned mem_used; ///< Raw memory SMX exports used. - unsigned rats_used; ///< Mask of RATs (UAVs) used - unsigned group_size[3]; ///< Wavefront group size (for ELF files). - unsigned alloc_lds; ///< Number of LDS bytes allocated for wave group. (translates to lds_size in CS and LS) - unsigned *data; ///< Shader binary data. - unsigned nregs; ///< Number of register writes in the stream. - struct sp3_reg *regs; ///< Register writes (index-value pairs). -}; - -/// @brief Comment callback. -typedef const char *(*sp3_comment_cb)(void *, int); - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/runtime/hsa-runtime/utils/sp3/sp3-vm.h b/runtime/hsa-runtime/utils/sp3/sp3-vm.h deleted file mode 100644 index 15c1baeb3c..0000000000 --- a/runtime/hsa-runtime/utils/sp3/sp3-vm.h +++ /dev/null @@ -1,119 +0,0 @@ -//===================================================================== -// Copyright 2016 (c), Advanced Micro Devices, Inc. All rights reserved. 
-// -/// \author AMD Developer Tools Team -/// \file -/// -//===================================================================== - -#ifndef SP3_VM_H -#define SP3_VM_H - -#ifdef __cplusplus -extern "C" { -#endif - -#if defined (WIN_OS) && !defined(SP3_STATIC_LIB) - #if defined(DLL_EXPORT_SP3) - #define SP3_EXPORT __declspec(dllexport) - #else - #define SP3_EXPORT __declspec(dllimport) - #endif -#else - #define SP3_EXPORT -#endif - -#ifdef _MSC_VER -typedef __int32 int32_t; -typedef unsigned __int32 uint32_t; - -typedef __int64 int64_t; -typedef unsigned __int64 uint64_t; -#else -#include <stdint.h> -#endif - -struct sp3_vma; - -/// @file sp3-vm.h -/// @brief sp3 VM API -/// -/// The VM API is used to manage virtual memory maps. Those maps are -/// used for binary storage for disassembly, as they can naturally -/// mirror the GPU's memory map (so no register translation is needed). - -#define SP3_VM_PAGESIZE 64 - -/// @brief VM addresses are 64-bit and the address unit is 32 bits -/// -typedef uint64_t sp3_vmaddr; - -/// @brief Callback function that will fill a VMA on demand -/// -/// The VMA to be filled will be specified through the request address. -/// The callback should fill the VMA using sp3_vm_write calls. -typedef void (* sp3_vmfill)(struct sp3_vma *vm, sp3_vmaddr addr, void *ctx); - -/// @brief VM area -/// -/// VMAs are kept in a sorted list -typedef struct sp3_vma { - sp3_vmaddr base, len; - sp3_vmfill fill; - void *fill_ctx; - uint32_t *data; - struct sp3_vma *prev, *next; -} sp3_vma; - -/// @brief Create a new VM that is empty. -/// -SP3_EXPORT -sp3_vma *sp3_vm_new(void); - -/// @brief Create a new VM that has a sp3_vmfill callback. -/// -SP3_EXPORT -sp3_vma *sp3_vm_new_fill(sp3_vmfill fill, void *ctx); - -/// @brief Create a new VM from an array of words. -/// @param base VM address to load array at. -/// @param len Number of 32-bit words in the array. -/// @param data Pointer to the array.
-/// -SP3_EXPORT -sp3_vma *sp3_vm_new_ptr(sp3_vmaddr base, sp3_vmaddr len, const uint32_t *data); - -/// @brief Find a VMA, optionally adding it. -/// @param vm VM to search in. -/// @param addr Address to search for. -/// @param add Flag indicating whether a failure should result in adding a new VMA. -/// -SP3_EXPORT -sp3_vma *sp3_vm_find(sp3_vma *vm, sp3_vmaddr addr, int add); - -/// @brief Write a word to a VM. -/// -SP3_EXPORT -void sp3_vm_write(sp3_vma *vm, sp3_vmaddr addr, uint32_t val); - -/// @brief Read a word from a VM. -/// -SP3_EXPORT -uint32_t sp3_vm_read(sp3_vma *vm, sp3_vmaddr addr); - -/// @brief Probe VM for presence. -/// @return 1 if the specified address is backed in the VM, 0 otherwise. -/// -SP3_EXPORT -int sp3_vm_present(sp3_vma *vm, sp3_vmaddr addr); - -/// @brief Free a VM and all its storage. -/// -SP3_EXPORT -void sp3_vm_free(sp3_vma *vm); - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/runtime/hsa-runtime/utils/sp3/sp3.h b/runtime/hsa-runtime/utils/sp3/sp3.h deleted file mode 100644 index 7ecc8e67a4..0000000000 --- a/runtime/hsa-runtime/utils/sp3/sp3.h +++ /dev/null @@ -1,198 +0,0 @@ -//===================================================================== -// Copyright 2016 (c), Advanced Micro Devices, Inc. All rights reserved. -// -/// \author AMD Developer Tools Team -/// \file -/// -//===================================================================== - -#ifndef SP3_H -#define SP3_H - -#ifdef __cplusplus -extern "C" { -#endif - -#include "sp3-vm.h" -#include "sp3-type.h" - -/// @file sp3.h -/// @brief sp3 API - -/// @brief Get version of the sp3 library. -/// -/// @return String containing the version number. -/// -SP3_EXPORT const char *sp3_version(void); - -/// @brief Create a new sp3 context. -/// -SP3_EXPORT struct sp3_context *sp3_new(void); - -/// @brief Set option for sp3. -/// -/// @param state sp3 context. -/// @param option Option name. Unknown options will raise an error. -/// @param value Option value. 
NULL is used to represent value-less options. -/// -SP3_EXPORT void sp3_set_option(struct sp3_context *state, const char *option, const char *value); - -/// @brief Parse a file into a context. -/// -/// If 'file' is NULL, parse stdin. -/// -SP3_EXPORT void sp3_parse_file(struct sp3_context *state, const char *file); - -/// @brief Parse a string into a context. -/// -SP3_EXPORT void sp3_parse_string(struct sp3_context *state, const char *string); - -/// @brief Parse a file from the standard library into a context. -/// -SP3_EXPORT void sp3_parse_library(struct sp3_context *state, const char *name); - -/// @brief Call a sp3 function. -/// -SP3_EXPORT void sp3_call(struct sp3_context *state, const char *func); - -/// @brief Call a sp3 CF clause. -/// -/// @param state sp3 context. -/// @param cffunc Name of clause to call. By convention, this is "main". -/// -/// @return A compiled and linked shader. Free memory with sp3_free(). -/// -SP3_EXPORT struct sp3_shader *sp3_compile(struct sp3_context *state, const char *cffunc); - -/// @brief Free a sp3_shader. -/// -SP3_EXPORT void sp3_free_shader(struct sp3_shader *sh); - -/// @brief Get current ASIC name set for a context. -/// -SP3_EXPORT const char *sp3_getasic(struct sp3_context *state); - -/// @brief Set current ASIC name for a context. -/// -SP3_EXPORT void sp3_setasic(struct sp3_context *state, const char *chip); - -/// @brief Set global variable in context to an integer. -/// -SP3_EXPORT void sp3_set_param_int(struct sp3_context *state, const char *name, int value); - -/// @brief Set global variable in context to an integer vector. -/// -SP3_EXPORT void sp3_set_param_intvec(struct sp3_context *state, const char *name, int size, const int *value); - -/// @brief Set global variable in context to a float. -/// -SP3_EXPORT void sp3_set_param_float(struct sp3_context *state, const char *name, float value); - -/// @brief Set global variable in context to a float vector. 
-/// -SP3_EXPORT void sp3_set_param_floatvec(struct sp3_context *state, const char *name, int size, const float *value); - -/// @brief Set error message header. -/// -SP3_EXPORT void sp3_set_error_header(struct sp3_context *state, const char *str); - -/// @brief Get ASIC metrics for the ASIC in current state. -/// -/// Used by ELF tools to fill in some CAL fields. -/// -SP3_EXPORT int sp3_asicinfo(struct sp3_context *state, const char *name); - -/// @brief Free a context allocated by sp3_new/open/parse. -/// -SP3_EXPORT void sp3_close(struct sp3_context *state); - -/// @brief Disassemble a shader. -/// -/// This call is likely to change to something that will take a filled sp3_shader structure later on. -/// -/// @param state sp3 context (use sp3_new to allocate and sp3_setasic to set ASIC). -/// @param bin Memory map with the opcodes (see sp3-vm.h). -/// @param base Start of the shader in the memory map (in VM entries, i.e. 32-bit words). -/// @param name Same to give the disassembled shader. -/// @param shader_type One of the SHTYPE_* constants. -/// @param include Literal text to include in the CF clause (NULL includes nothing). -/// @param max_len Maximum length of CF clause. Matters if SP3DIS_FORCEVALID is set. -/// @param flags A mask of SP3DIS_* flags. -/// -/// @return Shader disassembly as a string (allocated with malloc()). Free memory with sp3_free(). -/// -SP3_EXPORT char *sp3_disasm(struct sp3_context *state, sp3_vma *bin, sp3_vmaddr base, const char *name, int shader_type, const char *include, unsigned max_len, unsigned flags); - -/// @brief Disassemble a single shader instruction. -/// -/// This call is likely to change to something that will take a filled sp3_shader structure later on. -/// -/// @param state sp3 context (use sp3_new to allocate and sp3_setasic to set ASIC). -/// @param inst Pointer to dwords containing instruction (exact number of dwords required depends on instruction). 
-/// @param base Start of the shader in the memory map (in VM entries, i.e. 32-bit words). -/// @param addr Address of the instruction being disassembled (in VM entries, i.e. 32-bit words). -/// @param shader_type One of the SHTYPE_* constants. -/// @param flags A mask of SP3DIS_* flags. -/// -/// @return Shader disassembly as a string (allocated with malloc()). Free memory with sp3_free(). -/// -SP3_EXPORT char *sp3_disasm_inst(struct sp3_context *state, const unsigned inst[2], sp3_vmaddr base, sp3_vmaddr addr, int shader_type, unsigned flags); - -/// @brief Parse a register stream. -/// -/// Can be called before sp3_disasm to preset things like ALU, boolean and loop constants. -/// -/// This call is likely to merge with sp3_disasm later on. -/// -/// @param state sp3 context to fill with state. -/// @param nregs Number of register entries. -/// @param regs Register stream to parse. -/// @param shader_type One of the SHTYPE_* constants. -/// -SP3_EXPORT void sp3_setregs(struct sp3_context *state, unsigned nregs, const struct sp3_reg *regs, int shader_type); - - -/// @brief Set shader comments -/// -/// @param state sp3 context. -/// @param map Map of comments (0 for no comment, other values will be passed to the callback). -/// @param f_top Callback returning comment to place above the opcode. -/// @param f_right Callback returning comment to place to the right of the opcode. -/// @param ctx Void pointer to pass to comment callbacks. -/// -SP3_EXPORT void sp3_setcomments(struct sp3_context *state, sp3_vma *map, sp3_comment_cb f_top, sp3_comment_cb f_right, void *ctx); - -/// @brief Set alternate shader entry points -/// -/// Used for disassembly; this marks an additional location in memory -/// (besides the start address) where shader code may be found. Generally -/// required for jump tables and any case where the shader may perform -/// indirect jumps to ensure that disassembly locates all shader -/// instructions. 
-/// -/// @param state sp3 context (use sp3_new to allocate and sp3_setasic to set ASIC). -/// @param addr Address of the instruction being disassembled (in VM entries, i.e. 32-bit words). -/// -SP3_EXPORT void sp3_setentrypoint(struct sp3_context *state, sp3_vmaddr addr); - -/// @brief Clear alternate shader entry points -/// -/// Clear all entry points previously set with sp3_setentrypoint. -/// -/// @param state sp3 context (use sp3_new to allocate and sp3_setasic to set ASIC). -/// -SP3_EXPORT void sp3_clearentrypoints(struct sp3_context *state); - -/// @brief Free memory allocated by sp3. -/// -/// Windows DLLs that allocate memory have to free it. This function -/// should be used to free the result of sp3_disasm, sp3_compile etc. -/// -SP3_EXPORT void sp3_free(void *ptr); - -#ifdef __cplusplus -} -#endif - - -#endif