Remove opensrc test files.
[git-p4: depot-paths = "//depot/stg/hsa/drivers/hsa/runtime/": change = 1249961]
Esse commit está contido em:
@@ -1,171 +0,0 @@
|
||||
################################################################################
|
||||
##
|
||||
## The University of Illinois/NCSA
|
||||
## Open Source License (NCSA)
|
||||
##
|
||||
## Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
|
||||
##
|
||||
## Developed by:
|
||||
##
|
||||
## AMD Research and AMD HSA Software Development
|
||||
##
|
||||
## Advanced Micro Devices, Inc.
|
||||
##
|
||||
## www.amd.com
|
||||
##
|
||||
## Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
## of this software and associated documentation files (the "Software"), to
|
||||
## deal with the Software without restriction, including without limitation
|
||||
## the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
## and/or sell copies of the Software, and to permit persons to whom the
|
||||
## Software is furnished to do so, subject to the following conditions:
|
||||
##
|
||||
## - Redistributions of source code must retain the above copyright notice,
|
||||
## this list of conditions and the following disclaimers.
|
||||
## - Redistributions in binary form must reproduce the above copyright
|
||||
## notice, this list of conditions and the following disclaimers in
|
||||
## the documentation and/or other materials provided with the distribution.
|
||||
## - Neither the names of Advanced Micro Devices, Inc,
|
||||
## nor the names of its contributors may be used to endorse or promote
|
||||
## products derived from this Software without specific prior written
|
||||
## permission.
|
||||
##
|
||||
## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
## THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
## OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
## ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
## DEALINGS WITH THE SOFTWARE.
|
||||
##
|
||||
################################################################################
|
||||
|
||||
## Oldest CMake release this listfile declares support for.
cmake_minimum_required(VERSION 2.8.0)

## The build needs GCC 4.8 or newer.

## Windows builds are rejected up front, before anything else is configured.
if(WIN32)
  message(FATAL_ERROR "Windows build is not supported.")
endif()

## Let include()/find_package() also search the sibling cmake_modules directory.
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/../cmake_modules")
|
||||
|
||||
## Pick the target word size from the environment.
## The defaults describe a 64-bit build; they are overridden only when
## HSATHK_BUILD_TARGET_BITS is exactly 32.
##   ONLY64STR - suffix later appended to the runtime target name
##   IS64BIT   - 1 for a 64-bit build, 0 for a 32-bit build
set(ONLY64STR "64")
set(IS64BIT 1)
if("$ENV{HSATHK_BUILD_TARGET_BITS}" STREQUAL 32)
  set(ONLY64STR "")
  set(IS64BIT 0)
endif()
|
||||
|
||||
## Verify that the KFD Thunk header and library can be found before going
## any further. The probed paths are quoted so that a value containing ';'
## reaches if() as one argument instead of being split into a list.
if ( NOT EXISTS "$ENV{HSATHK_BUILD_INC_PATH}/hsakmt.h" )
  MESSAGE ( FATAL_ERROR "Environment variable HSATHK_BUILD_INC_PATH is not set to point to the location where KFD Thunk header file hsakmt.h (and rest of the thunk headers) could be found." )
endif ()

if ( NOT EXISTS "$ENV{HSATHK_BUILD_LIB_PATH}/libhsakmt.so.1" )
  MESSAGE ( FATAL_ERROR "Environment variable HSATHK_BUILD_LIB_PATH is not set to point to the location where KFD Thunk library libhsakmt.so.1 could be found." )
endif ()
|
||||
|
||||
## Locate the sp3 header and static library.
## Each location defaults to the in-tree ../utils/sp3 directory and is
## replaced by the matching environment variable only when the expected file
## (sp3.h / libsp3.a) actually exists there. The probed paths are quoted so a
## value containing ';' is tested as a single path.
set ( LIBSP3_BUILD_INC_PATH ${CMAKE_CURRENT_SOURCE_DIR}/../utils/sp3 )
if ( EXISTS "$ENV{LIBSP3_BUILD_INC_PATH}/sp3.h" )
  set ( LIBSP3_BUILD_INC_PATH "$ENV{LIBSP3_BUILD_INC_PATH}" )
endif ()

set ( LIBSP3_BUILD_LIB_PATH ${CMAKE_CURRENT_SOURCE_DIR}/../utils/sp3 )
if ( EXISTS "$ENV{LIBSP3_BUILD_LIB_PATH}/libsp3.a" )
  set ( LIBSP3_BUILD_LIB_PATH "$ENV{LIBSP3_BUILD_LIB_PATH}" )
endif ()
|
||||
|
||||
## Set core runtime module name and project name.
##   CORE_RUNTIME_NAME      - base name of the runtime
##   CORE_RUNTIME_COMPONENT - install component name ("lib" + base name)
##   CORE_RUNTIME_TARGET    - base name plus the 64-bit suffix, if any
set ( CORE_RUNTIME_NAME "hsa-runtime" )
set ( CORE_RUNTIME_COMPONENT "lib${CORE_RUNTIME_NAME}" )
set ( CORE_RUNTIME_TARGET "${CORE_RUNTIME_NAME}${ONLY64STR}" )
project ( ${CORE_RUNTIME_TARGET} )

## Report the build configuration.
## These messages are issued after project() because the compiler variables
## are populated when project() enables the language; printed earlier, the
## compiler version (and, on a fresh configure, the compiler path) is empty.
## NOTE(review): assumes this listfile is the top-level one - confirm.
message ( ------IS64BIT: ${IS64BIT} )
message ( ------Compiler: ${CMAKE_CXX_COMPILER} )
message ( ------Version: ${CMAKE_CXX_COMPILER_VERSION} )
|
||||
|
||||
## Have the generated build print every command line it runs.
set(CMAKE_VERBOSE_MAKEFILE on)

## Preprocessor definitions applied to every source compiled from this
## directory.
add_definitions(
  -D__linux__
  -DHSA_EXPORT=1
  -DHSA_EXPORT_FINALIZER=1
  -DHSA_EXPORT_IMAGES=1
)
|
||||
|
||||
## ------------------------- Linux Compiler and Linker options -------------------------
## C++ flags are appended to whatever the user or toolchain already supplied.
set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror -fexceptions -fno-rtti -fvisibility=hidden -Wno-error=sign-compare -Wno-sign-compare -Wno-write-strings -Wno-deprecated-declarations -Wno-conversion-null -fno-math-errno -fno-threadsafe-statics -fmerge-all-constants -fms-extensions -Wno-error=comment -Wno-comment -Wno-error=pointer-arith -Wno-pointer-arith -Wno-error=unused-variable -Wno-error=unused-but-set-variable -Wno-error=unused-function" )

## File handed to the linker through --version-script.
set ( DRVDEF "${CMAKE_CURRENT_SOURCE_DIR}/hsacore.so.def" )

## Shared-library link flags. These are appended as well, so flags coming from
## the command line, the cache or a toolchain file are no longer discarded.
set ( CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,-Bdynamic -Wl,-z,noexecstack -Wl,--version-script=${DRVDEF}" )

## Do not add build-tree RPATH entries to the built binaries.
set ( CMAKE_SKIP_BUILD_RPATH TRUE )

## ------------------------- End Compiler and Linker options ----------------------------
|
||||
|
||||
## Translation units that make up the core runtime library, listed in one
## place. Paths are relative to this listfile's directory.
set(CORE_SRCS
  util/lnx/os_linux.cpp
  util/small_heap.cpp
  util/timer.cpp
  runtime/amd_blit_kernel.cpp
  runtime/amd_blit_sdma.cpp
  runtime/amd_cpu_agent.cpp
  runtime/amd_gpu_agent.cpp
  runtime/amd_aql_queue.cpp
  runtime/amd_loader_context.cpp
  runtime/amd_load_map.cpp
  runtime/amd_memory_region.cpp
  runtime/amd_topology.cpp
  runtime/default_signal.cpp
  runtime/host_queue.cpp
  runtime/hsa.cpp
  runtime/hsa_api_trace.cpp
  runtime/hsa_ext_amd.cpp
  runtime/hsa_ext_interface.cpp
  runtime/interrupt_signal.cpp
  runtime/isa.cpp
  runtime/runtime.cpp
  runtime/signal.cpp
  common/shared.cpp
  common/hsa_table_interface.cpp
)
|
||||
|
||||
## Header search directories, added in this order: the parent directory, its
## inc/ folder, the local inc/ folder, the thunk headers, then the sp3 headers.
include_directories(
  ${CMAKE_CURRENT_SOURCE_DIR}/..
  ${CMAKE_CURRENT_SOURCE_DIR}/../inc
  ${CMAKE_CURRENT_SOURCE_DIR}/inc
  $ENV{HSATHK_BUILD_INC_PATH}
  ${LIBSP3_BUILD_INC_PATH}
)

## Library search directories: thunk library first, then sp3.
link_directories(
  $ENV{HSATHK_BUILD_LIB_PATH}
  ${LIBSP3_BUILD_LIB_PATH}
)

## The runtime itself, built as a shared library from CORE_SRCS.
add_library(${CORE_RUNTIME_TARGET} SHARED ${CORE_SRCS})
|
||||
|
||||
## Set the VERSION and SOVERSION values.
## VERSION is applied only when VERSION_STRING has been defined.
if ( DEFINED VERSION_STRING )
  set_property ( TARGET ${CORE_RUNTIME_TARGET} PROPERTY VERSION "${VERSION_STRING}" )
endif ()

## SOVERSION is applied only when VERSION_MAJOR carries a value, so an unset
## or empty VERSION_MAJOR no longer records an empty SOVERSION on the target.
## NOTE(review): an empty SOVERSION is believed to produce a malformed
## soname (trailing '.') - confirm against the CMake version in use.
if ( NOT "${VERSION_MAJOR}" STREQUAL "" )
  set_property ( TARGET ${CORE_RUNTIME_TARGET} PROPERTY SOVERSION "${VERSION_MAJOR}" )
endif ()
|
||||
|
||||
## Link dependencies of the runtime. A single PRIVATE keyword governs the
## whole list, so none of these libraries is propagated to consumers.
target_link_libraries(${CORE_RUNTIME_TARGET}
  PRIVATE
    amdhsaloader
    amdhsacode
    hsakmt
    sp3
    dl
    pthread
    rt
)
|
||||
|
||||
## If the build is Release, strip the target library.
## The library is named through $<TARGET_FILE:...> rather than a "*.so" glob:
## the glob depended on shell expansion in whatever directory the command ran
## in and would also strip any unrelated .so found there. VERBATIM keeps the
## argument escaping identical across generators.
if ( "${CMAKE_BUILD_TYPE}" STREQUAL Release )
  add_custom_command ( TARGET ${CORE_RUNTIME_TARGET} POST_BUILD
                       COMMAND ${CMAKE_STRIP} $<TARGET_FILE:${CORE_RUNTIME_TARGET}>
                       VERBATIM )
endif ()
|
||||
|
||||
## Install rule: the shared library goes to <prefix>/lib and belongs to the
## CORE_RUNTIME_COMPONENT install component.
install(
  TARGETS ${CORE_RUNTIME_TARGET}
  LIBRARY
    DESTINATION lib
    COMPONENT ${CORE_RUNTIME_COMPONENT}
)
|
||||
@@ -1,604 +0,0 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "hsa_api_trace.h"
|
||||
|
||||
static const ApiTable* HsaApiTable;
|
||||
|
||||
void hsa_table_interface_init(const ApiTable* Table) { HsaApiTable = Table; }
|
||||
|
||||
const ApiTable* hsa_table_interface_get_table() { return HsaApiTable; }
|
||||
|
||||
// Pass through stub functions
|
||||
// Runtime start-up/shut-down and system-level queries. Every stub passes its
// arguments, unchanged and in order, to the same-named `_fn` entry of
// HsaApiTable and returns that entry's result.

hsa_status_t HSA_API hsa_init() {
  return HsaApiTable->hsa_init_fn();
}

hsa_status_t HSA_API hsa_shut_down() {
  return HsaApiTable->hsa_shut_down_fn();
}

hsa_status_t HSA_API hsa_system_get_info(hsa_system_info_t attribute,
                                         void* value) {
  return HsaApiTable->hsa_system_get_info_fn(attribute, value);
}

hsa_status_t HSA_API hsa_system_extension_supported(uint16_t extension,
                                                    uint16_t version_major,
                                                    uint16_t version_minor,
                                                    bool* result) {
  return HsaApiTable->hsa_system_extension_supported_fn(
      extension, version_major, version_minor, result);
}

hsa_status_t HSA_API hsa_system_get_extension_table(uint16_t extension,
                                                    uint16_t version_major,
                                                    uint16_t version_minor,
                                                    void* table) {
  return HsaApiTable->hsa_system_get_extension_table_fn(
      extension, version_major, version_minor, table);
}
|
||||
|
||||
// Agent enumeration and agent queries. Each stub forwards its arguments, in
// order, to the matching `_fn` entry of HsaApiTable and returns its status.

hsa_status_t HSA_API hsa_iterate_agents(
    hsa_status_t (*callback)(hsa_agent_t agent, void* data), void* data) {
  return HsaApiTable->hsa_iterate_agents_fn(callback, data);
}

hsa_status_t HSA_API hsa_agent_get_info(hsa_agent_t agent,
                                        hsa_agent_info_t attribute,
                                        void* value) {
  return HsaApiTable->hsa_agent_get_info_fn(agent, attribute, value);
}

hsa_status_t HSA_API hsa_agent_get_exception_policies(hsa_agent_t agent,
                                                      hsa_profile_t profile,
                                                      uint16_t* mask) {
  return HsaApiTable->hsa_agent_get_exception_policies_fn(agent, profile, mask);
}

hsa_status_t HSA_API hsa_agent_extension_supported(uint16_t extension,
                                                   hsa_agent_t agent,
                                                   uint16_t version_major,
                                                   uint16_t version_minor,
                                                   bool* result) {
  return HsaApiTable->hsa_agent_extension_supported_fn(
      extension, agent, version_major, version_minor, result);
}
|
||||
|
||||
// Queue creation and teardown. Each stub forwards its arguments, in order, to
// the matching `_fn` entry of HsaApiTable and returns its status.

hsa_status_t HSA_API hsa_queue_create(
    hsa_agent_t agent, uint32_t size, hsa_queue_type_t type,
    void (*callback)(hsa_status_t status, hsa_queue_t* source, void* data),
    void* data, uint32_t private_segment_size, uint32_t group_segment_size,
    hsa_queue_t** queue) {
  return HsaApiTable->hsa_queue_create_fn(agent, size, type, callback, data,
                                          private_segment_size,
                                          group_segment_size, queue);
}

hsa_status_t HSA_API hsa_soft_queue_create(hsa_region_t region, uint32_t size,
                                           hsa_queue_type_t type,
                                           uint32_t features,
                                           hsa_signal_t completion_signal,
                                           hsa_queue_t** queue) {
  return HsaApiTable->hsa_soft_queue_create_fn(region, size, type, features,
                                               completion_signal, queue);
}

hsa_status_t HSA_API hsa_queue_destroy(hsa_queue_t* queue) {
  return HsaApiTable->hsa_queue_destroy_fn(queue);
}

hsa_status_t HSA_API hsa_queue_inactivate(hsa_queue_t* queue) {
  return HsaApiTable->hsa_queue_inactivate_fn(queue);
}
|
||||
|
||||
uint64_t HSA_API hsa_queue_load_read_index_acquire(const hsa_queue_t* queue) {
|
||||
return HsaApiTable->hsa_queue_load_read_index_acquire_fn(queue);
|
||||
}
|
||||
|
||||
uint64_t HSA_API hsa_queue_load_read_index_relaxed(const hsa_queue_t* queue) {
|
||||
return HsaApiTable->hsa_queue_load_read_index_relaxed_fn(queue);
|
||||
}
|
||||
|
||||
uint64_t HSA_API hsa_queue_load_write_index_acquire(const hsa_queue_t* queue) {
|
||||
return HsaApiTable->hsa_queue_load_write_index_acquire_fn(queue);
|
||||
}
|
||||
|
||||
uint64_t HSA_API hsa_queue_load_write_index_relaxed(const hsa_queue_t* queue) {
|
||||
return HsaApiTable->hsa_queue_load_write_index_relaxed_fn(queue);
|
||||
}
|
||||
|
||||
void HSA_API hsa_queue_store_write_index_relaxed(const hsa_queue_t* queue,
|
||||
uint64_t value) {
|
||||
return HsaApiTable->hsa_queue_store_write_index_relaxed_fn(queue, value);
|
||||
}
|
||||
|
||||
void HSA_API hsa_queue_store_write_index_release(const hsa_queue_t* queue,
|
||||
uint64_t value) {
|
||||
return HsaApiTable->hsa_queue_store_write_index_release_fn(queue, value);
|
||||
}
|
||||
|
||||
uint64_t HSA_API hsa_queue_cas_write_index_acq_rel(const hsa_queue_t* queue,
|
||||
uint64_t expected,
|
||||
uint64_t value) {
|
||||
return HsaApiTable->hsa_queue_cas_write_index_acq_rel_fn(queue, expected,
|
||||
value);
|
||||
}
|
||||
|
||||
uint64_t HSA_API hsa_queue_cas_write_index_acquire(const hsa_queue_t* queue,
|
||||
uint64_t expected,
|
||||
uint64_t value) {
|
||||
return HsaApiTable->hsa_queue_cas_write_index_acquire_fn(queue, expected,
|
||||
value);
|
||||
}
|
||||
|
||||
uint64_t HSA_API hsa_queue_cas_write_index_relaxed(const hsa_queue_t* queue,
|
||||
uint64_t expected,
|
||||
uint64_t value) {
|
||||
return HsaApiTable->hsa_queue_cas_write_index_relaxed_fn(queue, expected,
|
||||
value);
|
||||
}
|
||||
|
||||
uint64_t HSA_API hsa_queue_cas_write_index_release(const hsa_queue_t* queue,
|
||||
uint64_t expected,
|
||||
uint64_t value) {
|
||||
return HsaApiTable->hsa_queue_cas_write_index_release_fn(queue, expected,
|
||||
value);
|
||||
}
|
||||
|
||||
uint64_t HSA_API hsa_queue_add_write_index_acq_rel(const hsa_queue_t* queue,
|
||||
uint64_t value) {
|
||||
return HsaApiTable->hsa_queue_add_write_index_acq_rel_fn(queue, value);
|
||||
}
|
||||
|
||||
uint64_t HSA_API hsa_queue_add_write_index_acquire(const hsa_queue_t* queue,
|
||||
uint64_t value) {
|
||||
return HsaApiTable->hsa_queue_add_write_index_acquire_fn(queue, value);
|
||||
}
|
||||
|
||||
uint64_t HSA_API hsa_queue_add_write_index_relaxed(const hsa_queue_t* queue,
|
||||
uint64_t value) {
|
||||
return HsaApiTable->hsa_queue_add_write_index_relaxed_fn(queue, value);
|
||||
}
|
||||
|
||||
uint64_t HSA_API hsa_queue_add_write_index_release(const hsa_queue_t* queue,
|
||||
uint64_t value) {
|
||||
return HsaApiTable->hsa_queue_add_write_index_release_fn(queue, value);
|
||||
}
|
||||
|
||||
void HSA_API hsa_queue_store_read_index_relaxed(const hsa_queue_t* queue,
|
||||
uint64_t value) {
|
||||
return HsaApiTable->hsa_queue_store_read_index_relaxed_fn(queue, value);
|
||||
}
|
||||
|
||||
void HSA_API hsa_queue_store_read_index_release(const hsa_queue_t* queue,
|
||||
uint64_t value) {
|
||||
return HsaApiTable->hsa_queue_store_read_index_release_fn(queue, value);
|
||||
}
|
||||
|
||||
// Region enumeration and memory management. Each stub forwards its arguments,
// in order, to the matching `_fn` entry of HsaApiTable and returns its status.

hsa_status_t HSA_API hsa_agent_iterate_regions(
    hsa_agent_t agent,
    hsa_status_t (*callback)(hsa_region_t region, void* data),
    void* data) {
  return HsaApiTable->hsa_agent_iterate_regions_fn(agent, callback, data);
}

hsa_status_t HSA_API hsa_region_get_info(hsa_region_t region,
                                         hsa_region_info_t attribute,
                                         void* value) {
  return HsaApiTable->hsa_region_get_info_fn(region, attribute, value);
}

hsa_status_t HSA_API hsa_memory_register(void* address, size_t size) {
  return HsaApiTable->hsa_memory_register_fn(address, size);
}

hsa_status_t HSA_API hsa_memory_deregister(void* address, size_t size) {
  return HsaApiTable->hsa_memory_deregister_fn(address, size);
}

hsa_status_t HSA_API hsa_memory_allocate(hsa_region_t region, size_t size,
                                         void** ptr) {
  return HsaApiTable->hsa_memory_allocate_fn(region, size, ptr);
}

hsa_status_t HSA_API hsa_memory_free(void* ptr) {
  return HsaApiTable->hsa_memory_free_fn(ptr);
}

hsa_status_t HSA_API hsa_memory_copy(void* dst, const void* src,
                                     size_t size) {
  return HsaApiTable->hsa_memory_copy_fn(dst, src, size);
}

hsa_status_t HSA_API hsa_memory_assign_agent(void* ptr, hsa_agent_t agent,
                                             hsa_access_permission_t access) {
  return HsaApiTable->hsa_memory_assign_agent_fn(ptr, agent, access);
}
|
||||
|
||||
hsa_status_t HSA_API
|
||||
hsa_signal_create(hsa_signal_value_t initial_value, uint32_t num_consumers,
|
||||
const hsa_agent_t* consumers, hsa_signal_t* signal) {
|
||||
return HsaApiTable->hsa_signal_create_fn(initial_value, num_consumers,
|
||||
consumers, signal);
|
||||
}
|
||||
|
||||
hsa_status_t HSA_API hsa_signal_destroy(hsa_signal_t signal) {
|
||||
return HsaApiTable->hsa_signal_destroy_fn(signal);
|
||||
}
|
||||
|
||||
hsa_signal_value_t HSA_API hsa_signal_load_relaxed(hsa_signal_t signal) {
|
||||
return HsaApiTable->hsa_signal_load_relaxed_fn(signal);
|
||||
}
|
||||
|
||||
hsa_signal_value_t HSA_API hsa_signal_load_acquire(hsa_signal_t signal) {
|
||||
return HsaApiTable->hsa_signal_load_acquire_fn(signal);
|
||||
}
|
||||
|
||||
void HSA_API
|
||||
hsa_signal_store_relaxed(hsa_signal_t signal, hsa_signal_value_t value) {
|
||||
return HsaApiTable->hsa_signal_store_relaxed_fn(signal, value);
|
||||
}
|
||||
|
||||
void HSA_API
|
||||
hsa_signal_store_release(hsa_signal_t signal, hsa_signal_value_t value) {
|
||||
return HsaApiTable->hsa_signal_store_release_fn(signal, value);
|
||||
}
|
||||
|
||||
hsa_signal_value_t HSA_API
|
||||
hsa_signal_wait_relaxed(hsa_signal_t signal,
|
||||
hsa_signal_condition_t condition,
|
||||
hsa_signal_value_t compare_value,
|
||||
uint64_t timeout_hint,
|
||||
hsa_wait_state_t wait_expectancy_hint) {
|
||||
return HsaApiTable->hsa_signal_wait_relaxed_fn(
|
||||
signal, condition, compare_value, timeout_hint, wait_expectancy_hint);
|
||||
}
|
||||
|
||||
hsa_signal_value_t HSA_API
|
||||
hsa_signal_wait_acquire(hsa_signal_t signal,
|
||||
hsa_signal_condition_t condition,
|
||||
hsa_signal_value_t compare_value,
|
||||
uint64_t timeout_hint,
|
||||
hsa_wait_state_t wait_expectancy_hint) {
|
||||
return HsaApiTable->hsa_signal_wait_acquire_fn(
|
||||
signal, condition, compare_value, timeout_hint, wait_expectancy_hint);
|
||||
}
|
||||
|
||||
void HSA_API
|
||||
hsa_signal_and_relaxed(hsa_signal_t signal, hsa_signal_value_t value) {
|
||||
return HsaApiTable->hsa_signal_and_relaxed_fn(signal, value);
|
||||
}
|
||||
|
||||
void HSA_API
|
||||
hsa_signal_and_acquire(hsa_signal_t signal, hsa_signal_value_t value) {
|
||||
return HsaApiTable->hsa_signal_and_acquire_fn(signal, value);
|
||||
}
|
||||
|
||||
void HSA_API
|
||||
hsa_signal_and_release(hsa_signal_t signal, hsa_signal_value_t value) {
|
||||
return HsaApiTable->hsa_signal_and_release_fn(signal, value);
|
||||
}
|
||||
|
||||
void HSA_API
|
||||
hsa_signal_and_acq_rel(hsa_signal_t signal, hsa_signal_value_t value) {
|
||||
return HsaApiTable->hsa_signal_and_acq_rel_fn(signal, value);
|
||||
}
|
||||
|
||||
void HSA_API
|
||||
hsa_signal_or_relaxed(hsa_signal_t signal, hsa_signal_value_t value) {
|
||||
return HsaApiTable->hsa_signal_or_relaxed_fn(signal, value);
|
||||
}
|
||||
|
||||
void HSA_API
|
||||
hsa_signal_or_acquire(hsa_signal_t signal, hsa_signal_value_t value) {
|
||||
return HsaApiTable->hsa_signal_or_acquire_fn(signal, value);
|
||||
}
|
||||
|
||||
void HSA_API
|
||||
hsa_signal_or_release(hsa_signal_t signal, hsa_signal_value_t value) {
|
||||
return HsaApiTable->hsa_signal_or_release_fn(signal, value);
|
||||
}
|
||||
|
||||
void HSA_API
|
||||
hsa_signal_or_acq_rel(hsa_signal_t signal, hsa_signal_value_t value) {
|
||||
return HsaApiTable->hsa_signal_or_acq_rel_fn(signal, value);
|
||||
}
|
||||
|
||||
void HSA_API
|
||||
hsa_signal_xor_relaxed(hsa_signal_t signal, hsa_signal_value_t value) {
|
||||
return HsaApiTable->hsa_signal_xor_relaxed_fn(signal, value);
|
||||
}
|
||||
|
||||
void HSA_API
|
||||
hsa_signal_xor_acquire(hsa_signal_t signal, hsa_signal_value_t value) {
|
||||
return HsaApiTable->hsa_signal_xor_acquire_fn(signal, value);
|
||||
}
|
||||
|
||||
void HSA_API
|
||||
hsa_signal_xor_release(hsa_signal_t signal, hsa_signal_value_t value) {
|
||||
return HsaApiTable->hsa_signal_xor_release_fn(signal, value);
|
||||
}
|
||||
|
||||
void HSA_API
|
||||
hsa_signal_xor_acq_rel(hsa_signal_t signal, hsa_signal_value_t value) {
|
||||
return HsaApiTable->hsa_signal_xor_acq_rel_fn(signal, value);
|
||||
}
|
||||
|
||||
void HSA_API
|
||||
hsa_signal_add_relaxed(hsa_signal_t signal, hsa_signal_value_t value) {
|
||||
return HsaApiTable->hsa_signal_add_relaxed_fn(signal, value);
|
||||
}
|
||||
|
||||
void HSA_API
|
||||
hsa_signal_add_acquire(hsa_signal_t signal, hsa_signal_value_t value) {
|
||||
return HsaApiTable->hsa_signal_add_acquire_fn(signal, value);
|
||||
}
|
||||
|
||||
void HSA_API
|
||||
hsa_signal_add_release(hsa_signal_t signal, hsa_signal_value_t value) {
|
||||
return HsaApiTable->hsa_signal_add_release_fn(signal, value);
|
||||
}
|
||||
|
||||
void HSA_API
|
||||
hsa_signal_add_acq_rel(hsa_signal_t signal, hsa_signal_value_t value) {
|
||||
return HsaApiTable->hsa_signal_add_acq_rel_fn(signal, value);
|
||||
}
|
||||
|
||||
void HSA_API
|
||||
hsa_signal_subtract_relaxed(hsa_signal_t signal, hsa_signal_value_t value) {
|
||||
return HsaApiTable->hsa_signal_subtract_relaxed_fn(signal, value);
|
||||
}
|
||||
|
||||
void HSA_API
|
||||
hsa_signal_subtract_acquire(hsa_signal_t signal, hsa_signal_value_t value) {
|
||||
return HsaApiTable->hsa_signal_subtract_acquire_fn(signal, value);
|
||||
}
|
||||
|
||||
void HSA_API
|
||||
hsa_signal_subtract_release(hsa_signal_t signal, hsa_signal_value_t value) {
|
||||
return HsaApiTable->hsa_signal_subtract_release_fn(signal, value);
|
||||
}
|
||||
|
||||
void HSA_API
|
||||
hsa_signal_subtract_acq_rel(hsa_signal_t signal, hsa_signal_value_t value) {
|
||||
return HsaApiTable->hsa_signal_subtract_acq_rel_fn(signal, value);
|
||||
}
|
||||
|
||||
// Signal exchange and compare-and-swap stubs. Both shapes return the
// hsa_signal_value_t produced by the `<name>_fn` entry of HsaApiTable, to
// which the arguments are passed unchanged and in order. Each shape is
// written once as a helper macro that is undefined after use.

#define HSA_SIGNAL_EXCHANGE_STUB(api)                              \
  hsa_signal_value_t HSA_API api(hsa_signal_t signal,              \
                                 hsa_signal_value_t value) {       \
    return HsaApiTable->api##_fn(signal, value);                   \
  }

#define HSA_SIGNAL_CAS_STUB(api)                                   \
  hsa_signal_value_t HSA_API api(hsa_signal_t signal,              \
                                 hsa_signal_value_t expected,      \
                                 hsa_signal_value_t value) {       \
    return HsaApiTable->api##_fn(signal, expected, value);         \
  }

HSA_SIGNAL_EXCHANGE_STUB(hsa_signal_exchange_relaxed)
HSA_SIGNAL_EXCHANGE_STUB(hsa_signal_exchange_acquire)
HSA_SIGNAL_EXCHANGE_STUB(hsa_signal_exchange_release)
HSA_SIGNAL_EXCHANGE_STUB(hsa_signal_exchange_acq_rel)

HSA_SIGNAL_CAS_STUB(hsa_signal_cas_relaxed)
HSA_SIGNAL_CAS_STUB(hsa_signal_cas_acquire)
HSA_SIGNAL_CAS_STUB(hsa_signal_cas_release)
HSA_SIGNAL_CAS_STUB(hsa_signal_cas_acq_rel)

#undef HSA_SIGNAL_EXCHANGE_STUB
#undef HSA_SIGNAL_CAS_STUB
|
||||
|
||||
// ISA query stubs. Each forwards its arguments, in order, to the matching
// `_fn` entry of HsaApiTable and returns its status.
//
// hsa_isa_from_name and hsa_isa_compatible now carry HSA_API like every other
// stub in this file; previously they were the only two definitions without it.
// NOTE(review): assumes the public declarations of these two functions are
// also marked HSA_API - confirm against the header.

hsa_status_t HSA_API hsa_isa_from_name(const char* name, hsa_isa_t* isa) {
  return HsaApiTable->hsa_isa_from_name_fn(name, isa);
}

hsa_status_t HSA_API hsa_isa_get_info(hsa_isa_t isa, hsa_isa_info_t attribute,
                                      uint32_t index, void* value) {
  return HsaApiTable->hsa_isa_get_info_fn(isa, attribute, index, value);
}

hsa_status_t HSA_API hsa_isa_compatible(hsa_isa_t code_object_isa,
                                        hsa_isa_t agent_isa, bool* result) {
  return HsaApiTable->hsa_isa_compatible_fn(code_object_isa, agent_isa, result);
}
|
||||
|
||||
// Code object and code symbol stubs. Each forwards its arguments, in order,
// to the matching `_fn` entry of HsaApiTable and returns its status.

hsa_status_t HSA_API hsa_code_object_serialize(
    hsa_code_object_t code_object,
    hsa_status_t (*alloc_callback)(size_t size, hsa_callback_data_t data,
                                   void** address),
    hsa_callback_data_t callback_data,
    const char* options,
    void** serialized_code_object,
    size_t* serialized_code_object_size) {
  return HsaApiTable->hsa_code_object_serialize_fn(
      code_object, alloc_callback, callback_data, options,
      serialized_code_object, serialized_code_object_size);
}

hsa_status_t HSA_API hsa_code_object_deserialize(
    void* serialized_code_object, size_t serialized_code_object_size,
    const char* options, hsa_code_object_t* code_object) {
  return HsaApiTable->hsa_code_object_deserialize_fn(
      serialized_code_object, serialized_code_object_size, options,
      code_object);
}

hsa_status_t HSA_API hsa_code_object_destroy(hsa_code_object_t code_object) {
  return HsaApiTable->hsa_code_object_destroy_fn(code_object);
}

hsa_status_t HSA_API hsa_code_object_get_info(
    hsa_code_object_t code_object, hsa_code_object_info_t attribute,
    void* value) {
  return HsaApiTable->hsa_code_object_get_info_fn(code_object, attribute,
                                                  value);
}

hsa_status_t HSA_API hsa_code_object_get_symbol(
    hsa_code_object_t code_object, const char* symbol_name,
    hsa_code_symbol_t* symbol) {
  return HsaApiTable->hsa_code_object_get_symbol_fn(code_object, symbol_name,
                                                    symbol);
}

hsa_status_t HSA_API hsa_code_symbol_get_info(
    hsa_code_symbol_t code_symbol, hsa_code_symbol_info_t attribute,
    void* value) {
  return HsaApiTable->hsa_code_symbol_get_info_fn(code_symbol, attribute,
                                                  value);
}

hsa_status_t HSA_API hsa_code_object_iterate_symbols(
    hsa_code_object_t code_object,
    hsa_status_t (*callback)(hsa_code_object_t code_object,
                             hsa_code_symbol_t symbol, void* data),
    void* data) {
  return HsaApiTable->hsa_code_object_iterate_symbols_fn(code_object, callback,
                                                         data);
}
|
||||
|
||||
hsa_status_t HSA_API
|
||||
hsa_executable_create(hsa_profile_t profile,
|
||||
hsa_executable_state_t executable_state,
|
||||
const char* options, hsa_executable_t* executable) {
|
||||
return HsaApiTable->hsa_executable_create_fn(profile, executable_state,
|
||||
options, executable);
|
||||
}
|
||||
|
||||
hsa_status_t HSA_API hsa_executable_destroy(hsa_executable_t executable) {
|
||||
return HsaApiTable->hsa_executable_destroy_fn(executable);
|
||||
}
|
||||
|
||||
hsa_status_t HSA_API
|
||||
hsa_executable_load_code_object(hsa_executable_t executable,
|
||||
hsa_agent_t agent,
|
||||
hsa_code_object_t code_object,
|
||||
const char* options) {
|
||||
return HsaApiTable->hsa_executable_load_code_object_fn(executable, agent,
|
||||
code_object, options);
|
||||
}
|
||||
|
||||
hsa_status_t HSA_API
|
||||
hsa_executable_freeze(hsa_executable_t executable, const char* options) {
|
||||
return HsaApiTable->hsa_executable_freeze_fn(executable, options);
|
||||
}
|
||||
|
||||
hsa_status_t HSA_API hsa_executable_get_info(hsa_executable_t executable,
|
||||
hsa_executable_info_t attribute,
|
||||
void* value) {
|
||||
return HsaApiTable->hsa_executable_get_info_fn(executable, attribute, value);
|
||||
}
|
||||
|
||||
hsa_status_t HSA_API
|
||||
hsa_executable_global_variable_define(hsa_executable_t executable,
|
||||
const char* variable_name,
|
||||
void* address) {
|
||||
return HsaApiTable->hsa_executable_global_variable_define_fn(
|
||||
executable, variable_name, address);
|
||||
}
|
||||
|
||||
hsa_status_t HSA_API
|
||||
hsa_executable_agent_global_variable_define(hsa_executable_t executable,
|
||||
hsa_agent_t agent,
|
||||
const char* variable_name,
|
||||
void* address) {
|
||||
return HsaApiTable->hsa_executable_agent_global_variable_define_fn(
|
||||
executable, agent, variable_name, address);
|
||||
}
|
||||
|
||||
hsa_status_t HSA_API
|
||||
hsa_executable_readonly_variable_define(hsa_executable_t executable,
|
||||
hsa_agent_t agent,
|
||||
const char* variable_name,
|
||||
void* address) {
|
||||
return HsaApiTable->hsa_executable_readonly_variable_define_fn(
|
||||
executable, agent, variable_name, address);
|
||||
}
|
||||
|
||||
hsa_status_t HSA_API
|
||||
hsa_executable_validate(hsa_executable_t executable, uint32_t* result) {
|
||||
return HsaApiTable->hsa_executable_validate_fn(executable, result);
|
||||
}
|
||||
|
||||
// Forwards the call to the hsa_executable_get_symbol_fn entry of HsaApiTable
// and returns the status produced by that entry.
hsa_status_t HSA_API hsa_executable_get_symbol(
    hsa_executable_t executable, const char* module_name,
    const char* symbol_name, hsa_agent_t agent, int32_t call_convention,
    hsa_executable_symbol_t* symbol) {
  const hsa_status_t status = HsaApiTable->hsa_executable_get_symbol_fn(
      executable, module_name, symbol_name, agent, call_convention, symbol);
  return status;
}
|
||||
|
||||
// Forwards the call to the hsa_executable_symbol_get_info_fn entry of
// HsaApiTable and returns the status produced by that entry.
hsa_status_t HSA_API hsa_executable_symbol_get_info(
    hsa_executable_symbol_t executable_symbol,
    hsa_executable_symbol_info_t attribute, void* value) {
  const hsa_status_t status = HsaApiTable->hsa_executable_symbol_get_info_fn(
      executable_symbol, attribute, value);
  return status;
}
|
||||
|
||||
// Forwards the call, including the caller supplied callback and its data
// pointer, to the hsa_executable_iterate_symbols_fn entry of HsaApiTable and
// returns the status produced by that entry.
hsa_status_t HSA_API hsa_executable_iterate_symbols(
    hsa_executable_t executable,
    hsa_status_t (*callback)(hsa_executable_t executable,
                             hsa_executable_symbol_t symbol, void* data),
    void* data) {
  const hsa_status_t status = HsaApiTable->hsa_executable_iterate_symbols_fn(
      executable, callback, data);
  return status;
}
|
||||
|
||||
// Forwards the call to the hsa_status_string_fn entry of HsaApiTable and
// returns the status produced by that entry.
hsa_status_t HSA_API hsa_status_string(hsa_status_t status,
                                       const char** status_string) {
  const hsa_status_t result =
      HsaApiTable->hsa_status_string_fn(status, status_string);
  return result;
}
|
||||
@@ -1,48 +0,0 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "core/common/shared.h"
|
||||
|
||||
namespace core {
|
||||
std::function<void*(size_t, size_t)> BaseShared::allocate_=nullptr;
|
||||
std::function<void(void*)> BaseShared::free_=nullptr;
|
||||
}
|
||||
@@ -1,109 +0,0 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef HSA_RUNTME_CORE_INC_SHARED_H_
|
||||
#define HSA_RUNTME_CORE_INC_SHARED_H_
|
||||
|
||||
#include "core/util/utils.h"
|
||||
#include <assert.h>
|
||||
|
||||
#include <cstring>
|
||||
#include <functional>
|
||||
|
||||
namespace core {
|
||||
/// @brief Base class holding the allocator and deallocator used for shared
/// objects.
///
/// The two hooks are static, so a single pair serves every class derived from
/// BaseShared. Shared<T> invokes allocate_ as allocate_(size, alignment) and
/// passes the returned pointer to free_ when it is destroyed.
class BaseShared {
 public:
  /// @brief Install the hooks used to obtain and release shared memory.
  ///
  /// @param [in] allocate Callable taking two size_t arguments and returning
  /// the allocated memory as void*.
  /// @param [in] free Callable releasing a pointer obtained from @p allocate.
  static void SetAllocateAndFree(
      const std::function<void*(size_t, size_t)>& allocate,
      const std::function<void(void*)>& free) {
    allocate_ = allocate;
    free_ = free;
  }

 protected:
  /// Allocation hook; empty until SetAllocateAndFree is called.
  static std::function<void*(size_t, size_t)> allocate_;

  /// Release hook; empty until SetAllocateAndFree is called.
  static std::function<void(void*)> free_;
};
|
||||
|
||||
/// @brief Base class for classes that encapsulates object shared between
|
||||
/// host and agents. Alignment defaults to __alignof(T) but may be increased.
|
||||
template <typename T, size_t Align=0>
|
||||
class Shared : public BaseShared {
|
||||
public:
|
||||
Shared() {
|
||||
assert(allocate_ != nullptr && free_ != nullptr &&
|
||||
"Shared object allocator is not set");
|
||||
static_assert((__alignof(T) <= Align) || (Align == 0),
|
||||
"Align is less than alignof(T)");
|
||||
|
||||
shared_object_ =
|
||||
reinterpret_cast<T*>(allocate_(sizeof(T), Max(__alignof(T), Align)));
|
||||
|
||||
assert(shared_object_ != NULL && "Failed on allocating shared_object_");
|
||||
|
||||
if (shared_object_ != NULL) new (shared_object_) T;
|
||||
}
|
||||
|
||||
virtual ~Shared() {
|
||||
assert(allocate_ != nullptr && free_ != nullptr &&
|
||||
"Shared object allocator is not set");
|
||||
|
||||
if (IsSharedObjectAllocationValid()) {
|
||||
shared_object_->~T();
|
||||
free_(shared_object_);
|
||||
}
|
||||
}
|
||||
|
||||
T* shared_object() const { return shared_object_; }
|
||||
|
||||
bool IsSharedObjectAllocationValid() const {
|
||||
return (shared_object_ != NULL);
|
||||
}
|
||||
|
||||
private:
|
||||
T* shared_object_;
|
||||
};
|
||||
|
||||
} // namespace core
|
||||
#endif // header guard
|
||||
@@ -1,264 +0,0 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// HSA runtime C++ interface file.
|
||||
|
||||
#ifndef HSA_RUNTME_CORE_INC_AGENT_H_
|
||||
#define HSA_RUNTME_CORE_INC_AGENT_H_
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "core/inc/runtime.h"
|
||||
#include "core/inc/checked.h"
|
||||
#include "core/inc/isa.h"
|
||||
#include "core/inc/queue.h"
|
||||
#include "core/inc/memory_region.h"
|
||||
#include "core/util/utils.h"
|
||||
|
||||
namespace core {
|
||||
class Signal;
|
||||
|
||||
typedef void (*HsaEventCallback)(hsa_status_t status, hsa_queue_t* source,
|
||||
void* data);
|
||||
|
||||
class MemoryRegion;
|
||||
|
||||
// Agent is intended to be an pure interface class and may be wrapped or
|
||||
// replaced by tools libraries. All funtions other than Convert, node_id,
|
||||
// device_type, and public_handle must be virtual.
|
||||
class Agent : public Checked<0xF6BC25EB17E6F917> {
|
||||
public:
|
||||
// @brief Convert agent object into hsa_agent_t.
|
||||
//
|
||||
// @param [in] agent Pointer to an agent.
|
||||
//
|
||||
// @retval hsa_agent_t
|
||||
static __forceinline hsa_agent_t Convert(Agent* agent) {
|
||||
const hsa_agent_t agent_handle = {
|
||||
static_cast<uint64_t>(reinterpret_cast<uintptr_t>(agent))};
|
||||
return agent_handle;
|
||||
}
|
||||
|
||||
// @brief Convert agent object into const hsa_agent_t.
|
||||
//
|
||||
// @param [in] agent Pointer to an agent.
|
||||
//
|
||||
// @retval const hsa_agent_t
|
||||
static __forceinline const hsa_agent_t Convert(const Agent* agent) {
|
||||
const hsa_agent_t agent_handle = {
|
||||
static_cast<uint64_t>(reinterpret_cast<uintptr_t>(agent))};
|
||||
return agent_handle;
|
||||
}
|
||||
|
||||
// @brief Convert hsa_agent_t handle into Agent*.
|
||||
//
|
||||
// @param [in] agent An hsa_agent_t handle.
|
||||
//
|
||||
// @retval Agent*
|
||||
static __forceinline Agent* Convert(hsa_agent_t agent) {
|
||||
return reinterpret_cast<Agent*>(agent.handle);
|
||||
}
|
||||
|
||||
// Lightweight RTTI for vendor specific implementations.
|
||||
enum DeviceType { kAmdGpuDevice = 0, kAmdCpuDevice = 1, kUnknownDevice = 2 };
|
||||
|
||||
// @brief Agent class contructor.
|
||||
//
|
||||
// @param [in] type CPU or GPU or other.
|
||||
explicit Agent(uint32_t node_id, DeviceType type)
|
||||
: node_id_(node_id), device_type_(uint32_t(type)) {
|
||||
public_handle_ = Convert(this);
|
||||
}
|
||||
|
||||
// @brief Agent class contructor.
|
||||
//
|
||||
// @param [in] type CPU or GPU or other.
|
||||
explicit Agent(uint32_t node_id, uint32_t type)
|
||||
: node_id_(node_id), device_type_(type) {
|
||||
public_handle_ = Convert(this);
|
||||
}
|
||||
|
||||
// @brief Agent class destructor.
|
||||
virtual ~Agent() {}
|
||||
|
||||
// @brief Submit DMA copy command to move data from src to dst and wait
|
||||
// until it is finished.
|
||||
//
|
||||
// @details The agent must be able to access @p dst and @p src.
|
||||
//
|
||||
// @param [in] dst Memory address of the destination.
|
||||
// @param [in] src Memory address of the source.
|
||||
// @param [in] size Copy size in bytes.
|
||||
//
|
||||
// @retval HSA_STATUS_SUCCESS The memory copy is finished and successful.
|
||||
virtual hsa_status_t DmaCopy(void* dst, const void* src, size_t size) {
|
||||
return HSA_STATUS_ERROR;
|
||||
}
|
||||
|
||||
// @brief Submit DMA copy command to move data from src to dst. This call
|
||||
// does not wait until the copy is finished
|
||||
//
|
||||
// @details The agent must be able to access @p dst and @p src. Memory copy
|
||||
// will be performed after all signals in @p dep_signals have value of 0.
|
||||
// On memory copy completion, the value of out_signal is decremented.
|
||||
//
|
||||
// @param [in] dst Memory address of the destination.
|
||||
// @param [in] src Memory address of the source.
|
||||
// @param [in] size Copy size in bytes.
|
||||
// @param [in] dep_signals Array of signal dependency.
|
||||
// @param [in] out_signal Completion signal.
|
||||
//
|
||||
// @retval HSA_STATUS_SUCCESS The memory copy is finished and successful.
|
||||
virtual hsa_status_t DmaCopy(void* dst, const void* src, size_t size,
|
||||
std::vector<core::Signal*>& dep_signals,
|
||||
core::Signal& out_signal) {
|
||||
return HSA_STATUS_ERROR;
|
||||
}
|
||||
|
||||
// @brief Submit DMA command to set the content of a pointer and wait
|
||||
// until it is finished.
|
||||
//
|
||||
// @details The agent must be able to access @p ptr
|
||||
//
|
||||
// @param [in] ptr Address of the memory to be set.
|
||||
// @param [in] value The value/pattern that will be used to set @p ptr.
|
||||
// @param [in] count Number of uint32_t element to be set.
|
||||
//
|
||||
// @retval HSA_STATUS_SUCCESS The memory fill is finished and successful.
|
||||
virtual hsa_status_t DmaFill(void* ptr, uint32_t value, size_t count) {
|
||||
return HSA_STATUS_ERROR;
|
||||
}
|
||||
|
||||
// @brief Invoke the user provided callback for each region accessible by
|
||||
// this agent.
|
||||
//
|
||||
// @param [in] callback User provided callback function.
|
||||
// @param [in] data User provided pointer as input for @p callback.
|
||||
//
|
||||
// @retval ::HSA_STATUS_SUCCESS if the callback function for each traversed
|
||||
// region returns ::HSA_STATUS_SUCCESS.
|
||||
virtual hsa_status_t IterateRegion(
|
||||
hsa_status_t (*callback)(hsa_region_t region, void* data),
|
||||
void* data) const = 0;
|
||||
|
||||
// @brief Create queue.
|
||||
//
|
||||
// @param [in] size Number of packets the queue is expected to hold. Must be a
|
||||
// power of 2 greater than 0.
|
||||
// @param [in] queue_type Queue type.
|
||||
// @param [in] event_callback Callback invoked for every
|
||||
// asynchronous event related to the newly created queue. May be NULL.The HSA
|
||||
// runtime passes three arguments to the callback : a code identifying the
|
||||
// event that triggered the invocation, a pointer to the queue where the event
|
||||
// originated, and the application data.
|
||||
// @param [in] data Application data that is passed to @p callback.
|
||||
// @param [in] private_segment_size A hint to indicate the maximum expected
|
||||
// private segment usage per work-item, in bytes.
|
||||
// @param [in] group_segment_size A hint to indicate the maximum expected
|
||||
// group segment usage per work-group, in bytes.
|
||||
// @param[out] queue Memory location where the HSA runtime stores a pointer
|
||||
// to the newly created queue.
|
||||
//
|
||||
// @retval HSA_STATUS_SUCCESS The queue has been created successfully.
|
||||
virtual hsa_status_t QueueCreate(size_t size, hsa_queue_type_t queue_type,
|
||||
HsaEventCallback event_callback, void* data,
|
||||
uint32_t private_segment_size,
|
||||
uint32_t group_segment_size,
|
||||
Queue** queue) = 0;
|
||||
|
||||
// @brief Query the value of an attribute.
|
||||
//
|
||||
// @param [in] attribute Attribute to query.
|
||||
// @param [out] value Pointer to store the value of the attribute.
|
||||
//
|
||||
// @param HSA_STATUS_SUCCESS @p value has been filled with the value of the
|
||||
// attribute.
|
||||
virtual hsa_status_t GetInfo(hsa_agent_info_t attribute,
|
||||
void* value) const = 0;
|
||||
|
||||
// @brief Returns an array of regions owned by the agent.
|
||||
virtual const std::vector<const core::MemoryRegion*>& regions() const = 0;
|
||||
|
||||
// @details Returns the agent's instruction set architecture.
|
||||
virtual const Isa* isa() const = 0;
|
||||
|
||||
// @brief Returns the device type (CPU/GPU/Others).
|
||||
__forceinline uint32_t device_type() const { return device_type_; }
|
||||
|
||||
// @brief Returns hsa_agent_t handle exposed to end user.
|
||||
//
|
||||
// @details Only matters when tools library need to intercept HSA calls.
|
||||
__forceinline hsa_agent_t public_handle() const { return public_handle_; }
|
||||
|
||||
// @brief Returns node id associated with this agent.
|
||||
__forceinline uint32_t node_id() const { return node_id_; }
|
||||
|
||||
protected:
|
||||
// Intention here is to have a polymorphic update procedure for public_handle_
|
||||
// which is callable on any Agent* but only from some class dervied from
|
||||
// Agent*. do_set_public_handle should remain protected or private in all
|
||||
// derived types.
|
||||
static __forceinline void set_public_handle(Agent* agent,
|
||||
hsa_agent_t handle) {
|
||||
agent->do_set_public_handle(handle);
|
||||
}
|
||||
|
||||
virtual void do_set_public_handle(hsa_agent_t handle) {
|
||||
public_handle_ = handle;
|
||||
}
|
||||
|
||||
hsa_agent_t public_handle_;
|
||||
|
||||
private:
|
||||
// @brief Node id.
|
||||
const uint32_t node_id_;
|
||||
|
||||
const uint32_t device_type_;
|
||||
|
||||
// Forbid copying and moving of this object
|
||||
DISALLOW_COPY_AND_ASSIGN(Agent);
|
||||
};
|
||||
} // namespace core
|
||||
|
||||
#endif // header guard
|
||||
@@ -1,412 +0,0 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef HSA_RUNTIME_CORE_INC_AMD_HW_AQL_COMMAND_PROCESSOR_H_
|
||||
#define HSA_RUNTIME_CORE_INC_AMD_HW_AQL_COMMAND_PROCESSOR_H_
|
||||
|
||||
#include "core/inc/runtime.h"
|
||||
#include "core/inc/signal.h"
|
||||
#include "core/inc/queue.h"
|
||||
#include "core/inc/amd_gpu_agent.h"
|
||||
|
||||
namespace amd {
|
||||
/// @brief Encapsulates HW Aql Command Processor functionality. It
|
||||
/// provides the interface for things such as Doorbell register, read,
|
||||
/// write pointers and a buffer.
|
||||
class AqlQueue : public core::Queue, public core::Signal {
|
||||
public:
|
||||
static __forceinline bool IsType(core::Signal* signal) {
|
||||
return signal->IsType(&rtti_id_);
|
||||
}
|
||||
|
||||
// Acquires/releases queue resources and requests HW schedule/deschedule.
|
||||
AqlQueue(GpuAgent* agent, size_t req_size_pkts, HSAuint32 node_id,
|
||||
ScratchInfo& scratch, core::HsaEventCallback callback,
|
||||
void* err_data, bool is_kv = false);
|
||||
|
||||
~AqlQueue();
|
||||
|
||||
/// @brief Indicates if queue is valid or not
|
||||
bool IsValid() const { return valid_; }
|
||||
|
||||
/// @brief Queue interfaces
|
||||
hsa_status_t Inactivate();
|
||||
|
||||
/// @brief Atomically reads the Read index with Acquire semantics
|
||||
///
|
||||
/// @return uint64_t Value of read index
|
||||
uint64_t LoadReadIndexAcquire();
|
||||
|
||||
/// @brief Atomically reads the Read index with Relaxed semantics
|
||||
///
|
||||
/// @return uint64_t Value of read index
|
||||
uint64_t LoadReadIndexRelaxed();
|
||||
|
||||
/// @brief Atomically reads the Write index with Acquire semantics
|
||||
///
|
||||
/// @return uint64_t Value of write index
|
||||
uint64_t LoadWriteIndexAcquire();
|
||||
|
||||
/// @brief Atomically reads the Write index with Relaxed semantics
|
||||
///
|
||||
/// @return uint64_t Value of write index
|
||||
uint64_t LoadWriteIndexRelaxed();
|
||||
|
||||
/// @brief This operation is illegal
|
||||
void StoreReadIndexRelaxed(uint64_t value) { assert(false); }
|
||||
|
||||
/// @brief This operation is illegal
|
||||
void StoreReadIndexRelease(uint64_t value) { assert(false); }
|
||||
|
||||
/// @brief Atomically writes the Write index with Relaxed semantics
|
||||
///
|
||||
/// @param value New value of write index to update with
|
||||
void StoreWriteIndexRelaxed(uint64_t value);
|
||||
|
||||
/// @brief Atomically writes the Write index with Release semantics
|
||||
///
|
||||
/// @param value New value of write index to update with
|
||||
void StoreWriteIndexRelease(uint64_t value);
|
||||
|
||||
/// @brief Compares and swaps Write index using Acquire and Release semantics
|
||||
///
|
||||
/// @param expected Current value of write index
|
||||
///
|
||||
/// @param value Value of new write index
|
||||
///
|
||||
/// @return uint64_t Value of write index before the update
|
||||
uint64_t CasWriteIndexAcqRel(uint64_t expected, uint64_t value);
|
||||
|
||||
/// @brief Compares and swaps Write index using Acquire semantics
|
||||
///
|
||||
/// @param expected Current value of write index
|
||||
///
|
||||
/// @param value Value of new write index
|
||||
///
|
||||
/// @return uint64_t Value of write index before the update
|
||||
uint64_t CasWriteIndexAcquire(uint64_t expected, uint64_t value);
|
||||
|
||||
/// @brief Compares and swaps Write index using Relaxed semantics
|
||||
///
|
||||
/// @param expected Current value of write index
|
||||
///
|
||||
/// @param value Value of new write index
|
||||
///
|
||||
/// @return uint64_t Value of write index before the update
|
||||
uint64_t CasWriteIndexRelaxed(uint64_t expected, uint64_t value);
|
||||
|
||||
/// @brief Compares and swaps Write index using Release semantics
|
||||
///
|
||||
/// @param expected Current value of write index
|
||||
///
|
||||
/// @param value Value of new write index
|
||||
///
|
||||
/// @return uint64_t Value of write index before the update
|
||||
uint64_t CasWriteIndexRelease(uint64_t expected, uint64_t value);
|
||||
|
||||
/// @brief Updates the Write index using Acquire and Release semantics
|
||||
///
|
||||
/// @param value Value of new write index
|
||||
///
|
||||
/// @return uint64_t Value of write index before the update
|
||||
uint64_t AddWriteIndexAcqRel(uint64_t value);
|
||||
|
||||
/// @brief Updates the Write index using Acquire semantics
|
||||
///
|
||||
/// @param value Value of new write index
|
||||
///
|
||||
/// @return uint64_t Value of write index before the update
|
||||
uint64_t AddWriteIndexAcquire(uint64_t value);
|
||||
|
||||
/// @brief Updates the Write index using Relaxed semantics
|
||||
///
|
||||
/// @param value Value of new write index
|
||||
///
|
||||
/// @return uint64_t Value of write index before the update
|
||||
uint64_t AddWriteIndexRelaxed(uint64_t value);
|
||||
|
||||
/// @brief Updates the Write index using Release semantics
|
||||
///
|
||||
/// @param value Value of new write index
|
||||
///
|
||||
/// @return uint64_t Value of write index before the update
|
||||
uint64_t AddWriteIndexRelease(uint64_t value);
|
||||
|
||||
/// @brief Set CU Masking
|
||||
///
|
||||
/// @param num_cu_mask_count size of mask bit array
|
||||
///
|
||||
/// @param cu_mask pointer to cu mask
|
||||
///
|
||||
/// @return hsa_status_t
|
||||
hsa_status_t SetCUMasking(const uint32_t num_cu_mask_count,
|
||||
const uint32_t* cu_mask);
|
||||
|
||||
/// @brief This operation is illegal
|
||||
hsa_signal_value_t LoadRelaxed() {
|
||||
assert(false);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/// @brief This operation is illegal
|
||||
hsa_signal_value_t LoadAcquire() {
|
||||
assert(false);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/// @brief Update signal value using Relaxed semantics
|
||||
void StoreRelaxed(hsa_signal_value_t value);
|
||||
|
||||
/// @brief Update signal value using Release semantics
|
||||
void StoreRelease(hsa_signal_value_t value);
|
||||
|
||||
/// @brief This operation is illegal
|
||||
hsa_signal_value_t WaitRelaxed(hsa_signal_condition_t condition,
|
||||
hsa_signal_value_t compare_value,
|
||||
uint64_t timeout, hsa_wait_state_t wait_hint) {
|
||||
assert(false);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/// @brief This operation is illegal
|
||||
hsa_signal_value_t WaitAcquire(hsa_signal_condition_t condition,
|
||||
hsa_signal_value_t compare_value,
|
||||
uint64_t timeout, hsa_wait_state_t wait_hint) {
|
||||
assert(false);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/// @brief This operation is illegal
|
||||
void AndRelaxed(hsa_signal_value_t value) { assert(false); }
|
||||
|
||||
/// @brief This operation is illegal
|
||||
void AndAcquire(hsa_signal_value_t value) { assert(false); }
|
||||
|
||||
/// @brief This operation is illegal
|
||||
void AndRelease(hsa_signal_value_t value) { assert(false); }
|
||||
|
||||
/// @brief This operation is illegal
|
||||
void AndAcqRel(hsa_signal_value_t value) { assert(false); }
|
||||
|
||||
/// @brief This operation is illegal
|
||||
void OrRelaxed(hsa_signal_value_t value) { assert(false); }
|
||||
|
||||
/// @brief This operation is illegal
|
||||
void OrAcquire(hsa_signal_value_t value) { assert(false); }
|
||||
|
||||
/// @brief This operation is illegal
|
||||
void OrRelease(hsa_signal_value_t value) { assert(false); }
|
||||
|
||||
/// @brief This operation is illegal
|
||||
void OrAcqRel(hsa_signal_value_t value) { assert(false); }
|
||||
|
||||
/// @brief This operation is illegal
|
||||
void XorRelaxed(hsa_signal_value_t value) { assert(false); }
|
||||
|
||||
/// @brief This operation is illegal
|
||||
void XorAcquire(hsa_signal_value_t value) { assert(false); }
|
||||
|
||||
/// @brief This operation is illegal
|
||||
void XorRelease(hsa_signal_value_t value) { assert(false); }
|
||||
|
||||
/// @brief This operation is illegal
|
||||
void XorAcqRel(hsa_signal_value_t value) { assert(false); }
|
||||
|
||||
/// @brief This operation is illegal
|
||||
void AddRelaxed(hsa_signal_value_t value) { assert(false); }
|
||||
|
||||
/// @brief This operation is illegal
|
||||
void AddAcquire(hsa_signal_value_t value) { assert(false); }
|
||||
|
||||
/// @brief This operation is illegal
|
||||
void AddRelease(hsa_signal_value_t value) { assert(false); }
|
||||
|
||||
/// @brief This operation is illegal
|
||||
void AddAcqRel(hsa_signal_value_t value) { assert(false); }
|
||||
|
||||
/// @brief This operation is illegal
|
||||
void SubRelaxed(hsa_signal_value_t value) { assert(false); }
|
||||
|
||||
/// @brief This operation is illegal
|
||||
void SubAcquire(hsa_signal_value_t value) { assert(false); }
|
||||
|
||||
/// @brief This operation is illegal
|
||||
void SubRelease(hsa_signal_value_t value) { assert(false); }
|
||||
|
||||
/// @brief This operation is illegal
|
||||
void SubAcqRel(hsa_signal_value_t value) { assert(false); }
|
||||
|
||||
/// @brief This operation is illegal
|
||||
hsa_signal_value_t ExchRelaxed(hsa_signal_value_t value) {
|
||||
assert(false);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/// @brief This operation is illegal
|
||||
hsa_signal_value_t ExchAcquire(hsa_signal_value_t value) {
|
||||
assert(false);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/// @brief This operation is illegal
|
||||
hsa_signal_value_t ExchRelease(hsa_signal_value_t value) {
|
||||
assert(false);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/// @brief This operation is illegal
|
||||
hsa_signal_value_t ExchAcqRel(hsa_signal_value_t value) {
|
||||
assert(false);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/// @brief This operation is illegal
|
||||
hsa_signal_value_t CasRelaxed(hsa_signal_value_t expected,
|
||||
hsa_signal_value_t value) {
|
||||
assert(false);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/// @brief This operation is illegal
|
||||
hsa_signal_value_t CasAcquire(hsa_signal_value_t expected,
|
||||
hsa_signal_value_t value) {
|
||||
assert(false);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/// @brief This operation is illegal
|
||||
hsa_signal_value_t CasRelease(hsa_signal_value_t expected,
|
||||
hsa_signal_value_t value) {
|
||||
assert(false);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/// @brief This operation is illegal
|
||||
hsa_signal_value_t CasAcqRel(hsa_signal_value_t expected,
|
||||
hsa_signal_value_t value) {
|
||||
assert(false);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/// @brief This operation is illegal
|
||||
hsa_signal_value_t* ValueLocation() const {
|
||||
assert(false);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/// @brief This operation is illegal
|
||||
HsaEvent* EopEvent() {
|
||||
assert(false);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// 64 byte-aligned allocation and release, for Queue::amd_queue_.
|
||||
void* operator new(size_t size);
|
||||
void* operator new(size_t size, void* ptr) {
  // Placement form: performs no allocation and hands back the storage
  // supplied by the caller. The requested size is not consulted.
  (void)size;
  return ptr;
}
|
||||
void operator delete(void* ptr);
|
||||
void operator delete(void*, void*) {
  // Intentionally empty: the two-pointer (placement) form releases nothing.
}
|
||||
|
||||
protected:
|
||||
bool _IsA(rtti_t id) const {
  // The queried id matches only when it is the address of this class's
  // static rtti_id_ member.
  const bool matches_this_class = (id == &rtti_id_);
  return matches_this_class;
}
|
||||
|
||||
private:
|
||||
uint32_t ComputeRingBufferMinPkts();
|
||||
uint32_t ComputeRingBufferMaxPkts();
|
||||
|
||||
// (De)allocates and (de)registers ring_buf_.
|
||||
void AllocRegisteredRingBuffer(uint32_t queue_size_pkts);
|
||||
void FreeRegisteredRingBuffer();
|
||||
|
||||
static bool DynamicScratchHandler(hsa_signal_value_t error_code, void* arg);
|
||||
|
||||
// AQL packet ring buffer
|
||||
void* ring_buf_;
|
||||
|
||||
// Size of ring_buf_ allocation.
|
||||
// This may be larger than (amd_queue_.hsa_queue.size * sizeof(AqlPacket)).
|
||||
uint32_t ring_buf_alloc_bytes_;
|
||||
|
||||
// Id of the Queue used in communication with thunk
|
||||
HSA_QUEUEID queue_id_;
|
||||
|
||||
// Indicates if queue is valid
|
||||
bool valid_;
|
||||
|
||||
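// Indicates if queue is active (NOTE(review): this comment previously said "inactive", which contradicts the member name; confirm polarity against the uses of active_)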
|
||||
int32_t active_;
|
||||
|
||||
// Cached value of HsaNodeProperties.HSA_CAPABILITY.DoorbellType
|
||||
int doorbell_type_;
|
||||
|
||||
// Handle of agent, which queue is attached to
|
||||
GpuAgent* agent_;
|
||||
|
||||
hsa_profile_t agent_profile_;
|
||||
|
||||
uint32_t queue_full_workaround_;
|
||||
|
||||
// Handle of scratch memory descriptor
|
||||
ScratchInfo queue_scratch_;
|
||||
|
||||
core::HsaEventCallback errors_callback_;
|
||||
|
||||
void* errors_data_;
|
||||
|
||||
// Is KV device queue
|
||||
bool is_kv_queue_;
|
||||
|
||||
// Shared event used for queue errors
|
||||
static HsaEvent* queue_event_;
|
||||
|
||||
// Queue count - used to ref count queue_event_
|
||||
static volatile uint32_t queue_count_;
|
||||
|
||||
// Mutex for queue_event_ manipulation
|
||||
static KernelMutex queue_lock_;
|
||||
|
||||
static int rtti_id_;
|
||||
|
||||
// Forbid copying and moving of this object
|
||||
DISALLOW_COPY_AND_ASSIGN(AqlQueue);
|
||||
};
|
||||
} // namespace amd
|
||||
#endif // header guard
|
||||
@@ -1,174 +0,0 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef HSA_RUNTIME_CORE_INC_AMD_BLIT_KERNEL_H_
|
||||
#define HSA_RUNTIME_CORE_INC_AMD_BLIT_KERNEL_H_
|
||||
|
||||
#include <stddef.h>
#include <stdint.h>

#include <mutex>
#include <vector>

#include "core/inc/blit.h"
|
||||
|
||||
namespace amd {
|
||||
class BlitKernel : public core::Blit {
|
||||
public:
|
||||
explicit BlitKernel();
|
||||
virtual ~BlitKernel() override;
|
||||
|
||||
/// @brief Initialize a blit kernel object.
|
||||
///
|
||||
/// @param agent Pointer to the agent that will execute the AQL packets.
|
||||
///
|
||||
/// @return hsa_status_t
|
||||
virtual hsa_status_t Initialize(const core::Agent& agent) override;
|
||||
|
||||
/// @brief Marks the blit kernel object as invalid and uncouples its link with
|
||||
/// the underlying AQL kernel queue. Use of the blit object
|
||||
/// once it has been release is illegal and any behavior is indeterminate
|
||||
///
|
||||
/// @note: The call will block until all AQL packets have been executed.
|
||||
///
|
||||
/// @return hsa_status_t
|
||||
virtual hsa_status_t Destroy() override;
|
||||
|
||||
/// @brief Submit an AQL packet to perform vector copy. The call is blocking
|
||||
/// until the command execution is finished.
|
||||
///
|
||||
/// @param dst Memory address of the copy destination.
|
||||
/// @param src Memory address of the copy source.
|
||||
/// @param size Size of the data to be copied.
|
||||
virtual hsa_status_t SubmitLinearCopyCommand(void* dst, const void* src,
|
||||
size_t size) override;
|
||||
|
||||
/// @brief Submit a linear copy command to the the underlying compute device's
|
||||
/// control block. The call is non blocking. The memory transfer will start
|
||||
/// after all dependent signals are satisfied. After the transfer is
|
||||
/// completed, the out signal will be decremented.
|
||||
///
|
||||
/// @param dst Memory address of the copy destination.
|
||||
/// @param src Memory address of the copy source.
|
||||
/// @param size Size of the data to be copied.
|
||||
/// @param dep_signals Arrays of dependent signal.
|
||||
/// @param out_signal Output signal.
|
||||
virtual hsa_status_t SubmitLinearCopyCommand(
|
||||
void* dst, const void* src, size_t size,
|
||||
std::vector<core::Signal*>& dep_signals,
|
||||
core::Signal& out_signal) override;
|
||||
|
||||
/// @brief Submit an AQL packet to perform memory fill. The call is blocking
|
||||
/// until the command execution is finished.
|
||||
///
|
||||
/// @param ptr Memory address of the fill destination.
|
||||
/// @param value Value to be set.
|
||||
/// @param count Number of uint32_t element to be set to the value.
|
||||
virtual hsa_status_t SubmitLinearFillCommand(void* ptr, uint32_t value,
|
||||
size_t count) override;
|
||||
|
||||
private:
|
||||
struct __ALIGNED__(16) KernelCopyArgs {
|
||||
const void* src;
|
||||
void* dst;
|
||||
uint64_t size;
|
||||
uint32_t use_vector;
|
||||
};
|
||||
|
||||
struct __ALIGNED__(16) KernelFillArgs {
|
||||
void* ptr;
|
||||
uint64_t num;
|
||||
uint32_t value;
|
||||
};
|
||||
|
||||
/// Reserve a slot in the queue buffer. The call will wait until the queue
|
||||
/// buffer has a room.
|
||||
uint64_t AcquireWriteIndex(uint32_t num_packet);
|
||||
|
||||
/// Update the queue doorbell register with ::write_index. This
|
||||
/// function also serializes concurrent doorbell update to ensure that the
|
||||
/// packet processor doesn't get invalid packet.
|
||||
void ReleaseWriteIndex(uint64_t write_index, uint32_t num_packet);
|
||||
|
||||
/// Wait until all packets are finished.
|
||||
hsa_status_t FenceRelease(uint64_t write_index, uint32_t num_copy_packet,
|
||||
hsa_fence_scope_t fence);
|
||||
|
||||
void PopulateQueue(uint64_t index, uint64_t code_handle, void* args,
|
||||
uint32_t grid_size_x, hsa_signal_t completion_signal);
|
||||
|
||||
KernelCopyArgs* ObtainAsyncKernelCopyArg();
|
||||
|
||||
/// Handles to the vector copy kernel.
|
||||
uint64_t copy_code_handle_;
|
||||
|
||||
/// Handles to the vector copy aligned kernel.
|
||||
uint64_t copy_aligned_code_handle_;
|
||||
|
||||
/// Handles to the fill memory kernel.
|
||||
uint64_t fill_code_handle_;
|
||||
|
||||
/// AQL queue for submitting the vector copy kernel.
|
||||
hsa_queue_t* queue_;
|
||||
uint32_t queue_bitmask_;
|
||||
|
||||
/// Index to track concurrent kernel launch.
|
||||
volatile uint64_t cached_index_;
|
||||
|
||||
/// Pointer to the kernel argument buffer.
|
||||
void* kernarg_;
|
||||
KernelCopyArgs* kernarg_async_;
|
||||
uint32_t kernarg_async_mask_;
|
||||
volatile uint32_t kernarg_async_counter_;
|
||||
|
||||
/// Completion signal for every kernel dispatched.
|
||||
hsa_signal_t completion_signal_;
|
||||
|
||||
/// Lock to synchronize access to kernarg_ and completion_signal_
|
||||
std::mutex lock_;
|
||||
|
||||
/// Pointer to memory containing the ISA and argument buffer.
|
||||
void* code_arg_buffer_;
|
||||
|
||||
static const size_t kMaxCopyCount;
|
||||
static const size_t kMaxFillCount;
|
||||
static const uint32_t kGroupSize;
|
||||
};
|
||||
} // namespace amd
|
||||
|
||||
#endif // header guard
|
||||
@@ -1,479 +0,0 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef HSA_RUNTIME_CORE_INC_AMD_BLIT_KERNEL_KV_H_
|
||||
#define HSA_RUNTIME_CORE_INC_AMD_BLIT_KERNEL_KV_H_
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#define HSA_VECTOR_COPY_KV_AKC_SIZE 368
|
||||
#define HSA_VECTOR_COPY_KV_AKC_OFFSET 256
|
||||
|
||||
/*****HSAIL code of the ISA in ::kVectorCopyRawKv.
|
||||
module &m:1:0:$full:$large:$default;
|
||||
|
||||
prog kernel &__vector_copy_kernel(
|
||||
kernarg_u64 %src,
|
||||
kernarg_u64 %dst,
|
||||
kernarg_u64 %size)
|
||||
{
|
||||
@__vector_copy_kernel_entry:
|
||||
// BB#0: // %entry
|
||||
workitemabsid_u32 $s0, 0;
|
||||
cvt_u64_u32 $d0, $s0;
|
||||
ld_kernarg_align(8)_width(all)_u64 $d1, [%size];
|
||||
cmp_ge_b1_u64 $c0, $d0, $d1;
|
||||
cbr_b1 $c0, @BB0_2;
|
||||
// BB#1: // %if.end
|
||||
ld_kernarg_align(8)_width(all)_u64 $d1, [%src];
|
||||
ld_kernarg_align(8)_width(all)_u64 $d2, [%dst];
|
||||
add_u64 $d2, $d2, $d0;
|
||||
add_u64 $d0, $d1, $d0;
|
||||
ld_global_u8 $s0, [$d0];
|
||||
st_global_u8 $s0, [$d2];
|
||||
|
||||
@BB0_2:
|
||||
// %return
|
||||
ret;
|
||||
};
|
||||
*/
|
||||
|
||||
static char kVectorCopyRawKv[] = {
|
||||
127, 69, 76, 70, 2, 1, 1, 64, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 1, 0, -32, 0, 1, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 64, 0, 0, 0, 0, 0, 0,
|
||||
0, -104, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
64, 0, 56, 0, 1, 0, 64, 0, 6, 0, 5, 0, 3,
|
||||
0, 0, 96, 6, 0, 0, 0, 0, 1, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 112, 1, 0, 0, 0, 0, 0, 0,
|
||||
112, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
|
||||
0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 65, 0, -116, 0, -112, 0, 0, 0,
|
||||
11, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 24, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 11, 0, 5, 0, 5, 0, 0, 0, 9, 0, 0,
|
||||
0, 0, 0, 0, 0, 3, 0, 0, 6, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 1, 5, 0, -64, 127, 0, -116, -65,
|
||||
0, -1, -128, -109, 0, 0, 16, 0, 0, 8, 0, -109, 0,
|
||||
0, 0, 74, 4, 7, 64, -64, -128, 2, 2, 126, 127, 0,
|
||||
-116, -65, 0, 0, -56, 125, 106, 36, -128, -66, 15, 0, -120,
|
||||
-65, 0, 7, -126, -64, 127, 0, -116, -65, 4, 0, 2, 74,
|
||||
5, 2, 4, 126, 2, 106, 80, -46, 2, 1, -87, 1, 0,
|
||||
0, 32, -36, 1, 0, 0, 1, 6, 0, 6, 74, 7, 2,
|
||||
4, 126, 4, 106, 80, -46, 2, 1, -87, 1, 112, 0, -116,
|
||||
-65, 0, 0, 96, -36, 3, 1, 0, 0, 0, 0, -127, -65,
|
||||
3, 0, 0, 0, 8, 0, 0, 0, 1, 0, 0, 0, 65,
|
||||
77, 68, 0, 1, 0, 0, 0, 0, 0, 0, 0, 3, 0,
|
||||
0, 0, 12, 0, 0, 0, 2, 0, 0, 0, 65, 77, 68,
|
||||
0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0,
|
||||
3, 0, 0, 0, 28, 0, 0, 0, 3, 0, 0, 0, 65,
|
||||
77, 68, 0, 4, 0, 7, 0, 7, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 65, 77, 68, 0, 65, 77, 68,
|
||||
71, 80, 85, 0, 0, 3, 0, 0, 0, 40, 0, 0, 0,
|
||||
4, 0, 0, 0, 65, 77, 68, 0, 26, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 65, 77, 68, 32, 72, 83,
|
||||
65, 32, 82, 117, 110, 116, 105, 109, 101, 32, 70, 105, 110,
|
||||
97, 108, 105, 122, 101, 114, 0, 0, 0, 38, 95, 95, 118,
|
||||
101, 99, 116, 111, 114, 95, 99, 111, 112, 121, 95, 107, 101,
|
||||
114, 110, 101, 108, 0, 95, 95, 104, 115, 97, 95, 115, 101,
|
||||
99, 116, 105, 111, 110, 46, 104, 115, 97, 116, 101, 120, 116,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 26, 0, 1, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 22, 0, 0, 0, 3, 0, 1, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 46, 104, 115, 97, 116, 101, 120, 116, 0, 46, 110,
|
||||
111, 116, 101, 0, 46, 115, 116, 114, 116, 97, 98, 0, 46,
|
||||
115, 121, 109, 116, 97, 98, 0, 46, 115, 104, 115, 116, 114,
|
||||
116, 97, 98, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
|
||||
1, 0, 0, 0, 7, 0, -64, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
|
||||
0, 0, 112, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 10, 0, 0, 0, 7,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 112, 2, 0, 0, 0, 0, 0,
|
||||
0, -104, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 3, 0,
|
||||
0, 0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 8, 3, 0, 0, 0, 0, 0, 0,
|
||||
44, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 24, 0, 0, 0, 2, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 56, 3, 0, 0, 0, 0, 0, 0, 48,
|
||||
0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0,
|
||||
0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 24, 0, 0,
|
||||
0, 0, 0, 0, 0, 32, 0, 0, 0, 3, 0, 0, 0,
|
||||
32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 104, 3, 0, 0, 0, 0, 0, 0, 42, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
};
|
||||
// Externally linked handles onto the kVectorCopyRawKv byte array defined
// above: a pointer to its first byte and its size in bytes.
// NOTE(review): these are definitions with external linkage, so this header
// can be included by only one translation unit per program.
extern char* const kVectorCopyKvObject = kVectorCopyRawKv;
extern const size_t kVectorCopyKvObjectSize = sizeof kVectorCopyRawKv;
|
||||
|
||||
#define HSA_VECTOR_COPY_ALIGNED_KV_AKC_SIZE 436
|
||||
#define HSA_VECTOR_COPY_ALIGNED_KV_AKC_OFFSET 256
|
||||
|
||||
/*****HSAIL code of the ISA in ::kVectorCopyAlignedRawKv.
|
||||
module &m:1:0:$full:$large:$default;
|
||||
extension "amd:gcn";
|
||||
|
||||
prog kernel &__copy_buffer_aligned_kernel(
|
||||
kernarg_u64 %src,
|
||||
kernarg_u64 %dst,
|
||||
kernarg_u64 %size,
|
||||
kernarg_u32 %use_vector)
|
||||
{
|
||||
@__copy_buffer_aligned_kernel_entry:
|
||||
// BB#0: // %entry
|
||||
workitemabsid_u32 $s0, 0;
|
||||
cvt_u64_u32 $d0, $s0;
|
||||
ld_kernarg_align(8)_width(all)_u64 $d1, [%size];
|
||||
cmp_ge_b1_u64 $c0, $d0, $d1;
|
||||
cbr_b1 $c0, @LBB0_4;
|
||||
// BB#1: // %if.end
|
||||
ld_kernarg_align(8)_width(all)_u64 $d2, [%dst];
|
||||
ld_kernarg_align(8)_width(all)_u64 $d1, [%src];
|
||||
ld_kernarg_align(4)_width(all)_u32 $s0, [%use_vector];
|
||||
cmp_ne_b1_s32 $c0, $s0, 1;
|
||||
cbr_b1 $c0, @LBB0_3;
|
||||
// BB#2: // %if.then2
|
||||
shl_u64 $d0, $d0, 4;
|
||||
add_u64 $d2, $d2, $d0;
|
||||
add_u64 $d0, $d1, $d0;
|
||||
ld_v4_global_align(16)_const_u32 ($s0, $s1, $s2, $s3), [$d0];
|
||||
st_v4_global_align(16)_u32 ($s0, $s1, $s2, $s3), [$d2];
|
||||
br @LBB0_4;
|
||||
|
||||
@LBB0_3:
|
||||
// %if.else
|
||||
shl_u64 $d0, $d0, 2;
|
||||
add_u64 $d2, $d2, $d0;
|
||||
add_u64 $d0, $d1, $d0;
|
||||
ld_global_align(4)_const_u32 $s0, [$d0];
|
||||
st_global_align(4)_u32 $s0, [$d2];
|
||||
|
||||
@LBB0_4:
|
||||
// %if.end6
|
||||
ret;
|
||||
};
|
||||
*/
|
||||
|
||||
static char kVectorCopyAlignedRawKv[] = {
|
||||
127, 69, 76, 70, 2, 1, 1, 64, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 1, 0, -32, 0, 1, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 64, 0, 0, 0, 0, 0, 0,
|
||||
0, -8, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
64, 0, 56, 0, 1, 0, 64, 0, 6, 0, 5, 0, 3,
|
||||
0, 0, 96, 6, 0, 0, 0, 0, 1, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, -76, 1, 0, 0, 0, 0, 0, 0,
|
||||
-76, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
|
||||
1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 65, 0, -84, 0, -112, 0, 0, 0,
|
||||
11, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 11, 0, 7, 0, 7, 0, 0, 0, 9, 0, 0,
|
||||
0, 0, 0, 0, 0, 4, 4, 4, 6, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 1, 5, 0, -64, 127, 0, -116, -65,
|
||||
0, -1, -128, -109, 0, 0, 16, 0, 0, 8, 0, -109, 0,
|
||||
0, 0, 74, 4, 7, 64, -64, -128, 2, 2, 126, 127, 0,
|
||||
-116, -65, 0, 0, -56, 125, 106, 36, -128, -66, 32, 0, -120,
|
||||
-65, 6, 7, 1, -64, 0, 7, -126, -64, 127, 0, -116, -65,
|
||||
2, -127, 0, -65, 14, 0, -124, -65, 0, 0, -62, -46, 0,
|
||||
9, 1, 0, 4, 0, 4, 74, 5, 2, 6, 126, 3, 3,
|
||||
6, 80, 0, 0, 56, -36, 2, 0, 0, 2, 6, 0, 0,
|
||||
74, 7, 2, 12, 126, 6, 3, 2, 80, 112, 0, -116, -65,
|
||||
0, 0, 120, -36, 0, 2, 0, 0, 13, 0, -126, -65, 0,
|
||||
0, -62, -46, 0, 5, 1, 0, 4, 0, 4, 74, 5, 2,
|
||||
6, 126, 3, 3, 6, 80, 0, 0, 48, -36, 2, 0, 0,
|
||||
2, 6, 0, 0, 74, 7, 2, 6, 126, 3, 3, 2, 80,
|
||||
112, 0, -116, -65, 0, 0, 112, -36, 0, 2, 0, 0, 0,
|
||||
0, -127, -65, 0, 0, 0, 0, 4, 0, 0, 0, 8, 0,
|
||||
0, 0, 1, 0, 0, 0, 65, 77, 68, 0, 1, 0, 0,
|
||||
0, 0, 0, 0, 0, 4, 0, 0, 0, 12, 0, 0, 0,
|
||||
2, 0, 0, 0, 65, 77, 68, 0, 1, 0, 0, 0, 0,
|
||||
0, 0, 0, 1, 1, 1, 0, 4, 0, 0, 0, 25, 0,
|
||||
0, 0, 5, 0, 0, 0, 65, 77, 68, 0, 22, 0, 45,
|
||||
104, 115, 97, 95, 99, 97, 108, 108, 95, 99, 111, 110, 118,
|
||||
101, 110, 116, 105, 111, 110, 61, 0, 0, 0, 0, 0, 4,
|
||||
0, 0, 0, 30, 0, 0, 0, 3, 0, 0, 0, 65, 77,
|
||||
68, 0, 4, 0, 7, 0, 7, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 65, 77, 68, 0, 65, 77, 68, 71,
|
||||
80, 85, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 8,
|
||||
0, 0, 0, 4, 0, 0, 0, 65, 77, 68, 0, -32, 101,
|
||||
-118, -12, -1, 127, 0, 0, 38, 95, 95, 99, 111, 112, 121,
|
||||
95, 98, 117, 102, 102, 101, 114, 95, 97, 108, 105, 103, 110,
|
||||
101, 100, 95, 107, 101, 114, 110, 101, 108, 0, 95, 95, 104,
|
||||
115, 97, 95, 115, 101, 99, 116, 105, 111, 110, 46, 104, 115,
|
||||
97, 116, 101, 120, 116, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 26, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
-76, 1, 0, 0, 0, 0, 0, 0, 30, 0, 0, 0, 3,
|
||||
0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 46, 104, 115, 97, 116, 101,
|
||||
120, 116, 0, 46, 110, 111, 116, 101, 0, 46, 115, 116, 114,
|
||||
116, 97, 98, 0, 46, 115, 121, 109, 116, 97, 98, 0, 46,
|
||||
115, 104, 115, 116, 114, 116, 97, 98, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 1, 0, 0, 0, 1, 0, 0, 0, 7, 0, -64, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
1, 0, 0, 0, 0, 0, 0, -76, 1, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
10, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -72, 2,
|
||||
0, 0, 0, 0, 0, 0, -88, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16,
|
||||
0, 0, 0, 3, 0, 0, 0, 32, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 96, 3, 0,
|
||||
0, 0, 0, 0, 0, 52, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24, 0,
|
||||
0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, -104, 3, 0, 0,
|
||||
0, 0, 0, 0, 48, 0, 0, 0, 0, 0, 0, 0, 3,
|
||||
0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0,
|
||||
0, 0, 24, 0, 0, 0, 0, 0, 0, 0, 32, 0, 0,
|
||||
0, 3, 0, 0, 0, 32, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, -56, 3, 0, 0, 0,
|
||||
0, 0, 0, 42, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
};
|
||||
// Externally linked handles onto the kVectorCopyAlignedRawKv byte array
// defined above: a pointer to its first byte and its size in bytes.
// NOTE(review): these are definitions with external linkage, so this header
// can be included by only one translation unit per program.
extern char* const kVectorCopyAlignedKvObject = kVectorCopyAlignedRawKv;
extern const size_t kVectorCopyAlignedKvObjectSize =
    sizeof kVectorCopyAlignedRawKv;
|
||||
|
||||
#define HSA_FILL_MEMORY_KV_AKC_SIZE 352
|
||||
#define HSA_FILL_MEMORY_KV_AKC_OFFSET 256
|
||||
|
||||
/*****HSAIL code of the ISA in ::kFillMemoryRawKv.
|
||||
module &m:1:0:$full:$large:$default;
|
||||
extension "amd:gcn";
|
||||
|
||||
prog kernel &__fill_memory_kernel(
|
||||
kernarg_u64 %ptr,
|
||||
kernarg_u64 %num,
|
||||
kernarg_u32 %value)
|
||||
{
|
||||
@__fill_memory_kernel_entry:
|
||||
// BB#0: // %entry
|
||||
workitemabsid_u32 $s0, 0;
|
||||
cvt_u64_u32 $d0, $s0;
|
||||
ld_kernarg_align(8)_width(all)_u64 $d1, [%num];
|
||||
cmp_ge_b1_u64 $c0, $d0, $d1;
|
||||
cbr_b1 $c0, @LBB0_2;
|
||||
// BB#1: // %if.end
|
||||
ld_kernarg_align(8)_width(all)_u64 $d1, [%ptr];
|
||||
ld_kernarg_align(4)_width(all)_u32 $s0, [%value];
|
||||
shl_u64 $d0, $d0, 2;
|
||||
add_u64 $d0, $d1, $d0;
|
||||
st_global_align(4)_u32 $s0, [$d0];
|
||||
|
||||
@LBB0_2:
|
||||
// %return
|
||||
ret;
|
||||
};
|
||||
*/
|
||||
|
||||
static char kFillMemoryRawKv[] = {
|
||||
127, 69, 76, 70, 2, 1, 1, 64, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 1, 0, -32, 0, 1, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 64, 0, 0, 0, 0, 0, 0, 0, -104, 3,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 0, 56, 0,
|
||||
1, 0, 64, 0, 6, 0, 5, 0, 3, 0, 0, 96, 6, 0,
|
||||
0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 96, 1,
|
||||
0, 0, 0, 0, 0, 0, 96, 1, 0, 0, 0, 0, 0, 0,
|
||||
0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 0, -84, 0,
|
||||
-112, 0, 0, 0, 11, 0, 10, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 32, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 11, 0, 3, 0, 3, 0, 0, 0, 9, 0,
|
||||
0, 0, 0, 0, 0, 0, 4, 4, 4, 6, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 1, 5, 0, -64, 127, 0,
|
||||
-116, -65, 0, -1, -128, -109, 0, 0, 16, 0, 0, 8, 0, -109,
|
||||
0, 0, 0, 74, 2, 7, 64, -64, -128, 2, 2, 126, 127, 0,
|
||||
-116, -65, 0, 0, -56, 125, 106, 36, -128, -66, 11, 0, -120, -65,
|
||||
0, 7, 65, -64, 4, 7, 2, -64, 0, 0, -62, -46, 0, 5,
|
||||
1, 0, 127, 0, -116, -65, 2, 0, 0, 74, 3, 2, 4, 126,
|
||||
2, 3, 2, 80, 4, 2, 4, 126, 0, 0, 112, -36, 0, 2,
|
||||
0, 0, 0, 0, -127, -65, 4, 0, 0, 0, 8, 0, 0, 0,
|
||||
1, 0, 0, 0, 65, 77, 68, 0, 1, 0, 0, 0, 0, 0,
|
||||
0, 0, 4, 0, 0, 0, 12, 0, 0, 0, 2, 0, 0, 0,
|
||||
65, 77, 68, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1,
|
||||
1, 0, 4, 0, 0, 0, 25, 0, 0, 0, 5, 0, 0, 0,
|
||||
65, 77, 68, 0, 22, 0, 45, 104, 115, 97, 95, 99, 97, 108,
|
||||
108, 95, 99, 111, 110, 118, 101, 110, 116, 105, 111, 110, 61, 0,
|
||||
0, 0, 0, 0, 4, 0, 0, 0, 30, 0, 0, 0, 3, 0,
|
||||
0, 0, 65, 77, 68, 0, 4, 0, 7, 0, 7, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 65, 77, 68, 0, 65, 77,
|
||||
68, 71, 80, 85, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0,
|
||||
8, 0, 0, 0, 4, 0, 0, 0, 65, 77, 68, 0, 48, 123,
|
||||
44, -103, -4, 127, 0, 0, 38, 95, 95, 102, 105, 108, 108, 95,
|
||||
109, 101, 109, 111, 114, 121, 95, 107, 101, 114, 110, 101, 108, 0,
|
||||
95, 95, 104, 115, 97, 95, 115, 101, 99, 116, 105, 111, 110, 46,
|
||||
104, 115, 97, 116, 101, 120, 116, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 26, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
96, 1, 0, 0, 0, 0, 0, 0, 22, 0, 0, 0, 3, 0,
|
||||
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 46, 104, 115, 97, 116, 101, 120, 116, 0,
|
||||
46, 110, 111, 116, 101, 0, 46, 115, 116, 114, 116, 97, 98, 0,
|
||||
46, 115, 121, 109, 116, 97, 98, 0, 46, 115, 104, 115, 116, 114,
|
||||
116, 97, 98, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 7, 0,
|
||||
-64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 1, 0, 0, 0, 0, 0, 0, 96, 1, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 0,
|
||||
0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 96, 2, 0, 0, 0, 0,
|
||||
0, 0, -88, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 3, 0, 0, 0,
|
||||
32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 8, 3, 0, 0, 0, 0, 0, 0, 44, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
24, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 56, 3, 0, 0,
|
||||
0, 0, 0, 0, 48, 0, 0, 0, 0, 0, 0, 0, 3, 0,
|
||||
0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0,
|
||||
24, 0, 0, 0, 0, 0, 0, 0, 32, 0, 0, 0, 3, 0,
|
||||
0, 0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 104, 3, 0, 0, 0, 0, 0, 0, 42, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0,
|
||||
};
|
||||
|
||||
// Externally linked handles onto the kFillMemoryRawKv byte array defined
// above: a pointer to its first byte and its size in bytes.
// NOTE(review): these are definitions with external linkage, so this header
// can be included by only one translation unit per program.
extern char* const kFillMemoryKvObject = kFillMemoryRawKv;
extern const size_t kFillMemoryKvObjectSize = sizeof kFillMemoryRawKv;
|
||||
#endif // header guard
|
||||
@@ -1,490 +0,0 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef HSA_RUNTIME_CORE_INC_AMD_BLIT_KERNEL_VI_H_
|
||||
#define HSA_RUNTIME_CORE_INC_AMD_BLIT_KERNEL_VI_H_
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#define HSA_VECTOR_COPY_VI_AKC_SIZE 380
|
||||
#define HSA_VECTOR_COPY_VI_AKC_OFFSET 256
|
||||
|
||||
/*****HSAIL code of the ISA in ::kVectorCopyRawVi.
|
||||
module &m:1:0:$full:$large:$default;
|
||||
|
||||
prog kernel &__vector_copy_kernel(
|
||||
kernarg_u64 %src,
|
||||
kernarg_u64 %dst,
|
||||
kernarg_u64 %size)
|
||||
{
|
||||
@__vector_copy_kernel_entry:
|
||||
// BB#0: // %entry
|
||||
workitemabsid_u32 $s0, 0;
|
||||
cvt_u64_u32 $d0, $s0;
|
||||
ld_kernarg_align(8)_width(all)_u64 $d1, [%size];
|
||||
cmp_ge_b1_u64 $c0, $d0, $d1;
|
||||
cbr_b1 $c0, @BB0_2;
|
||||
// BB#1: // %if.end
|
||||
ld_kernarg_align(8)_width(all)_u64 $d1, [%src];
|
||||
ld_kernarg_align(8)_width(all)_u64 $d2, [%dst];
|
||||
add_u64 $d2, $d2, $d0;
|
||||
add_u64 $d0, $d1, $d0;
|
||||
ld_global_u8 $s0, [$d0];
|
||||
st_global_u8 $s0, [$d2];
|
||||
|
||||
@BB0_2:
|
||||
// %return
|
||||
ret;
|
||||
};
|
||||
*/
|
||||
|
||||
static char kVectorCopyRawVi[] = {
|
||||
127, 69, 76, 70, 2, 1, 1, 64, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 1, 0, -32, 0, 1, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 64, 0, 0, 0, 0, 0, 0,
|
||||
0, -72, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
64, 0, 56, 0, 1, 0, 64, 0, 6, 0, 5, 0, 3,
|
||||
0, 0, 96, 6, 0, 0, 0, 0, 1, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 124, 1, 0, 0, 0, 0, 0, 0,
|
||||
124, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
|
||||
1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, -63, 2, -84, 0, -112, 0, 0, 0,
|
||||
11, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 96, 0, 5, 0, 5, 0, 0, 0, 9, 0, 0,
|
||||
0, 0, 0, 0, 0, 4, 4, 4, 6, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 2, 0, 2, -64, 4, 0, 0, 0,
|
||||
127, 0, -116, -65, 0, -1, -128, -110, 0, 0, 16, 0, 0,
|
||||
8, 0, -110, 0, 0, 0, 50, 3, 0, 6, -64, 16, 0,
|
||||
0, 0, -128, 2, 2, 126, 127, 0, -116, -65, 0, 0, -40,
|
||||
125, 106, 32, -128, -66, 16, 0, -120, -65, 3, 1, 10, -64,
|
||||
0, 0, 0, 0, 127, 0, -116, -65, 4, 0, 2, 50, 5,
|
||||
2, 4, 126, 2, 106, 28, -47, 2, 1, -87, 1, 0, 0,
|
||||
64, -36, 1, 0, 0, 1, 6, 0, 6, 50, 7, 2, 4,
|
||||
126, 4, 106, 28, -47, 2, 1, -87, 1, 112, 0, -116, -65,
|
||||
0, 0, 96, -36, 3, 1, 0, 0, 0, 0, -127, -65, 0,
|
||||
0, 0, 0, 4, 0, 0, 0, 8, 0, 0, 0, 1, 0,
|
||||
0, 0, 65, 77, 68, 0, 1, 0, 0, 0, 0, 0, 0,
|
||||
0, 4, 0, 0, 0, 12, 0, 0, 0, 2, 0, 0, 0,
|
||||
65, 77, 68, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1,
|
||||
1, 1, 0, 4, 0, 0, 0, 25, 0, 0, 0, 5, 0,
|
||||
0, 0, 65, 77, 68, 0, 22, 0, 45, 104, 115, 97, 95,
|
||||
99, 97, 108, 108, 95, 99, 111, 110, 118, 101, 110, 116, 105,
|
||||
111, 110, 61, 0, 0, 0, 0, 0, 4, 0, 0, 0, 30,
|
||||
0, 0, 0, 3, 0, 0, 0, 65, 77, 68, 0, 4, 0,
|
||||
7, 0, 8, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0,
|
||||
0, 65, 77, 68, 0, 65, 77, 68, 71, 80, 85, 0, 0,
|
||||
0, 0, 0, 0, 4, 0, 0, 0, 8, 0, 0, 0, 4,
|
||||
0, 0, 0, 65, 77, 68, 0, 32, 103, -72, 81, -3, 127,
|
||||
0, 0, 38, 95, 95, 118, 101, 99, 116, 111, 114, 95, 99,
|
||||
111, 112, 121, 95, 107, 101, 114, 110, 101, 108, 0, 95, 95,
|
||||
104, 115, 97, 95, 115, 101, 99, 116, 105, 111, 110, 46, 104,
|
||||
115, 97, 116, 101, 120, 116, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 26, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 124, 1, 0, 0, 0, 0, 0, 0, 22, 0, 0, 0,
|
||||
3, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 46, 104, 115, 97, 116,
|
||||
101, 120, 116, 0, 46, 110, 111, 116, 101, 0, 46, 115, 116,
|
||||
114, 116, 97, 98, 0, 46, 115, 121, 109, 116, 97, 98, 0,
|
||||
46, 115, 104, 115, 116, 114, 116, 97, 98, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 7, 0, -64,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 1, 0, 0, 0, 0, 0, 0, 124, 1, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 10, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -128,
|
||||
2, 0, 0, 0, 0, 0, 0, -88, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
16, 0, 0, 0, 3, 0, 0, 0, 32, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 40, 3,
|
||||
0, 0, 0, 0, 0, 0, 44, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24,
|
||||
0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 88, 3, 0,
|
||||
0, 0, 0, 0, 0, 48, 0, 0, 0, 0, 0, 0, 0,
|
||||
3, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0,
|
||||
0, 0, 0, 24, 0, 0, 0, 0, 0, 0, 0, 32, 0,
|
||||
0, 0, 3, 0, 0, 0, 32, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, -120, 3, 0, 0,
|
||||
0, 0, 0, 0, 42, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
};
|
||||
// Address of the first byte of the kVectorCopyRawVi array.
extern char* const kVectorCopyViObject = &kVectorCopyRawVi[0];
|
||||
// Size in bytes of the kVectorCopyRawVi array, as reported by sizeof.
extern size_t const kVectorCopyViObjectSize = sizeof(kVectorCopyRawVi);
|
||||
|
||||
#define HSA_VECTOR_COPY_ALIGNED_VI_AKC_SIZE 452
|
||||
#define HSA_VECTOR_COPY_ALIGNED_VI_AKC_OFFSET 256
|
||||
|
||||
/*****HSAIL code of the ISA in ::kVectorCopyAlignedRawVi.
|
||||
module &m:1:0:$full:$large:$default;
|
||||
extension "amd:gcn";
|
||||
|
||||
prog kernel &__copy_buffer_aligned_kernel(
|
||||
kernarg_u64 %src,
|
||||
kernarg_u64 %dst,
|
||||
kernarg_u64 %size,
|
||||
kernarg_u32 %use_vector)
|
||||
{
|
||||
@__copy_buffer_aligned_kernel_entry:
|
||||
// BB#0: // %entry
|
||||
workitemabsid_u32 $s0, 0;
|
||||
cvt_u64_u32 $d0, $s0;
|
||||
ld_kernarg_align(8)_width(all)_u64 $d1, [%size];
|
||||
cmp_ge_b1_u64 $c0, $d0, $d1;
|
||||
cbr_b1 $c0, @LBB0_4;
|
||||
// BB#1: // %if.end
|
||||
ld_kernarg_align(8)_width(all)_u64 $d2, [%dst];
|
||||
ld_kernarg_align(8)_width(all)_u64 $d1, [%src];
|
||||
ld_kernarg_align(4)_width(all)_u32 $s0, [%use_vector];
|
||||
cmp_ne_b1_s32 $c0, $s0, 1;
|
||||
cbr_b1 $c0, @LBB0_3;
|
||||
// BB#2: // %if.then2
|
||||
shl_u64 $d0, $d0, 4;
|
||||
add_u64 $d2, $d2, $d0;
|
||||
add_u64 $d0, $d1, $d0;
|
||||
ld_v4_global_align(16)_const_u32 ($s0, $s1, $s2, $s3), [$d0];
|
||||
st_v4_global_align(16)_u32 ($s0, $s1, $s2, $s3), [$d2];
|
||||
br @LBB0_4;
|
||||
|
||||
@LBB0_3:
|
||||
// %if.else
|
||||
shl_u64 $d0, $d0, 2;
|
||||
add_u64 $d2, $d2, $d0;
|
||||
add_u64 $d0, $d1, $d0;
|
||||
ld_global_align(4)_const_u32 $s0, [$d0];
|
||||
st_global_align(4)_u32 $s0, [$d2];
|
||||
|
||||
@LBB0_4:
|
||||
// %if.end6
|
||||
ret;
|
||||
};
|
||||
*/
|
||||
|
||||
static char kVectorCopyAlignedRawVi[] = {
|
||||
127, 69, 76, 70, 2, 1, 1, 64, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 1, 0, -32, 0, 1, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 64, 0, 0, 0, 0, 0, 0,
|
||||
0, 8, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
64, 0, 56, 0, 1, 0, 64, 0, 6, 0, 5, 0, 3,
|
||||
0, 0, 96, 6, 0, 0, 0, 0, 1, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, -60, 1, 0, 0, 0, 0, 0, 0,
|
||||
-60, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
|
||||
1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 65, 0, -84, 0, -112, 0, 0, 0,
|
||||
11, 0, 74, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 16, 0, 8, 0, 8, 0, 0, 0, 12, 0, 0,
|
||||
0, 0, 0, 0, 0, 4, 4, 4, 6, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 2, 0, 2, -64, 4, 0, 0, 0,
|
||||
127, 0, -116, -65, 0, -1, -128, -110, 0, 0, 16, 0, 0,
|
||||
8, 0, -110, 0, 0, 0, 50, 3, 0, 6, -64, 16, 0,
|
||||
0, 0, -128, 2, 2, 126, 127, 0, -116, -65, 0, 0, -40,
|
||||
125, 106, 32, -128, -66, 34, 0, -120, -65, -125, 0, 2, -64,
|
||||
24, 0, 0, 0, 3, 2, 10, -64, 0, 0, 0, 0, 127,
|
||||
0, -116, -65, 2, -127, 0, -65, 14, 0, -124, -65, 0, 0,
|
||||
-113, -46, -124, 0, 2, 0, 8, 0, 4, 50, 9, 2, 6,
|
||||
126, 3, 3, 6, 56, 0, 0, 92, -36, 2, 0, 0, 4,
|
||||
10, 0, 0, 50, 11, 2, 4, 126, 2, 3, 2, 56, 112,
|
||||
0, -116, -65, 0, 0, 124, -36, 0, 4, 0, 0, 13, 0,
|
||||
-126, -65, 0, 0, -113, -46, -126, 0, 2, 0, 8, 0, 4,
|
||||
50, 9, 2, 6, 126, 3, 3, 6, 56, 0, 0, 80, -36,
|
||||
2, 0, 0, 4, 10, 0, 0, 50, 11, 2, 4, 126, 2,
|
||||
3, 2, 56, 112, 0, -116, -65, 0, 0, 112, -36, 0, 4,
|
||||
0, 0, 0, 0, -127, -65, 0, 0, 0, 0, 4, 0, 0,
|
||||
0, 8, 0, 0, 0, 1, 0, 0, 0, 65, 77, 68, 0,
|
||||
1, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 12,
|
||||
0, 0, 0, 2, 0, 0, 0, 65, 77, 68, 0, 1, 0,
|
||||
0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 4, 0, 0,
|
||||
0, 25, 0, 0, 0, 5, 0, 0, 0, 65, 77, 68, 0,
|
||||
22, 0, 45, 104, 115, 97, 95, 99, 97, 108, 108, 95, 99,
|
||||
111, 110, 118, 101, 110, 116, 105, 111, 110, 61, 0, 0, 0,
|
||||
0, 0, 4, 0, 0, 0, 30, 0, 0, 0, 3, 0, 0,
|
||||
0, 65, 77, 68, 0, 4, 0, 7, 0, 8, 0, 0, 0,
|
||||
0, 0, 0, 0, 1, 0, 0, 0, 65, 77, 68, 0, 65,
|
||||
77, 68, 71, 80, 85, 0, 0, 0, 0, 0, 0, 4, 0,
|
||||
0, 0, 8, 0, 0, 0, 4, 0, 0, 0, 65, 77, 68,
|
||||
0, 96, 62, -27, 85, -1, 127, 0, 0, 38, 95, 95, 99,
|
||||
111, 112, 121, 95, 98, 117, 102, 102, 101, 114, 95, 97, 108,
|
||||
105, 103, 110, 101, 100, 95, 107, 101, 114, 110, 101, 108, 0,
|
||||
95, 95, 104, 115, 97, 95, 115, 101, 99, 116, 105, 111, 110,
|
||||
46, 104, 115, 97, 116, 101, 120, 116, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 26, 0, 1, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, -60, 1, 0, 0, 0, 0, 0, 0, 30, 0,
|
||||
0, 0, 3, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 46, 104, 115,
|
||||
97, 116, 101, 120, 116, 0, 46, 110, 111, 116, 101, 0, 46,
|
||||
115, 116, 114, 116, 97, 98, 0, 46, 115, 121, 109, 116, 97,
|
||||
98, 0, 46, 115, 104, 115, 116, 114, 116, 97, 98, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 7,
|
||||
0, -64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 1, 0, 0, 0, 0, 0, 0, -60, 1, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 10, 0, 0, 0, 7, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, -56, 2, 0, 0, 0, 0, 0, 0, -88, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 16, 0, 0, 0, 3, 0, 0, 0, 32, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
112, 3, 0, 0, 0, 0, 0, 0, 52, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 24, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -88,
|
||||
3, 0, 0, 0, 0, 0, 0, 48, 0, 0, 0, 0, 0,
|
||||
0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0,
|
||||
0, 0, 0, 0, 0, 24, 0, 0, 0, 0, 0, 0, 0,
|
||||
32, 0, 0, 0, 3, 0, 0, 0, 32, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -40, 3,
|
||||
0, 0, 0, 0, 0, 0, 42, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
};
|
||||
// Address of the first byte of the kVectorCopyAlignedRawVi array.
extern char* const kVectorCopyAlignedViObject = &kVectorCopyAlignedRawVi[0];
|
||||
// Size in bytes of the kVectorCopyAlignedRawVi array, as reported by sizeof.
extern size_t const kVectorCopyAlignedViObjectSize =
|
||||
sizeof(kVectorCopyAlignedRawVi);
|
||||
|
||||
#define HSA_FILL_MEMORY_VI_AKC_SIZE 368
|
||||
#define HSA_FILL_MEMORY_VI_AKC_OFFSET 256
|
||||
|
||||
/*****HSAIL code of the ISA in ::kFillMemoryRawVi.
|
||||
module &m:1:0:$full:$large:$default;
|
||||
extension "amd:gcn";
|
||||
|
||||
prog kernel &__fill_memory_kernel(
|
||||
kernarg_u64 %ptr,
|
||||
kernarg_u64 %num,
|
||||
kernarg_u32 %value)
|
||||
{
|
||||
@__fill_memory_kernel_entry:
|
||||
// BB#0: // %entry
|
||||
workitemabsid_u32 $s0, 0;
|
||||
cvt_u64_u32 $d0, $s0;
|
||||
ld_kernarg_align(8)_width(all)_u64 $d1, [%num];
|
||||
cmp_ge_b1_u64 $c0, $d0, $d1;
|
||||
cbr_b1 $c0, @LBB0_2;
|
||||
// BB#1: // %if.end
|
||||
ld_kernarg_align(8)_width(all)_u64 $d1, [%ptr];
|
||||
ld_kernarg_align(4)_width(all)_u32 $s0, [%value];
|
||||
shl_u64 $d0, $d0, 2;
|
||||
add_u64 $d0, $d1, $d0;
|
||||
st_global_align(4)_u32 $s0, [$d0];
|
||||
|
||||
@LBB0_2:
|
||||
// %return
|
||||
ret;
|
||||
};
|
||||
*/
|
||||
|
||||
static char kFillMemoryRawVi[] = {
|
||||
127, 69, 76, 70, 2, 1, 1, 64, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 1, 0, -32, 0, 1, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 64, 0, 0, 0, 0, 0, 0,
|
||||
0, -88, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
64, 0, 56, 0, 1, 0, 64, 0, 6, 0, 5, 0, 3,
|
||||
0, 0, 96, 6, 0, 0, 0, 0, 1, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 112, 1, 0, 0, 0, 0, 0, 0,
|
||||
112, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
|
||||
1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 64, 0, -84, 0, -112, 0, 0, 0,
|
||||
11, 0, 74, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 13, 0, 3, 0, 3, 0, 0, 0, 9, 0, 0,
|
||||
0, 0, 0, 0, 0, 4, 4, 4, 6, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 2, 0, 2, -64, 4, 0, 0, 0,
|
||||
127, 0, -116, -65, 0, -1, -128, -110, 0, 0, 16, 0, 0,
|
||||
8, 0, -110, 0, 0, 0, 50, 3, 0, 6, -64, 8, 0,
|
||||
0, 0, -128, 2, 2, 126, 127, 0, -116, -65, 0, 0, -40,
|
||||
125, 106, 32, -128, -66, 13, 0, -120, -65, -125, 0, 6, -64,
|
||||
0, 0, 0, 0, 3, 1, 2, -64, 16, 0, 0, 0, 0,
|
||||
0, -113, -46, -126, 0, 2, 0, 127, 0, -116, -65, 2, 0,
|
||||
0, 50, 3, 2, 4, 126, 2, 3, 2, 56, 4, 2, 4,
|
||||
126, 0, 0, 112, -36, 0, 2, 0, 0, 0, 0, -127, -65,
|
||||
4, 0, 0, 0, 8, 0, 0, 0, 1, 0, 0, 0, 65,
|
||||
77, 68, 0, 1, 0, 0, 0, 0, 0, 0, 0, 4, 0,
|
||||
0, 0, 12, 0, 0, 0, 2, 0, 0, 0, 65, 77, 68,
|
||||
0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0,
|
||||
4, 0, 0, 0, 25, 0, 0, 0, 5, 0, 0, 0, 65,
|
||||
77, 68, 0, 22, 0, 45, 104, 115, 97, 95, 99, 97, 108,
|
||||
108, 95, 99, 111, 110, 118, 101, 110, 116, 105, 111, 110, 61,
|
||||
0, 0, 0, 0, 0, 4, 0, 0, 0, 30, 0, 0, 0,
|
||||
3, 0, 0, 0, 65, 77, 68, 0, 4, 0, 7, 0, 8,
|
||||
0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 65, 77,
|
||||
68, 0, 65, 77, 68, 71, 80, 85, 0, 0, 0, 0, 0,
|
||||
0, 4, 0, 0, 0, 8, 0, 0, 0, 4, 0, 0, 0,
|
||||
65, 77, 68, 0, 16, -20, 88, 97, -4, 127, 0, 0, 38,
|
||||
95, 95, 102, 105, 108, 108, 95, 109, 101, 109, 111, 114, 121,
|
||||
95, 107, 101, 114, 110, 101, 108, 0, 95, 95, 104, 115, 97,
|
||||
95, 115, 101, 99, 116, 105, 111, 110, 46, 104, 115, 97, 116,
|
||||
101, 120, 116, 0, 0, 0, 0, 0, 0, 0, 0, 0, 26,
|
||||
0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 112, 1,
|
||||
0, 0, 0, 0, 0, 0, 22, 0, 0, 0, 3, 0, 1,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 46, 104, 115, 97, 116, 101, 120, 116,
|
||||
0, 46, 110, 111, 116, 101, 0, 46, 115, 116, 114, 116, 97,
|
||||
98, 0, 46, 115, 121, 109, 116, 97, 98, 0, 46, 115, 104,
|
||||
115, 116, 114, 116, 97, 98, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
|
||||
0, 0, 0, 1, 0, 0, 0, 7, 0, -64, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
|
||||
0, 0, 0, 0, 0, 112, 1, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 0,
|
||||
0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 112, 2, 0, 0,
|
||||
0, 0, 0, 0, -88, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, 0, 0,
|
||||
0, 3, 0, 0, 0, 32, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 24, 3, 0, 0, 0,
|
||||
0, 0, 0, 44, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 24, 0, 0, 0,
|
||||
2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 72, 3, 0, 0, 0, 0,
|
||||
0, 0, 48, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0,
|
||||
0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0,
|
||||
24, 0, 0, 0, 0, 0, 0, 0, 32, 0, 0, 0, 3,
|
||||
0, 0, 0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 120, 3, 0, 0, 0, 0, 0,
|
||||
0, 42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0,
|
||||
};
|
||||
|
||||
// Address of the first byte of the kFillMemoryRawVi array.
extern char* const kFillMemoryViObject = &kFillMemoryRawVi[0];
|
||||
// Size in bytes of the kFillMemoryRawVi array, as reported by sizeof.
extern size_t const kFillMemoryViObjectSize = sizeof(kFillMemoryRawVi);
|
||||
#endif // header guard
|
||||
@@ -1,218 +0,0 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef HSA_RUNTIME_CORE_INC_AMD_BLIT_SDMA_H_
|
||||
#define HSA_RUNTIME_CORE_INC_AMD_BLIT_SDMA_H_
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "hsakmt.h"
|
||||
|
||||
#include "core/inc/blit.h"
|
||||
#include "core/inc/runtime.h"
|
||||
#include "core/inc/signal.h"
|
||||
#include "core/util/utils.h"
|
||||
|
||||
namespace amd {
|
||||
class BlitSdma : public core::Blit {
|
||||
public:
|
||||
explicit BlitSdma();
|
||||
|
||||
virtual ~BlitSdma() override;
|
||||
|
||||
/// @brief Initialize a User Mode SDMA Queue object. Input parameters specify
|
||||
/// properties of queue being created.
|
||||
///
|
||||
/// @param agent Reference to the agent that will execute the PM4 commands.
|
||||
///
|
||||
/// @return hsa_status_t
|
||||
virtual hsa_status_t Initialize(const core::Agent& agent) override;
|
||||
|
||||
/// @brief Marks the queue object as invalid and uncouples its link with
|
||||
/// the underlying compute device's control block. Use of queue object
|
||||
/// once it has been released is illegal and any behavior is indeterminate.
|
||||
///
|
||||
/// @note: The call will block until all packets have executed.
|
||||
///
|
||||
/// @return hsa_status_t
|
||||
virtual hsa_status_t Destroy() override;
|
||||
|
||||
/// @brief Submit a linear copy command to the queue buffer.
|
||||
///
|
||||
/// @param dst Memory address of the copy destination.
|
||||
/// @param src Memory address of the copy source.
|
||||
/// @param size Size of the data to be copied.
|
||||
virtual hsa_status_t SubmitLinearCopyCommand(void* dst, const void* src,
|
||||
size_t size) override;
|
||||
|
||||
/// @brief Submit a linear copy command to the underlying compute device's
|
||||
/// control block. The call is non blocking. The memory transfer will start
|
||||
/// after all dependent signals are satisfied. After the transfer is
|
||||
/// completed, the out signal will be decremented.
|
||||
///
|
||||
/// @param dst Memory address of the copy destination.
|
||||
/// @param src Memory address of the copy source.
|
||||
/// @param size Size of the data to be copied.
|
||||
/// @param dep_signals Array of dependent signals.
|
||||
/// @param out_signal Output signal.
|
||||
virtual hsa_status_t SubmitLinearCopyCommand(
|
||||
void* dst, const void* src, size_t size,
|
||||
std::vector<core::Signal*>& dep_signals,
|
||||
core::Signal& out_signal) override;
|
||||
|
||||
/// @brief Submit a linear fill command to the queue buffer
|
||||
///
|
||||
/// @param ptr Memory address of the fill destination.
|
||||
/// @param value Value to be set.
|
||||
/// @param count Number of uint32_t elements to be set to the value.
|
||||
virtual hsa_status_t SubmitLinearFillCommand(void* ptr, uint32_t value,
|
||||
size_t count) override;
|
||||
|
||||
protected:
|
||||
/// @brief Acquires the address into queue buffer where a new command
|
||||
/// packet of specified size could be written. The address that is
|
||||
/// returned is guaranteed to be unique even in a multi-threaded access
|
||||
/// scenario. This function is guaranteed to return a pointer for writing
|
||||
/// data into the queue buffer.
|
||||
///
|
||||
/// @param cmd_size Command packet size in bytes.
|
||||
///
|
||||
/// @return pointer into the queue buffer where a PM4 packet of specified size
|
||||
/// could be written. NULL if input size is greater than the size of queue
|
||||
/// buffer.
|
||||
char* AcquireWriteAddress(uint32_t cmd_size);
|
||||
|
||||
void UpdateWriteAndDoorbellRegister(uint32_t current_offset,
|
||||
uint32_t new_offset);
|
||||
|
||||
/// @brief Updates the Write Register of compute device to the end of
|
||||
/// SDMA packet written into queue buffer. The update to Write Register
|
||||
/// will be safe under multi-threaded usage scenario. Furthermore, updates
|
||||
/// to Write Register are blocking until all prior updates are completed
|
||||
/// i.e. if two threads T1 & T2 were to call release, then updates by T2
|
||||
/// will block until T1 has completed its update (assumes T1 acquired the
|
||||
/// write address first).
|
||||
///
|
||||
/// @param cmd_addr pointer into the queue buffer where a PM4 packet was
|
||||
/// written.
|
||||
///
|
||||
/// @param cmd_size Command packet size in bytes.
|
||||
void ReleaseWriteAddress(char* cmd_addr, uint32_t cmd_size);
|
||||
|
||||
/// @brief Writes NO-OP words into queue buffer in case writing a command
|
||||
/// causes the queue buffer to wrap.
|
||||
///
|
||||
/// @param cmd_size Size in bytes of command causing queue buffer to wrap.
|
||||
void WrapQueue(uint32_t cmd_size);
|
||||
|
||||
/// @brief Build fence command
|
||||
void BuildFenceCommand(char* fence_command_addr, uint32_t* fence,
|
||||
uint32_t fence_value);
|
||||
|
||||
uint32_t* ObtainFenceObject();
|
||||
|
||||
void WaitFence(uint32_t* fence, uint32_t fence_value);
|
||||
|
||||
void BuildCopyCommand(char* cmd_addr, uint32_t num_copy_command, void* dst,
|
||||
const void* src, size_t size);
|
||||
|
||||
void BuildPollCommand(char* cmd_addr, void* addr, uint32_t reference);
|
||||
|
||||
void BuildAtomicDecrementCommand(char* cmd_addr, void* addr);
|
||||
|
||||
/// Indicates size of Queue buffer in bytes.
|
||||
uint32_t queue_size_;
|
||||
|
||||
/// Base address of the Queue buffer at construction time.
|
||||
char* queue_start_addr_;
|
||||
|
||||
uint32_t* fence_base_addr_;
|
||||
uint32_t fence_pool_size_;
|
||||
uint32_t fence_pool_mask_;
|
||||
volatile uint32_t fence_pool_counter_;
|
||||
|
||||
/// Queue resource descriptor for doorbell, read
|
||||
/// and write indices
|
||||
HsaQueueResource queue_resource_;
|
||||
|
||||
/// @brief Current address of execution in Queue buffer.
|
||||
///
|
||||
/// @note: The value of address is obtained by reading
|
||||
/// the value of Write Register of the compute device.
|
||||
/// Users should write to the Queue buffer at the current
|
||||
/// address, else it will lead to execution error and potentially
|
||||
/// a hang.
|
||||
///
|
||||
/// @note: The value of Write Register does not always begin
|
||||
/// with Zero after a Queue has been created. This needs to be
|
||||
/// understood better. This means that current address number of
|
||||
/// words of Queue buffer is unavailable for use.
|
||||
volatile uint32_t cached_reserve_offset_;
|
||||
volatile uint32_t cached_commit_offset_;
|
||||
|
||||
uint32_t linear_copy_command_size_;
|
||||
|
||||
uint32_t fill_command_size_;
|
||||
|
||||
uint32_t fence_command_size_;
|
||||
|
||||
uint32_t poll_command_size_;
|
||||
|
||||
uint32_t atomic_command_size_;
|
||||
|
||||
/// Max copy size of a single linear copy command packet.
|
||||
size_t max_single_linear_copy_size_;
|
||||
|
||||
/// Max total copy size supported by the queue.
|
||||
size_t max_total_linear_copy_size_;
|
||||
|
||||
/// Max count of uint32_t of a single fill command packet.
|
||||
size_t max_single_fill_size_;
|
||||
|
||||
/// Max total fill count supported by the queue.
|
||||
size_t max_total_fill_size_;
|
||||
|
||||
std::mutex wrap_lock_;
|
||||
};
|
||||
} // namespace amd
|
||||
|
||||
#endif // header guard
|
||||
@@ -1,154 +0,0 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// AMD specific HSA backend.
|
||||
|
||||
#ifndef HSA_RUNTIME_CORE_INC_AMD_CPU_AGENT_H_
|
||||
#define HSA_RUNTIME_CORE_INC_AMD_CPU_AGENT_H_
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "hsakmt.h"
|
||||
|
||||
#include "core/inc/runtime.h"
|
||||
#include "core/inc/agent.h"
|
||||
#include "core/inc/queue.h"
|
||||
|
||||
namespace amd {
|
||||
// @brief Class to represent a CPU device.
|
||||
class CpuAgent : public core::Agent {
|
||||
public:
|
||||
// @brief CpuAgent constructor.
|
||||
//
|
||||
// @param [in] node Node id. Each CPU in different socket will get distinct
|
||||
// id.
|
||||
// @param [in] node_props Node property.
|
||||
CpuAgent(HSAuint32 node, const HsaNodeProperties& node_props);
|
||||
|
||||
// @brief CpuAgent destructor.
|
||||
~CpuAgent();
|
||||
|
||||
// @brief Invoke the user provided callback for each region accessible by
|
||||
// this agent.
|
||||
//
|
||||
// @param [in] include_peer If true, the callback will be also invoked on each
|
||||
// peer memory region accessible by this agent. If false, only invoke the
|
||||
// callback on memory region owned by this agent.
|
||||
// @param [in] callback User provided callback function.
|
||||
// @param [in] data User provided pointer as input for @p callback.
|
||||
//
|
||||
// @retval ::HSA_STATUS_SUCCESS if the callback function for each traversed
|
||||
// region returns ::HSA_STATUS_SUCCESS.
|
||||
hsa_status_t VisitRegion(bool include_peer,
|
||||
hsa_status_t (*callback)(hsa_region_t region,
|
||||
void* data),
|
||||
void* data) const;
|
||||
|
||||
// @brief Override from core::Agent.
|
||||
hsa_status_t IterateRegion(hsa_status_t (*callback)(hsa_region_t region,
|
||||
void* data),
|
||||
void* data) const override;
|
||||
|
||||
// @brief Override from core::Agent.
|
||||
hsa_status_t GetInfo(hsa_agent_info_t attribute, void* value) const override;
|
||||
|
||||
// @brief Override from core::Agent.
|
||||
hsa_status_t QueueCreate(size_t size, hsa_queue_type_t queue_type,
|
||||
core::HsaEventCallback event_callback, void* data,
|
||||
uint32_t private_segment_size,
|
||||
uint32_t group_segment_size,
|
||||
core::Queue** queue) override;
|
||||
|
||||
// @brief Returns number of data caches.
|
||||
__forceinline size_t num_cache() const { return cache_props_.size(); }
|
||||
|
||||
// @brief Returns data cache property.
|
||||
//
|
||||
// @param [in] idx Cache level.
|
||||
__forceinline const HsaCacheProperties& cache_prop(int idx) const {
|
||||
return cache_props_[idx];
|
||||
}
|
||||
|
||||
// @brief Override from core::Agent.
|
||||
const std::vector<const core::MemoryRegion*>& regions() const override {
|
||||
return regions_;
|
||||
}
|
||||
|
||||
// @brief OVerride from core::Agent.
|
||||
const core::Isa* isa() const override { return NULL; }
|
||||
|
||||
private:
|
||||
// @brief Query the driver to get the region list owned by this agent.
|
||||
void InitRegionList();
|
||||
|
||||
// @brief Query the driver to get the cache properties.
|
||||
void InitCacheList();
|
||||
|
||||
// @brief Invoke the user provided callback for every region in @p regions.
|
||||
//
|
||||
// @param [in] regions Array of region object.
|
||||
// @param [in] callback User provided callback function.
|
||||
// @param [in] data User provided pointer as input for @p callback.
|
||||
//
|
||||
// @retval ::HSA_STATUS_SUCCESS if the callback function for each traversed
|
||||
// region returns ::HSA_STATUS_SUCCESS.
|
||||
hsa_status_t VisitRegion(
|
||||
const std::vector<const core::MemoryRegion*>& regions,
|
||||
hsa_status_t (*callback)(hsa_region_t region, void* data),
|
||||
void* data) const;
|
||||
|
||||
// @brief Node property.
|
||||
const HsaNodeProperties properties_;
|
||||
|
||||
// @brief Array of data cache property. The array index represents the cache
|
||||
// level.
|
||||
std::vector<HsaCacheProperties> cache_props_;
|
||||
|
||||
// @brief Array of regions owned by this agent.
|
||||
std::vector<const core::MemoryRegion*> regions_;
|
||||
|
||||
DISALLOW_COPY_AND_ASSIGN(CpuAgent);
|
||||
};
|
||||
|
||||
} // namespace amd
|
||||
|
||||
#endif // header guard
|
||||
@@ -1,222 +0,0 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef AMD_ELF_IMAGE_HPP_
|
||||
#define AMD_ELF_IMAGE_HPP_
|
||||
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
|
||||
namespace amd {
|
||||
namespace elf {
|
||||
class Symbol;
|
||||
class SymbolTable;
|
||||
class Section;
|
||||
class RelocationSection;
|
||||
|
||||
class Segment {
|
||||
public:
|
||||
virtual ~Segment() { }
|
||||
virtual uint64_t type() const = 0;
|
||||
virtual uint64_t memSize() const = 0;
|
||||
virtual uint64_t align() const = 0;
|
||||
virtual uint64_t imageSize() const = 0;
|
||||
virtual uint64_t vaddr() const = 0;
|
||||
virtual uint64_t flags() const = 0;
|
||||
virtual const char* data() const = 0;
|
||||
virtual uint16_t getSegmentIndex() = 0;
|
||||
virtual bool updateAddSection(Section *section) = 0;
|
||||
};
|
||||
|
||||
class Section {
|
||||
public:
|
||||
virtual ~Section() { }
|
||||
virtual uint16_t getSectionIndex() const = 0;
|
||||
virtual uint32_t type() const = 0;
|
||||
virtual std::string Name() const = 0;
|
||||
virtual uint64_t offset() const = 0;
|
||||
virtual uint64_t addr() const = 0;
|
||||
virtual bool updateAddr(uint64_t addr) = 0;
|
||||
virtual uint64_t addralign() const = 0;
|
||||
virtual uint64_t flags() const = 0;
|
||||
virtual uint64_t size() const = 0;
|
||||
virtual uint64_t nextDataOffset(uint64_t align) const = 0;
|
||||
virtual uint64_t addData(const void *src, uint64_t size, uint64_t align) = 0;
|
||||
virtual bool getData(uint64_t offset, void* dest, uint64_t size) = 0;
|
||||
virtual Segment* segment() = 0;
|
||||
virtual RelocationSection* asRelocationSection() = 0;
|
||||
virtual bool hasRelocationSection() const = 0;
|
||||
virtual RelocationSection* relocationSection(SymbolTable* symtab = 0) = 0;
|
||||
virtual bool setMemSize(uint64_t s) = 0;
|
||||
virtual uint64_t memSize() const = 0;
|
||||
virtual bool setAlign(uint64_t a) = 0;
|
||||
virtual uint64_t memAlign() const = 0;
|
||||
};
|
||||
|
||||
class Relocation {
|
||||
public:
|
||||
virtual ~Relocation() { }
|
||||
virtual RelocationSection* section() = 0;
|
||||
virtual uint32_t type() = 0;
|
||||
virtual uint32_t symbolIndex() = 0;
|
||||
virtual Symbol* symbol() = 0;
|
||||
virtual uint64_t offset() = 0;
|
||||
virtual int64_t addend() = 0;
|
||||
};
|
||||
|
||||
class RelocationSection : public virtual Section {
|
||||
public:
|
||||
virtual Relocation* addRelocation(uint32_t type, Symbol* symbol, uint64_t offset, int64_t addend) = 0;
|
||||
virtual size_t relocationCount() const = 0;
|
||||
virtual Relocation* relocation(size_t i) = 0;
|
||||
virtual Section* targetSection() = 0;
|
||||
};
|
||||
|
||||
/// Section interface for a string table. Derives virtually from Section.
class StringTable : public virtual Section {
public:
  // Two ways to add a string: addString yields a character pointer, while
  // addString1 yields a size_t.
  // NOTE(review): the size_t is presumably the index of the string within
  // the table (compare getString/getStringIndex) - confirm.
  virtual const char* addString(const std::string& s) = 0;
  virtual size_t addString1(const std::string& s) = 0;

  // Lookup in both directions between a string and its index.
  virtual const char* getString(size_t ndx) = 0;
  virtual size_t getStringIndex(const char* name) = 0;
};
|
||||
|
||||
class Symbol {
|
||||
public:
|
||||
virtual ~Symbol() { }
|
||||
virtual uint32_t index() = 0;
|
||||
virtual uint32_t type() = 0;
|
||||
virtual uint32_t binding() = 0;
|
||||
virtual uint64_t size() = 0;
|
||||
virtual uint64_t value() = 0;
|
||||
virtual unsigned char other() = 0;
|
||||
virtual std::string name() = 0;
|
||||
virtual Section* section() = 0;
|
||||
virtual void setValue(uint64_t value) = 0;
|
||||
virtual void setSize(uint64_t size) = 0;
|
||||
};
|
||||
|
||||
class SymbolTable : public virtual Section {
|
||||
public:
|
||||
virtual Symbol* addSymbol(Section* section, const std::string& name, uint64_t value, uint64_t size, unsigned char type, unsigned char binding, unsigned char other = 0) = 0;
|
||||
virtual size_t symbolCount() = 0;
|
||||
virtual Symbol* symbol(size_t i) = 0;
|
||||
};
|
||||
|
||||
/// Section interface for notes identified by a (name, type) pair. Derives
/// virtually from Section.
class NoteSection : public virtual Section {
public:
  // Adds a note. The descriptor is optional: `desc` defaults to a null
  // pointer and `desc_size` to 0.
  virtual bool addNote(const std::string& name,
                       uint32_t type,
                       const void* desc = nullptr,
                       uint32_t desc_size = 0) = 0;

  // Looks up a note by name and type; the descriptor and its size are
  // reported through the `desc` and `desc_size` out-pointers.
  // NOTE(review): ownership of *desc is not visible from this header.
  virtual bool getNote(const std::string& name,
                       uint32_t type,
                       void** desc,
                       uint32_t* desc_size) = 0;
};
|
||||
|
||||
class Image {
|
||||
public:
|
||||
virtual ~Image() { }
|
||||
|
||||
virtual bool initNew(uint16_t machine, uint16_t type, uint8_t os_abi = 0, uint8_t abi_version = 0, uint32_t e_flags = 0) = 0;
|
||||
virtual bool loadFromFile(const std::string& filename) = 0;
|
||||
virtual bool saveToFile(const std::string& filename) = 0;
|
||||
virtual bool initFromBuffer(const void* buffer, size_t size) = 0;
|
||||
virtual bool initAsBuffer(const void* buffer, size_t size) = 0;
|
||||
virtual bool writeTo(const std::string& filename) = 0;
|
||||
virtual bool copyToBuffer(void** buf, size_t* size = 0) = 0; // Copy to new buffer allocated with malloc
|
||||
virtual bool copyToBuffer(void* buf, size_t size) = 0; // Copy to existing buffer of given size.
|
||||
|
||||
virtual const char* data() = 0;
|
||||
virtual uint64_t size() = 0;
|
||||
|
||||
virtual uint16_t Machine() = 0;
|
||||
virtual uint16_t Type() = 0;
|
||||
|
||||
std::string output() { return out.str(); }
|
||||
|
||||
virtual bool Freeze() = 0;
|
||||
virtual bool Validate() = 0;
|
||||
|
||||
virtual StringTable* shstrtab() = 0;
|
||||
virtual StringTable* strtab() = 0;
|
||||
virtual SymbolTable* symtab() = 0;
|
||||
virtual SymbolTable* getSymtab(uint16_t index) = 0;
|
||||
|
||||
virtual StringTable* addStringTable(const std::string& name) = 0;
|
||||
virtual StringTable* getStringTable(uint16_t index) = 0;
|
||||
|
||||
virtual SymbolTable* addSymbolTable(const std::string& name, StringTable* stab = 0) = 0;
|
||||
|
||||
virtual size_t segmentCount() = 0;
|
||||
virtual Segment* segment(size_t i) = 0;
|
||||
virtual Segment* segmentByVAddr(uint64_t vaddr) = 0;
|
||||
|
||||
virtual size_t sectionCount() = 0;
|
||||
virtual Section* section(size_t i) = 0;
|
||||
virtual Section* sectionByVAddr(uint64_t vaddr) = 0;
|
||||
|
||||
virtual NoteSection* note() = 0;
|
||||
virtual NoteSection* addNoteSection(const std::string& name) = 0;
|
||||
|
||||
virtual Segment* initSegment(uint32_t type, uint32_t flags, uint64_t paddr = 0) = 0;
|
||||
virtual bool addSegments() = 0;
|
||||
|
||||
virtual Section* addSection(const std::string &name,
|
||||
uint32_t type,
|
||||
uint64_t flags = 0,
|
||||
uint64_t entsize = 0,
|
||||
Segment* segment = 0) = 0;
|
||||
|
||||
virtual RelocationSection* relocationSection(Section* sec, SymbolTable* symtab = 0) = 0;
|
||||
|
||||
protected:
|
||||
std::ostringstream out;
|
||||
};
|
||||
|
||||
Image* NewElf32Image();
|
||||
Image* NewElf64Image();
|
||||
|
||||
uint64_t ElfSize(const void* buffer);
|
||||
|
||||
std::string GetNoteString(uint32_t s_size, const char* s);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
#endif // AMD_ELF_IMAGE_HPP_
|
||||
@@ -1,354 +0,0 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// AMD specific HSA backend.
|
||||
|
||||
#ifndef HSA_RUNTIME_CORE_INC_AMD_GPU_AGENT_H_
|
||||
#define HSA_RUNTIME_CORE_INC_AMD_GPU_AGENT_H_
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "hsakmt.h"
|
||||
|
||||
#include "core/inc/runtime.h"
|
||||
#include "core/inc/agent.h"
|
||||
#include "core/inc/blit.h"
|
||||
#include "core/inc/signal.h"
|
||||
#include "core/util/small_heap.h"
|
||||
#include "core/util/locks.h"
|
||||
|
||||
namespace amd {
|
||||
/// @brief Plain aggregate that carries scratch memory information.
///
/// GpuAgentInt::AcquireQueueScratch takes one of these by reference and
/// populates it with the carved memory information.
struct ScratchInfo {
  /// Base address of the scratch memory.
  void* queue_base;

  /// Scratch size.
  /// NOTE(review): units are presumably bytes - confirm against the users.
  size_t size;

  /// Scratch size per thread.
  size_t size_per_thread;

  /// Signed offset associated with the queue's scratch.
  /// NOTE(review): what the offset is relative to is not visible here.
  ptrdiff_t queue_process_offset;
};
|
||||
|
||||
// @brief Interface to represent a GPU agent.
|
||||
class GpuAgentInt : public core::Agent {
|
||||
public:
|
||||
// @brief Constructor
|
||||
GpuAgentInt(uint32_t node_id)
|
||||
: core::Agent(node_id, core::Agent::DeviceType::kAmdGpuDevice) {}
|
||||
|
||||
// @brief Invoke the user provided callback for each region accessible by
|
||||
// this agent.
|
||||
//
|
||||
// @param [in] include_peer If true, the callback will be also invoked on each
|
||||
// peer memory region accessible by this agent. If false, only invoke the
|
||||
// callback on memory region owned by this agent.
|
||||
// @param [in] callback User provided callback function.
|
||||
// @param [in] data User provided pointer as input for @p callback.
|
||||
//
|
||||
// @retval ::HSA_STATUS_SUCCESS if the callback function for each traversed
|
||||
// region returns ::HSA_STATUS_SUCCESS.
|
||||
virtual hsa_status_t VisitRegion(bool include_peer,
|
||||
hsa_status_t (*callback)(hsa_region_t region,
|
||||
void* data),
|
||||
void* data) const = 0;
|
||||
|
||||
// @brief Carve scratch memory from scratch pool.
|
||||
//
|
||||
// @param [out] scratch Structure to be populated with the carved memory
|
||||
// information.
|
||||
virtual void AcquireQueueScratch(ScratchInfo& scratch) = 0;
|
||||
|
||||
// @brief Release scratch memory back to scratch pool.
|
||||
//
|
||||
// @param [in] base Address of scratch memory previously acquired with
|
||||
// call to ::AcquireQueueScratch.
|
||||
virtual void ReleaseQueueScratch(void* base) = 0;
|
||||
|
||||
// @brief Translate the kernel start and end dispatch timestamp from agent
|
||||
// domain to host domain.
|
||||
//
|
||||
// @param [in] signal Pointer to signal that provides the dispatch timing.
|
||||
// @param [out] time Structure to be populated with the host domain value.
|
||||
virtual void TranslateTime(core::Signal* signal,
|
||||
hsa_amd_profiling_dispatch_time_t& time) = 0;
|
||||
|
||||
// @brief Translate timestamp agent domain to host domain.
|
||||
//
|
||||
// @param [out] time Timestamp in agent domain.
|
||||
virtual uint64_t TranslateTime(uint64_t tick) = 0;
|
||||
|
||||
// @brief Sets the coherency type of this agent.
|
||||
//
|
||||
// @param [in] type New coherency type.
|
||||
//
|
||||
// @retval true The new coherency type is set successfuly.
|
||||
virtual bool current_coherency_type(hsa_amd_coherency_type_t type) = 0;
|
||||
|
||||
// @brief Returns the current coherency type of this agent.
|
||||
//
|
||||
// @retval Coherency type.
|
||||
virtual hsa_amd_coherency_type_t current_coherency_type() const = 0;
|
||||
|
||||
// @brief Query if agent represent Kaveri GPU.
|
||||
//
|
||||
// @retval true if agent is Kaveri GPU.
|
||||
virtual bool is_kv_device() const = 0;
|
||||
|
||||
// @brief Query the agent HSA profile.
|
||||
//
|
||||
// @retval HSA profile.
|
||||
virtual hsa_profile_t profile() const = 0;
|
||||
};
|
||||
|
||||
class GpuAgent : public GpuAgentInt {
|
||||
public:
|
||||
// @brief GPU agent constructor.
|
||||
//
|
||||
// @param [in] node Node id. Each CPU in different socket will get distinct
|
||||
// id.
|
||||
// @param [in] node_props Node property.
|
||||
GpuAgent(HSAuint32 node, const HsaNodeProperties& node_props);
|
||||
|
||||
// @brief GPU agent destructor.
|
||||
~GpuAgent();
|
||||
|
||||
// @brief Initialize DMA queue.
|
||||
//
|
||||
// @retval HSA_STATUS_SUCCESS DMA queue initialization is successful.
|
||||
hsa_status_t InitDma();
|
||||
|
||||
uint16_t GetMicrocodeVersion() const;
|
||||
|
||||
// @brief Assembles SP3 shader source into executable code.
|
||||
//
|
||||
// @param [in] src_sp3 SP3 shader source text representation.
|
||||
// @param [in] func_name Name of the SP3 function to assemble.
|
||||
// @param [out] code_buf Executable code buffer.
|
||||
// @param [out] code_buf_size Size of executable code buffer in bytes.
|
||||
void AssembleShader(const char* src_sp3, const char* func_name,
|
||||
void*& code_buf, size_t& code_buf_size);
|
||||
|
||||
// @brief Frees executable code created by AssembleShader.
|
||||
//
|
||||
// @param [in] code_buf Executable code buffer.
|
||||
// @param [in] code_buf_size Size of executable code buffer in bytes.
|
||||
void ReleaseShader(void* code_buf, size_t code_buf_size);
|
||||
|
||||
// @brief Override from core::Agent.
|
||||
hsa_status_t VisitRegion(bool include_peer,
|
||||
hsa_status_t (*callback)(hsa_region_t region,
|
||||
void* data),
|
||||
void* data) const override;
|
||||
|
||||
// @brief Override from core::Agent.
|
||||
hsa_status_t IterateRegion(hsa_status_t (*callback)(hsa_region_t region,
|
||||
void* data),
|
||||
void* data) const override;
|
||||
|
||||
// @brief Override from core::Agent.
|
||||
hsa_status_t DmaCopy(void* dst, const void* src, size_t size) override;
|
||||
|
||||
// @brief Override from core::Agent.
|
||||
hsa_status_t DmaCopy(void* dst, const void* src, size_t size,
|
||||
std::vector<core::Signal*>& dep_signals,
|
||||
core::Signal& out_signal) override;
|
||||
|
||||
// @brief Override from core::Agent.
|
||||
hsa_status_t DmaFill(void* ptr, uint32_t value, size_t count) override;
|
||||
|
||||
// @brief Override from core::Agent.
|
||||
hsa_status_t GetInfo(hsa_agent_info_t attribute, void* value) const override;
|
||||
|
||||
// @brief Override from core::Agent.
|
||||
hsa_status_t QueueCreate(size_t size, hsa_queue_type_t queue_type,
|
||||
core::HsaEventCallback event_callback, void* data,
|
||||
uint32_t private_segment_size,
|
||||
uint32_t group_segment_size,
|
||||
core::Queue** queue) override;
|
||||
|
||||
// @brief Override from amd::GpuAgentInt.
|
||||
void AcquireQueueScratch(ScratchInfo& scratch) override;
|
||||
|
||||
// @brief Override from amd::GpuAgentInt.
|
||||
void ReleaseQueueScratch(void* base) override;
|
||||
|
||||
// @brief Override from amd::GpuAgentInt.
|
||||
void TranslateTime(core::Signal* signal,
|
||||
hsa_amd_profiling_dispatch_time_t& time) override;
|
||||
|
||||
// @brief Override from amd::GpuAgentInt.
|
||||
uint64_t TranslateTime(uint64_t tick) override;
|
||||
|
||||
// @brief Override from amd::GpuAgentInt.
|
||||
bool current_coherency_type(hsa_amd_coherency_type_t type) override;
|
||||
|
||||
// @brief Override from amd::GpuAgentInt.
|
||||
hsa_amd_coherency_type_t current_coherency_type() const override {
|
||||
return current_coherency_type_;
|
||||
}
|
||||
|
||||
// Getter & setters.
|
||||
|
||||
// @brief Returns node property.
|
||||
__forceinline const HsaNodeProperties& properties() const {
|
||||
return properties_;
|
||||
}
|
||||
|
||||
// @brief Returns number of data caches.
|
||||
__forceinline size_t num_cache() const { return cache_props_.size(); }
|
||||
|
||||
// @brief Returns data cache property.
|
||||
//
|
||||
// @param [in] idx Cache level.
|
||||
__forceinline const HsaCacheProperties& cache_prop(int idx) const {
|
||||
return cache_props_[idx];
|
||||
}
|
||||
|
||||
// @brief Override from core::Agent.
|
||||
const std::vector<const core::MemoryRegion*>& regions() const override {
|
||||
return regions_;
|
||||
}
|
||||
|
||||
// @brief OVerride from core::Agent.
|
||||
const core::Isa* isa() const override { return isa_; }
|
||||
|
||||
// @brief Override from amd::GpuAgentInt.
|
||||
__forceinline bool is_kv_device() const override { return is_kv_device_; }
|
||||
|
||||
// @brief Override from amd::GpuAgentInt.
|
||||
__forceinline hsa_profile_t profile() const override { return profile_; }
|
||||
|
||||
protected:
|
||||
static const uint32_t minAqlSize_ = 0x1000; // 4KB min
|
||||
static const uint32_t maxAqlSize_ = 0x20000; // 8MB max
|
||||
|
||||
// @brief Invoke the user provided callback for every region in @p regions.
|
||||
//
|
||||
// @param [in] regions Array of region object.
|
||||
// @param [in] callback User provided callback function.
|
||||
// @param [in] data User provided pointer as input for @p callback.
|
||||
//
|
||||
// @retval ::HSA_STATUS_SUCCESS if the callback function for each traversed
|
||||
// region returns ::HSA_STATUS_SUCCESS.
|
||||
hsa_status_t VisitRegion(
|
||||
const std::vector<const core::MemoryRegion*>& regions,
|
||||
hsa_status_t (*callback)(hsa_region_t region, void* data),
|
||||
void* data) const;
|
||||
|
||||
// @brief Update ::t1_ tick count.
|
||||
void SyncClocks();
|
||||
|
||||
// @brief Binds the second-level trap handler to this node.
|
||||
void BindTrapHandler();
|
||||
|
||||
// @brief Node properties.
|
||||
const HsaNodeProperties properties_;
|
||||
|
||||
// @brief Current coherency type.
|
||||
hsa_amd_coherency_type_t current_coherency_type_;
|
||||
|
||||
// @brief Maximum number of queues that can be created.
|
||||
uint32_t max_queues_;
|
||||
|
||||
// @brief Object to manage scratch memory.
|
||||
SmallHeap scratch_pool_;
|
||||
|
||||
// @brief Default scratch size per queue.
|
||||
size_t queue_scratch_len_;
|
||||
|
||||
// @brief Default scratch size per work item.
|
||||
size_t scratch_per_thread_;
|
||||
|
||||
// @brief Blit object to handle memory copy/fill.
|
||||
core::Blit* blit_;
|
||||
|
||||
// @brief Mutex to protect the update to coherency type.
|
||||
KernelMutex coherency_lock_;
|
||||
|
||||
// @brief Mutex to protect access to scratch pool.
|
||||
KernelMutex scratch_lock_;
|
||||
|
||||
// @brief Mutex to protect access to ::t1_.
|
||||
KernelMutex t1_lock_;
|
||||
|
||||
// @brief GPU tick on initialization.
|
||||
HsaClockCounters t0_;
|
||||
|
||||
HsaClockCounters t1_;
|
||||
|
||||
// @brief Array of GPU cache property.
|
||||
std::vector<HsaCacheProperties> cache_props_;
|
||||
|
||||
// @brief Array of regions owned by this agent.
|
||||
std::vector<const core::MemoryRegion*> regions_;
|
||||
|
||||
core::Isa* isa_;
|
||||
|
||||
// @brief HSA profile.
|
||||
hsa_profile_t profile_;
|
||||
|
||||
bool is_kv_device_;
|
||||
|
||||
void* trap_code_buf_;
|
||||
|
||||
size_t trap_code_buf_size_;
|
||||
|
||||
private:
|
||||
// @brief Query the driver to get the region list owned by this agent.
|
||||
void InitRegionList();
|
||||
|
||||
// @brief Reserve memory for scratch pool to be used by AQL queue of this
|
||||
// agent.
|
||||
void InitScratchPool();
|
||||
|
||||
// @brief Query the driver to get the cache properties.
|
||||
void InitCacheList();
|
||||
|
||||
// @brief Alternative aperture base address. Only on KV.
|
||||
uintptr_t ape1_base_;
|
||||
|
||||
// @brief Alternative aperture size. Only on KV.
|
||||
size_t ape1_size_;
|
||||
|
||||
DISALLOW_COPY_AND_ASSIGN(GpuAgent);
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
#endif // header guard
|
||||
@@ -1,387 +0,0 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef AMD_HSA_CODE_HPP_
|
||||
#define AMD_HSA_CODE_HPP_
|
||||
|
||||
#include "amd_elf_image.hpp"
|
||||
#include "amd_hsa_elf.h"
|
||||
#include "amd_hsa_kernel_code.h"
|
||||
#include "hsa.h"
|
||||
#include "hsa_ext_finalize.h"
|
||||
#include <memory>
|
||||
#include <sstream>
|
||||
#include <cassert>
|
||||
#include <unordered_map>
|
||||
|
||||
namespace amd {
|
||||
namespace hsa {
|
||||
namespace common {
|
||||
|
||||
/// Mix-in base that tags an object with a 64-bit signature.
///
/// The signature is available in two forms:
///  - CT_SIGNATURE: a static constant shared by every Signed<tag_value>;
///  - RT_SIGNATURE: a per-object constant written by the constructor.
/// ObjectAt() compares the two to decide whether an address holds an object
/// of the expected type.
///
/// The constructor and the (virtual) destructor are protected, so the class
/// can only be used as a base class.
template<uint64_t tag_value>
class Signed {
public:
  static const uint64_t CT_SIGNATURE;
  const uint64_t RT_SIGNATURE;

protected:
  Signed()
    : RT_SIGNATURE(tag_value)
  {
  }

  virtual ~Signed()
  {
  }
};

// Out-of-class definition of the compile-time signature constant.
template<uint64_t tag_value>
const uint64_t Signed<tag_value>::CT_SIGNATURE = tag_value;
|
||||
|
||||
bool IsAccessibleMemoryAddress(uint64_t address);
|
||||
|
||||
/// Returns the byte offset of the data member designated by `member` from the
/// start of a `class_type` object.
///
/// The offset is obtained by applying the member pointer to a null
/// `class_type` pointer and measuring the distance of the resulting address
/// from that null base; no object is created or read.
template<typename class_type, typename member_type>
size_t OffsetOf(member_type class_type::*member)
{
  class_type* const origin = nullptr;
  const char* const field_address = (const char*)&(origin->*member);
  const char* const base_address = (const char*)origin;
  return field_address - base_address;
}
|
||||
|
||||
template<typename class_type>
|
||||
class_type* ObjectAt(uint64_t address)
|
||||
{
|
||||
if (!IsAccessibleMemoryAddress(address)) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
const uint64_t *rt_signature =
|
||||
(const uint64_t*)(address + OffsetOf(&class_type::RT_SIGNATURE));
|
||||
if (nullptr == rt_signature) {
|
||||
return nullptr;
|
||||
}
|
||||
if (class_type::CT_SIGNATURE != *rt_signature) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
return (class_type*)address;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
namespace code {
|
||||
|
||||
// Short names for the amd::elf types used throughout this namespace.
using Segment = amd::elf::Segment;
using Section = amd::elf::Section;
using RelocationSection = amd::elf::RelocationSection;
using Relocation = amd::elf::Relocation;

// Forward declarations: Symbol refers to both derived kinds before they
// are defined.
class KernelSymbol;
class VariableSymbol;
|
||||
|
||||
class Symbol {
|
||||
protected:
|
||||
amd::elf::Symbol* elfsym;
|
||||
|
||||
public:
|
||||
explicit Symbol(amd::elf::Symbol* elfsym_)
|
||||
: elfsym(elfsym_) { }
|
||||
virtual ~Symbol() { }
|
||||
virtual bool IsKernelSymbol() const { return false; }
|
||||
virtual KernelSymbol* AsKernelSymbol() { assert(false); return 0; }
|
||||
virtual bool IsVariableSymbol() const { return false; }
|
||||
virtual VariableSymbol* AsVariableSymbol() { assert(false); return 0; }
|
||||
amd::elf::Symbol* elfSym() { return elfsym; }
|
||||
std::string Name() const { return elfsym ? elfsym->name() : ""; }
|
||||
Section* GetSection() { return elfsym->section(); }
|
||||
virtual uint64_t SectionOffset() const { return elfsym->value(); }
|
||||
virtual uint64_t VAddr() const { return elfsym->section()->addr() + elfsym->value(); }
|
||||
uint32_t Index() const { return elfsym ? elfsym->index() : 0; }
|
||||
bool IsDeclaration() const;
|
||||
bool IsDefinition() const;
|
||||
virtual bool IsAgent() const;
|
||||
virtual hsa_symbol_kind_t Kind() const = 0;
|
||||
hsa_symbol_linkage_t Linkage() const;
|
||||
hsa_variable_allocation_t Allocation() const;
|
||||
hsa_variable_segment_t Segment() const;
|
||||
uint64_t Size() const;
|
||||
uint32_t Size32() const;
|
||||
uint32_t Alignment() const;
|
||||
bool IsConst() const;
|
||||
virtual hsa_status_t GetInfo(hsa_code_symbol_info_t attribute, void *value);
|
||||
static hsa_code_symbol_t ToHandle(Symbol* sym);
|
||||
static Symbol* FromHandle(hsa_code_symbol_t handle);
|
||||
void setValue(uint64_t value) { elfsym->setValue(value); }
|
||||
void setSize(uint32_t size) { elfsym->setSize(size); }
|
||||
};
|
||||
|
||||
class KernelSymbol : public Symbol {
|
||||
private:
|
||||
uint32_t kernarg_segment_size, kernarg_segment_alignment;
|
||||
uint32_t group_segment_size, private_segment_size;
|
||||
bool is_dynamic_callstack;
|
||||
|
||||
public:
|
||||
explicit KernelSymbol(amd::elf::Symbol* elfsym_, const amd_kernel_code_t* akc);
|
||||
bool IsKernelSymbol() const override { return true; }
|
||||
KernelSymbol* AsKernelSymbol() override { return this; }
|
||||
hsa_symbol_kind_t Kind() const override { return HSA_SYMBOL_KIND_KERNEL; }
|
||||
hsa_status_t GetInfo(hsa_code_symbol_info_t attribute, void *value) override;
|
||||
};
|
||||
|
||||
class VariableSymbol : public Symbol {
|
||||
public:
|
||||
explicit VariableSymbol(amd::elf::Symbol* elfsym_)
|
||||
: Symbol(elfsym_) { }
|
||||
bool IsVariableSymbol() const override { return true; }
|
||||
VariableSymbol* AsVariableSymbol() override { return this; }
|
||||
hsa_symbol_kind_t Kind() const override { return HSA_SYMBOL_KIND_VARIABLE; }
|
||||
hsa_status_t GetInfo(hsa_code_symbol_info_t attribute, void *value) override;
|
||||
};
|
||||
|
||||
class AmdHsaCode {
|
||||
private:
|
||||
std::ostringstream out;
|
||||
std::unique_ptr<amd::elf::Image> img;
|
||||
std::vector<Segment*> dataSegments;
|
||||
std::vector<Section*> dataSections;
|
||||
std::vector<RelocationSection*> relocationSections;
|
||||
std::vector<Symbol*> symbols;
|
||||
bool combineDataSegments;
|
||||
Segment* hsaSegments[AMDGPU_HSA_SEGMENT_LAST][2];
|
||||
Section* hsaSections[AMDGPU_HSA_SECTION_LAST];
|
||||
|
||||
amd::elf::Section* hsatext;
|
||||
amd::elf::Section* imageInit;
|
||||
amd::elf::Section* samplerInit;
|
||||
amd::elf::Section* debugInfo;
|
||||
amd::elf::Section* debugLine;
|
||||
amd::elf::Section* debugAbbrev;
|
||||
|
||||
bool PullElf();
|
||||
bool PullElfV1();
|
||||
bool PullElfV2();
|
||||
|
||||
void AddAmdNote(uint32_t type, const void* desc, uint32_t desc_size);
|
||||
template <typename S>
|
||||
bool GetAmdNote(uint32_t type, S** desc)
|
||||
{
|
||||
uint32_t desc_size;
|
||||
if (!img->note()->getNote("AMD", type, (void**) desc, &desc_size)) {
|
||||
out << "Failed to find note, type: " << type << std::endl;
|
||||
return false;
|
||||
}
|
||||
if (desc_size < sizeof(S)) {
|
||||
out << "Note size mismatch, type: " << type << " size: " << desc_size << " expected at least " << sizeof(S) << std::endl;
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void PrintSegment(std::ostream& out, Segment* segment);
|
||||
void PrintSection(std::ostream& out, Section* section);
|
||||
void PrintRawData(std::ostream& out, Section* section);
|
||||
void PrintRawData(std::ostream& out, const unsigned char *data, size_t size);
|
||||
void PrintRelocationData(std::ostream& out, RelocationSection* section);
|
||||
void PrintSymbol(std::ostream& out, Symbol* sym);
|
||||
void PrintDisassembly(std::ostream& out, const unsigned char *isa, size_t size, uint32_t isa_offset = 0);
|
||||
std::string MangleSymbolName(const std::string& module_name, const std::string symbol_name);
|
||||
bool ElfImageError();
|
||||
|
||||
public:
|
||||
bool HasHsaText() const { return hsatext != 0; }
|
||||
amd::elf::Section* HsaText() { assert(hsatext); return hsatext; }
|
||||
const amd::elf::Section* HsaText() const { assert(hsatext); return hsatext; }
|
||||
amd::elf::SymbolTable* Symtab() { assert(img); return img->symtab(); }
|
||||
uint16_t Machine() { return img->Machine(); }
|
||||
|
||||
AmdHsaCode(bool combineDataSegments = true);
|
||||
virtual ~AmdHsaCode();
|
||||
|
||||
std::string output() { return out.str(); }
|
||||
bool LoadFromFile(const std::string& filename);
|
||||
bool SaveToFile(const std::string& filename);
|
||||
bool WriteToBuffer(void* buffer);
|
||||
bool InitFromBuffer(const void* buffer, size_t size);
|
||||
bool InitAsBuffer(const void* buffer, size_t size);
|
||||
bool InitAsHandle(hsa_code_object_t code_handle);
|
||||
bool InitNew(bool xnack = false);
|
||||
bool Freeze();
|
||||
hsa_code_object_t GetHandle();
|
||||
const char* ElfData();
|
||||
uint64_t ElfSize();
|
||||
bool Validate();
|
||||
void Print(std::ostream& out);
|
||||
void PrintNotes(std::ostream& out);
|
||||
void PrintSegments(std::ostream& out);
|
||||
void PrintSections(std::ostream& out);
|
||||
void PrintSymbols(std::ostream& out);
|
||||
void PrintMachineCode(std::ostream& out);
|
||||
void PrintMachineCode(std::ostream& out, KernelSymbol* sym);
|
||||
bool PrintToFile(const std::string& filename);
|
||||
|
||||
void AddNoteCodeObjectVersion(uint32_t major, uint32_t minor);
|
||||
bool GetNoteCodeObjectVersion(uint32_t* major, uint32_t* minor);
|
||||
bool GetNoteCodeObjectVersion(std::string& version);
|
||||
void AddNoteHsail(uint32_t hsail_major, uint32_t hsail_minor, hsa_profile_t profile, hsa_machine_model_t machine_model, hsa_default_float_rounding_mode_t rounding_mode);
|
||||
bool GetNoteHsail(uint32_t* hsail_major, uint32_t* hsail_minor, hsa_profile_t* profile, hsa_machine_model_t* machine_model, hsa_default_float_rounding_mode_t* default_float_round);
|
||||
void AddNoteIsa(const std::string& vendor_name, const std::string& architecture_name, uint32_t major, uint32_t minor, uint32_t stepping);
|
||||
bool GetNoteIsa(std::string& vendor_name, std::string& architecture_name, uint32_t* major_version, uint32_t* minor_version, uint32_t* stepping);
|
||||
bool GetNoteIsa(std::string& isaName);
|
||||
void AddNoteProducer(uint32_t major, uint32_t minor, const std::string& producer);
|
||||
bool GetNoteProducer(uint32_t* major, uint32_t* minor, std::string& producer_name);
|
||||
void AddNoteProducerOptions(const std::string& options);
|
||||
void AddNoteProducerOptions(int32_t call_convention, const hsa_ext_control_directives_t& user_directives, const std::string& user_options);
|
||||
bool GetNoteProducerOptions(std::string& options);
|
||||
|
||||
hsa_status_t GetInfo(hsa_code_object_info_t attribute, void *value);
|
||||
hsa_status_t GetSymbol(const char *module_name, const char *symbol_name, hsa_code_symbol_t *sym);
|
||||
hsa_status_t IterateSymbols(hsa_code_object_t code_object,
|
||||
hsa_status_t (*callback)(
|
||||
hsa_code_object_t code_object,
|
||||
hsa_code_symbol_t symbol,
|
||||
void* data),
|
||||
void* data);
|
||||
|
||||
void AddHsaTextData(const void* buffer, size_t size);
|
||||
uint64_t NextKernelCodeOffset() const;
|
||||
bool AddKernelCode(KernelSymbol* sym, const void* code, size_t size);
|
||||
|
||||
Symbol* AddKernelDefinition(const std::string& name, const void* isa, size_t isa_size);
|
||||
|
||||
size_t DataSegmentCount() { return dataSegments.size(); }
|
||||
Segment* DataSegment(size_t i) { return dataSegments[i]; }
|
||||
|
||||
size_t DataSectionCount() { return dataSections.size(); }
|
||||
Section* DataSection(size_t i) { return dataSections[i]; }
|
||||
|
||||
Section* AddEmptySection();
|
||||
Section* AddCodeSection(Segment* segment);
|
||||
Section* AddDataSection(const std::string &name,
|
||||
uint32_t type,
|
||||
uint64_t flags,
|
||||
Segment* segment);
|
||||
|
||||
bool HasImageInitSection() const { return imageInit != 0; }
|
||||
Section* ImageInitSection();
|
||||
void AddImageInitializer(Symbol* image, uint64_t destOffset, const amdgpu_hsa_image_descriptor_t& init);
|
||||
void AddImageInitializer(Symbol* image, uint64_t destOffset,
|
||||
amdgpu_hsa_metadata_kind16_t kind,
|
||||
amdgpu_hsa_image_geometry8_t geometry,
|
||||
amdgpu_hsa_image_channel_order8_t channel_order, amdgpu_hsa_image_channel_type8_t channel_type,
|
||||
uint64_t width, uint64_t height, uint64_t depth, uint64_t array);
|
||||
|
||||
|
||||
bool HasSamplerInitSection() const { return samplerInit != 0; }
|
||||
amd::elf::Section* SamplerInitSection();
|
||||
amd::elf::Section* AddSamplerInit();
|
||||
void AddSamplerInitializer(Symbol* sampler, uint64_t destOffset, const amdgpu_hsa_sampler_descriptor_t& init);
|
||||
void AddSamplerInitializer(Symbol* sampler, uint64_t destOffset,
|
||||
amdgpu_hsa_sampler_coord8_t coord,
|
||||
amdgpu_hsa_sampler_filter8_t filter,
|
||||
amdgpu_hsa_sampler_addressing8_t addressing);
|
||||
|
||||
void AddInitVarWithAddress(bool large, Symbol* dest, uint64_t destOffset, Symbol* addrOf, uint64_t addrAddend);
|
||||
|
||||
void InitHsaSegment(amdgpu_hsa_elf_segment_t segment, bool writable);
|
||||
bool AddHsaSegments();
|
||||
Segment* HsaSegment(amdgpu_hsa_elf_segment_t segment, bool writable);
|
||||
|
||||
void InitHsaSectionSegment(amdgpu_hsa_elf_section_t section, bool combineSegments = true);
|
||||
Section* HsaDataSection(amdgpu_hsa_elf_section_t section, bool combineSegments = true);
|
||||
|
||||
Symbol* AddExecutableSymbol(const std::string &name,
|
||||
unsigned char type,
|
||||
unsigned char binding,
|
||||
unsigned char other,
|
||||
Section *section = 0);
|
||||
|
||||
Symbol* AddVariableSymbol(const std::string &name,
|
||||
unsigned char type,
|
||||
unsigned char binding,
|
||||
unsigned char other,
|
||||
Section *section,
|
||||
uint64_t value,
|
||||
uint64_t size);
|
||||
void AddSectionSymbols();
|
||||
|
||||
size_t RelocationSectionCount() { return relocationSections.size(); }
|
||||
RelocationSection* GetRelocationSection(size_t i) { return relocationSections[i]; }
|
||||
|
||||
size_t SymbolCount() { return symbols.size(); }
|
||||
Symbol* GetSymbol(size_t i) { return symbols[i]; }
|
||||
Symbol* GetSymbolByElfIndex(size_t index);
|
||||
Symbol* FindSymbol(const std::string &n);
|
||||
|
||||
void AddData(amdgpu_hsa_elf_section_t section, const void* data = 0, size_t size = 0);
|
||||
|
||||
Section* DebugInfo();
|
||||
Section* DebugLine();
|
||||
Section* DebugAbbrev();
|
||||
|
||||
Section* AddHsaHlDebug(const std::string& name, const void* data, size_t size);
|
||||
};
|
||||
|
||||
class AmdHsaCodeManager {
|
||||
private:
|
||||
typedef std::unordered_map<uint64_t, AmdHsaCode*> CodeMap;
|
||||
CodeMap codeMap;
|
||||
|
||||
public:
|
||||
AmdHsaCode* FromHandle(hsa_code_object_t handle);
|
||||
bool Destroy(hsa_code_object_t handle);
|
||||
};
|
||||
|
||||
class KernelSymbolV2 : public KernelSymbol {
|
||||
private:
|
||||
public:
|
||||
explicit KernelSymbolV2(amd::elf::Symbol* elfsym_, const amd_kernel_code_t* akc);
|
||||
bool IsAgent() const override { return true; }
|
||||
uint64_t SectionOffset() const override { return elfsym->value() - elfsym->section()->addr(); }
|
||||
uint64_t VAddr() const override { return elfsym->value(); }
|
||||
};
|
||||
|
||||
class VariableSymbolV2 : public VariableSymbol {
|
||||
private:
|
||||
public:
|
||||
explicit VariableSymbolV2(amd::elf::Symbol* elfsym_) : VariableSymbol(elfsym_) { }
|
||||
bool IsAgent() const override { return false; }
|
||||
uint64_t SectionOffset() const override { return elfsym->value() - elfsym->section()->addr(); }
|
||||
uint64_t VAddr() const override { return elfsym->value(); }
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif // AMD_HSA_CODE_HPP_
|
||||
@@ -1,358 +0,0 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef AMD_HSA_LOADER_HPP
|
||||
#define AMD_HSA_LOADER_HPP
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include "hsa.h"
|
||||
#include "hsa_ext_image.h"
|
||||
#include "amd_hsa_elf.h"
|
||||
#include "amd_load_map.h"
|
||||
#include <string>
|
||||
#include <mutex>
|
||||
#include <vector>
|
||||
|
||||
/// @brief Major version of the AMD HSA Loader. Major versions are not backwards
|
||||
/// compatible.
|
||||
#define AMD_HSA_LOADER_VERSION_MAJOR 0
|
||||
|
||||
/// @brief Minor version of the AMD HSA Loader. Minor versions are backwards
|
||||
/// compatible.
|
||||
#define AMD_HSA_LOADER_VERSION_MINOR 5
|
||||
|
||||
/// @brief Descriptive version of the AMD HSA Loader.
|
||||
#define AMD_HSA_LOADER_VERSION "AMD HSA Loader v0.05 (June 16, 2015)"
|
||||
|
||||
/// Additional symbol attribute identifiers defined by the loader.
enum hsa_ext_symbol_info_t {
  /// Kernel object size attribute.
  HSA_EXT_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT_SIZE = 100,

  /// Kernel object alignment attribute.
  HSA_EXT_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT_ALIGN = 101
};
|
||||
|
||||
typedef uint32_t hsa_symbol_info32_t;
|
||||
typedef hsa_executable_symbol_t hsa_symbol_t;
|
||||
typedef hsa_executable_symbol_info_t hsa_symbol_info_t;
|
||||
|
||||
namespace amd {
|
||||
namespace hsa {
|
||||
namespace loader {
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Context. //
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
class Context {
|
||||
public:
|
||||
virtual ~Context() {}
|
||||
|
||||
virtual hsa_isa_t IsaFromName(const char *name) = 0;
|
||||
|
||||
virtual bool IsaSupportedByAgent(hsa_agent_t agent, hsa_isa_t isa) = 0;
|
||||
|
||||
virtual void* SegmentAlloc(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, size_t size, size_t align, bool zero) = 0;
|
||||
|
||||
virtual bool SegmentCopy(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, void* dst, size_t offset, const void* src, size_t size) = 0;
|
||||
|
||||
virtual void SegmentFree(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, void* seg, size_t size) = 0;
|
||||
|
||||
virtual void* SegmentAddress(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, void* seg, size_t offset) = 0;
|
||||
|
||||
virtual void* SegmentHostAddress(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, void* seg, size_t offset) = 0;
|
||||
|
||||
virtual bool SegmentFreeze(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, void* seg, size_t size) = 0;
|
||||
|
||||
virtual bool ImageExtensionSupported() = 0;
|
||||
|
||||
virtual hsa_status_t ImageCreate(
|
||||
hsa_agent_t agent,
|
||||
hsa_access_permission_t image_permission,
|
||||
const hsa_ext_image_descriptor_t *image_descriptor,
|
||||
const void *image_data,
|
||||
hsa_ext_image_t *image_handle) = 0;
|
||||
|
||||
virtual hsa_status_t ImageDestroy(
|
||||
hsa_agent_t agent, hsa_ext_image_t image_handle) = 0;
|
||||
|
||||
virtual hsa_status_t SamplerCreate(
|
||||
hsa_agent_t agent,
|
||||
const hsa_ext_sampler_descriptor_t *sampler_descriptor,
|
||||
hsa_ext_sampler_t *sampler_handle) = 0;
|
||||
|
||||
virtual hsa_status_t SamplerDestroy(
|
||||
hsa_agent_t agent, hsa_ext_sampler_t sampler_handle) = 0;
|
||||
|
||||
protected:
|
||||
Context() {}
|
||||
|
||||
private:
|
||||
Context(const Context &c);
|
||||
Context& operator=(const Context &c);
|
||||
};
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Symbol. //
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
class Symbol {
|
||||
public:
|
||||
static hsa_symbol_t Handle(Symbol *symbol) {
|
||||
hsa_symbol_t symbol_handle =
|
||||
{reinterpret_cast<uint64_t>(symbol)};
|
||||
return symbol_handle;
|
||||
}
|
||||
|
||||
static Symbol* Object(hsa_symbol_t symbol_handle) {
|
||||
Symbol *symbol =
|
||||
reinterpret_cast<Symbol*>(symbol_handle.handle);
|
||||
return symbol;
|
||||
}
|
||||
|
||||
virtual ~Symbol() {}
|
||||
|
||||
virtual bool GetInfo(hsa_symbol_info32_t symbol_info, void *value) = 0;
|
||||
|
||||
protected:
|
||||
Symbol() {}
|
||||
|
||||
private:
|
||||
Symbol(const Symbol &s);
|
||||
Symbol& operator=(const Symbol &s);
|
||||
};
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// LoadedCodeObject. //
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
class LoadedCodeObject {
|
||||
public:
|
||||
static amd_loaded_code_object_t Handle(LoadedCodeObject *object) {
|
||||
amd_loaded_code_object_t handle =
|
||||
{reinterpret_cast<uint64_t>(object)};
|
||||
return handle;
|
||||
}
|
||||
|
||||
static LoadedCodeObject* Object(amd_loaded_code_object_t handle) {
|
||||
LoadedCodeObject *object =
|
||||
reinterpret_cast<LoadedCodeObject*>(handle.handle);
|
||||
return object;
|
||||
}
|
||||
|
||||
virtual ~LoadedCodeObject() {}
|
||||
|
||||
virtual bool GetInfo(amd_loaded_code_object_info_t attribute, void *value) = 0;
|
||||
|
||||
virtual hsa_status_t IterateLoadedSegments(
|
||||
hsa_status_t (*callback)(
|
||||
amd_loaded_segment_t loaded_segment,
|
||||
void *data),
|
||||
void *data) = 0;
|
||||
|
||||
protected:
|
||||
LoadedCodeObject() {}
|
||||
|
||||
private:
|
||||
LoadedCodeObject(const LoadedCodeObject&);
|
||||
LoadedCodeObject& operator=(const LoadedCodeObject&);
|
||||
};
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// LoadedSegment. //
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
class LoadedSegment {
|
||||
public:
|
||||
static amd_loaded_segment_t Handle(LoadedSegment *object) {
|
||||
amd_loaded_segment_t handle =
|
||||
{reinterpret_cast<uint64_t>(object)};
|
||||
return handle;
|
||||
}
|
||||
|
||||
static LoadedSegment* Object(amd_loaded_segment_t handle) {
|
||||
LoadedSegment *object =
|
||||
reinterpret_cast<LoadedSegment*>(handle.handle);
|
||||
return object;
|
||||
}
|
||||
|
||||
virtual ~LoadedSegment() {}
|
||||
|
||||
virtual bool GetInfo(amd_loaded_segment_info_t attribute, void *value) = 0;
|
||||
|
||||
protected:
|
||||
LoadedSegment() {}
|
||||
|
||||
private:
|
||||
LoadedSegment(const LoadedSegment&);
|
||||
LoadedSegment& operator=(const LoadedSegment&);
|
||||
};
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Executable. //
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
class Executable {
|
||||
public:
|
||||
static hsa_executable_t Handle(Executable *executable) {
|
||||
hsa_executable_t executable_handle =
|
||||
{reinterpret_cast<uint64_t>(executable)};
|
||||
return executable_handle;
|
||||
}
|
||||
|
||||
static Executable* Object(hsa_executable_t executable_handle) {
|
||||
Executable *executable =
|
||||
reinterpret_cast<Executable*>(executable_handle.handle);
|
||||
return executable;
|
||||
}
|
||||
|
||||
virtual ~Executable() {}
|
||||
|
||||
virtual hsa_status_t GetInfo(
|
||||
hsa_executable_info_t executable_info, void *value) = 0;
|
||||
|
||||
virtual hsa_status_t DefineProgramExternalVariable(
|
||||
const char *name, void *address) = 0;
|
||||
|
||||
virtual hsa_status_t DefineAgentExternalVariable(
|
||||
const char *name,
|
||||
hsa_agent_t agent,
|
||||
hsa_variable_segment_t segment,
|
||||
void *address) = 0;
|
||||
|
||||
virtual hsa_status_t LoadCodeObject(
|
||||
hsa_agent_t agent,
|
||||
hsa_code_object_t code_object,
|
||||
const char *options,
|
||||
amd_loaded_code_object_t *loaded_code_object = nullptr) = 0;
|
||||
|
||||
virtual hsa_status_t LoadCodeObject(
|
||||
hsa_agent_t agent,
|
||||
hsa_code_object_t code_object,
|
||||
size_t code_object_size,
|
||||
const char *options,
|
||||
amd_loaded_code_object_t *loaded_code_object = nullptr) = 0;
|
||||
|
||||
virtual hsa_status_t Freeze(const char *options) = 0;
|
||||
|
||||
virtual hsa_status_t Validate(uint32_t *result) = 0;
|
||||
|
||||
virtual Symbol* GetSymbol(
|
||||
const char *module_name,
|
||||
const char *symbol_name,
|
||||
hsa_agent_t agent,
|
||||
int32_t call_convention) = 0;
|
||||
|
||||
typedef hsa_status_t (*iterate_symbols_f)(
|
||||
hsa_executable_t executable,
|
||||
hsa_symbol_t symbol_handle,
|
||||
void *data);
|
||||
|
||||
virtual hsa_status_t IterateSymbols(
|
||||
iterate_symbols_f callback, void *data) = 0;
|
||||
|
||||
virtual hsa_status_t IterateLoadedCodeObjects(
|
||||
hsa_status_t (*callback)(
|
||||
amd_loaded_code_object_t loaded_code_object,
|
||||
void *data),
|
||||
void *data) = 0;
|
||||
|
||||
protected:
|
||||
Executable() {}
|
||||
|
||||
private:
|
||||
Executable(const Executable &e);
|
||||
Executable& operator=(const Executable &e);
|
||||
|
||||
static std::vector<Executable*> executables;
|
||||
static std::mutex executables_mutex;
|
||||
};
|
||||
|
||||
/// @class Loader
|
||||
class Loader {
|
||||
public:
|
||||
/// @brief Destructor.
|
||||
virtual ~Loader() {}
|
||||
|
||||
/// @brief Creates AMD HSA Loader with specified @p context.
|
||||
///
|
||||
/// @param[in] context Context. Must not be null.
|
||||
///
|
||||
/// @returns AMD HSA Loader on success, null on failure.
|
||||
static Loader* Create(Context* context);
|
||||
|
||||
/// @brief Destroys AMD HSA Loader @p Loader_object.
|
||||
///
|
||||
/// @param[in] loader AMD HSA Loader to destroy. Must not be null.
|
||||
static void Destroy(Loader *loader);
|
||||
|
||||
/// @returns Context associated with Loader.
|
||||
virtual Context* GetContext() const = 0;
|
||||
|
||||
/// @brief Creates empty AMD HSA Executable with specified @p profile,
|
||||
/// @p options
|
||||
virtual Executable* CreateExecutable(hsa_profile_t profile, const char *options) = 0;
|
||||
|
||||
virtual void DestroyExecutable(Executable *executable) = 0;
|
||||
|
||||
virtual hsa_status_t IterateExecutables(
|
||||
hsa_status_t (*callback)(
|
||||
hsa_executable_t executable,
|
||||
void *data),
|
||||
void *data) = 0;
|
||||
|
||||
protected:
|
||||
/// @brief Default constructor.
|
||||
Loader() {}
|
||||
|
||||
private:
|
||||
/// @brief Copy constructor - not available.
|
||||
Loader(const Loader&);
|
||||
|
||||
/// @brief Assignment operator - not available.
|
||||
Loader& operator=(const Loader&);
|
||||
};
|
||||
|
||||
|
||||
} // namespace loader
|
||||
} // namespace hsa
|
||||
} // namespace amd
|
||||
|
||||
#endif // AMD_HSA_LOADER_HPP
|
||||
@@ -1,174 +0,0 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef AMD_LOAD_MAP_H
|
||||
#define AMD_LOAD_MAP_H
|
||||
|
||||
#include "hsa.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif // __cplusplus
|
||||
|
||||
/// Identifier of the AMD load map extension.
enum {
  AMD_EXTENSION_LOAD_MAP = 0x1002
};
|
||||
|
||||
/// Opaque handle that identifies a loaded code object.
typedef struct amd_loaded_code_object_s {
  /// Opaque 64-bit handle value.
  uint64_t handle;
} amd_loaded_code_object_t;
|
||||
|
||||
/// Attributes of a loaded code object that can be requested through
/// amd_loaded_code_object_get_info().
///
/// The enumeration is introduced together with a typedef of the same name:
/// the prototypes in this header use the bare name
/// amd_loaded_code_object_info_t as a type, which a C compiler accepts only
/// when a typedef exists (an enum tag alone requires the `enum` keyword in
/// C). The tag is kept, so existing C++ code is unaffected.
typedef enum amd_loaded_code_object_info_t {
  /// ELF image of the loaded code object.
  AMD_LOADED_CODE_OBJECT_INFO_ELF_IMAGE = 0,

  /// Size of the ELF image of the loaded code object.
  AMD_LOADED_CODE_OBJECT_INFO_ELF_IMAGE_SIZE = 1
} amd_loaded_code_object_info_t;
|
||||
|
||||
/// Opaque handle that identifies a loaded segment.
typedef struct amd_loaded_segment_s {
  /// Opaque 64-bit handle value.
  uint64_t handle;
} amd_loaded_segment_t;
|
||||
|
||||
/// Attributes of a loaded segment that can be requested through
/// amd_loaded_segment_get_info().
///
/// The enumeration is introduced together with a typedef of the same name:
/// the prototypes in this header use the bare name
/// amd_loaded_segment_info_t as a type, which a C compiler accepts only
/// when a typedef exists (an enum tag alone requires the `enum` keyword in
/// C). The tag is kept, so existing C++ code is unaffected.
typedef enum amd_loaded_segment_info_t {
  /// Type of the loaded segment.
  AMD_LOADED_SEGMENT_INFO_TYPE = 0,

  /// Base address of the segment in the ELF image.
  AMD_LOADED_SEGMENT_INFO_ELF_BASE_ADDRESS = 1,

  /// Base address at which the segment was loaded.
  AMD_LOADED_SEGMENT_INFO_LOAD_BASE_ADDRESS = 2,

  /// Size of the loaded segment.
  AMD_LOADED_SEGMENT_INFO_SIZE = 3
} amd_loaded_segment_info_t;
|
||||
|
||||
/// @todo.
|
||||
hsa_status_t amd_executable_load_code_object(
|
||||
hsa_executable_t executable,
|
||||
hsa_agent_t agent,
|
||||
hsa_code_object_t code_object,
|
||||
const char *options,
|
||||
amd_loaded_code_object_t *loaded_code_object);
|
||||
|
||||
/// @brief Invokes @p callback for each available executable in current
|
||||
/// process.
|
||||
hsa_status_t amd_iterate_executables(
|
||||
hsa_status_t (*callback)(
|
||||
hsa_executable_t executable,
|
||||
void *data),
|
||||
void *data);
|
||||
|
||||
/// @brief Invokes @p callback for each loaded code object in specified
|
||||
/// @p executable.
|
||||
hsa_status_t amd_executable_iterate_loaded_code_objects(
|
||||
hsa_executable_t executable,
|
||||
hsa_status_t (*callback)(
|
||||
amd_loaded_code_object_t loaded_code_object,
|
||||
void *data),
|
||||
void *data);
|
||||
|
||||
/// @brief Retrieves current value of specified @p loaded_code_object's
|
||||
/// @p attribute.
|
||||
hsa_status_t amd_loaded_code_object_get_info(
|
||||
amd_loaded_code_object_t loaded_code_object,
|
||||
amd_loaded_code_object_info_t attribute,
|
||||
void *value);
|
||||
|
||||
/// @brief Invokes @p callback for each loaded segment in specified
|
||||
/// @p loaded_code_object.
|
||||
hsa_status_t amd_loaded_code_object_iterate_loaded_segments(
|
||||
amd_loaded_code_object_t loaded_code_object,
|
||||
hsa_status_t (*callback)(
|
||||
amd_loaded_segment_t loaded_segment,
|
||||
void *data),
|
||||
void *data);
|
||||
|
||||
/// @brief Retrieves current value of specified @p loaded_segment's
|
||||
/// @p attribute.
|
||||
hsa_status_t amd_loaded_segment_get_info(
|
||||
amd_loaded_segment_t loaded_segment,
|
||||
amd_loaded_segment_info_t attribute,
|
||||
void *value);
|
||||
|
||||
#define amd_load_map_1_00
|
||||
|
||||
typedef struct amd_load_map_1_00_pfn_s {
|
||||
hsa_status_t (*amd_executable_load_code_object)(
|
||||
hsa_executable_t executable,
|
||||
hsa_agent_t agent,
|
||||
hsa_code_object_t code_object,
|
||||
const char *options,
|
||||
amd_loaded_code_object_t *loaded_code_object);
|
||||
|
||||
hsa_status_t (*amd_iterate_executables)(
|
||||
hsa_status_t (*callback)(
|
||||
hsa_executable_t executable,
|
||||
void *data),
|
||||
void *data);
|
||||
|
||||
hsa_status_t (*amd_executable_iterate_loaded_code_objects)(
|
||||
hsa_executable_t executable,
|
||||
hsa_status_t (*callback)(
|
||||
amd_loaded_code_object_t loaded_code_object,
|
||||
void *data),
|
||||
void *data);
|
||||
|
||||
hsa_status_t (*amd_loaded_code_object_get_info)(
|
||||
amd_loaded_code_object_t loaded_code_object,
|
||||
amd_loaded_code_object_info_t attribute,
|
||||
void *value);
|
||||
|
||||
hsa_status_t (*amd_loaded_code_object_iterate_loaded_segments)(
|
||||
amd_loaded_code_object_t loaded_code_object,
|
||||
hsa_status_t (*callback)(
|
||||
amd_loaded_segment_t loaded_segment,
|
||||
void *data),
|
||||
void *data);
|
||||
|
||||
hsa_status_t (*amd_loaded_segment_get_info)(
|
||||
amd_loaded_segment_t loaded_segment,
|
||||
amd_loaded_segment_info_t attribute,
|
||||
void *value);
|
||||
} amd_load_map_1_00_pfn_t;
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif // __cplusplus
|
||||
|
||||
#endif // AMD_LOAD_MAP_H
|
||||
@@ -1,97 +0,0 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef HSA_RUNTIME_CORE_INC_AMD_LOADER_CONTEXT_HPP
|
||||
#define HSA_RUNTIME_CORE_INC_AMD_LOADER_CONTEXT_HPP
|
||||
|
||||
#include "core/inc/amd_hsa_loader.hpp"
|
||||
|
||||
namespace amd {
|
||||
|
||||
class LoaderContext final: public hsa::loader::Context {
|
||||
public:
|
||||
LoaderContext(): hsa::loader::Context() {}
|
||||
|
||||
~LoaderContext() {}
|
||||
|
||||
hsa_isa_t IsaFromName(const char *name) override;
|
||||
|
||||
bool IsaSupportedByAgent(hsa_agent_t agent, hsa_isa_t code_object_isa) override;
|
||||
|
||||
void* SegmentAlloc(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, size_t size, size_t align, bool zero) override;
|
||||
|
||||
bool SegmentCopy(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, void* dst, size_t offset, const void* src, size_t size) override;
|
||||
|
||||
void SegmentFree(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, void* seg, size_t size = 0) override;
|
||||
|
||||
void* SegmentAddress(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, void* seg, size_t offset) override;
|
||||
|
||||
void* SegmentHostAddress(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, void* seg, size_t offset) override;
|
||||
|
||||
bool SegmentFreeze(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, void* seg, size_t size) override;
|
||||
|
||||
bool ImageExtensionSupported();
|
||||
|
||||
hsa_status_t ImageCreate(
|
||||
hsa_agent_t agent,
|
||||
hsa_access_permission_t image_permission,
|
||||
const hsa_ext_image_descriptor_t *image_descriptor,
|
||||
const void *image_data,
|
||||
hsa_ext_image_t *image_handle);
|
||||
|
||||
hsa_status_t ImageDestroy(hsa_agent_t agent, hsa_ext_image_t image_handle);
|
||||
|
||||
hsa_status_t SamplerCreate(
|
||||
hsa_agent_t agent,
|
||||
const hsa_ext_sampler_descriptor_t *sampler_descriptor,
|
||||
hsa_ext_sampler_t *sampler_handle);
|
||||
|
||||
hsa_status_t SamplerDestroy(hsa_agent_t agent, hsa_ext_sampler_t sampler_handle);
|
||||
|
||||
private:
|
||||
LoaderContext(const LoaderContext&);
|
||||
LoaderContext& operator=(const LoaderContext&);
|
||||
};
|
||||
|
||||
} // namespace amd
|
||||
|
||||
#endif // HSA_RUNTIME_CORE_INC_AMD_LOADER_CONTEXT_HPP
|
||||
@@ -1,191 +0,0 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// AMD specific HSA backend.
|
||||
|
||||
#ifndef HSA_RUNTIME_CORE_INC_AMD_MEMORY_REGION_H_
|
||||
#define HSA_RUNTIME_CORE_INC_AMD_MEMORY_REGION_H_
|
||||
|
||||
#include "hsakmt.h"
|
||||
|
||||
#include "core/inc/agent.h"
|
||||
#include "core/inc/memory_region.h"
|
||||
|
||||
#include "inc/hsa_ext_amd.h"
|
||||
|
||||
namespace amd {
|
||||
class MemoryRegion : public core::MemoryRegion {
|
||||
public:
|
||||
/// @brief Convert this object into hsa_region_t.
|
||||
static __forceinline hsa_region_t Convert(MemoryRegion* region) {
|
||||
const hsa_region_t region_handle = {
|
||||
static_cast<uint64_t>(reinterpret_cast<uintptr_t>(region))};
|
||||
return region_handle;
|
||||
}
|
||||
|
||||
static __forceinline const hsa_region_t Convert(const MemoryRegion* region) {
|
||||
const hsa_region_t region_handle = {
|
||||
static_cast<uint64_t>(reinterpret_cast<uintptr_t>(region))};
|
||||
return region_handle;
|
||||
}
|
||||
|
||||
/// @brief Convert hsa_region_t into amd::MemoryRegion *.
|
||||
static __forceinline MemoryRegion* Convert(hsa_region_t region) {
|
||||
return reinterpret_cast<MemoryRegion*>(region.handle);
|
||||
}
|
||||
|
||||
/// @brief Allocate agent accessible memory (system / local memory).
|
||||
static void* AllocateKfdMemory(const HsaMemFlags& flag, HSAuint32 node_id,
|
||||
size_t size);
|
||||
|
||||
/// @brief Free agent accessible memory (system / local memory).
|
||||
static void FreeKfdMemory(void* ptr, size_t size);
|
||||
|
||||
static bool RegisterMemory(void* ptr, size_t size, size_t num_nodes,
|
||||
const uint32_t* nodes);
|
||||
|
||||
static void DeregisterMemory(void* ptr);
|
||||
|
||||
/// @brief Pin memory.
|
||||
static bool MakeKfdMemoryResident(size_t num_node, const uint32_t* nodes,
|
||||
void* ptr, size_t size,
|
||||
uint64_t* alternate_va,
|
||||
HsaMemMapFlags map_flag);
|
||||
|
||||
/// @brief Unpin memory.
|
||||
static void MakeKfdMemoryUnresident(void* ptr);
|
||||
|
||||
MemoryRegion(bool fine_grain, bool full_profile, core::Agent* owner,
|
||||
const HsaMemoryProperties& mem_props);
|
||||
|
||||
~MemoryRegion();
|
||||
|
||||
hsa_status_t Allocate(size_t size, void** address) const;
|
||||
|
||||
hsa_status_t Allocate(bool restrict_access, size_t size,
|
||||
void** address) const;
|
||||
|
||||
hsa_status_t Free(void* address, size_t size) const;
|
||||
|
||||
hsa_status_t GetInfo(hsa_region_info_t attribute, void* value) const;
|
||||
|
||||
hsa_status_t GetPoolInfo(hsa_amd_memory_pool_info_t attribute,
|
||||
void* value) const;
|
||||
|
||||
hsa_status_t GetAgentPoolInfo(const core::Agent& agent,
|
||||
hsa_amd_agent_memory_pool_info_t attribute,
|
||||
void* value) const;
|
||||
|
||||
hsa_status_t AllowAccess(uint32_t num_agents, const hsa_agent_t* agents,
|
||||
const void* ptr, size_t size) const;
|
||||
|
||||
hsa_status_t CanMigrate(const MemoryRegion& dst, bool& result) const;
|
||||
|
||||
hsa_status_t Migrate(uint32_t flag, const void* ptr) const;
|
||||
|
||||
hsa_status_t Lock(uint32_t num_agents, const hsa_agent_t* agents,
|
||||
void* host_ptr, size_t size, void** agent_ptr) const;
|
||||
|
||||
hsa_status_t Unlock(void* host_ptr) const;
|
||||
|
||||
HSAuint64 GetBaseAddress() const { return mem_props_.VirtualBaseAddress; }
|
||||
|
||||
HSAuint64 GetPhysicalSize() const { return mem_props_.SizeInBytes; }
|
||||
|
||||
HSAuint64 GetVirtualSize() const { return virtual_size_; }
|
||||
|
||||
hsa_status_t AssignAgent(void* ptr, size_t size, const core::Agent& agent,
|
||||
hsa_access_permission_t access) const;
|
||||
|
||||
__forceinline bool IsLocalMemory() const {
|
||||
return ((mem_props_.HeapType == HSA_HEAPTYPE_FRAME_BUFFER_PRIVATE) ||
|
||||
(mem_props_.HeapType == HSA_HEAPTYPE_FRAME_BUFFER_PUBLIC));
|
||||
}
|
||||
|
||||
__forceinline bool IsPublic() const {
|
||||
return (mem_props_.HeapType == HSA_HEAPTYPE_FRAME_BUFFER_PUBLIC);
|
||||
}
|
||||
|
||||
__forceinline bool IsSystem() const {
|
||||
return mem_props_.HeapType == HSA_HEAPTYPE_SYSTEM;
|
||||
}
|
||||
|
||||
__forceinline bool IsLDS() const {
|
||||
return mem_props_.HeapType == HSA_HEAPTYPE_GPU_LDS;
|
||||
}
|
||||
|
||||
__forceinline bool IsGDS() const {
|
||||
return mem_props_.HeapType == HSA_HEAPTYPE_GPU_GDS;
|
||||
}
|
||||
|
||||
__forceinline bool IsScratch() const {
|
||||
return mem_props_.HeapType == HSA_HEAPTYPE_GPU_SCRATCH;
|
||||
}
|
||||
|
||||
__forceinline bool IsSvm() const {
|
||||
return mem_props_.HeapType == HSA_HEAPTYPE_DEVICE_SVM;
|
||||
}
|
||||
|
||||
__forceinline uint32_t BusWidth() const {
|
||||
return static_cast<uint32_t>(mem_props_.Width);
|
||||
}
|
||||
|
||||
__forceinline uint32_t MaxMemCloc() const {
|
||||
return static_cast<uint32_t>(mem_props_.MemoryClockMax);
|
||||
}
|
||||
|
||||
private:
|
||||
const HsaMemoryProperties mem_props_;
|
||||
|
||||
HsaMemFlags mem_flag_;
|
||||
|
||||
HsaMemMapFlags map_flag_;
|
||||
|
||||
size_t max_single_alloc_size_;
|
||||
|
||||
HSAuint64 virtual_size_;
|
||||
|
||||
static const size_t kPageSize_ = 4096;
|
||||
};
|
||||
} // namespace
|
||||
|
||||
#endif // header guard
|
||||
@@ -1,56 +0,0 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef HSA_RUNTIME_CORE_INC_AMD_TOPOLOGY_H_
|
||||
#define HSA_RUNTIME_CORE_INC_AMD_TOPOLOGY_H_
|
||||
|
||||
namespace amd {

/// @brief Runtime initialization entry point.
/// Reserved for Runtime::Acquire(); no other code should call it.
bool Load();

/// @brief Runtime shutdown / cleanup entry point.
/// Reserved for Runtime::Release(); no other code should call it.
bool Unload();

}  // namespace amd
|
||||
|
||||
#endif // header guard
|
||||
@@ -1,108 +0,0 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef HSA_RUNTIME_CORE_INC_BLIT_H_
|
||||
#define HSA_RUNTIME_CORE_INC_BLIT_H_
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "core/inc/agent.h"
|
||||
|
||||
namespace core {
|
||||
class Blit {
|
||||
public:
|
||||
explicit Blit() {}
|
||||
virtual ~Blit() {}
|
||||
|
||||
/// @brief Initialize a blit object.
|
||||
///
|
||||
/// @param agent Pointer to the agent that will execute the blit commands.
|
||||
///
|
||||
/// @return hsa_status_t
|
||||
virtual hsa_status_t Initialize(const core::Agent& agent) = 0;
|
||||
|
||||
/// @brief Marks the blit object as invalid and uncouples its link with
|
||||
/// the underlying compute device's control block. Use of blit object
|
||||
/// once it has been release is illegal and any behavior is indeterminate
|
||||
///
|
||||
/// @note: The call will block until all commands have executed.
|
||||
///
|
||||
/// @return hsa_status_t
|
||||
virtual hsa_status_t Destroy() = 0;
|
||||
|
||||
/// @brief Submit a linear copy command to the the underlying compute device's
|
||||
/// control block. The call is blocking until the command execution is
|
||||
/// finished.
|
||||
///
|
||||
/// @param dst Memory address of the copy destination.
|
||||
/// @param src Memory address of the copy source.
|
||||
/// @param size Size of the data to be copied.
|
||||
virtual hsa_status_t SubmitLinearCopyCommand(void* dst, const void* src,
|
||||
size_t size) = 0;
|
||||
|
||||
/// @brief Submit a linear copy command to the the underlying compute device's
|
||||
/// control block. The call is non blocking. The memory transfer will start
|
||||
/// after all dependent signals are satisfied. After the transfer is
|
||||
/// completed, the out signal will be decremented.
|
||||
///
|
||||
/// @param dst Memory address of the copy destination.
|
||||
/// @param src Memory address of the copy source.
|
||||
/// @param size Size of the data to be copied.
|
||||
/// @param dep_signals Arrays of dependent signal.
|
||||
/// @param out_signal Output signal.
|
||||
virtual hsa_status_t SubmitLinearCopyCommand(
|
||||
void* dst, const void* src, size_t size,
|
||||
std::vector<core::Signal*>& dep_signals, core::Signal& out_signal) = 0;
|
||||
|
||||
/// @brief Submit a linear fill command to the the underlying compute device's
|
||||
/// control block. The call is blocking until the command execution is
|
||||
/// finished.
|
||||
///
|
||||
/// @param ptr Memory address of the fill destination.
|
||||
/// @param value Value to be set.
|
||||
/// @param num Number of uint32_t element to be set to the value.
|
||||
virtual hsa_status_t SubmitLinearFillCommand(void* ptr, uint32_t value,
|
||||
size_t num) = 0;
|
||||
};
|
||||
} // namespace core
|
||||
|
||||
#endif // header guard
|
||||
@@ -1,75 +0,0 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef HSA_RUNTME_CORE_INC_CHECKED_H_
|
||||
#define HSA_RUNTME_CORE_INC_CHECKED_H_
|
||||
|
||||
#include "stdint.h"
|
||||
|
||||
namespace core {
|
||||
|
||||
/// @brief Base class for all classes whose validity can be checked using the
/// IsValid() method.
///
/// Every instance stores a tag equal to its own address XOR-ed with the
/// template argument @p code. IsValid() recomputes that value from `this` and
/// compares it with the stored tag. The destructor overwrites the tag with 0.
///
/// Copy/move construction always tags the NEW object with its own address, and
/// assignment deliberately leaves the tag untouched, because the tag depends
/// on where the object lives and not on the object it was copied from.
template <uint64_t code>
class Checked {
 public:
  typedef Checked<code> CheckedType;

  Checked() : object_(Tag()) {}
  Checked(const Checked&) : object_(Tag()) {}
  Checked(Checked&&) : object_(Tag()) {}

  // Assign the integer 0 rather than NULL: object_ is a uintptr_t, NULL is a
  // pointer constant that may be defined as nullptr (which does not convert to
  // an integer), and this header does not include a header that is guaranteed
  // to define NULL.
  virtual ~Checked() { object_ = 0; }

  // Assignment keeps the destination's own tag; see the class comment.
  const Checked& operator=(Checked&&) { return *this; }
  const Checked& operator=(const Checked&) { return *this; }

  /// @brief Returns true when the stored tag matches the value expected for
  /// an object located at `this`.
  bool IsValid() const { return object_ == Tag(); }

 private:
  /// @brief Computes the tag expected for an object located at `this`.
  /// @p code is converted to uintptr_t before the XOR, exactly as the stored
  /// tag is, so both sides use the same width.
  uintptr_t Tag() const { return uintptr_t(this) ^ uintptr_t(code); }

  uintptr_t object_;
};
|
||||
|
||||
} // namespace core
|
||||
#endif // header guard
|
||||
@@ -1,174 +0,0 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// HSA runtime C++ interface file.
|
||||
|
||||
#ifndef HSA_RUNTME_CORE_INC_DEFAULT_SIGNAL_H_
|
||||
#define HSA_RUNTME_CORE_INC_DEFAULT_SIGNAL_H_
|
||||
|
||||
#include "core/inc/runtime.h"
|
||||
#include "core/inc/signal.h"
|
||||
#include "core/util/utils.h"
|
||||
|
||||
namespace core {
|
||||
|
||||
/// @brief Simple pure memory based signal.
|
||||
/// @brief See base class Signal.
|
||||
class DefaultSignal : public Signal {
|
||||
public:
|
||||
/// @brief Determines if a Signal* can be safely converted to DefaultSignal*
|
||||
/// via static_cast.
|
||||
static __forceinline bool IsType(Signal* ptr) {
|
||||
return ptr->IsType(&rtti_id_);
|
||||
}
|
||||
|
||||
/// @brief See base class Signal.
|
||||
explicit DefaultSignal(hsa_signal_value_t initial_value);
|
||||
|
||||
/// @brief See base class Signal.
|
||||
~DefaultSignal();
|
||||
|
||||
// Below are various methods corresponding to the APIs, which load/store the
|
||||
// signal value or modify the existing signal value automically and with
|
||||
// specified memory ordering semantics.
|
||||
|
||||
hsa_signal_value_t LoadRelaxed();
|
||||
|
||||
hsa_signal_value_t LoadAcquire();
|
||||
|
||||
void StoreRelaxed(hsa_signal_value_t value);
|
||||
|
||||
void StoreRelease(hsa_signal_value_t value);
|
||||
|
||||
hsa_signal_value_t WaitRelaxed(hsa_signal_condition_t condition,
|
||||
hsa_signal_value_t compare_value,
|
||||
uint64_t timeout, hsa_wait_state_t wait_hint);
|
||||
|
||||
hsa_signal_value_t WaitAcquire(hsa_signal_condition_t condition,
|
||||
hsa_signal_value_t compare_value,
|
||||
uint64_t timeout, hsa_wait_state_t wait_hint);
|
||||
|
||||
void AndRelaxed(hsa_signal_value_t value);
|
||||
|
||||
void AndAcquire(hsa_signal_value_t value);
|
||||
|
||||
void AndRelease(hsa_signal_value_t value);
|
||||
|
||||
void AndAcqRel(hsa_signal_value_t value);
|
||||
|
||||
void OrRelaxed(hsa_signal_value_t value);
|
||||
|
||||
void OrAcquire(hsa_signal_value_t value);
|
||||
|
||||
void OrRelease(hsa_signal_value_t value);
|
||||
|
||||
void OrAcqRel(hsa_signal_value_t value);
|
||||
|
||||
void XorRelaxed(hsa_signal_value_t value);
|
||||
|
||||
void XorAcquire(hsa_signal_value_t value);
|
||||
|
||||
void XorRelease(hsa_signal_value_t value);
|
||||
|
||||
void XorAcqRel(hsa_signal_value_t value);
|
||||
|
||||
void AddRelaxed(hsa_signal_value_t value);
|
||||
|
||||
void AddAcquire(hsa_signal_value_t value);
|
||||
|
||||
void AddRelease(hsa_signal_value_t value);
|
||||
|
||||
void AddAcqRel(hsa_signal_value_t value);
|
||||
|
||||
void SubRelaxed(hsa_signal_value_t value);
|
||||
|
||||
void SubAcquire(hsa_signal_value_t value);
|
||||
|
||||
void SubRelease(hsa_signal_value_t value);
|
||||
|
||||
void SubAcqRel(hsa_signal_value_t value);
|
||||
|
||||
hsa_signal_value_t ExchRelaxed(hsa_signal_value_t value);
|
||||
|
||||
hsa_signal_value_t ExchAcquire(hsa_signal_value_t value);
|
||||
|
||||
hsa_signal_value_t ExchRelease(hsa_signal_value_t value);
|
||||
|
||||
hsa_signal_value_t ExchAcqRel(hsa_signal_value_t value);
|
||||
|
||||
hsa_signal_value_t CasRelaxed(hsa_signal_value_t expected,
|
||||
hsa_signal_value_t value);
|
||||
|
||||
hsa_signal_value_t CasAcquire(hsa_signal_value_t expected,
|
||||
hsa_signal_value_t value);
|
||||
|
||||
hsa_signal_value_t CasRelease(hsa_signal_value_t expected,
|
||||
hsa_signal_value_t value);
|
||||
|
||||
hsa_signal_value_t CasAcqRel(hsa_signal_value_t expected,
|
||||
hsa_signal_value_t value);
|
||||
|
||||
/// @brief see the base class Signal
|
||||
__forceinline hsa_signal_value_t* ValueLocation() const {
|
||||
return (hsa_signal_value_t*)&signal_.value;
|
||||
}
|
||||
|
||||
/// @brief see the base class Signal
|
||||
__forceinline HsaEvent* EopEvent() { return NULL; }
|
||||
|
||||
/// @brief prevent throwing exceptions
|
||||
void* operator new(size_t size) { return malloc(size); }
|
||||
|
||||
/// @brief prevent throwing exceptions
|
||||
void operator delete(void* ptr) { free(ptr); }
|
||||
|
||||
protected:
|
||||
bool _IsA(rtti_t id) const { return id == &rtti_id_; }
|
||||
|
||||
private:
|
||||
static int rtti_id_;
|
||||
|
||||
DISALLOW_COPY_AND_ASSIGN(DefaultSignal);
|
||||
};
|
||||
|
||||
} // namespace core
|
||||
#endif // header guard
|
||||
@@ -1,167 +0,0 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef HSA_RUNTIME_CORE_INC_HOST_QUEUE_H_
|
||||
#define HSA_RUNTIME_CORE_INC_HOST_QUEUE_H_
|
||||
|
||||
#include "core/inc/memory_region.h"
|
||||
#include "core/inc/queue.h"
|
||||
#include "core/inc/runtime.h"
|
||||
#include "core/inc/signal.h"
|
||||
|
||||
namespace core {
|
||||
class HostQueue : public Queue {
|
||||
public:
|
||||
HostQueue(hsa_region_t region, uint32_t ring_size, hsa_queue_type_t type,
|
||||
uint32_t features, hsa_signal_t doorbell_signal);
|
||||
|
||||
~HostQueue();
|
||||
|
||||
hsa_status_t Inactivate() { return HSA_STATUS_SUCCESS; }
|
||||
|
||||
uint64_t LoadReadIndexAcquire() {
|
||||
return atomic::Load(&amd_queue_.read_dispatch_id,
|
||||
std::memory_order_acquire);
|
||||
}
|
||||
|
||||
uint64_t LoadReadIndexRelaxed() {
|
||||
return atomic::Load(&amd_queue_.read_dispatch_id,
|
||||
std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
uint64_t LoadWriteIndexAcquire() {
|
||||
return atomic::Load(&amd_queue_.write_dispatch_id,
|
||||
std::memory_order_acquire);
|
||||
}
|
||||
|
||||
uint64_t LoadWriteIndexRelaxed() {
|
||||
return atomic::Load(&amd_queue_.write_dispatch_id,
|
||||
std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
void StoreReadIndexRelaxed(uint64_t value) {
|
||||
atomic::Store(&amd_queue_.read_dispatch_id, value,
|
||||
std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
void StoreReadIndexRelease(uint64_t value) {
|
||||
atomic::Store(&amd_queue_.read_dispatch_id, value,
|
||||
std::memory_order_release);
|
||||
}
|
||||
|
||||
void StoreWriteIndexRelaxed(uint64_t value) {
|
||||
atomic::Store(&amd_queue_.write_dispatch_id, value,
|
||||
std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
void StoreWriteIndexRelease(uint64_t value) {
|
||||
atomic::Store(&amd_queue_.write_dispatch_id, value,
|
||||
std::memory_order_release);
|
||||
}
|
||||
|
||||
uint64_t CasWriteIndexAcqRel(uint64_t expected, uint64_t value) {
|
||||
return atomic::Cas(&amd_queue_.write_dispatch_id, value, expected,
|
||||
std::memory_order_acq_rel);
|
||||
}
|
||||
|
||||
uint64_t CasWriteIndexAcquire(uint64_t expected, uint64_t value) {
|
||||
return atomic::Cas(&amd_queue_.write_dispatch_id, value, expected,
|
||||
std::memory_order_acquire);
|
||||
}
|
||||
|
||||
uint64_t CasWriteIndexRelaxed(uint64_t expected, uint64_t value) {
|
||||
return atomic::Cas(&amd_queue_.write_dispatch_id, value, expected,
|
||||
std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
uint64_t CasWriteIndexRelease(uint64_t expected, uint64_t value) {
|
||||
return atomic::Cas(&amd_queue_.write_dispatch_id, value, expected,
|
||||
std::memory_order_release);
|
||||
}
|
||||
|
||||
uint64_t AddWriteIndexAcqRel(uint64_t value) {
|
||||
return atomic::Add(&amd_queue_.write_dispatch_id, value,
|
||||
std::memory_order_acq_rel);
|
||||
}
|
||||
|
||||
uint64_t AddWriteIndexAcquire(uint64_t value) {
|
||||
return atomic::Add(&amd_queue_.write_dispatch_id, value,
|
||||
std::memory_order_acquire);
|
||||
}
|
||||
|
||||
uint64_t AddWriteIndexRelaxed(uint64_t value) {
|
||||
return atomic::Add(&amd_queue_.write_dispatch_id, value,
|
||||
std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
uint64_t AddWriteIndexRelease(uint64_t value) {
|
||||
return atomic::Add(&amd_queue_.write_dispatch_id, value,
|
||||
std::memory_order_release);
|
||||
}
|
||||
|
||||
hsa_status_t SetCUMasking(const uint32_t num_cu_mask_count,
|
||||
const uint32_t* cu_mask) {
|
||||
return HSA_STATUS_ERROR;
|
||||
}
|
||||
|
||||
bool active() const { return active_; }
|
||||
|
||||
void* operator new(size_t size) {
|
||||
return _aligned_malloc(size, HSA_QUEUE_ALIGN_BYTES);
|
||||
}
|
||||
|
||||
void* operator new(size_t size, void* ptr) { return ptr; }
|
||||
|
||||
void operator delete(void* ptr) { _aligned_free(ptr); }
|
||||
|
||||
void operator delete(void*, void*) {}
|
||||
|
||||
private:
|
||||
static const size_t kRingAlignment = 256;
|
||||
const uint32_t size_;
|
||||
bool active_;
|
||||
void* ring_;
|
||||
|
||||
DISALLOW_COPY_AND_ASSIGN(HostQueue);
|
||||
};
|
||||
} // namespace core
|
||||
#endif // header guard
|
||||
@@ -1,63 +0,0 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef HSA_RUNTIME_CORE_INC_HSA_API_TRACE_INT_H
|
||||
#define HSA_RUNTIME_CORE_INC_HSA_API_TRACE_INT_H
|
||||
|
||||
#include "inc/hsa_api_trace.h"
|
||||
#include "core/inc/hsa_internal.h"
|
||||
|
||||
namespace core {
|
||||
struct ApiTable {
|
||||
::ApiTable table;
|
||||
ExtTable extension_backup;
|
||||
|
||||
ApiTable();
|
||||
void Reset();
|
||||
void LinkExts(ExtTable* ptr);
|
||||
};
|
||||
|
||||
extern ApiTable hsa_api_table_;
|
||||
extern ApiTable hsa_internal_api_table_;
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -1,80 +0,0 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef HSA_RUNTME_CORE_INC_AMD_EXT_INTERFACE_H_
|
||||
#define HSA_RUNTME_CORE_INC_AMD_EXT_INTERFACE_H_
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "hsa_api_trace_int.h"
|
||||
|
||||
#include "core/util/os.h"
|
||||
#include "core/util/utils.h"
|
||||
|
||||
namespace core {
|
||||
struct ExtTableInternal : public ExtTable {
|
||||
decltype(::hsa_amd_image_get_info_max_dim)* hsa_amd_image_get_info_max_dim_fn;
|
||||
decltype(::hsa_amd_image_create)* hsa_amd_image_create_fn;
|
||||
};
|
||||
|
||||
class ExtensionEntryPoints {
|
||||
public:
|
||||
ExtTableInternal table;
|
||||
|
||||
ExtensionEntryPoints();
|
||||
|
||||
bool Load(std::string library_name);
|
||||
void Unload();
|
||||
|
||||
private:
|
||||
typedef void (*Load_t)(const ::ApiTable* table);
|
||||
typedef void (*Unload_t)();
|
||||
|
||||
std::vector<os::LibHandle> libs_;
|
||||
|
||||
void InitTable();
|
||||
DISALLOW_COPY_AND_ASSIGN(ExtensionEntryPoints);
|
||||
};
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -1,347 +0,0 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef HSA_RUNTIME_CORE_INC_HSA_INTERNAL_H
|
||||
#define HSA_RUNTIME_CORE_INC_HSA_INTERNAL_H
|
||||
|
||||
#include "inc/hsa.h"
|
||||
|
||||
namespace HSA
|
||||
{
|
||||
|
||||
// Define core namespace interfaces - copy of function declarations in hsa.h
|
||||
// Runtime start-up/shut-down and system-level queries. Per the note at the
// top of this namespace, these are copies of the hsa.h declarations.
hsa_status_t HSA_API hsa_init();

hsa_status_t HSA_API hsa_shut_down();

hsa_status_t HSA_API hsa_system_get_info(
    hsa_system_info_t attribute, void* value);

hsa_status_t HSA_API hsa_system_extension_supported(
    uint16_t extension, uint16_t version_major, uint16_t version_minor,
    bool* result);

hsa_status_t HSA_API hsa_system_get_extension_table(
    uint16_t extension, uint16_t version_major, uint16_t version_minor,
    void* table);
|
||||
// Agent enumeration and agent-level queries.
hsa_status_t HSA_API hsa_iterate_agents(
    hsa_status_t (*callback)(hsa_agent_t agent, void* data), void* data);

hsa_status_t HSA_API hsa_agent_get_info(
    hsa_agent_t agent, hsa_agent_info_t attribute, void* value);

hsa_status_t HSA_API hsa_agent_get_exception_policies(
    hsa_agent_t agent, hsa_profile_t profile, uint16_t* mask);

hsa_status_t HSA_API hsa_agent_extension_supported(
    uint16_t extension, hsa_agent_t agent, uint16_t version_major,
    uint16_t version_minor, bool* result);
|
||||
hsa_status_t HSA_API
|
||||
hsa_queue_create(hsa_agent_t agent, uint32_t size, hsa_queue_type_t type,
|
||||
void (*callback)(hsa_status_t status, hsa_queue_t *source,
|
||||
void *data),
|
||||
void *data, uint32_t private_segment_size,
|
||||
uint32_t group_segment_size, hsa_queue_t **queue);
|
||||
hsa_status_t HSA_API
|
||||
hsa_soft_queue_create(hsa_region_t region, uint32_t size,
|
||||
hsa_queue_type_t type, uint32_t features,
|
||||
hsa_signal_t completion_signal, hsa_queue_t **queue);
|
||||
hsa_status_t HSA_API hsa_queue_destroy(hsa_queue_t *queue);
|
||||
hsa_status_t HSA_API hsa_queue_inactivate(hsa_queue_t *queue);
|
||||
uint64_t HSA_API hsa_queue_load_read_index_acquire(const hsa_queue_t *queue);
|
||||
uint64_t HSA_API hsa_queue_load_read_index_relaxed(const hsa_queue_t *queue);
|
||||
uint64_t HSA_API hsa_queue_load_write_index_acquire(const hsa_queue_t *queue);
|
||||
uint64_t HSA_API hsa_queue_load_write_index_relaxed(const hsa_queue_t *queue);
|
||||
void HSA_API hsa_queue_store_write_index_relaxed(const hsa_queue_t *queue,
|
||||
uint64_t value);
|
||||
void HSA_API hsa_queue_store_write_index_release(const hsa_queue_t *queue,
|
||||
uint64_t value);
|
||||
uint64_t HSA_API hsa_queue_cas_write_index_acq_rel(const hsa_queue_t *queue,
|
||||
uint64_t expected,
|
||||
uint64_t value);
|
||||
uint64_t HSA_API hsa_queue_cas_write_index_acquire(const hsa_queue_t *queue,
|
||||
uint64_t expected,
|
||||
uint64_t value);
|
||||
uint64_t HSA_API hsa_queue_cas_write_index_relaxed(const hsa_queue_t *queue,
|
||||
uint64_t expected,
|
||||
uint64_t value);
|
||||
uint64_t HSA_API hsa_queue_cas_write_index_release(const hsa_queue_t *queue,
|
||||
uint64_t expected,
|
||||
uint64_t value);
|
||||
uint64_t HSA_API
|
||||
hsa_queue_add_write_index_acq_rel(const hsa_queue_t *queue, uint64_t value);
|
||||
uint64_t HSA_API
|
||||
hsa_queue_add_write_index_acquire(const hsa_queue_t *queue, uint64_t value);
|
||||
uint64_t HSA_API
|
||||
hsa_queue_add_write_index_relaxed(const hsa_queue_t *queue, uint64_t value);
|
||||
uint64_t HSA_API
|
||||
hsa_queue_add_write_index_release(const hsa_queue_t *queue, uint64_t value);
|
||||
void HSA_API hsa_queue_store_read_index_relaxed(const hsa_queue_t *queue,
|
||||
uint64_t value);
|
||||
void HSA_API hsa_queue_store_read_index_release(const hsa_queue_t *queue,
|
||||
uint64_t value);
|
||||
// Region enumeration and queries.
hsa_status_t HSA_API hsa_agent_iterate_regions(
    hsa_agent_t agent,
    hsa_status_t (*callback)(hsa_region_t region, void* data), void* data);

hsa_status_t HSA_API hsa_region_get_info(
    hsa_region_t region, hsa_region_info_t attribute, void* value);

// Memory registration, allocation, copy and ownership.
hsa_status_t HSA_API hsa_memory_register(void* address, size_t size);

hsa_status_t HSA_API hsa_memory_deregister(void* address, size_t size);

hsa_status_t HSA_API hsa_memory_allocate(
    hsa_region_t region, size_t size, void** ptr);

hsa_status_t HSA_API hsa_memory_free(void* ptr);

hsa_status_t HSA_API hsa_memory_copy(void* dst, const void* src, size_t size);

hsa_status_t HSA_API hsa_memory_assign_agent(
    void* ptr, hsa_agent_t agent, hsa_access_permission_t access);
|
||||
hsa_status_t HSA_API
|
||||
hsa_signal_create(hsa_signal_value_t initial_value, uint32_t num_consumers,
|
||||
const hsa_agent_t *consumers, hsa_signal_t *signal);
|
||||
hsa_status_t HSA_API hsa_signal_destroy(hsa_signal_t signal);
|
||||
hsa_signal_value_t HSA_API hsa_signal_load_relaxed(hsa_signal_t signal);
|
||||
hsa_signal_value_t HSA_API hsa_signal_load_acquire(hsa_signal_t signal);
|
||||
void HSA_API
|
||||
hsa_signal_store_relaxed(hsa_signal_t signal, hsa_signal_value_t value);
|
||||
void HSA_API
|
||||
hsa_signal_store_release(hsa_signal_t signal, hsa_signal_value_t value);
|
||||
hsa_signal_value_t HSA_API
|
||||
hsa_signal_wait_relaxed(hsa_signal_t signal,
|
||||
hsa_signal_condition_t condition,
|
||||
hsa_signal_value_t compare_value,
|
||||
uint64_t timeout_hint,
|
||||
hsa_wait_state_t wait_expectancy_hint);
|
||||
hsa_signal_value_t HSA_API
|
||||
hsa_signal_wait_acquire(hsa_signal_t signal,
|
||||
hsa_signal_condition_t condition,
|
||||
hsa_signal_value_t compare_value,
|
||||
uint64_t timeout_hint,
|
||||
hsa_wait_state_t wait_expectancy_hint);
|
||||
void HSA_API
|
||||
hsa_signal_and_relaxed(hsa_signal_t signal, hsa_signal_value_t value);
|
||||
void HSA_API
|
||||
hsa_signal_and_acquire(hsa_signal_t signal, hsa_signal_value_t value);
|
||||
void HSA_API
|
||||
hsa_signal_and_release(hsa_signal_t signal, hsa_signal_value_t value);
|
||||
void HSA_API
|
||||
hsa_signal_and_acq_rel(hsa_signal_t signal, hsa_signal_value_t value);
|
||||
void HSA_API
|
||||
hsa_signal_or_relaxed(hsa_signal_t signal, hsa_signal_value_t value);
|
||||
void HSA_API
|
||||
hsa_signal_or_acquire(hsa_signal_t signal, hsa_signal_value_t value);
|
||||
void HSA_API
|
||||
hsa_signal_or_release(hsa_signal_t signal, hsa_signal_value_t value);
|
||||
void HSA_API
|
||||
hsa_signal_or_acq_rel(hsa_signal_t signal, hsa_signal_value_t value);
|
||||
void HSA_API
|
||||
hsa_signal_xor_relaxed(hsa_signal_t signal, hsa_signal_value_t value);
|
||||
void HSA_API
|
||||
hsa_signal_xor_acquire(hsa_signal_t signal, hsa_signal_value_t value);
|
||||
void HSA_API
|
||||
hsa_signal_xor_release(hsa_signal_t signal, hsa_signal_value_t value);
|
||||
void HSA_API
|
||||
hsa_signal_xor_acq_rel(hsa_signal_t signal, hsa_signal_value_t value);
|
||||
void HSA_API
|
||||
hsa_signal_add_relaxed(hsa_signal_t signal, hsa_signal_value_t value);
|
||||
void HSA_API
|
||||
hsa_signal_add_acquire(hsa_signal_t signal, hsa_signal_value_t value);
|
||||
void HSA_API
|
||||
hsa_signal_add_release(hsa_signal_t signal, hsa_signal_value_t value);
|
||||
void HSA_API
|
||||
hsa_signal_add_acq_rel(hsa_signal_t signal, hsa_signal_value_t value);
|
||||
void HSA_API
|
||||
hsa_signal_subtract_relaxed(hsa_signal_t signal, hsa_signal_value_t value);
|
||||
void HSA_API
|
||||
hsa_signal_subtract_acquire(hsa_signal_t signal, hsa_signal_value_t value);
|
||||
void HSA_API
|
||||
hsa_signal_subtract_release(hsa_signal_t signal, hsa_signal_value_t value);
|
||||
void HSA_API
|
||||
hsa_signal_subtract_acq_rel(hsa_signal_t signal, hsa_signal_value_t value);
|
||||
hsa_signal_value_t HSA_API
|
||||
hsa_signal_exchange_relaxed(hsa_signal_t signal, hsa_signal_value_t value);
|
||||
hsa_signal_value_t HSA_API
|
||||
hsa_signal_exchange_acquire(hsa_signal_t signal, hsa_signal_value_t value);
|
||||
hsa_signal_value_t HSA_API
|
||||
hsa_signal_exchange_release(hsa_signal_t signal, hsa_signal_value_t value);
|
||||
hsa_signal_value_t HSA_API
|
||||
hsa_signal_exchange_acq_rel(hsa_signal_t signal, hsa_signal_value_t value);
|
||||
hsa_signal_value_t HSA_API hsa_signal_cas_relaxed(hsa_signal_t signal,
|
||||
hsa_signal_value_t expected,
|
||||
hsa_signal_value_t value);
|
||||
hsa_signal_value_t HSA_API hsa_signal_cas_acquire(hsa_signal_t signal,
|
||||
hsa_signal_value_t expected,
|
||||
hsa_signal_value_t value);
|
||||
hsa_signal_value_t HSA_API hsa_signal_cas_release(hsa_signal_t signal,
|
||||
hsa_signal_value_t expected,
|
||||
hsa_signal_value_t value);
|
||||
hsa_signal_value_t HSA_API hsa_signal_cas_acq_rel(hsa_signal_t signal,
|
||||
hsa_signal_value_t expected,
|
||||
hsa_signal_value_t value);
|
||||
// ISA lookup and queries.
//
// hsa_isa_from_name and hsa_isa_compatible now carry HSA_API like every other
// declaration in this namespace, which is documented above as a copy of the
// hsa.h declarations.
// NOTE(review): HSA_API is defined outside this header; presumably it only
// adds export/calling-convention decoration, so the out-of-line definitions
// remain compatible - confirm against hsa.h.
hsa_status_t HSA_API hsa_isa_from_name(const char* name, hsa_isa_t* isa);

hsa_status_t HSA_API hsa_isa_get_info(
    hsa_isa_t isa, hsa_isa_info_t attribute, uint32_t index, void* value);

hsa_status_t HSA_API hsa_isa_compatible(
    hsa_isa_t code_object_isa, hsa_isa_t agent_isa, bool* result);
|
||||
// Code object serialization, lifetime and queries.
hsa_status_t HSA_API hsa_code_object_serialize(
    hsa_code_object_t code_object,
    hsa_status_t (*alloc_callback)(size_t size, hsa_callback_data_t data,
                                   void** address),
    hsa_callback_data_t callback_data, const char* options,
    void** serialized_code_object, size_t* serialized_code_object_size);

hsa_status_t HSA_API hsa_code_object_deserialize(
    void* serialized_code_object, size_t serialized_code_object_size,
    const char* options, hsa_code_object_t* code_object);

hsa_status_t HSA_API hsa_code_object_destroy(hsa_code_object_t code_object);

hsa_status_t HSA_API hsa_code_object_get_info(
    hsa_code_object_t code_object, hsa_code_object_info_t attribute,
    void* value);

// Code symbol lookup, queries and iteration.
hsa_status_t HSA_API hsa_code_object_get_symbol(
    hsa_code_object_t code_object, const char* symbol_name,
    hsa_code_symbol_t* symbol);

hsa_status_t HSA_API hsa_code_symbol_get_info(
    hsa_code_symbol_t code_symbol, hsa_code_symbol_info_t attribute,
    void* value);

hsa_status_t HSA_API hsa_code_object_iterate_symbols(
    hsa_code_object_t code_object,
    hsa_status_t (*callback)(hsa_code_object_t code_object,
                             hsa_code_symbol_t symbol, void* data),
    void* data);
|
||||
// Executable lifetime, loading and queries.
hsa_status_t HSA_API hsa_executable_create(
    hsa_profile_t profile, hsa_executable_state_t executable_state,
    const char* options, hsa_executable_t* executable);

hsa_status_t HSA_API hsa_executable_destroy(hsa_executable_t executable);

hsa_status_t HSA_API hsa_executable_load_code_object(
    hsa_executable_t executable, hsa_agent_t agent,
    hsa_code_object_t code_object, const char* options);

hsa_status_t HSA_API hsa_executable_freeze(
    hsa_executable_t executable, const char* options);

hsa_status_t HSA_API hsa_executable_get_info(
    hsa_executable_t executable, hsa_executable_info_t attribute,
    void* value);

// Variable definitions.
hsa_status_t HSA_API hsa_executable_global_variable_define(
    hsa_executable_t executable, const char* variable_name, void* address);

hsa_status_t HSA_API hsa_executable_agent_global_variable_define(
    hsa_executable_t executable, hsa_agent_t agent,
    const char* variable_name, void* address);

hsa_status_t HSA_API hsa_executable_readonly_variable_define(
    hsa_executable_t executable, hsa_agent_t agent,
    const char* variable_name, void* address);

hsa_status_t HSA_API hsa_executable_validate(
    hsa_executable_t executable, uint32_t* result);

// Executable symbol lookup, queries and iteration.
hsa_status_t HSA_API hsa_executable_get_symbol(
    hsa_executable_t executable, const char* module_name,
    const char* symbol_name, hsa_agent_t agent, int32_t call_convention,
    hsa_executable_symbol_t* symbol);

hsa_status_t HSA_API hsa_executable_symbol_get_info(
    hsa_executable_symbol_t executable_symbol,
    hsa_executable_symbol_info_t attribute, void* value);

hsa_status_t HSA_API hsa_executable_iterate_symbols(
    hsa_executable_t executable,
    hsa_status_t (*callback)(hsa_executable_t executable,
                             hsa_executable_symbol_t symbol, void* data),
    void* data);
|
||||
// Status-to-string query; the result is written through status_string.
hsa_status_t HSA_API hsa_status_string(
    hsa_status_t status, const char** status_string);
|
||||
|
||||
}
|
||||
|
||||
#ifdef BUILDING_HSA_CORE_RUNTIME
|
||||
// This using directive is deliberate!
|
||||
// We want unqualified name resolution to fail when building the runtime. This is a guard against accidental use of the intercept layer in the runtime. NOTE(review): presumably an unqualified call then becomes ambiguous between the global hsa.h declaration and the HSA:: one - confirm.
|
||||
using namespace HSA;
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@@ -1,47 +0,0 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Include guard added: unlike the sibling headers in this change, this one
// had none.
#ifndef HSA_RUNTIME_CORE_INC_HSA_TABLE_INTERFACE_H
#define HSA_RUNTIME_CORE_INC_HSA_TABLE_INTERFACE_H

#include "hsa_api_trace.h"

// Accessors for an ApiTable pointer.
// NOTE(review): both functions are defined elsewhere; presumably init stores
// the table that get_table later returns - confirm against the
// implementation.
void hsa_table_interface_init(const ApiTable* table);

const ApiTable* hsa_table_interface_get_table();

#endif  // HSA_RUNTIME_CORE_INC_HSA_TABLE_INTERFACE_H
|
||||
@@ -1,206 +0,0 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// HSA runtime C++ interface file.
|
||||
|
||||
#ifndef HSA_RUNTME_CORE_INC_INTERRUPT_SIGNAL_H_
|
||||
#define HSA_RUNTME_CORE_INC_INTERRUPT_SIGNAL_H_
|
||||
|
||||
#include "hsakmt.h"
|
||||
|
||||
#include "core/inc/runtime.h"
|
||||
#include "core/inc/signal.h"
|
||||
#include "core/util/utils.h"
|
||||
|
||||
namespace core {
|
||||
|
||||
/// @brief A Signal implementation using interrupts versus plain memory based.
|
||||
/// Also see base class Signal.
|
||||
///
|
||||
/// Breaks common/vendor separation - signals in general needs to be re-worked
|
||||
/// at the foundation level to make sense in a multi-device system.
|
||||
/// Supports only one waiter for now.
|
||||
/// KFD changes are needed to support multiple waiters and have device
|
||||
/// signaling.
|
||||
class InterruptSignal : public Signal {
|
||||
public:
|
||||
static HsaEvent* CreateEvent(HSA_EVENTTYPE type, bool manual_reset);
|
||||
static void DestroyEvent(HsaEvent* evt);
|
||||
|
||||
/// @brief Determines if a Signal* can be safely converted to an
|
||||
/// InterruptSignal* via static_cast.
|
||||
static __forceinline bool IsType(Signal* ptr) {
|
||||
return ptr->IsType(&rtti_id_);
|
||||
}
|
||||
|
||||
explicit InterruptSignal(hsa_signal_value_t initial_value,
|
||||
HsaEvent* use_event = NULL);
|
||||
|
||||
~InterruptSignal();
|
||||
|
||||
// Below are various methods corresponding to the APIs, which load/store the
|
||||
// signal value or modify the existing signal value atomically and with
|
||||
// specified memory ordering semantics.
|
||||
|
||||
hsa_signal_value_t LoadRelaxed();
|
||||
|
||||
hsa_signal_value_t LoadAcquire();
|
||||
|
||||
void StoreRelaxed(hsa_signal_value_t value);
|
||||
|
||||
void StoreRelease(hsa_signal_value_t value);
|
||||
|
||||
hsa_signal_value_t WaitRelaxed(hsa_signal_condition_t condition,
|
||||
hsa_signal_value_t compare_value,
|
||||
uint64_t timeout, hsa_wait_state_t wait_hint);
|
||||
|
||||
hsa_signal_value_t WaitAcquire(hsa_signal_condition_t condition,
|
||||
hsa_signal_value_t compare_value,
|
||||
uint64_t timeout, hsa_wait_state_t wait_hint);
|
||||
|
||||
void AndRelaxed(hsa_signal_value_t value);
|
||||
|
||||
void AndAcquire(hsa_signal_value_t value);
|
||||
|
||||
void AndRelease(hsa_signal_value_t value);
|
||||
|
||||
void AndAcqRel(hsa_signal_value_t value);
|
||||
|
||||
void OrRelaxed(hsa_signal_value_t value);
|
||||
|
||||
void OrAcquire(hsa_signal_value_t value);
|
||||
|
||||
void OrRelease(hsa_signal_value_t value);
|
||||
|
||||
void OrAcqRel(hsa_signal_value_t value);
|
||||
|
||||
void XorRelaxed(hsa_signal_value_t value);
|
||||
|
||||
void XorAcquire(hsa_signal_value_t value);
|
||||
|
||||
void XorRelease(hsa_signal_value_t value);
|
||||
|
||||
void XorAcqRel(hsa_signal_value_t value);
|
||||
|
||||
void AddRelaxed(hsa_signal_value_t value);
|
||||
|
||||
void AddAcquire(hsa_signal_value_t value);
|
||||
|
||||
void AddRelease(hsa_signal_value_t value);
|
||||
|
||||
void AddAcqRel(hsa_signal_value_t value);
|
||||
|
||||
void SubRelaxed(hsa_signal_value_t value);
|
||||
|
||||
void SubAcquire(hsa_signal_value_t value);
|
||||
|
||||
void SubRelease(hsa_signal_value_t value);
|
||||
|
||||
void SubAcqRel(hsa_signal_value_t value);
|
||||
|
||||
hsa_signal_value_t ExchRelaxed(hsa_signal_value_t value);
|
||||
|
||||
hsa_signal_value_t ExchAcquire(hsa_signal_value_t value);
|
||||
|
||||
hsa_signal_value_t ExchRelease(hsa_signal_value_t value);
|
||||
|
||||
hsa_signal_value_t ExchAcqRel(hsa_signal_value_t value);
|
||||
|
||||
hsa_signal_value_t CasRelaxed(hsa_signal_value_t expected,
|
||||
hsa_signal_value_t value);
|
||||
|
||||
hsa_signal_value_t CasAcquire(hsa_signal_value_t expected,
|
||||
hsa_signal_value_t value);
|
||||
|
||||
hsa_signal_value_t CasRelease(hsa_signal_value_t expected,
|
||||
hsa_signal_value_t value);
|
||||
|
||||
hsa_signal_value_t CasAcqRel(hsa_signal_value_t expected,
|
||||
hsa_signal_value_t value);
|
||||
|
||||
/// @brief See base class Signal.
|
||||
__forceinline hsa_signal_value_t* ValueLocation() const {
|
||||
return (hsa_signal_value_t*)&signal_.value;
|
||||
}
|
||||
|
||||
/// @brief See base class Signal.
|
||||
__forceinline HsaEvent* EopEvent() { return event_; }
|
||||
|
||||
// TODO(bwicakso) : work around for SDMA async copy. Bypass waiting on EOP
|
||||
// event because SDMA copy does not handle interrupt yet.
|
||||
__forceinline void DisableWaitEvent() { wait_on_event_ = false; }
|
||||
|
||||
/// @brief Allocates object storage with malloc so that allocation itself
/// never throws.
///
/// NOTE(review): this overload is not declared non-throwing, so a
/// new-expression is not required to null-check the returned pointer before
/// running the constructor. Confirm how malloc failure is expected to be
/// handled by callers.
void* operator new(size_t size) {
  void* storage = malloc(size);
  return storage;
}

/// @brief Releases storage obtained from the malloc-based operator new above.
void operator delete(void* ptr) {
  free(ptr);
}
|
||||
|
||||
protected:
|
||||
bool _IsA(rtti_t id) const { return id == &rtti_id_; }
|
||||
|
||||
private:
|
||||
/// @variable KFD event on which the interrupt signal is based on.
|
||||
HsaEvent* event_;
|
||||
|
||||
/// @variable Indicates whether the signal should release the event when it
|
||||
/// closes or not.
|
||||
bool free_event_;
|
||||
|
||||
// TODO(bwicakso) : work around for SDMA async copy. Bypass waiting on EOP
|
||||
// event because SDMA copy does not handle interrupt yet.
|
||||
bool wait_on_event_;
|
||||
|
||||
/// Used to obtain a globally unique value (address) for rtti.
|
||||
static int rtti_id_;
|
||||
|
||||
/// @brief Notify driver of signal value change if necessary.
|
||||
__forceinline void SetEvent() {
|
||||
std::atomic_signal_fence(std::memory_order_seq_cst);
|
||||
if (InWaiting()) hsaKmtSetEvent(event_);
|
||||
}
|
||||
|
||||
DISALLOW_COPY_AND_ASSIGN(InterruptSignal);
|
||||
};
|
||||
|
||||
} // namespace core
|
||||
#endif // header guard
|
||||
@@ -1,164 +0,0 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef HSA_RUNTIME_CORE_ISA_H_
|
||||
#define HSA_RUNTIME_CORE_ISA_H_
|
||||
|
||||
#include <cassert>
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
#include <tuple>
|
||||
#include <unordered_map>
|
||||
#include "core/inc/amd_hsa_code.hpp"
|
||||
|
||||
namespace core {
|
||||
|
||||
// @class Isa
|
||||
// @brief Instruction Set Architecture
|
||||
class Isa final: public amd::hsa::common::Signed<0xB13594F2BD8F212D> {
|
||||
public:
|
||||
// @brief Isa's version type
|
||||
typedef std::tuple<int32_t, int32_t, int32_t> Version;
|
||||
|
||||
// @brief Default destructor
|
||||
~Isa() {}
|
||||
|
||||
// @returns Handle equivalent of @p isa_object
|
||||
static hsa_isa_t Handle(const Isa *isa_object) {
|
||||
hsa_isa_t isa_handle = { reinterpret_cast<uint64_t>(isa_object) };
|
||||
return isa_handle;
|
||||
}
|
||||
// @returns Object equivalend of @p isa_handle
|
||||
static Isa *Object(const hsa_isa_t &isa_handle) {
|
||||
Isa *isa_object = amd::hsa::common::ObjectAt<Isa>(isa_handle.handle);
|
||||
return isa_object;
|
||||
}
|
||||
|
||||
// @returns This Isa's version
|
||||
const Version &version() const {
|
||||
return version_;
|
||||
}
|
||||
|
||||
// @returns This Isa's vendor
|
||||
std::string GetVendor() const {
|
||||
return "AMD";
|
||||
}
|
||||
// @returns This Isa's architecture
|
||||
std::string GetArchitecture() const {
|
||||
return "AMDGPU";
|
||||
}
|
||||
// @returns This Isa's major version
|
||||
int32_t GetMajorVersion() const {
|
||||
return std::get<0>(version_);
|
||||
}
|
||||
// @returns This Isa's minor version
|
||||
int32_t GetMinorVersion() const {
|
||||
return std::get<1>(version_);
|
||||
}
|
||||
// @returns This Isa's stepping
|
||||
int32_t GetStepping() const {
|
||||
return std::get<2>(version_);
|
||||
}
|
||||
|
||||
// @returns True if this Isa is compatible with @p isa_object, false otherwise
|
||||
bool IsCompatible(const Isa *isa_object) const {
|
||||
assert(isa_object);
|
||||
return version_ == isa_object->version_;
|
||||
}
|
||||
// @returns True if this Isa is compatible with @p isa_handle, false otherwise
|
||||
bool IsCompatible(const hsa_isa_t &isa_handle) const {
|
||||
assert(isa_handle.handle);
|
||||
return IsCompatible(Object(isa_handle));
|
||||
}
|
||||
// @brief Isa is always in valid state
|
||||
bool IsValid() const {
|
||||
return true;
|
||||
}
|
||||
|
||||
// @returns This Isa's full name
|
||||
std::string GetFullName() const;
|
||||
|
||||
// @brief Query value of requested @p attribute and record it in @p value
|
||||
bool GetInfo(const hsa_isa_info_t &attribute, void *value) const;
|
||||
|
||||
private:
|
||||
// @brief Default constructor
|
||||
Isa(): version_(Version(-1, -1, -1)) {}
|
||||
|
||||
// @brief Construct from @p version
|
||||
Isa(const Version &version): version_(version) {}
|
||||
|
||||
// @brief Isa's version
|
||||
Version version_;
|
||||
|
||||
// @brief Isa's friends
|
||||
friend class IsaRegistry;
|
||||
}; // class Isa
|
||||
|
||||
// @class IsaRegistry
|
||||
// @brief Instruction Set Architecture Registry
|
||||
class IsaRegistry final {
|
||||
public:
|
||||
// @returns Isa for requested @p full_name, null pointer if not supported
|
||||
static const Isa *GetIsa(const std::string &full_name);
|
||||
// @returns Isa for requested @p version, null pointer if not supported
|
||||
static const Isa *GetIsa(const Isa::Version &version);
|
||||
|
||||
private:
|
||||
// @brief IsaRegistry's map type
|
||||
typedef std::unordered_map<std::string, Isa> IsaMap;
|
||||
|
||||
// @brief Supported instruction set architectures
|
||||
static const IsaMap supported_isas_;
|
||||
|
||||
// @brief Default constructor - not available
|
||||
IsaRegistry();
|
||||
// @brief Default destructor - not available
|
||||
~IsaRegistry();
|
||||
|
||||
// @returns Supported instruction set architectures
|
||||
static const IsaMap GetSupportedIsas();
|
||||
}; // class IsaRegistry
|
||||
|
||||
} // namespace core
|
||||
|
||||
#endif // HSA_RUNTIME_CORE_ISA_H_
|
||||
@@ -1,109 +0,0 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// HSA runtime C++ interface file.
|
||||
|
||||
#ifndef HSA_RUNTME_CORE_INC_MEMORY_REGION_H_
|
||||
#define HSA_RUNTME_CORE_INC_MEMORY_REGION_H_
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "core/inc/runtime.h"
|
||||
#include "core/inc/agent.h"
|
||||
#include "core/inc/checked.h"
|
||||
|
||||
namespace core {
|
||||
class Agent;
|
||||
|
||||
/// @brief Abstract description of a memory region owned by an agent.
///
/// Stores the fine-grain / full-profile flags and the owning agent, converts
/// between MemoryRegion pointers and hsa_region_t handles, and declares the
/// allocation and query interface that concrete regions implement.
class MemoryRegion : public Checked<0x9C961F19EE175BB3> {
 public:
  /// @param fine_grain Value later returned by fine_grain().
  /// @param full_profile Value later returned by full_profile().
  /// @param owner Agent later returned by owner(); asserted to be non-null.
  MemoryRegion(bool fine_grain, bool full_profile, core::Agent* owner)
      : fine_grain_(fine_grain), full_profile_(full_profile), owner_(owner) {
    assert(owner_ != NULL);
  }

  virtual ~MemoryRegion() {}

  // Convert this object into hsa_region_t. The handle carries the object's
  // address; this overload shares the encoding with the const overload.
  static __forceinline hsa_region_t Convert(MemoryRegion* region) {
    return Convert(static_cast<const MemoryRegion*>(region));
  }

  static __forceinline const hsa_region_t Convert(const MemoryRegion* region) {
    const hsa_region_t region_handle = {
        static_cast<uint64_t>(reinterpret_cast<uintptr_t>(region))};
    return region_handle;
  }

  // Convert hsa_region_t into MemoryRegion *. The handle is narrowed to
  // uintptr_t first so that decoding mirrors the encoding above and the
  // integer-to-pointer cast always operates on a pointer-sized integer.
  static __forceinline MemoryRegion* Convert(hsa_region_t region) {
    return reinterpret_cast<MemoryRegion*>(
        static_cast<uintptr_t>(region.handle));
  }

  virtual hsa_status_t Allocate(size_t size, void** address) const = 0;

  virtual hsa_status_t Free(void* address, size_t size) const = 0;

  // Translate memory properties into HSA region attribute.
  virtual hsa_status_t GetInfo(hsa_region_info_t attribute,
                               void* value) const = 0;

  virtual hsa_status_t AssignAgent(void* ptr, size_t size, const Agent& agent,
                                   hsa_access_permission_t access) const = 0;

  __forceinline bool fine_grain() const { return fine_grain_; }

  __forceinline bool full_profile() const { return full_profile_; }

  __forceinline core::Agent* owner() const { return owner_; }

 private:
  const bool fine_grain_;
  const bool full_profile_;

  core::Agent* owner_;
};
|
||||
} // namespace core
|
||||
|
||||
#endif // header guard
|
||||
@@ -1,322 +0,0 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// HSA runtime C++ interface file.
|
||||
|
||||
#ifndef HSA_RUNTME_CORE_INC_COMMAND_QUEUE_H_
|
||||
#define HSA_RUNTME_CORE_INC_COMMAND_QUEUE_H_
|
||||
#include <sstream>
|
||||
|
||||
#include "core/common/shared.h"
|
||||
|
||||
#include "core/inc/runtime.h"
|
||||
#include "core/inc/checked.h"
|
||||
|
||||
#include "core/util/utils.h"
|
||||
|
||||
#include "inc/amd_hsa_queue.h"
|
||||
|
||||
namespace core {
|
||||
struct AqlPacket {
|
||||
|
||||
union {
|
||||
hsa_kernel_dispatch_packet_t dispatch;
|
||||
hsa_barrier_and_packet_t barrier_and;
|
||||
hsa_barrier_or_packet_t barrier_or;
|
||||
hsa_agent_dispatch_packet_t agent;
|
||||
};
|
||||
|
||||
uint8_t type() {
|
||||
return ((dispatch.header >> HSA_PACKET_HEADER_TYPE) &
|
||||
((1 << HSA_PACKET_HEADER_WIDTH_TYPE) - 1));
|
||||
}
|
||||
|
||||
bool IsValid() {
|
||||
const uint8_t packet_type = dispatch.header >> HSA_PACKET_HEADER_TYPE;
|
||||
return (packet_type > HSA_PACKET_TYPE_INVALID &&
|
||||
packet_type <= HSA_PACKET_TYPE_BARRIER_OR);
|
||||
}
|
||||
|
||||
std::string string() const {
|
||||
std::stringstream string;
|
||||
uint8_t type = ((dispatch.header >> HSA_PACKET_HEADER_TYPE) &
|
||||
((1 << HSA_PACKET_HEADER_WIDTH_TYPE) - 1));
|
||||
|
||||
const char* type_names[] = {
|
||||
"HSA_PACKET_TYPE_VENDOR_SPECIFIC", "HSA_PACKET_TYPE_INVALID",
|
||||
"HSA_PACKET_TYPE_KERNEL_DISPATCH", "HSA_PACKET_TYPE_BARRIER_AND",
|
||||
"HSA_PACKET_TYPE_AGENT_DISPATCH", "HSA_PACKET_TYPE_BARRIER_OR"};
|
||||
|
||||
string << "type: " << type_names[type]
|
||||
<< "\nbarrier: " << ((dispatch.header >> HSA_PACKET_HEADER_BARRIER) &
|
||||
((1 << HSA_PACKET_HEADER_WIDTH_BARRIER) - 1))
|
||||
<< "\nacquire: "
|
||||
<< ((dispatch.header >> HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE) &
|
||||
((1 << HSA_PACKET_HEADER_WIDTH_ACQUIRE_FENCE_SCOPE) - 1))
|
||||
<< "\nrelease: "
|
||||
<< ((dispatch.header >> HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE) &
|
||||
((1 << HSA_PACKET_HEADER_WIDTH_RELEASE_FENCE_SCOPE) - 1));
|
||||
|
||||
if (type == HSA_PACKET_TYPE_KERNEL_DISPATCH) {
|
||||
string << "\nDim: " << dispatch.setup
|
||||
<< "\nworkgroup_size: " << dispatch.workgroup_size_x << ", "
|
||||
<< dispatch.workgroup_size_y << ", " << dispatch.workgroup_size_z
|
||||
<< "\ngrid_size: " << dispatch.grid_size_x << ", "
|
||||
<< dispatch.grid_size_y << ", " << dispatch.grid_size_z
|
||||
<< "\nprivate_size: " << dispatch.private_segment_size
|
||||
<< "\ngroup_size: " << dispatch.group_segment_size
|
||||
<< "\nkernel_object: " << dispatch.kernel_object
|
||||
<< "\nkern_arg: " << dispatch.kernarg_address
|
||||
<< "\nsignal: " << dispatch.completion_signal.handle;
|
||||
}
|
||||
|
||||
if ((type == HSA_PACKET_TYPE_BARRIER_AND) ||
|
||||
(type == HSA_PACKET_TYPE_BARRIER_OR)) {
|
||||
for (int i = 0; i < 5; i++)
|
||||
string << "\ndep[" << i << "]: " << barrier_and.dep_signal[i].handle;
|
||||
string << "\nsignal: " << barrier_and.completion_signal.handle;
|
||||
}
|
||||
|
||||
return string.str();
|
||||
}
|
||||
};
|
||||
|
||||
class Queue;
|
||||
|
||||
/// @brief Helper structure to simplify conversion of amd_queue_t and
|
||||
/// core::Queue object.
|
||||
struct SharedQueue {
|
||||
amd_queue_t amd_queue;
|
||||
Queue* core_queue;
|
||||
};
|
||||
|
||||
/// @brief Class Queue which encapsulate user mode queues and
|
||||
/// provides Api to access its Read, Write indices using Acquire,
|
||||
/// Release and Relaxed semantics.
|
||||
/*
|
||||
Queue is intended to be an pure interface class and may be wrapped or replaced
|
||||
by tools.
|
||||
All functions other than Convert and public_handle must be virtual.
|
||||
*/
|
||||
class Queue : public Checked<0xFA3906A679F9DB49>,
|
||||
public Shared<SharedQueue, AMD_QUEUE_ALIGN_BYTES> {
|
||||
public:
|
||||
Queue() : Shared(), amd_queue_(shared_object()->amd_queue) {
|
||||
if (!Shared::IsSharedObjectAllocationValid()) {
|
||||
return;
|
||||
}
|
||||
|
||||
shared_object()->core_queue = this;
|
||||
|
||||
public_handle_ = Convert(this);
|
||||
}
|
||||
|
||||
virtual ~Queue() {}
|
||||
|
||||
/// @brief Returns the handle of Queue's public data type
|
||||
///
|
||||
/// @param queue Pointer to an instance of Queue implementation object
|
||||
///
|
||||
/// @return hsa_queue_t * Pointer to the public data type of a queue
|
||||
static __forceinline hsa_queue_t* Convert(Queue* queue) {
|
||||
return ((queue != NULL) && (queue->IsSharedObjectAllocationValid()))
|
||||
? &queue->amd_queue_.hsa_queue
|
||||
: NULL;
|
||||
}
|
||||
|
||||
/// @brief Transform the public data type of a Queue's data type into an
|
||||
// instance of it Queue class object
|
||||
///
|
||||
/// @param queue Handle of public data type of a queue
|
||||
///
|
||||
/// @return Queue * Pointer to the Queue's implementation object
|
||||
static __forceinline Queue* Convert(const hsa_queue_t* queue) {
|
||||
return (queue != NULL)
|
||||
? reinterpret_cast<const SharedQueue*>(
|
||||
reinterpret_cast<uintptr_t>(queue) -
|
||||
(reinterpret_cast<uintptr_t>(
|
||||
&reinterpret_cast<SharedQueue*>(1234)
|
||||
->amd_queue.hsa_queue) -
|
||||
uintptr_t(1234)))->core_queue
|
||||
: NULL;
|
||||
}
|
||||
|
||||
/// @brief Inactivate the queue object. Once inactivate a
|
||||
/// queue cannot be used anymore and must be destroyed
|
||||
///
|
||||
/// @return hsa_status_t Status of request
|
||||
virtual hsa_status_t Inactivate() = 0;
|
||||
|
||||
/// @brief Reads the Read Index of Queue using Acquire semantics
|
||||
///
|
||||
/// @return uint64_t Value of Read index
|
||||
virtual uint64_t LoadReadIndexAcquire() = 0;
|
||||
|
||||
/// @brief Reads the Read Index of Queue using Relaxed semantics
|
||||
///
|
||||
/// @return uint64_t Value of Read index
|
||||
virtual uint64_t LoadReadIndexRelaxed() = 0;
|
||||
|
||||
/// @brief Reads the Write Index of Queue using Acquire semantics
|
||||
///
|
||||
/// @return uint64_t Value of Write index
|
||||
virtual uint64_t LoadWriteIndexAcquire() = 0;
|
||||
|
||||
/// Reads the Write Index of Queue using Relaxed semantics
|
||||
///
|
||||
/// @return uint64_t Value of Write index
|
||||
virtual uint64_t LoadWriteIndexRelaxed() = 0;
|
||||
|
||||
/// @brief Updates the Read Index of Queue using Relaxed semantics
|
||||
///
|
||||
/// @param value New value of Read index to update
|
||||
virtual void StoreReadIndexRelaxed(uint64_t value) = 0;
|
||||
|
||||
/// @brief Updates the Read Index of Queue using Release semantics
|
||||
///
|
||||
/// @param value New value of Read index to update
|
||||
virtual void StoreReadIndexRelease(uint64_t value) = 0;
|
||||
|
||||
/// @brief Updates the Write Index of Queue using Relaxed semantics
|
||||
///
|
||||
/// @param value New value of Write index to update
|
||||
virtual void StoreWriteIndexRelaxed(uint64_t value) = 0;
|
||||
|
||||
/// @brief Updates the Write Index of Queue using Release semantics
|
||||
///
|
||||
/// @param value New value of Write index to update
|
||||
virtual void StoreWriteIndexRelease(uint64_t value) = 0;
|
||||
|
||||
/// @brief Compares and swaps Write index using Acquire and Release semantics
|
||||
///
|
||||
/// @param expected Current value of write index
|
||||
///
|
||||
/// @param value Value of new write index
|
||||
///
|
||||
/// @return uint64_t Value of write index before the update
|
||||
virtual uint64_t CasWriteIndexAcqRel(uint64_t expected, uint64_t value) = 0;
|
||||
|
||||
/// @brief Compares and swaps Write index using Acquire semantics
|
||||
///
|
||||
/// @param expected Current value of write index
|
||||
///
|
||||
/// @param value Value of new write index
|
||||
///
|
||||
/// @return uint64_t Value of write index before the update
|
||||
virtual uint64_t CasWriteIndexAcquire(uint64_t expected, uint64_t value) = 0;
|
||||
|
||||
/// @brief Compares and swaps Write index using Relaxed semantics
|
||||
///
|
||||
/// @param expected Current value of write index
|
||||
///
|
||||
/// @param value Value of new write index
|
||||
///
|
||||
/// @return uint64_t Value of write index before the update
|
||||
virtual uint64_t CasWriteIndexRelaxed(uint64_t expected, uint64_t value) = 0;
|
||||
|
||||
/// @brief Compares and swaps Write index using Release semantics
|
||||
///
|
||||
/// @param expected Current value of write index
|
||||
///
|
||||
/// @param value Value of new write index
|
||||
///
|
||||
/// @return uint64_t Value of write index before the update
|
||||
virtual uint64_t CasWriteIndexRelease(uint64_t expected, uint64_t value) = 0;
|
||||
|
||||
/// @brief Updates the Write index using Acquire and Release semantics
|
||||
///
|
||||
/// @param value Value of new write index
|
||||
///
|
||||
/// @return uint64_t Value of write index before the update
|
||||
virtual uint64_t AddWriteIndexAcqRel(uint64_t value) = 0;
|
||||
|
||||
/// @brief Updates the Write index using Acquire semantics
|
||||
///
|
||||
/// @param value Value of new write index
|
||||
///
|
||||
/// @return uint64_t Value of write index before the update
|
||||
virtual uint64_t AddWriteIndexAcquire(uint64_t value) = 0;
|
||||
|
||||
/// @brief Updates the Write index using Relaxed semantics
|
||||
///
|
||||
/// @param value Value of new write index
|
||||
///
|
||||
/// @return uint64_t Value of write index before the update
|
||||
virtual uint64_t AddWriteIndexRelaxed(uint64_t value) = 0;
|
||||
|
||||
/// @brief Updates the Write index using Release semantics
|
||||
///
|
||||
/// @param value Value of new write index
|
||||
///
|
||||
/// @return uint64_t Value of write index before the update
|
||||
virtual uint64_t AddWriteIndexRelease(uint64_t value) = 0;
|
||||
|
||||
/// @brief Set CU Masking
|
||||
///
|
||||
/// @param num_cu_mask_count size of mask bit array
|
||||
///
|
||||
/// @param cu_mask pointer to cu mask
|
||||
///
|
||||
/// @return hsa_status_t
|
||||
virtual hsa_status_t SetCUMasking(const uint32_t num_cu_mask_count,
|
||||
const uint32_t* cu_mask) = 0;
|
||||
|
||||
// Handle of AMD Queue struct
|
||||
amd_queue_t& amd_queue_;
|
||||
|
||||
hsa_queue_t* public_handle() const { return public_handle_; }
|
||||
|
||||
protected:
|
||||
static void set_public_handle(Queue* ptr, hsa_queue_t* handle) {
|
||||
ptr->do_set_public_handle(handle);
|
||||
}
|
||||
virtual void do_set_public_handle(hsa_queue_t* handle) {
|
||||
public_handle_ = handle;
|
||||
}
|
||||
hsa_queue_t* public_handle_;
|
||||
|
||||
private:
|
||||
DISALLOW_COPY_AND_ASSIGN(Queue);
|
||||
};
|
||||
}
|
||||
|
||||
#endif // header guard
|
||||
@@ -1,204 +0,0 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// This file is used only for open source cmake builds, if we hardcode the
|
||||
// register values in amd_aql_queue.cpp then this file won't be required. For
|
||||
// now we are using this file where register details are spelled out in the
|
||||
// structs/unions below.
|
||||
#ifndef HSA_RUNTME_CORE_INC_REGISTERS_H_
|
||||
#define HSA_RUNTME_CORE_INC_REGISTERS_H_
|
||||
|
||||
/* Values for a buffer resource type field: one named value and three
 * reserved encodings. */
typedef enum SQ_RSRC_BUF_TYPE {
  SQ_RSRC_BUF        = 0x0,
  SQ_RSRC_BUF_RSVD_1 = 0x1,
  SQ_RSRC_BUF_RSVD_2 = 0x2,
  SQ_RSRC_BUF_RSVD_3 = 0x3,
} SQ_RSRC_BUF_TYPE;
|
||||
|
||||
/* Buffer data format encodings, 0x0 through 0xf. The name suffix spells out
 * the per-component bit widths. */
typedef enum BUF_DATA_FORMAT {
  BUF_DATA_FORMAT_INVALID     = 0x0,
  BUF_DATA_FORMAT_8           = 0x1,
  BUF_DATA_FORMAT_16          = 0x2,
  BUF_DATA_FORMAT_8_8         = 0x3,
  BUF_DATA_FORMAT_32          = 0x4,
  BUF_DATA_FORMAT_16_16       = 0x5,
  BUF_DATA_FORMAT_10_11_11    = 0x6,
  BUF_DATA_FORMAT_11_11_10    = 0x7,
  BUF_DATA_FORMAT_10_10_10_2  = 0x8,
  BUF_DATA_FORMAT_2_10_10_10  = 0x9,
  BUF_DATA_FORMAT_8_8_8_8     = 0xa,
  BUF_DATA_FORMAT_32_32       = 0xb,
  BUF_DATA_FORMAT_16_16_16_16 = 0xc,
  BUF_DATA_FORMAT_32_32_32    = 0xd,
  BUF_DATA_FORMAT_32_32_32_32 = 0xe,
  BUF_DATA_FORMAT_RESERVED_15 = 0xf,
} BUF_DATA_FORMAT;
|
||||
|
||||
/* Buffer numeric format encodings. Encoding 0x6 carries two names
 * (SNORM_OGL__SI__CI and RESERVED_6__VI). */
typedef enum BUF_NUM_FORMAT {
  BUF_NUM_FORMAT_UNORM             = 0x0,
  BUF_NUM_FORMAT_SNORM             = 0x1,
  BUF_NUM_FORMAT_USCALED           = 0x2,
  BUF_NUM_FORMAT_SSCALED           = 0x3,
  BUF_NUM_FORMAT_UINT              = 0x4,
  BUF_NUM_FORMAT_SINT              = 0x5,
  BUF_NUM_FORMAT_SNORM_OGL__SI__CI = 0x6,
  BUF_NUM_FORMAT_RESERVED_6__VI    = 0x6,
  BUF_NUM_FORMAT_FLOAT             = 0x7,
} BUF_NUM_FORMAT;
|
||||
|
||||
/* Selector encodings: constants 0 and 1, two reserved values, then the
 * X, Y, Z and W selectors. */
typedef enum SQ_SEL_XYZW01 {
  SQ_SEL_0          = 0x0,
  SQ_SEL_1          = 0x1,
  SQ_SEL_RESERVED_0 = 0x2,
  SQ_SEL_RESERVED_1 = 0x3,
  SQ_SEL_X          = 0x4,
  SQ_SEL_Y          = 0x5,
  SQ_SEL_Z          = 0x6,
  SQ_SEL_W          = 0x7,
} SQ_SEL_XYZW01;
|
||||
|
||||
/* 32-bit COMPUTE_TMPRING_SIZE register image. The same storage can be read
 * as named bit-fields (12-bit WAVES, 13-bit WAVESIZE, 7 unnamed bits) or as a
 * raw unsigned, signed or float word. The bit-fields are only declared when
 * LITTLEENDIAN_CPU or BIGENDIAN_CPU is defined. */
union COMPUTE_TMPRING_SIZE {
  struct {
#if defined(LITTLEENDIAN_CPU)
    unsigned int WAVES    : 12;
    unsigned int WAVESIZE : 13;
    unsigned int          : 7;
#elif defined(BIGENDIAN_CPU)
    unsigned int          : 7;
    unsigned int WAVESIZE : 13;
    unsigned int WAVES    : 12;
#endif
  } bitfields, bits;
  unsigned int u32All;
  signed int i32All;
  float f32All;
};
|
||||
|
||||
|
||||
/*
 * SQ_BUF_RSRC_WORD0: one 32-bit word holding a single 32-bit BASE_ADDRESS
 * field, also readable as a whole unsigned, signed or float value.
 *
 * Both endianness branches declare the same single field; when neither
 * LITTLEENDIAN_CPU nor BIGENDIAN_CPU is defined the struct has no named
 * fields.
 */
union SQ_BUF_RSRC_WORD0 {
  struct {
#if defined(LITTLEENDIAN_CPU)
    unsigned int BASE_ADDRESS : 32;
#elif defined(BIGENDIAN_CPU)
    unsigned int BASE_ADDRESS : 32;
#endif
  } bitfields, bits;
  unsigned int u32All;
  signed int   i32All;
  float        f32All;
};
|
||||
|
||||
|
||||
/*
 * SQ_BUF_RSRC_WORD1: one 32-bit word viewed either as named bit-fields
 * (through `bitfields` / `bits`) or as a whole unsigned, signed or float
 * value.
 *
 * Field widths add up to 32 bits (16 + 14 + 1 + 1). The BIGENDIAN_CPU
 * branch lists the same fields in reverse order; when neither endianness
 * macro is defined the struct has no named fields.
 */
union SQ_BUF_RSRC_WORD1 {
  struct {
#if defined(LITTLEENDIAN_CPU)
    unsigned int BASE_ADDRESS_HI : 16;
    unsigned int STRIDE          : 14;
    unsigned int CACHE_SWIZZLE   : 1;
    unsigned int SWIZZLE_ENABLE  : 1;
#elif defined(BIGENDIAN_CPU)
    unsigned int SWIZZLE_ENABLE  : 1;
    unsigned int CACHE_SWIZZLE   : 1;
    unsigned int STRIDE          : 14;
    unsigned int BASE_ADDRESS_HI : 16;
#endif
  } bitfields, bits;
  unsigned int u32All;
  signed int   i32All;
  float        f32All;
};
|
||||
|
||||
|
||||
/*
 * SQ_BUF_RSRC_WORD2: one 32-bit word holding a single 32-bit NUM_RECORDS
 * field, also readable as a whole unsigned, signed or float value.
 *
 * Both endianness branches declare the same single field; when neither
 * LITTLEENDIAN_CPU nor BIGENDIAN_CPU is defined the struct has no named
 * fields.
 */
union SQ_BUF_RSRC_WORD2 {
  struct {
#if defined(LITTLEENDIAN_CPU)
    unsigned int NUM_RECORDS : 32;
#elif defined(BIGENDIAN_CPU)
    unsigned int NUM_RECORDS : 32;
#endif
  } bitfields, bits;
  unsigned int u32All;
  signed int   i32All;
  float        f32All;
};
|
||||
|
||||
|
||||
/*
 * SQ_BUF_RSRC_WORD3: one 32-bit word viewed either as named bit-fields
 * (through `bitfields` / `bits`) or as a whole unsigned, signed or float
 * value.
 *
 * Field widths add up to 32 bits. The BIGENDIAN_CPU branch lists the same
 * fields in reverse order; when neither endianness macro is defined the
 * struct has no named fields.
 * NOTE(review): NUM_FORMAT, DATA_FORMAT, DST_SEL_* and TYPE presumably take
 * the BUF_NUM_FORMAT, BUF_DATA_FORMAT, SQ_SEL_XYZW01 and SQ_RSRC_BUF_TYPE
 * enumerators respectively - confirm against the register specification.
 */
union SQ_BUF_RSRC_WORD3 {
  struct {
#if defined(LITTLEENDIAN_CPU)
    unsigned int DST_SEL_X      : 3;
    unsigned int DST_SEL_Y      : 3;
    unsigned int DST_SEL_Z      : 3;
    unsigned int DST_SEL_W      : 3;
    unsigned int NUM_FORMAT     : 3;
    unsigned int DATA_FORMAT    : 4;
    unsigned int ELEMENT_SIZE   : 2;
    unsigned int INDEX_STRIDE   : 2;
    unsigned int ADD_TID_ENABLE : 1;
    unsigned int ATC__CI__VI    : 1;
    unsigned int HASH_ENABLE    : 1;
    unsigned int HEAP           : 1;
    unsigned int MTYPE__CI__VI  : 3;
    unsigned int TYPE           : 2;
#elif defined(BIGENDIAN_CPU)
    unsigned int TYPE           : 2;
    unsigned int MTYPE__CI__VI  : 3;
    unsigned int HEAP           : 1;
    unsigned int HASH_ENABLE    : 1;
    unsigned int ATC__CI__VI    : 1;
    unsigned int ADD_TID_ENABLE : 1;
    unsigned int INDEX_STRIDE   : 2;
    unsigned int ELEMENT_SIZE   : 2;
    unsigned int DATA_FORMAT    : 4;
    unsigned int NUM_FORMAT     : 3;
    unsigned int DST_SEL_W      : 3;
    unsigned int DST_SEL_Z      : 3;
    unsigned int DST_SEL_Y      : 3;
    unsigned int DST_SEL_X      : 3;
#endif
  } bitfields, bits;
  unsigned int u32All;
  signed int   i32All;
  float        f32All;
};
|
||||
|
||||
#endif // header guard
|
||||
@@ -1,498 +0,0 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// HSA runtime C++ interface file.
|
||||
|
||||
#ifndef HSA_RUNTME_CORE_INC_RUNTIME_H_
|
||||
#define HSA_RUNTME_CORE_INC_RUNTIME_H_
|
||||
|
||||
#include <functional>
#include <map>
#include <vector>
|
||||
|
||||
#include "core/inc/hsa_ext_interface.h"
|
||||
#include "core/inc/hsa_internal.h"
|
||||
|
||||
#include "core/inc/agent.h"
|
||||
#include "core/inc/memory_region.h"
|
||||
#include "core/inc/signal.h"
|
||||
#include "core/util/utils.h"
|
||||
#include "core/util/locks.h"
|
||||
#include "core/util/os.h"
|
||||
|
||||
#include "core/inc/amd_loader_context.hpp"
|
||||
#include "amd_hsa_code.hpp"
|
||||
|
||||
//---------------------------------------------------------------------------//
|
||||
// Constants //
|
||||
//---------------------------------------------------------------------------//
|
||||
|
||||
#define HSA_ARGUMENT_ALIGN_BYTES 16
|
||||
#define HSA_QUEUE_ALIGN_BYTES 64
|
||||
#define HSA_PACKET_ALIGN_BYTES 64
|
||||
|
||||
namespace core {
|
||||
extern bool g_use_interrupt_wait;
|
||||
|
||||
/// @brief Runtime class provides the following functions:
|
||||
/// - open and close connection to kernel driver.
|
||||
/// - load supported extension library (image and finalizer).
|
||||
/// - load tools library.
|
||||
/// - expose supported agents.
|
||||
/// - allocate and free memory.
|
||||
/// - memory copy and fill.
|
||||
/// - grant access to memory (dgpu memory pool extension).
|
||||
/// - maintain loader state.
|
||||
/// - monitor asynchronous event from agent.
|
||||
class Runtime {
|
||||
public:
|
||||
/// @brief Structure to describe connectivity between agents.
|
||||
struct LinkInfo {
|
||||
uint32_t num_hop;
|
||||
hsa_amd_memory_pool_link_info_t info;
|
||||
};
|
||||
|
||||
/// @brief Open connection to kernel driver and increment reference count.
|
||||
/// @retval True if the connection to kernel driver is successfully opened.
|
||||
static bool Acquire();
|
||||
|
||||
/// @brief Checks if connection to kernel driver is opened.
|
||||
/// @retval True if the connection to kernel driver is opened.
|
||||
static bool IsOpen();
|
||||
|
||||
// @brief Callback handler for VM fault access.
|
||||
static bool VMFaultHandler(hsa_signal_value_t val, void* arg);
|
||||
|
||||
/// @brief Singleton object of the runtime.
|
||||
static Runtime* runtime_singleton_;
|
||||
|
||||
/// @brief Decrement reference count and close connection to kernel driver.
|
||||
/// @retval True if reference count is larger than 0.
|
||||
bool Release();
|
||||
|
||||
/// @brief Insert agent into agent list ::agents_.
|
||||
/// @param [in] agent Pointer to the agent object.
|
||||
void RegisterAgent(Agent* agent);
|
||||
|
||||
/// @brief Delete all agent objects from ::agents_.
|
||||
void DestroyAgents();
|
||||
|
||||
/// @brief Set the number of links connecting the agents in the platform.
|
||||
void SetLinkCount(size_t num_link);
|
||||
|
||||
/// @brief Register link information connecting @p node_id_from and @p
|
||||
/// node_id_to.
|
||||
/// @param [in] node_id_from Node id of the source node.
|
||||
/// @param [in] node_id_to Node id of the destination node.
|
||||
/// @param [in] link_info The link information between source and destination
|
||||
/// nodes.
|
||||
void RegisterLinkInfo(uint32_t node_id_from, uint32_t node_id_to,
|
||||
uint32_t num_hop,
|
||||
hsa_amd_memory_pool_link_info_t& link_info);
|
||||
|
||||
/// @brief Query link information between two nodes.
|
||||
/// @param [in] node_id_from Node id of the source node.
|
||||
/// @param [in] node_id_to Node id of the destination node.
|
||||
/// @retval The link information between source and destination nodes.
|
||||
const LinkInfo& GetLinkInfo(uint32_t node_id_from, uint32_t node_id_to);
|
||||
|
||||
/// @brief Invoke the user provided call back for each agent in the agent
|
||||
/// list.
|
||||
///
|
||||
/// @param [in] callback User provided callback function.
|
||||
/// @param [in] data User provided pointer as input for @p callback.
|
||||
///
|
||||
/// @retval ::HSA_STATUS_SUCCESS if the callback function for each traversed
|
||||
/// agent returns ::HSA_STATUS_SUCCESS.
|
||||
hsa_status_t IterateAgent(hsa_status_t (*callback)(hsa_agent_t agent,
|
||||
void* data),
|
||||
void* data);
|
||||
|
||||
/// @brief Allocate memory on a particular region.
|
||||
///
|
||||
/// @param [in] region Pointer to region object.
|
||||
/// @param [in] size Allocation size in bytes.
|
||||
/// @param [out] address Pointer to store the allocation result.
|
||||
///
|
||||
/// @retval ::HSA_STATUS_SUCCESS If allocation is successful.
|
||||
hsa_status_t AllocateMemory(const MemoryRegion* region, size_t size,
|
||||
void** address);
|
||||
|
||||
/// @brief Allocate memory on a particular region with option to restrict
|
||||
/// access to the owning agent.
|
||||
///
|
||||
/// @param [in] restrict_access If true, the allocation result would only be
|
||||
/// accessible to the agent(s) that own the region object.
|
||||
/// @param [in] region Pointer to region object.
|
||||
/// @param [in] size Allocation size in bytes.
|
||||
/// @param [out] address Pointer to store the allocation result.
|
||||
///
|
||||
/// @retval ::HSA_STATUS_SUCCESS If allocation is successful.
|
||||
hsa_status_t AllocateMemory(bool restrict_access, const MemoryRegion* region,
|
||||
size_t size, void** address);
|
||||
|
||||
/// @brief Free memory previously allocated with AllocateMemory.
|
||||
///
|
||||
/// @param [in] ptr Address of the memory to be freed.
|
||||
///
|
||||
/// @retval ::HSA_STATUS_ERROR If @p ptr is not the address of previous
|
||||
/// allocation via ::core::Runtime::AllocateMemory
|
||||
/// @retval ::HSA_STATUS_SUCCESS if @p ptr is successfully released.
|
||||
hsa_status_t FreeMemory(void* ptr);
|
||||
|
||||
/// @brief Blocking memory copy from src to dst.
|
||||
///
|
||||
/// @param [in] dst Memory address of the destination.
|
||||
/// @param [in] src Memory address of the source.
|
||||
/// @param [in] size Copy size in bytes.
|
||||
///
|
||||
/// @retval ::HSA_STATUS_SUCCESS if memory copy is successful and completed.
|
||||
hsa_status_t CopyMemory(void* dst, const void* src, size_t size);
|
||||
|
||||
/// @brief Non-blocking memory copy from src to dst.
|
||||
///
|
||||
/// @details The memory copy will be performed after all signals in
|
||||
/// @p dep_signals have value of 0. On completion @p completion_signal
|
||||
/// will be decremented.
|
||||
///
|
||||
/// @param [in] dst Memory address of the destination.
|
||||
/// @param [in] dst_agent Agent object associated with the destination. This
|
||||
/// agent should be able to access the destination and source.
|
||||
/// @param [in] src Memory address of the source.
|
||||
/// @param [in] src_agent Agent object associated with the source. This
|
||||
/// agent should be able to access the destination and source.
|
||||
/// @param [in] size Copy size in bytes.
|
||||
/// @param [in] dep_signals Array of signal dependency.
|
||||
/// @param [in] completion_signal Completion signal object.
|
||||
///
|
||||
/// @retval ::HSA_STATUS_SUCCESS if copy command has been submitted
|
||||
/// successfully to the agent DMA queue.
|
||||
hsa_status_t CopyMemory(void* dst, core::Agent& dst_agent, const void* src,
|
||||
core::Agent& src_agent, size_t size,
|
||||
std::vector<core::Signal*>& dep_signals,
|
||||
core::Signal& completion_signal);
|
||||
|
||||
/// @brief Fill the first @p count of uint32_t in ptr with value.
|
||||
///
|
||||
/// @param [in] ptr Memory address to be filled.
|
||||
/// @param [in] value The value/pattern that will be used to set @p ptr.
|
||||
/// @param [in] count Number of uint32_t element to be set.
|
||||
///
|
||||
/// @retval ::HSA_STATUS_SUCCESS if memory fill is successful and completed.
|
||||
hsa_status_t FillMemory(void* ptr, uint32_t value, size_t count);
|
||||
|
||||
/// @brief Set agents as the whitelist to access ptr.
|
||||
///
|
||||
/// @param [in] num_agents The number of agent handles in @p agents array.
|
||||
/// @param [in] agents Agent handle array.
|
||||
/// @param [in] ptr Pointer of memory previously allocated via
|
||||
/// core::Runtime::AllocateMemory.
|
||||
///
|
||||
/// @retval ::HSA_STATUS_SUCCESS The whitelist has been configured
|
||||
/// successfully and all agents in the @p agents could start accessing @p ptr.
|
||||
hsa_status_t AllowAccess(uint32_t num_agents, const hsa_agent_t* agents,
|
||||
const void* ptr);
|
||||
|
||||
/// @brief Query system information.
|
||||
///
|
||||
/// @param [in] attribute System info attribute to query.
|
||||
/// @param [out] value Pointer to store the attribute value.
|
||||
///
|
||||
/// @retval HSA_STATUS_SUCCESS The attribute is valid and the @p value is
|
||||
/// set.
|
||||
hsa_status_t GetSystemInfo(hsa_system_info_t attribute, void* value);
|
||||
|
||||
/// @brief Query next available queue id.
|
||||
///
|
||||
/// @retval Next available queue id.
|
||||
uint32_t GetQueueId();
|
||||
|
||||
/// @brief Register a callback function @p handler that is associated with
|
||||
/// @p signal to asynchronous event monitor thread.
|
||||
///
|
||||
/// @param [in] signal Signal handle associated with @p handler.
|
||||
/// @param [in] cond The condition to execute the @p handler.
|
||||
/// @param [in] value The value to compare with @p signal value. If the
|
||||
/// comparison satisfy @p cond, the @p handler will be called.
|
||||
/// @param [in] arg Pointer to the argument that will be provided to @p
|
||||
/// handler.
|
||||
///
|
||||
/// @retval ::HSA_STATUS_SUCCESS Registration is successful.
|
||||
hsa_status_t SetAsyncSignalHandler(hsa_signal_t signal,
|
||||
hsa_signal_condition_t cond,
|
||||
hsa_signal_value_t value,
|
||||
hsa_amd_signal_handler handler, void* arg);
|
||||
|
||||
hsa_status_t InteropMap(uint32_t num_agents, Agent** agents,
|
||||
int interop_handle, uint32_t flags, size_t* size,
|
||||
void** ptr, size_t* metadata_size,
|
||||
const void** metadata);
|
||||
|
||||
hsa_status_t InteropUnmap(void* ptr);
|
||||
|
||||
const std::vector<Agent*>& cpu_agents() { return cpu_agents_; }
|
||||
|
||||
const std::vector<Agent*>& gpu_agents() { return gpu_agents_; }
|
||||
|
||||
|
||||
const std::vector<uint32_t>& gpu_ids() { return gpu_ids_; }
|
||||
|
||||
Agent* blit_agent() { return blit_agent_; }
|
||||
|
||||
Agent* host_agent() { return host_agent_; }
|
||||
|
||||
const std::vector<const MemoryRegion*>& system_regions_fine() const {
|
||||
return system_regions_fine_;
|
||||
}
|
||||
|
||||
const std::vector<const MemoryRegion*>& system_regions_coarse() const {
|
||||
return system_regions_coarse_;
|
||||
}
|
||||
|
||||
amd::hsa::loader::Loader* loader() { return loader_; }
|
||||
|
||||
amd::LoaderContext* loader_context() { return &loader_context_; }
|
||||
|
||||
amd::hsa::code::AmdHsaCodeManager* code_manager() { return &code_manager_; }
|
||||
|
||||
std::function<void*(size_t, size_t)>& system_allocator() {
|
||||
return system_allocator_;
|
||||
}
|
||||
|
||||
std::function<void(void*)>& system_deallocator() {
|
||||
return system_deallocator_;
|
||||
}
|
||||
|
||||
ExtensionEntryPoints extensions_;
|
||||
|
||||
protected:
|
||||
static void AsyncEventsLoop(void*);
|
||||
|
||||
struct AllocationRegion {
|
||||
AllocationRegion() : region(NULL), assigned_agent_(NULL), size(0) {}
|
||||
AllocationRegion(const MemoryRegion* region_arg, size_t size_arg)
|
||||
: region(region_arg), assigned_agent_(NULL), size(size_arg) {}
|
||||
|
||||
const MemoryRegion* region;
|
||||
const Agent* assigned_agent_;
|
||||
size_t size;
|
||||
};
|
||||
|
||||
struct AsyncEventsControl {
|
||||
AsyncEventsControl() : async_events_thread_(NULL) {}
|
||||
void Shutdown();
|
||||
|
||||
hsa_signal_t wake;
|
||||
os::Thread async_events_thread_;
|
||||
KernelMutex lock;
|
||||
bool exit;
|
||||
};
|
||||
|
||||
struct AsyncEvents {
|
||||
void PushBack(hsa_signal_t signal, hsa_signal_condition_t cond,
|
||||
hsa_signal_value_t value, hsa_amd_signal_handler handler,
|
||||
void* arg);
|
||||
|
||||
void CopyIndex(size_t dst, size_t src);
|
||||
|
||||
size_t Size();
|
||||
|
||||
void PopBack();
|
||||
|
||||
void Clear();
|
||||
|
||||
std::vector<hsa_signal_t> signal_;
|
||||
std::vector<hsa_signal_condition_t> cond_;
|
||||
std::vector<hsa_signal_value_t> value_;
|
||||
std::vector<hsa_amd_signal_handler> handler_;
|
||||
std::vector<void*> arg_;
|
||||
};
|
||||
|
||||
// Will be created before any user could call hsa_init but also could be
|
||||
// destroyed before incorrectly written programs call hsa_shutdown.
|
||||
static KernelMutex bootstrap_lock_;
|
||||
|
||||
Runtime();
|
||||
|
||||
Runtime(const Runtime&);
|
||||
|
||||
Runtime& operator=(const Runtime&);
|
||||
|
||||
~Runtime() {}
|
||||
|
||||
/// @brief Open connection to kernel driver.
|
||||
void Load();
|
||||
|
||||
/// @brief Close connection to kernel driver and cleanup resources.
|
||||
void Unload();
|
||||
|
||||
/// @brief Dynamically load extension libraries (images, finalizer) and
|
||||
/// call OnLoad method on each loaded library.
|
||||
void LoadExtensions();
|
||||
|
||||
/// @brief Call OnUnload method on each extension library then close it.
|
||||
void UnloadExtensions();
|
||||
|
||||
/// @brief Dynamically load tool libraries and call OnUnload method on each
|
||||
/// loaded library.
|
||||
void LoadTools();
|
||||
|
||||
/// @brief Call OnUnload method of each tool library.
|
||||
void UnloadTools();
|
||||
|
||||
/// @brief Close tool libraries.
|
||||
void CloseTools();
|
||||
|
||||
// @brief Binds virtual memory access fault handler to this node.
|
||||
void BindVmFaultHandler();
|
||||
|
||||
/// @brief Blocking memory copy from src to dst. One of the src or dst
|
||||
/// is user pointer. A particular setup need to be made if the DMA queue
|
||||
/// for the memory copy belongs to a dGPU agent. E.g: pin the user pointer
|
||||
/// before copying, or using a staging buffer.
|
||||
///
|
||||
/// @param [in] dst Memory address of the destination.
|
||||
/// @param [in] src Memory address of the source.
|
||||
/// @param [in] size Copy size in bytes.
|
||||
/// @param [in] dst_malloc If true, then @p dst is the user pointer. Otherwise
|
||||
/// @p src is the user pointer.
|
||||
///
|
||||
/// @retval ::HSA_STATUS_SUCCESS if memory copy is successful and completed.
|
||||
hsa_status_t CopyMemoryHostAlloc(void* dst, const void* src, size_t size,
|
||||
bool dst_malloc);
|
||||
|
||||
/// @brief Get the index of ::link_matrix_.
|
||||
/// @param [in] node_id_from Node id of the source node.
|
||||
/// @param [in] node_id_to Node id of the destination node.
|
||||
/// @retval Index in ::link_matrix_.
|
||||
uint32_t GetIndexLinkInfo(uint32_t node_id_from, uint32_t node_id_to);
|
||||
|
||||
// Mutex object to protect multithreaded access to ::Acquire and ::Release.
|
||||
KernelMutex kernel_lock_;
|
||||
|
||||
// Mutex object to protect multithreaded access to ::allocation_map_.
|
||||
KernelMutex memory_lock_;
|
||||
|
||||
// Array containing tools library handles.
|
||||
std::vector<os::LibHandle> tool_libs_;
|
||||
|
||||
// Agent list containing all CPU agents in the platform.
|
||||
std::vector<Agent*> cpu_agents_;
|
||||
|
||||
// Agent list containing all compatible GPU agents in the platform.
|
||||
std::vector<Agent*> gpu_agents_;
|
||||
|
||||
// Agent list containing all compatible gpu agent ids in the platform.
|
||||
std::vector<uint32_t> gpu_ids_;
|
||||
|
||||
// List of all fine grain system memory region in the platform.
|
||||
std::vector<const MemoryRegion*> system_regions_fine_;
|
||||
|
||||
// List of all coarse grain system memory region in the platform.
|
||||
std::vector<const MemoryRegion*> system_regions_coarse_;
|
||||
|
||||
// Matrix of IO link.
|
||||
std::vector<LinkInfo> link_matrix_;
|
||||
|
||||
// Loader instance.
|
||||
amd::hsa::loader::Loader* loader_;
|
||||
|
||||
// Loader context.
|
||||
amd::LoaderContext loader_context_;
|
||||
|
||||
// Code object manager.
|
||||
amd::hsa::code::AmdHsaCodeManager code_manager_;
|
||||
|
||||
// Contains the region, address, and size of previously allocated memory.
|
||||
std::map<const void*, AllocationRegion> allocation_map_;
|
||||
|
||||
// Allocator using ::system_region_
|
||||
std::function<void*(size_t, size_t)> system_allocator_;
|
||||
|
||||
// Deallocator using ::system_region_
|
||||
std::function<void(void*)> system_deallocator_;
|
||||
|
||||
// Pointer to a host/cpu agent object.
|
||||
Agent* host_agent_;
|
||||
|
||||
// Pointer to DMA agent.
|
||||
Agent* blit_agent_;
|
||||
|
||||
AsyncEventsControl async_events_control_;
|
||||
|
||||
AsyncEvents async_events_;
|
||||
|
||||
AsyncEvents new_async_events_;
|
||||
|
||||
// Queue id counter.
|
||||
uint32_t queue_count_;
|
||||
|
||||
// Starting address of SVM address space.
|
||||
// On APU the cpu and gpu could access the area inside starting and end of
|
||||
// the SVM address space.
|
||||
// On dGPU, only the gpu is guaranteed to have access to the area inside the
|
||||
// SVM address space, since it maybe backed by private gpu VRAM.
|
||||
uintptr_t start_svm_address_;
|
||||
|
||||
// End address of SVM address space.
|
||||
// start_svm_address_ + size
|
||||
uintptr_t end_svm_address_;
|
||||
|
||||
// System clock frequency.
|
||||
uint64_t sys_clock_freq_;
|
||||
|
||||
// @brief AMD HSA event to monitor for virtual memory access fault.
|
||||
HsaEvent* vm_fault_event_;
|
||||
|
||||
// @brief HSA signal to contain the VM fault event.
|
||||
Signal* vm_fault_signal_;
|
||||
|
||||
// Holds reference count to runtime object.
|
||||
volatile uint32_t ref_count_;
|
||||
|
||||
// Frees runtime memory when the runtime library is unloaded if safe to do so.
|
||||
// Failure to release the runtime indicates an incorrect application but is
|
||||
// common (example: calls library routines at process exit).
|
||||
friend class RuntimeCleanup;
|
||||
};
|
||||
|
||||
} // namespace core
|
||||
#endif // header guard
|
||||
@@ -1,269 +0,0 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// HSA runtime C++ interface file.
|
||||
|
||||
#ifndef HSA_RUNTME_CORE_INC_SIGNAL_H_
|
||||
#define HSA_RUNTME_CORE_INC_SIGNAL_H_
|
||||
|
||||
#include "hsakmt.h"
|
||||
|
||||
#include "core/common/shared.h"
|
||||
|
||||
#include "core/inc/runtime.h"
|
||||
#include "core/inc/checked.h"
|
||||
|
||||
#include "core/util/utils.h"
|
||||
|
||||
#include "inc/amd_hsa_signal.h"
|
||||
|
||||
namespace core {
|
||||
class Signal;
|
||||
|
||||
/// @brief Helper structure to simplify conversion of amd_signal_t and
|
||||
/// core::Signal object.
|
||||
struct SharedSignal {
|
||||
amd_signal_t amd_signal;
|
||||
Signal* core_signal;
|
||||
};
|
||||
|
||||
/// @brief An abstract base class which helps implement the public hsa_signal_t
|
||||
/// type (an opaque handle) and its associated APIs. At its core, signal uses
|
||||
/// a 32 or 64 bit value. This value can be waitied on or signaled atomically
|
||||
/// using specified memory ordering semantics.
|
||||
class Signal : public Checked<0x71FCCA6A3D5D5276>,
|
||||
public Shared<SharedSignal, AMD_SIGNAL_ALIGN_BYTES> {
|
||||
public:
|
||||
/// @brief Constructor initializes the signal with initial value.
|
||||
explicit Signal(hsa_signal_value_t initial_value)
|
||||
: Shared(), signal_(shared_object()->amd_signal) {
|
||||
if (!Shared::IsSharedObjectAllocationValid()) {
|
||||
invalid_ = true;
|
||||
return;
|
||||
}
|
||||
|
||||
shared_object()->core_signal = this;
|
||||
|
||||
signal_.kind = AMD_SIGNAL_KIND_INVALID;
|
||||
signal_.value = initial_value;
|
||||
invalid_ = false;
|
||||
waiting_ = 0;
|
||||
retained_ = 0;
|
||||
}
|
||||
|
||||
virtual ~Signal() { signal_.kind = AMD_SIGNAL_KIND_INVALID; }
|
||||
|
||||
bool IsValid() const {
|
||||
if (CheckedType::IsValid() && !invalid_) return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
/// @brief Converts from this implementation class to the public
|
||||
/// hsa_signal_t type - an opaque handle.
|
||||
static __forceinline hsa_signal_t Convert(Signal* signal) {
|
||||
const uint64_t handle =
|
||||
(signal != NULL && signal->IsValid())
|
||||
? static_cast<uint64_t>(
|
||||
reinterpret_cast<uintptr_t>(&signal->signal_))
|
||||
: 0;
|
||||
const hsa_signal_t signal_handle = {handle};
|
||||
return signal_handle;
|
||||
}
|
||||
|
||||
/// @brief Converts from this implementation class to the public
|
||||
/// hsa_signal_t type - an opaque handle.
|
||||
static __forceinline const hsa_signal_t Convert(const Signal* signal) {
|
||||
const uint64_t handle =
|
||||
(signal != NULL && signal->IsValid())
|
||||
? static_cast<uint64_t>(
|
||||
reinterpret_cast<uintptr_t>(&signal->signal_))
|
||||
: 0;
|
||||
const hsa_signal_t signal_handle = {handle};
|
||||
return signal_handle;
|
||||
}
|
||||
|
||||
/// @brief Converts from public hsa_signal_t type (an opaque handle) to
|
||||
/// this implementation class object.
|
||||
static __forceinline Signal* Convert(hsa_signal_t signal) {
|
||||
return (signal.handle != 0)
|
||||
? reinterpret_cast<const SharedSignal*>(
|
||||
static_cast<uintptr_t>(signal.handle) -
|
||||
(reinterpret_cast<uintptr_t>(
|
||||
&reinterpret_cast<SharedSignal*>(1234)->amd_signal) -
|
||||
uintptr_t(1234)))->core_signal
|
||||
: NULL;
|
||||
}
|
||||
|
||||
// Below are various methods corresponding to the APIs, which load/store the
|
||||
// signal value or modify the existing signal value automically and with
|
||||
// specified memory ordering semantics.
|
||||
virtual hsa_signal_value_t LoadRelaxed() = 0;
|
||||
virtual hsa_signal_value_t LoadAcquire() = 0;
|
||||
|
||||
virtual void StoreRelaxed(hsa_signal_value_t value) = 0;
|
||||
virtual void StoreRelease(hsa_signal_value_t value) = 0;
|
||||
|
||||
virtual hsa_signal_value_t WaitRelaxed(hsa_signal_condition_t condition,
|
||||
hsa_signal_value_t compare_value,
|
||||
uint64_t timeout,
|
||||
hsa_wait_state_t wait_hint) = 0;
|
||||
virtual hsa_signal_value_t WaitAcquire(hsa_signal_condition_t condition,
|
||||
hsa_signal_value_t compare_value,
|
||||
uint64_t timeout,
|
||||
hsa_wait_state_t wait_hint) = 0;
|
||||
|
||||
virtual void AndRelaxed(hsa_signal_value_t value) = 0;
|
||||
virtual void AndAcquire(hsa_signal_value_t value) = 0;
|
||||
virtual void AndRelease(hsa_signal_value_t value) = 0;
|
||||
virtual void AndAcqRel(hsa_signal_value_t value) = 0;
|
||||
|
||||
virtual void OrRelaxed(hsa_signal_value_t value) = 0;
|
||||
virtual void OrAcquire(hsa_signal_value_t value) = 0;
|
||||
virtual void OrRelease(hsa_signal_value_t value) = 0;
|
||||
virtual void OrAcqRel(hsa_signal_value_t value) = 0;
|
||||
|
||||
virtual void XorRelaxed(hsa_signal_value_t value) = 0;
|
||||
virtual void XorAcquire(hsa_signal_value_t value) = 0;
|
||||
virtual void XorRelease(hsa_signal_value_t value) = 0;
|
||||
virtual void XorAcqRel(hsa_signal_value_t value) = 0;
|
||||
|
||||
virtual void AddRelaxed(hsa_signal_value_t value) = 0;
|
||||
virtual void AddAcquire(hsa_signal_value_t value) = 0;
|
||||
virtual void AddRelease(hsa_signal_value_t value) = 0;
|
||||
virtual void AddAcqRel(hsa_signal_value_t value) = 0;
|
||||
|
||||
virtual void SubRelaxed(hsa_signal_value_t value) = 0;
|
||||
virtual void SubAcquire(hsa_signal_value_t value) = 0;
|
||||
virtual void SubRelease(hsa_signal_value_t value) = 0;
|
||||
virtual void SubAcqRel(hsa_signal_value_t value) = 0;
|
||||
|
||||
virtual hsa_signal_value_t ExchRelaxed(hsa_signal_value_t value) = 0;
|
||||
virtual hsa_signal_value_t ExchAcquire(hsa_signal_value_t value) = 0;
|
||||
virtual hsa_signal_value_t ExchRelease(hsa_signal_value_t value) = 0;
|
||||
virtual hsa_signal_value_t ExchAcqRel(hsa_signal_value_t value) = 0;
|
||||
|
||||
virtual hsa_signal_value_t CasRelaxed(hsa_signal_value_t expected,
|
||||
hsa_signal_value_t value) = 0;
|
||||
virtual hsa_signal_value_t CasAcquire(hsa_signal_value_t expected,
|
||||
hsa_signal_value_t value) = 0;
|
||||
virtual hsa_signal_value_t CasRelease(hsa_signal_value_t expected,
|
||||
hsa_signal_value_t value) = 0;
|
||||
virtual hsa_signal_value_t CasAcqRel(hsa_signal_value_t expected,
|
||||
hsa_signal_value_t value) = 0;
|
||||
|
||||
//-------------------------
|
||||
// implementation specific
|
||||
//-------------------------
|
||||
typedef void* rtti_t;
|
||||
|
||||
/// @brief Returns the address of the value.
|
||||
virtual hsa_signal_value_t* ValueLocation() const = 0;
|
||||
|
||||
/// @brief Applies only to InterrupEvent type, returns the event used to.
|
||||
/// Returns NULL for DefaultEvent Type.
|
||||
virtual HsaEvent* EopEvent() = 0;
|
||||
|
||||
/// @brief Waits until any signal in the list satisfies its condition or
|
||||
/// timeout is reached.
|
||||
/// Returns the index of a satisfied signal. Returns -1 on timeout and
|
||||
/// errors.
|
||||
static uint32_t WaitAny(uint32_t signal_count, hsa_signal_t* hsa_signals,
|
||||
hsa_signal_condition_t* conds,
|
||||
hsa_signal_value_t* values, uint64_t timeout_hint,
|
||||
hsa_wait_state_t wait_hint,
|
||||
hsa_signal_value_t* satisfying_value);
|
||||
|
||||
__forceinline bool IsType(rtti_t id) { return _IsA(id); }
|
||||
|
||||
/// @brief Allows special case interaction with signal destruction cleanup.
|
||||
void Retain() { atomic::Increment(&retained_); }
|
||||
void Release() { atomic::Decrement(&retained_); }
|
||||
|
||||
/// @brief Checks if signal is currently in use such that it should not be
|
||||
/// deleted.
|
||||
bool InUse() const { return (retained_ != 0) || (waiting_ != 0); }
|
||||
|
||||
/// @brief Checks if signal is currently in use by a wait API.
|
||||
bool InWaiting() const { return waiting_ != 0; }
|
||||
|
||||
/// @brief Structure which defines key signal elements like type and value.
|
||||
/// Address of this struct is used as a value for the opaque handle of type
|
||||
/// hsa_signal_t provided to the public API.
|
||||
amd_signal_t& signal_;
|
||||
|
||||
protected:
|
||||
/// @brief Simple RTTI type checking helper
|
||||
/// Returns true if the object can be converted to the query type via
|
||||
/// static_cast.
|
||||
/// Do not use directly. Use IsType in the desired derived type instead.
|
||||
virtual bool _IsA(rtti_t id) const = 0;
|
||||
|
||||
/// @variable Indicates if signal is valid or not.
|
||||
volatile bool invalid_;
|
||||
|
||||
/// @variable Indicates number of runtime threads waiting on this signal.
|
||||
/// Value of zero means no waits.
|
||||
volatile uint32_t waiting_;
|
||||
|
||||
volatile uint32_t retained_;
|
||||
|
||||
private:
|
||||
DISALLOW_COPY_AND_ASSIGN(Signal);
|
||||
};
|
||||
|
||||
struct hsa_signal_handle {
|
||||
hsa_signal_t signal;
|
||||
|
||||
hsa_signal_handle() {}
|
||||
hsa_signal_handle(hsa_signal_t Signal) { signal = Signal; }
|
||||
operator hsa_signal_t() { return signal; }
|
||||
Signal* operator->() { return core::Signal::Convert(signal); }
|
||||
};
|
||||
// A single wrapper must occupy exactly as many bytes as the raw handle.
static_assert(
    sizeof(hsa_signal_t) == sizeof(hsa_signal_handle),
    "hsa_signal_handle and hsa_signal_t must have identical binary layout.");
// The same must hold for arrays of the two types.
static_assert(
    sizeof(hsa_signal_t[2]) == sizeof(hsa_signal_handle[2]),
    "hsa_signal_handle and hsa_signal_t must have identical binary layout.");
|
||||
|
||||
} // namespace core
|
||||
#endif // header guard
|
||||
@@ -1,856 +0,0 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "core/inc/amd_aql_queue.h"
|
||||
|
||||
#ifdef __linux__
|
||||
#include <fcntl.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/syscall.h>
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
#ifdef _WIN32
|
||||
#include <Windows.h>
|
||||
#endif
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "core/inc/runtime.h"
|
||||
#include "core/inc/amd_memory_region.h"
|
||||
#include "core/inc/signal.h"
|
||||
#include "core/inc/queue.h"
|
||||
#include "core/util/utils.h"
|
||||
#include "core/inc/registers.h"
|
||||
#include "core/inc/interrupt_signal.h"
|
||||
|
||||
namespace amd {
|
||||
// Queue::amd_queue_ is cache-aligned for performance.
|
||||
const uint32_t kAmdQueueAlignBytes = 0x40;
|
||||
|
||||
HsaEvent* AqlQueue::queue_event_ = NULL;
|
||||
volatile uint32_t AqlQueue::queue_count_ = 0;
|
||||
KernelMutex AqlQueue::queue_lock_;
|
||||
int AqlQueue::rtti_id_;
|
||||
|
||||
void* AqlQueue::operator new(size_t size) {
|
||||
// Align base to 64B to enforce amd_queue_ member alignment.
|
||||
return _aligned_malloc(size, kAmdQueueAlignBytes);
|
||||
}
|
||||
|
||||
/// @brief Releases storage obtained from AqlQueue::operator new.
void AqlQueue::operator delete(void* ptr) {
  _aligned_free(ptr);
}
|
||||
|
||||
/// @brief Builds an AQL queue on @p agent: sizes and allocates the packet ring,
/// creates the kernel-mode queue, and fills in the doorbell signal and the
/// amd_queue_ descriptor (scratch, apertures, limits).
///
/// On any failure the constructor returns early with valid_ still false; the
/// scope guards release whatever had been acquired up to that point.
///
/// @param agent          GPU agent the queue is created on.
/// @param req_size_pkts  Requested ring size in packets; clamped to the
///                       min/max computed for this agent.
/// @param node_id        Node id passed to hsaKmtCreateQueue.
/// @param scratch        Scratch description stored in queue_scratch_.
/// @param callback       Error callback stored in errors_callback_.
/// @param err_data       User data stored in errors_data_.
/// @param is_kv          Stored in is_kv_queue_; affects ring mapping flags.
AqlQueue::AqlQueue(GpuAgent* agent, size_t req_size_pkts, HSAuint32 node_id,
                   ScratchInfo& scratch, core::HsaEventCallback callback,
                   void* err_data, bool is_kv)
    : Queue(),
      Signal(0),
      ring_buf_(NULL),
      ring_buf_alloc_bytes_(0),
      queue_id_(HSA_QUEUEID(-1)),
      valid_(false),
      agent_(agent),
      queue_scratch_(scratch),
      errors_callback_(callback),
      errors_data_(err_data),
      is_kv_queue_(is_kv) {
  if (!Queue::Shared::IsSharedObjectAllocationValid()) {
    return;
  }

  hsa_status_t stat = agent_->GetInfo(HSA_AGENT_INFO_PROFILE, &agent_profile_);
  assert(stat == HSA_STATUS_SUCCESS);
  (void)stat;  // Only consumed by the assert; avoids an unused warning in NDEBUG.

  const core::Isa* isa = agent_->isa();

  // When queue_full_workaround_ is set to 1, the ring buffer is internally
  // doubled in size. Virtual addresses in the upper half of the ring allocation
  // are mapped to the same set of pages backing the lower half.
  // Values written to the HW doorbell are modulo the doubled size.
  // This allows the HW to accept (doorbell == last_doorbell + queue_size).
  // This workaround is required for GFXIP 7 and GFXIP 8 ASICs.
  queue_full_workaround_ =
      (isa->GetMajorVersion() == 7 || isa->GetMajorVersion() == 8) ? 1 : 0;

  // Identify doorbell semantics for this agent.
  doorbell_type_ = agent->properties().Capability.ui32.DoorbellType;

  // Queue size is a function of several restrictions.
  const uint32_t min_pkts = ComputeRingBufferMinPkts();
  const uint32_t max_pkts = ComputeRingBufferMaxPkts();

  // Apply sizing constraints to the ring buffer.
  uint32_t queue_size_pkts = uint32_t(req_size_pkts);
  queue_size_pkts = Min(queue_size_pkts, max_pkts);
  queue_size_pkts = Max(queue_size_pkts, min_pkts);

  // Reject sizes whose byte count is not a power of two.
  // NOTE(review): the product is narrowed to 32 bits; a byte count of 2^32
  // would presumably wrap to 0 and pass this check - confirm max_pkts bounds.
  uint32_t queue_size_bytes = queue_size_pkts * sizeof(core::AqlPacket);
  if ((queue_size_bytes & (queue_size_bytes - 1)) != 0) return;

  // Allocate the AQL packet ring buffer.
  AllocRegisteredRingBuffer(queue_size_pkts);
  if (ring_buf_ == NULL) return;
  MAKE_NAMED_SCOPE_GUARD(RingGuard, [&]() { FreeRegisteredRingBuffer(); });

  // Write HSA_PACKET_TYPE_INVALID into the first dword of every packet slot
  // (slots are 16 dwords apart). The rest of each packet is left
  // uninitialized to help track errors.
  for (uint32_t pkt_id = 0; pkt_id < queue_size_pkts; ++pkt_id) {
    ((uint32_t*)ring_buf_)[16 * pkt_id] = HSA_PACKET_TYPE_INVALID;
  }

  // Zero the amd_queue_ structure to clear RPTR/WPTR before queue attach.
  memset(&amd_queue_, 0, sizeof(amd_queue_));

  // Initialize and map a HW AQL queue.
  HsaQueueResource queue_rsrc = {0};
  queue_rsrc.Queue_read_ptr_aql = (uint64_t*)&amd_queue_.read_dispatch_id;
  queue_rsrc.Queue_write_ptr_aql =
      (uint64_t*)&amd_queue_.max_legacy_doorbell_dispatch_id_plus_1;

  HSAKMT_STATUS kmt_status;
  kmt_status = hsaKmtCreateQueue(node_id, HSA_QUEUE_COMPUTE_AQL, 100,
                                 HSA_QUEUE_PRIORITY_NORMAL, ring_buf_,
                                 ring_buf_alloc_bytes_, NULL, &queue_rsrc);
  if (kmt_status != HSAKMT_STATUS_SUCCESS) return;
  queue_id_ = queue_rsrc.QueueId;
  MAKE_NAMED_SCOPE_GUARD(QueueGuard, [&]() { hsaKmtDestroyQueue(queue_id_); });

  // Populate doorbell signal structure.
  memset(&signal_, 0, sizeof(signal_));
  signal_.kind = AMD_SIGNAL_KIND_LEGACY_DOORBELL;
  signal_.legacy_hardware_doorbell_ptr =
      (volatile uint32_t*)queue_rsrc.Queue_DoorBell;
  signal_.queue_ptr = &amd_queue_;

  // Populate amd_queue_ structure.
  amd_queue_.hsa_queue.type = HSA_QUEUE_TYPE_MULTI;
  amd_queue_.hsa_queue.features = HSA_QUEUE_FEATURE_KERNEL_DISPATCH;
  amd_queue_.hsa_queue.base_address = ring_buf_;
  amd_queue_.hsa_queue.doorbell_signal = Signal::Convert(this);
  amd_queue_.hsa_queue.size = queue_size_pkts;
  amd_queue_.hsa_queue.id = core::Runtime::runtime_singleton_->GetQueueId();
  amd_queue_.read_dispatch_id_field_base_byte_offset = uint32_t(
      uintptr_t(&amd_queue_.read_dispatch_id) - uintptr_t(&amd_queue_));

  const auto& props = agent->properties();
  amd_queue_.max_cu_id = (props.NumFComputeCores / props.NumSIMDPerCU) - 1;
  amd_queue_.max_wave_id = props.MaxWavesPerSIMD - 1;

#ifdef HSA_LARGE_MODEL
  AMD_HSA_BITS_SET(amd_queue_.queue_properties, AMD_QUEUE_PROPERTIES_IS_PTR64,
                   1);
#else
  AMD_HSA_BITS_SET(amd_queue_.queue_properties, AMD_QUEUE_PROPERTIES_IS_PTR64,
                   0);
#endif

  // Populate scratch resource descriptor in amd_queue_.
  SQ_BUF_RSRC_WORD0 srd0;
  SQ_BUF_RSRC_WORD1 srd1;
  SQ_BUF_RSRC_WORD2 srd2;
  SQ_BUF_RSRC_WORD3 srd3;
  uintptr_t scratch_base = uintptr_t(queue_scratch_.queue_base);
  uint32_t scratch_base_hi = 0;

#ifdef HSA_LARGE_MODEL
  scratch_base_hi = uint32_t(scratch_base >> 32);
#endif

  srd0.bits.BASE_ADDRESS = uint32_t(scratch_base);
  srd1.bits.BASE_ADDRESS_HI = scratch_base_hi;
  srd1.bits.STRIDE = 0;
  srd1.bits.CACHE_SWIZZLE = 0;
  srd1.bits.SWIZZLE_ENABLE = 1;
  srd2.bits.NUM_RECORDS = uint32_t(queue_scratch_.size);
  srd3.bits.DST_SEL_X = SQ_SEL_X;
  srd3.bits.DST_SEL_Y = SQ_SEL_Y;
  srd3.bits.DST_SEL_Z = SQ_SEL_Z;
  srd3.bits.DST_SEL_W = SQ_SEL_W;
  srd3.bits.NUM_FORMAT = BUF_NUM_FORMAT_UINT;
  srd3.bits.DATA_FORMAT = BUF_DATA_FORMAT_32;
  srd3.bits.ELEMENT_SIZE = 1;  // 4
  srd3.bits.INDEX_STRIDE = 3;  // 64
  srd3.bits.ADD_TID_ENABLE = 1;
  srd3.bits.ATC__CI__VI = (agent_profile_ == HSA_PROFILE_FULL) ? 1 : 0;
  srd3.bits.HASH_ENABLE = 0;
  srd3.bits.HEAP = 0;
  srd3.bits.MTYPE__CI__VI = 0;
  srd3.bits.TYPE = SQ_RSRC_BUF;

  amd_queue_.scratch_resource_descriptor[0] = srd0.u32All;
  amd_queue_.scratch_resource_descriptor[1] = srd1.u32All;
  amd_queue_.scratch_resource_descriptor[2] = srd2.u32All;
  amd_queue_.scratch_resource_descriptor[3] = srd3.u32All;

  // Populate flat scratch parameters in amd_queue_.
  amd_queue_.scratch_backing_memory_location =
      queue_scratch_.queue_process_offset;
  amd_queue_.scratch_backing_memory_byte_size = queue_scratch_.size;
  amd_queue_.scratch_workitem_byte_size =
      uint32_t(queue_scratch_.size_per_thread);

  // Set concurrent wavefront limits when scratch is being used.
  COMPUTE_TMPRING_SIZE tmpring_size = {0};

  if (queue_scratch_.size != 0) {
    tmpring_size.bits.WAVES =
        (queue_scratch_.size / queue_scratch_.size_per_thread / 64);
    tmpring_size.bits.WAVESIZE =
        (((64 * queue_scratch_.size_per_thread) + 1023) / 1024);
  }

  amd_queue_.compute_tmpring_size = tmpring_size.u32All;

  // Set group and private memory apertures in amd_queue_.
  auto& regions = agent->regions();

  // size_t index: regions.size() is unsigned, so avoid a signed comparison.
  for (size_t i = 0; i < regions.size(); ++i) {
    const MemoryRegion* amdregion;
    amdregion = static_cast<const MemoryRegion*>(regions[i]);
    uint64_t base = amdregion->GetBaseAddress();

    if (amdregion->IsLDS()) {
#ifdef HSA_LARGE_MODEL
      amd_queue_.group_segment_aperture_base_hi =
          uint32_t(uintptr_t(base) >> 32);
#else
      amd_queue_.group_segment_aperture_base_hi = uint32_t(base);
#endif
    }

    if (amdregion->IsScratch()) {
#ifdef HSA_LARGE_MODEL
      amd_queue_.private_segment_aperture_base_hi =
          uint32_t(uintptr_t(base) >> 32);
#else
      amd_queue_.private_segment_aperture_base_hi = uint32_t(base);
#endif
    }
  }

  // The aperture fields are integers, so compare against 0 rather than NULL.
  assert(amd_queue_.group_segment_aperture_base_hi != 0 &&
         "No group region found.");

  if (os::GetEnvVar("HSA_CHECK_FLAT_SCRATCH") == "1") {
    assert(amd_queue_.private_segment_aperture_base_hi != 0 &&
           "No private region found.");
  }

  // Drops this queue's reference on the shared queue event; the event is
  // destroyed when the count reaches zero.
  MAKE_NAMED_SCOPE_GUARD(EventGuard, [&]() {
    ScopedAcquire<KernelMutex> _lock(&queue_lock_);
    queue_count_--;
    if (queue_count_ == 0) {
      core::InterruptSignal::DestroyEvent(queue_event_);
      queue_event_ = NULL;
    }
  });

  // Destroys the queue-inactive signal if construction fails after this point.
  MAKE_NAMED_SCOPE_GUARD(SignalGuard, [&]() {
    HSA::hsa_signal_destroy(amd_queue_.queue_inactive_signal);
  });

#if defined(HSA_LARGE_MODEL) && defined(__linux__)
  if (core::g_use_interrupt_wait) {
    {
      // Take a reference on the shared event, creating it for the first queue.
      ScopedAcquire<KernelMutex> _lock(&queue_lock_);
      queue_count_++;
      if (queue_event_ == NULL) {
        assert(queue_count_ == 1 &&
               "Inconsistency in queue event reference counting found.\n");

        queue_event_ =
            core::InterruptSignal::CreateEvent(HSA_EVENTTYPE_SIGNAL, false);
        if (queue_event_ == NULL) return;
      }
    }

    // Create the queue-inactive signal and register DynamicScratchHandler to
    // run when its value becomes non-zero.
    auto signal = new core::InterruptSignal(0, queue_event_);
    amd_queue_.queue_inactive_signal = core::InterruptSignal::Convert(signal);
    if (hsa_amd_signal_async_handler(
            amd_queue_.queue_inactive_signal, HSA_SIGNAL_CONDITION_NE, 0,
            DynamicScratchHandler, this) != HSA_STATUS_SUCCESS)
      return;
  } else {
    // Nothing was acquired on this path, so there is nothing to undo.
    EventGuard.Dismiss();
    SignalGuard.Dismiss();
  }
#else
  // Nothing was acquired on this path, so there is nothing to undo.
  EventGuard.Dismiss();
  SignalGuard.Dismiss();
#endif

  valid_ = true;
  active_ = 1;

  // Construction succeeded: keep every resource acquired above.
  RingGuard.Dismiss();
  QueueGuard.Dismiss();
  EventGuard.Dismiss();
  SignalGuard.Dismiss();
}
|
||||
|
||||
/// @brief Tears down a successfully constructed queue: destroys the kernel
/// queue if still active, frees the ring buffer, releases scratch, destroys
/// the queue-inactive signal and drops the shared event reference.
/// A queue that never became valid has nothing to release.
AqlQueue::~AqlQueue() {
  if (!IsValid()) return;

  // The kernel queue is already gone if the queue was inactivated earlier.
  if (active_ == 1) {
    hsaKmtDestroyQueue(queue_id_);
  }

  FreeRegisteredRingBuffer();
  agent_->ReleaseQueueScratch(queue_scratch_.queue_base);
  HSA::hsa_signal_destroy(amd_queue_.queue_inactive_signal);

#if defined(HSA_LARGE_MODEL) && defined(__linux__)
  if (core::g_use_interrupt_wait) {
    // Drop this queue's reference; the last queue destroys the shared event.
    ScopedAcquire<KernelMutex> event_lock(&queue_lock_);
    queue_count_--;
    if (queue_count_ == 0) {
      core::InterruptSignal::DestroyEvent(queue_event_);
      queue_event_ = NULL;
    }
  }
#endif
}
|
||||
|
||||
uint64_t AqlQueue::LoadReadIndexAcquire() {
|
||||
return atomic::Load(&amd_queue_.read_dispatch_id, std::memory_order_acquire);
|
||||
}
|
||||
|
||||
uint64_t AqlQueue::LoadReadIndexRelaxed() {
|
||||
return atomic::Load(&amd_queue_.read_dispatch_id, std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
uint64_t AqlQueue::LoadWriteIndexAcquire() {
|
||||
return atomic::Load(&amd_queue_.write_dispatch_id, std::memory_order_acquire);
|
||||
}
|
||||
|
||||
uint64_t AqlQueue::LoadWriteIndexRelaxed() {
|
||||
return atomic::Load(&amd_queue_.write_dispatch_id, std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
/// @brief Stores @p value to amd_queue_.write_dispatch_id with relaxed
/// ordering.
void AqlQueue::StoreWriteIndexRelaxed(uint64_t value) {
  auto* write_index = &amd_queue_.write_dispatch_id;
  atomic::Store(write_index, value, std::memory_order_relaxed);
}
|
||||
|
||||
/// @brief Stores @p value to amd_queue_.write_dispatch_id with release
/// ordering.
void AqlQueue::StoreWriteIndexRelease(uint64_t value) {
  auto* write_index = &amd_queue_.write_dispatch_id;
  atomic::Store(write_index, value, std::memory_order_release);
}
|
||||
|
||||
uint64_t AqlQueue::CasWriteIndexAcqRel(uint64_t expected, uint64_t value) {
|
||||
return atomic::Cas(&amd_queue_.write_dispatch_id, value, expected,
|
||||
std::memory_order_acq_rel);
|
||||
}
|
||||
uint64_t AqlQueue::CasWriteIndexAcquire(uint64_t expected, uint64_t value) {
|
||||
return atomic::Cas(&amd_queue_.write_dispatch_id, value, expected,
|
||||
std::memory_order_acquire);
|
||||
}
|
||||
uint64_t AqlQueue::CasWriteIndexRelaxed(uint64_t expected, uint64_t value) {
|
||||
return atomic::Cas(&amd_queue_.write_dispatch_id, value, expected,
|
||||
std::memory_order_relaxed);
|
||||
}
|
||||
uint64_t AqlQueue::CasWriteIndexRelease(uint64_t expected, uint64_t value) {
|
||||
return atomic::Cas(&amd_queue_.write_dispatch_id, value, expected,
|
||||
std::memory_order_release);
|
||||
}
|
||||
|
||||
uint64_t AqlQueue::AddWriteIndexAcqRel(uint64_t value) {
|
||||
return atomic::Add(&amd_queue_.write_dispatch_id, value,
|
||||
std::memory_order_acq_rel);
|
||||
}
|
||||
|
||||
uint64_t AqlQueue::AddWriteIndexAcquire(uint64_t value) {
|
||||
return atomic::Add(&amd_queue_.write_dispatch_id, value,
|
||||
std::memory_order_acquire);
|
||||
}
|
||||
|
||||
uint64_t AqlQueue::AddWriteIndexRelaxed(uint64_t value) {
|
||||
return atomic::Add(&amd_queue_.write_dispatch_id, value,
|
||||
std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
uint64_t AqlQueue::AddWriteIndexRelease(uint64_t value) {
|
||||
return atomic::Add(&amd_queue_.write_dispatch_id, value,
|
||||
std::memory_order_release);
|
||||
}
|
||||
|
||||
/// @brief Doorbell write: records the dispatch id as the HW-visible write
/// index and rings the hardware doorbell, serialized by the legacy doorbell
/// spinlock. Backwards and duplicate doorbells are dropped.
/// @param value Packet index supplied by the caller (ignored in the small
///              machine model, where the queue's write index is used instead).
void AqlQueue::StoreRelaxed(hsa_signal_value_t value) {
  // Take the spinlock guarding the legacy doorbell, yielding between attempts.
  for (;;) {
    if (atomic::Cas(&amd_queue_.legacy_doorbell_lock, 1U, 0U,
                    std::memory_order_acquire) == 0) {
      break;
    }
    os::YieldThread();
  }

#ifdef HSA_LARGE_MODEL
  // AMD hardware convention expects the packet index to point one past the
  // last packet to be processed. max_legacy_doorbell_dispatch_id_plus_1 is
  // the HW-visible write index, so it must follow that convention.
  uint64_t doorbell_id = value + 1;
#else
  // In the small machine model a packet index wrap at 2^32 is hard to tell
  // apart from a backwards doorbell, so the doorbell value is ignored and the
  // write index is submitted instead. Ringing for packets still in the
  // INVALID or ALWAYS_RESERVED state is fine: the HW stalls on them until
  // they become valid.
  uint64_t doorbell_id = amd_queue_.write_dispatch_id;

  // The write index may be more than one full queue ahead of the read index,
  // but the hardware can only process one queue's worth at a time. Clamp it;
  // a later doorbell is guaranteed to cover the remainder.
  doorbell_id =
      Min(doorbell_id,
          uint64_t(amd_queue_.read_dispatch_id) + amd_queue_.hsa_queue.size);
#endif

  // Only doorbells that move forward are submitted.
  if (doorbell_id > amd_queue_.max_legacy_doorbell_dispatch_id_plus_1) {
    // Remember the newest index used for a doorbell. The HW interprets this
    // field as the write index on queue connect, so it is written before the
    // doorbell itself to avoid a race.
    atomic::Store(&amd_queue_.max_legacy_doorbell_dispatch_id_plus_1,
                  doorbell_id, std::memory_order_relaxed);

    // Ring the hardware MMIO doorbell.
    if (doorbell_type_ == 0) {
      // The legacy GFXIP 7 doorbell wants the packet index wrapped into the
      // ring buffer and expressed as a DWORD count.
      uint64_t wrap_mask =
          ((1 + queue_full_workaround_) * amd_queue_.hsa_queue.size) - 1;
      const size_t dwords_per_packet =
          sizeof(core::AqlPacket) / sizeof(uint32_t);

      *(volatile uint32_t*)signal_.legacy_hardware_doorbell_ptr =
          uint32_t((doorbell_id & wrap_mask) * dwords_per_packet);
    } else if (doorbell_type_ == 1) {
      *(volatile uint32_t*)signal_.legacy_hardware_doorbell_ptr =
          uint32_t(doorbell_id);
    } else {
      assert(false && "Agent has unsupported doorbell semantics");
    }
  }

  // Drop the spinlock guarding the legacy doorbell.
  atomic::Store(&amd_queue_.legacy_doorbell_lock, 0U,
                std::memory_order_release);
}
|
||||
|
||||
/// @brief Doorbell write with release semantics: issues a release fence and
/// then performs the same work as StoreRelaxed.
void AqlQueue::StoreRelease(hsa_signal_value_t value) {
  // Order all prior writes before the doorbell sequence below.
  std::atomic_thread_fence(std::memory_order_release);

  StoreRelaxed(value);
}
|
||||
|
||||
uint32_t AqlQueue::ComputeRingBufferMinPkts() {
|
||||
// From CP_HQD_PQ_CONTROL.QUEUE_SIZE specification:
|
||||
// Size of the primary queue (PQ) will be: 2^(HQD_QUEUE_SIZE+1) DWs.
|
||||
// Min Size is 7 (2^8 = 256 DWs) and max size is 29 (2^30 = 1 G-DW)
|
||||
uint32_t min_bytes = 0x400;
|
||||
|
||||
if (queue_full_workaround_ == 1) {
|
||||
#ifdef __linux__
|
||||
// Double mapping requires one page of backing store.
|
||||
min_bytes = Max(min_bytes, 0x1000U);
|
||||
#endif
|
||||
#ifdef _WIN32
|
||||
// Shared memory mapping is at system allocation granularity.
|
||||
SYSTEM_INFO sys_info;
|
||||
GetNativeSystemInfo(&sys_info);
|
||||
min_bytes = Max(min_bytes, uint32_t(sys_info.dwAllocationGranularity));
|
||||
#endif
|
||||
}
|
||||
|
||||
return uint32_t(min_bytes / sizeof(core::AqlPacket));
|
||||
}
|
||||
|
||||
uint32_t AqlQueue::ComputeRingBufferMaxPkts() {
|
||||
// From CP_HQD_PQ_CONTROL.QUEUE_SIZE specification:
|
||||
// Size of the primary queue (PQ) will be: 2^(HQD_QUEUE_SIZE+1) DWs.
|
||||
// Min Size is 7 (2^8 = 256 DWs) and max size is 29 (2^30 = 1 G-DW)
|
||||
uint64_t max_bytes = 0x100000000;
|
||||
|
||||
if (queue_full_workaround_ == 1) {
|
||||
// Double mapping halves maximum size.
|
||||
max_bytes /= 2;
|
||||
}
|
||||
|
||||
return uint32_t(max_bytes / sizeof(core::AqlPacket));
|
||||
}
|
||||
|
||||
void AqlQueue::AllocRegisteredRingBuffer(uint32_t queue_size_pkts) {
|
||||
if (agent_profile_ == HSA_PROFILE_FULL) {
|
||||
// Compute the physical and virtual size of the queue.
|
||||
uint32_t ring_buf_phys_size_bytes =
|
||||
uint32_t(queue_size_pkts * sizeof(core::AqlPacket));
|
||||
ring_buf_alloc_bytes_ = 2 * ring_buf_phys_size_bytes;
|
||||
|
||||
#ifdef __linux__
|
||||
// Create a system-unique shared memory path for this thread.
|
||||
char ring_buf_shm_path[16];
|
||||
pid_t sys_unique_tid = pid_t(syscall(__NR_gettid));
|
||||
sprintf(ring_buf_shm_path, "/%u", sys_unique_tid);
|
||||
|
||||
int ring_buf_shm_fd = -1;
|
||||
void* reserve_va = NULL;
|
||||
|
||||
do {
|
||||
// Create a shared memory object to back the ring buffer.
|
||||
ring_buf_shm_fd = shm_open(ring_buf_shm_path, O_CREAT | O_RDWR | O_EXCL,
|
||||
S_IRUSR | S_IWUSR);
|
||||
if (ring_buf_shm_fd == -1) {
|
||||
break;
|
||||
}
|
||||
if (posix_fallocate(ring_buf_shm_fd, 0, ring_buf_phys_size_bytes) != 0)
|
||||
break;
|
||||
|
||||
// Reserve a VA range twice the size of the physical backing store.
|
||||
reserve_va = mmap(NULL, ring_buf_alloc_bytes_, PROT_NONE,
|
||||
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
|
||||
assert(reserve_va != MAP_FAILED && "mmap failed");
|
||||
|
||||
// Remap the lower and upper halves of the VA range.
|
||||
// Map both halves to the shared memory backing store.
|
||||
// If the GPU device is KV, do not set PROT_EXEC flag.
|
||||
void* ring_buf_lower_half = NULL;
|
||||
void* ring_buf_upper_half = NULL;
|
||||
if (is_kv_queue_) {
|
||||
ring_buf_lower_half =
|
||||
mmap(reserve_va, ring_buf_phys_size_bytes, PROT_READ | PROT_WRITE,
|
||||
MAP_SHARED | MAP_FIXED, ring_buf_shm_fd, 0);
|
||||
assert(ring_buf_lower_half != MAP_FAILED && "mmap failed");
|
||||
|
||||
ring_buf_upper_half =
|
||||
mmap((void*)(uintptr_t(reserve_va) + ring_buf_phys_size_bytes),
|
||||
ring_buf_phys_size_bytes, PROT_READ | PROT_WRITE,
|
||||
MAP_SHARED | MAP_FIXED, ring_buf_shm_fd, 0);
|
||||
assert(ring_buf_upper_half != MAP_FAILED && "mmap failed");
|
||||
} else {
|
||||
ring_buf_lower_half = mmap(reserve_va, ring_buf_phys_size_bytes,
|
||||
PROT_READ | PROT_WRITE | PROT_EXEC,
|
||||
MAP_SHARED | MAP_FIXED, ring_buf_shm_fd, 0);
|
||||
assert(ring_buf_lower_half != MAP_FAILED && "mmap failed");
|
||||
|
||||
ring_buf_upper_half =
|
||||
mmap((void*)(uintptr_t(reserve_va) + ring_buf_phys_size_bytes),
|
||||
ring_buf_phys_size_bytes, PROT_READ | PROT_WRITE | PROT_EXEC,
|
||||
MAP_SHARED | MAP_FIXED, ring_buf_shm_fd, 0);
|
||||
assert(ring_buf_upper_half != MAP_FAILED && "mmap failed");
|
||||
}
|
||||
|
||||
// Release explicit reference to shared memory object.
|
||||
shm_unlink(ring_buf_shm_path);
|
||||
close(ring_buf_shm_fd);
|
||||
|
||||
// Successfully created mapping.
|
||||
ring_buf_ = ring_buf_lower_half;
|
||||
return;
|
||||
} while (false);
|
||||
|
||||
// Resource cleanup on failure.
|
||||
if (reserve_va) munmap(reserve_va, ring_buf_alloc_bytes_);
|
||||
if (ring_buf_shm_fd != -1) {
|
||||
shm_unlink(ring_buf_shm_path);
|
||||
close(ring_buf_shm_fd);
|
||||
}
|
||||
#endif
|
||||
#ifdef _WIN32
|
||||
HANDLE ring_buf_mapping = INVALID_HANDLE_VALUE;
|
||||
void* ring_buf_lower_half = NULL;
|
||||
void* ring_buf_upper_half = NULL;
|
||||
|
||||
do {
|
||||
// Create a page file mapping to back the ring buffer.
|
||||
ring_buf_mapping = CreateFileMapping(INVALID_HANDLE_VALUE, NULL,
|
||||
PAGE_EXECUTE_READWRITE | SEC_COMMIT,
|
||||
0, ring_buf_phys_size_bytes, NULL);
|
||||
if (ring_buf_mapping == NULL) {
|
||||
break;
|
||||
}
|
||||
|
||||
// Retry until obtaining an appropriate virtual address mapping.
|
||||
for (int num_attempts = 0; num_attempts < 1000; ++num_attempts) {
|
||||
// Find a virtual address range twice the size of the file mapping.
|
||||
void* reserve_va =
|
||||
VirtualAllocEx(GetCurrentProcess(), NULL, ring_buf_alloc_bytes_,
|
||||
MEM_TOP_DOWN | MEM_RESERVE, PAGE_EXECUTE_READWRITE);
|
||||
if (reserve_va == NULL) {
|
||||
break;
|
||||
}
|
||||
VirtualFree(reserve_va, 0, MEM_RELEASE);
|
||||
|
||||
// Map the ring buffer into the free virtual range.
|
||||
// This may fail: another thread can allocate in this range.
|
||||
ring_buf_lower_half = MapViewOfFileEx(
|
||||
ring_buf_mapping, FILE_MAP_ALL_ACCESS | FILE_MAP_EXECUTE, 0, 0,
|
||||
ring_buf_phys_size_bytes, reserve_va);
|
||||
|
||||
if (ring_buf_lower_half == NULL) {
|
||||
// Virtual range allocated by another thread, try again.
|
||||
continue;
|
||||
}
|
||||
|
||||
ring_buf_upper_half = MapViewOfFileEx(
|
||||
ring_buf_mapping, FILE_MAP_ALL_ACCESS | FILE_MAP_EXECUTE, 0, 0,
|
||||
ring_buf_phys_size_bytes,
|
||||
(void*)(uintptr_t(reserve_va) + ring_buf_phys_size_bytes));
|
||||
|
||||
if (ring_buf_upper_half == NULL) {
|
||||
// Virtual range allocated by another thread, try again.
|
||||
UnmapViewOfFile(ring_buf_lower_half);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Successfully created mapping.
|
||||
ring_buf_ = ring_buf_lower_half;
|
||||
break;
|
||||
}
|
||||
|
||||
if (ring_buf_ == NULL) {
|
||||
break;
|
||||
}
|
||||
|
||||
// Release file mapping (reference counted by views).
|
||||
CloseHandle(ring_buf_mapping);
|
||||
|
||||
// Don't register the memory: causes a failure in the KFD.
|
||||
// Instead use implicit registration to access the ring buffer.
|
||||
return;
|
||||
} while (false);
|
||||
|
||||
// Resource cleanup on failure.
|
||||
UnmapViewOfFile(ring_buf_upper_half);
|
||||
UnmapViewOfFile(ring_buf_lower_half);
|
||||
CloseHandle(ring_buf_mapping);
|
||||
#endif
|
||||
} else {
|
||||
// Allocate storage for the ring buffer.
|
||||
HsaMemFlags flags;
|
||||
flags.Value = 0;
|
||||
flags.ui32.HostAccess = 1;
|
||||
flags.ui32.AtomicAccessPartial = 1;
|
||||
flags.ui32.ExecuteAccess = 1;
|
||||
flags.ui32.AQLQueueMemory = 1;
|
||||
|
||||
ring_buf_alloc_bytes_ = AlignUp(
|
||||
queue_size_pkts * static_cast<uint32_t>(sizeof(core::AqlPacket)), 4096);
|
||||
auto err = hsaKmtAllocMemory(agent_->node_id(), ring_buf_alloc_bytes_,
|
||||
flags, (void**)&ring_buf_);
|
||||
|
||||
if (err != HSAKMT_STATUS_SUCCESS) {
|
||||
assert(false && "AQL queue memory allocation failure.");
|
||||
return;
|
||||
}
|
||||
|
||||
HSAuint64 alternate_va;
|
||||
err = hsaKmtMapMemoryToGPU(ring_buf_, ring_buf_alloc_bytes_, &alternate_va);
|
||||
|
||||
if (err != HSAKMT_STATUS_SUCCESS) {
|
||||
assert(false && "AQL queue memory map failure.");
|
||||
hsaKmtFreeMemory(ring_buf_, ring_buf_alloc_bytes_);
|
||||
ring_buf_ = NULL;
|
||||
return;
|
||||
}
|
||||
|
||||
ring_buf_alloc_bytes_ = 2 * ring_buf_alloc_bytes_;
|
||||
}
|
||||
}
|
||||
|
||||
void AqlQueue::FreeRegisteredRingBuffer() {
|
||||
if (agent_profile_ == HSA_PROFILE_FULL) {
|
||||
#ifdef __linux__
|
||||
munmap(ring_buf_, ring_buf_alloc_bytes_);
|
||||
#endif
|
||||
#ifdef _WIN32
|
||||
UnmapViewOfFile(ring_buf_);
|
||||
UnmapViewOfFile(
|
||||
(void*)(uintptr_t(ring_buf_) + (ring_buf_alloc_bytes_ / 2)));
|
||||
#endif
|
||||
} else {
|
||||
hsaKmtUnmapMemoryToGPU(ring_buf_);
|
||||
hsaKmtFreeMemory(ring_buf_, ring_buf_alloc_bytes_ / 2);
|
||||
}
|
||||
|
||||
ring_buf_ = NULL;
|
||||
ring_buf_alloc_bytes_ = 0;
|
||||
}
|
||||
|
||||
/// @brief Marks the queue inactive and destroys the kernel queue if this call
/// is the one that flipped active_ from 1 to 0.
/// @return Always HSA_STATUS_SUCCESS.
hsa_status_t AqlQueue::Inactivate() {
  // The exchange returns the previous flag value, so only one caller sees 1.
  const int32_t was_active = atomic::Exchange((volatile int32_t*)&active_, 0);
  if (was_active == 1) {
    hsaKmtDestroyQueue(this->queue_id_);
  }
  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
// Handler for queue error/event codes; `arg` is the AqlQueue that raised them.
//
// Bit 0 of `error_code` (insufficient scratch) is recoverable: the scratch
// allocation is grown, the queue's scratch descriptor fields are rewritten,
// the queue-inactive signal is reset to 0 and true is returned.
//
// Every other code is fatal: the queue is inactivated, the user error callback
// (if any) is invoked with the matching HSA status, and false is returned.
// Bits are tested in ascending order and the first match wins.
bool AqlQueue::DynamicScratchHandler(hsa_signal_value_t error_code, void* arg) {
  AqlQueue* queue = (AqlQueue*)arg;

  if ((error_code & 1) != 1) {
    // Fatal path: translate the first matching bit into an HSA status.
    hsa_status_t status = HSA_STATUS_ERROR;
    bool recognized = true;

    if ((error_code & 2) == 2) {  // Invalid dim
      status = HSA_STATUS_ERROR_INCOMPATIBLE_ARGUMENTS;
    } else if ((error_code & 4) == 4) {  // Invalid group memory
      status = HSA_STATUS_ERROR_INVALID_ALLOCATION;
    } else if ((error_code & 8) == 8) {  // Invalid (or NULL) code
      status = HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
    } else if ((error_code & 32) == 32) {  // Invalid format
      status = HSA_STATUS_ERROR_INVALID_PACKET_FORMAT;
    } else if ((error_code & 64) == 64) {  // Group is too large
      status = HSA_STATUS_ERROR_INVALID_ARGUMENT;
    } else if ((error_code & 128) == 128) {  // Out of VGPRs
      status = HSA_STATUS_ERROR_INVALID_ISA;
    } else if ((error_code & 0x80000000) == 0x80000000) {  // Debug trap
      status = HSA_STATUS_ERROR_EXCEPTION;
    } else {
      // Undefined code: reported as the generic HSA_STATUS_ERROR.
      recognized = false;
    }

    queue->Inactivate();
    if (!recognized) {
      assert(false && "Undefined queue error code");
    }
    if (queue->errors_callback_ != NULL) {
      queue->errors_callback_(status, queue->public_handle(),
                              queue->errors_data_);
    }
    return false;
  }

  // Insufficient scratch - recoverable.
  auto& scratch = queue->queue_scratch_;

  queue->agent_->ReleaseQueueScratch(scratch.queue_base);

  // Packet at the current read index; its private segment size is the
  // per-thread scratch the dispatch asked for.
  const core::AqlPacket* const ring =
      (core::AqlPacket*)queue->amd_queue_.hsa_queue.base_address;
  const core::AqlPacket& pkt = ring[queue->amd_queue_.read_dispatch_id];
  uint32_t scratch_request = pkt.dispatch.private_segment_size;

  // Grow to at least double the previous size, or the request if larger.
  scratch.size_per_thread =
      Max(uint32_t(scratch.size_per_thread * 2), scratch_request);
  // Align whole waves to 1KB.
  scratch.size_per_thread = AlignUp(scratch.size_per_thread, 16);
  scratch.size = scratch.size_per_thread * (queue->amd_queue_.max_cu_id + 1) *
                 32 * 64;  // TODO: replace constants.

  queue->agent_->AcquireQueueScratch(scratch);
  if (scratch.queue_base == NULL) {
    // Out of scratch - promote error and invalidate queue.
    queue->Inactivate();
    if (queue->errors_callback_ != NULL) {
      queue->errors_callback_(HSA_STATUS_ERROR_OUT_OF_RESOURCES,
                              queue->public_handle(), queue->errors_data_);
    }
    return false;
  }

  // Patch the scratch resource descriptor with the new base and size.
  const uintptr_t base = (uintptr_t)scratch.queue_base;

  SQ_BUF_RSRC_WORD0 srd0;
  srd0.u32All = queue->amd_queue_.scratch_resource_descriptor[0];
  srd0.bits.BASE_ADDRESS = uint32_t(base);
  queue->amd_queue_.scratch_resource_descriptor[0] = srd0.u32All;

  SQ_BUF_RSRC_WORD2 srd2;
  srd2.u32All = queue->amd_queue_.scratch_resource_descriptor[2];
  srd2.bits.NUM_RECORDS = uint32_t(scratch.size);
  queue->amd_queue_.scratch_resource_descriptor[2] = srd2.u32All;

#ifdef HSA_LARGE_MODEL
  SQ_BUF_RSRC_WORD1 srd1;
  srd1.u32All = queue->amd_queue_.scratch_resource_descriptor[1];
  srd1.bits.BASE_ADDRESS_HI = uint32_t(base >> 32);
  queue->amd_queue_.scratch_resource_descriptor[1] = srd1.u32All;
#endif

  queue->amd_queue_.scratch_backing_memory_location =
      scratch.queue_process_offset;
  queue->amd_queue_.scratch_backing_memory_byte_size = scratch.size;
  queue->amd_queue_.scratch_workitem_byte_size =
      uint32_t(scratch.size_per_thread);

  COMPUTE_TMPRING_SIZE tmpring_size = {0};
  tmpring_size.bits.WAVES = (scratch.size / scratch.size_per_thread / 64);
  tmpring_size.bits.WAVESIZE =
      (((64 * scratch.size_per_thread) + 1023) / 1024);
  queue->amd_queue_.compute_tmpring_size = tmpring_size.u32All;

  HSA::hsa_signal_store_relaxed(queue->amd_queue_.queue_inactive_signal, 0);
  return true;
}
|
||||
|
||||
// Forwards a compute-unit mask to the thunk for this queue.
//
// num_cu_mask_count and cu_mask are passed through to hsaKmtSetQueueCUMask()
// unchanged (the const qualifier is dropped only to match the thunk
// signature). Returns HSA_STATUS_SUCCESS when the thunk reports success and
// HSA_STATUS_ERROR otherwise.
hsa_status_t AqlQueue::SetCUMasking(const uint32_t num_cu_mask_count,
                                    const uint32_t* cu_mask) {
  HSAuint32* const mask_words =
      reinterpret_cast<HSAuint32*>(const_cast<uint32_t*>(cu_mask));

  const HSAKMT_STATUS ret =
      hsaKmtSetQueueCUMask(queue_id_, num_cu_mask_count, mask_words);

  if (HSAKMT_STATUS_SUCCESS != ret) {
    return HSA_STATUS_ERROR;
  }
  return HSA_STATUS_SUCCESS;
}
|
||||
} // namespace amd
|
||||
@@ -1,647 +0,0 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "core/inc/amd_blit_kernel.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <climits>
|
||||
#include <cmath>
|
||||
#include <cstring>
|
||||
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
#define NOMINMAX
|
||||
#include <windows.h>
|
||||
#else
|
||||
#include <sys/mman.h>
|
||||
#endif
|
||||
|
||||
#include "core/inc/amd_blit_kernel_kv.h"
|
||||
#include "core/inc/amd_blit_kernel_vi.h"
|
||||
#include "core/inc/amd_gpu_agent.h"
|
||||
#include "core/inc/hsa_internal.h"
|
||||
#include "core/util/utils.h"
|
||||
|
||||
namespace amd {
|
||||
const uint32_t BlitKernel::kGroupSize = 256;
|
||||
const size_t BlitKernel::kMaxCopyCount = AlignDown(UINT32_MAX, kGroupSize);
|
||||
const size_t BlitKernel::kMaxFillCount = AlignDown(UINT32_MAX, kGroupSize);
|
||||
|
||||
static const uint16_t kInvalidPacketHeader = HSA_PACKET_TYPE_INVALID;
|
||||
|
||||
// Constructs an uninitialised blit object. Every handle and pointer starts
// out as 0/NULL; Initialize() must be called before any command is submitted.
BlitKernel::BlitKernel()
    : core::Blit(),
      copy_code_handle_(0),
      fill_code_handle_(0),
      queue_(NULL),
      cached_index_(0),
      kernarg_(NULL),
      kernarg_async_(NULL),
      kernarg_async_mask_(0),
      kernarg_async_counter_(0),
      code_arg_buffer_(NULL) {
  completion_signal_.handle = 0;

  // These two members were previously left indeterminate until Initialize()
  // ran. They are assigned here rather than in the initialiser list so the
  // result does not depend on the member declaration order in the header.
  copy_aligned_code_handle_ = 0;
  queue_bitmask_ = 0;
}

// Resources are released explicitly through Destroy(); the destructor itself
// does nothing.
BlitKernel::~BlitKernel() {}
|
||||
|
||||
// Prepares the blit object for use on `agent`.
//
// Steps:
//   1. Verify the agent supports kernel dispatch.
//   2. Create a multi-producer AQL queue with kRequiredQueueSize packets.
//   3. Pick the copy / aligned-copy / fill kernel code objects that match the
//      agent's ISA major version (7 or 8).
//   4. Copy the three code objects plus one kernarg block into a single
//      4KB-aligned allocation (code_arg_buffer_).
//   5. Create the completion signal and the ring of asynchronous copy kernargs.
//   6. On full-profile agents, make the code buffer executable.
//
// Returns HSA_STATUS_SUCCESS on success. On failure the status of the failing
// step is returned; resources created by earlier steps stay recorded in the
// members, so Destroy() releases them.
hsa_status_t BlitKernel::Initialize(const core::Agent& agent) {
  hsa_agent_t agent_handle = agent.public_handle();

  uint32_t features = 0;
  hsa_status_t status =
      HSA::hsa_agent_get_info(agent_handle, HSA_AGENT_INFO_FEATURE, &features);
  if (status != HSA_STATUS_SUCCESS) {
    return status;
  }

  if ((features & HSA_AGENT_FEATURE_KERNEL_DISPATCH) == 0) {
    return HSA_STATUS_ERROR;
  }

  // Need queue buffer that can cover the max size of local memory.
  const uint64_t kGpuVmVaSize = 1ULL << 40;
  const uint32_t kRequiredQueueSize = NextPow2(static_cast<uint32_t>(
      std::ceil(static_cast<double>(kGpuVmVaSize) / kMaxCopyCount)));

  uint32_t max_queue_size = 0;
  status = HSA::hsa_agent_get_info(agent_handle, HSA_AGENT_INFO_QUEUE_MAX_SIZE,
                                   &max_queue_size);
  if (HSA_STATUS_SUCCESS != status) {
    return status;
  }

  if (max_queue_size < kRequiredQueueSize) {
    return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  }

  status =
      HSA::hsa_queue_create(agent_handle, kRequiredQueueSize,
                            HSA_QUEUE_TYPE_MULTI, NULL, NULL, 0, 0, &queue_);
  if (HSA_STATUS_SUCCESS != status) {
    return status;
  }

  // The queue size is a power of two, so size - 1 masks an index into it.
  queue_bitmask_ = queue_->size - 1;

  cached_index_ = 0;

  void* copy_raw_obj_mem = NULL;
  size_t copy_akc_size = 0;
  size_t copy_akc_offset = 0;

  void* copy_aligned_raw_obj_mem = NULL;
  size_t copy_aligned_akc_size = 0;
  size_t copy_aligned_akc_offset = 0;

  void* fill_raw_obj_mem = NULL;
  size_t fill_akc_size = 0;
  size_t fill_akc_offset = 0;

  switch (agent.isa()->GetMajorVersion()) {
    case 7:
      copy_raw_obj_mem = kVectorCopyKvObject;
      copy_akc_size = HSA_VECTOR_COPY_KV_AKC_SIZE;
      copy_akc_offset = HSA_VECTOR_COPY_KV_AKC_OFFSET;

      copy_aligned_raw_obj_mem = kVectorCopyAlignedKvObject;
      copy_aligned_akc_size = HSA_VECTOR_COPY_ALIGNED_KV_AKC_SIZE;
      copy_aligned_akc_offset = HSA_VECTOR_COPY_ALIGNED_KV_AKC_OFFSET;

      fill_raw_obj_mem = kFillMemoryKvObject;
      fill_akc_size = HSA_FILL_MEMORY_KV_AKC_SIZE;
      fill_akc_offset = HSA_FILL_MEMORY_KV_AKC_OFFSET;
      break;
    case 8:
      copy_raw_obj_mem = kVectorCopyViObject;
      copy_akc_size = HSA_VECTOR_COPY_VI_AKC_SIZE;
      copy_akc_offset = HSA_VECTOR_COPY_VI_AKC_OFFSET;

      copy_aligned_raw_obj_mem = kVectorCopyAlignedViObject;
      copy_aligned_akc_size = HSA_VECTOR_COPY_ALIGNED_VI_AKC_SIZE;
      copy_aligned_akc_offset = HSA_VECTOR_COPY_ALIGNED_VI_AKC_OFFSET;

      fill_raw_obj_mem = kFillMemoryViObject;
      fill_akc_size = HSA_FILL_MEMORY_VI_AKC_SIZE;
      fill_akc_offset = HSA_FILL_MEMORY_VI_AKC_OFFSET;
      break;
    default:
      assert(false && "Only gfx7 and gfx8 are supported");
      // With asserts compiled out, do not continue with NULL code objects.
      return HSA_STATUS_ERROR;
  }

  // Layout of code_arg_buffer_:
  //   [copy code | pad to 256][aligned copy code | pad to 256]
  //   [fill code | pad to 256][kernarg block, 16-byte aligned]
  static const size_t kKernArgSize =
      std::max(sizeof(KernelCopyArgs), sizeof(KernelFillArgs));
  const size_t total_alloc_size = AlignUp(
      AlignUp(copy_akc_size, 256) + AlignUp(copy_aligned_akc_size, 256) +
          AlignUp(fill_akc_size, 256) + AlignUp(kKernArgSize, 16),
      4096);

  amd_kernel_code_t* code_ptr = nullptr;
  code_arg_buffer_ = core::Runtime::runtime_singleton_->system_allocator()(
      total_alloc_size, 4096);
  if (code_arg_buffer_ == NULL) {
    return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  }

  // Byte copy kernel.
  char* akc_arg = reinterpret_cast<char*>(code_arg_buffer_);
  memcpy(akc_arg,
         reinterpret_cast<const char*>(copy_raw_obj_mem) + copy_akc_offset,
         copy_akc_size);
  copy_code_handle_ = reinterpret_cast<uint64_t>(akc_arg);
  code_ptr = (amd_kernel_code_t*)(copy_code_handle_);
  code_ptr->runtime_loader_kernel_symbol = 0;
  akc_arg += copy_akc_size;

  // Aligned copy kernel.
  akc_arg = AlignUp(akc_arg, 256);
  memcpy(akc_arg, reinterpret_cast<const char*>(copy_aligned_raw_obj_mem) +
                      copy_aligned_akc_offset,
         copy_aligned_akc_size);
  copy_aligned_code_handle_ = reinterpret_cast<uint64_t>(akc_arg);
  code_ptr = (amd_kernel_code_t*)(copy_aligned_code_handle_);
  code_ptr->runtime_loader_kernel_symbol = 0;
  akc_arg += copy_aligned_akc_size;

  // Fill kernel.
  akc_arg = AlignUp(akc_arg, 256);
  memcpy(akc_arg,
         reinterpret_cast<const char*>(fill_raw_obj_mem) + fill_akc_offset,
         fill_akc_size);
  fill_code_handle_ = reinterpret_cast<uint64_t>(akc_arg);
  code_ptr = (amd_kernel_code_t*)(fill_code_handle_);
  code_ptr->runtime_loader_kernel_symbol = 0;
  akc_arg += fill_akc_size;

  // Kernarg block used by the blocking fill path.
  akc_arg = AlignUp(akc_arg, 16);
  kernarg_ = akc_arg;

  status = HSA::hsa_signal_create(1, 0, NULL, &completion_signal_);
  if (HSA_STATUS_SUCCESS != status) {
    return status;
  }

  // One kernarg slot per queue packet for the copy paths.
  kernarg_async_ = reinterpret_cast<KernelCopyArgs*>(
      core::Runtime::runtime_singleton_->system_allocator()(
          kRequiredQueueSize * AlignUp(sizeof(KernelCopyArgs), 16), 16));
  if (kernarg_async_ == NULL) {
    return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  }

  kernarg_async_mask_ = kRequiredQueueSize - 1;

  // TODO(bwicakso): remove this code when execute permission level is not
  // mandatory.
  if (((amd::GpuAgent&)agent).profile() == HSA_PROFILE_FULL) {
#if defined(_WIN32) || defined(_WIN64)
    DWORD old_protect = 0;
    const DWORD new_protect = PAGE_EXECUTE_READWRITE;
    if (!VirtualProtect(code_arg_buffer_, total_alloc_size, new_protect,
                        &old_protect)) {
      return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
    }
#else
    if (0 != mprotect(code_arg_buffer_, total_alloc_size,
                      PROT_READ | PROT_WRITE | PROT_EXEC)) {
      return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
    }
#endif
  }

  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
// Releases everything Initialize() created: the AQL queue, the asynchronous
// kernarg ring, the code/kernarg buffer and the completion signal.
//
// Each member is reset after its resource is released, so calling Destroy()
// more than once (or after a partially failed Initialize()) does not free
// anything twice. Always returns HSA_STATUS_SUCCESS.
hsa_status_t BlitKernel::Destroy(void) {
  std::lock_guard<std::mutex> guard(lock_);

  if (queue_ != NULL) {
    HSA::hsa_queue_destroy(queue_);
    queue_ = NULL;
  }

  if (kernarg_async_ != NULL) {
    core::Runtime::runtime_singleton_->system_deallocator()(kernarg_async_);
    kernarg_async_ = NULL;
  }

  if (code_arg_buffer_ != NULL) {
    core::Runtime::runtime_singleton_->system_deallocator()(code_arg_buffer_);
    code_arg_buffer_ = NULL;

    // The code handles and kernarg_ all point into code_arg_buffer_.
    kernarg_ = NULL;
    copy_code_handle_ = 0;
    copy_aligned_code_handle_ = 0;
    fill_code_handle_ = 0;
  }

  if (completion_signal_.handle != 0) {
    HSA::hsa_signal_destroy(completion_signal_);
    completion_signal_.handle = 0;
  }

  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
// Returns true when `address`, taken as an integer, is below 2^48.
static bool IsSystemMemory(void* address) {
  static const uint64_t kLimitSystem = 1ULL << 48;
  const uint64_t numeric_address = reinterpret_cast<uint64_t>(address);
  return numeric_address < kLimitSystem;
}
|
||||
|
||||
// Blocking linear copy of `size` bytes from `src` to `dst`.
//
// The copy is split into dispatch packets of at most kMaxCopyCount elements.
// The element width is 16 bytes when dst, src and size are all multiples of
// 16, 4 bytes when they are all multiples of 4, and 1 byte otherwise. Only the
// last packet carries the completion signal; the call returns once that
// signal drops below 1.
//
// Returns HSA_STATUS_SUCCESS on completion (or immediately when size is 0)
// and HSA_STATUS_ERROR when the signal wait returns a non-zero value.
hsa_status_t BlitKernel::SubmitLinearCopyCommand(void* dst, const void* src,
                                                 size_t size) {
  assert(copy_code_handle_ != 0);

  // Nothing to copy. Without this guard no packet would be submitted and the
  // wait below would never see the completion signal change.
  if (size == 0) {
    return HSA_STATUS_SUCCESS;
  }

  std::lock_guard<std::mutex> guard(lock_);

  // Arm the completion signal; the last copy packet decrements it.
  HSA::hsa_signal_store_relaxed(completion_signal_, 1);

  const size_t kAlignmentChar = 1;
  const size_t kAlignmentUin32 = 4;
  const size_t kAlignmentVec4 = 16;
  const size_t copy_granule =
      (IsMultipleOf(dst, kAlignmentVec4) && IsMultipleOf(src, kAlignmentVec4) &&
       IsMultipleOf(size, kAlignmentVec4))
          ? kAlignmentVec4
          : (IsMultipleOf(dst, kAlignmentUin32) &&
             IsMultipleOf(src, kAlignmentUin32) &&
             IsMultipleOf(size, kAlignmentUin32))
                ? kAlignmentUin32
                : kAlignmentChar;

  // From here on `size` counts elements of copy_granule bytes.
  size = size / copy_granule;

  const uint32_t num_copy_packet = static_cast<uint32_t>(
      std::ceil(static_cast<double>(size) / kMaxCopyCount));

  // Reserve queue slots for the copy packets.
  uint64_t write_index = AcquireWriteIndex(num_copy_packet);

  const uint32_t last_copy_index = num_copy_packet - 1;
  size_t total_copy_count = 0;
  for (uint32_t i = 0; i < num_copy_packet; ++i) {
    // Setup arguments.
    const uint32_t copy_count = static_cast<uint32_t>(
        std::min((size - total_copy_count), kMaxCopyCount));

    void* cur_dst = static_cast<char*>(dst) + (total_copy_count * copy_granule);
    const void* cur_src =
        static_cast<const char*>(src) + (total_copy_count * copy_granule);

    KernelCopyArgs* args = ObtainAsyncKernelCopyArg();
    assert(args != NULL);
    assert(IsMultipleOf(args, 16));

    args->src = cur_src;
    args->dst = cur_dst;
    args->size = copy_count;
    args->use_vector = (copy_granule == kAlignmentVec4) ? 1 : 0;

    const uint32_t grid_size_x =
        AlignUp(static_cast<uint32_t>(copy_count), kGroupSize);

    // Guards against kMaxCopyCount being changed to a value whose rounded-up
    // grid size would overflow packet.grid_size_x.
    assert(grid_size_x >= copy_count);

    // Only the final packet signals completion.
    hsa_signal_t signal = {(i == last_copy_index) ? completion_signal_.handle
                                                  : 0};
    PopulateQueue(write_index + i, ((copy_granule == kAlignmentChar)
                                        ? copy_code_handle_
                                        : copy_aligned_code_handle_),
                  args, grid_size_x, signal);

    total_copy_count += copy_count;
  }

  // Launch copy packet.
  ReleaseWriteIndex(write_index, num_copy_packet);

  // Wait for the packet to finish.
  if (HSA::hsa_signal_wait_acquire(completion_signal_, HSA_SIGNAL_CONDITION_LT,
                                   1, uint64_t(-1),
                                   HSA_WAIT_STATE_ACTIVE) != 0) {
    // Signal wait returned unexpected value.
    return HSA_STATUS_ERROR;
  }

  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
// Asynchronous linear copy of `size` bytes from `src` to `dst`.
//
// Barrier-AND packets covering `dep_signals` (five signals per packet) are
// queued first, followed by the copy dispatch packets. The last copy packet
// carries `out_signal` as its completion signal. The function returns as soon
// as the packets are submitted; it does not wait for the copy.
//
// The element width is chosen exactly as in the blocking overload (16, 4 or 1
// bytes depending on the alignment of dst, src and size).
//
// NOTE(review): when size is 0 no copy packet is produced, so `out_signal` is
// not attached to any packet - confirm that callers never pass size == 0.
//
// Always returns HSA_STATUS_SUCCESS.
hsa_status_t BlitKernel::SubmitLinearCopyCommand(
    void* dst, const void* src, size_t size,
    std::vector<core::Signal*>& dep_signals, core::Signal& out_signal) {
  // The original statement here was a bare expression with no effect; the
  // blocking overload shows the intended check.
  assert(copy_code_handle_ != 0);

  const size_t kAlignmentChar = 1;
  const size_t kAlignmentUin32 = 4;
  const size_t kAlignmentVec4 = 16;
  const size_t copy_granule =
      (IsMultipleOf(dst, kAlignmentVec4) && IsMultipleOf(src, kAlignmentVec4) &&
       IsMultipleOf(size, kAlignmentVec4))
          ? kAlignmentVec4
          : (IsMultipleOf(dst, kAlignmentUin32) &&
             IsMultipleOf(src, kAlignmentUin32) &&
             IsMultipleOf(size, kAlignmentUin32))
                ? kAlignmentUin32
                : kAlignmentChar;

  // From here on `size` counts elements of copy_granule bytes.
  size = size / copy_granule;

  const uint32_t num_copy_packet = static_cast<uint32_t>(
      std::ceil(static_cast<double>(size) / kMaxCopyCount));

  // Each barrier-AND packet holds up to five dependency signals.
  const size_t kDepSignalsPerBarrier = 5;
  const size_t dep_signal_count = dep_signals.size();
  const uint32_t num_barrier_packet = static_cast<uint32_t>(
      (dep_signal_count + kDepSignalsPerBarrier - 1) / kDepSignalsPerBarrier);

  // Reserve queue slots for the barrier and copy packets.
  const uint32_t total_num_packet = num_barrier_packet + num_copy_packet;

  uint64_t write_index = AcquireWriteIndex(total_num_packet);
  // Remember the first reserved index; write_index advances per packet.
  uint64_t write_index_temp = write_index;

  const uint16_t kBarrierPacketHeader =
      (HSA_PACKET_TYPE_BARRIER_AND << HSA_PACKET_HEADER_TYPE) |
      (1 << HSA_PACKET_HEADER_BARRIER) |
      (HSA_FENCE_SCOPE_NONE << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE) |
      (HSA_FENCE_SCOPE_AGENT << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE);

  hsa_barrier_and_packet_t barrier_packet = {0};
  barrier_packet.header = HSA_PACKET_TYPE_INVALID;

  hsa_barrier_and_packet_t* queue_buffer =
      reinterpret_cast<hsa_barrier_and_packet_t*>(queue_->base_address);

  for (size_t i = 0; i < dep_signal_count; ++i) {
    const size_t idx = i % kDepSignalsPerBarrier;
    barrier_packet.dep_signal[idx] = core::Signal::Convert(dep_signals[i]);

    // Flush the packet when it is full or when the last signal was added.
    if (i == (dep_signal_count - 1) || idx == (kDepSignalsPerBarrier - 1)) {
      // Write the body with an invalid header first, then publish the real
      // header so the slot only becomes valid once it is complete.
      std::atomic_thread_fence(std::memory_order_acquire);
      queue_buffer[(write_index)&queue_bitmask_] = barrier_packet;
      std::atomic_thread_fence(std::memory_order_release);
      queue_buffer[(write_index)&queue_bitmask_].header = kBarrierPacketHeader;

      ++write_index;

      memset(&barrier_packet, 0, sizeof(hsa_barrier_and_packet_t));
      barrier_packet.header = HSA_PACKET_TYPE_INVALID;
    }
  }

  const uint32_t last_copy_index = num_copy_packet - 1;
  size_t total_copy_count = 0;
  for (uint32_t i = 0; i < num_copy_packet; ++i) {
    // Setup arguments.
    const uint32_t copy_count = static_cast<uint32_t>(
        std::min((size - total_copy_count), kMaxCopyCount));

    void* cur_dst = static_cast<char*>(dst) + (total_copy_count * copy_granule);
    const void* cur_src =
        static_cast<const char*>(src) + (total_copy_count * copy_granule);

    KernelCopyArgs* args = ObtainAsyncKernelCopyArg();
    assert(args != NULL);
    assert(IsMultipleOf(args, 16));

    args->src = cur_src;
    args->dst = cur_dst;
    args->size = copy_count;
    args->use_vector = (copy_granule == kAlignmentVec4) ? 1 : 0;

    const uint32_t grid_size_x =
        AlignUp(static_cast<uint32_t>(copy_count), kGroupSize);

    // Guards against kMaxCopyCount being changed to a value whose rounded-up
    // grid size would overflow packet.grid_size_x.
    assert(grid_size_x >= copy_count);

    // Only the final packet signals the caller's output signal.
    hsa_signal_t signal = {(i == last_copy_index)
                               ? (core::Signal::Convert(&out_signal)).handle
                               : 0};
    PopulateQueue(write_index, ((copy_granule == kAlignmentChar)
                                    ? copy_code_handle_
                                    : copy_aligned_code_handle_),
                  args, grid_size_x, signal);

    ++write_index;

    total_copy_count += copy_count;
  }

  // Launch the barrier and copy packets.
  ReleaseWriteIndex(write_index_temp, total_num_packet);

  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
// Blocking fill: writes `value` to `num` elements starting at `ptr`.
//
// The fill is split into dispatch packets of at most kMaxFillCount elements.
// Only the last packet carries the completion signal; the call returns once
// that signal drops below 1.
//
// Returns HSA_STATUS_SUCCESS on completion (or immediately when num is 0),
// HSA_STATUS_ERROR_OUT_OF_RESOURCES when no kernarg buffer is available, and
// HSA_STATUS_ERROR when the signal wait returns a non-zero value.
//
// NOTE(review): args[i] indexes kernarg_ once per packet, but Initialize()
// reserves room for a single argument block - confirm fills that need more
// than one packet cannot overrun it.
// NOTE(review): the per-packet pointer advances by the element count, not by
// count * sizeof(uint32_t) - confirm against the fill kernel's element size.
hsa_status_t BlitKernel::SubmitLinearFillCommand(void* ptr, uint32_t value,
                                                 size_t num) {
  assert(fill_code_handle_ != 0);

  // Nothing to fill. Without this guard no packet would be submitted and the
  // wait below would never see the completion signal change.
  if (num == 0) {
    return HSA_STATUS_SUCCESS;
  }

  std::lock_guard<std::mutex> guard(lock_);

  // Check the kernarg buffer before reserving queue slots: bailing out after
  // the reservation would leave reserved slots that are never released.
  KernelFillArgs* args = reinterpret_cast<KernelFillArgs*>(kernarg_);
  if (args == NULL) {
    return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  }

  // Arm the completion signal; the last fill packet decrements it.
  HSA::hsa_signal_store_relaxed(completion_signal_, 1);

  const uint32_t num_fill_packet = static_cast<uint32_t>(
      std::ceil(static_cast<double>(num) / kMaxFillCount));

  // Reserve queue slots for the fill packets.
  uint64_t write_index = AcquireWriteIndex(num_fill_packet);

  const uint32_t last_fill_index = num_fill_packet - 1;
  size_t total_fill_count = 0;
  for (uint32_t i = 0; i < num_fill_packet; ++i) {
    assert(IsMultipleOf(&args[i], 16));

    // Setup arguments.
    const uint32_t fill_count = static_cast<uint32_t>(
        std::min((num - total_fill_count), kMaxFillCount));
    void* cur_ptr = static_cast<char*>(ptr) + total_fill_count;

    args[i].ptr = cur_ptr;
    args[i].num = fill_count;
    args[i].value = value;

    const uint32_t grid_size_x =
        AlignUp(static_cast<uint32_t>(fill_count), kGroupSize);

    // Guards against kMaxFillCount being changed to a value whose rounded-up
    // grid size would overflow packet.grid_size_x.
    assert(grid_size_x >= fill_count);

    // Only the final packet signals completion.
    hsa_signal_t signal = {(i == last_fill_index) ? completion_signal_.handle
                                                  : 0};
    PopulateQueue(write_index + i, fill_code_handle_, &args[i], grid_size_x,
                  signal);

    total_fill_count += fill_count;
  }

  // Launch fill packet.
  ReleaseWriteIndex(write_index, num_fill_packet);

  // Wait for the packet to finish.
  if (HSA::hsa_signal_wait_acquire(completion_signal_, HSA_SIGNAL_CONDITION_LT,
                                   1, uint64_t(-1),
                                   HSA_WAIT_STATE_ACTIVE) != 0) {
    // Signal wait returned unexpected value.
    return HSA_STATUS_ERROR;
  }

  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
uint64_t BlitKernel::AcquireWriteIndex(uint32_t num_packet) {
|
||||
assert(queue_->size >= num_packet);
|
||||
|
||||
uint64_t write_index =
|
||||
HSA::hsa_queue_add_write_index_acq_rel(queue_, num_packet);
|
||||
|
||||
while (true) {
|
||||
// Wait until we have room in the queue;
|
||||
const uint64_t read_index = HSA::hsa_queue_load_read_index_relaxed(queue_);
|
||||
if ((write_index - read_index) < queue_->size) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return write_index;
|
||||
}
|
||||
|
||||
void BlitKernel::ReleaseWriteIndex(uint64_t write_index, uint32_t num_packet) {
|
||||
// Launch packet.
|
||||
while (true) {
|
||||
// Make sure that the address before ::current_offset is already released.
|
||||
// Otherwise the packet processor may read invalid packets.
|
||||
uint64_t expected_offset = write_index;
|
||||
if (atomic::Cas(&cached_index_, write_index + num_packet, expected_offset,
|
||||
std::memory_order_release) == expected_offset) {
|
||||
// Update doorbel register with last packet id.
|
||||
HSA::hsa_signal_store_release(queue_->doorbell_signal,
|
||||
write_index + num_packet - 1);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Appends a barrier-AND packet with release fence scope `fence` after
// `num_copy_packet` already-populated packets that start at `write_index`,
// submits the whole range and blocks until the barrier completes.
//
// When num_copy_packet is 0 the function reserves a queue slot for the
// barrier itself (write_index is then expected to be 0 on entry).
//
// Returns HSA_STATUS_SUCCESS once the completion signal drops below 1 and
// HSA_STATUS_ERROR when the wait returns a non-zero value.
//
// This function is not thread safe.
hsa_status_t BlitKernel::FenceRelease(uint64_t write_index,
                                      uint32_t num_copy_packet,
                                      hsa_fence_scope_t fence) {
  const uint16_t kBarrierPacketHeader =
      (HSA_PACKET_TYPE_BARRIER_AND << HSA_PACKET_HEADER_TYPE) |
      (1 << HSA_PACKET_HEADER_BARRIER) |
      (HSA_FENCE_SCOPE_NONE << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE) |
      (fence << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE);

  // Arm the completion signal; the barrier packet decrements it.
  HSA::hsa_signal_store_relaxed(completion_signal_, 1);

  hsa_barrier_and_packet_t packet = {0};
  packet.header = kInvalidPacketHeader;
  packet.completion_signal = completion_signal_;

  if (num_copy_packet == 0) {
    assert(write_index == 0);
    // Reserve write index.
    write_index = AcquireWriteIndex(1);
  }

  // The barrier goes into the slot right after the copy packets.
  hsa_barrier_and_packet_t* const slot =
      reinterpret_cast<hsa_barrier_and_packet_t*>(queue_->base_address) +
      ((write_index + num_copy_packet) & queue_bitmask_);

  // Write the body with an invalid header first, then publish the real header.
  std::atomic_thread_fence(std::memory_order_acquire);
  *slot = packet;
  std::atomic_thread_fence(std::memory_order_release);
  slot->header = kBarrierPacketHeader;

  // Launch packet.
  ReleaseWriteIndex(write_index, num_copy_packet + 1);

  // Wait for the packet to finish.
  const bool unexpected_value =
      HSA::hsa_signal_wait_acquire(packet.completion_signal,
                                   HSA_SIGNAL_CONDITION_LT, 1, uint64_t(-1),
                                   HSA_WAIT_STATE_ACTIVE) != 0;

  return unexpected_value ? HSA_STATUS_ERROR : HSA_STATUS_SUCCESS;
}
|
||||
|
||||
void BlitKernel::PopulateQueue(uint64_t index, uint64_t code_handle, void* args,
|
||||
uint32_t grid_size_x,
|
||||
hsa_signal_t completion_signal) {
|
||||
assert(IsMultipleOf(args, 16));
|
||||
|
||||
hsa_kernel_dispatch_packet_t packet = {0};
|
||||
|
||||
static const uint16_t kDispatchPacketHeader =
|
||||
(HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE) |
|
||||
(((completion_signal.handle != 0) ? 1 : 0) << HSA_PACKET_HEADER_BARRIER) |
|
||||
(HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE) |
|
||||
(HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE);
|
||||
|
||||
packet.header = kInvalidPacketHeader;
|
||||
packet.kernel_object = code_handle;
|
||||
packet.kernarg_address = args;
|
||||
|
||||
// Setup working size.
|
||||
const int kNumDimension = 1;
|
||||
packet.setup = kNumDimension << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS;
|
||||
packet.grid_size_x = AlignUp(static_cast<uint32_t>(grid_size_x), kGroupSize);
|
||||
packet.grid_size_y = packet.grid_size_z = 1;
|
||||
packet.workgroup_size_x = kGroupSize;
|
||||
packet.workgroup_size_y = packet.workgroup_size_z = 1;
|
||||
|
||||
packet.completion_signal = completion_signal;
|
||||
|
||||
// Populate queue buffer with AQL packet.
|
||||
hsa_kernel_dispatch_packet_t* queue_buffer =
|
||||
reinterpret_cast<hsa_kernel_dispatch_packet_t*>(queue_->base_address);
|
||||
std::atomic_thread_fence(std::memory_order_acquire);
|
||||
queue_buffer[index & queue_bitmask_] = packet;
|
||||
std::atomic_thread_fence(std::memory_order_release);
|
||||
queue_buffer[index & queue_bitmask_].header = kDispatchPacketHeader;
|
||||
}
|
||||
|
||||
// Hands out the next slot of the asynchronous copy kernarg ring.
//
// A shared counter is bumped atomically and masked with kernarg_async_mask_
// to select the slot, so slots are reused in round-robin order.
BlitKernel::KernelCopyArgs* BlitKernel::ObtainAsyncKernelCopyArg() {
  const uint32_t ticket =
      atomic::Add(&kernarg_async_counter_, 1U, std::memory_order_acquire);

  KernelCopyArgs* const slot = kernarg_async_ + (ticket & kernarg_async_mask_);
  assert(IsMultipleOf(slot, 16));

  return slot;
}
|
||||
|
||||
} // namespace amd
|
||||
@@ -1,858 +0,0 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "core/inc/amd_blit_sdma.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <atomic>
|
||||
#include <cmath>
|
||||
#include <cstring>
|
||||
|
||||
#include "core/inc/amd_gpu_agent.h"
|
||||
#include "core/inc/runtime.h"
|
||||
#include "core/inc/signal.h"
|
||||
|
||||
namespace amd {
|
||||
// SDMA packet for VI device.
|
||||
// Reference: http://people.freedesktop.org/~agd5f/dma_packets.txt
|
||||
|
||||
// SDMA packet opcodes (value of the `op` field in the packet header).
const unsigned int SDMA_OP_COPY = 1;
const unsigned int SDMA_OP_FENCE = 5;
const unsigned int SDMA_OP_POLL_REGMEM = 8;
const unsigned int SDMA_OP_ATOMIC = 10;
const unsigned int SDMA_OP_CONST_FILL = 11;

// Sub-opcode selecting the linear variant of SDMA_OP_COPY.
const unsigned int SDMA_SUBOP_COPY_LINEAR = 0;

// Atomic operation code for a 64-bit add.
const unsigned int SDMA_ATOMIC_ADD64 = 47;
|
||||
|
||||
// Linear copy packet: seven consecutive dwords. Every dword is a union that
// exposes both the named bit fields and the raw 32-bit value (DW_n_DATA).
typedef struct SDMA_PKT_COPY_LINEAR_TAG {
  // Dword 0: packet header.
  union {
    struct {
      unsigned int op : 8;
      unsigned int sub_op : 8;
      unsigned int extra_info : 16;
    };
    unsigned int DW_0_DATA;
  } HEADER_UNION;

  // Dword 1: amount to copy; the field is only 22 bits wide.
  union {
    struct {
      unsigned int count : 22;
      unsigned int reserved_0 : 10;
    };
    unsigned int DW_1_DATA;
  } COUNT_UNION;

  // Dword 2: swap parameters for destination and source.
  union {
    struct {
      unsigned int reserved_0 : 16;
      unsigned int dst_swap : 2;
      unsigned int reserved_1 : 6;
      unsigned int src_swap : 2;
      unsigned int reserved_2 : 6;
    };
    unsigned int DW_2_DATA;
  } PARAMETER_UNION;

  // Dword 3: low 32 bits of the source address.
  union {
    struct {
      unsigned int src_addr_31_0 : 32;
    };
    unsigned int DW_3_DATA;
  } SRC_ADDR_LO_UNION;

  // Dword 4: high 32 bits of the source address.
  union {
    struct {
      unsigned int src_addr_63_32 : 32;
    };
    unsigned int DW_4_DATA;
  } SRC_ADDR_HI_UNION;

  // Dword 5: low 32 bits of the destination address.
  union {
    struct {
      unsigned int dst_addr_31_0 : 32;
    };
    unsigned int DW_5_DATA;
  } DST_ADDR_LO_UNION;

  // Dword 6: high 32 bits of the destination address.
  union {
    struct {
      unsigned int dst_addr_63_32 : 32;
    };
    unsigned int DW_6_DATA;
  } DST_ADDR_HI_UNION;
} SDMA_PKT_COPY_LINEAR;

// sizeof() of this struct is used as the command size written to the queue,
// so the layout must stay exactly seven dwords.
static_assert(sizeof(SDMA_PKT_COPY_LINEAR) == 7 * sizeof(uint32_t),
              "SDMA_PKT_COPY_LINEAR must be exactly 7 dwords");
|
||||
|
||||
// Constant fill packet: five consecutive dwords. Every dword is a union that
// exposes both the named bit fields and the raw 32-bit value (DW_n_DATA).
typedef struct SDMA_PKT_CONSTANT_FILL_TAG {
  // Dword 0: packet header.
  union {
    struct {
      unsigned int op : 8;
      unsigned int sub_op : 8;
      unsigned int sw : 2;
      unsigned int reserved_0 : 12;
      unsigned int fillsize : 2;
    };
    unsigned int DW_0_DATA;
  } HEADER_UNION;

  // Dword 1: low 32 bits of the destination address.
  union {
    struct {
      unsigned int dst_addr_31_0 : 32;
    };
    unsigned int DW_1_DATA;
  } DST_ADDR_LO_UNION;

  // Dword 2: high 32 bits of the destination address.
  union {
    struct {
      unsigned int dst_addr_63_32 : 32;
    };
    unsigned int DW_2_DATA;
  } DST_ADDR_HI_UNION;

  // Dword 3: the 32-bit fill pattern.
  union {
    struct {
      unsigned int src_data_31_0 : 32;
    };
    unsigned int DW_3_DATA;
  } DATA_UNION;

  // Dword 4: amount to fill; the field is only 22 bits wide, so any value
  // >= (1 << 22) is silently truncated on assignment.
  union {
    struct {
      unsigned int count : 22;
      unsigned int reserved_0 : 10;
    };
    unsigned int DW_4_DATA;
  } COUNT_UNION;
} SDMA_PKT_CONSTANT_FILL;

// sizeof() of this struct is used as the command size written to the queue,
// so the layout must stay exactly five dwords.
static_assert(sizeof(SDMA_PKT_CONSTANT_FILL) == 5 * sizeof(uint32_t),
              "SDMA_PKT_CONSTANT_FILL must be exactly 5 dwords");
|
||||
|
||||
// Fence packet: four consecutive dwords. Every dword is a union that exposes
// both the named bit fields and the raw 32-bit value (DW_n_DATA).
typedef struct SDMA_PKT_FENCE_TAG {
  // Dword 0: packet header.
  union {
    struct {
      unsigned int op : 8;
      unsigned int sub_op : 8;
      unsigned int reserved_0 : 16;
    };
    unsigned int DW_0_DATA;
  } HEADER_UNION;

  // Dword 1: low 32 bits of the fence address.
  union {
    struct {
      unsigned int addr_31_0 : 32;
    };
    unsigned int DW_1_DATA;
  } ADDR_LO_UNION;

  // Dword 2: high 32 bits of the fence address.
  union {
    struct {
      unsigned int addr_63_32 : 32;
    };
    unsigned int DW_2_DATA;
  } ADDR_HI_UNION;

  // Dword 3: the 32-bit fence value.
  union {
    struct {
      unsigned int data : 32;
    };
    unsigned int DW_3_DATA;
  } DATA_UNION;
} SDMA_PKT_FENCE;

// sizeof() of this struct is used as the command size written to the queue
// and to size the fence pool, so the layout must stay exactly four dwords.
static_assert(sizeof(SDMA_PKT_FENCE) == 4 * sizeof(uint32_t),
              "SDMA_PKT_FENCE must be exactly 4 dwords");
|
||||
|
||||
// Poll register/memory packet: six consecutive dwords. Every dword is a union
// that exposes both the named bit fields and the raw 32-bit value (DW_n_DATA).
typedef struct SDMA_PKT_POLL_REGMEM_TAG {
  // Dword 0: packet header, including the compare function and memory flag.
  union {
    struct {
      unsigned int op : 8;
      unsigned int sub_op : 8;
      unsigned int reserved_0 : 10;
      unsigned int hdp_flush : 1;
      unsigned int reserved_1 : 1;
      unsigned int func : 3;
      unsigned int mem_poll : 1;
    };
    unsigned int DW_0_DATA;
  } HEADER_UNION;

  // Dword 1: low 32 bits of the polled address.
  union {
    struct {
      unsigned int addr_31_0 : 32;
    };
    unsigned int DW_1_DATA;
  } ADDR_LO_UNION;

  // Dword 2: high 32 bits of the polled address.
  union {
    struct {
      unsigned int addr_63_32 : 32;
    };
    unsigned int DW_2_DATA;
  } ADDR_HI_UNION;

  // Dword 3: reference value the polled content is compared against.
  union {
    struct {
      unsigned int value : 32;
    };
    unsigned int DW_3_DATA;
  } VALUE_UNION;

  // Dword 4: mask selecting which bits take part in the comparison.
  union {
    struct {
      unsigned int mask : 32;
    };
    unsigned int DW_4_DATA;
  } MASK_UNION;

  // Dword 5: poll interval and retry count.
  union {
    struct {
      unsigned int interval : 16;
      unsigned int retry_count : 12;
      unsigned int reserved_0 : 4;
    };
    unsigned int DW_5_DATA;
  } DW5_UNION;
} SDMA_PKT_POLL_REGMEM;

// sizeof() of this struct is used as the command size written to the queue,
// so the layout must stay exactly six dwords.
static_assert(sizeof(SDMA_PKT_POLL_REGMEM) == 6 * sizeof(uint32_t),
              "SDMA_PKT_POLL_REGMEM must be exactly 6 dwords");
|
||||
|
||||
// Atomic packet: eight consecutive dwords. Every dword is a union that
// exposes both the named bit fields and the raw 32-bit value (DW_n_DATA).
typedef struct SDMA_PKT_ATOMIC_TAG {
  // Dword 0: packet header, including the atomic operation selector.
  union {
    struct {
      unsigned int op : 8;
      unsigned int sub_op : 8;
      unsigned int l : 1;
      unsigned int reserved_0 : 8;
      unsigned int operation : 7;
    };
    unsigned int DW_0_DATA;
  } HEADER_UNION;

  // Dword 1: low 32 bits of the target address.
  union {
    struct {
      unsigned int addr_31_0 : 32;
    };
    unsigned int DW_1_DATA;
  } ADDR_LO_UNION;

  // Dword 2: high 32 bits of the target address.
  union {
    struct {
      unsigned int addr_63_32 : 32;
    };
    unsigned int DW_2_DATA;
  } ADDR_HI_UNION;

  // Dword 3: low 32 bits of the source operand.
  union {
    struct {
      unsigned int src_data_31_0 : 32;
    };
    unsigned int DW_3_DATA;
  } SRC_DATA_LO_UNION;

  // Dword 4: high 32 bits of the source operand.
  union {
    struct {
      unsigned int src_data_63_32 : 32;
    };
    unsigned int DW_4_DATA;
  } SRC_DATA_HI_UNION;

  // Dword 5: low 32 bits of the compare operand.
  union {
    struct {
      unsigned int cmp_data_31_0 : 32;
    };
    unsigned int DW_5_DATA;
  } CMP_DATA_LO_UNION;

  // Dword 6: high 32 bits of the compare operand.
  union {
    struct {
      unsigned int cmp_data_63_32 : 32;
    };
    unsigned int DW_6_DATA;
  } CMP_DATA_HI_UNION;

  // Dword 7: loop interval.
  union {
    struct {
      unsigned int loop_interval : 13;
      unsigned int reserved_0 : 19;
    };
    unsigned int DW_7_DATA;
  } LOOP_UNION;
} SDMA_PKT_ATOMIC;

// sizeof() of this struct is used as the command size written to the queue,
// so the layout must stay exactly eight dwords.
static_assert(sizeof(SDMA_PKT_ATOMIC) == 8 * sizeof(uint32_t),
              "SDMA_PKT_ATOMIC must be exactly 8 dwords");
|
||||
|
||||
// Returns the least-significant 32 bits of the numeric value of pointer p.
inline uint32_t ptrlow32(const void* p) {
  const uintptr_t address = reinterpret_cast<uintptr_t>(p);
  return static_cast<uint32_t>(address);
}
|
||||
|
||||
// Returns bits 63..32 of the numeric value of pointer p when built with
// HSA_LARGE_MODEL defined; otherwise always returns 0.
inline uint32_t ptrhigh32(const void* p) {
#if !defined(HSA_LARGE_MODEL)
  return 0;
#else
  const uintptr_t address = reinterpret_cast<uintptr_t>(p);
  return static_cast<uint32_t>(address >> 32);
#endif
}
|
||||
|
||||
// Constructs an uninitialized blit object: no queue buffer, no fence pool and
// a zeroed kernel queue descriptor. Initialize() does the actual setup.
BlitSdma::BlitSdma()
    : core::Blit(),
      queue_size_(0),
      queue_start_addr_(NULL),
      fence_base_addr_(NULL),
      fence_pool_size_(0),
      fence_pool_counter_(0),
      cached_reserve_offset_(0),
      cached_commit_offset_(0) {
  // Destroy() inspects queue_resource_.QueueId, so start from all zeroes.
  std::memset(&queue_resource_, 0, sizeof queue_resource_);
}
|
||||
|
||||
// The destructor releases nothing itself; resources are released by Destroy().
BlitSdma::~BlitSdma() {
}
|
||||
|
||||
// Sets up the SDMA queue used for blit operations on `agent`.
//
// Steps: record the packet sizes and per-submission limits, allocate and map
// the queue ring buffer, create the kernel SDMA queue, and allocate the pool
// of fence words used to detect command completion.
//
// Returns HSA_STATUS_SUCCESS when already initialized or on success,
// HSA_STATUS_ERROR when the agent is not a supported GPU, and
// HSA_STATUS_ERROR_OUT_OF_RESOURCES when any allocation or the queue creation
// fails (everything acquired so far is released through Destroy()).
hsa_status_t BlitSdma::Initialize(const core::Agent& agent) {
  if (queue_start_addr_ != NULL && queue_size_ != 0) {
    // Already initialized.
    return HSA_STATUS_SUCCESS;
  }

  if (agent.device_type() != core::Agent::kAmdGpuDevice) {
    return HSA_STATUS_ERROR;
  }

  const size_t kSdmaQueueSize = 1024 * 1024;

  linear_copy_command_size_ = sizeof(SDMA_PKT_COPY_LINEAR);
  fill_command_size_ = sizeof(SDMA_PKT_CONSTANT_FILL);
  fence_command_size_ = sizeof(SDMA_PKT_FENCE);
  poll_command_size_ = sizeof(SDMA_PKT_POLL_REGMEM);
  atomic_command_size_ = sizeof(SDMA_PKT_ATOMIC);

  // Every submission ends with one fence packet; the rest of the ring can
  // hold copy/fill packets. The limits are derived from the queue size
  // constant because queue_size_ is still zero at this point (using it made
  // the unsigned subtraction wrap around and produced meaningless limits).
  const uint32_t sync_command_size = fence_command_size_;
  const uint32_t usable_queue_size =
      static_cast<uint32_t>(kSdmaQueueSize) - sync_command_size;
  const uint32_t max_num_copy_command =
      static_cast<uint32_t>(usable_queue_size / linear_copy_command_size_);
  const uint32_t max_num_fill_command =
      static_cast<uint32_t>(usable_queue_size / fill_command_size_);

  max_single_linear_copy_size_ = 0x3fffe0;
  max_total_linear_copy_size_ = static_cast<size_t>(
      std::min(static_cast<uint64_t>(SIZE_MAX),
               static_cast<uint64_t>(max_num_copy_command) *
                   static_cast<uint64_t>(max_single_linear_copy_size_)));

  // The fill packet's count field is 22 bits wide, so the previous limit of
  // (1 << 22) was truncated to 0 when stored in the packet. Use the same
  // dword-aligned limit as the linear copy, which fits the field.
  // NOTE(review): confirm against the SDMA packet specification that this is
  // the intended maximum for constant-fill packets.
  max_single_fill_size_ = 0x3fffe0;
  max_total_fill_size_ = static_cast<size_t>(
      std::min(static_cast<uint64_t>(SIZE_MAX),
               static_cast<uint64_t>(max_num_fill_command) *
                   static_cast<uint64_t>(max_single_fill_size_)));

  const amd::GpuAgent& amd_gpu_agent = static_cast<const amd::GpuAgent&>(agent);

  if (amd_gpu_agent.isa()->version() != core::Isa::Version(8, 0, 3)) {
    assert(false && "Only for Fiji currently");
    return HSA_STATUS_ERROR;
  }

  // Allocate queue buffer.
  queue_size_ = kSdmaQueueSize;

  HsaMemFlags flags;
  flags.Value = 0;
  flags.ui32.HostAccess = 1;
  flags.ui32.AtomicAccessPartial = 1;
  flags.ui32.ExecuteAccess = 1;

  auto err = hsaKmtAllocMemory(amd_gpu_agent.node_id(), queue_size_, flags,
                               reinterpret_cast<void**>(&queue_start_addr_));

  if (err != HSAKMT_STATUS_SUCCESS) {
    assert(false && "SDMA queue memory allocation failure.");
    return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  }

  HSAuint64 alternate_va;
  err = hsaKmtMapMemoryToGPU(queue_start_addr_, queue_size_, &alternate_va);

  if (err != HSAKMT_STATUS_SUCCESS) {
    assert(false && "SDMA queue memory map failure.");
    Destroy();
    return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  }

  std::memset(queue_start_addr_, 0, queue_size_);

  // Ask the kernel driver to create the queue control block. This binds the
  // user mode queue object to the underlying device.
  const HSA_QUEUE_TYPE kQueueType_ = HSA_QUEUE_SDMA;
  if (HSAKMT_STATUS_SUCCESS !=
      hsaKmtCreateQueue(amd_gpu_agent.node_id(), kQueueType_, 100,
                        HSA_QUEUE_PRIORITY_MAXIMUM, queue_start_addr_,
                        queue_size_, NULL, &queue_resource_)) {
    Destroy();
    return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  }

  cached_reserve_offset_ = *(queue_resource_.Queue_write_ptr);
  cached_commit_offset_ = cached_reserve_offset_;

  // One fence word per fence packet that could fit in the ring.
  fence_pool_size_ =
      static_cast<uint32_t>(kSdmaQueueSize / fence_command_size_);

  // ObtainFenceObject() wraps the pool index with a bit mask, which is only
  // correct when the pool size is a power of two.
  assert(fence_pool_size_ != 0 &&
         (fence_pool_size_ & (fence_pool_size_ - 1)) == 0 &&
         "Fence pool size must be a power of two.");
  fence_pool_mask_ = fence_pool_size_ - 1;

  fence_base_addr_ = reinterpret_cast<uint32_t*>(
      core::Runtime::runtime_singleton_->system_allocator()(
          fence_pool_size_ * sizeof(uint32_t), 256));

  if (fence_base_addr_ == NULL) {
    Destroy();
    return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  }

  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
// Releases every resource acquired by Initialize() and resets the matching
// members, so that calling Destroy() again (or re-running Initialize()) is
// safe. Always returns HSA_STATUS_SUCCESS.
hsa_status_t BlitSdma::Destroy(void) {
  if (queue_resource_.QueueId != 0) {
    // Release queue resources from the kernel.
    auto err = hsaKmtDestroyQueue(queue_resource_.QueueId);
    assert(err == HSAKMT_STATUS_SUCCESS);
    (void)err;  // Only inspected by the assert; unused in release builds.
    memset(&queue_resource_, 0, sizeof(queue_resource_));
  }

  if (queue_start_addr_ != NULL && queue_size_ != 0) {
    // Release queue buffer.
    hsaKmtUnmapMemoryToGPU(queue_start_addr_);
    hsaKmtFreeMemory(queue_start_addr_, queue_size_);
  }

  if (fence_base_addr_ != NULL) {
    core::Runtime::runtime_singleton_->system_deallocator()(fence_base_addr_);
    // Forget the freed pool; otherwise a second Destroy() would free it again.
    fence_base_addr_ = NULL;
  }

  fence_pool_size_ = 0;
  queue_size_ = 0;
  queue_start_addr_ = NULL;
  cached_reserve_offset_ = 0;
  cached_commit_offset_ = 0;

  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
// Blocking linear copy of `size` bytes from `src` to `dst`.
//
// Queues as many copy packets as the size requires, followed by one fence
// packet, then waits until the fence word shows the fence value.
// Returns HSA_STATUS_ERROR_OUT_OF_RESOURCES when the request exceeds the
// total copy limit or no queue space could be reserved.
hsa_status_t BlitSdma::SubmitLinearCopyCommand(void* dst, const void* src,
                                               size_t size) {
  if (size > max_total_linear_copy_size_) {
    return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  }

  // A single packet copies at most max_single_linear_copy_size_ bytes, so a
  // larger request is split across several packets.
  const uint32_t num_copy_command = static_cast<uint32_t>(
      std::ceil(static_cast<double>(size) / max_single_linear_copy_size_));
  const uint32_t copy_bytes = num_copy_command * linear_copy_command_size_;
  const uint32_t submission_bytes = copy_bytes + fence_command_size_;

  // Take a fence word and clear it before anything is queued.
  const uint32_t kFenceValue = 2015;
  uint32_t* fence_addr = ObtainFenceObject();
  *fence_addr = 0;

  char* const submission_start = AcquireWriteAddress(submission_bytes);
  if (submission_start == NULL) {
    return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  }

  // Copy packets first, fence packet directly after them.
  BuildCopyCommand(submission_start, num_copy_command, dst, src, size);
  BuildFenceCommand(submission_start + copy_bytes, fence_addr, kFenceValue);

  ReleaseWriteAddress(submission_start, submission_bytes);

  WaitFence(fence_addr, kFenceValue);

  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
// Non-blocking linear copy of `size` bytes from `src` to `dst`.
//
// The queued sequence is: two poll packets per dependent signal, the copy
// packets, one atomic packet that decrements `out_signal`, and one fence
// packet. The function returns once the sequence is committed to the queue;
// it does not wait on the fence.
// Returns HSA_STATUS_ERROR_OUT_OF_RESOURCES when the request exceeds the
// total copy limit or no queue space could be reserved.
hsa_status_t BlitSdma::SubmitLinearCopyCommand(
    void* dst, const void* src, size_t size,
    std::vector<core::Signal*>& dep_signals, core::Signal& out_signal) {
  if (size > max_total_linear_copy_size_) {
    return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  }

  // A signal value is 64 bits wide but a poll packet compares 32 bits, so
  // each dependent signal needs two poll packets.
  const uint32_t num_poll_command =
      static_cast<uint32_t>(2 * dep_signals.size());
  const uint32_t poll_bytes = (num_poll_command * poll_command_size_);

  // A single packet copies at most max_single_linear_copy_size_ bytes, so a
  // larger request is split across several packets.
  const uint32_t num_copy_command = static_cast<uint32_t>(
      std::ceil(static_cast<double>(size) / max_single_linear_copy_size_));
  const uint32_t copy_bytes = num_copy_command * linear_copy_command_size_;

  const uint32_t submission_bytes =
      poll_bytes + copy_bytes + atomic_command_size_ + fence_command_size_;

  // Take a fence word and clear it before anything is queued.
  const uint32_t kFenceValue = 2015;
  uint32_t* fence_addr = ObtainFenceObject();
  *fence_addr = 0;

  char* const submission_start = AcquireWriteAddress(submission_bytes);
  if (submission_start == NULL) {
    return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  }

  char* cursor = submission_start;

  for (size_t i = 0; i < dep_signals.size(); ++i) {
    uint32_t* signal_words =
        reinterpret_cast<uint32_t*>(dep_signals[i]->ValueLocation());

    // Wait for the upper 32 bits of the signal value to become 0.
    BuildPollCommand(cursor, &signal_words[1], 0);
    cursor += poll_command_size_;

    // Then wait for the lower 32 bits of the signal value to become 0.
    BuildPollCommand(cursor, &signal_words[0], 0);
    cursor += poll_command_size_;
  }

  // The transfer runs once every poll is satisfied.
  BuildCopyCommand(cursor, num_copy_command, dst, src, size);
  cursor += copy_bytes;

  // Decrement the completion signal after the transfer.
  BuildAtomicDecrementCommand(cursor, out_signal.ValueLocation());
  cursor += atomic_command_size_;

  BuildFenceCommand(cursor, fence_addr, kFenceValue);

  ReleaseWriteAddress(submission_start, submission_bytes);

  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
// Blocking fill of `count` 32-bit words starting at `ptr` with `value`.
//
// Queues as many constant-fill packets as the byte size requires, followed by
// one fence packet, then waits until the fence word shows the fence value.
// Returns HSA_STATUS_ERROR_OUT_OF_RESOURCES when the request exceeds the
// total fill limit or no queue space could be reserved.
hsa_status_t BlitSdma::SubmitLinearFillCommand(void* ptr, uint32_t value,
                                               size_t count) {
  const size_t size = count * sizeof(uint32_t);

  if (size > max_total_fill_size_) {
    return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  }

  // A single packet fills at most max_single_fill_size_ bytes, so a larger
  // request is split across several packets.
  const uint32_t num_fill_command = static_cast<uint32_t>(
      std::ceil(static_cast<double>(size) / max_single_fill_size_));
  const uint32_t fill_bytes = num_fill_command * fill_command_size_;
  const uint32_t submission_bytes = fill_bytes + fence_command_size_;

  char* const submission_start = AcquireWriteAddress(submission_bytes);
  if (submission_start == NULL) {
    return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  }

  char* cursor = submission_start;
  char* const fill_base = static_cast<char*>(ptr);
  size_t bytes_done = 0;

  for (uint32_t packet_index = 0; packet_index < num_fill_command;
       ++packet_index) {
    const uint32_t chunk = static_cast<uint32_t>(
        std::min((size - bytes_done), max_single_fill_size_));
    void* chunk_dst = fill_base + bytes_done;

    SDMA_PKT_CONSTANT_FILL* packet =
        reinterpret_cast<SDMA_PKT_CONSTANT_FILL*>(cursor);
    memset(packet, 0, sizeof(SDMA_PKT_CONSTANT_FILL));

    packet->HEADER_UNION.op = SDMA_OP_CONST_FILL;
    packet->HEADER_UNION.fillsize = 2;  // DW fill

    packet->DST_ADDR_LO_UNION.dst_addr_31_0 = ptrlow32(chunk_dst);
    packet->DST_ADDR_HI_UNION.dst_addr_63_32 = ptrhigh32(chunk_dst);

    packet->DATA_UNION.src_data_31_0 = value;

    packet->COUNT_UNION.count = chunk;

    cursor += fill_command_size_;
    bytes_done += chunk;
  }

  assert(bytes_done == size);

  // Take a fence word, clear it, and queue the fence after the fill packets.
  const uint32_t kFenceValue = 2015;
  uint32_t* fence_addr = ObtainFenceObject();
  *fence_addr = 0;

  BuildFenceCommand(cursor, fence_addr, kFenceValue);

  ReleaseWriteAddress(submission_start, submission_bytes);

  WaitFence(fence_addr, kFenceValue);

  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
char* BlitSdma::AcquireWriteAddress(uint32_t cmd_size) {
|
||||
if (cmd_size > queue_size_) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
while (true) {
|
||||
uint32_t curr_offset =
|
||||
atomic::Load(&cached_reserve_offset_, std::memory_order_acquire);
|
||||
const uint32_t end_offset = curr_offset + cmd_size;
|
||||
|
||||
if (end_offset >= queue_size_) {
|
||||
// Queue buffer is not enough to contain the new command.
|
||||
|
||||
// The safe space for the new command is the start of the queue buffer to
|
||||
// the last read address.
|
||||
if (atomic::Load(queue_resource_.Queue_read_ptr,
|
||||
std::memory_order_acquire) < cmd_size) {
|
||||
// There is no safe space to use currently.
|
||||
return NULL;
|
||||
}
|
||||
|
||||
WrapQueue(cmd_size);
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
if (atomic::Cas(&cached_reserve_offset_, end_offset, curr_offset,
|
||||
std::memory_order_release) == curr_offset) {
|
||||
return queue_start_addr_ + curr_offset;
|
||||
}
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void BlitSdma::UpdateWriteAndDoorbellRegister(uint32_t current_offset,
|
||||
uint32_t new_offset) {
|
||||
while (true) {
|
||||
// Make sure that the address before ::current_offset is already released.
|
||||
// Otherwise the CP may read invalid packets.
|
||||
if (atomic::Load(&cached_commit_offset_, std::memory_order_acquire) ==
|
||||
current_offset) {
|
||||
// Update write pointer and doorbel register.
|
||||
atomic::Store(queue_resource_.Queue_write_ptr, new_offset);
|
||||
atomic::Store(queue_resource_.Queue_DoorBell, new_offset);
|
||||
atomic::Store(&cached_commit_offset_, new_offset,
|
||||
std::memory_order_release);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void BlitSdma::ReleaseWriteAddress(char* cmd_addr, uint32_t cmd_size) {
|
||||
assert(cmd_addr != NULL);
|
||||
assert(cmd_addr >= queue_start_addr_);
|
||||
|
||||
if (cmd_size > queue_size_) {
|
||||
assert(false && "cmd_addr is outside the queue buffer range");
|
||||
return;
|
||||
}
|
||||
|
||||
// Update write register.
|
||||
const uint32_t curent_offset = cmd_addr - queue_start_addr_;
|
||||
const uint32_t new_offset = curent_offset + cmd_size;
|
||||
UpdateWriteAndDoorbellRegister(curent_offset, new_offset);
|
||||
}
|
||||
|
||||
void BlitSdma::WrapQueue(uint32_t cmd_size) {
|
||||
// Re-determine the offset into queue buffer where NOOP instructions
|
||||
// should be written.
|
||||
while (true) {
|
||||
uint32_t curent_offset =
|
||||
atomic::Load(&cached_reserve_offset_, std::memory_order_acquire);
|
||||
const uint32_t end_offset = curent_offset + cmd_size;
|
||||
if (end_offset < queue_size_) {
|
||||
return;
|
||||
}
|
||||
|
||||
std::lock_guard<std::mutex> guard(wrap_lock_);
|
||||
|
||||
if (atomic::Cas(&cached_reserve_offset_, queue_size_ + 1, curent_offset,
|
||||
std::memory_order_release) == curent_offset) {
|
||||
// Wait till all reserved packets are commited.
|
||||
while (atomic::Load(&cached_commit_offset_, std::memory_order_acquire) !=
|
||||
curent_offset) {
|
||||
os::YieldThread();
|
||||
}
|
||||
|
||||
// Fill the remainder of the queue with NOOP commands.
|
||||
char* noop_address = queue_start_addr_ + curent_offset;
|
||||
const size_t noop_commands_size = queue_size_ - curent_offset;
|
||||
memset(noop_address, 0, noop_commands_size);
|
||||
|
||||
// Update write and doorbell registers to execute NOOP instructions.
|
||||
UpdateWriteAndDoorbellRegister(curent_offset, 0);
|
||||
|
||||
// Wait till queue wrapped.
|
||||
while (atomic::Load(queue_resource_.Queue_read_ptr,
|
||||
std::memory_order_acquire) != 0) {
|
||||
os::YieldThread();
|
||||
}
|
||||
|
||||
// Open access to queue.
|
||||
atomic::Store(&cached_reserve_offset_, 0U, std::memory_order_release);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void BlitSdma::BuildFenceCommand(char* fence_command_addr, uint32_t* fence,
|
||||
uint32_t fence_value) {
|
||||
assert(fence_command_addr != NULL);
|
||||
SDMA_PKT_FENCE* packet_addr =
|
||||
reinterpret_cast<SDMA_PKT_FENCE*>(fence_command_addr);
|
||||
|
||||
memset(packet_addr, 0, sizeof(SDMA_PKT_FENCE));
|
||||
|
||||
packet_addr->HEADER_UNION.op = SDMA_OP_FENCE;
|
||||
|
||||
packet_addr->ADDR_LO_UNION.addr_31_0 = ptrlow32(fence);
|
||||
|
||||
packet_addr->ADDR_HI_UNION.addr_63_32 = ptrhigh32(fence);
|
||||
|
||||
packet_addr->DATA_UNION.data = fence_value;
|
||||
}
|
||||
|
||||
uint32_t* BlitSdma::ObtainFenceObject() {
|
||||
const uint32_t fence_index =
|
||||
atomic::Add(&fence_pool_counter_, 1U, std::memory_order_acquire);
|
||||
uint32_t* fence_addr = &fence_base_addr_[fence_index & fence_pool_mask_];
|
||||
assert(IsMultipleOf(fence_addr, 4));
|
||||
return fence_addr;
|
||||
}
|
||||
|
||||
void BlitSdma::WaitFence(uint32_t* fence, uint32_t fence_value) {
|
||||
int spin_count = 51;
|
||||
while (atomic::Load(fence, std::memory_order_acquire) != fence_value) {
|
||||
if (--spin_count > 0) {
|
||||
continue;
|
||||
}
|
||||
os::YieldThread();
|
||||
}
|
||||
}
|
||||
|
||||
void BlitSdma::BuildCopyCommand(char* cmd_addr, uint32_t num_copy_command,
|
||||
void* dst, const void* src, size_t size) {
|
||||
size_t cur_size = 0;
|
||||
for (uint32_t i = 0; i < num_copy_command; ++i) {
|
||||
const uint32_t copy_size = static_cast<uint32_t>(
|
||||
std::min((size - cur_size), max_single_linear_copy_size_));
|
||||
|
||||
void* cur_dst = static_cast<char*>(dst) + cur_size;
|
||||
const void* cur_src = static_cast<const char*>(src) + cur_size;
|
||||
|
||||
SDMA_PKT_COPY_LINEAR* packet_addr =
|
||||
reinterpret_cast<SDMA_PKT_COPY_LINEAR*>(cmd_addr);
|
||||
|
||||
memset(packet_addr, 0, sizeof(SDMA_PKT_COPY_LINEAR));
|
||||
|
||||
packet_addr->HEADER_UNION.op = SDMA_OP_COPY;
|
||||
packet_addr->HEADER_UNION.sub_op = SDMA_SUBOP_COPY_LINEAR;
|
||||
|
||||
packet_addr->COUNT_UNION.count = copy_size;
|
||||
|
||||
packet_addr->SRC_ADDR_LO_UNION.src_addr_31_0 = ptrlow32(cur_src);
|
||||
packet_addr->SRC_ADDR_HI_UNION.src_addr_63_32 = ptrhigh32(cur_src);
|
||||
|
||||
packet_addr->DST_ADDR_LO_UNION.dst_addr_31_0 = ptrlow32(cur_dst);
|
||||
packet_addr->DST_ADDR_HI_UNION.dst_addr_63_32 = ptrhigh32(cur_dst);
|
||||
|
||||
cmd_addr += linear_copy_command_size_;
|
||||
cur_size += copy_size;
|
||||
}
|
||||
|
||||
assert(cur_size == size);
|
||||
}
|
||||
|
||||
void BlitSdma::BuildPollCommand(char* cmd_addr, void* addr,
|
||||
uint32_t reference) {
|
||||
SDMA_PKT_POLL_REGMEM* packet_addr =
|
||||
reinterpret_cast<SDMA_PKT_POLL_REGMEM*>(cmd_addr);
|
||||
|
||||
memset(packet_addr, 0, sizeof(SDMA_PKT_POLL_REGMEM));
|
||||
|
||||
packet_addr->HEADER_UNION.op = SDMA_OP_POLL_REGMEM;
|
||||
packet_addr->HEADER_UNION.mem_poll = 1;
|
||||
packet_addr->HEADER_UNION.func = 0x3; // IsEqual.
|
||||
packet_addr->ADDR_LO_UNION.addr_31_0 = ptrlow32(addr);
|
||||
packet_addr->ADDR_HI_UNION.addr_63_32 = ptrhigh32(addr);
|
||||
|
||||
packet_addr->VALUE_UNION.value = reference;
|
||||
|
||||
packet_addr->MASK_UNION.mask = 0xffffffff; // Compare the whole content.
|
||||
|
||||
packet_addr->DW5_UNION.interval = 0x04;
|
||||
packet_addr->DW5_UNION.retry_count = 0xfff; // Retry forever.
|
||||
}
|
||||
|
||||
void BlitSdma::BuildAtomicDecrementCommand(char* cmd_addr, void* addr) {
|
||||
SDMA_PKT_ATOMIC* packet_addr = reinterpret_cast<SDMA_PKT_ATOMIC*>(cmd_addr);
|
||||
|
||||
memset(packet_addr, 0, sizeof(SDMA_PKT_ATOMIC));
|
||||
|
||||
packet_addr->HEADER_UNION.op = SDMA_OP_ATOMIC;
|
||||
packet_addr->HEADER_UNION.operation = SDMA_ATOMIC_ADD64;
|
||||
|
||||
packet_addr->ADDR_LO_UNION.addr_31_0 = ptrlow32(addr);
|
||||
packet_addr->ADDR_HI_UNION.addr_63_32 = ptrhigh32(addr);
|
||||
|
||||
packet_addr->SRC_DATA_LO_UNION.src_data_31_0 = 0xffffffff;
|
||||
packet_addr->SRC_DATA_HI_UNION.src_data_63_32 = 0xffffffff;
|
||||
}
|
||||
} // namespace amd
|
||||
@@ -1,329 +0,0 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "core/inc/amd_cpu_agent.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstring>
|
||||
|
||||
#include "core/inc/amd_memory_region.h"
|
||||
#include "core/inc/host_queue.h"
|
||||
|
||||
#include "hsa_ext_image.h"
|
||||
|
||||
namespace amd {
|
||||
// Creates the CPU agent for topology node `node`, keeps a copy of the node
// properties, and builds the memory region list and the cache list.
CpuAgent::CpuAgent(HSAuint32 node, const HsaNodeProperties& node_props)
    : core::Agent(node, kAmdCpuDevice),
      properties_(node_props) {
  InitRegionList();
  InitCacheList();
}
|
||||
|
||||
// Applies DeleteObject to every region created by InitRegionList()
// (presumably deleting it -- confirm against DeleteObject's definition) and
// empties the list.
CpuAgent::~CpuAgent() {
  DeleteObject region_deleter;
  std::for_each(regions_.begin(), regions_.end(), region_deleter);
  regions_.clear();
}
|
||||
|
||||
void CpuAgent::InitRegionList() {
|
||||
const bool is_apu_node = (properties_.NumFComputeCores > 0);
|
||||
|
||||
std::vector<HsaMemoryProperties> mem_props(properties_.NumMemoryBanks);
|
||||
if (HSAKMT_STATUS_SUCCESS ==
|
||||
hsaKmtGetNodeMemoryProperties(node_id(), properties_.NumMemoryBanks,
|
||||
&mem_props[0])) {
|
||||
std::vector<HsaMemoryProperties>::iterator system_prop =
|
||||
std::find_if(mem_props.begin(), mem_props.end(),
|
||||
[](HsaMemoryProperties prop) -> bool {
|
||||
return (prop.SizeInBytes > 0 && prop.HeapType == HSA_HEAPTYPE_SYSTEM);
|
||||
});
|
||||
|
||||
if (system_prop != mem_props.end()) {
|
||||
MemoryRegion* system_region_fine =
|
||||
new MemoryRegion(true, is_apu_node, this, *system_prop);
|
||||
|
||||
regions_.push_back(system_region_fine);
|
||||
|
||||
if (!is_apu_node) {
|
||||
MemoryRegion* system_region_coarse =
|
||||
new MemoryRegion(false, is_apu_node, this, *system_prop);
|
||||
|
||||
regions_.push_back(system_region_coarse);
|
||||
}
|
||||
} else {
|
||||
HsaMemoryProperties system_props;
|
||||
std::memset(&system_props, 0, sizeof(HsaMemoryProperties));
|
||||
|
||||
const uintptr_t system_base = os::GetUserModeVirtualMemoryBase();
|
||||
const size_t system_physical_size = os::GetUsablePhysicalHostMemorySize();
|
||||
assert(system_physical_size != 0);
|
||||
|
||||
system_props.HeapType = HSA_HEAPTYPE_SYSTEM;
|
||||
system_props.SizeInBytes = (HSAuint64)system_physical_size;
|
||||
system_props.VirtualBaseAddress = (HSAuint64)(system_base);
|
||||
|
||||
MemoryRegion* system_region =
|
||||
new MemoryRegion(true, is_apu_node, this, system_props);
|
||||
|
||||
regions_.push_back(system_region);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void CpuAgent::InitCacheList() {
|
||||
// Get CPU cache information.
|
||||
cache_props_.resize(properties_.NumCaches);
|
||||
if (HSAKMT_STATUS_SUCCESS !=
|
||||
hsaKmtGetNodeCacheProperties(node_id(), properties_.CComputeIdLo,
|
||||
properties_.NumCaches, &cache_props_[0])) {
|
||||
cache_props_.clear();
|
||||
} else {
|
||||
// Only store CPU D-cache.
|
||||
for (size_t cache_id = 0; cache_id < cache_props_.size(); ++cache_id) {
|
||||
const HsaCacheType type = cache_props_[cache_id].CacheType;
|
||||
if (type.ui32.CPU != 1 || type.ui32.Instruction == 1) {
|
||||
cache_props_.erase(cache_props_.begin() + cache_id);
|
||||
--cache_id;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Invokes `callback` on memory regions, passing `data` through.
//
// With `include_peer` false only this agent's own regions are visited.
// Otherwise the runtime-wide fine-grained system regions are visited first,
// then the coarse-grained ones. The first status other than
// HSA_STATUS_SUCCESS stops the walk and is returned.
hsa_status_t CpuAgent::VisitRegion(bool include_peer,
                                   hsa_status_t (*callback)(hsa_region_t region,
                                                            void* data),
                                   void* data) const {
  if (include_peer) {
    // Expose all system regions in the system.
    const hsa_status_t fine_status = VisitRegion(
        core::Runtime::runtime_singleton_->system_regions_fine(), callback,
        data);
    if (fine_status != HSA_STATUS_SUCCESS) {
      return fine_status;
    }

    return VisitRegion(
        core::Runtime::runtime_singleton_->system_regions_coarse(), callback,
        data);
  }

  return VisitRegion(regions_, callback, data);
}
|
||||
|
||||
// Invokes `callback` once per entry of `regions`, in order, with the region
// converted to its handle and `data` passed through. Stops at, and returns,
// the first status other than HSA_STATUS_SUCCESS.
hsa_status_t CpuAgent::VisitRegion(
    const std::vector<const core::MemoryRegion*>& regions,
    hsa_status_t (*callback)(hsa_region_t region, void* data),
    void* data) const {
  for (size_t index = 0; index < regions.size(); ++index) {
    const hsa_region_t handle = core::MemoryRegion::Convert(regions[index]);
    const hsa_status_t result = callback(handle, data);
    if (result != HSA_STATUS_SUCCESS) {
      return result;
    }
  }

  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
// Region iteration entry point: forwards to VisitRegion with peer regions
// included.
hsa_status_t CpuAgent::IterateRegion(
    hsa_status_t (*callback)(hsa_region_t region, void* data),
    void* data) const {
  const bool include_peer = true;
  return VisitRegion(include_peer, callback, data);
}
|
||||
|
||||
// Writes the value of the requested agent attribute into the caller-provided
// buffer `value`.
//
// The caller is responsible for supplying a buffer large enough for the
// attribute's type; this function performs no size checking of `value`.
// Most kernel-dispatch related attributes are reported as zero for the CPU
// agent.
//
// Returns HSA_STATUS_SUCCESS when the attribute is recognized, otherwise
// HSA_STATUS_ERROR_INVALID_ARGUMENT (in which case `value` is untouched).
hsa_status_t CpuAgent::GetInfo(hsa_agent_info_t attribute, void* value) const {
  const size_t kNameSize = 64;  // agent, and vendor name size limit

  // The switch mixes core, image-extension and AMD-extension enumerators, so
  // it is performed on a plain integer rather than on hsa_agent_info_t.
  const size_t attribute_u = static_cast<size_t>(attribute);
  switch (attribute_u) {
    case HSA_AGENT_INFO_NAME:
      // TODO: hardcode for now, wait until SWDEV-88894 implemented
      std::memset(value, 0, kNameSize);
      std::memcpy(value, "CPU Device", sizeof("CPU Device"));
      break;
    case HSA_AGENT_INFO_VENDOR_NAME:
      // TODO: hardcode for now, wait until SWDEV-88894 implemented
      std::memset(value, 0, kNameSize);
      std::memcpy(value, "CPU", sizeof("CPU"));
      break;
    case HSA_AGENT_INFO_FEATURE:
      *((hsa_agent_feature_t*)value) = static_cast<hsa_agent_feature_t>(0);
      break;
    case HSA_AGENT_INFO_MACHINE_MODEL:
#if defined(HSA_LARGE_MODEL)
      *((hsa_machine_model_t*)value) = HSA_MACHINE_MODEL_LARGE;
#else
      *((hsa_machine_model_t*)value) = HSA_MACHINE_MODEL_SMALL;
#endif
      break;
    case HSA_AGENT_INFO_BASE_PROFILE_DEFAULT_FLOAT_ROUNDING_MODES:
    case HSA_AGENT_INFO_DEFAULT_FLOAT_ROUNDING_MODE:
      // TODO: validate if this is true.
      *((hsa_default_float_rounding_mode_t*)value) =
          HSA_DEFAULT_FLOAT_ROUNDING_MODE_NEAR;
      break;
    case HSA_AGENT_INFO_FAST_F16_OPERATION:
      // TODO: validate if this is true.
      *((bool*)value) = false;
      break;
    case HSA_AGENT_INFO_PROFILE:
      *((hsa_profile_t*)value) = HSA_PROFILE_FULL;
      break;
    case HSA_AGENT_INFO_WAVEFRONT_SIZE:
      *((uint32_t*)value) = 0;
      break;
    case HSA_AGENT_INFO_WORKGROUP_MAX_DIM:
      std::memset(value, 0, sizeof(uint16_t) * 3);
      break;
    case HSA_AGENT_INFO_WORKGROUP_MAX_SIZE:
      *((uint32_t*)value) = 0;
      break;
    case HSA_AGENT_INFO_GRID_MAX_DIM:
      std::memset(value, 0, sizeof(hsa_dim3_t));
      break;
    case HSA_AGENT_INFO_GRID_MAX_SIZE:
      *((uint32_t*)value) = 0;
      break;
    case HSA_AGENT_INFO_FBARRIER_MAX_SIZE:
      // TODO: ?
      *((uint32_t*)value) = 0;
      break;
    case HSA_AGENT_INFO_QUEUES_MAX:
      *((uint32_t*)value) = 0;
      break;
    case HSA_AGENT_INFO_QUEUE_MIN_SIZE:
      *((uint32_t*)value) = 0;
      break;
    case HSA_AGENT_INFO_QUEUE_MAX_SIZE:
      *((uint32_t*)value) = 0;
      break;
    case HSA_AGENT_INFO_QUEUE_TYPE:
      *((hsa_queue_type_t*)value) = static_cast<hsa_queue_type_t>(0);
      break;
    case HSA_AGENT_INFO_NODE:
      // TODO: associate with OS NUMA support (numactl / GetNumaProcessorNode).
      *((uint32_t*)value) = node_id();
      break;
    case HSA_AGENT_INFO_DEVICE:
      *((hsa_device_type_t*)value) = HSA_DEVICE_TYPE_CPU;
      break;
    case HSA_AGENT_INFO_CACHE_SIZE: {
      // The output is treated as four uint32_t slots, one per cache level
      // (slot index = level - 1). Levels without a reported cache stay zero.
      const uint32_t kMaxCacheLevels = 4;
      std::memset(value, 0, sizeof(uint32_t) * kMaxCacheLevels);

      assert(cache_props_.size() > 0 && "CPU cache info missing.");
      const size_t num_cache = cache_props_.size();
      for (size_t i = 0; i < num_cache; ++i) {
        const uint32_t line_level = cache_props_[i].CacheLevel;
        // Ignore levels that do not map onto one of the four slots; writing
        // them would index outside the caller's buffer.
        if (line_level == 0 || line_level > kMaxCacheLevels) {
          continue;
        }
        // CacheSize is scaled by 1024 before being reported.
        ((uint32_t*)value)[line_level - 1] = cache_props_[i].CacheSize * 1024;
      }
    } break;
    case HSA_AGENT_INFO_ISA:
      ((hsa_isa_t*)value)->handle = 0;
      break;
    case HSA_AGENT_INFO_EXTENSIONS:
      memset(value, 0, sizeof(uint8_t) * 128);
      break;
    case HSA_AGENT_INFO_VERSION_MAJOR:
      *((uint16_t*)value) = 1;
      break;
    case HSA_AGENT_INFO_VERSION_MINOR:
      *((uint16_t*)value) = 0;
      break;
    case HSA_EXT_AGENT_INFO_IMAGE_1D_MAX_ELEMENTS:
    case HSA_EXT_AGENT_INFO_IMAGE_1DA_MAX_ELEMENTS:
    case HSA_EXT_AGENT_INFO_IMAGE_1DB_MAX_ELEMENTS:
      *((uint32_t*)value) = 0;
      break;
    case HSA_EXT_AGENT_INFO_IMAGE_2D_MAX_ELEMENTS:
    case HSA_EXT_AGENT_INFO_IMAGE_2DA_MAX_ELEMENTS:
    case HSA_EXT_AGENT_INFO_IMAGE_2DDEPTH_MAX_ELEMENTS:
    case HSA_EXT_AGENT_INFO_IMAGE_2DADEPTH_MAX_ELEMENTS:
      memset(value, 0, sizeof(uint32_t) * 2);
      break;
    case HSA_EXT_AGENT_INFO_IMAGE_3D_MAX_ELEMENTS:
      memset(value, 0, sizeof(uint32_t) * 3);
      break;
    case HSA_EXT_AGENT_INFO_IMAGE_ARRAY_MAX_LAYERS:
      *((uint32_t*)value) = 0;
      break;
    case HSA_EXT_AGENT_INFO_MAX_IMAGE_RD_HANDLES:
    case HSA_EXT_AGENT_INFO_MAX_IMAGE_RORW_HANDLES:
    case HSA_EXT_AGENT_INFO_MAX_SAMPLER_HANDLERS:
      *((uint32_t*)value) = 0;
      break;
    case HSA_AMD_AGENT_INFO_CHIP_ID:
      *((uint32_t*)value) = properties_.DeviceId;
      break;
    case HSA_AMD_AGENT_INFO_CACHELINE_SIZE:
      // TODO: hardcode for now.
      *((uint32_t*)value) = 64;
      break;
    case HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT:
      *((uint32_t*)value) = properties_.NumCPUCores;
      break;
    case HSA_AMD_AGENT_INFO_MAX_CLOCK_FREQUENCY:
      *((uint32_t*)value) = properties_.MaxEngineClockMhzCCompute;
      break;
    case HSA_AMD_AGENT_INFO_DRIVER_NODE_ID:
      *((uint32_t*)value) = node_id();
      break;
    case HSA_AMD_AGENT_INFO_MAX_ADDRESS_WATCH_POINTS:
      // Reported as 2^WatchPointsTotalBits.
      *((uint32_t*)value) = static_cast<uint32_t>(
          1 << properties_.Capability.ui32.WatchPointsTotalBits);
      break;
    case HSA_AMD_AGENT_INFO_BDFID:
      *((uint32_t*)value) = static_cast<uint32_t>(properties_.LocationId);
      break;
    default:
      return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }
  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
// Queue creation is not supported on the CPU agent: every argument is ignored
// and *queue is never written. Always returns HSA_STATUS_ERROR.
hsa_status_t CpuAgent::QueueCreate(size_t size, hsa_queue_type_t queue_type,
                                   core::HsaEventCallback event_callback,
                                   void* data, uint32_t private_segment_size,
                                   uint32_t group_segment_size,
                                   core::Queue** queue) {
  // No HW AQL packet processor on CPU device.
  const hsa_status_t unsupported = HSA_STATUS_ERROR;
  return unsupported;
}
|
||||
|
||||
} // namespace amd
|
||||
@@ -1,863 +0,0 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "core/inc/amd_gpu_agent.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <atomic>
|
||||
#include <cstring>
|
||||
#include <climits>
|
||||
#include <vector>
|
||||
|
||||
#include "core/inc/amd_aql_queue.h"
|
||||
#include "core/inc/amd_blit_kernel.h"
|
||||
#include "core/inc/amd_blit_sdma.h"
|
||||
#include "core/inc/amd_memory_region.h"
|
||||
#include "core/inc/interrupt_signal.h"
|
||||
#include "core/inc/isa.h"
|
||||
#include "core/inc/runtime.h"
|
||||
|
||||
#include "utils/sp3/sp3.h"
|
||||
|
||||
#include "hsa_ext_image.h"
|
||||
|
||||
// Size of scratch (private) segment pre-allocated per thread, in bytes.
|
||||
#define DEFAULT_SCRATCH_BYTES_PER_THREAD 2048
|
||||
|
||||
namespace amd {
|
||||
// Builds a GPU agent for thunk node `node` described by `node_props`.
//
// In order, the constructor: derives the HSA profile from the presence of CPU
// cores on the node, samples the clock counters, resolves the ISA, flags
// gfx 7.0.0 as a Kaveri device, selects the initial coherency type, clamps the
// queue limit taken from HSA_MAX_QUEUES, and finally initializes regions,
// scratch pool, cache list and the trap handler.
GpuAgent::GpuAgent(HSAuint32 node, const HsaNodeProperties& node_props)
    : GpuAgentInt(node),
      properties_(node_props),
      current_coherency_type_(HSA_AMD_COHERENCY_TYPE_COHERENT),
      blit_(NULL),
      is_kv_device_(false),
      trap_code_buf_(NULL),
      trap_code_buf_size_(0),
      ape1_base_(0),
      ape1_size_(0) {
  // A node that also carries CPU cores is an APU and gets the full profile.
  if (properties_.NumCPUCores > 0) {
    profile_ = HSA_PROFILE_FULL;
  } else {
    profile_ = HSA_PROFILE_BASE;
  }

  // Both clock samples start out identical.
  HSAKMT_STATUS err = hsaKmtGetClockCounters(node_id(), &t0_);
  t1_ = t0_;
  assert(err == HSAKMT_STATUS_SUCCESS && "hsaGetClockCounters error");

  // Set instruction set architecture via node property, only on GPU device.
  isa_ = (core::Isa*)core::IsaRegistry::GetIsa(core::Isa::Version(
      node_props.EngineId.ui32.Major, node_props.EngineId.ui32.Minor,
      node_props.EngineId.ui32.Stepping));

  // Check if the device is Kaveri (ISA version 7.0.0), only on GPU device.
  const bool is_kaveri = isa_->GetMajorVersion() == 7 &&
                         isa_->GetMinorVersion() == 0 &&
                         isa_->GetStepping() == 0;
  if (is_kaveri) {
    is_kv_device_ = true;
  }

  if (profile_ == HSA_PROFILE_FULL) {
    current_coherency_type(HSA_AMD_COHERENCY_TYPE_COHERENT);
  } else {
    current_coherency_type(HSA_AMD_COHERENCY_TYPE_NONCOHERENT);
  }

  // The per-build cap doubles as the default when HSA_MAX_QUEUES is unset,
  // empty, or parses to zero.
#if !defined(HSA_LARGE_MODEL) || !defined(__linux__)
  const unsigned int kQueueCap = 10U;
#else
  const unsigned int kQueueCap = 128U;
#endif
  max_queues_ =
      static_cast<uint32_t>(atoi(os::GetEnvVar("HSA_MAX_QUEUES").c_str()));
  if (max_queues_ == 0) {
    max_queues_ = kQueueCap;
  }
  max_queues_ = std::min(kQueueCap, max_queues_);

  // Populate region list.
  InitRegionList();

  // Reserve memory for scratch.
  InitScratchPool();

  // Populate cache list.
  InitCacheList();

  // Bind the second-level trap handler to this node.
  BindTrapHandler();
}
|
||||
|
||||
// Releases everything the agent owns: the blit object, the APE1 allocation,
// the scratch pool backing memory, the trap handler shader buffer and all
// memory region objects, in that order.
GpuAgent::~GpuAgent() {
  // Blit object.
  if (blit_ != NULL) {
    hsa_status_t destroy_status = blit_->Destroy();
    assert(destroy_status == HSA_STATUS_SUCCESS);

    delete blit_;
    blit_ = NULL;
  }

  // APE1 buffer (allocated with _aligned_malloc).
  if (ape1_base_ != 0) {
    void* ape1_ptr = reinterpret_cast<void*>(ape1_base_);
    _aligned_free(ape1_ptr);
  }

  // Scratch pool backing store.
  void* const scratch_base = scratch_pool_.base();
  if (scratch_base != NULL) {
    hsaKmtFreeMemory(scratch_base, scratch_pool_.size());
  }

  // Trap handler code buffer.
  if (trap_code_buf_ != NULL) {
    ReleaseShader(trap_code_buf_, trap_code_buf_size_);
  }

  // Region objects are owned by this agent.
  std::for_each(regions_.begin(), regions_.end(), DeleteObject());
  regions_.clear();
}
|
||||
|
||||
void GpuAgent::AssembleShader(const char* src_sp3, const char* func_name,
|
||||
void*& code_buf, size_t& code_buf_size) {
|
||||
#ifdef __linux__ // No VS builds of libsp3 available right now
|
||||
// Assemble source string with libsp3.
|
||||
sp3_context* sp3 = sp3_new();
|
||||
|
||||
switch (isa_->GetMajorVersion()) {
|
||||
case 7:
|
||||
sp3_setasic(sp3, "CI");
|
||||
break;
|
||||
case 8:
|
||||
sp3_setasic(sp3, "VI");
|
||||
break;
|
||||
default:
|
||||
assert(false && "SP3 assembly not supported on this agent");
|
||||
}
|
||||
|
||||
sp3_parse_string(sp3, src_sp3);
|
||||
sp3_shader* code_sp3_meta = sp3_compile(sp3, func_name);
|
||||
|
||||
// Allocate a GPU-visible buffer for the trap shader.
|
||||
HsaMemFlags code_buf_flags = {0};
|
||||
code_buf_flags.ui32.HostAccess = 1;
|
||||
code_buf_flags.ui32.ExecuteAccess = 1;
|
||||
code_buf_flags.ui32.NoSubstitute = 1;
|
||||
|
||||
size_t code_size = code_sp3_meta->size * sizeof(uint32_t);
|
||||
code_buf_size = AlignUp(code_size, 0x1000);
|
||||
|
||||
HSAKMT_STATUS err =
|
||||
hsaKmtAllocMemory(node_id(), code_buf_size, code_buf_flags, &code_buf);
|
||||
assert(err == HSAKMT_STATUS_SUCCESS && "hsaKmtAllocMemory(Trap) failed");
|
||||
|
||||
err = hsaKmtMapMemoryToGPU(code_buf, code_buf_size, NULL);
|
||||
assert(err == HSAKMT_STATUS_SUCCESS && "hsaKmtMapMemoryToGPU(Trap) failed");
|
||||
|
||||
// Copy trap handler code into the GPU-visible buffer.
|
||||
memset(code_buf, 0, code_buf_size);
|
||||
memcpy(code_buf, code_sp3_meta->data, code_size);
|
||||
|
||||
// Release SP3 resources.
|
||||
sp3_free_shader(code_sp3_meta);
|
||||
sp3_close(sp3);
|
||||
#endif
|
||||
}
|
||||
|
||||
// Undoes AssembleShader's allocation: unmaps `code_buf` from the GPU, then
// frees the `code_buf_size` bytes. Thunk return codes are not checked.
void GpuAgent::ReleaseShader(void* code_buf, size_t code_buf_size) {
  // Reverse of the allocate-then-map sequence used when the buffer was built.
  (void)hsaKmtUnmapMemoryToGPU(code_buf);
  (void)hsaKmtFreeMemory(code_buf, code_buf_size);
}
|
||||
|
||||
void GpuAgent::InitRegionList() {
|
||||
const bool is_apu_node = (properties_.NumCPUCores > 0);
|
||||
|
||||
std::vector<HsaMemoryProperties> mem_props(properties_.NumMemoryBanks);
|
||||
if (HSAKMT_STATUS_SUCCESS ==
|
||||
hsaKmtGetNodeMemoryProperties(node_id(), properties_.NumMemoryBanks,
|
||||
&mem_props[0])) {
|
||||
for (uint32_t mem_idx = 0; mem_idx < properties_.NumMemoryBanks;
|
||||
++mem_idx) {
|
||||
// Ignore the one(s) with unknown size.
|
||||
if (mem_props[mem_idx].SizeInBytes == 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
switch (mem_props[mem_idx].HeapType) {
|
||||
case HSA_HEAPTYPE_FRAME_BUFFER_PRIVATE:
|
||||
case HSA_HEAPTYPE_FRAME_BUFFER_PUBLIC:
|
||||
if (!is_apu_node) {
|
||||
mem_props[mem_idx].VirtualBaseAddress = 0;
|
||||
}
|
||||
case HSA_HEAPTYPE_GPU_LDS:
|
||||
case HSA_HEAPTYPE_GPU_SCRATCH:
|
||||
case HSA_HEAPTYPE_DEVICE_SVM: {
|
||||
MemoryRegion* region =
|
||||
new MemoryRegion(false, false, this, mem_props[mem_idx]);
|
||||
|
||||
regions_.push_back(region);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void GpuAgent::InitScratchPool() {
|
||||
HsaMemFlags flags;
|
||||
flags.Value = 0;
|
||||
flags.ui32.Scratch = 1;
|
||||
flags.ui32.HostAccess = 1;
|
||||
|
||||
scratch_per_thread_ = atoi(os::GetEnvVar("HSA_SCRATCH_MEM").c_str());
|
||||
if (scratch_per_thread_ == 0)
|
||||
scratch_per_thread_ = DEFAULT_SCRATCH_BYTES_PER_THREAD;
|
||||
|
||||
// Scratch length is: waves/CU * threads/wave * queues * #CUs *
|
||||
// scratch/thread
|
||||
const uint32_t num_cu =
|
||||
properties_.NumFComputeCores / properties_.NumSIMDPerCU;
|
||||
queue_scratch_len_ = 0;
|
||||
queue_scratch_len_ = AlignUp(32 * 64 * num_cu * scratch_per_thread_, 65536);
|
||||
size_t max_scratch_len = queue_scratch_len_ * max_queues_;
|
||||
|
||||
#if defined(HSA_LARGE_MODEL) && defined(__linux__)
|
||||
// For 64-bit linux use max queues unless otherwise specified
|
||||
if ((max_scratch_len == 0) || (max_scratch_len > 4294967296)) {
|
||||
max_scratch_len = 4294967296; // 4GB apeture max
|
||||
}
|
||||
#endif
|
||||
|
||||
void* scratch_base;
|
||||
HSAKMT_STATUS err =
|
||||
hsaKmtAllocMemory(node_id(), max_scratch_len, flags, &scratch_base);
|
||||
assert(err == HSAKMT_STATUS_SUCCESS && "hsaKmtAllocMemory(Scratch) failed");
|
||||
assert(IsMultipleOf(scratch_base, 0x1000) &&
|
||||
"Scratch base is not page aligned!");
|
||||
|
||||
scratch_pool_. ~SmallHeap();
|
||||
if (HSAKMT_STATUS_SUCCESS == err) {
|
||||
new (&scratch_pool_) SmallHeap(scratch_base, max_scratch_len);
|
||||
} else {
|
||||
new (&scratch_pool_) SmallHeap(NULL, 0);
|
||||
}
|
||||
}
|
||||
|
||||
void GpuAgent::InitCacheList() {
|
||||
// Get GPU cache information.
|
||||
// Similar to getting CPU cache but here we use FComputeIdLo.
|
||||
cache_props_.resize(properties_.NumCaches);
|
||||
if (HSAKMT_STATUS_SUCCESS !=
|
||||
hsaKmtGetNodeCacheProperties(node_id(), properties_.FComputeIdLo,
|
||||
properties_.NumCaches, &cache_props_[0])) {
|
||||
cache_props_.clear();
|
||||
} else {
|
||||
// Only store GPU D-cache.
|
||||
for (size_t cache_id = 0; cache_id < cache_props_.size(); ++cache_id) {
|
||||
const HsaCacheType type = cache_props_[cache_id].CacheType;
|
||||
if (type.ui32.HSACU != 1 || type.ui32.Instruction == 1) {
|
||||
cache_props_.erase(cache_props_.begin() + cache_id);
|
||||
--cache_id;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Region iteration entry point: forwards to VisitRegion with peer regions
// included.
hsa_status_t GpuAgent::IterateRegion(
    hsa_status_t (*callback)(hsa_region_t region, void* data),
    void* data) const {
  const bool include_peer = true;
  return VisitRegion(include_peer, callback, data);
}
|
||||
|
||||
// Invokes callback on the regions reachable from this GPU agent.
//
// include_peer == false: only this agent's own regions_ are visited.
// include_peer == true:  this agent's regions_ are visited only when it is
//                        the runtime's blit agent; then the runtime-wide
//                        fine-grained and coarse-grained system regions are
//                        visited, in that order.
// The first non-success status returned by a visit ends the walk.
hsa_status_t GpuAgent::VisitRegion(bool include_peer,
                                   hsa_status_t (*callback)(hsa_region_t region,
                                                            void* data),
                                   void* data) const {
  if (!include_peer) {
    // Only expose system, local, and LDS memory of this agent.
    return VisitRegion(regions_, callback, data);
  }

  // Only expose system, local, and LDS memory of the blit agent.
  const bool is_blit_agent =
      this->node_id() ==
      core::Runtime::runtime_singleton_->blit_agent()->node_id();
  if (is_blit_agent) {
    const hsa_status_t own_status = VisitRegion(regions_, callback, data);
    if (own_status != HSA_STATUS_SUCCESS) {
      return own_status;
    }
  }

  // Also expose system regions accessible by this agent.
  const hsa_status_t fine_status = VisitRegion(
      core::Runtime::runtime_singleton_->system_regions_fine(), callback,
      data);
  if (fine_status != HSA_STATUS_SUCCESS) {
    return fine_status;
  }

  return VisitRegion(core::Runtime::runtime_singleton_->system_regions_coarse(),
                     callback, data);
}
|
||||
|
||||
// Calls callback for every entry of regions that is system, local or LDS
// memory; all other regions are silently skipped. Iteration stops at, and
// returns, the first status that is not HSA_STATUS_SUCCESS.
hsa_status_t GpuAgent::VisitRegion(
    const std::vector<const core::MemoryRegion*>& regions,
    hsa_status_t (*callback)(hsa_region_t region, void* data),
    void* data) const {
  typedef std::vector<const core::MemoryRegion*>::const_iterator RegionIter;

  for (RegionIter it = regions.begin(); it != regions.end(); ++it) {
    const core::MemoryRegion* region = *it;
    const amd::MemoryRegion* amd_region =
        reinterpret_cast<const amd::MemoryRegion*>(region);

    // Only expose system, local, and LDS memory.
    const bool exposed = amd_region->IsSystem() ||
                         amd_region->IsLocalMemory() || amd_region->IsLDS();
    if (!exposed) {
      continue;
    }

    const hsa_status_t result =
        callback(core::MemoryRegion::Convert(region), data);
    if (result != HSA_STATUS_SUCCESS) {
      return result;
    }
  }

  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
// Creates the blit object used for DMA operations.
//
// An SDMA blit is attempted first, but only when HSA_ENABLE_SDMA is not "0"
// and the ISA version is 8.0.3. If that is skipped or fails to initialize, a
// kernel-based blit is created instead.
//
// Returns HSA_STATUS_SUCCESS when a blit object was initialized, otherwise
// HSA_STATUS_ERROR_OUT_OF_RESOURCES with blit_ left NULL.
hsa_status_t GpuAgent::InitDma() {
  // Try create SDMA blit first.
  std::string sdma_enable = os::GetEnvVar("HSA_ENABLE_SDMA");

  const bool try_sdma = sdma_enable != "0" && isa_->GetMajorVersion() == 8 &&
                        isa_->GetMinorVersion() == 0 &&
                        isa_->GetStepping() == 3;
  if (try_sdma) {
    blit_ = new BlitSdma();

    if (blit_->Initialize(*this) == HSA_STATUS_SUCCESS) {
      return HSA_STATUS_SUCCESS;
    }

    // Fall back to blit kernel if SDMA is unavailable.
    blit_->Destroy();
    delete blit_;
    blit_ = NULL;
  }

  assert(blit_ == NULL);
  blit_ = new BlitKernel();

  if (blit_->Initialize(*this) == HSA_STATUS_SUCCESS) {
    return HSA_STATUS_SUCCESS;
  }

  // Neither blit implementation could be brought up.
  blit_->Destroy();
  delete blit_;
  blit_ = NULL;

  return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
}
|
||||
|
||||
// Submits a linear copy of `size` bytes from `src` to `dst` through the blit
// object. Returns HSA_STATUS_ERROR_OUT_OF_RESOURCES when no blit object
// exists, otherwise the blit object's result.
hsa_status_t GpuAgent::DmaCopy(void* dst, const void* src, size_t size) {
  if (blit_ != NULL) {
    return blit_->SubmitLinearCopyCommand(dst, src, size);
  }

  return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
}
|
||||
|
||||
// Submits a linear copy of `size` bytes from `src` to `dst` through the blit
// object, passing `dep_signals` and `out_signal` along with the command.
// Returns HSA_STATUS_ERROR_OUT_OF_RESOURCES when no blit object exists,
// otherwise the blit object's result.
hsa_status_t GpuAgent::DmaCopy(void* dst, const void* src, size_t size,
                               std::vector<core::Signal*>& dep_signals,
                               core::Signal& out_signal) {
  if (blit_ == NULL) {
    return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  }

  // TODO(bwicakso): temporarily disable wait on thunk event if the out_signal
  // is an interrupt signal object. Remove this when SDMA handle interrupt
  // packet properly.
  const bool has_eop_event = (out_signal.EopEvent() != NULL);
  if (has_eop_event) {
    core::InterruptSignal& interrupt_signal =
        static_cast<core::InterruptSignal&>(out_signal);
    interrupt_signal.DisableWaitEvent();
  }

  return blit_->SubmitLinearCopyCommand(dst, src, size, dep_signals,
                                        out_signal);
}
|
||||
|
||||
// Submits a linear fill command for `ptr` with `value` and `count` through
// the blit object. Returns HSA_STATUS_ERROR_OUT_OF_RESOURCES when no blit
// object exists, otherwise the blit object's result.
hsa_status_t GpuAgent::DmaFill(void* ptr, uint32_t value, size_t count) {
  if (blit_ != NULL) {
    return blit_->SubmitLinearFillCommand(ptr, value, count);
  }

  return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
}
|
||||
|
||||
// Writes the value of the requested agent attribute into the caller-provided
// buffer `value`.
//
// The caller is responsible for supplying a buffer large enough for the
// attribute's type; this function performs no size checking of `value`.
// Image dimension attributes are delegated to
// hsa_amd_image_get_info_max_dim and return its status directly.
//
// Returns HSA_STATUS_SUCCESS when the attribute is recognized, otherwise
// HSA_STATUS_ERROR_INVALID_ARGUMENT (in which case `value` is untouched).
hsa_status_t GpuAgent::GetInfo(hsa_agent_info_t attribute, void* value) const {
  const size_t kNameSize = 64;  // agent, and vendor name size limit

  const core::ExtensionEntryPoints& extensions =
      core::Runtime::runtime_singleton_->extensions_;

  // The switch mixes core, image-extension and AMD-extension enumerators, so
  // it is performed on a plain integer rather than on hsa_agent_info_t.
  const size_t attribute_u = static_cast<size_t>(attribute);
  switch (attribute_u) {
    case HSA_AGENT_INFO_NAME:
      // TODO(bwicakso): hardcode for now.
      // The name is chosen from the ISA version: major 7 -> "Kaveri",
      // 8.0.2 -> "Tonga", 8.0.3 -> "Fiji", any other major 8 -> "Carrizo".
      std::memset(value, 0, kNameSize);
      if (isa_->GetMajorVersion() == 7) {
        std::memcpy(value, "Kaveri", sizeof("Kaveri"));
      } else if (isa_->GetMajorVersion() == 8) {
        if (isa_->GetMinorVersion() == 0 && isa_->GetStepping() == 2) {
          std::memcpy(value, "Tonga", sizeof("Tonga"));
        } else if (isa_->GetMinorVersion() == 0 && isa_->GetStepping() == 3) {
          std::memcpy(value, "Fiji", sizeof("Fiji"));
        } else {
          std::memcpy(value, "Carrizo", sizeof("Carrizo"));
        }
      } else {
        std::memcpy(value, "Unknown", sizeof("Unknown"));
      }
      break;
    case HSA_AGENT_INFO_VENDOR_NAME:
      std::memset(value, 0, kNameSize);
      std::memcpy(value, "AMD", sizeof("AMD"));
      break;
    case HSA_AGENT_INFO_FEATURE:
      *((hsa_agent_feature_t*)value) = HSA_AGENT_FEATURE_KERNEL_DISPATCH;
      break;
    case HSA_AGENT_INFO_MACHINE_MODEL:
#if defined(HSA_LARGE_MODEL)
      *((hsa_machine_model_t*)value) = HSA_MACHINE_MODEL_LARGE;
#else
      *((hsa_machine_model_t*)value) = HSA_MACHINE_MODEL_SMALL;
#endif
      break;
    case HSA_AGENT_INFO_BASE_PROFILE_DEFAULT_FLOAT_ROUNDING_MODES:
    case HSA_AGENT_INFO_DEFAULT_FLOAT_ROUNDING_MODE:
      *((hsa_default_float_rounding_mode_t*)value) =
          HSA_DEFAULT_FLOAT_ROUNDING_MODE_NEAR;
      break;
    case HSA_AGENT_INFO_FAST_F16_OPERATION:
      *((bool*)value) = false;
      break;
    case HSA_AGENT_INFO_PROFILE:
      *((hsa_profile_t*)value) = profile_;
      break;
    case HSA_AGENT_INFO_WAVEFRONT_SIZE:
      *((uint32_t*)value) = properties_.WaveFrontSize;
      break;
    case HSA_AGENT_INFO_WORKGROUP_MAX_DIM: {
      // TODO: must be per-device
      const uint16_t group_size[3] = {1024, 1024, 1024};
      std::memcpy(value, group_size, sizeof(group_size));
    } break;
    case HSA_AGENT_INFO_WORKGROUP_MAX_SIZE:
      // TODO: must be per-device
      *((uint32_t*)value) = 1024;
      break;
    case HSA_AGENT_INFO_GRID_MAX_DIM: {
      const hsa_dim3_t grid_size = {UINT32_MAX, UINT32_MAX, UINT32_MAX};
      std::memcpy(value, &grid_size, sizeof(hsa_dim3_t));
    } break;
    case HSA_AGENT_INFO_GRID_MAX_SIZE:
      *((uint32_t*)value) = UINT32_MAX;
      break;
    case HSA_AGENT_INFO_FBARRIER_MAX_SIZE:
      // TODO: to confirm
      *((uint32_t*)value) = 32;
      break;
    case HSA_AGENT_INFO_QUEUES_MAX:
      *((uint32_t*)value) = max_queues_;
      break;
    case HSA_AGENT_INFO_QUEUE_MIN_SIZE:
      *((uint32_t*)value) = minAqlSize_;
      break;
    case HSA_AGENT_INFO_QUEUE_MAX_SIZE:
      *((uint32_t*)value) = maxAqlSize_;
      break;
    case HSA_AGENT_INFO_QUEUE_TYPE:
      *((hsa_queue_type_t*)value) = HSA_QUEUE_TYPE_MULTI;
      break;
    case HSA_AGENT_INFO_NODE:
      // TODO: associate with OS NUMA support (numactl / GetNumaProcessorNode).
      *((uint32_t*)value) = node_id();
      break;
    case HSA_AGENT_INFO_DEVICE:
      *((hsa_device_type_t*)value) = HSA_DEVICE_TYPE_GPU;
      break;
    case HSA_AGENT_INFO_CACHE_SIZE:
      // Four uint32_t slots are cleared; only the first one is populated.
      std::memset(value, 0, sizeof(uint32_t) * 4);
      // TODO: no GPU cache info from KFD. Hardcode for now.
      // GCN whitepaper: L1 data cache is 16KB.
      ((uint32_t*)value)[0] = 16 * 1024;
      break;
    case HSA_AGENT_INFO_ISA:
      *((hsa_isa_t*)value) = core::Isa::Handle(isa_);
      break;
    case HSA_AGENT_INFO_EXTENSIONS:
      // 128-byte bitmask; only bits in the first byte are set here.
      memset(value, 0, sizeof(uint8_t) * 128);

      if (extensions.table.hsa_ext_program_finalize_fn != NULL) {
        *((uint8_t*)value) = 1 << HSA_EXTENSION_FINALIZER;
      }

      if (profile_ == HSA_PROFILE_FULL &&
          extensions.table.hsa_ext_image_create_fn != NULL) {
        // TODO(bwicakso): only APU supports images currently.
        *((uint8_t*)value) |= 1 << HSA_EXTENSION_IMAGES;
      }

      *((uint8_t*)value) |= 1 << HSA_EXTENSION_AMD_PROFILER;

      break;
    case HSA_AGENT_INFO_VERSION_MAJOR:
      *((uint16_t*)value) = 1;
      break;
    case HSA_AGENT_INFO_VERSION_MINOR:
      *((uint16_t*)value) = 0;
      break;
    case HSA_EXT_AGENT_INFO_IMAGE_1D_MAX_ELEMENTS:
    case HSA_EXT_AGENT_INFO_IMAGE_1DA_MAX_ELEMENTS:
    case HSA_EXT_AGENT_INFO_IMAGE_1DB_MAX_ELEMENTS:
    case HSA_EXT_AGENT_INFO_IMAGE_2D_MAX_ELEMENTS:
    case HSA_EXT_AGENT_INFO_IMAGE_2DA_MAX_ELEMENTS:
    case HSA_EXT_AGENT_INFO_IMAGE_2DDEPTH_MAX_ELEMENTS:
    case HSA_EXT_AGENT_INFO_IMAGE_2DADEPTH_MAX_ELEMENTS:
    case HSA_EXT_AGENT_INFO_IMAGE_3D_MAX_ELEMENTS:
    case HSA_EXT_AGENT_INFO_IMAGE_ARRAY_MAX_LAYERS:
      return hsa_amd_image_get_info_max_dim(public_handle(), attribute, value);
    case HSA_EXT_AGENT_INFO_MAX_IMAGE_RD_HANDLES:
      // TODO: hardcode based on OCL constants.
      *((uint32_t*)value) = 128;
      break;
    case HSA_EXT_AGENT_INFO_MAX_IMAGE_RORW_HANDLES:
      // TODO: hardcode based on OCL constants.
      *((uint32_t*)value) = 64;
      break;
    case HSA_EXT_AGENT_INFO_MAX_SAMPLER_HANDLERS:
      // TODO: hardcode based on OCL constants.
      *((uint32_t*)value) = 16;
      // Without this break, control fell into the CHIP_ID case below and the
      // sampler limit was overwritten with the device id.
      break;
    case HSA_AMD_AGENT_INFO_CHIP_ID:
      *((uint32_t*)value) = properties_.DeviceId;
      break;
    case HSA_AMD_AGENT_INFO_CACHELINE_SIZE:
      // TODO: hardcode for now.
      // GCN whitepaper: cache line size is 64 byte long.
      *((uint32_t*)value) = 64;
      break;
    case HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT:
      *((uint32_t*)value) =
          (properties_.NumFComputeCores / properties_.NumSIMDPerCU);
      break;
    case HSA_AMD_AGENT_INFO_MAX_CLOCK_FREQUENCY:
      *((uint32_t*)value) = properties_.MaxEngineClockMhzFCompute;
      break;
    case HSA_AMD_AGENT_INFO_DRIVER_NODE_ID:
      *((uint32_t*)value) = node_id();
      break;
    case HSA_AMD_AGENT_INFO_MAX_ADDRESS_WATCH_POINTS:
      // Reported as 2^WatchPointsTotalBits.
      *((uint32_t*)value) = static_cast<uint32_t>(
          1 << properties_.Capability.ui32.WatchPointsTotalBits);
      break;
    case HSA_AMD_AGENT_INFO_BDFID:
      *((uint32_t*)value) = static_cast<uint32_t>(properties_.LocationId);
      break;
    default:
      return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }
  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
// Creates a hardware AQL queue of `size` packets on this agent.
//
// `size` must be a power of two (else HSA_STATUS_ERROR_INVALID_ARGUMENT) and
// must not exceed maxAqlSize_ (else HSA_STATUS_ERROR_OUT_OF_RESOURCES).
// Scratch memory is reserved for the queue before it is constructed; on any
// failure the scratch is released again and
// HSA_STATUS_ERROR_OUT_OF_RESOURCES is returned. On success *queue receives
// the new queue. `queue_type` and `group_segment_size` are not used here.
hsa_status_t GpuAgent::QueueCreate(size_t size, hsa_queue_type_t queue_type,
                                   core::HsaEventCallback event_callback,
                                   void* data, uint32_t private_segment_size,
                                   uint32_t group_segment_size,
                                   core::Queue** queue) {
  // AQL queues must be a power of two in length.
  if (!IsPowerOfTwo(size)) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  // Enforce max size
  if (size > maxAqlSize_) {
    return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  }

  // Allocate scratch memory. Start from the agent-wide defaults; the
  // interrupt-wait path on 64-bit Linux overrides them with a size derived
  // from the caller's private segment size.
  ScratchInfo scratch;
  scratch.size = queue_scratch_len_;
  scratch.size_per_thread = scratch_per_thread_;
#if defined(HSA_LARGE_MODEL) && defined(__linux__)
  if (core::g_use_interrupt_wait) {
    // UINT_MAX means "caller did not specify": base profile gets no scratch,
    // otherwise the agent default per-thread amount is used.
    if (private_segment_size == UINT_MAX) {
      if (profile_ == HSA_PROFILE_BASE) {
        private_segment_size = 0;
      } else {
        private_segment_size = scratch_per_thread_;
      }
    }

    if (private_segment_size > 262128) {
      return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
    }

    // Per-thread scratch is rounded up to a multiple of 16 bytes and checked
    // against the same limit again after rounding.
    scratch.size_per_thread = AlignUp(private_segment_size, 16);
    if (scratch.size_per_thread > 262128) {
      return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
    }

    const uint32_t num_cu =
        properties_.NumFComputeCores / properties_.NumSIMDPerCU;
    scratch.size = scratch.size_per_thread * 32 * 64 * num_cu;
  }
#endif
  scratch.queue_base = NULL;
  if (scratch.size != 0) {
    AcquireQueueScratch(scratch);
    if (scratch.queue_base == NULL) {
      return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
    }
  }

  // Create an HW AQL queue
  AqlQueue* hw_queue = new AqlQueue(this, size, node_id(), scratch,
                                    event_callback, data, is_kv_device_);
  if (hw_queue == NULL || !hw_queue->IsValid()) {
    // If reached here its always an ERROR.
    delete hw_queue;
    ReleaseQueueScratch(scratch.queue_base);
    return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  }

  // return queue
  *queue = hw_queue;
  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
// Reserves scratch memory for a queue from scratch_pool_.
//
// A request with size 0 is replaced by the agent defaults. On return,
// scratch.queue_base is the reserved address, or NULL when the pool could not
// satisfy the request or (base profile only) mapping the range to the GPU
// failed. scratch.queue_process_offset is the offset of queue_base from the
// pool base.
void GpuAgent::AcquireQueueScratch(ScratchInfo& scratch) {
  const bool use_defaults = (scratch.size == 0);
  if (use_defaults) {
    scratch.size = queue_scratch_len_;
    scratch.size_per_thread = scratch_per_thread_;
  }

  // The pool is shared by all queues of this agent; the lock is held until
  // the function returns.
  ScopedAcquire<KernelMutex> lock(&scratch_lock_);
  scratch.queue_base = scratch_pool_.alloc(scratch.size);
  scratch.queue_process_offset =
      uintptr_t(scratch.queue_base) - uintptr_t(scratch_pool_.base());

  // Only a successful reservation on a base-profile agent needs mapping.
  if ((scratch.queue_base == NULL) || (profile_ != HSA_PROFILE_BASE)) {
    return;
  }

  HSAuint64 alternate_va;
  if (HSAKMT_STATUS_SUCCESS ==
      hsaKmtMapMemoryToGPU(scratch.queue_base, scratch.size, &alternate_va)) {
    return;
  }

  // Mapping failed: hand the range back to the pool and report failure.
  assert(false && "Map scratch subrange failed!");
  scratch_pool_.free(scratch.queue_base);
  scratch.queue_base = NULL;
}
|
||||
|
||||
void GpuAgent::ReleaseQueueScratch(void* base) {
|
||||
if (base == NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
ScopedAcquire<KernelMutex> lock(&scratch_lock_);
|
||||
if (profile_ == HSA_PROFILE_BASE) {
|
||||
if (HSAKMT_STATUS_SUCCESS != hsaKmtUnmapMemoryToGPU(base)) {
|
||||
assert(false && "Unmap scratch subrange failed!");
|
||||
}
|
||||
}
|
||||
scratch_pool_.free(base);
|
||||
}
|
||||
|
||||
// Converts the GPU start/end timestamps stored in `signal` into system clock
// values, written to `time`. The conversion is a linear map through the two
// clock samples t0_ and t1_; t1_ is refreshed first when the signal's end
// timestamp is newer than the last sample.
void GpuAgent::TranslateTime(core::Signal* signal,
                             hsa_amd_profiling_dispatch_time_t& time) {
  // Ensure interpolation
  ScopedAcquire<KernelMutex> lock(&t1_lock_);
  if (t1_.GPUClockCounter < signal->signal_.end_ts) {
    SyncClocks();
  }

  // Slope of the system clock against the GPU clock between the two samples.
  const double system_per_gpu_tick =
      double(int64_t(t0_.SystemClockCounter - t1_.SystemClockCounter)) /
      double(int64_t(t0_.GPUClockCounter - t1_.GPUClockCounter));
  const double system_origin = double(t1_.SystemClockCounter);

  time.start = uint64_t(
      system_per_gpu_tick *
          double(int64_t(signal->signal_.start_ts - t1_.GPUClockCounter)) +
      system_origin);
  time.end = uint64_t(
      system_per_gpu_tick *
          double(int64_t(signal->signal_.end_ts - t1_.GPUClockCounter)) +
      system_origin);
}
|
||||
|
||||
// Converts a single GPU clock value into a system clock value. The t1_ sample
// is always refreshed before the linear map through t0_ and t1_ is applied.
uint64_t GpuAgent::TranslateTime(uint64_t tick) {
  ScopedAcquire<KernelMutex> lock(&t1_lock_);
  SyncClocks();

  const double system_per_gpu_tick =
      double(int64_t(t0_.SystemClockCounter - t1_.SystemClockCounter)) /
      double(int64_t(t0_.GPUClockCounter - t1_.GPUClockCounter));
  const double gpu_delta = double(int64_t(tick - t1_.GPUClockCounter));

  return uint64_t(system_per_gpu_tick * gpu_delta +
                  double(t1_.SystemClockCounter));
}
|
||||
|
||||
// Sets the coherency type used by this agent.
//
// On agents where is_kv_device_ is false the new type is simply recorded.
// Otherwise a 64 KiB, 64 KiB-aligned APE1 buffer is allocated on first use and
// hsaKmtSetMemoryPolicy is called with the default/alternate caching policies
// chosen from `type`.
//
// Returns true when the requested type is in effect, false when the APE1
// buffer could not be allocated or the memory policy call failed. In both
// failure cases current_coherency_type_ is left unchanged.
bool GpuAgent::current_coherency_type(hsa_amd_coherency_type_t type) {
  if (!is_kv_device_) {
    current_coherency_type_ = type;
    return true;
  }

  ScopedAcquire<KernelMutex> Lock(&coherency_lock_);

  if (ape1_base_ == 0 && ape1_size_ == 0) {
    static const size_t kApe1Alignment = 64 * 1024;
    void* ape1 = _aligned_malloc(kApe1Alignment, kApe1Alignment);
    assert((ape1 != NULL) && ("APE1 allocation failed"));
    if (ape1 == NULL) {
      // Leave ape1_base_/ape1_size_ at zero so a later call retries the
      // allocation instead of handing a null aperture to the driver.
      return false;
    }
    ape1_size_ = kApe1Alignment;
    ape1_base_ = reinterpret_cast<uintptr_t>(ape1);
  } else if (type == current_coherency_type_) {
    // Aperture already set up and nothing to change.
    return true;
  }

  // Coherent: default policy cached, alternate non-cached; otherwise swapped.
  HSA_CACHING_TYPE type0, type1;
  if (type == HSA_AMD_COHERENCY_TYPE_COHERENT) {
    type0 = HSA_CACHING_CACHED;
    type1 = HSA_CACHING_NONCACHED;
  } else {
    type0 = HSA_CACHING_NONCACHED;
    type1 = HSA_CACHING_CACHED;
  }

  if (hsaKmtSetMemoryPolicy(node_id(), type0, type1,
                            reinterpret_cast<void*>(ape1_base_),
                            ape1_size_) != HSAKMT_STATUS_SUCCESS) {
    return false;
  }
  current_coherency_type_ = type;
  return true;
}
|
||||
|
||||
uint16_t GpuAgent::GetMicrocodeVersion() const {
|
||||
return (properties_.EngineId.ui32.uCode);
|
||||
}
|
||||
|
||||
void GpuAgent::SyncClocks() {
|
||||
HSAKMT_STATUS err = hsaKmtGetClockCounters(node_id(), &t1_);
|
||||
assert(err == HSAKMT_STATUS_SUCCESS && "hsaGetClockCounters error");
|
||||
}
|
||||
|
||||
void GpuAgent::BindTrapHandler() {
|
||||
#ifdef __linux__ // No raw string literal support in VS builds right now
|
||||
const char* src_sp3 = R"(
|
||||
var s_trap_info_lo = ttmp0
|
||||
var s_trap_info_hi = ttmp1
|
||||
var s_tmp0 = ttmp2
|
||||
var s_tmp1 = ttmp3
|
||||
var s_tmp2 = ttmp4
|
||||
var s_tmp3 = ttmp5
|
||||
|
||||
shader TrapHandler
|
||||
type(CS)
|
||||
|
||||
// Retrieve the queue inactive signal.
|
||||
s_load_dwordx2 [s_tmp0, s_tmp1], s[0:1], 0xC0
|
||||
s_waitcnt lgkmcnt(0)
|
||||
|
||||
// Mask all but one lane of the wavefront.
|
||||
s_mov_b64 exec, 0x1
|
||||
|
||||
// Set queue signal value to unhandled exception error.
|
||||
s_add_u32 s_tmp0, s_tmp0, 0x8
|
||||
s_addc_u32 s_tmp1, s_tmp1, 0x0
|
||||
v_mov_b32 v0, s_tmp0
|
||||
v_mov_b32 v1, s_tmp1
|
||||
v_mov_b32 v2, 0x80000000
|
||||
v_mov_b32 v3, 0x0
|
||||
flat_atomic_swap_x2 v[0:1], v[0:1], v[2:3]
|
||||
s_waitcnt vmcnt(0)
|
||||
|
||||
// Skip event if the signal was already set to unhandled exception.
|
||||
v_cmp_eq_u64 vcc, v[0:1], v[2:3]
|
||||
s_cbranch_vccnz L_SIGNAL_DONE
|
||||
|
||||
// Check for a non-NULL signal event mailbox.
|
||||
s_load_dwordx2 [s_tmp2, s_tmp3], [s_tmp0, s_tmp1], 0x8
|
||||
s_waitcnt lgkmcnt(0)
|
||||
s_and_b64 [s_tmp2, s_tmp3], [s_tmp2, s_tmp3], [s_tmp2, s_tmp3]
|
||||
s_cbranch_scc0 L_SIGNAL_DONE
|
||||
|
||||
// Load the signal event value.
|
||||
s_add_u32 s_tmp0, s_tmp0, 0x10
|
||||
s_addc_u32 s_tmp1, s_tmp1, 0x0
|
||||
s_load_dword s_tmp0, [s_tmp0, s_tmp1], 0x0
|
||||
s_waitcnt lgkmcnt(0)
|
||||
|
||||
// Write the signal event value to the mailbox.
|
||||
v_mov_b32 v0, s_tmp2
|
||||
v_mov_b32 v1, s_tmp3
|
||||
v_mov_b32 v2, s_tmp0
|
||||
flat_store_dword v[0:1], v2
|
||||
s_waitcnt vmcnt(0)
|
||||
|
||||
// Send an interrupt to trigger event notification.
|
||||
s_sendmsg sendmsg(MSG_INTERRUPT)
|
||||
|
||||
L_SIGNAL_DONE:
|
||||
// Halt wavefront and exit trap.
|
||||
s_sethalt 1
|
||||
s_rfe_b64 [s_trap_info_lo, s_trap_info_hi]
|
||||
end
|
||||
)";
|
||||
|
||||
if (isa_->GetMajorVersion() == 7) {
|
||||
// No trap handler support on Gfx7, soft error.
|
||||
return;
|
||||
}
|
||||
|
||||
// Disable trap handler on Carrizo until KFD is fixed.
|
||||
if (profile_ == HSA_PROFILE_FULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Assemble the trap handler source code.
|
||||
AssembleShader(src_sp3, "TrapHandler", trap_code_buf_, trap_code_buf_size_);
|
||||
|
||||
// Bind the trap handler to this node.
|
||||
HSAKMT_STATUS err = hsaKmtSetTrapHandler(node_id(), trap_code_buf_,
|
||||
trap_code_buf_size_, NULL, 0);
|
||||
assert(err == HSAKMT_STATUS_SUCCESS && "hsaKmtSetTrapHandler() failed");
|
||||
#endif
|
||||
}
|
||||
|
||||
} // namespace
|
||||
@@ -1,172 +0,0 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include <cassert>
|
||||
#include "core/inc/amd_hsa_loader.hpp"
|
||||
#include "core/inc/amd_load_map.h"
|
||||
#include "core/inc/runtime.h"
|
||||
|
||||
using amd::hsa::loader::Executable;
|
||||
using amd::hsa::loader::LoadedCodeObject;
|
||||
using amd::hsa::loader::LoadedSegment;
|
||||
|
||||
// Loads `code_object` for `agent` into `executable` and reports the resulting
// loaded code object through `loaded_code_object`.
//
// Returns HSA_STATUS_ERROR_NOT_INITIALIZED if the runtime is not open,
// HSA_STATUS_ERROR_INVALID_ARGUMENT if `loaded_code_object` is null,
// HSA_STATUS_ERROR_INVALID_EXECUTABLE if the handle does not resolve, and
// otherwise whatever Executable::LoadCodeObject returns.
hsa_status_t amd_executable_load_code_object(
    hsa_executable_t executable,
    hsa_agent_t agent,
    hsa_code_object_t code_object,
    const char *options,
    amd_loaded_code_object_t *loaded_code_object)
{
  if (!core::Runtime::runtime_singleton_->IsOpen())
    return HSA_STATUS_ERROR_NOT_INITIALIZED;
  if (loaded_code_object == nullptr)
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;

  Executable *target = Executable::Object(executable);
  if (target == nullptr)
    return HSA_STATUS_ERROR_INVALID_EXECUTABLE;

  return target->LoadCodeObject(agent, code_object, options, loaded_code_object);
}
|
||||
|
||||
// Invokes `callback` through the runtime loader's IterateExecutables, passing
// `data` along.
//
// Returns HSA_STATUS_ERROR_NOT_INITIALIZED if the runtime is not open,
// HSA_STATUS_ERROR_INVALID_ARGUMENT if `callback` is null, and otherwise the
// loader's result.
hsa_status_t amd_iterate_executables(
    hsa_status_t (*callback)(
        hsa_executable_t executable,
        void *data),
    void *data)
{
  core::Runtime *runtime = core::Runtime::runtime_singleton_;
  if (!runtime->IsOpen())
    return HSA_STATUS_ERROR_NOT_INITIALIZED;
  if (callback == nullptr)
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;

  return core::Runtime::runtime_singleton_->loader()->IterateExecutables(callback, data);
}
|
||||
|
||||
// Invokes `callback` through Executable::IterateLoadedCodeObjects on
// `executable`, passing `data` along.
//
// Returns HSA_STATUS_ERROR_NOT_INITIALIZED if the runtime is not open,
// HSA_STATUS_ERROR_INVALID_ARGUMENT if `callback` is null,
// HSA_STATUS_ERROR_INVALID_EXECUTABLE if the handle does not resolve, and
// otherwise the iteration result.
hsa_status_t amd_executable_iterate_loaded_code_objects(
    hsa_executable_t executable,
    hsa_status_t (*callback)(
        amd_loaded_code_object_t loaded_code_object,
        void *data),
    void *data)
{
  if (!core::Runtime::runtime_singleton_->IsOpen())
    return HSA_STATUS_ERROR_NOT_INITIALIZED;
  if (callback == nullptr)
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;

  Executable *target = Executable::Object(executable);
  if (target == nullptr)
    return HSA_STATUS_ERROR_INVALID_EXECUTABLE;

  return target->IterateLoadedCodeObjects(callback, data);
}
|
||||
|
||||
// Queries `attribute` of `loaded_code_object` into `value`.
//
// Returns HSA_STATUS_ERROR_NOT_INITIALIZED if the runtime is not open,
// HSA_STATUS_ERROR_INVALID_ARGUMENT if `value` is null, the handle does not
// resolve, or LoadedCodeObject::GetInfo reports failure, and
// HSA_STATUS_SUCCESS otherwise.
hsa_status_t amd_loaded_code_object_get_info(
    amd_loaded_code_object_t loaded_code_object,
    amd_loaded_code_object_info_t attribute,
    void *value)
{
  if (!core::Runtime::runtime_singleton_->IsOpen())
    return HSA_STATUS_ERROR_NOT_INITIALIZED;
  if (value == nullptr)
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;

  LoadedCodeObject *target = LoadedCodeObject::Object(loaded_code_object);
  if (target == nullptr) {
    // \todo: new error code: AMD_STATUS_ERROR_INVALID_LOADED_CODE_OBJECT.
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  if (target->GetInfo(attribute, value))
    return HSA_STATUS_SUCCESS;
  return HSA_STATUS_ERROR_INVALID_ARGUMENT;
}
|
||||
|
||||
// Invokes `callback` through LoadedCodeObject::IterateLoadedSegments on
// `loaded_code_object`, passing `data` along.
//
// Returns HSA_STATUS_ERROR_NOT_INITIALIZED if the runtime is not open,
// HSA_STATUS_ERROR_INVALID_ARGUMENT if `callback` is null or the handle does
// not resolve, and otherwise the iteration result.
hsa_status_t amd_loaded_code_object_iterate_loaded_segments(
    amd_loaded_code_object_t loaded_code_object,
    hsa_status_t (*callback)(
        amd_loaded_segment_t loaded_segment,
        void *data),
    void *data)
{
  if (!core::Runtime::runtime_singleton_->IsOpen())
    return HSA_STATUS_ERROR_NOT_INITIALIZED;
  if (callback == nullptr)
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;

  LoadedCodeObject *target = LoadedCodeObject::Object(loaded_code_object);
  if (target == nullptr) {
    // \todo: new error code: AMD_STATUS_ERROR_INVALID_LOADED_CODE_OBJECT.
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  return target->IterateLoadedSegments(callback, data);
}
|
||||
|
||||
// Queries `attribute` of `loaded_segment` into `value`.
//
// Returns HSA_STATUS_ERROR_NOT_INITIALIZED if the runtime is not open,
// HSA_STATUS_ERROR_INVALID_ARGUMENT if `value` is null, the handle does not
// resolve, or LoadedSegment::GetInfo reports failure, and HSA_STATUS_SUCCESS
// otherwise.
hsa_status_t amd_loaded_segment_get_info(
    amd_loaded_segment_t loaded_segment,
    amd_loaded_segment_info_t attribute,
    void *value)
{
  if (!core::Runtime::runtime_singleton_->IsOpen())
    return HSA_STATUS_ERROR_NOT_INITIALIZED;
  if (value == nullptr)
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;

  LoadedSegment *target = LoadedSegment::Object(loaded_segment);
  if (target == nullptr) {
    // \todo: new error code: AMD_STATUS_ERROR_INVALID_LOADED_SEGMENT.
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  if (target->GetInfo(attribute, value))
    return HSA_STATUS_SUCCESS;
  return HSA_STATUS_ERROR_INVALID_ARGUMENT;
}
|
||||
@@ -1,588 +0,0 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "core/inc/amd_loader_context.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <cstring>
|
||||
|
||||
#include "core/inc/amd_gpu_agent.h"
|
||||
#include "core/inc/amd_memory_region.h"
|
||||
#include "core/util/os.h"
|
||||
|
||||
#include <cstdlib>
|
||||
#include <utility>
|
||||
#include "core/inc/hsa_internal.h"
|
||||
#include "core/util/utils.h"
|
||||
#include "inc/hsa_ext_amd.h"
|
||||
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
#include <windows.h>
|
||||
#else
|
||||
#include <sys/mman.h>
|
||||
#endif
|
||||
|
||||
namespace {
|
||||
|
||||
// Predicate: true when `region` is non-null and reports IsLocalMemory().
bool IsLocalRegion(const core::MemoryRegion *region)
{
  const amd::MemoryRegion *candidate = (amd::MemoryRegion*)region;
  return nullptr != candidate && candidate->IsLocalMemory();
}
|
||||
|
||||
// Reports whether a debugger is registered. Currently always false.
bool IsDebuggerRegistered()
{
  // Leaving code commented as it will be used later on
  // return (("1" == os::GetEnvVar("HSA_EMULATE_AQL")) &&
  //         (0 != os::GetEnvVar("HSA_TOOLS_LIB").size()));
  return false;
}
|
||||
|
||||
// Abstract backing store for one loaded segment. Concrete subclasses decide
// where the bytes live; instances are not copyable.
class SegmentMemory {
public:
  virtual ~SegmentMemory() {}

  // Address of the segment at `offset`.
  virtual void* Address(size_t offset = 0) const = 0;
  // Host-side address of the segment at `offset`, where one exists.
  virtual void* HostAddress(size_t offset = 0) const = 0;
  // True once Allocate() has succeeded and Free() has not been called.
  virtual bool Allocated() const = 0;
  // Reserves `size` bytes with alignment `align`, optionally zero-filled.
  virtual bool Allocate(size_t size, size_t align, bool zero) = 0;
  // Copies `size` bytes from `src` into the segment at `offset`.
  virtual bool Copy(size_t offset, const void *src, size_t size) = 0;
  // Releases the storage.
  virtual void Free() = 0;
  // Finalizes the segment contents.
  virtual bool Freeze() = 0;

protected:
  SegmentMemory() {}

private:
  // Non-copyable.
  SegmentMemory(const SegmentMemory&) = delete;
  SegmentMemory& operator=(const SegmentMemory&) = delete;
};
|
||||
|
||||
class MallocedMemory final: public SegmentMemory {
|
||||
public:
|
||||
MallocedMemory(): SegmentMemory(), ptr_(nullptr), size_(0) {}
|
||||
~MallocedMemory() {}
|
||||
|
||||
void* Address(size_t offset = 0) const override
|
||||
{ assert(this->Allocated()); return (char*)ptr_ + offset; }
|
||||
void* HostAddress(size_t offset = 0) const override
|
||||
{ assert(false); return nullptr; }
|
||||
bool Allocated() const override
|
||||
{ return nullptr != ptr_; }
|
||||
|
||||
bool Allocate(size_t size, size_t align, bool zero) override;
|
||||
bool Copy(size_t offset, const void *src, size_t size) override;
|
||||
void Free() override;
|
||||
bool Freeze() override;
|
||||
|
||||
private:
|
||||
MallocedMemory(const MallocedMemory&);
|
||||
MallocedMemory& operator=(const MallocedMemory&);
|
||||
|
||||
void *ptr_;
|
||||
size_t size_;
|
||||
};
|
||||
|
||||
bool MallocedMemory::Allocate(size_t size, size_t align, bool zero)
|
||||
{
|
||||
assert(!this->Allocated());
|
||||
assert(0 < size);
|
||||
assert(0 < align && 0 == (align & (align - 1)));
|
||||
ptr_ = _aligned_malloc(size, align);
|
||||
if (nullptr == ptr_) {
|
||||
return false;
|
||||
}
|
||||
if (HSA_STATUS_SUCCESS != HSA::hsa_memory_register(ptr_, size)) {
|
||||
_aligned_free(ptr_);
|
||||
ptr_ = nullptr;
|
||||
return false;
|
||||
}
|
||||
if (zero) {
|
||||
memset(ptr_, 0x0, size);
|
||||
}
|
||||
size_ = size;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool MallocedMemory::Copy(size_t offset, const void *src, size_t size)
|
||||
{
|
||||
assert(this->Allocated());
|
||||
assert(nullptr != src);
|
||||
assert(0 < size);
|
||||
memcpy(this->Address(offset), src, size);
|
||||
return true;
|
||||
}
|
||||
|
||||
void MallocedMemory::Free()
|
||||
{
|
||||
assert(this->Allocated());
|
||||
HSA::hsa_memory_deregister(ptr_, size_);
|
||||
_aligned_free(ptr_);
|
||||
ptr_ = nullptr;
|
||||
size_ = 0;
|
||||
}
|
||||
|
||||
bool MallocedMemory::Freeze()
|
||||
{
|
||||
assert(this->Allocated());
|
||||
return true;
|
||||
}
|
||||
|
||||
class MappedMemory final: public SegmentMemory {
|
||||
public:
|
||||
MappedMemory(bool is_kv = false): SegmentMemory(), is_kv_(is_kv), ptr_(nullptr), size_(0) {}
|
||||
~MappedMemory() {}
|
||||
|
||||
void* Address(size_t offset = 0) const override
|
||||
{ assert(this->Allocated()); return (char*)ptr_ + offset; }
|
||||
void* HostAddress(size_t offset = 0) const override
|
||||
{ assert(false); return nullptr; }
|
||||
bool Allocated() const override
|
||||
{ return nullptr != ptr_; }
|
||||
|
||||
bool Allocate(size_t size, size_t align, bool zero) override;
|
||||
bool Copy(size_t offset, const void *src, size_t size) override;
|
||||
void Free() override;
|
||||
bool Freeze() override;
|
||||
|
||||
private:
|
||||
MappedMemory(const MappedMemory&);
|
||||
MappedMemory& operator=(const MappedMemory&);
|
||||
|
||||
bool is_kv_;
|
||||
void *ptr_;
|
||||
size_t size_;
|
||||
};
|
||||
|
||||
bool MappedMemory::Allocate(size_t size, size_t align, bool zero)
|
||||
{
|
||||
assert(!this->Allocated());
|
||||
assert(0 < size);
|
||||
assert(0 < align && 0 == (align & (align - 1)));
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
ptr_ = (void*)VirtualAlloc(nullptr, size, MEM_COMMIT | MEM_RESERVE, PAGE_EXECUTE_READWRITE);
|
||||
#else
|
||||
ptr_ = is_kv_ ?
|
||||
mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0) :
|
||||
mmap(nullptr, size, PROT_EXEC | PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
|
||||
#endif // _WIN32 || _WIN64
|
||||
if (nullptr == ptr_) {
|
||||
return false;
|
||||
}
|
||||
assert(0 == ((uintptr_t)ptr_) % align);
|
||||
if (HSA_STATUS_SUCCESS != HSA::hsa_memory_register(ptr_, size)) {
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
VirtualFree(ptr_, size, MEM_DECOMMIT);
|
||||
VirtualFree(ptr_, 0, MEM_RELEASE);
|
||||
#else
|
||||
munmap(ptr_, size);
|
||||
#endif // _WIN32 || _WIN64
|
||||
ptr_ = nullptr;
|
||||
return false;
|
||||
}
|
||||
if (zero) {
|
||||
memset(ptr_, 0x0, size);
|
||||
}
|
||||
size_ = size;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool MappedMemory::Copy(size_t offset, const void *src, size_t size)
|
||||
{
|
||||
assert(this->Allocated());
|
||||
assert(nullptr != src);
|
||||
assert(0 < size);
|
||||
memcpy(this->Address(offset), src, size);
|
||||
return true;
|
||||
}
|
||||
|
||||
void MappedMemory::Free()
|
||||
{
|
||||
assert(this->Allocated());
|
||||
HSA::hsa_memory_deregister(ptr_, size_);
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
VirtualFree(ptr_, size_, MEM_DECOMMIT);
|
||||
VirtualFree(ptr_, 0, MEM_RELEASE);
|
||||
#else
|
||||
munmap(ptr_, size_);
|
||||
#endif // _WIN32 || _WIN64
|
||||
ptr_ = nullptr;
|
||||
size_ = 0;
|
||||
}
|
||||
|
||||
bool MappedMemory::Freeze()
|
||||
{
|
||||
assert(this->Allocated());
|
||||
return true;
|
||||
}
|
||||
|
||||
class RegionMemory final: public SegmentMemory {
|
||||
public:
|
||||
static hsa_region_t AgentLocal(hsa_agent_t agent);
|
||||
static hsa_region_t System();
|
||||
|
||||
RegionMemory(hsa_region_t region): SegmentMemory(), region_(region), ptr_(nullptr), host_ptr_(nullptr), size_(0) {}
|
||||
~RegionMemory() {}
|
||||
|
||||
void* Address(size_t offset = 0) const override
|
||||
{ assert(this->Allocated()); return (char*)ptr_ + offset; }
|
||||
void* HostAddress(size_t offset = 0) const override
|
||||
{ assert(this->Allocated()); return (char*)host_ptr_ + offset; }
|
||||
bool Allocated() const override
|
||||
{ return nullptr != ptr_; }
|
||||
|
||||
bool Allocate(size_t size, size_t align, bool zero) override;
|
||||
bool Copy(size_t offset, const void *src, size_t size) override;
|
||||
void Free() override;
|
||||
bool Freeze() override;
|
||||
|
||||
private:
|
||||
RegionMemory(const RegionMemory&);
|
||||
RegionMemory& operator=(const RegionMemory&);
|
||||
|
||||
hsa_region_t region_;
|
||||
void *ptr_;
|
||||
void *host_ptr_;
|
||||
size_t size_;
|
||||
};
|
||||
|
||||
// Returns the first region of `agent` accepted by IsLocalRegion, or a region
// with handle 0 when the agent handle does not convert or no region matches.
hsa_region_t RegionMemory::AgentLocal(hsa_agent_t agent)
{
  hsa_region_t invalid_region;
  invalid_region.handle = 0;

  amd::GpuAgent *amd_agent = (amd::GpuAgent*)core::Agent::Convert(agent);
  if (nullptr == amd_agent) {
    return invalid_region;
  }

  auto&& agent_regions = amd_agent->regions();
  auto local_region = std::find_if(agent_regions.begin(), agent_regions.end(), IsLocalRegion);
  if (local_region == agent_regions.end()) {
    return invalid_region;
  }
  return core::MemoryRegion::Convert(*local_region);
}
|
||||
|
||||
// Returns the handle of the first entry in the runtime's system_regions_fine()
// list.
hsa_region_t RegionMemory::System() {
  const core::MemoryRegion* const first_system_region =
      core::Runtime::runtime_singleton_->system_regions_fine()[0];
  assert(first_system_region != NULL);

  return core::MemoryRegion::Convert(first_system_region);
}
|
||||
|
||||
bool RegionMemory::Allocate(size_t size, size_t align, bool zero)
|
||||
{
|
||||
assert(!this->Allocated());
|
||||
assert(0 < size);
|
||||
assert(0 < align && 0 == (align & (align - 1)));
|
||||
if (HSA_STATUS_SUCCESS != HSA::hsa_memory_allocate(region_, size, &ptr_)) {
|
||||
ptr_ = nullptr;
|
||||
return false;
|
||||
}
|
||||
assert(0 == ((uintptr_t)ptr_) % align);
|
||||
if (HSA_STATUS_SUCCESS != HSA::hsa_memory_allocate(RegionMemory::System(), size, &host_ptr_)) {
|
||||
HSA::hsa_memory_free(ptr_);
|
||||
ptr_ = nullptr;
|
||||
host_ptr_ = nullptr;
|
||||
return false;
|
||||
}
|
||||
if (zero) {
|
||||
memset(host_ptr_, 0x0, size);
|
||||
}
|
||||
size_ = size;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool RegionMemory::Copy(size_t offset, const void *src, size_t size)
|
||||
{
|
||||
assert(this->Allocated() && nullptr != host_ptr_);
|
||||
assert(nullptr != src);
|
||||
assert(0 < size);
|
||||
memcpy((char*)host_ptr_ + offset, src, size);
|
||||
return true;
|
||||
}
|
||||
|
||||
void RegionMemory::Free()
|
||||
{
|
||||
assert(this->Allocated());
|
||||
HSA::hsa_memory_free(ptr_);
|
||||
if (nullptr != host_ptr_) {
|
||||
HSA::hsa_memory_free(host_ptr_);
|
||||
}
|
||||
ptr_ = nullptr;
|
||||
host_ptr_ = nullptr;
|
||||
size_ = 0;
|
||||
}
|
||||
|
||||
bool RegionMemory::Freeze() {
|
||||
assert(this->Allocated() && nullptr != host_ptr_);
|
||||
|
||||
core::Agent* agent = reinterpret_cast<amd::MemoryRegion*>(
|
||||
core::MemoryRegion::Convert(region_))->owner();
|
||||
if (agent != NULL && agent->device_type() == core::Agent::kAmdGpuDevice) {
|
||||
if (HSA_STATUS_SUCCESS != agent->DmaCopy(ptr_, host_ptr_, size_)) {
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
memcpy(ptr_, host_ptr_, size_);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace anonymous
|
||||
|
||||
namespace amd {
|
||||
|
||||
// Resolves an ISA name to its handle via hsa_isa_from_name. Returns a handle
// of 0 when the lookup fails.
hsa_isa_t LoaderContext::IsaFromName(const char *name) {
  assert(name);

  hsa_isa_t isa_handle;
  isa_handle.handle = 0;

  if (HSA_STATUS_SUCCESS != HSA::hsa_isa_from_name(name, &isa_handle)) {
    // Discard whatever the failed call may have written.
    isa_handle.handle = 0;
  }
  return isa_handle;
}
|
||||
|
||||
// Returns true when hsa_isa_compatible reports `code_object_isa` as
// compatible with the ISA of `agent`. Any failing HSA call yields false.
bool LoaderContext::IsaSupportedByAgent(hsa_agent_t agent,
                                        hsa_isa_t code_object_isa) {
  assert(agent.handle);

  hsa_isa_t agent_isa;
  agent_isa.handle = 0;
  if (HSA_STATUS_SUCCESS !=
      HSA::hsa_agent_get_info(agent, HSA_AGENT_INFO_ISA, &agent_isa)) {
    return false;
  }

  bool compatible = false;
  const hsa_status_t compat_status =
      HSA::hsa_isa_compatible(code_object_isa, agent_isa, &compatible);
  return HSA_STATUS_SUCCESS == compat_status && compatible;
}
|
||||
|
||||
// Creates and allocates the backing store for one loaded segment.
//
// The backing type depends on the segment kind and the agent profile:
//   GLOBAL_AGENT / READONLY_AGENT: base -> agent-local region,
//                                  full -> system region
//   GLOBAL_PROGRAM:                base -> system region,
//                                  full -> malloc'ed memory
//   CODE_AGENT:                    base -> agent-local region (system region
//                                          if a debugger is registered),
//                                  full -> mapped memory
//
// Returns an opaque SegmentMemory pointer to pass to the other Segment*
// calls, or nullptr if the profile query fails, the combination is not
// handled, the object cannot be created, or the allocation itself fails.
void* LoaderContext::SegmentAlloc(amdgpu_hsa_elf_segment_t segment,
                                  hsa_agent_t agent,
                                  size_t size,
                                  size_t align,
                                  bool zero)
{
  assert(0 < size);
  assert(0 < align && 0 == (align & (align - 1)));

  hsa_profile_t agent_profile;
  if (HSA_STATUS_SUCCESS != HSA::hsa_agent_get_info(agent, HSA_AGENT_INFO_PROFILE, &agent_profile)) {
    return nullptr;
  }

  SegmentMemory *mem = nullptr;
  switch (segment) {
  case AMDGPU_HSA_SEGMENT_GLOBAL_AGENT:
  case AMDGPU_HSA_SEGMENT_READONLY_AGENT:
    switch (agent_profile) {
    case HSA_PROFILE_BASE:
      mem = new (std::nothrow) RegionMemory(RegionMemory::AgentLocal(agent));
      break;
    case HSA_PROFILE_FULL:
      mem = new (std::nothrow) RegionMemory(RegionMemory::System());
      break;
    default:
      assert(false);
    }
    break;
  case AMDGPU_HSA_SEGMENT_GLOBAL_PROGRAM:
    switch (agent_profile) {
    case HSA_PROFILE_BASE:
      mem = new (std::nothrow) RegionMemory(RegionMemory::System());
      break;
    case HSA_PROFILE_FULL:
      mem = new (std::nothrow) MallocedMemory();
      break;
    default:
      assert(false);
    }
    break;
  case AMDGPU_HSA_SEGMENT_CODE_AGENT:
    switch (agent_profile) {
    case HSA_PROFILE_BASE:
      mem = new (std::nothrow) RegionMemory(IsDebuggerRegistered() ?
                                            RegionMemory::System() :
                                            RegionMemory::AgentLocal(agent));
      break;
    case HSA_PROFILE_FULL:
      mem = new (std::nothrow) MappedMemory(((GpuAgentInt*)core::Agent::Convert(agent))->is_kv_device());
      break;
    default:
      assert(false);
    }
    break;
  default:
    assert(false);
  }

  if (nullptr == mem) {
    return nullptr;
  }

  // A failed allocation must not escape as a non-null handle: every other
  // Segment* entry point assumes the segment is allocated.
  if (!mem->Allocate(size, align, zero)) {
    delete mem;
    return nullptr;
  }
  return mem;
}
|
||||
|
||||
// Copies `size` bytes from `src` into the segment `dst` (a handle returned by
// SegmentAlloc) at `offset`, forwarding to SegmentMemory::Copy.
bool LoaderContext::SegmentCopy(amdgpu_hsa_elf_segment_t segment, // not used.
                                hsa_agent_t agent,                // not used.
                                void* dst,
                                size_t offset,
                                const void* src,
                                size_t size)
{
  assert(nullptr != dst);

  SegmentMemory *mem = (SegmentMemory*)dst;
  return mem->Copy(offset, src, size);
}
|
||||
|
||||
// Releases the storage of segment `seg` and destroys its SegmentMemory
// object. `seg` must not be used afterwards.
void LoaderContext::SegmentFree(amdgpu_hsa_elf_segment_t segment, // not used.
                                hsa_agent_t agent,                // not used.
                                void* seg,
                                size_t size)                      // not used.
{
  assert(nullptr != seg);

  SegmentMemory *mem = (SegmentMemory*)seg;
  mem->Free();
  delete mem;
}
|
||||
|
||||
// Returns the address of segment `seg` at `offset`, forwarding to
// SegmentMemory::Address.
void* LoaderContext::SegmentAddress(amdgpu_hsa_elf_segment_t segment, // not used.
                                    hsa_agent_t agent,                // not used.
                                    void* seg,
                                    size_t offset)
{
  assert(nullptr != seg);

  SegmentMemory *mem = (SegmentMemory*)seg;
  return mem->Address(offset);
}
|
||||
|
||||
// Returns the host address of segment `seg` at `offset`, forwarding to
// SegmentMemory::HostAddress.
void* LoaderContext::SegmentHostAddress(amdgpu_hsa_elf_segment_t segment, // not used.
                                        hsa_agent_t agent,                // not used.
                                        void* seg,
                                        size_t offset)
{
  assert(nullptr != seg);

  SegmentMemory *mem = (SegmentMemory*)seg;
  return mem->HostAddress(offset);
}
|
||||
|
||||
// Finalizes segment `seg`, forwarding to SegmentMemory::Freeze.
bool LoaderContext::SegmentFreeze(amdgpu_hsa_elf_segment_t segment, // not used.
                                  hsa_agent_t agent,                // not used.
                                  void* seg,
                                  size_t size)                      // not used.
{
  assert(nullptr != seg);

  SegmentMemory *mem = (SegmentMemory*)seg;
  return mem->Freeze();
}
|
||||
|
||||
bool LoaderContext::ImageExtensionSupported() {
|
||||
hsa_status_t hsa_status = HSA_STATUS_SUCCESS;
|
||||
bool result = false;
|
||||
|
||||
hsa_status =
|
||||
HSA::hsa_system_extension_supported(HSA_EXTENSION_IMAGES, 1, 0, &result);
|
||||
if (HSA_STATUS_SUCCESS != hsa_status) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
// Creates an image on `agent` by forwarding to hsa_ext_image_create. Argument
// validity and image extension support are checked by assertion only.
hsa_status_t LoaderContext::ImageCreate(
    hsa_agent_t agent, hsa_access_permission_t image_permission,
    const hsa_ext_image_descriptor_t *image_descriptor, const void *image_data,
    hsa_ext_image_t *image_handle) {
  assert(agent.handle);
  assert(image_descriptor);
  assert(image_data);
  assert(image_handle);
  assert(ImageExtensionSupported());

  const hsa_status_t create_status = hsa_ext_image_create(
      agent, image_descriptor, image_data, image_permission, image_handle);
  return create_status;
}
|
||||
|
||||
// Destroys `image_handle` on `agent` by forwarding to hsa_ext_image_destroy.
// Argument validity and image extension support are checked by assertion
// only.
hsa_status_t LoaderContext::ImageDestroy(hsa_agent_t agent,
                                         hsa_ext_image_t image_handle) {
  assert(agent.handle);
  assert(image_handle.handle);
  assert(ImageExtensionSupported());

  const hsa_status_t destroy_status = hsa_ext_image_destroy(agent, image_handle);
  return destroy_status;
}
|
||||
|
||||
// Creates a sampler on `agent` by forwarding to hsa_ext_sampler_create.
// Argument validity and image extension support are checked by assertion
// only.
hsa_status_t LoaderContext::SamplerCreate(
    hsa_agent_t agent, const hsa_ext_sampler_descriptor_t *sampler_descriptor,
    hsa_ext_sampler_t *sampler_handle) {
  assert(agent.handle);
  assert(sampler_descriptor);
  assert(sampler_handle);
  assert(ImageExtensionSupported());

  const hsa_status_t create_status =
      hsa_ext_sampler_create(agent, sampler_descriptor, sampler_handle);
  return create_status;
}
|
||||
|
||||
// Destroys `sampler_handle` on `agent` by forwarding to
// hsa_ext_sampler_destroy. Argument validity and image extension support are
// checked by assertion only.
hsa_status_t LoaderContext::SamplerDestroy(hsa_agent_t agent,
                                           hsa_ext_sampler_t sampler_handle) {
  assert(agent.handle);
  assert(sampler_handle.handle);
  assert(ImageExtensionSupported());

  const hsa_status_t destroy_status = hsa_ext_sampler_destroy(agent, sampler_handle);
  return destroy_status;
}
|
||||
|
||||
} // namespace amd
|
||||
@@ -1,555 +0,0 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "core/inc/amd_memory_region.h"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include "core/inc/runtime.h"
|
||||
#include "core/inc/amd_cpu_agent.h"
|
||||
#include "core/inc/amd_gpu_agent.h"
|
||||
#include "core/util/utils.h"
|
||||
|
||||
namespace amd {
|
||||
void* MemoryRegion::AllocateKfdMemory(const HsaMemFlags& flag,
|
||||
HSAuint32 node_id, size_t size) {
|
||||
void* ret = NULL;
|
||||
const HSAKMT_STATUS status = hsaKmtAllocMemory(node_id, size, flag, &ret);
|
||||
return (status == HSAKMT_STATUS_SUCCESS) ? ret : NULL;
|
||||
}
|
||||
|
||||
void MemoryRegion::FreeKfdMemory(void* ptr, size_t size) {
|
||||
if (ptr == NULL || size == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
HSAKMT_STATUS status = hsaKmtFreeMemory(ptr, size);
|
||||
assert(status == HSAKMT_STATUS_SUCCESS);
|
||||
}
|
||||
|
||||
bool MemoryRegion::RegisterMemory(void* ptr, size_t size, size_t num_nodes,
|
||||
const uint32_t* nodes) {
|
||||
assert(ptr != NULL);
|
||||
assert(size != 0);
|
||||
assert(num_nodes != 0);
|
||||
assert(nodes != NULL);
|
||||
|
||||
const HSAKMT_STATUS status = hsaKmtRegisterMemoryToNodes(
|
||||
ptr, size, num_nodes, const_cast<uint32_t*>(nodes));
|
||||
return (status == HSAKMT_STATUS_SUCCESS);
|
||||
}
|
||||
|
||||
// Undoes RegisterMemory for `ptr`; the thunk's return status is ignored.
void MemoryRegion::DeregisterMemory(void* ptr) {
  hsaKmtDeregisterMemory(ptr);
}
|
||||
|
||||
bool MemoryRegion::MakeKfdMemoryResident(size_t num_node, const uint32_t* nodes,
|
||||
void* ptr, size_t size,
|
||||
uint64_t* alternate_va,
|
||||
HsaMemMapFlags map_flag) {
|
||||
assert(num_node > 0);
|
||||
assert(nodes != NULL);
|
||||
|
||||
*alternate_va = 0;
|
||||
const HSAKMT_STATUS status =
|
||||
hsaKmtMapMemoryToGPUNodes(ptr, size, alternate_va, map_flag, num_node,
|
||||
const_cast<uint32_t*>(nodes));
|
||||
|
||||
return (status == HSAKMT_STATUS_SUCCESS);
|
||||
}
|
||||
|
||||
// Unmaps `ptr` from the GPU via hsaKmtUnmapMemoryToGPU; the thunk's return
// status is ignored.
void MemoryRegion::MakeKfdMemoryUnresident(void* ptr) {
  hsaKmtUnmapMemoryToGPU(ptr);
}
|
||||
|
||||
// Builds a memory region description from the KFD memory properties.
//
// Sets up the allocation flags (mem_flag_) and mapping flags (map_flag_) for
// local (frame buffer) or system memory, derives the virtual size of the
// region and the largest single allocation it accepts.
MemoryRegion::MemoryRegion(bool fine_grain, bool full_profile,
                           core::Agent* owner,
                           const HsaMemoryProperties& mem_props)
    : core::MemoryRegion(fine_grain, full_profile, owner),
      mem_props_(mem_props),
      max_single_alloc_size_(0),
      virtual_size_(0) {
  // Virtual address space size used for GPU-visible regions (1 TiB).
  static const HSAuint64 kGpuVmSize = (1ULL << 40);

  // Default: the virtual size equals the physical size. It is overridden below
  // for local and system memory.
  virtual_size_ = GetPhysicalSize();

  mem_flag_.Value = 0;
  map_flag_.Value = 0;

  if (IsLocalMemory()) {
    mem_flag_.ui32.PageSize = HSA_PAGE_SIZE_4KB;
    mem_flag_.ui32.NoSubstitute = 1;
    // Only the private frame buffer is hidden from the host.
    mem_flag_.ui32.HostAccess =
        (mem_props_.HeapType != HSA_HEAPTYPE_FRAME_BUFFER_PRIVATE) ? 1 : 0;
    mem_flag_.ui32.NonPaged = 1;

    map_flag_.ui32.PageSize = HSA_PAGE_SIZE_4KB;

    virtual_size_ = kGpuVmSize;
  } else if (IsSystem()) {
    mem_flag_.ui32.PageSize = HSA_PAGE_SIZE_4KB;
    mem_flag_.ui32.NoSubstitute = 1;
    mem_flag_.ui32.HostAccess = 1;
    mem_flag_.ui32.CachePolicy = HSA_CACHING_CACHED;

    map_flag_.ui32.HostAccess = 1;
    map_flag_.ui32.PageSize = HSA_PAGE_SIZE_4KB;

    // Full profile exposes the whole user-mode address space.
    if (full_profile) {
      virtual_size_ = os::GetUserModeVirtualMemorySize();
    } else {
      virtual_size_ = kGpuVmSize;
    }
  }

  // A single allocation can be at most the page-aligned physical size.
  max_single_alloc_size_ =
      AlignDown(static_cast<size_t>(GetPhysicalSize()), kPageSize_);

  mem_flag_.ui32.CoarseGrain = fine_grain ? 0 : 1;

  assert(GetVirtualSize() != 0);
  assert(GetPhysicalSize() <= GetVirtualSize());
  assert(IsMultipleOf(max_single_alloc_size_, kPageSize_));
}
|
||||
|
||||
// Destructor: the body is empty, so no explicit cleanup is performed here.
MemoryRegion::~MemoryRegion() {}
|
||||
|
||||
// Allocates `size` bytes with unrestricted access; forwards to the
// three-argument overload with restrict_access == false.
hsa_status_t MemoryRegion::Allocate(size_t size, void** address) const {
  const bool restrict_access = false;
  return Allocate(restrict_access, size, address);
}
|
||||
|
||||
// Allocates `size` bytes (rounded up to the page size) from this region and
// maps the allocation to the GPUs that may access it by default.
//
// @param restrict_access  For system memory: when true, the allocation is not
//                         mapped to any GPU.
// @param size             Requested size in bytes.
// @param address          Receives the allocation; must not be NULL.
// @return HSA_STATUS_SUCCESS on success;
//         HSA_STATUS_ERROR_INVALID_ARGUMENT when `address` is NULL;
//         HSA_STATUS_ERROR_INVALID_ALLOCATION when the region is neither
//         system nor local memory, or `size` exceeds the region's limit;
//         HSA_STATUS_ERROR_OUT_OF_RESOURCES when allocation or a required
//         mapping fails.
hsa_status_t MemoryRegion::Allocate(bool restrict_access, size_t size,
                                    void** address) const {
  if (address == NULL) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  if (!(IsSystem() || IsLocalMemory())) {
    return HSA_STATUS_ERROR_INVALID_ALLOCATION;
  }

  if (size > max_single_alloc_size_) {
    return HSA_STATUS_ERROR_INVALID_ALLOCATION;
  }

  const size_t aligned_size = AlignUp(size, kPageSize_);
  const uint32_t owner_node_id = owner()->node_id();

  *address = AllocateKfdMemory(mem_flag_, owner_node_id, aligned_size);
  if (*address == NULL) {
    return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  }

  // Commit the memory.
  // Local memory is mapped to the owning GPU only; mapping to other GPUs, if
  // allowed, happens in AllowAccess. System memory is mapped to every GPU
  // unless access is restricted, in which case only the CPU may access it by
  // default and no mapping is needed.
  size_t target_count = 1;
  const uint32_t* target_nodes = &owner_node_id;

  if (IsSystem()) {
    if (restrict_access) {
      // No need to pin it for CPU exclusive access.
      return HSA_STATUS_SUCCESS;
    }

    // Map to all GPU agents.
    const auto& gpu_nodes = core::Runtime::runtime_singleton_->gpu_ids();
    target_count = gpu_nodes.size();
    if (target_count == 0) {
      // No need to pin since no GPU in the platform.
      return HSA_STATUS_SUCCESS;
    }
    target_nodes = &gpu_nodes[0];
  }

  uint64_t alternate_va = 0;
  const bool mapped =
      MakeKfdMemoryResident(target_count, target_nodes, *address, aligned_size,
                            &alternate_va, map_flag_);

  // A failed mapping is fatal only when the memory has to be pinned.
  const bool must_pin = (!full_profile() || IsLocalMemory() || IsScratch());
  if (!mapped && must_pin) {
    FreeKfdMemory(*address, aligned_size);
    *address = NULL;
    return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  }

  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
// Releases an allocation made by Allocate: unmaps it from the GPU first and
// then returns the memory to KFD. Always reports HSA_STATUS_SUCCESS.
hsa_status_t MemoryRegion::Free(void* address, size_t size) const {
  // Unmap before freeing.
  MakeKfdMemoryUnresident(address);
  FreeKfdMemory(address, size);

  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
// Answers an hsa_region_info_t (or hsa_amd_region_info_t) query about this
// region by writing the result into `value`.
//
// The type written depends on the attribute: hsa_region_segment_t, uint32_t,
// size_t, bool or void*, as shown in each case below.
//
// @return HSA_STATUS_SUCCESS, or HSA_STATUS_ERROR_INVALID_ARGUMENT when the
//         attribute is not recognised.
hsa_status_t MemoryRegion::GetInfo(hsa_region_info_t attribute,
                                   void* value) const {
  const auto heap_type = mem_props_.HeapType;
  const bool is_system_heap = (heap_type == HSA_HEAPTYPE_SYSTEM);
  const bool is_frame_buffer =
      (heap_type == HSA_HEAPTYPE_FRAME_BUFFER_PRIVATE) ||
      (heap_type == HSA_HEAPTYPE_FRAME_BUFFER_PUBLIC);
  // Heaps that the runtime can allocate from.
  const bool is_allocatable_heap = is_system_heap || is_frame_buffer;

  switch (attribute) {
    case HSA_REGION_INFO_SEGMENT: {
      hsa_region_segment_t* segment = static_cast<hsa_region_segment_t*>(value);
      if (is_allocatable_heap) {
        *segment = HSA_REGION_SEGMENT_GLOBAL;
      } else if (heap_type == HSA_HEAPTYPE_GPU_LDS) {
        *segment = HSA_REGION_SEGMENT_GROUP;
      } else {
        assert(false && "Memory region should only be global, group");
      }
      break;
    }

    case HSA_REGION_INFO_GLOBAL_FLAGS: {
      uint32_t* flags = static_cast<uint32_t*>(value);
      if (is_system_heap) {
        if (fine_grain()) {
          *flags = (HSA_REGION_GLOBAL_FLAG_KERNARG |
                    HSA_REGION_GLOBAL_FLAG_FINE_GRAINED);
        } else {
          *flags = HSA_REGION_GLOBAL_FLAG_COARSE_GRAINED;
        }
      } else if (is_frame_buffer) {
        *flags = HSA_REGION_GLOBAL_FLAG_COARSE_GRAINED;
      } else {
        *flags = 0;
      }
      break;
    }

    case HSA_REGION_INFO_SIZE: {
      size_t* region_size = static_cast<size_t*>(value);
      // Frame buffer regions always report the physical size; other heaps
      // report the virtual size only in full profile.
      if (is_frame_buffer || !full_profile()) {
        *region_size = static_cast<size_t>(GetPhysicalSize());
      } else {
        *region_size = static_cast<size_t>(GetVirtualSize());
      }
      break;
    }

    case HSA_REGION_INFO_ALLOC_MAX_SIZE: {
      size_t* max_size = static_cast<size_t*>(value);
      if (is_allocatable_heap) {
        *max_size = max_single_alloc_size_;
      } else {
        *max_size = 0;
      }
      break;
    }

    case HSA_REGION_INFO_RUNTIME_ALLOC_ALLOWED:
      *static_cast<bool*>(value) = is_allocatable_heap;
      break;

    case HSA_REGION_INFO_RUNTIME_ALLOC_GRANULE: {
      size_t* granule = static_cast<size_t*>(value);
      if (is_allocatable_heap) {
        *granule = kPageSize_;
      } else {
        *granule = 0;
      }
      break;
    }

    case HSA_REGION_INFO_RUNTIME_ALLOC_ALIGNMENT: {
      size_t* alignment = static_cast<size_t*>(value);
      if (is_allocatable_heap) {
        *alignment = kPageSize_;
      } else {
        *alignment = 0;
      }
      break;
    }

    default:
      // Not a core attribute: try the AMD vendor extension attributes.
      switch ((hsa_amd_region_info_t)attribute) {
        case HSA_AMD_REGION_INFO_HOST_ACCESSIBLE:
          *static_cast<bool*>(value) = is_system_heap;
          break;
        case HSA_AMD_REGION_INFO_BASE:
          *static_cast<void**>(value) =
              reinterpret_cast<void*>(GetBaseAddress());
          break;
        case HSA_AMD_REGION_INFO_BUS_WIDTH:
          *static_cast<uint32_t*>(value) = BusWidth();
          break;
        case HSA_AMD_REGION_INFO_MAX_CLOCK_FREQUENCY:
          *static_cast<uint32_t*>(value) = MaxMemCloc();
          break;
        default:
          return HSA_STATUS_ERROR_INVALID_ARGUMENT;
      }
      break;
  }

  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
// Answers an hsa_amd_memory_pool_info_t query about this region.
//
// Attributes shared with hsa_region_info_t are delegated to GetInfo;
// ACCESSIBLE_BY_ALL is written as a bool that is true for system memory.
//
// @return HSA_STATUS_SUCCESS, the status of GetInfo for delegated attributes,
//         or HSA_STATUS_ERROR_INVALID_ARGUMENT for an unknown attribute.
hsa_status_t MemoryRegion::GetPoolInfo(hsa_amd_memory_pool_info_t attribute,
                                       void* value) const {
  if (attribute == HSA_AMD_MEMORY_POOL_INFO_ACCESSIBLE_BY_ALL) {
    *static_cast<bool*>(value) = IsSystem();
    return HSA_STATUS_SUCCESS;
  }

  switch (attribute) {
    case HSA_AMD_MEMORY_POOL_INFO_SEGMENT:
    case HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS:
    case HSA_AMD_MEMORY_POOL_INFO_SIZE:
    case HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED:
    case HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE:
    case HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALIGNMENT:
      // These attributes are handled by the region query.
      return GetInfo(static_cast<hsa_region_info_t>(attribute), value);
    default:
      return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }
}
|
||||
|
||||
// Reports how `agent` relates to this memory pool.
//
// @param agent      Agent whose relationship to this pool is queried.
// @param attribute  Property to report.
// @param value      Caller-provided output buffer. Written as
//                   hsa_amd_memory_pool_access_t for ACCESS, uint32_t for
//                   NUM_LINK_HOPS and hsa_amd_memory_pool_link_info_t for
//                   LINK_INFO.
// @return HSA_STATUS_SUCCESS, or HSA_STATUS_ERROR_INVALID_ARGUMENT when the
//         attribute is not recognised.
hsa_status_t MemoryRegion::GetAgentPoolInfo(
    const core::Agent& agent, hsa_amd_agent_memory_pool_info_t attribute,
    void* value) const {
  const uint32_t node_id_from = agent.node_id();
  const uint32_t node_id_to = owner()->node_id();

  const core::Runtime::LinkInfo link_info =
      core::Runtime::runtime_singleton_->GetLinkInfo(node_id_from, node_id_to);

  switch (attribute) {
    case HSA_AMD_AGENT_MEMORY_POOL_INFO_ACCESS: {
      /**
       *  ---------------------------------------------------
       *  |              |CPU        |GPU (owner)|GPU (peer) |
       *  ---------------------------------------------------
       *  |system memory |allowed    |disallowed |disallowed |
       *  ---------------------------------------------------
       *  |fb private    |never      |allowed    |never      |
       *  ---------------------------------------------------
       *  |fb public     |disallowed |allowed    |disallowed |
       *  ---------------------------------------------------
       *  |others        |never      |allowed    |never      |
       *  ---------------------------------------------------
       */
      const bool cpu_on_system_memory =
          IsSystem() && (agent.device_type() == core::Agent::kAmdCpuDevice);
      const bool agent_is_on_owner_node = (node_id_from == node_id_to);

      hsa_amd_memory_pool_access_t access;
      if (cpu_on_system_memory || agent_is_on_owner_node) {
        access = HSA_AMD_MEMORY_POOL_ACCESS_ALLOWED_BY_DEFAULT;
      } else if (IsSystem() || (IsPublic() && link_info.num_hop > 0)) {
        access = HSA_AMD_MEMORY_POOL_ACCESS_DISALLOWED_BY_DEFAULT;
      } else {
        access = HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED;
      }
      *static_cast<hsa_amd_memory_pool_access_t*>(value) = access;
      break;
    }
    case HSA_AMD_AGENT_MEMORY_POOL_INFO_NUM_LINK_HOPS:
      *static_cast<uint32_t*>(value) = link_info.num_hop;
      // This break is required: falling through to LINK_INFO would zero the
      // hop count just written and write a whole
      // hsa_amd_memory_pool_link_info_t into a buffer sized for a uint32_t.
      break;
    case HSA_AMD_AGENT_MEMORY_POOL_INFO_LINK_INFO:
      // Report an all-zero record when there is no link between the nodes.
      memset(value, 0, sizeof(hsa_amd_memory_pool_link_info_t));
      if (link_info.num_hop > 0) {
        memcpy(value, &link_info.info, sizeof(hsa_amd_memory_pool_link_info_t));
      }
      break;
    default:
      return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }
  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
// Grants the listed agents access to the allocation [ptr, ptr + size) by
// mapping it to the GPU nodes among them.
//
// @return HSA_STATUS_SUCCESS on success;
//         HSA_STATUS_ERROR_INVALID_ARGUMENT for an empty or NULL agent list, a
//         NULL pointer or a zero size;
//         HSA_STATUS_ERROR when the region is neither system nor local memory;
//         HSA_STATUS_ERROR_INVALID_AGENT when an agent handle is invalid;
//         HSA_STATUS_ERROR_OUT_OF_RESOURCES when the mapping fails.
hsa_status_t MemoryRegion::AllowAccess(uint32_t num_agents,
                                       const hsa_agent_t* agents,
                                       const void* ptr, size_t size) const {
  if (num_agents == 0 || agents == NULL || ptr == NULL || size == 0) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  if (!(IsSystem() || IsLocalMemory())) {
    return HSA_STATUS_ERROR;
  }

  // Split the request into GPU node ids and a "some non-GPU agent was listed"
  // flag.
  bool cpu_in_list = false;
  std::vector<uint32_t> whitelist_nodes;
  for (uint32_t index = 0; index < num_agents; ++index) {
    const core::Agent* candidate = core::Agent::Convert(agents[index]);
    if (candidate == NULL || !candidate->IsValid()) {
      return HSA_STATUS_ERROR_INVALID_AGENT;
    }

    if (candidate->device_type() != core::Agent::kAmdGpuDevice) {
      cpu_in_list = true;
      continue;
    }
    whitelist_nodes.push_back(candidate->node_id());
  }

  if (whitelist_nodes.size() == 0 && IsSystem()) {
    assert(cpu_in_list);
    // This is a system region and only CPU agents in the whitelist.
    // No need to call map.
    return HSA_STATUS_SUCCESS;
  }

  // If this is a local memory region, the owning gpu always needs to be in
  // the whitelist.
  if (IsPublic()) {
    const uint32_t owner_node = owner()->node_id();
    const bool owner_listed =
        std::find(whitelist_nodes.begin(), whitelist_nodes.end(), owner_node) !=
        whitelist_nodes.end();
    if (!owner_listed) {
      whitelist_nodes.push_back(owner()->node_id());
    }
  }

  HsaMemMapFlags map_flag = map_flag_;
  if (cpu_in_list) {
    map_flag.ui32.HostAccess |= 1;
  }

  // NOTE(review): for a region that is neither system nor public, a request
  // listing only non-GPU agents reaches this point with an empty whitelist, so
  // &whitelist_nodes[0] is taken on an empty vector — confirm whether such a
  // request should be rejected earlier.
  uint64_t alternate_va = 0;
  const bool mapped = amd::MemoryRegion::MakeKfdMemoryResident(
      whitelist_nodes.size(), &whitelist_nodes[0], const_cast<void*>(ptr),
      size, &alternate_va, map_flag);
  if (mapped) {
    return HSA_STATUS_SUCCESS;
  }
  return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
}
|
||||
|
||||
// Migration query stub: always sets `result` to false and returns
// HSA_STATUS_ERROR_OUT_OF_RESOURCES. `dst` is not inspected.
hsa_status_t MemoryRegion::CanMigrate(const MemoryRegion& dst,
                                      bool& result) const {
  // TODO(bwicakso): not implemented yet.
  result = false;

  return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
}
|
||||
|
||||
// Migration stub: performs no work and always returns
// HSA_STATUS_ERROR_OUT_OF_RESOURCES. Neither argument is inspected.
hsa_status_t MemoryRegion::Migrate(uint32_t flag, const void* ptr) const {
  // TODO(bwicakso): not implemented yet.

  return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
}
|
||||
|
||||
// Pins the host range [host_ptr, host_ptr + size) and makes it accessible to
// GPU agents, returning the address agents should use in `*agent_ptr`.
//
// @param num_agents  Number of entries in `agents`; 0 selects every GPU.
// @param agents      Agents that need access; NULL selects every GPU.
// @param host_ptr    Start of the host range to pin.
// @param size        Size of the range in bytes.
// @param agent_ptr   Receives the agent-visible address.
// @return HSA_STATUS_SUCCESS on success;
//         HSA_STATUS_ERROR when this is not a system region or registering the
//         memory fails;
//         HSA_STATUS_ERROR_INVALID_AGENT when an agent handle is invalid;
//         HSA_STATUS_ERROR_OUT_OF_RESOURCES when mapping the memory fails.
hsa_status_t MemoryRegion::Lock(uint32_t num_agents, const hsa_agent_t* agents,
                                void* host_ptr, size_t size,
                                void** agent_ptr) const {
  if (!IsSystem()) {
    return HSA_STATUS_ERROR;
  }

  if (full_profile()) {
    // For APU, any host pointer is always accessible by the gpu.
    *agent_ptr = host_ptr;
    return HSA_STATUS_SUCCESS;
  }

  std::vector<HSAuint32> whitelist_nodes;
  if (num_agents == 0 || agents == NULL) {
    // Map to all GPU agents.
    whitelist_nodes = core::Runtime::runtime_singleton_->gpu_ids();
  } else {
    // The index has the same unsigned type as num_agents, so the loop
    // condition does not mix signed and unsigned operands.
    for (uint32_t i = 0; i < num_agents; ++i) {
      core::Agent* agent = core::Agent::Convert(agents[i]);
      if (agent == NULL || !agent->IsValid()) {
        return HSA_STATUS_ERROR_INVALID_AGENT;
      }

      // Only GPU agents need a mapping; other agents are skipped.
      if (agent->device_type() == core::Agent::kAmdGpuDevice) {
        whitelist_nodes.push_back(agent->node_id());
      }
    }
  }

  if (whitelist_nodes.empty()) {
    // No GPU agents in the whitelist. So no need to register and map since the
    // platform only has CPUs.
    *agent_ptr = host_ptr;
    return HSA_STATUS_SUCCESS;
  }

  // Call kernel driver to register and pin the memory.
  if (!RegisterMemory(host_ptr, size, whitelist_nodes.size(),
                      &whitelist_nodes[0])) {
    return HSA_STATUS_ERROR;
  }

  uint64_t alternate_va = 0;
  if (MakeKfdMemoryResident(whitelist_nodes.size(), &whitelist_nodes[0],
                            host_ptr, size, &alternate_va, map_flag_)) {
    assert(alternate_va != 0);
    *agent_ptr = reinterpret_cast<void*>(alternate_va);
    return HSA_STATUS_SUCCESS;
  }

  // Mapping failed: undo the registration before reporting the error.
  amd::MemoryRegion::DeregisterMemory(host_ptr);
  return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
}
|
||||
|
||||
// Reverses Lock for `host_ptr`: unmaps the range from the GPU and deregisters
// it from the kernel driver.
//
// @return HSA_STATUS_ERROR when this is not a system region, otherwise
//         HSA_STATUS_SUCCESS. In full profile nothing is unmapped or
//         deregistered.
hsa_status_t MemoryRegion::Unlock(void* host_ptr) const {
  if (!IsSystem()) {
    return HSA_STATUS_ERROR;
  }

  if (!full_profile()) {
    MakeKfdMemoryUnresident(host_ptr);
    DeregisterMemory(host_ptr);
  }

  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
// Ownership assignment hook: performs no work, inspects none of its arguments
// and always returns HSA_STATUS_SUCCESS.
hsa_status_t MemoryRegion::AssignAgent(void* ptr, size_t size,
                                       const core::Agent& agent,
                                       hsa_access_permission_t access) const {
  const hsa_status_t status = HSA_STATUS_SUCCESS;
  return status;
}
|
||||
|
||||
} // namespace
|
||||
@@ -1,210 +0,0 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "core/inc/amd_topology.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstring>
|
||||
#include <vector>
|
||||
|
||||
#include "hsakmt.h"
|
||||
|
||||
#include "core/inc/runtime.h"
|
||||
#include "core/inc/amd_cpu_agent.h"
|
||||
#include "core/inc/amd_gpu_agent.h"
|
||||
#include "core/inc/amd_memory_region.h"
|
||||
#include "core/util/utils.h"
|
||||
|
||||
namespace amd {
|
||||
// Minimum acceptable KFD version numbers.
// Declared with the fixed-width uint32_t rather than the non-standard `uint`
// typedef so the constants compile on toolchains that do not provide `uint`.
static const uint32_t kKfdVersionMajor = 0;
static const uint32_t kKfdVersionMinor = 99;
|
||||
|
||||
// Creates a CpuAgent for node `node_id` and registers it with the runtime.
//
// Returns NULL, without creating anything, when the node reports no CPU cores;
// otherwise returns the newly created agent.
CpuAgent* DiscoverCpu(HSAuint32 node_id, HsaNodeProperties& node_prop) {
  const bool has_cpu_cores = (node_prop.NumCPUCores != 0);
  if (!has_cpu_cores) {
    return NULL;
  }

  CpuAgent* cpu_agent = new CpuAgent(node_id, node_prop);
  core::Runtime::runtime_singleton_->RegisterAgent(cpu_agent);
  return cpu_agent;
}
|
||||
|
||||
// Creates a GpuAgent for node `node_id`, registers it with the runtime and
// initialises its DMA support.
//
// Returns NULL when the node reports no compute cores or when InitDma() fails;
// otherwise returns the newly created agent.
GpuAgent* DiscoverGpu(HSAuint32 node_id, HsaNodeProperties& node_prop) {
  const bool has_compute_cores = (node_prop.NumFComputeCores != 0);
  if (!has_compute_cores) {
    return NULL;
  }

  GpuAgent* gpu_agent = new GpuAgent(node_id, node_prop);
  core::Runtime::runtime_singleton_->RegisterAgent(gpu_agent);

  if (gpu_agent->InitDma() == HSA_STATUS_SUCCESS) {
    return gpu_agent;
  }

  // NOTE(review): the agent was already handed to RegisterAgent() above and
  // is deleted here; confirm the runtime does not keep using the pointer.
  assert(false && "Fail init blit");
  delete gpu_agent;
  return NULL;
}
|
||||
|
||||
void RegisterLinkInfo(uint32_t node_id, uint32_t num_link) {
|
||||
// Register connectivity links for this agent to the runtime.
|
||||
if (num_link == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
std::vector<HsaIoLinkProperties> links(num_link);
|
||||
if (HSAKMT_STATUS_SUCCESS !=
|
||||
hsaKmtGetNodeIoLinkProperties(node_id, num_link, &links[0])) {
|
||||
return;
|
||||
}
|
||||
|
||||
for (HsaIoLinkProperties io_link : links) {
|
||||
// Populate link info with thunk property.
|
||||
hsa_amd_memory_pool_link_info_t link_info = {0};
|
||||
|
||||
if (io_link.Flags.ui32.Override == 1) {
|
||||
if (io_link.Flags.ui32.NoPeerToPeerDMA == 1) {
|
||||
// Ignore this link since peer to peer is not allowed.
|
||||
continue;
|
||||
}
|
||||
link_info.atomic_support_32bit = (io_link.Flags.ui32.NoAtomics32bit == 0);
|
||||
link_info.atomic_support_64bit = (io_link.Flags.ui32.NoAtomics64bit == 0);
|
||||
link_info.coherent_support = (io_link.Flags.ui32.NonCoherent == 0);
|
||||
} else {
|
||||
// TODO(bwicakso): decipher HSA_IOLINKTYPE to fill out the atomic
|
||||
// and coherent information.
|
||||
}
|
||||
|
||||
switch (io_link.IoLinkType) {
|
||||
case HSA_IOLINKTYPE_HYPERTRANSPORT:
|
||||
link_info.link_type = HSA_AMD_LINK_INFO_TYPE_HYPERTRANSPORT;
|
||||
break;
|
||||
case HSA_IOLINKTYPE_PCIEXPRESS:
|
||||
link_info.link_type = HSA_AMD_LINK_INFO_TYPE_PCIE;
|
||||
break;
|
||||
case HSA_IOLINK_TYPE_QPI_1_1:
|
||||
link_info.link_type = HSA_AMD_LINK_INFO_TYPE_QPI;
|
||||
break;
|
||||
case HSA_IOLINK_TYPE_INFINIBAND:
|
||||
link_info.link_type = HSA_AMD_LINK_INFO_TYPE_INFINBAND;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
link_info.max_bandwidth = io_link.MaximumBandwidth;
|
||||
link_info.max_latency = io_link.MaximumLatency;
|
||||
link_info.min_bandwidth = io_link.MinimumBandwidth;
|
||||
link_info.min_latency = io_link.MinimumLatency;
|
||||
|
||||
core::Runtime::runtime_singleton_->RegisterLinkInfo(
|
||||
io_link.NodeFrom, io_link.NodeTo, io_link.Weight, link_info);
|
||||
}
|
||||
}
|
||||
|
||||
/// @brief Calls Kfd thunk to get the snapshot of the topology of the system,
|
||||
/// which includes associations between, node, devices, memory and caches.
|
||||
void BuildTopology() {
|
||||
HsaVersionInfo info;
|
||||
if (hsaKmtGetVersion(&info) != HSAKMT_STATUS_SUCCESS) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (info.KernelInterfaceMajorVersion == kKfdVersionMajor &&
|
||||
info.KernelInterfaceMinorVersion < kKfdVersionMinor) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Disable KFD event support when using open source KFD
|
||||
if (info.KernelInterfaceMajorVersion == 1 &&
|
||||
info.KernelInterfaceMinorVersion == 0) {
|
||||
core::g_use_interrupt_wait = false;
|
||||
}
|
||||
|
||||
HsaSystemProperties props;
|
||||
hsaKmtReleaseSystemProperties();
|
||||
|
||||
if (hsaKmtAcquireSystemProperties(&props) != HSAKMT_STATUS_SUCCESS) {
|
||||
return;
|
||||
}
|
||||
|
||||
core::Runtime::runtime_singleton_->SetLinkCount(props.NumNodes);
|
||||
|
||||
// Discover agents on every node in the platform.
|
||||
for (HSAuint32 node_id = 0; node_id < props.NumNodes; node_id++) {
|
||||
HsaNodeProperties node_prop = {0};
|
||||
if (hsaKmtGetNodeProperties(node_id, &node_prop) != HSAKMT_STATUS_SUCCESS) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const CpuAgent* cpu = DiscoverCpu(node_id, node_prop);
|
||||
const GpuAgent* gpu = DiscoverGpu(node_id, node_prop);
|
||||
|
||||
assert(!(cpu == NULL && gpu == NULL));
|
||||
|
||||
RegisterLinkInfo(node_id, node_prop.NumIOLinks);
|
||||
}
|
||||
}
|
||||
|
||||
bool Load() {
|
||||
// Open connection to kernel driver.
|
||||
if (hsaKmtOpenKFD() != HSAKMT_STATUS_SUCCESS) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Build topology table.
|
||||
BuildTopology();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Releases the system properties snapshot and closes the connection to the
// kernel driver. Thunk return codes are ignored; always returns true.
bool Unload() {
  hsaKmtReleaseSystemProperties();

  // Close connection to kernel driver.
  hsaKmtCloseKFD();

  const bool unloaded = true;
  return unloaded;
}
|
||||
} // namespace amd
|
||||
@@ -1,275 +0,0 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "core/inc/default_signal.h"
|
||||
#include "core/util/timer.h"
|
||||
|
||||
namespace core {
|
||||
|
||||
// Out-of-class definition of the static member rtti_id_, initialised to 0.
// NOTE(review): presumably a tag used to identify this signal class at
// runtime — confirm against default_signal.h.
int DefaultSignal::rtti_id_ = 0;
|
||||
|
||||
// Constructs a user-kind signal holding `initial_value`, with no event
// mailbox, and registers the object's own storage through
// HSA::hsa_memory_register.
DefaultSignal::DefaultSignal(hsa_signal_value_t initial_value)
    : Signal(initial_value) {
  signal_.event_mailbox_ptr = NULL;
  signal_.kind = AMD_SIGNAL_KIND_USER;

  HSA::hsa_memory_register(this, sizeof(DefaultSignal));
}
|
||||
|
||||
// Marks the signal invalid, spins until InUse() reports false, and then
// deregisters the object's storage through HSA::hsa_memory_deregister.
DefaultSignal::~DefaultSignal() {
  invalid_ = true;

  // Busy-wait: WaitRelaxed returns once it observes invalid_.
  while (InUse()) {
  }

  HSA::hsa_memory_deregister(this, sizeof(DefaultSignal));
}
|
||||
|
||||
// Reads the signal value with relaxed memory ordering.
hsa_signal_value_t DefaultSignal::LoadRelaxed() {
  const auto raw_value =
      atomic::Load(&signal_.value, std::memory_order_relaxed);
  return hsa_signal_value_t(raw_value);
}
|
||||
|
||||
// Reads the signal value with acquire memory ordering.
hsa_signal_value_t DefaultSignal::LoadAcquire() {
  const auto raw_value =
      atomic::Load(&signal_.value, std::memory_order_acquire);
  return hsa_signal_value_t(raw_value);
}
|
||||
|
||||
// Writes `value` to the signal with relaxed memory ordering.
void DefaultSignal::StoreRelaxed(hsa_signal_value_t value) {
  const int64_t raw_value = int64_t(value);
  atomic::Store(&signal_.value, raw_value, std::memory_order_relaxed);
}
|
||||
|
||||
// Writes `value` to the signal with release memory ordering.
void DefaultSignal::StoreRelease(hsa_signal_value_t value) {
  const int64_t raw_value = int64_t(value);
  atomic::Store(&signal_.value, raw_value, std::memory_order_release);
}
|
||||
|
||||
// Polls the signal until `condition` holds against `compare_value` or the
// timeout elapses, reading the value with relaxed ordering.
//
// @param condition      Comparison to satisfy (EQ, NE, GTE or LT).
// @param compare_value  Right-hand side of the comparison.
// @param timeout        Maximum wait; divided by the value reported for
//                       HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY to get seconds.
// @param wait_hint      Not used by this implementation.
// @return The value that satisfied the condition; the current value when the
//         timeout elapses; 0 when the signal is invalidated while waiting or
//         `condition` is not one of the four handled values.
hsa_signal_value_t DefaultSignal::WaitRelaxed(hsa_signal_condition_t condition,
                                              hsa_signal_value_t compare_value,
                                              uint64_t timeout,
                                              hsa_wait_state_t wait_hint) {
  // Count this thread as a waiter for the whole duration of the call.
  atomic::Increment(&waiting_);
  MAKE_SCOPE_GUARD([&]() { atomic::Decrement(&waiting_); });

  assert(!g_use_interrupt_wait && "Use of non-host signal in host signal wait API.");

  const timer::fast_clock::time_point wait_begin = timer::fast_clock::now();

  // Convert the timeout from timestamp ticks to a fast_clock duration.
  uint64_t hsa_freq;
  HSA::hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &hsa_freq);
  const timer::fast_clock::duration fast_timeout =
      timer::duration_from_seconds<timer::fast_clock::duration>(
          double(timeout) / double(hsa_freq));

  for (;;) {
    // The destructor sets invalid_ to release waiters.
    if (invalid_) {
      return 0;
    }

    const int64_t observed =
        atomic::Load(&signal_.value, std::memory_order_relaxed);

    bool satisfied = false;
    switch (condition) {
      case HSA_SIGNAL_CONDITION_EQ:
        satisfied = (observed == compare_value);
        break;
      case HSA_SIGNAL_CONDITION_NE:
        satisfied = (observed != compare_value);
        break;
      case HSA_SIGNAL_CONDITION_GTE:
        satisfied = (observed >= compare_value);
        break;
      case HSA_SIGNAL_CONDITION_LT:
        satisfied = (observed < compare_value);
        break;
      default:
        return 0;
    }
    if (satisfied) {
      return hsa_signal_value_t(observed);
    }

    const timer::fast_clock::time_point current = timer::fast_clock::now();
    if (current - wait_begin > fast_timeout) {
      // Timed out: report whatever the signal holds right now.
      const int64_t latest =
          atomic::Load(&signal_.value, std::memory_order_relaxed);
      return hsa_signal_value_t(latest);
    }
  }
}
|
||||
|
||||
// Same as WaitRelaxed, followed by an acquire fence before the observed value
// is returned.
hsa_signal_value_t DefaultSignal::WaitAcquire(hsa_signal_condition_t condition,
                                              hsa_signal_value_t compare_value,
                                              uint64_t timeout,
                                              hsa_wait_state_t wait_hint) {
  const hsa_signal_value_t observed =
      WaitRelaxed(condition, compare_value, timeout, wait_hint);

  std::atomic_thread_fence(std::memory_order_acquire);

  return observed;
}
|
||||
|
||||
// Applies atomic::And to the signal value using relaxed memory ordering.
void DefaultSignal::AndRelaxed(hsa_signal_value_t value) {
  atomic::And(&signal_.value, static_cast<int64_t>(value),
              std::memory_order_relaxed);
}
|
||||
|
||||
// Applies atomic::And to the signal value using acquire memory ordering.
void DefaultSignal::AndAcquire(hsa_signal_value_t value) {
  atomic::And(&signal_.value, static_cast<int64_t>(value),
              std::memory_order_acquire);
}
|
||||
|
||||
// Applies atomic::And to the signal value using release memory ordering.
void DefaultSignal::AndRelease(hsa_signal_value_t value) {
  atomic::And(&signal_.value, static_cast<int64_t>(value),
              std::memory_order_release);
}
|
||||
|
||||
// Applies atomic::And to the signal value using acquire-release ordering.
void DefaultSignal::AndAcqRel(hsa_signal_value_t value) {
  atomic::And(&signal_.value, static_cast<int64_t>(value),
              std::memory_order_acq_rel);
}
|
||||
|
||||
// Applies atomic::Or to the signal value using relaxed memory ordering.
void DefaultSignal::OrRelaxed(hsa_signal_value_t value) {
  atomic::Or(&signal_.value, static_cast<int64_t>(value),
             std::memory_order_relaxed);
}
|
||||
|
||||
// Applies atomic::Or to the signal value using acquire memory ordering.
void DefaultSignal::OrAcquire(hsa_signal_value_t value) {
  atomic::Or(&signal_.value, static_cast<int64_t>(value),
             std::memory_order_acquire);
}
|
||||
|
||||
// Applies atomic::Or to the signal value using release memory ordering.
void DefaultSignal::OrRelease(hsa_signal_value_t value) {
  atomic::Or(&signal_.value, static_cast<int64_t>(value),
             std::memory_order_release);
}
|
||||
|
||||
// Applies atomic::Or to the signal value using acquire-release ordering.
void DefaultSignal::OrAcqRel(hsa_signal_value_t value) {
  atomic::Or(&signal_.value, static_cast<int64_t>(value),
             std::memory_order_acq_rel);
}
|
||||
|
||||
// Applies atomic::Xor to the signal value using relaxed memory ordering.
void DefaultSignal::XorRelaxed(hsa_signal_value_t value) {
  atomic::Xor(&signal_.value, static_cast<int64_t>(value),
              std::memory_order_relaxed);
}
|
||||
|
||||
// Applies atomic::Xor to the signal value using acquire memory ordering.
void DefaultSignal::XorAcquire(hsa_signal_value_t value) {
  atomic::Xor(&signal_.value, static_cast<int64_t>(value),
              std::memory_order_acquire);
}
|
||||
|
||||
// Applies atomic::Xor to the signal value using release memory ordering.
void DefaultSignal::XorRelease(hsa_signal_value_t value) {
  atomic::Xor(&signal_.value, static_cast<int64_t>(value),
              std::memory_order_release);
}
|
||||
|
||||
// Applies atomic::Xor to the signal value using acquire-release ordering.
void DefaultSignal::XorAcqRel(hsa_signal_value_t value) {
  atomic::Xor(&signal_.value, static_cast<int64_t>(value),
              std::memory_order_acq_rel);
}
|
||||
|
||||
// Applies atomic::Add to the signal value using relaxed memory ordering.
void DefaultSignal::AddRelaxed(hsa_signal_value_t value) {
  atomic::Add(&signal_.value, static_cast<int64_t>(value),
              std::memory_order_relaxed);
}
|
||||
|
||||
// Applies atomic::Add to the signal value using acquire memory ordering.
void DefaultSignal::AddAcquire(hsa_signal_value_t value) {
  atomic::Add(&signal_.value, static_cast<int64_t>(value),
              std::memory_order_acquire);
}
|
||||
|
||||
// Applies atomic::Add to the signal value using release memory ordering.
void DefaultSignal::AddRelease(hsa_signal_value_t value) {
  atomic::Add(&signal_.value, static_cast<int64_t>(value),
              std::memory_order_release);
}
|
||||
|
||||
// Applies atomic::Add to the signal value using acquire-release ordering.
void DefaultSignal::AddAcqRel(hsa_signal_value_t value) {
  atomic::Add(&signal_.value, static_cast<int64_t>(value),
              std::memory_order_acq_rel);
}
|
||||
|
||||
// Applies atomic::Sub to the signal value using relaxed memory ordering.
void DefaultSignal::SubRelaxed(hsa_signal_value_t value) {
  atomic::Sub(&signal_.value, static_cast<int64_t>(value),
              std::memory_order_relaxed);
}
|
||||
|
||||
// Applies atomic::Sub to the signal value using acquire memory ordering.
void DefaultSignal::SubAcquire(hsa_signal_value_t value) {
  atomic::Sub(&signal_.value, static_cast<int64_t>(value),
              std::memory_order_acquire);
}
|
||||
|
||||
// Applies atomic::Sub to the signal value using release memory ordering.
void DefaultSignal::SubRelease(hsa_signal_value_t value) {
  atomic::Sub(&signal_.value, static_cast<int64_t>(value),
              std::memory_order_release);
}
|
||||
|
||||
// Applies atomic::Sub to the signal value using acquire-release ordering.
void DefaultSignal::SubAcqRel(hsa_signal_value_t value) {
  atomic::Sub(&signal_.value, static_cast<int64_t>(value),
              std::memory_order_acq_rel);
}
|
||||
|
||||
// Calls atomic::Exchange on the signal value with relaxed ordering and
// returns whatever that call yields, converted to hsa_signal_value_t.
hsa_signal_value_t DefaultSignal::ExchRelaxed(hsa_signal_value_t value) {
  const auto result = atomic::Exchange(
      &signal_.value, static_cast<int64_t>(value), std::memory_order_relaxed);
  return hsa_signal_value_t(result);
}
|
||||
|
||||
// Calls atomic::Exchange on the signal value with acquire ordering and
// returns whatever that call yields, converted to hsa_signal_value_t.
hsa_signal_value_t DefaultSignal::ExchAcquire(hsa_signal_value_t value) {
  const auto result = atomic::Exchange(
      &signal_.value, static_cast<int64_t>(value), std::memory_order_acquire);
  return hsa_signal_value_t(result);
}
|
||||
|
||||
// Calls atomic::Exchange on the signal value with release ordering and
// returns whatever that call yields, converted to hsa_signal_value_t.
hsa_signal_value_t DefaultSignal::ExchRelease(hsa_signal_value_t value) {
  const auto result = atomic::Exchange(
      &signal_.value, static_cast<int64_t>(value), std::memory_order_release);
  return hsa_signal_value_t(result);
}
|
||||
|
||||
// Calls atomic::Exchange on the signal value with acquire-release ordering
// and returns whatever that call yields, converted to hsa_signal_value_t.
hsa_signal_value_t DefaultSignal::ExchAcqRel(hsa_signal_value_t value) {
  const auto result = atomic::Exchange(
      &signal_.value, static_cast<int64_t>(value), std::memory_order_acq_rel);
  return hsa_signal_value_t(result);
}
|
||||
|
||||
// Calls atomic::Cas on the signal value with relaxed ordering. Note the
// helper takes the new value before the expected value.
hsa_signal_value_t DefaultSignal::CasRelaxed(hsa_signal_value_t expected,
                                             hsa_signal_value_t value) {
  const auto result =
      atomic::Cas(&signal_.value, static_cast<int64_t>(value),
                  static_cast<int64_t>(expected), std::memory_order_relaxed);
  return hsa_signal_value_t(result);
}
|
||||
|
||||
// Calls atomic::Cas on the signal value with acquire ordering. Note the
// helper takes the new value before the expected value.
hsa_signal_value_t DefaultSignal::CasAcquire(hsa_signal_value_t expected,
                                             hsa_signal_value_t value) {
  const auto result =
      atomic::Cas(&signal_.value, static_cast<int64_t>(value),
                  static_cast<int64_t>(expected), std::memory_order_acquire);
  return hsa_signal_value_t(result);
}
|
||||
|
||||
// Calls atomic::Cas on the signal value with release ordering. Note the
// helper takes the new value before the expected value.
hsa_signal_value_t DefaultSignal::CasRelease(hsa_signal_value_t expected,
                                             hsa_signal_value_t value) {
  const auto result =
      atomic::Cas(&signal_.value, static_cast<int64_t>(value),
                  static_cast<int64_t>(expected), std::memory_order_release);
  return hsa_signal_value_t(result);
}
|
||||
|
||||
// Calls atomic::Cas on the signal value with acquire-release ordering. Note
// the helper takes the new value before the expected value.
hsa_signal_value_t DefaultSignal::CasAcqRel(hsa_signal_value_t expected,
                                            hsa_signal_value_t value) {
  const auto result =
      atomic::Cas(&signal_.value, static_cast<int64_t>(value),
                  static_cast<int64_t>(expected), std::memory_order_acq_rel);
  return hsa_signal_value_t(result);
}
|
||||
|
||||
} // namespace core
|
||||
@@ -1,99 +0,0 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "core/inc/host_queue.h"
|
||||
|
||||
#include "core/inc/runtime.h"
|
||||
#include "core/util/utils.h"
|
||||
|
||||
namespace core {
|
||||
// Constructs a host queue backed by a ring buffer of |ring_size| AQL packets
// allocated from |region|.
//
// The object registers itself with hsa_memory_register, allocates the ring,
// fills in the hsa_queue fields (base address, size, doorbell, id, type,
// features) and clears the read/write dispatch ids. active_ is set to true
// only when every step succeeded; on any early return it stays false.
HostQueue::HostQueue(hsa_region_t region, uint32_t ring_size,
                     hsa_queue_type_t type, uint32_t features,
                     hsa_signal_t doorbell_signal)
    : Queue(),
      size_(ring_size),
      active_(false) {
  if (!Shared::IsSharedObjectAllocationValid()) {
    return;
  }

  // Give ring_ a defined value before the allocation attempt: the destructor
  // passes ring_ to hsa_memory_free unconditionally, so a failed allocation
  // must not leave it indeterminate.
  ring_ = NULL;

  HSA::hsa_memory_register(this, sizeof(HostQueue));

  const size_t queue_buffer_size = size_ * sizeof(AqlPacket);
  if (HSA_STATUS_SUCCESS !=
      HSA::hsa_memory_allocate(region, queue_buffer_size, &ring_)) {
    // Keep ring_ NULL on failure regardless of what the allocator wrote.
    ring_ = NULL;
    return;
  }

  assert(IsMultipleOf(ring_, kRingAlignment));
  assert(ring_ != NULL);

  amd_queue_.hsa_queue.base_address = ring_;
  amd_queue_.hsa_queue.size = size_;
  amd_queue_.hsa_queue.doorbell_signal = doorbell_signal;
  amd_queue_.hsa_queue.id = Runtime::runtime_singleton_->GetQueueId();
  amd_queue_.hsa_queue.type = type;
  amd_queue_.hsa_queue.features = features;

  // Record whether pointers are 64-bit for this build.
#ifdef HSA_LARGE_MODEL
  AMD_HSA_BITS_SET(
      amd_queue_.queue_properties, AMD_QUEUE_PROPERTIES_IS_PTR64, 1);
#else
  AMD_HSA_BITS_SET(
      amd_queue_.queue_properties, AMD_QUEUE_PROPERTIES_IS_PTR64, 0);
#endif

  amd_queue_.write_dispatch_id = amd_queue_.read_dispatch_id = 0;

  // Profiling starts disabled.
  AMD_HSA_BITS_SET(
      amd_queue_.queue_properties, AMD_QUEUE_PROPERTIES_ENABLE_PROFILING, 0);

  active_ = true;
}
|
||||
|
||||
// Releases the ring buffer and deregisters this object, provided the shared
// object allocation is valid; otherwise nothing is torn down.
HostQueue::~HostQueue() {
  if (Shared::IsSharedObjectAllocationValid()) {
    HSA::hsa_memory_free(ring_);
    HSA::hsa_memory_deregister(this, sizeof(HostQueue));
  }
}
|
||||
|
||||
} // namespace core
|
||||
Diferenças do arquivo suprimidas por serem muito extensas
Carregar Diff
@@ -1,191 +0,0 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "core/inc/hsa_api_trace_int.h"
|
||||
#include "core/inc/runtime.h"
|
||||
#include "core/inc/hsa_table_interface.h"
|
||||
|
||||
namespace core {
|
||||
|
||||
// API table whose |table| member is handed to hsa_table_interface_init at
// load time (see Init in this file).
ApiTable hsa_api_table_;

// Second API table instance; its consumers are outside this file.
ApiTable hsa_internal_api_table_;
|
||||
|
||||
// Starts with no extension table linked, then populates every core entry
// through Reset().
ApiTable::ApiTable() {
  // Must be cleared first: Reset() inspects std_exts_ before restoring it.
  table.std_exts_ = NULL;

  Reset();
}
|
||||
|
||||
// Links an extension table into this API table and snapshots its current
// contents so that Reset() can restore them later.
void ApiTable::LinkExts(ExtTable* ptr) {
  assert(ptr != NULL && "Invalid extension table linked.");

  table.std_exts_ = ptr;
  extension_backup = *ptr;
}
|
||||
|
||||
// Points every core API table entry back at its HSA:: implementation and, if
// an extension table is linked, restores it from the snapshot taken by
// LinkExts().
void ApiTable::Reset() {
// Every entry follows the same shape: table.<name>_fn = HSA::<name>.
#define HSA_API_TABLE_BIND(fn) table.fn##_fn = HSA::fn

  // Runtime lifetime and system queries.
  HSA_API_TABLE_BIND(hsa_init);
  HSA_API_TABLE_BIND(hsa_shut_down);
  HSA_API_TABLE_BIND(hsa_system_get_info);
  HSA_API_TABLE_BIND(hsa_system_extension_supported);
  HSA_API_TABLE_BIND(hsa_system_get_extension_table);

  // Agents.
  HSA_API_TABLE_BIND(hsa_iterate_agents);
  HSA_API_TABLE_BIND(hsa_agent_get_info);
  HSA_API_TABLE_BIND(hsa_agent_get_exception_policies);
  HSA_API_TABLE_BIND(hsa_agent_extension_supported);

  // Queues.
  HSA_API_TABLE_BIND(hsa_queue_create);
  HSA_API_TABLE_BIND(hsa_soft_queue_create);
  HSA_API_TABLE_BIND(hsa_queue_destroy);
  HSA_API_TABLE_BIND(hsa_queue_inactivate);
  HSA_API_TABLE_BIND(hsa_queue_load_read_index_acquire);
  HSA_API_TABLE_BIND(hsa_queue_load_read_index_relaxed);
  HSA_API_TABLE_BIND(hsa_queue_load_write_index_acquire);
  HSA_API_TABLE_BIND(hsa_queue_load_write_index_relaxed);
  HSA_API_TABLE_BIND(hsa_queue_store_write_index_relaxed);
  HSA_API_TABLE_BIND(hsa_queue_store_write_index_release);
  HSA_API_TABLE_BIND(hsa_queue_cas_write_index_acq_rel);
  HSA_API_TABLE_BIND(hsa_queue_cas_write_index_acquire);
  HSA_API_TABLE_BIND(hsa_queue_cas_write_index_relaxed);
  HSA_API_TABLE_BIND(hsa_queue_cas_write_index_release);
  HSA_API_TABLE_BIND(hsa_queue_add_write_index_acq_rel);
  HSA_API_TABLE_BIND(hsa_queue_add_write_index_acquire);
  HSA_API_TABLE_BIND(hsa_queue_add_write_index_relaxed);
  HSA_API_TABLE_BIND(hsa_queue_add_write_index_release);
  HSA_API_TABLE_BIND(hsa_queue_store_read_index_relaxed);
  HSA_API_TABLE_BIND(hsa_queue_store_read_index_release);

  // Regions and memory.
  HSA_API_TABLE_BIND(hsa_agent_iterate_regions);
  HSA_API_TABLE_BIND(hsa_region_get_info);
  HSA_API_TABLE_BIND(hsa_memory_register);
  HSA_API_TABLE_BIND(hsa_memory_deregister);
  HSA_API_TABLE_BIND(hsa_memory_allocate);
  HSA_API_TABLE_BIND(hsa_memory_free);
  HSA_API_TABLE_BIND(hsa_memory_copy);
  HSA_API_TABLE_BIND(hsa_memory_assign_agent);

  // Signals.
  HSA_API_TABLE_BIND(hsa_signal_create);
  HSA_API_TABLE_BIND(hsa_signal_destroy);
  HSA_API_TABLE_BIND(hsa_signal_load_relaxed);
  HSA_API_TABLE_BIND(hsa_signal_load_acquire);
  HSA_API_TABLE_BIND(hsa_signal_store_relaxed);
  HSA_API_TABLE_BIND(hsa_signal_store_release);
  HSA_API_TABLE_BIND(hsa_signal_wait_relaxed);
  HSA_API_TABLE_BIND(hsa_signal_wait_acquire);
  HSA_API_TABLE_BIND(hsa_signal_and_relaxed);
  HSA_API_TABLE_BIND(hsa_signal_and_acquire);
  HSA_API_TABLE_BIND(hsa_signal_and_release);
  HSA_API_TABLE_BIND(hsa_signal_and_acq_rel);
  HSA_API_TABLE_BIND(hsa_signal_or_relaxed);
  HSA_API_TABLE_BIND(hsa_signal_or_acquire);
  HSA_API_TABLE_BIND(hsa_signal_or_release);
  HSA_API_TABLE_BIND(hsa_signal_or_acq_rel);
  HSA_API_TABLE_BIND(hsa_signal_xor_relaxed);
  HSA_API_TABLE_BIND(hsa_signal_xor_acquire);
  HSA_API_TABLE_BIND(hsa_signal_xor_release);
  HSA_API_TABLE_BIND(hsa_signal_xor_acq_rel);
  HSA_API_TABLE_BIND(hsa_signal_exchange_relaxed);
  HSA_API_TABLE_BIND(hsa_signal_exchange_acquire);
  HSA_API_TABLE_BIND(hsa_signal_exchange_release);
  HSA_API_TABLE_BIND(hsa_signal_exchange_acq_rel);
  HSA_API_TABLE_BIND(hsa_signal_add_relaxed);
  HSA_API_TABLE_BIND(hsa_signal_add_acquire);
  HSA_API_TABLE_BIND(hsa_signal_add_release);
  HSA_API_TABLE_BIND(hsa_signal_add_acq_rel);
  HSA_API_TABLE_BIND(hsa_signal_subtract_relaxed);
  HSA_API_TABLE_BIND(hsa_signal_subtract_acquire);
  HSA_API_TABLE_BIND(hsa_signal_subtract_release);
  HSA_API_TABLE_BIND(hsa_signal_subtract_acq_rel);
  HSA_API_TABLE_BIND(hsa_signal_cas_relaxed);
  HSA_API_TABLE_BIND(hsa_signal_cas_acquire);
  HSA_API_TABLE_BIND(hsa_signal_cas_release);
  HSA_API_TABLE_BIND(hsa_signal_cas_acq_rel);

  // ISA and code objects.
  HSA_API_TABLE_BIND(hsa_isa_from_name);
  HSA_API_TABLE_BIND(hsa_isa_get_info);
  HSA_API_TABLE_BIND(hsa_isa_compatible);
  HSA_API_TABLE_BIND(hsa_code_object_serialize);
  HSA_API_TABLE_BIND(hsa_code_object_deserialize);
  HSA_API_TABLE_BIND(hsa_code_object_destroy);
  HSA_API_TABLE_BIND(hsa_code_object_get_info);
  HSA_API_TABLE_BIND(hsa_code_object_get_symbol);
  HSA_API_TABLE_BIND(hsa_code_symbol_get_info);
  HSA_API_TABLE_BIND(hsa_code_object_iterate_symbols);

  // Executables.
  HSA_API_TABLE_BIND(hsa_executable_create);
  HSA_API_TABLE_BIND(hsa_executable_destroy);
  HSA_API_TABLE_BIND(hsa_executable_load_code_object);
  HSA_API_TABLE_BIND(hsa_executable_freeze);
  HSA_API_TABLE_BIND(hsa_executable_get_info);
  HSA_API_TABLE_BIND(hsa_executable_global_variable_define);
  HSA_API_TABLE_BIND(hsa_executable_agent_global_variable_define);
  HSA_API_TABLE_BIND(hsa_executable_readonly_variable_define);
  HSA_API_TABLE_BIND(hsa_executable_validate);
  HSA_API_TABLE_BIND(hsa_executable_get_symbol);
  HSA_API_TABLE_BIND(hsa_executable_symbol_get_info);
  HSA_API_TABLE_BIND(hsa_executable_iterate_symbols);

  // Diagnostics.
  HSA_API_TABLE_BIND(hsa_status_string);

#undef HSA_API_TABLE_BIND

  // Restore the linked extension table, if any, from its saved copy.
  if (table.std_exts_ != NULL) {
    *table.std_exts_ = extension_backup;
  }
}
|
||||
|
||||
class Init {
|
||||
public:
|
||||
Init() { hsa_table_interface_init(&hsa_api_table_.table); }
|
||||
};
|
||||
static Init LinkAtLoad;
|
||||
}
|
||||
@@ -1,555 +0,0 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "hsakmt.h"
|
||||
|
||||
#include "core/inc/runtime.h"
|
||||
#include "core/inc/agent.h"
|
||||
#include "core/inc/amd_cpu_agent.h"
|
||||
#include "core/inc/amd_gpu_agent.h"
|
||||
#include "core/inc/amd_memory_region.h"
|
||||
#include "core/inc/signal.h"
|
||||
#include "core/inc/interrupt_signal.h"
|
||||
|
||||
template <class T>
|
||||
struct ValidityError;
|
||||
template <>
|
||||
struct ValidityError<core::Signal*> {
|
||||
enum { value = HSA_STATUS_ERROR_INVALID_SIGNAL };
|
||||
};
|
||||
|
||||
template <>
|
||||
struct ValidityError<core::Agent*> {
|
||||
enum { value = HSA_STATUS_ERROR_INVALID_AGENT };
|
||||
};
|
||||
|
||||
template <>
|
||||
struct ValidityError<core::MemoryRegion*> {
|
||||
enum { value = HSA_STATUS_ERROR_INVALID_REGION };
|
||||
};
|
||||
|
||||
template <>
|
||||
struct ValidityError<amd::MemoryRegion*> {
|
||||
enum { value = HSA_STATUS_ERROR_INVALID_REGION };
|
||||
};
|
||||
|
||||
template <>
|
||||
struct ValidityError<core::Queue*> {
|
||||
enum { value = HSA_STATUS_ERROR_INVALID_QUEUE };
|
||||
};
|
||||
|
||||
template <class T>
|
||||
struct ValidityError<const T*> {
|
||||
enum { value = ValidityError<T*>::value };
|
||||
};
|
||||
|
||||
// Returns HSA_STATUS_ERROR_INVALID_ARGUMENT from the enclosing function when
// |ptr| is NULL.
#define IS_BAD_PTR(ptr)                         \
  do {                                          \
    if ((ptr) == NULL) {                        \
      return HSA_STATUS_ERROR_INVALID_ARGUMENT; \
    }                                           \
  } while (0)
|
||||
|
||||
// Returns the ValidityError code for the declared type of |ptr| from the
// enclosing function unless |ptr| is non-NULL and reports IsValid().
#define IS_VALID(ptr)                                           \
  do {                                                          \
    if (!((ptr) != NULL && (ptr)->IsValid())) {                 \
      return hsa_status_t(ValidityError<decltype(ptr)>::value); \
    }                                                           \
  } while (0)
|
||||
|
||||
// Returns HSA_STATUS_ERROR_OUT_OF_RESOURCES from the enclosing function when
// |ptr| is NULL.
#define CHECK_ALLOC(ptr)                        \
  do {                                          \
    if ((ptr) == NULL) {                        \
      return HSA_STATUS_ERROR_OUT_OF_RESOURCES; \
    }                                           \
  } while (0)
|
||||
|
||||
// Returns HSA_STATUS_ERROR_NOT_INITIALIZED from the enclosing function when
// the runtime singleton reports that it is not open.
#define IS_OPEN()                                       \
  do {                                                  \
    if (!core::Runtime::runtime_singleton_->IsOpen()) { \
      return HSA_STATUS_ERROR_NOT_INITIALIZED;          \
    }                                                   \
  } while (0)
|
||||
|
||||
template <class T>
|
||||
static __forceinline bool IsValid(T* ptr) {
|
||||
return (ptr == NULL) ? NULL : ptr->IsValid();
|
||||
}
|
||||
|
||||
// Reads the current coherency type of a GPU agent into |*type|.
//
// Returns NOT_INITIALIZED when the runtime is not open, INVALID_AGENT when
// the handle is invalid or does not name a GPU device, and INVALID_ARGUMENT
// when |type| is NULL.
hsa_status_t HSA_API
    hsa_amd_coherency_get_type(hsa_agent_t agent_handle,
                               hsa_amd_coherency_type_t* type) {
  IS_OPEN();

  const core::Agent* agent = core::Agent::Convert(agent_handle);
  IS_VALID(agent);
  IS_BAD_PTR(type);

  const bool is_gpu = (agent->device_type() == core::Agent::kAmdGpuDevice);
  if (!is_gpu) {
    return HSA_STATUS_ERROR_INVALID_AGENT;
  }

  *type = static_cast<const amd::GpuAgentInt*>(agent)->current_coherency_type();
  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
// Sets the coherency type of a GPU agent.
//
// Returns NOT_INITIALIZED when the runtime is not open, INVALID_AGENT when
// the handle is invalid or does not name a GPU device, INVALID_ARGUMENT when
// |type| lies outside [COHERENT, NONCOHERENT], and HSA_STATUS_ERROR when the
// agent rejects the change.
hsa_status_t HSA_API hsa_amd_coherency_set_type(hsa_agent_t agent_handle,
                                                hsa_amd_coherency_type_t type) {
  IS_OPEN();

  core::Agent* agent = core::Agent::Convert(agent_handle);
  IS_VALID(agent);

  const bool type_in_range = (type >= HSA_AMD_COHERENCY_TYPE_COHERENT) &&
                             (type <= HSA_AMD_COHERENCY_TYPE_NONCOHERENT);
  if (!type_in_range) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  const bool is_gpu = (agent->device_type() == core::Agent::kAmdGpuDevice);
  if (!is_gpu) {
    return HSA_STATUS_ERROR_INVALID_AGENT;
  }

  amd::GpuAgent* gpu_agent = static_cast<amd::GpuAgent*>(agent);
  if (gpu_agent->current_coherency_type(type)) {
    return HSA_STATUS_SUCCESS;
  }
  return HSA_STATUS_ERROR;
}
|
||||
|
||||
// Forwards a fill request to Runtime::FillMemory.
//
// Returns NOT_INITIALIZED when the runtime is not open and INVALID_ARGUMENT
// when |ptr| is NULL. A |count| of zero succeeds without calling the runtime.
hsa_status_t HSA_API
    hsa_amd_memory_fill(void* ptr, uint32_t value, size_t count) {
  IS_OPEN();
  IS_BAD_PTR(ptr);

  if (count != 0) {
    return core::Runtime::runtime_singleton_->FillMemory(ptr, value, count);
  }
  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
// Validates the arguments of an asynchronous copy and forwards it to
// Runtime::CopyMemory.
//
// Returns INVALID_ARGUMENT when |dst| or |src| is NULL, or when
// |num_dep_signals| and |dep_signals| disagree (a count without an array or
// an array without a count). Invalid agent or signal handles yield the
// matching INVALID_AGENT / INVALID_SIGNAL code. A |size| of zero succeeds
// without calling the runtime.
hsa_status_t HSA_API
    hsa_amd_memory_async_copy(void* dst, hsa_agent_t dst_agent_handle,
                              const void* src, hsa_agent_t src_agent_handle,
                              size_t size, uint32_t num_dep_signals,
                              const hsa_signal_t* dep_signals,
                              hsa_signal_t completion_signal) {
  if (dst == NULL || src == NULL) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  // The dependency array must be present exactly when the count is non-zero.
  const bool no_count = (num_dep_signals == 0);
  const bool no_array = (dep_signals == NULL);
  if (no_count != no_array) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  core::Agent* dst_agent = core::Agent::Convert(dst_agent_handle);
  IS_VALID(dst_agent);

  core::Agent* src_agent = core::Agent::Convert(src_agent_handle);
  IS_VALID(src_agent);

  // Convert and validate each dependency; the loop is empty for a zero count.
  std::vector<core::Signal*> dep_signal_list(num_dep_signals);
  for (size_t idx = 0; idx < num_dep_signals; ++idx) {
    core::Signal* dep_signal_obj = core::Signal::Convert(dep_signals[idx]);
    IS_VALID(dep_signal_obj);
    dep_signal_list[idx] = dep_signal_obj;
  }

  core::Signal* out_signal_obj = core::Signal::Convert(completion_signal);
  IS_VALID(out_signal_obj);

  if (size == 0) {
    return HSA_STATUS_SUCCESS;
  }

  return core::Runtime::runtime_singleton_->CopyMemory(
      dst, *dst_agent, src, *src_agent, size, dep_signal_list,
      *out_signal_obj);
}
|
||||
|
||||
// Sets or clears the ENABLE_PROFILING bit in the queue properties of
// |queue|; any non-zero |enable| turns profiling on.
//
// Returns NOT_INITIALIZED when the runtime is not open and INVALID_QUEUE
// when the queue handle is invalid.
hsa_status_t HSA_API
    hsa_amd_profiling_set_profiler_enabled(hsa_queue_t* queue, int enable) {
  IS_OPEN();

  core::Queue* cmd_queue = core::Queue::Convert(queue);
  IS_VALID(cmd_queue);

  AMD_HSA_BITS_SET(cmd_queue->amd_queue_.queue_properties,
                   AMD_QUEUE_PROPERTIES_ENABLE_PROFILING, (enable != 0));

  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
// Asks a GPU agent to translate the timing data attached to |hsa_signal|
// into |*time|.
//
// Returns NOT_INITIALIZED when the runtime is not open, INVALID_ARGUMENT
// when |time| is NULL, INVALID_AGENT when the agent handle is invalid or not
// a GPU device, and INVALID_SIGNAL when the signal handle is invalid.
hsa_status_t HSA_API hsa_amd_profiling_get_dispatch_time(
    hsa_agent_t agent_handle, hsa_signal_t hsa_signal,
    hsa_amd_profiling_dispatch_time_t* time) {
  IS_OPEN();
  IS_BAD_PTR(time);

  core::Agent* agent = core::Agent::Convert(agent_handle);
  IS_VALID(agent);

  core::Signal* signal = core::Signal::Convert(hsa_signal);
  IS_VALID(signal);

  const bool is_gpu = (agent->device_type() == core::Agent::kAmdGpuDevice);
  if (!is_gpu) {
    return HSA_STATUS_ERROR_INVALID_AGENT;
  }

  static_cast<amd::GpuAgentInt*>(agent)->TranslateTime(signal, *time);
  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
// Stores in |*system_tick| the result of the GPU agent's TranslateTime() for
// |agent_tick|.
//
// Returns NOT_INITIALIZED when the runtime is not open, INVALID_ARGUMENT
// when |system_tick| is NULL, and INVALID_AGENT when the agent handle is
// invalid or not a GPU device.
hsa_status_t HSA_API
    hsa_amd_profiling_convert_tick_to_system_domain(hsa_agent_t agent_handle,
                                                    uint64_t agent_tick,
                                                    uint64_t* system_tick) {
  IS_OPEN();
  IS_BAD_PTR(system_tick);

  core::Agent* agent = core::Agent::Convert(agent_handle);
  IS_VALID(agent);

  const bool is_gpu = (agent->device_type() == core::Agent::kAmdGpuDevice);
  if (!is_gpu) {
    return HSA_STATUS_ERROR_INVALID_AGENT;
  }

  *system_tick = static_cast<amd::GpuAgentInt*>(agent)->TranslateTime(agent_tick);
  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
uint32_t HSA_API
|
||||
hsa_amd_signal_wait_any(uint32_t signal_count, hsa_signal_t* hsa_signals,
|
||||
hsa_signal_condition_t* conds,
|
||||
hsa_signal_value_t* values, uint64_t timeout_hint,
|
||||
hsa_wait_state_t wait_hint,
|
||||
hsa_signal_value_t* satisfying_value) {
|
||||
// Do not check for signal invalidation. Invalidation may occur during async
|
||||
// signal handler loop and is not an error.
|
||||
for (uint i = 0; i < signal_count; i++)
|
||||
assert(hsa_signals[i].handle != 0 &&
|
||||
static_cast<core::Checked<0x71FCCA6A3D5D5276>*>(
|
||||
core::Signal::Convert(hsa_signals[i]))->IsValid() &&
|
||||
"Invalid signal.");
|
||||
|
||||
return core::Signal::WaitAny(signal_count, hsa_signals, conds, values,
|
||||
timeout_hint, wait_hint, satisfying_value);
|
||||
}
|
||||
|
||||
// Registers |handler| with the runtime through SetAsyncSignalHandler for the
// given signal, condition and value; |arg| is passed along unchanged.
//
// Returns NOT_INITIALIZED when the runtime is not open, INVALID_SIGNAL when
// the handle is invalid or the signal is not an InterruptSignal, and
// INVALID_ARGUMENT when |handler| is NULL.
hsa_status_t HSA_API
    hsa_amd_signal_async_handler(hsa_signal_t hsa_signal,
                                 hsa_signal_condition_t cond,
                                 hsa_signal_value_t value,
                                 hsa_amd_signal_handler handler, void* arg) {
  IS_OPEN();

  core::Signal* signal = core::Signal::Convert(hsa_signal);
  IS_VALID(signal);
  IS_BAD_PTR(handler);

  const bool is_interrupt_signal = core::InterruptSignal::IsType(signal);
  if (!is_interrupt_signal) {
    return HSA_STATUS_ERROR_INVALID_SIGNAL;
  }

  return core::Runtime::runtime_singleton_->SetAsyncSignalHandler(
      hsa_signal, cond, value, handler, arg);
}
|
||||
|
||||
// Registers |callback| with the runtime through SetAsyncSignalHandler using
// a zero-handle signal, condition EQ and compare value 0.
//
// Returns NOT_INITIALIZED when the runtime is not open and INVALID_ARGUMENT
// when |callback| is NULL.
hsa_status_t HSA_API
    hsa_amd_async_function(void (*callback)(void* arg), void* arg) {
  IS_OPEN();
  IS_BAD_PTR(callback);

  static const hsa_signal_t null_signal = {0};

  // The callback is handed over under the signal-handler function type.
  hsa_amd_signal_handler as_handler =
      reinterpret_cast<hsa_amd_signal_handler>(callback);

  return core::Runtime::runtime_singleton_->SetAsyncSignalHandler(
      null_signal, HSA_SIGNAL_CONDITION_EQ, 0, as_handler, arg);
}
|
||||
|
||||
// Forwards a CU mask to the queue's SetCUMasking().
//
// Returns NOT_INITIALIZED when the runtime is not open, INVALID_ARGUMENT
// when |cu_mask| is NULL, and INVALID_QUEUE when the queue handle is invalid.
hsa_status_t HSA_API hsa_amd_queue_cu_set_mask(const hsa_queue_t* queue,
                                               uint32_t num_cu_mask_count,
                                               const uint32_t* cu_mask) {
  IS_OPEN();
  IS_BAD_PTR(cu_mask);

  core::Queue* cmd_queue = core::Queue::Convert(queue);
  IS_VALID(cmd_queue);

  const hsa_status_t status =
      cmd_queue->SetCUMasking(num_cu_mask_count, cu_mask);
  return status;
}
|
||||
|
||||
// Forwards a lock request for |host_ptr| to Lock() on the first entry of the
// runtime's fine-grained system regions; the agent-visible pointer is
// returned through |*agent_ptr|.
//
// |*agent_ptr| is cleared up front (when writable) so that every failure
// path leaves it NULL.
//
// Returns NOT_INITIALIZED when the runtime is not open, and INVALID_ARGUMENT
// when |size| is zero, |host_ptr| or |agent_ptr| is NULL, |num_agent| is
// negative, or |agents| and |num_agent| disagree (an array without a count
// or a count without an array).
hsa_status_t HSA_API hsa_amd_memory_lock(void* host_ptr, size_t size,
                                         hsa_agent_t* agents, int num_agent,
                                         void** agent_ptr) {
  // Clear the output only when it can be written; a NULL agent_ptr is
  // rejected below instead of being dereferenced here.
  if (agent_ptr != NULL) {
    *agent_ptr = NULL;
  }

  IS_OPEN();

  if (size == 0 || host_ptr == NULL || agent_ptr == NULL) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  // A negative count can never describe a valid agent list.
  if (num_agent < 0) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  if ((agents != NULL && num_agent == 0) ||
      (agents == NULL && num_agent != 0)) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  const amd::MemoryRegion* system_region =
      reinterpret_cast<const amd::MemoryRegion*>(
          core::Runtime::runtime_singleton_->system_regions_fine()[0]);

  return system_region->Lock(num_agent, agents, host_ptr, size, agent_ptr);
}
|
||||
|
||||
// Unlocks |host_ptr| through the first fine-grained system region and returns
// the status reported by MemoryRegion::Unlock.
hsa_status_t HSA_API hsa_amd_memory_unlock(void* host_ptr) {
  IS_OPEN();

  core::Runtime* runtime = core::Runtime::runtime_singleton_;
  // NOTE(review): assumes system_regions_fine() is non-empty - confirm.
  const amd::MemoryRegion* fine_grain_region =
      reinterpret_cast<const amd::MemoryRegion*>(
          runtime->system_regions_fine()[0]);

  return fine_grain_region->Unlock(host_ptr);
}
|
||||
|
||||
// Queries |attribute| of |memory_pool| and writes the answer to |value|.
//
// The pool handle is reinterpreted as a region handle. Returns
// HSA_STATUS_ERROR_INVALID_MEMORY_POOL when the handle converts to NULL,
// otherwise the status reported by MemoryRegion::GetPoolInfo.
hsa_status_t HSA_API
    hsa_amd_memory_pool_get_info(hsa_amd_memory_pool_t memory_pool,
                                 hsa_amd_memory_pool_info_t attribute,
                                 void* value) {
  IS_OPEN();
  IS_BAD_PTR(value);

  const hsa_region_t pool_as_region = {memory_pool.handle};
  const amd::MemoryRegion* pool = amd::MemoryRegion::Convert(pool_as_region);
  if (!pool) {
    return static_cast<hsa_status_t>(HSA_STATUS_ERROR_INVALID_MEMORY_POOL);
  }

  return pool->GetPoolInfo(attribute, value);
}
|
||||
|
||||
// Visits the memory pools of |agent_handle|, passing each one to |callback|
// together with |data|.
//
// The pool callback is reinterpreted as a region callback and handed to the
// agent's VisitRegion (first argument false). CPU agents are dispatched as
// amd::CpuAgent; every other device type is treated as amd::GpuAgentInt.
hsa_status_t HSA_API hsa_amd_agent_iterate_memory_pools(
    hsa_agent_t agent_handle,
    hsa_status_t (*callback)(hsa_amd_memory_pool_t memory_pool, void* data),
    void* data) {
  typedef hsa_status_t (*RegionVisitor)(hsa_region_t memory_pool, void* data);

  IS_OPEN();
  IS_BAD_PTR(callback);
  const core::Agent* agent = core::Agent::Convert(agent_handle);
  IS_VALID(agent);

  const bool is_cpu_agent =
      (agent->device_type() == core::Agent::kAmdCpuDevice);

  if (!is_cpu_agent) {
    const amd::GpuAgentInt* gpu =
        reinterpret_cast<const amd::GpuAgentInt*>(agent);
    return gpu->VisitRegion(false, reinterpret_cast<RegionVisitor>(callback),
                            data);
  }

  const amd::CpuAgent* cpu = reinterpret_cast<const amd::CpuAgent*>(agent);
  return cpu->VisitRegion(false, reinterpret_cast<RegionVisitor>(callback),
                          data);
}
|
||||
|
||||
// Allocates |size| bytes from |memory_pool| and stores the address in |*ptr|.
//
// Returns HSA_STATUS_ERROR_INVALID_ARGUMENT when |size| is 0 or |ptr| is NULL,
// HSA_STATUS_ERROR_INVALID_MEMORY_POOL when the pool handle does not convert
// to a valid region, otherwise the status of Runtime::AllocateMemory.
// |flags| is not read by this function.
hsa_status_t HSA_API
    hsa_amd_memory_pool_allocate(hsa_amd_memory_pool_t memory_pool, size_t size,
                                 uint32_t flags, void** ptr) {
  IS_OPEN();

  const bool args_ok = (size != 0) && (ptr != NULL);
  if (!args_ok) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  const hsa_region_t pool_as_region = {memory_pool.handle};
  const core::MemoryRegion* pool = core::MemoryRegion::Convert(pool_as_region);

  const bool pool_ok = (pool != NULL) && pool->IsValid();
  if (!pool_ok) {
    return static_cast<hsa_status_t>(HSA_STATUS_ERROR_INVALID_MEMORY_POOL);
  }

  core::Runtime* runtime = core::Runtime::runtime_singleton_;
  return runtime->AllocateMemory(true, pool, size, ptr);
}
|
||||
|
||||
// Releases a pool allocation by delegating to HSA::hsa_memory_free and
// returning its status unchanged.
hsa_status_t HSA_API hsa_amd_memory_pool_free(void* ptr) {
  const hsa_status_t status = HSA::hsa_memory_free(ptr);
  return status;
}
|
||||
|
||||
// Grants the listed agents access to the allocation at |ptr| via
// Runtime::AllowAccess.
//
// Returns HSA_STATUS_ERROR_INVALID_ARGUMENT unless |num_agents| is non-zero,
// |agents| and |ptr| are non-NULL, and |flags| is NULL (a non-NULL |flags| is
// rejected, and the value is never forwarded).
hsa_status_t HSA_API
    hsa_amd_agents_allow_access(uint32_t num_agents, const hsa_agent_t* agents,
                                const uint32_t* flags, const void* ptr) {
  IS_OPEN();

  const bool args_ok = (num_agents != 0) && (agents != NULL) &&
                       (flags == NULL) && (ptr != NULL);
  if (!args_ok) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  core::Runtime* runtime = core::Runtime::runtime_singleton_;
  return runtime->AllowAccess(num_agents, agents, ptr);
}
|
||||
|
||||
// Reports through |*result| whether memory can migrate from
// |src_memory_pool| to |dst_memory_pool|.
//
// Returns HSA_STATUS_ERROR_INVALID_ARGUMENT when |result| is NULL and
// HSA_STATUS_ERROR_INVALID_MEMORY_POOL when either pool handle fails to
// convert to a valid region (source is checked first); otherwise the status of
// MemoryRegion::CanMigrate.
hsa_status_t HSA_API
    hsa_amd_memory_pool_can_migrate(hsa_amd_memory_pool_t src_memory_pool,
                                    hsa_amd_memory_pool_t dst_memory_pool,
                                    bool* result) {
  IS_OPEN();

  if (!result) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  const hsa_region_t src_handle = {src_memory_pool.handle};
  const amd::MemoryRegion* source = amd::MemoryRegion::Convert(src_handle);
  const bool source_ok = (source != NULL) && source->IsValid();
  if (!source_ok) {
    return static_cast<hsa_status_t>(HSA_STATUS_ERROR_INVALID_MEMORY_POOL);
  }

  const hsa_region_t dst_handle = {dst_memory_pool.handle};
  const amd::MemoryRegion* destination =
      amd::MemoryRegion::Convert(dst_handle);
  const bool destination_ok = (destination != NULL) && destination->IsValid();
  if (!destination_ok) {
    return static_cast<hsa_status_t>(HSA_STATUS_ERROR_INVALID_MEMORY_POOL);
  }

  return source->CanMigrate(*destination, *result);
}
|
||||
|
||||
// Migrates the allocation at |ptr| into |memory_pool|.
//
// |ptr| must be non-NULL and |flags| must be 0, otherwise
// HSA_STATUS_ERROR_INVALID_ARGUMENT is returned. An unusable pool handle
// yields HSA_STATUS_ERROR_INVALID_MEMORY_POOL. On success paths the result is
// whatever MemoryRegion::Migrate reports.
hsa_status_t HSA_API hsa_amd_memory_migrate(const void* ptr,
                                            hsa_amd_memory_pool_t memory_pool,
                                            uint32_t flags) {
  IS_OPEN();

  const bool args_ok = (ptr != NULL) && (flags == 0);
  if (!args_ok) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  const hsa_region_t target_handle = {memory_pool.handle};
  const amd::MemoryRegion* target = amd::MemoryRegion::Convert(target_handle);

  const bool target_ok = (target != NULL) && target->IsValid();
  if (!target_ok) {
    return static_cast<hsa_status_t>(HSA_STATUS_ERROR_INVALID_MEMORY_POOL);
  }

  return target->Migrate(flags, ptr);
}
|
||||
|
||||
// Queries |attribute| describing how |agent_handle| relates to |memory_pool|
// and writes the answer to |value|.
//
// Returns HSA_STATUS_ERROR_INVALID_ARGUMENT when |value| is NULL and
// HSA_STATUS_ERROR_INVALID_MEMORY_POOL when the pool handle does not convert
// to a valid region; otherwise the status of MemoryRegion::GetAgentPoolInfo.
hsa_status_t HSA_API hsa_amd_agent_memory_pool_get_info(
    hsa_agent_t agent_handle, hsa_amd_memory_pool_t memory_pool,
    hsa_amd_agent_memory_pool_info_t attribute, void* value) {
  IS_OPEN();

  if (!value) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  const core::Agent* agent = core::Agent::Convert(agent_handle);
  IS_VALID(agent);

  const hsa_region_t pool_as_region = {memory_pool.handle};
  const amd::MemoryRegion* pool = amd::MemoryRegion::Convert(pool_as_region);

  const bool pool_ok = (pool != NULL) && pool->IsValid();
  if (!pool_ok) {
    return static_cast<hsa_status_t>(HSA_STATUS_ERROR_INVALID_MEMORY_POOL);
  }

  return pool->GetAgentPoolInfo(*agent, attribute, value);
}
|
||||
|
||||
// Maps the interop buffer identified by |interop_handle| for the given agents
// by forwarding to Runtime::InteropMap.
//
// Parameters:
//   num_agents     - number of entries in |agents|; must be non-zero.
//   agents         - agent handles; must be non-NULL and each must convert to
//                    a valid core::Agent.
//   interop_handle - forwarded unchanged to InteropMap.
//   flags          - must be 0.
//   size, ptr      - out parameters; must be non-NULL.
//   metadata_size, metadata - forwarded unchanged to InteropMap (not checked
//                    here).
//
// Returns HSA_STATUS_ERROR_INVALID_ARGUMENT for a non-zero |flags| or zero
// |num_agents|, otherwise the status reported by InteropMap.
hsa_status_t hsa_amd_interop_map_buffer(uint32_t num_agents,
                                        hsa_agent_t* agents, int interop_handle,
                                        uint32_t flags, size_t* size,
                                        void** ptr, size_t* metadata_size,
                                        const void** metadata) {
  IS_OPEN();
  IS_BAD_PTR(agents);
  IS_BAD_PTR(size);
  IS_BAD_PTR(ptr);
  if (flags != 0) return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  if (num_agents == 0) return HSA_STATUS_ERROR_INVALID_ARGUMENT;

  // Up to this many agents are converted into a stack array; larger lists
  // fall back to a heap array.
  const uint32_t kInlineAgentCount = 64;
  core::Agent* short_agents[kInlineAgentCount];

  // Owns the heap array (if any) so that it is released on every exit path,
  // including the early return hidden inside IS_VALID below. The original
  // leaked the array on that path.
  struct HeapAgentList {
    core::Agent** list;
    HeapAgentList() : list(NULL) {}
    ~HeapAgentList() { delete[] list; }
  } heap_agents;

  core::Agent** core_agents = short_agents;
  if (num_agents > kInlineAgentCount) {
    heap_agents.list = new core::Agent* [num_agents];
    // Only reachable when operator new reports failure by returning NULL
    // rather than throwing; kept from the original.
    if (heap_agents.list == NULL) return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
    core_agents = heap_agents.list;
  }

  // Unsigned index: |num_agents| is uint32_t.
  for (uint32_t i = 0; i < num_agents; i++) {
    core::Agent* device = core::Agent::Convert(agents[i]);
    IS_VALID(device);
    core_agents[i] = device;
  }

  return core::Runtime::runtime_singleton_->InteropMap(
      num_agents, core_agents, interop_handle, flags, size, ptr, metadata_size,
      metadata);
}
|
||||
|
||||
// Unmaps an interop buffer. A NULL |ptr| is a no-op. The result of
// Runtime::InteropUnmap is not inspected; once the runtime is open this
// always returns HSA_STATUS_SUCCESS.
hsa_status_t hsa_amd_interop_unmap_buffer(void* ptr) {
  IS_OPEN();
  if (ptr == NULL) {
    return HSA_STATUS_SUCCESS;
  }
  core::Runtime::runtime_singleton_->InteropUnmap(ptr);
  return HSA_STATUS_SUCCESS;
}
|
||||
@@ -1,530 +0,0 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "core/inc/hsa_ext_interface.h"
|
||||
|
||||
#include "core/inc/runtime.h"
|
||||
|
||||
namespace core {
|
||||
// Implementations for missing / unsupported extensions
|
||||
template <class T0>
|
||||
static T0 hsa_ext_null() {
|
||||
return HSA_STATUS_ERROR_NOT_INITIALIZED;
|
||||
}
|
||||
template <class T0, class T1>
|
||||
static T0 hsa_ext_null(T1) {
|
||||
return HSA_STATUS_ERROR_NOT_INITIALIZED;
|
||||
}
|
||||
template <class T0, class T1, class T2>
|
||||
static T0 hsa_ext_null(T1, T2) {
|
||||
return HSA_STATUS_ERROR_NOT_INITIALIZED;
|
||||
}
|
||||
template <class T0, class T1, class T2, class T3>
|
||||
static T0 hsa_ext_null(T1, T2, T3) {
|
||||
return HSA_STATUS_ERROR_NOT_INITIALIZED;
|
||||
}
|
||||
template <class T0, class T1, class T2, class T3, class T4>
|
||||
static T0 hsa_ext_null(T1, T2, T3, T4) {
|
||||
return HSA_STATUS_ERROR_NOT_INITIALIZED;
|
||||
}
|
||||
template <class T0, class T1, class T2, class T3, class T4, class T5>
|
||||
static T0 hsa_ext_null(T1, T2, T3, T4, T5) {
|
||||
return HSA_STATUS_ERROR_NOT_INITIALIZED;
|
||||
}
|
||||
template <class T0, class T1, class T2, class T3, class T4, class T5, class T6>
|
||||
static T0 hsa_ext_null(T1, T2, T3, T4, T5, T6) {
|
||||
return HSA_STATUS_ERROR_NOT_INITIALIZED;
|
||||
}
|
||||
template <class T0, class T1, class T2, class T3, class T4, class T5, class T6,
|
||||
class T7>
|
||||
static T0 hsa_ext_null(T1, T2, T3, T4, T5, T6, T7) {
|
||||
return HSA_STATUS_ERROR_NOT_INITIALIZED;
|
||||
}
|
||||
template <class T0, class T1, class T2, class T3, class T4, class T5, class T6,
|
||||
class T7, class T8>
|
||||
static T0 hsa_ext_null(T1, T2, T3, T4, T5, T6, T7, T8) {
|
||||
return HSA_STATUS_ERROR_NOT_INITIALIZED;
|
||||
}
|
||||
template <class T0, class T1, class T2, class T3, class T4, class T5, class T6,
|
||||
class T7, class T8, class T9>
|
||||
static T0 hsa_ext_null(T1, T2, T3, T4, T5, T6, T7, T8, T9) {
|
||||
return HSA_STATUS_ERROR_NOT_INITIALIZED;
|
||||
}
|
||||
template <class T0, class T1, class T2, class T3, class T4, class T5, class T6,
|
||||
class T7, class T8, class T9, class T10>
|
||||
static T0 hsa_ext_null(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10) {
|
||||
return HSA_STATUS_ERROR_NOT_INITIALIZED;
|
||||
}
|
||||
template <class T0, class T1, class T2, class T3, class T4, class T5, class T6,
|
||||
class T7, class T8, class T9, class T10, class T11>
|
||||
static T0 hsa_ext_null(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11) {
|
||||
return HSA_STATUS_ERROR_NOT_INITIALIZED;
|
||||
}
|
||||
template <class T0, class T1, class T2, class T3, class T4, class T5, class T6,
|
||||
class T7, class T8, class T9, class T10, class T11, class T12>
|
||||
static T0 hsa_ext_null(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12) {
|
||||
return HSA_STATUS_ERROR_NOT_INITIALIZED;
|
||||
}
|
||||
template <class T0, class T1, class T2, class T3, class T4, class T5, class T6,
|
||||
class T7, class T8, class T9, class T10, class T11, class T12,
|
||||
class T13>
|
||||
static T0 hsa_ext_null(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13) {
|
||||
return HSA_STATUS_ERROR_NOT_INITIALIZED;
|
||||
}
|
||||
template <class T0, class T1, class T2, class T3, class T4, class T5, class T6,
|
||||
class T7, class T8, class T9, class T10, class T11, class T12,
|
||||
class T13, class T14>
|
||||
static T0 hsa_ext_null(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
|
||||
T14) {
|
||||
return HSA_STATUS_ERROR_NOT_INITIALIZED;
|
||||
}
|
||||
template <class T0, class T1, class T2, class T3, class T4, class T5, class T6,
|
||||
class T7, class T8, class T9, class T10, class T11, class T12,
|
||||
class T13, class T14, class T15>
|
||||
static T0 hsa_ext_null(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
|
||||
T14, T15) {
|
||||
return HSA_STATUS_ERROR_NOT_INITIALIZED;
|
||||
}
|
||||
template <class T0, class T1, class T2, class T3, class T4, class T5, class T6,
|
||||
class T7, class T8, class T9, class T10, class T11, class T12,
|
||||
class T13, class T14, class T15, class T16>
|
||||
static T0 hsa_ext_null(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
|
||||
T14, T15, T16) {
|
||||
return HSA_STATUS_ERROR_NOT_INITIALIZED;
|
||||
}
|
||||
template <class T0, class T1, class T2, class T3, class T4, class T5, class T6,
|
||||
class T7, class T8, class T9, class T10, class T11, class T12,
|
||||
class T13, class T14, class T15, class T16, class T17>
|
||||
static T0 hsa_ext_null(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
|
||||
T14, T15, T16, T17) {
|
||||
return HSA_STATUS_ERROR_NOT_INITIALIZED;
|
||||
}
|
||||
template <class T0, class T1, class T2, class T3, class T4, class T5, class T6,
|
||||
class T7, class T8, class T9, class T10, class T11, class T12,
|
||||
class T13, class T14, class T15, class T16, class T17, class T18>
|
||||
static T0 hsa_ext_null(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
|
||||
T14, T15, T16, T17, T18) {
|
||||
return HSA_STATUS_ERROR_NOT_INITIALIZED;
|
||||
}
|
||||
template <class T0, class T1, class T2, class T3, class T4, class T5, class T6,
|
||||
class T7, class T8, class T9, class T10, class T11, class T12,
|
||||
class T13, class T14, class T15, class T16, class T17, class T18,
|
||||
class T19>
|
||||
static T0 hsa_ext_null(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
|
||||
T14, T15, T16, T17, T18, T19) {
|
||||
return HSA_STATUS_ERROR_NOT_INITIALIZED;
|
||||
}
|
||||
template <class T0, class T1, class T2, class T3, class T4, class T5, class T6,
|
||||
class T7, class T8, class T9, class T10, class T11, class T12,
|
||||
class T13, class T14, class T15, class T16, class T17, class T18,
|
||||
class T19, class T20>
|
||||
static T0 hsa_ext_null(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
|
||||
T14, T15, T16, T17, T18, T19, T20) {
|
||||
return HSA_STATUS_ERROR_NOT_INITIALIZED;
|
||||
}
|
||||
|
||||
// Starts with every table entry pointing at the hsa_ext_null placeholder.
ExtensionEntryPoints::ExtensionEntryPoints() {
  InitTable();
}
|
||||
|
||||
// Resets every entry of |table| to the hsa_ext_null placeholder, i.e. the
// state in which no extension library has supplied an implementation.
void ExtensionEntryPoints::InitTable() {
  // Program (finalizer) extension.
  table.hsa_ext_program_create_fn = hsa_ext_null;
  table.hsa_ext_program_destroy_fn = hsa_ext_null;
  table.hsa_ext_program_add_module_fn = hsa_ext_null;
  table.hsa_ext_program_iterate_modules_fn = hsa_ext_null;
  table.hsa_ext_program_get_info_fn = hsa_ext_null;
  table.hsa_ext_program_finalize_fn = hsa_ext_null;

  // Image extension.
  table.hsa_ext_image_get_capability_fn = hsa_ext_null;
  table.hsa_ext_image_data_get_info_fn = hsa_ext_null;
  table.hsa_ext_image_create_fn = hsa_ext_null;
  table.hsa_ext_image_import_fn = hsa_ext_null;
  table.hsa_ext_image_export_fn = hsa_ext_null;
  table.hsa_ext_image_copy_fn = hsa_ext_null;
  table.hsa_ext_image_clear_fn = hsa_ext_null;
  table.hsa_ext_image_destroy_fn = hsa_ext_null;

  // Sampler entry points.
  table.hsa_ext_sampler_create_fn = hsa_ext_null;
  table.hsa_ext_sampler_destroy_fn = hsa_ext_null;

  // AMD-internal image entry points.
  table.hsa_amd_image_get_info_max_dim_fn = hsa_ext_null;
  table.hsa_amd_image_create_fn = hsa_ext_null;
}
|
||||
|
||||
void ExtensionEntryPoints::Unload() {
|
||||
for (int i = 0; i < libs_.size(); i++) {
|
||||
void* ptr = os::GetExportAddress(libs_[i], "Unload");
|
||||
if (ptr) {
|
||||
((Unload_t)ptr)();
|
||||
}
|
||||
}
|
||||
// Due to valgrind bug, runtime cannot dlclose extensions see:
|
||||
// http://valgrind.org/docs/manual/faq.html#faq.unhelpful
|
||||
if (os::GetEnvVar("HSA_RUNNING_UNDER_VALGRIND") != "1") {
|
||||
for (int i = 0; i < libs_.size(); i++) {
|
||||
os::CloseLib(libs_[i]);
|
||||
}
|
||||
}
|
||||
libs_.clear();
|
||||
InitTable();
|
||||
}
|
||||
|
||||
bool ExtensionEntryPoints::Load(std::string library_name) {
|
||||
os::LibHandle lib = os::LoadLib(library_name);
|
||||
if (lib == NULL) {
|
||||
return false;
|
||||
}
|
||||
libs_.push_back(lib);
|
||||
|
||||
void* ptr;
|
||||
|
||||
ptr = os::GetExportAddress(lib, "hsa_ext_program_create_impl");
|
||||
if (ptr != NULL) {
|
||||
assert(table.hsa_ext_program_create_fn ==
|
||||
(decltype(::hsa_ext_program_create)*)hsa_ext_null &&
|
||||
"Duplicate load of extension import.");
|
||||
table.hsa_ext_program_create_fn = (decltype(::hsa_ext_program_create)*)ptr;
|
||||
}
|
||||
|
||||
ptr = os::GetExportAddress(lib, "hsa_ext_program_destroy_impl");
|
||||
if (ptr != NULL) {
|
||||
assert(table.hsa_ext_program_destroy_fn ==
|
||||
(decltype(::hsa_ext_program_destroy)*)hsa_ext_null &&
|
||||
"Duplicate load of extension import.");
|
||||
table.hsa_ext_program_destroy_fn =
|
||||
(decltype(::hsa_ext_program_destroy)*)ptr;
|
||||
}
|
||||
|
||||
ptr = os::GetExportAddress(lib, "hsa_ext_program_add_module_impl");
|
||||
if (ptr != NULL) {
|
||||
assert(table.hsa_ext_program_add_module_fn ==
|
||||
(decltype(::hsa_ext_program_add_module)*)hsa_ext_null &&
|
||||
"Duplicate load of extension import.");
|
||||
table.hsa_ext_program_add_module_fn =
|
||||
(decltype(::hsa_ext_program_add_module)*)ptr;
|
||||
}
|
||||
|
||||
ptr = os::GetExportAddress(lib, "hsa_ext_program_iterate_modules_impl");
|
||||
if (ptr != NULL) {
|
||||
assert(table.hsa_ext_program_iterate_modules_fn ==
|
||||
(decltype(::hsa_ext_program_iterate_modules)*)hsa_ext_null &&
|
||||
"Duplicate load of extension import.");
|
||||
table.hsa_ext_program_iterate_modules_fn =
|
||||
(decltype(::hsa_ext_program_iterate_modules)*)ptr;
|
||||
}
|
||||
|
||||
ptr = os::GetExportAddress(lib, "hsa_ext_program_get_info_impl");
|
||||
if (ptr != NULL) {
|
||||
assert(table.hsa_ext_program_get_info_fn ==
|
||||
(decltype(::hsa_ext_program_get_info)*)hsa_ext_null &&
|
||||
"Duplicate load of extension import.");
|
||||
table.hsa_ext_program_get_info_fn =
|
||||
(decltype(::hsa_ext_program_get_info)*)ptr;
|
||||
}
|
||||
|
||||
ptr = os::GetExportAddress(lib, "hsa_ext_program_finalize_impl");
|
||||
if (ptr != NULL) {
|
||||
assert(table.hsa_ext_program_finalize_fn ==
|
||||
(decltype(::hsa_ext_program_finalize)*)hsa_ext_null &&
|
||||
"Duplicate load of extension import.");
|
||||
table.hsa_ext_program_finalize_fn =
|
||||
(decltype(::hsa_ext_program_finalize)*)ptr;
|
||||
}
|
||||
|
||||
ptr = os::GetExportAddress(lib, "hsa_ext_image_get_capability_impl");
|
||||
if (ptr != NULL) {
|
||||
assert(table.hsa_ext_image_get_capability_fn ==
|
||||
(decltype(::hsa_ext_image_get_capability)*)hsa_ext_null &&
|
||||
"Duplicate load of extension import.");
|
||||
table.hsa_ext_image_get_capability_fn =
|
||||
(decltype(::hsa_ext_image_get_capability)*)ptr;
|
||||
}
|
||||
|
||||
ptr = os::GetExportAddress(lib, "hsa_ext_image_data_get_info_impl");
|
||||
if (ptr != NULL) {
|
||||
assert(table.hsa_ext_image_data_get_info_fn ==
|
||||
(decltype(::hsa_ext_image_data_get_info)*)hsa_ext_null &&
|
||||
"Duplicate load of extension import.");
|
||||
table.hsa_ext_image_data_get_info_fn =
|
||||
(decltype(::hsa_ext_image_data_get_info)*)ptr;
|
||||
}
|
||||
|
||||
ptr = os::GetExportAddress(lib, "hsa_ext_image_create_impl");
|
||||
if (ptr != NULL) {
|
||||
assert(table.hsa_ext_image_create_fn ==
|
||||
(decltype(::hsa_ext_image_create)*)hsa_ext_null &&
|
||||
"Duplicate load of extension import.");
|
||||
table.hsa_ext_image_create_fn = (decltype(::hsa_ext_image_create)*)ptr;
|
||||
}
|
||||
|
||||
ptr = os::GetExportAddress(lib, "hsa_ext_image_import_impl");
|
||||
if (ptr != NULL) {
|
||||
assert(table.hsa_ext_image_import_fn ==
|
||||
(decltype(::hsa_ext_image_import)*)hsa_ext_null &&
|
||||
"Duplicate load of extension import.");
|
||||
table.hsa_ext_image_import_fn = (decltype(::hsa_ext_image_import)*)ptr;
|
||||
}
|
||||
|
||||
ptr = os::GetExportAddress(lib, "hsa_ext_image_export_impl");
|
||||
if (ptr != NULL) {
|
||||
assert(table.hsa_ext_image_export_fn ==
|
||||
(decltype(::hsa_ext_image_export)*)hsa_ext_null &&
|
||||
"Duplicate load of extension import.");
|
||||
table.hsa_ext_image_export_fn = (decltype(::hsa_ext_image_export)*)ptr;
|
||||
}
|
||||
|
||||
ptr = os::GetExportAddress(lib, "hsa_ext_image_copy_impl");
|
||||
if (ptr != NULL) {
|
||||
assert(table.hsa_ext_image_copy_fn ==
|
||||
(decltype(::hsa_ext_image_copy)*)hsa_ext_null &&
|
||||
"Duplicate load of extension import.");
|
||||
table.hsa_ext_image_copy_fn = (decltype(::hsa_ext_image_copy)*)ptr;
|
||||
}
|
||||
|
||||
ptr = os::GetExportAddress(lib, "hsa_ext_image_clear_impl");
|
||||
if (ptr != NULL) {
|
||||
assert(table.hsa_ext_image_clear_fn ==
|
||||
(decltype(::hsa_ext_image_clear)*)hsa_ext_null &&
|
||||
"Duplicate load of extension import.");
|
||||
table.hsa_ext_image_clear_fn = (decltype(::hsa_ext_image_clear)*)ptr;
|
||||
}
|
||||
|
||||
ptr = os::GetExportAddress(lib, "hsa_ext_image_destroy_impl");
|
||||
if (ptr != NULL) {
|
||||
assert(table.hsa_ext_image_destroy_fn ==
|
||||
(decltype(::hsa_ext_image_destroy)*)hsa_ext_null &&
|
||||
"Duplicate load of extension import.");
|
||||
table.hsa_ext_image_destroy_fn = (decltype(::hsa_ext_image_destroy)*)ptr;
|
||||
}
|
||||
|
||||
ptr = os::GetExportAddress(lib, "hsa_ext_sampler_create_impl");
|
||||
if (ptr != NULL) {
|
||||
assert(table.hsa_ext_sampler_create_fn ==
|
||||
(decltype(::hsa_ext_sampler_create)*)hsa_ext_null &&
|
||||
"Duplicate load of extension import.");
|
||||
table.hsa_ext_sampler_create_fn = (decltype(::hsa_ext_sampler_create)*)ptr;
|
||||
}
|
||||
|
||||
ptr = os::GetExportAddress(lib, "hsa_ext_sampler_destroy_impl");
|
||||
if (ptr != NULL) {
|
||||
assert(table.hsa_ext_sampler_destroy_fn ==
|
||||
(decltype(::hsa_ext_sampler_destroy)*)hsa_ext_null &&
|
||||
"Duplicate load of extension import.");
|
||||
table.hsa_ext_sampler_destroy_fn =
|
||||
(decltype(::hsa_ext_sampler_destroy)*)ptr;
|
||||
}
|
||||
|
||||
ptr = os::GetExportAddress(lib, "hsa_amd_image_get_info_max_dim_impl");
|
||||
if (ptr != NULL) {
|
||||
assert(table.hsa_amd_image_get_info_max_dim_fn ==
|
||||
(decltype(::hsa_amd_image_get_info_max_dim)*)hsa_ext_null &&
|
||||
"Duplicate load of extension import.");
|
||||
table.hsa_amd_image_get_info_max_dim_fn =
|
||||
(decltype(::hsa_amd_image_get_info_max_dim)*)ptr;
|
||||
}
|
||||
|
||||
ptr = os::GetExportAddress(lib, "hsa_amd_image_create_impl");
|
||||
if (ptr != NULL) {
|
||||
assert(table.hsa_amd_image_create_fn ==
|
||||
(decltype(::hsa_amd_image_create)*)hsa_ext_null &&
|
||||
"Duplicate load of extension import.");
|
||||
table.hsa_amd_image_create_fn =
|
||||
(decltype(::hsa_amd_image_create)*)ptr;
|
||||
}
|
||||
|
||||
core::hsa_internal_api_table_.extension_backup=table;
|
||||
core::hsa_internal_api_table_.table.std_exts_=&core::hsa_internal_api_table_.extension_backup;
|
||||
|
||||
ptr = os::GetExportAddress(lib, "Load");
|
||||
if (ptr != NULL) {
|
||||
((Load_t)ptr)(&core::hsa_internal_api_table_.table);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
} // namespace core
|
||||
|
||||
//---------------------------------------------------------------------------//
|
||||
// Exported extension stub functions
|
||||
//---------------------------------------------------------------------------//
|
||||
|
||||
// Exported stub: forwards to the hsa_ext_program_create entry currently
// installed in the runtime's extension table.
hsa_status_t hsa_ext_program_create(
    hsa_machine_model_t machine_model, hsa_profile_t profile,
    hsa_default_float_rounding_mode_t default_float_rounding_mode,
    const char* options, hsa_ext_program_t* program) {
  auto entry = core::Runtime::runtime_singleton_->extensions_.table
                   .hsa_ext_program_create_fn;
  return entry(machine_model, profile, default_float_rounding_mode, options,
               program);
}
|
||||
|
||||
// Exported stub: forwards to the installed hsa_ext_program_destroy entry.
hsa_status_t hsa_ext_program_destroy(hsa_ext_program_t program) {
  auto entry = core::Runtime::runtime_singleton_->extensions_.table
                   .hsa_ext_program_destroy_fn;
  return entry(program);
}
|
||||
|
||||
// Exported stub: forwards to the installed hsa_ext_program_add_module entry.
hsa_status_t hsa_ext_program_add_module(hsa_ext_program_t program,
                                        hsa_ext_module_t module) {
  auto entry = core::Runtime::runtime_singleton_->extensions_.table
                   .hsa_ext_program_add_module_fn;
  return entry(program, module);
}
|
||||
|
||||
// Exported stub: forwards to the installed hsa_ext_program_iterate_modules
// entry.
hsa_status_t hsa_ext_program_iterate_modules(
    hsa_ext_program_t program,
    hsa_status_t (*callback)(hsa_ext_program_t program, hsa_ext_module_t module,
                             void* data),
    void* data) {
  auto entry = core::Runtime::runtime_singleton_->extensions_.table
                   .hsa_ext_program_iterate_modules_fn;
  return entry(program, callback, data);
}
|
||||
|
||||
// Exported stub: forwards to the installed hsa_ext_program_get_info entry.
hsa_status_t hsa_ext_program_get_info(hsa_ext_program_t program,
                                      hsa_ext_program_info_t attribute,
                                      void* value) {
  auto entry = core::Runtime::runtime_singleton_->extensions_.table
                   .hsa_ext_program_get_info_fn;
  return entry(program, attribute, value);
}
|
||||
|
||||
// Exported stub: forwards to the installed hsa_ext_program_finalize entry.
hsa_status_t hsa_ext_program_finalize(
    hsa_ext_program_t program, hsa_isa_t isa, int32_t call_convention,
    hsa_ext_control_directives_t control_directives, const char* options,
    hsa_code_object_type_t code_object_type, hsa_code_object_t* code_object) {
  auto entry = core::Runtime::runtime_singleton_->extensions_.table
                   .hsa_ext_program_finalize_fn;
  return entry(program, isa, call_convention, control_directives, options,
               code_object_type, code_object);
}
|
||||
|
||||
// Exported stub: forwards to the installed hsa_ext_image_get_capability entry.
hsa_status_t hsa_ext_image_get_capability(
    hsa_agent_t agent, hsa_ext_image_geometry_t geometry,
    const hsa_ext_image_format_t* image_format, uint32_t* capability_mask) {
  auto entry = core::Runtime::runtime_singleton_->extensions_.table
                   .hsa_ext_image_get_capability_fn;
  return entry(agent, geometry, image_format, capability_mask);
}
|
||||
|
||||
// Exported stub: forwards to the installed hsa_ext_image_data_get_info entry.
hsa_status_t hsa_ext_image_data_get_info(
    hsa_agent_t agent, const hsa_ext_image_descriptor_t* image_descriptor,
    hsa_access_permission_t access_permission,
    hsa_ext_image_data_info_t* image_data_info) {
  auto entry = core::Runtime::runtime_singleton_->extensions_.table
                   .hsa_ext_image_data_get_info_fn;
  return entry(agent, image_descriptor, access_permission, image_data_info);
}
|
||||
|
||||
// Exported stub: forwards to the installed hsa_ext_image_create entry.
hsa_status_t hsa_ext_image_create(
    hsa_agent_t agent, const hsa_ext_image_descriptor_t* image_descriptor,
    const void* image_data, hsa_access_permission_t access_permission,
    hsa_ext_image_t* image) {
  auto entry = core::Runtime::runtime_singleton_->extensions_.table
                   .hsa_ext_image_create_fn;
  return entry(agent, image_descriptor, image_data, access_permission, image);
}
|
||||
|
||||
// Exported stub: forwards to the installed hsa_ext_image_import entry.
hsa_status_t hsa_ext_image_import(hsa_agent_t agent, const void* src_memory,
                                  size_t src_row_pitch, size_t src_slice_pitch,
                                  hsa_ext_image_t dst_image,
                                  const hsa_ext_image_region_t* image_region) {
  auto entry = core::Runtime::runtime_singleton_->extensions_.table
                   .hsa_ext_image_import_fn;
  return entry(agent, src_memory, src_row_pitch, src_slice_pitch, dst_image,
               image_region);
}
|
||||
|
||||
// Exported stub: forwards to the installed hsa_ext_image_export entry.
hsa_status_t hsa_ext_image_export(hsa_agent_t agent, hsa_ext_image_t src_image,
                                  void* dst_memory, size_t dst_row_pitch,
                                  size_t dst_slice_pitch,
                                  const hsa_ext_image_region_t* image_region) {
  auto entry = core::Runtime::runtime_singleton_->extensions_.table
                   .hsa_ext_image_export_fn;
  return entry(agent, src_image, dst_memory, dst_row_pitch, dst_slice_pitch,
               image_region);
}
|
||||
|
||||
// Exported stub: forwards to the installed hsa_ext_image_copy entry.
hsa_status_t hsa_ext_image_copy(hsa_agent_t agent, hsa_ext_image_t src_image,
                                const hsa_dim3_t* src_offset,
                                hsa_ext_image_t dst_image,
                                const hsa_dim3_t* dst_offset,
                                const hsa_dim3_t* range) {
  auto entry =
      core::Runtime::runtime_singleton_->extensions_.table.hsa_ext_image_copy_fn;
  return entry(agent, src_image, src_offset, dst_image, dst_offset, range);
}
|
||||
|
||||
// Exported stub: forwards to the installed hsa_ext_image_clear entry.
hsa_status_t hsa_ext_image_clear(hsa_agent_t agent, hsa_ext_image_t image,
                                 const void* data,
                                 const hsa_ext_image_region_t* image_region) {
  auto entry = core::Runtime::runtime_singleton_->extensions_.table
                   .hsa_ext_image_clear_fn;
  return entry(agent, image, data, image_region);
}
|
||||
|
||||
// Exported stub: forwards to the installed hsa_ext_image_destroy entry.
hsa_status_t hsa_ext_image_destroy(hsa_agent_t agent, hsa_ext_image_t image) {
  auto entry = core::Runtime::runtime_singleton_->extensions_.table
                   .hsa_ext_image_destroy_fn;
  return entry(agent, image);
}
|
||||
|
||||
// Exported stub: forwards to the installed hsa_ext_sampler_create entry.
hsa_status_t hsa_ext_sampler_create(
    hsa_agent_t agent, const hsa_ext_sampler_descriptor_t* sampler_descriptor,
    hsa_ext_sampler_t* sampler) {
  auto entry = core::Runtime::runtime_singleton_->extensions_.table
                   .hsa_ext_sampler_create_fn;
  return entry(agent, sampler_descriptor, sampler);
}
|
||||
|
||||
// Exported stub: forwards to the installed hsa_ext_sampler_destroy entry.
hsa_status_t hsa_ext_sampler_destroy(hsa_agent_t agent,
                                     hsa_ext_sampler_t sampler) {
  auto entry = core::Runtime::runtime_singleton_->extensions_.table
                   .hsa_ext_sampler_destroy_fn;
  return entry(agent, sampler);
}
|
||||
|
||||
//---------------------------------------------------------------------------//
|
||||
// Stubs for internal extension functions
|
||||
//---------------------------------------------------------------------------//
|
||||
|
||||
// Internal stub: forwards to the installed hsa_amd_image_get_info_max_dim
// entry.
hsa_status_t hsa_amd_image_get_info_max_dim(hsa_agent_t component,
                                            hsa_agent_info_t attribute,
                                            void* value) {
  auto entry = core::Runtime::runtime_singleton_->extensions_.table
                   .hsa_amd_image_get_info_max_dim_fn;
  return entry(component, attribute, value);
}
|
||||
|
||||
// Internal stub: forwards to the installed hsa_amd_image_create entry.
hsa_status_t hsa_amd_image_create(
    hsa_agent_t agent,
    const hsa_ext_image_descriptor_t* image_descriptor,
    const hsa_amd_image_descriptor_t* image_layout,
    const void* image_data,
    hsa_access_permission_t access_permission,
    hsa_ext_image_t* image) {
  auto entry = core::Runtime::runtime_singleton_->extensions_.table
                   .hsa_amd_image_create_fn;
  return entry(agent, image_descriptor, image_layout, image_data,
               access_permission, image);
}
|
||||
@@ -1,372 +0,0 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "core/inc/interrupt_signal.h"
|
||||
#include "core/util/timer.h"
|
||||
|
||||
namespace core {
|
||||
|
||||
// Creates an event of the requested type through hsaKmtCreateEvent.
//
// type:         event type written into the descriptor.
// manual_reset: forwarded to hsaKmtCreateEvent.
// Returns whatever hsaKmtCreateEvent stored in the output pointer; that
// pointer starts out as NULL. For memory events created successfully, the
// access-fault failure record is zeroed before the event is returned.
HsaEvent* InterruptSignal::CreateEvent(HSA_EVENTTYPE type, bool manual_reset) {
  HsaEventDescriptor desc;
  desc.EventType = type;
  desc.SyncVar.SyncVar.UserData = NULL;
  desc.SyncVar.SyncVarSize = sizeof(hsa_signal_value_t);
  desc.NodeId = 0;

  HsaEvent* event = NULL;
  const bool created =
      (HSAKMT_STATUS_SUCCESS ==
       hsaKmtCreateEvent(&desc, manual_reset, false, &event));

  if (created && type == HSA_EVENTTYPE_MEMORY) {
    // Start with a clean failure record so a later non-zero value can be
    // recognized as a reported fault.
    memset(&event->EventData.EventData.MemoryAccessFault.Failure, 0,
           sizeof(HsaAccessAttributeFailure));
  }

  return event;
}
|
||||
|
||||
// Out-of-class definition of the static rtti_id_ member, initialized to 0.
// NOTE(review): presumably used as a per-class type tag; confirm in the header.
int InterruptSignal::rtti_id_ = 0;
|
||||
|
||||
// Releases an event by passing it to hsaKmtDestroyEvent. The status returned
// by hsaKmtDestroyEvent is ignored.
void InterruptSignal::DestroyEvent(HsaEvent* evt) { hsaKmtDestroyEvent(evt); }
|
||||
|
||||
// Builds an interrupt signal with the given initial value.
//
// initial_value: forwarded to the Signal base constructor.
// use_event:     event to attach to the signal. When NULL, a new
//                HSA_EVENTTYPE_SIGNAL event is created and this object marks
//                itself as responsible for destroying it.
InterruptSignal::InterruptSignal(hsa_signal_value_t initial_value,
                                 HsaEvent* use_event)
    : Signal(initial_value) {
  // The signal frees the event only when it created the event itself.
  free_event_ = (use_event == NULL);
  event_ = free_event_ ? CreateEvent(HSA_EVENTTYPE_SIGNAL, false) : use_event;

  // Publish the event identity in the signal; zero both fields when no event
  // is available (for example, when event creation failed).
  const bool have_event = (event_ != NULL);
  signal_.event_id = have_event ? event_->EventId : 0;
  signal_.event_mailbox_ptr = have_event ? event_->EventData.HWData2 : 0;
  signal_.kind = AMD_SIGNAL_KIND_USER;

  wait_on_event_ = true;
}
|
||||
|
||||
// Marks the signal invalid, calls SetEvent(), then spins until InUse()
// reports false before destroying the event (only if this signal owns it).
InterruptSignal::~InterruptSignal() {
  invalid_ = true;
  SetEvent();

  // Busy-wait until InUse() returns false.
  while (InUse()) {
  }

  if (free_event_) {
    hsaKmtDestroyEvent(event_);
  }
}
|
||||
|
||||
// Reads the signal value atomically with relaxed memory ordering.
hsa_signal_value_t InterruptSignal::LoadRelaxed() {
  const int64_t current =
      atomic::Load(&signal_.value, std::memory_order_relaxed);
  return hsa_signal_value_t(current);
}
|
||||
|
||||
// Reads the signal value atomically with acquire memory ordering.
hsa_signal_value_t InterruptSignal::LoadAcquire() {
  const int64_t current =
      atomic::Load(&signal_.value, std::memory_order_acquire);
  return hsa_signal_value_t(current);
}
|
||||
|
||||
// Sets wait_on_event_, stores `value` into the signal with relaxed ordering,
// and then calls SetEvent().
void InterruptSignal::StoreRelaxed(hsa_signal_value_t value) {
  wait_on_event_ = true;
  const int64_t payload = int64_t(value);
  atomic::Store(&signal_.value, payload, std::memory_order_relaxed);
  SetEvent();
}
|
||||
|
||||
// Sets wait_on_event_, stores `value` into the signal with release ordering,
// and then calls SetEvent().
void InterruptSignal::StoreRelease(hsa_signal_value_t value) {
  wait_on_event_ = true;
  const int64_t payload = int64_t(value);
  atomic::Store(&signal_.value, payload, std::memory_order_release);
  SetEvent();
}
|
||||
|
||||
// Waits until the signal value satisfies `condition` against `compare_value`,
// the timeout elapses, or the signal is marked invalid.
//
// condition:     comparison to apply (EQ, NE, GTE, LT).
// compare_value: right-hand side of the comparison.
// timeout:       maximum wait, in units of the HSA timestamp frequency;
//                the all-ones value requests the longest possible event wait.
// wait_hint:     HSA_WAIT_STATE_ACTIVE keeps the caller polling; other values
//                allow blocking on the event once the polling window is over.
//
// Returns the value that satisfied the condition, the most recently loaded
// value when the timeout expires, or 0 when the signal is invalid or the
// condition code is not recognized.
hsa_signal_value_t InterruptSignal::WaitRelaxed(
    hsa_signal_condition_t condition, hsa_signal_value_t compare_value,
    uint64_t timeout, hsa_wait_state_t wait_hint) {
  const uint32_t prior_waiters = atomic::Increment(&waiting_);

  // Allow only the first waiter to sleep (temporary, known to be bad).
  if (prior_waiters != 0) {
    wait_hint = HSA_WAIT_STATE_ACTIVE;
  }

  // Undo the waiter registration on every exit path.
  MAKE_SCOPE_GUARD([&]() { atomic::Decrement(&waiting_); });

  int64_t observed;

  const timer::fast_clock::time_point wait_begin = timer::fast_clock::now();

  // Length of the initial pure-polling window. The exact figure is not
  // critical; it only needs to be short relative to the thread scheduling
  // quantum (usually around 16ms).
  const timer::fast_clock::duration kPollWindow = std::chrono::milliseconds(5);

  // Convert the caller's timeout into the fast_clock domain.
  uint64_t hsa_freq;
  HSA::hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &hsa_freq);
  const timer::fast_clock::duration fast_timeout =
      timer::duration_from_seconds<timer::fast_clock::duration>(
          double(timeout) / double(hsa_freq));

  bool satisfied = false;
  for (;;) {
    if (invalid_) return 0;

    observed = atomic::Load(&signal_.value, std::memory_order_relaxed);

    switch (condition) {
      case HSA_SIGNAL_CONDITION_EQ:
        satisfied = (observed == compare_value);
        break;
      case HSA_SIGNAL_CONDITION_NE:
        satisfied = (observed != compare_value);
        break;
      case HSA_SIGNAL_CONDITION_GTE:
        satisfied = (observed >= compare_value);
        break;
      case HSA_SIGNAL_CONDITION_LT:
        satisfied = (observed < compare_value);
        break;
      default:
        return 0;
    }
    if (satisfied) return hsa_signal_value_t(observed);

    const timer::fast_clock::time_point now = timer::fast_clock::now();
    if (now - wait_begin > kPollWindow) {
      if (now - wait_begin > fast_timeout) {
        // Timed out: report the freshest value available.
        observed = atomic::Load(&signal_.value, std::memory_order_relaxed);
        return hsa_signal_value_t(observed);
      }
      if (wait_on_event_ && wait_hint != HSA_WAIT_STATE_ACTIVE) {
        const auto time_remaining = fast_timeout - (now - wait_begin);
        const bool clamp_to_max =
            (timeout == uint64_t(-1)) ||
            (time_remaining > std::chrono::milliseconds(uint32_t(-1)));

        uint32_t wait_ms;
        if (clamp_to_max) {
          wait_ms = uint32_t(-1);
        } else {
          wait_ms = timer::duration_cast<std::chrono::milliseconds>(
                        time_remaining).count();
        }
        hsaKmtWaitOnEvent(event_, wait_ms);
      }
    }
  }
}
|
||||
|
||||
// Same wait as WaitRelaxed, followed by an acquire fence before the observed
// value is handed back to the caller.
hsa_signal_value_t InterruptSignal::WaitAcquire(
    hsa_signal_condition_t condition, hsa_signal_value_t compare_value,
    uint64_t timeout, hsa_wait_state_t wait_hint) {
  const hsa_signal_value_t observed =
      WaitRelaxed(condition, compare_value, timeout, wait_hint);
  std::atomic_thread_fence(std::memory_order_acquire);
  return observed;
}
|
||||
|
||||
// Atomically ANDs `value` into the signal (relaxed), then calls SetEvent().
void InterruptSignal::AndRelaxed(hsa_signal_value_t value) {
  const int64_t operand = int64_t(value);
  atomic::And(&signal_.value, operand, std::memory_order_relaxed);
  SetEvent();
}
|
||||
|
||||
// Atomically ANDs `value` into the signal (acquire), then calls SetEvent().
void InterruptSignal::AndAcquire(hsa_signal_value_t value) {
  const int64_t operand = int64_t(value);
  atomic::And(&signal_.value, operand, std::memory_order_acquire);
  SetEvent();
}
|
||||
|
||||
// Atomically ANDs `value` into the signal (release), then calls SetEvent().
void InterruptSignal::AndRelease(hsa_signal_value_t value) {
  const int64_t operand = int64_t(value);
  atomic::And(&signal_.value, operand, std::memory_order_release);
  SetEvent();
}
|
||||
|
||||
// Atomically ANDs `value` into the signal (acq_rel), then calls SetEvent().
void InterruptSignal::AndAcqRel(hsa_signal_value_t value) {
  const int64_t operand = int64_t(value);
  atomic::And(&signal_.value, operand, std::memory_order_acq_rel);
  SetEvent();
}
|
||||
|
||||
// Atomically ORs `value` into the signal (relaxed), then calls SetEvent().
void InterruptSignal::OrRelaxed(hsa_signal_value_t value) {
  const int64_t operand = int64_t(value);
  atomic::Or(&signal_.value, operand, std::memory_order_relaxed);
  SetEvent();
}
|
||||
|
||||
// Atomically ORs `value` into the signal (acquire), then calls SetEvent().
void InterruptSignal::OrAcquire(hsa_signal_value_t value) {
  const int64_t operand = int64_t(value);
  atomic::Or(&signal_.value, operand, std::memory_order_acquire);
  SetEvent();
}
|
||||
|
||||
// Atomically ORs `value` into the signal (release), then calls SetEvent().
void InterruptSignal::OrRelease(hsa_signal_value_t value) {
  const int64_t operand = int64_t(value);
  atomic::Or(&signal_.value, operand, std::memory_order_release);
  SetEvent();
}
|
||||
|
||||
// Atomically ORs `value` into the signal (acq_rel), then calls SetEvent().
void InterruptSignal::OrAcqRel(hsa_signal_value_t value) {
  const int64_t operand = int64_t(value);
  atomic::Or(&signal_.value, operand, std::memory_order_acq_rel);
  SetEvent();
}
|
||||
|
||||
// Atomically XORs `value` into the signal (relaxed), then calls SetEvent().
void InterruptSignal::XorRelaxed(hsa_signal_value_t value) {
  const int64_t operand = int64_t(value);
  atomic::Xor(&signal_.value, operand, std::memory_order_relaxed);
  SetEvent();
}
|
||||
|
||||
// Atomically XORs `value` into the signal (acquire), then calls SetEvent().
void InterruptSignal::XorAcquire(hsa_signal_value_t value) {
  const int64_t operand = int64_t(value);
  atomic::Xor(&signal_.value, operand, std::memory_order_acquire);
  SetEvent();
}
|
||||
|
||||
// Atomically XORs `value` into the signal (release), then calls SetEvent().
void InterruptSignal::XorRelease(hsa_signal_value_t value) {
  const int64_t operand = int64_t(value);
  atomic::Xor(&signal_.value, operand, std::memory_order_release);
  SetEvent();
}
|
||||
|
||||
// Atomically XORs `value` into the signal (acq_rel), then calls SetEvent().
void InterruptSignal::XorAcqRel(hsa_signal_value_t value) {
  const int64_t operand = int64_t(value);
  atomic::Xor(&signal_.value, operand, std::memory_order_acq_rel);
  SetEvent();
}
|
||||
|
||||
// Atomically adds `value` to the signal (relaxed), then calls SetEvent().
void InterruptSignal::AddRelaxed(hsa_signal_value_t value) {
  const int64_t delta = int64_t(value);
  atomic::Add(&signal_.value, delta, std::memory_order_relaxed);
  SetEvent();
}
|
||||
|
||||
// Atomically adds `value` to the signal (acquire), then calls SetEvent().
void InterruptSignal::AddAcquire(hsa_signal_value_t value) {
  const int64_t delta = int64_t(value);
  atomic::Add(&signal_.value, delta, std::memory_order_acquire);
  SetEvent();
}
|
||||
|
||||
// Atomically adds `value` to the signal (release), then calls SetEvent().
void InterruptSignal::AddRelease(hsa_signal_value_t value) {
  const int64_t delta = int64_t(value);
  atomic::Add(&signal_.value, delta, std::memory_order_release);
  SetEvent();
}
|
||||
|
||||
// Atomically adds `value` to the signal (acq_rel), then calls SetEvent().
void InterruptSignal::AddAcqRel(hsa_signal_value_t value) {
  const int64_t delta = int64_t(value);
  atomic::Add(&signal_.value, delta, std::memory_order_acq_rel);
  SetEvent();
}
|
||||
|
||||
// Atomically subtracts `value` from the signal (relaxed), then calls
// SetEvent().
void InterruptSignal::SubRelaxed(hsa_signal_value_t value) {
  const int64_t delta = int64_t(value);
  atomic::Sub(&signal_.value, delta, std::memory_order_relaxed);
  SetEvent();
}
|
||||
|
||||
// Atomically subtracts `value` from the signal (acquire), then calls
// SetEvent().
void InterruptSignal::SubAcquire(hsa_signal_value_t value) {
  const int64_t delta = int64_t(value);
  atomic::Sub(&signal_.value, delta, std::memory_order_acquire);
  SetEvent();
}
|
||||
|
||||
// Atomically subtracts `value` from the signal (release), then calls
// SetEvent().
void InterruptSignal::SubRelease(hsa_signal_value_t value) {
  const int64_t delta = int64_t(value);
  atomic::Sub(&signal_.value, delta, std::memory_order_release);
  SetEvent();
}
|
||||
|
||||
// Atomically subtracts `value` from the signal (acq_rel), then calls
// SetEvent().
void InterruptSignal::SubAcqRel(hsa_signal_value_t value) {
  const int64_t delta = int64_t(value);
  atomic::Sub(&signal_.value, delta, std::memory_order_acq_rel);
  SetEvent();
}
|
||||
|
||||
// Atomically replaces the signal value with `value` (relaxed), calls
// SetEvent(), and returns the result of the exchange.
hsa_signal_value_t InterruptSignal::ExchRelaxed(hsa_signal_value_t value) {
  const int64_t previous = atomic::Exchange(&signal_.value, int64_t(value),
                                            std::memory_order_relaxed);
  SetEvent();
  return hsa_signal_value_t(previous);
}
|
||||
|
||||
// Atomically replaces the signal value with `value` (acquire), calls
// SetEvent(), and returns the result of the exchange.
hsa_signal_value_t InterruptSignal::ExchAcquire(hsa_signal_value_t value) {
  const int64_t previous = atomic::Exchange(&signal_.value, int64_t(value),
                                            std::memory_order_acquire);
  SetEvent();
  return hsa_signal_value_t(previous);
}
|
||||
|
||||
// Atomically replaces the signal value with `value` (release), calls
// SetEvent(), and returns the result of the exchange.
hsa_signal_value_t InterruptSignal::ExchRelease(hsa_signal_value_t value) {
  const int64_t previous = atomic::Exchange(&signal_.value, int64_t(value),
                                            std::memory_order_release);
  SetEvent();
  return hsa_signal_value_t(previous);
}
|
||||
|
||||
// Atomically replaces the signal value with `value` (acq_rel), calls
// SetEvent(), and returns the result of the exchange.
hsa_signal_value_t InterruptSignal::ExchAcqRel(hsa_signal_value_t value) {
  const int64_t previous = atomic::Exchange(&signal_.value, int64_t(value),
                                            std::memory_order_acq_rel);
  SetEvent();
  return hsa_signal_value_t(previous);
}
|
||||
|
||||
// Compare-and-swap on the signal value with relaxed ordering: `value` is the
// replacement and `expected` the comparand handed to atomic::Cas. SetEvent()
// is called regardless of the outcome, and the result of atomic::Cas is
// returned.
hsa_signal_value_t InterruptSignal::CasRelaxed(hsa_signal_value_t expected,
                                               hsa_signal_value_t value) {
  const int64_t observed =
      atomic::Cas(&signal_.value, int64_t(value), int64_t(expected),
                  std::memory_order_relaxed);
  SetEvent();
  return hsa_signal_value_t(observed);
}
|
||||
|
||||
// Compare-and-swap on the signal value with acquire ordering: `value` is the
// replacement and `expected` the comparand handed to atomic::Cas. SetEvent()
// is called regardless of the outcome, and the result of atomic::Cas is
// returned.
hsa_signal_value_t InterruptSignal::CasAcquire(hsa_signal_value_t expected,
                                               hsa_signal_value_t value) {
  const int64_t observed =
      atomic::Cas(&signal_.value, int64_t(value), int64_t(expected),
                  std::memory_order_acquire);
  SetEvent();
  return hsa_signal_value_t(observed);
}
|
||||
|
||||
// Compare-and-swap on the signal value with release ordering: `value` is the
// replacement and `expected` the comparand handed to atomic::Cas. SetEvent()
// is called regardless of the outcome, and the result of atomic::Cas is
// returned.
hsa_signal_value_t InterruptSignal::CasRelease(hsa_signal_value_t expected,
                                               hsa_signal_value_t value) {
  const int64_t observed =
      atomic::Cas(&signal_.value, int64_t(value), int64_t(expected),
                  std::memory_order_release);
  SetEvent();
  return hsa_signal_value_t(observed);
}
|
||||
|
||||
// Compare-and-swap on the signal value with acq_rel ordering: `value` is the
// replacement and `expected` the comparand handed to atomic::Cas. SetEvent()
// is called regardless of the outcome, and the result of atomic::Cas is
// returned.
hsa_signal_value_t InterruptSignal::CasAcqRel(hsa_signal_value_t expected,
                                              hsa_signal_value_t value) {
  const int64_t observed =
      atomic::Cas(&signal_.value, int64_t(value), int64_t(expected),
                  std::memory_order_acq_rel);
  SetEvent();
  return hsa_signal_value_t(observed);
}
|
||||
|
||||
} // namespace core
|
||||
@@ -1,130 +0,0 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "core/inc/isa.h"
|
||||
|
||||
#include <cstring>
|
||||
#include <sstream>
|
||||
|
||||
namespace core {
|
||||
|
||||
// Registry of supported ISAs, populated once from GetSupportedIsas() when this
// translation unit's namespace-scope objects are initialized.
const IsaRegistry::IsaMap IsaRegistry::supported_isas_(
    IsaRegistry::GetSupportedIsas());
|
||||
|
||||
// Looks up an ISA by its full name.
// Returns a pointer to the registered Isa, or nullptr when the name is not in
// the registry.
const Isa *IsaRegistry::GetIsa(const std::string &full_name) {
  const auto entry = supported_isas_.find(full_name);
  if (entry == supported_isas_.end()) {
    return nullptr;
  }
  return &entry->second;
}
|
||||
|
||||
// Looks up an ISA by version: the version is turned into a full name via a
// temporary Isa, and that name is searched in the registry.
// Returns a pointer to the registered Isa, or nullptr when there is no match.
const Isa *IsaRegistry::GetIsa(const Isa::Version &version) {
  const std::string lookup_name = Isa(version).GetFullName();
  const auto entry = supported_isas_.find(lookup_name);
  if (entry == supported_isas_.end()) {
    return nullptr;
  }
  return &entry->second;
}
|
||||
|
||||
// Builds the table of ISAs known to the runtime, keyed by each ISA's full
// name (see Isa::GetFullName).
const IsaRegistry::IsaMap IsaRegistry::GetSupportedIsas() {
// Declares a local Isa for version maj.min.stp and inserts it into
// `supported_isas` under its full name. The macro is only meaningful inside
// this function and is undefined again before returning.
#define ISAREG_ENTRY_GEN(maj, min, stp)                                 \
  Isa amd_amdgpu_##maj##min##stp;                                       \
  amd_amdgpu_##maj##min##stp.version_ = Isa::Version(maj, min, stp);    \
  supported_isas.insert(                                                \
      std::make_pair(amd_amdgpu_##maj##min##stp.GetFullName(),          \
                     amd_amdgpu_##maj##min##stp));

  IsaMap supported_isas;

  ISAREG_ENTRY_GEN(7, 0, 0)
  ISAREG_ENTRY_GEN(7, 0, 1)
  ISAREG_ENTRY_GEN(8, 0, 0)
  ISAREG_ENTRY_GEN(8, 0, 1)
  ISAREG_ENTRY_GEN(8, 0, 2)
  ISAREG_ENTRY_GEN(8, 0, 3)
  ISAREG_ENTRY_GEN(8, 1, 0)
  ISAREG_ENTRY_GEN(9, 0, 0)

// Keep the helper macro from leaking into the rest of the translation unit.
#undef ISAREG_ENTRY_GEN

  return supported_isas;
}
|
||||
|
||||
// Formats the ISA's full name as
// "<vendor>:<architecture>:<major>:<minor>:<stepping>".
std::string Isa::GetFullName() const {
  std::ostringstream name_builder;
  name_builder << GetVendor() << ":" << GetArchitecture() << ":";
  name_builder << GetMajorVersion() << ":" << GetMinorVersion() << ":";
  name_builder << GetStepping();
  return name_builder.str();
}
|
||||
|
||||
bool Isa::GetInfo(const hsa_isa_info_t &attribute, void *value) const {
|
||||
if (!value) {
|
||||
return false;
|
||||
}
|
||||
|
||||
switch (attribute) {
|
||||
case HSA_ISA_INFO_NAME_LENGTH: {
|
||||
std::string full_name = GetFullName();
|
||||
*((uint32_t *)value) = static_cast<uint32_t>(full_name.size());
|
||||
return true;
|
||||
}
|
||||
case HSA_ISA_INFO_NAME: {
|
||||
std::string full_name = GetFullName();
|
||||
memcpy(value, full_name.c_str(), full_name.size());
|
||||
return true;
|
||||
}
|
||||
// @todo: following case needs to be removed
|
||||
case HSA_ISA_INFO_CALL_CONVENTION_COUNT: {
|
||||
*((uint32_t *)value) = 1;
|
||||
return true;
|
||||
}
|
||||
// @todo: following case needs to be removed
|
||||
case HSA_ISA_INFO_CALL_CONVENTION_INFO_WAVEFRONT_SIZE: {
|
||||
*((uint32_t *)value) = 64;
|
||||
return true;
|
||||
}
|
||||
// @todo: following needs to be removed
|
||||
case HSA_ISA_INFO_CALL_CONVENTION_INFO_WAVEFRONTS_PER_COMPUTE_UNIT: {
|
||||
*((uint32_t *)value) = 40;
|
||||
return true;
|
||||
}
|
||||
default: {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace core
|
||||
Diferenças do arquivo suprimidas por serem muito extensas
Carregar Diff
@@ -1,187 +0,0 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef HSA_RUNTME_CORE_SIGNAL_CPP_
|
||||
#define HSA_RUNTME_CORE_SIGNAL_CPP_
|
||||
|
||||
#include "core/inc/signal.h"
|
||||
#include "core/util/timer.h"
|
||||
#include <algorithm>
|
||||
|
||||
namespace core {
|
||||
|
||||
uint32_t Signal::WaitAny(uint32_t signal_count, hsa_signal_t* hsa_signals,
|
||||
hsa_signal_condition_t* conds,
|
||||
hsa_signal_value_t* values, uint64_t timeout,
|
||||
hsa_wait_state_t wait_hint,
|
||||
hsa_signal_value_t* satisfying_value) {
|
||||
hsa_signal_handle* signals =
|
||||
reinterpret_cast<hsa_signal_handle*>(hsa_signals);
|
||||
uint32_t prior = 0;
|
||||
for (uint32_t i = 0; i < signal_count; i++)
|
||||
prior = Max(prior, atomic::Increment(&signals[i]->waiting_));
|
||||
|
||||
MAKE_SCOPE_GUARD([&]() {
|
||||
for (uint32_t i = 0; i < signal_count; i++)
|
||||
atomic::Decrement(&signals[i]->waiting_);
|
||||
});
|
||||
|
||||
// Allow only the first waiter to sleep (temporary, known to be bad).
|
||||
if (prior != 0) wait_hint = HSA_WAIT_STATE_ACTIVE;
|
||||
|
||||
// Ensure that all signals in the list can be slept on.
|
||||
if (wait_hint != HSA_WAIT_STATE_ACTIVE) {
|
||||
for (uint32_t i = 0; i < signal_count; i++) {
|
||||
if (signals[i]->EopEvent() == NULL) {
|
||||
wait_hint = HSA_WAIT_STATE_ACTIVE;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const uint32_t small_size = 10;
|
||||
HsaEvent* short_evts[small_size];
|
||||
HsaEvent** evts = NULL;
|
||||
uint32_t unique_evts = 0;
|
||||
if (wait_hint != HSA_WAIT_STATE_ACTIVE) {
|
||||
if (signal_count > small_size)
|
||||
evts = new HsaEvent* [signal_count];
|
||||
else
|
||||
evts = short_evts;
|
||||
for (uint32_t i = 0; i < signal_count; i++)
|
||||
evts[i] = signals[i]->EopEvent();
|
||||
std::sort(evts, evts + signal_count);
|
||||
HsaEvent** end = std::unique(evts, evts + signal_count);
|
||||
unique_evts = uint32_t(end - evts);
|
||||
}
|
||||
MAKE_SCOPE_GUARD([&]() {
|
||||
if (signal_count > small_size) delete[] evts;
|
||||
});
|
||||
|
||||
int64_t value;
|
||||
|
||||
timer::fast_clock::time_point start_time = timer::fast_clock::now();
|
||||
|
||||
// Set a polling timeout value
|
||||
// Exact time is not hugely important, it should just be a short while which
|
||||
// is smaller than the thread scheduling quantum (usually around 16ms)
|
||||
const timer::fast_clock::duration kMaxElapsed = std::chrono::milliseconds(5);
|
||||
|
||||
// Convert timeout value into the fast_clock domain
|
||||
uint64_t hsa_freq;
|
||||
HSA::hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &hsa_freq);
|
||||
const timer::fast_clock::duration fast_timeout =
|
||||
timer::duration_from_seconds<timer::fast_clock::duration>(
|
||||
double(timeout) / double(hsa_freq));
|
||||
|
||||
bool condition_met = false;
|
||||
while (true) {
|
||||
for (uint32_t i = 0; i < signal_count; i++) {
|
||||
if (signals[i]->invalid_) return uint32_t(-1);
|
||||
|
||||
// Handling special event.
|
||||
if (signals[i]->EopEvent() != NULL) {
|
||||
const HSA_EVENTTYPE event_type =
|
||||
signals[i]->EopEvent()->EventData.EventType;
|
||||
if (event_type == HSA_EVENTTYPE_MEMORY) {
|
||||
const HsaMemoryAccessFault& fault =
|
||||
signals[i]->EopEvent()->EventData.EventData.MemoryAccessFault;
|
||||
const uint32_t* failure =
|
||||
reinterpret_cast<const uint32_t*>(&fault.Failure);
|
||||
if (*failure != 0) {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
value =
|
||||
atomic::Load(&signals[i]->signal_.value, std::memory_order_relaxed);
|
||||
|
||||
switch (conds[i]) {
|
||||
case HSA_SIGNAL_CONDITION_EQ: {
|
||||
condition_met = (value == values[i]);
|
||||
break;
|
||||
}
|
||||
case HSA_SIGNAL_CONDITION_NE: {
|
||||
condition_met = (value != values[i]);
|
||||
break;
|
||||
}
|
||||
case HSA_SIGNAL_CONDITION_GTE: {
|
||||
condition_met = (value >= values[i]);
|
||||
break;
|
||||
}
|
||||
case HSA_SIGNAL_CONDITION_LT: {
|
||||
condition_met = (value < values[i]);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
return uint32_t(-1);
|
||||
}
|
||||
if (condition_met) {
|
||||
if (satisfying_value != NULL) *satisfying_value = value;
|
||||
return i;
|
||||
}
|
||||
}
|
||||
|
||||
timer::fast_clock::time_point time = timer::fast_clock::now();
|
||||
if (time - start_time > kMaxElapsed) {
|
||||
if (time - start_time > fast_timeout) {
|
||||
return uint32_t(-1);
|
||||
}
|
||||
if (wait_hint != HSA_WAIT_STATE_ACTIVE) {
|
||||
uint32_t wait_ms;
|
||||
auto time_remaining = fast_timeout - (time - start_time);
|
||||
if ((timeout == -1) ||
|
||||
(time_remaining > std::chrono::milliseconds(uint32_t(-1))))
|
||||
wait_ms = uint32_t(-1);
|
||||
else
|
||||
wait_ms = timer::duration_cast<std::chrono::milliseconds>(
|
||||
time_remaining).count();
|
||||
hsaKmtWaitOnMultipleEvents(evts, unique_evts, false, wait_ms);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace core
|
||||
|
||||
#endif // header guard
|
||||
@@ -1,405 +0,0 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Helpers to use non-atomic types with C++11 atomic operations.
|
||||
|
||||
#ifndef HSA_RUNTIME_CORE_UTIL_ATOMIC_HELPERS_H_
|
||||
#define HSA_RUNTIME_CORE_UTIL_ATOMIC_HELPERS_H_
|
||||
|
||||
#include <atomic>
|
||||
#include "utils.h"
|
||||
|
||||
/// @brief: Special assert used here to check each atomic variable for lock free
|
||||
/// implementation.
|
||||
/// ANY locked atomics are very likely incompatible with out-of-library
|
||||
/// concurrent access (HW access for instance)
|
||||
#define lockless_check(exp) assert(exp)
|
||||
|
||||
namespace atomic {
|
||||
/// @brief: Checks if type T is compatible with its atomic representation.
|
||||
/// @param: ptr(Input), a pointer to type T for check.
|
||||
/// @return: void.
|
||||
template <class T>
|
||||
static __forceinline void BasicCheck(const T* ptr) {
|
||||
static_assert(sizeof(T) == sizeof(std::atomic<T>),
|
||||
"Type is size incompatible with its atomic representation!");
|
||||
lockless_check(
|
||||
reinterpret_cast<const std::atomic<T>*>(ptr)->is_lock_free() &&
|
||||
"Atomic operation is not lock free! Use may conflict with peripheral HW "
|
||||
"atomics!");
|
||||
};
|
||||
|
||||
/// @brief: function overloading, for more info, see previous one.
|
||||
/// @param: ptr(Input), a pointer to a volatile type.
|
||||
/// @return: void.
|
||||
template <class T>
|
||||
static __forceinline void BasicCheck(const volatile T* ptr) {
|
||||
static_assert(sizeof(T) == sizeof(std::atomic<T>),
|
||||
"Type is size incompatible with its atomic representation!");
|
||||
lockless_check(
|
||||
reinterpret_cast<const volatile std::atomic<T>*>(ptr)->is_lock_free() &&
|
||||
"Atomic operation is not lock free! Use may conflict with peripheral HW "
|
||||
"atomics!");
|
||||
};
|
||||
|
||||
/// @brief: Load value of type T atomically with specified memory order.
|
||||
/// @param: ptr(Input), a pointer to type T.
|
||||
/// @param: order(Input), memory order with atomic load, relaxed by default.
|
||||
/// @return: T, loaded value.
|
||||
template <class T>
|
||||
static __forceinline T
|
||||
Load(const T* ptr, std::memory_order order = std::memory_order_relaxed) {
|
||||
BasicCheck<T>(ptr);
|
||||
const std::atomic<T>* aptr = reinterpret_cast<const std::atomic<T>*>(ptr);
|
||||
return aptr->load(order);
|
||||
}
|
||||
|
||||
/// @brief: function overloading, for more info, see previous one.
|
||||
/// @param: ptr(Input), a pointer to volatile type T.
|
||||
/// @param: order(Input), memory order with atomic load, relaxed by default.
|
||||
/// @return: T, loaded value.
|
||||
template <class T>
|
||||
static __forceinline T
|
||||
Load(const volatile T* ptr,
|
||||
std::memory_order order = std::memory_order_relaxed) {
|
||||
BasicCheck<T>(ptr);
|
||||
volatile const std::atomic<T>* aptr =
|
||||
reinterpret_cast<volatile const std::atomic<T>*>(ptr);
|
||||
return aptr->load(order);
|
||||
}
|
||||
|
||||
/// @brief: Store value of type T with specified memory order.
|
||||
/// @param: ptr(Input), a pointer to instance which will be stored.
|
||||
/// @param: val(Input), value to be stored.
|
||||
/// @param: order(Input), memory order with atomic store, relaxed by default.
|
||||
/// @return: void.
|
||||
template <class T>
|
||||
static __forceinline void Store(
|
||||
T* ptr, T val, std::memory_order order = std::memory_order_relaxed) {
|
||||
BasicCheck<T>(ptr);
|
||||
std::atomic<T>* aptr = reinterpret_cast<std::atomic<T>*>(ptr);
|
||||
aptr->store(val, order);
|
||||
}
|
||||
|
||||
/// @brief: Function overloading, for more info, see previous one.
|
||||
/// @param: ptr(Input), a pointer to volatile instance which will be stored.
|
||||
/// @param: val(Input), value to be stored.
|
||||
/// @param: order(Input), memory order with atomic store, relaxed by default.
|
||||
/// @return: void.
|
||||
template <class T>
|
||||
static __forceinline void Store(
|
||||
volatile T* ptr, T val,
|
||||
std::memory_order order = std::memory_order_relaxed) {
|
||||
BasicCheck<T>(ptr);
|
||||
volatile std::atomic<T>* aptr =
|
||||
reinterpret_cast<volatile std::atomic<T>*>(ptr);
|
||||
aptr->store(val, order);
|
||||
}
|
||||
|
||||
/// @brief: Compare and swap value atomically with specified memory order.
|
||||
/// @param: ptr(Input), a pointer to variable which is operated on.
|
||||
/// @param: val(Input), value to be stored if condition is satisfied.
|
||||
/// @param: expected(Input), value which is expected.
|
||||
/// @param: order(Input), memory order with atomic operation.
|
||||
/// @return: T, observed value of type T.
|
||||
template <class T>
|
||||
static __forceinline T
|
||||
Cas(T* ptr, T val, T expected,
|
||||
std::memory_order order = std::memory_order_relaxed) {
|
||||
BasicCheck<T>(ptr);
|
||||
std::atomic<T>* aptr = reinterpret_cast<std::atomic<T>*>(ptr);
|
||||
aptr->compare_exchange_strong(expected, val, order);
|
||||
return expected;
|
||||
}
|
||||
|
||||
/// @brief: Function overloading, for more info, see previous one.
|
||||
/// @param: ptr(Input), a pointer to volatile variable which is operated on.
|
||||
/// @param: val(Input), value to be stored if condition is satisfied.
|
||||
/// @param: expected(Input), value which is expected.
|
||||
/// @param: order(Input), memory order which is relaxed by default.
|
||||
/// @return: T, observed value of type T.
|
||||
template <class T>
|
||||
static __forceinline T
|
||||
Cas(volatile T* ptr, T val, T expected,
|
||||
std::memory_order order = std::memory_order_relaxed) {
|
||||
BasicCheck<T>(ptr);
|
||||
volatile std::atomic<T>* aptr =
|
||||
reinterpret_cast<volatile std::atomic<T>*>(ptr);
|
||||
aptr->compare_exchange_strong(expected, val, order);
|
||||
return expected;
|
||||
}
|
||||
|
||||
/// @brief: Exchange the value atomically with specified memory order.
|
||||
/// @param: ptr(Input), a pointer to variable which is operated on.
|
||||
/// @param: val(Input), value to be stored.
|
||||
/// @param: order(Input), memory order which is relaxed by default.
|
||||
/// @return: T, the value prior to the exchange.
|
||||
template <class T>
|
||||
static __forceinline T
|
||||
Exchange(T* ptr, T val,
|
||||
std::memory_order order = std::memory_order_relaxed) {
|
||||
BasicCheck<T>(ptr);
|
||||
std::atomic<T>* aptr = reinterpret_cast<std::atomic<T>*>(ptr);
|
||||
return aptr->exchange(val, order);
|
||||
}
|
||||
|
||||
/// @brief: Function overloading, for more info, see previous one.
|
||||
/// @param: ptr(Input), a pointer to variable which is operated on.
|
||||
/// @param: val(Input), value to be stored.
|
||||
/// @param: order(Input), memory order which is relaxed by default.
|
||||
/// @return: T, the value prior to the exchange.
|
||||
template <class T>
|
||||
static __forceinline T
|
||||
Exchange(volatile T* ptr, T val,
|
||||
std::memory_order order = std::memory_order_relaxed) {
|
||||
BasicCheck<T>(ptr);
|
||||
volatile std::atomic<T>* aptr =
|
||||
reinterpret_cast<volatile std::atomic<T>*>(ptr);
|
||||
return aptr->exchange(val, order);
|
||||
}
|
||||
|
||||
/// @brief: Add value to variable atomically with specified memory order.
|
||||
/// @param: ptr(Input), a pointer to variable which is operated on.
|
||||
/// @param: val(Input), value to be added.
|
||||
/// @param: order(Input), memory order which is relaxed by default.
|
||||
/// @return: T, the value of the variable prior to the addition.
|
||||
template <class T>
|
||||
static __forceinline T
|
||||
Add(T* ptr, T val, std::memory_order order = std::memory_order_relaxed) {
|
||||
BasicCheck<T>(ptr);
|
||||
std::atomic<T>* aptr = reinterpret_cast<std::atomic<T>*>(ptr);
|
||||
return aptr->fetch_add(val, order);
|
||||
}
|
||||
|
||||
/// @brief: Subtract value from the variable atomically with specified memory
|
||||
/// order.
|
||||
/// @param: ptr(Input), a pointer to variable which is operated on.
|
||||
/// @param: val(Input), value to be subtraced.
|
||||
/// @param: order(Input), memory order which is relaxed by default.
|
||||
/// @return: T, value of the variable prior to the subtraction.
|
||||
template <class T>
|
||||
static __forceinline T
|
||||
Sub(T* ptr, T val, std::memory_order order = std::memory_order_relaxed) {
|
||||
BasicCheck<T>(ptr);
|
||||
std::atomic<T>* aptr = reinterpret_cast<std::atomic<T>*>(ptr);
|
||||
return aptr->fetch_sub(val, order);
|
||||
}
|
||||
|
||||
/// @brief: Bit And operation on variable atomically with specified memory
|
||||
/// order.
|
||||
/// @param: ptr(Input), a pointer to variable which is operated on.
|
||||
/// @param: val(Input), value which is ANDed with variable.
|
||||
/// @param: order(Input), memory order which is relaxed by default.
|
||||
/// @return: T, value of variable prior to the operation.
|
||||
template <class T>
|
||||
static __forceinline T
|
||||
And(T* ptr, T val, std::memory_order order = std::memory_order_relaxed) {
|
||||
BasicCheck<T>(ptr);
|
||||
std::atomic<T>* aptr = reinterpret_cast<std::atomic<T>*>(ptr);
|
||||
return aptr->fetch_and(val, order);
|
||||
}
|
||||
|
||||
/// @brief: Bit Or operation on variable atomically with specified memory order.
|
||||
/// @param: ptr(Input), a pointer to variable which is operated on.
|
||||
/// @param: val(Input), value which is ORed with variable.
|
||||
/// @param: order(Input), memory order which is relaxed by default.
|
||||
/// @return: T, value of variable prior to the operation.
|
||||
template <class T>
|
||||
static __forceinline T
|
||||
Or(T* ptr, T val, std::memory_order order = std::memory_order_relaxed) {
|
||||
BasicCheck<T>(ptr);
|
||||
std::atomic<T>* aptr = reinterpret_cast<std::atomic<T>*>(ptr);
|
||||
return aptr->fetch_or(val, order);
|
||||
}
|
||||
|
||||
/// @brief: Bit Xor operation on variable atomically with specified memory
|
||||
/// order.
|
||||
/// @param: ptr(Input), a pointer to variable which is operated on.
|
||||
/// @param: val(Input), value which is XORed with variable.
|
||||
/// @order: order(Input), memory order which is relaxed by default.
|
||||
/// @return: T, valud of variable prior to the opertaion.
|
||||
template <class T>
|
||||
static __forceinline T
|
||||
Xor(T* ptr, T val, std::memory_order order = std::memory_order_relaxed) {
|
||||
BasicCheck<T>(ptr);
|
||||
std::atomic<T>* aptr = reinterpret_cast<std::atomic<T>*>(ptr);
|
||||
return aptr->fetch_xor(val, order);
|
||||
}
|
||||
|
||||
/// @brief: Increase the value of variable atomically with specified memory
|
||||
/// order.
|
||||
/// @param: ptr(Input), a pointer to variable which is operated on.
|
||||
/// @param: order(Input), memory order which is relaxed by default.
|
||||
/// @return: T, value of variable prior to the operation.
|
||||
template <class T>
|
||||
static __forceinline T
|
||||
Increment(T* ptr, std::memory_order order = std::memory_order_relaxed) {
|
||||
BasicCheck<T>(ptr);
|
||||
std::atomic<T>* aptr = reinterpret_cast<std::atomic<T>*>(ptr);
|
||||
return aptr->fetch_add(1, order);
|
||||
}
|
||||
|
||||
/// @brief: Decrease the value of the variable atomically with specified memory
|
||||
/// order.
|
||||
/// @param: ptr(Input), a pointer to variable which is operated on.
|
||||
/// @param: order(Input), memory order which is relaxed by default.
|
||||
/// @return: T, value of variable prior to the operation.
|
||||
template <class T>
|
||||
static __forceinline T
|
||||
Decrement(T* ptr, std::memory_order order = std::memory_order_relaxed) {
|
||||
BasicCheck<T>(ptr);
|
||||
std::atomic<T>* aptr = reinterpret_cast<std::atomic<T>*>(ptr);
|
||||
return aptr->fetch_sub(1, order);
|
||||
}
|
||||
|
||||
/// @brief: Add value to variable atomically with specified memory order.
|
||||
/// @param: ptr(Input), a pointer to volatile variable which is operated on.
|
||||
/// @param: val(Input), value to be added.
|
||||
/// @param: order(Input), memory order which is relaxed by default.
|
||||
/// @return: T, the value of the variable prior to the addition.
|
||||
template <class T>
|
||||
static __forceinline T
|
||||
Add(volatile T* ptr, T val,
|
||||
std::memory_order order = std::memory_order_relaxed) {
|
||||
BasicCheck<T>(ptr);
|
||||
volatile std::atomic<T>* aptr =
|
||||
reinterpret_cast<volatile std::atomic<T>*>(ptr);
|
||||
return aptr->fetch_add(val, order);
|
||||
}
|
||||
|
||||
/// @brief: Subtract value from the variable atomically with specified memory
|
||||
/// order.
|
||||
/// @param: ptr(Input), a pointer to volatile variable which is operated on.
|
||||
/// @param: val(Input), value to be subtraced.
|
||||
/// @param: order(Input), memory order which is relaxed by default.
|
||||
/// @return: T, value of the variable prior to the subtraction.
|
||||
template <class T>
|
||||
static __forceinline T
|
||||
Sub(volatile T* ptr, T val,
|
||||
std::memory_order order = std::memory_order_relaxed) {
|
||||
BasicCheck<T>(ptr);
|
||||
volatile std::atomic<T>* aptr =
|
||||
reinterpret_cast<volatile std::atomic<T>*>(ptr);
|
||||
return aptr->fetch_sub(val, order);
|
||||
}
|
||||
|
||||
/// @brief: Bit And operation on variable atomically with specified memory
|
||||
/// order.
|
||||
/// @param: ptr(Input), a pointer to volatile variable which is operated on.
|
||||
/// @param: val(Input), value which is ANDed with variable.
|
||||
/// @param: order(Input), memory order which is relaxed by default.
|
||||
/// @return: T, value of variable prior to the operation.
|
||||
template <class T>
|
||||
static __forceinline T
|
||||
And(volatile T* ptr, T val,
|
||||
std::memory_order order = std::memory_order_relaxed) {
|
||||
BasicCheck<T>(ptr);
|
||||
volatile std::atomic<T>* aptr =
|
||||
reinterpret_cast<volatile std::atomic<T>*>(ptr);
|
||||
return aptr->fetch_and(val, order);
|
||||
}
|
||||
|
||||
/// @brief: Bit Or operation on variable atomically with specified memory order.
|
||||
/// @param: ptr(Input), a pointer to volatile variable which is operated on.
|
||||
/// @param: val(Input), value which is ORed with variable.
|
||||
/// @param: order(Input), memory order which is relaxed by default.
|
||||
/// @return: T, value of variable prior to the operation.
|
||||
template <class T>
|
||||
static __forceinline T Or(volatile T* ptr, T val,
|
||||
std::memory_order order = std::memory_order_relaxed) {
|
||||
BasicCheck<T>(ptr);
|
||||
volatile std::atomic<T>* aptr =
|
||||
reinterpret_cast<volatile std::atomic<T>*>(ptr);
|
||||
return aptr->fetch_or(val, order);
|
||||
}
|
||||
|
||||
/// @brief: Bit Xor operation on variable atomically with specified memory
|
||||
/// order.
|
||||
/// @param: ptr(Input), a pointer to volatile variable which is operated on.
|
||||
/// @param: val(Input), value which is XORed with variable.
|
||||
/// @order: order(Input), memory order which is relaxed by default.
|
||||
/// @return: T, valud of variable prior to the opertaion.
|
||||
template <class T>
|
||||
static __forceinline T
|
||||
Xor(volatile T* ptr, T val,
|
||||
std::memory_order order = std::memory_order_relaxed) {
|
||||
BasicCheck<T>(ptr);
|
||||
volatile std::atomic<T>* aptr =
|
||||
reinterpret_cast<volatile std::atomic<T>*>(ptr);
|
||||
return aptr->fetch_xor(val, order);
|
||||
}
|
||||
|
||||
/// @brief: Increase the value of variable atomically with specified memory
|
||||
/// order.
|
||||
/// @param: ptr(Input), a pointer to volatile variable which is operated on.
|
||||
/// @param: order(Input), memory order which is relaxed by default.
|
||||
/// @return: T, value of variable prior to the operation.
|
||||
template <class T>
|
||||
static __forceinline T
|
||||
Increment(volatile T* ptr,
|
||||
std::memory_order order = std::memory_order_relaxed) {
|
||||
BasicCheck<T>(ptr);
|
||||
volatile std::atomic<T>* aptr =
|
||||
reinterpret_cast<volatile std::atomic<T>*>(ptr);
|
||||
return aptr->fetch_add(1, order);
|
||||
}
|
||||
|
||||
/// @brief: Decrease the value of the variable atomically with specified memory
|
||||
/// order.
|
||||
/// @param: ptr(Input), a pointer to volatile variable which is operated on.
|
||||
/// @param: order(Input), memory order which is relaxed by default.
|
||||
/// @return: T, value of variable prior to the operation.
|
||||
template <class T>
|
||||
static __forceinline T
|
||||
Decrement(volatile T* ptr,
|
||||
std::memory_order order = std::memory_order_relaxed) {
|
||||
BasicCheck<T>(ptr);
|
||||
volatile std::atomic<T>* aptr =
|
||||
reinterpret_cast<volatile std::atomic<T>*>(ptr);
|
||||
return aptr->fetch_sub(1, order);
|
||||
}
|
||||
}
|
||||
|
||||
// Remove special assert to avoid name pollution
|
||||
#undef lockless_check
|
||||
|
||||
#endif // HSA_RUNTIME_CORE_UTIL_ATOMIC_HELPERS_H_
|
||||
@@ -1,344 +0,0 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifdef __linux__
|
||||
#include "core/util/os.h"

#include <dlfcn.h>
#include <errno.h>
#include <link.h>
#include <pthread.h>
#include <sched.h>
#include <sys/sysinfo.h>
#include <sys/time.h>
#include <time.h>
#include <unistd.h>

#include <cstring>
#include <string>
|
||||
|
||||
namespace os {
|
||||
|
||||
static_assert(sizeof(LibHandle) == sizeof(void*),
|
||||
"OS abstraction size mismatch");
|
||||
static_assert(sizeof(Mutex) == sizeof(pthread_mutex_t*),
|
||||
"OS abstraction size mismatch");
|
||||
static_assert(sizeof(Thread) == sizeof(pthread_t),
|
||||
"OS abstraction size mismatch");
|
||||
|
||||
// Opens the shared library named by filename with dlopen (lazy binding) and
// returns the resulting handle reinterpreted as a LibHandle. The handle is
// whatever dlopen returned, including NULL.
LibHandle LoadLib(std::string filename) {
  void* native_handle = dlopen(filename.c_str(), RTLD_LAZY);
  LibHandle* as_lib_handle = (LibHandle*)&native_handle;
  return *as_lib_handle;
}
|
||||
|
||||
// Looks up export_name in the library identified by lib.
//
// dlsym searches the given library and all of its load dependencies, so the
// result is additionally verified to live in the file that lib itself was
// loaded from. This lookup pattern matches Windows.
//
// Returns the symbol address, or NULL when the symbol is not found, when it
// resolves into a different file, or when the loader cannot report where the
// library or the symbol came from.
void* GetExportAddress(LibHandle lib, std::string export_name) {
  void* const native_handle = *(void**)&lib;

  void* ret = dlsym(native_handle, export_name.c_str());
  if (ret == NULL) return NULL;

  // Ask the loader which file the handle refers to.
  link_map* map = NULL;
  int err = dlinfo(native_handle, RTLD_DI_LINKMAP, &map);
  assert(err != -1 && "dlinfo failed.");
  // With asserts compiled out a failure must not fall through to a
  // dereference of an unset map pointer.
  if (err == -1 || map == NULL || map->l_name == NULL) return NULL;

  // Ask the loader which file the resolved address belongs to.
  Dl_info info;
  err = dladdr(ret, &info);
  assert(err != 0 && "dladdr failed.");
  if (err == 0 || info.dli_fname == NULL) return NULL;

  // Accept the symbol only when both file names agree.
  if (strcmp(info.dli_fname, map->l_name) == 0) return ret;

  return NULL;
}
|
||||
|
||||
// Releases a library handle via dlclose. The result of dlclose is not
// reported to the caller.
void CloseLib(LibHandle lib) {
  void** native_handle = (void**)&lib;
  dlclose(*native_handle);
}
|
||||
|
||||
// Allocates a pthread mutex with default attributes and returns a pointer to
// it as an opaque Mutex handle. Returns NULL if the mutex cannot be
// initialized, so callers never receive a handle to an uninitialized mutex.
// The handle must be released with DestroyMutex.
Mutex CreateMutex() {
  pthread_mutex_t* mutex = new pthread_mutex_t;
  if (pthread_mutex_init(mutex, NULL) != 0) {
    // Initialization failed: free the storage instead of handing it out.
    delete mutex;
    return NULL;
  }
  return *(Mutex*)&mutex;
}
|
||||
|
||||
// Attempts to take the mutex without blocking. Returns true only when
// pthread_mutex_trylock reports success (0).
bool TryAcquireMutex(Mutex lock) {
  pthread_mutex_t* native_mutex = *(pthread_mutex_t**)&lock;
  const int status = pthread_mutex_trylock(native_mutex);
  return status == 0;
}
|
||||
|
||||
// Takes the mutex via pthread_mutex_lock. Returns true only when
// pthread_mutex_lock reports success (0).
bool AcquireMutex(Mutex lock) {
  pthread_mutex_t* native_mutex = *(pthread_mutex_t**)&lock;
  const int status = pthread_mutex_lock(native_mutex);
  return status == 0;
}
|
||||
|
||||
// Unlocks the mutex. The result of pthread_mutex_unlock is not reported.
void ReleaseMutex(Mutex lock) {
  pthread_mutex_t* native_mutex = *(pthread_mutex_t**)&lock;
  pthread_mutex_unlock(native_mutex);
}
|
||||
|
||||
// Destroys the pthread mutex behind the handle and frees its storage.
void DestroyMutex(Mutex lock) {
  pthread_mutex_t* native_mutex = *(pthread_mutex_t**)&lock;
  pthread_mutex_destroy(native_mutex);
  delete native_mutex;
}
|
||||
|
||||
// Suspends the calling thread for delay_in_millisec milliseconds.
//
// A negative delay is treated as zero. The delay is split into whole seconds
// and nanoseconds for nanosleep, so there is no millisecond-to-microsecond
// multiplication that could overflow, and delays of one second or more do not
// rely on usleep accepting arguments of 1,000,000 or above. As with the
// previous usleep-based version, the sleep may end early if a signal
// interrupts it.
void Sleep(int delay_in_millisec) {
  if (delay_in_millisec < 0) delay_in_millisec = 0;

  struct timespec request;
  request.tv_sec = delay_in_millisec / 1000;
  request.tv_nsec = static_cast<long>(delay_in_millisec % 1000) * 1000000L;
  nanosleep(&request, NULL);
}
|
||||
|
||||
// Gives up the remainder of the calling thread's time slice via sched_yield.
void YieldThread() {
  sched_yield();
}
|
||||
|
||||
struct ThreadArgs {
|
||||
void* entry_args;
|
||||
ThreadEntry entry_function;
|
||||
};
|
||||
|
||||
// pthread start routine: unpacks the ThreadArgs bundle passed as arg, frees
// it, then runs the user's entry function with the user's argument. Always
// returns NULL as the thread result.
void* __stdcall ThreadTrampoline(void* arg) {
  ThreadArgs* bundle = (ThreadArgs*)arg;

  // Copy both fields out first; the bundle is freed before the entry runs.
  void* user_data = bundle->entry_args;
  ThreadEntry entry = bundle->entry_function;
  delete bundle;

  entry(user_data);
  return NULL;
}
|
||||
|
||||
// Starts a new thread that runs function(threadArgument).
//
// function:       entry point executed on the new thread.
// threadArgument: opaque pointer passed to function.
// stackSize:      requested stack size; 0 keeps the pthread default.
//
// Returns the pthread_t reinterpreted as a Thread handle, or NULL when the
// thread could not be created.
Thread CreateThread(ThreadEntry function, void* threadArgument,
                    uint stackSize) {
  // Ownership of this bundle passes to ThreadTrampoline once the thread
  // has actually been started.
  ThreadArgs* args = new ThreadArgs;
  args->entry_args = threadArgument;
  args->entry_function = function;

  pthread_attr_t attrib;
  pthread_attr_init(&attrib);
  if (stackSize != 0) pthread_attr_setstacksize(&attrib, stackSize);

  pthread_t thread;
  const bool success =
      (pthread_create(&thread, &attrib, ThreadTrampoline, args) == 0);
  pthread_attr_destroy(&attrib);

  if (!success) {
    // No thread exists, so 'thread' is not a valid id and must not be
    // joined. The trampoline never ran either, so the bundle is still ours
    // to free.
    delete args;
    return NULL;
  }

  return *(Thread*)&thread;
}
|
||||
|
||||
// Detaches the thread behind the handle via pthread_detach. The result of
// pthread_detach is not reported.
void CloseThread(Thread thread) {
  pthread_t* native_thread = (pthread_t*)&thread;
  pthread_detach(*native_thread);
}
|
||||
|
||||
// Blocks until the thread behind the handle terminates.
//
// Returns true when pthread_join succeeds and false when it fails.
// pthread_join returns 0 on success, so its result must be compared against
// zero; converting the raw int to bool would report success as false.
bool WaitForThread(Thread thread) {
  return pthread_join(*(pthread_t*)&thread, NULL) == 0;
}
|
||||
|
||||
// Waits on each of the first threadCount handles in threads, in order.
// Individual wait results are not inspected; the function always returns
// true.
bool WaitForAllThreads(Thread* threads, uint threadCount) {
  uint index = 0;
  while (index < threadCount) {
    WaitForThread(threads[index]);
    ++index;
  }
  return true;
}
|
||||
|
||||
// Sets environment variable env_var_name to env_var_value for the current
// process, replacing any existing value. The result of setenv is not
// reported.
void SetEnvVar(std::string env_var_name, std::string env_var_value) {
  const char* name = env_var_name.c_str();
  const char* value = env_var_value.c_str();
  const int overwrite_existing = 1;
  setenv(name, value, overwrite_existing);
}
|
||||
|
||||
// Returns the value of environment variable env_var_name, or an empty string
// when the variable is not set.
std::string GetEnvVar(std::string env_var_name) {
  const char* value = getenv(env_var_name.c_str());
  if (value == NULL) {
    return std::string();
  }
  return std::string(value);
}
|
||||
|
||||
// Returns the size of the user-mode virtual address range as a compile-time
// constant selected by _LP64.
size_t GetUserModeVirtualMemorySize() {
#ifdef _LP64
  // https://www.kernel.org/doc/Documentation/x86/x86_64/mm.txt :
  // user space is 0000000000000000 - 00007fffffffffff (=47 bits)
  const size_t user_space_bytes = (size_t)(0x800000000000);
#else
  const size_t user_space_bytes = (size_t)(0xffffffff);  // ~4GB
#endif
  return user_space_bytes;
}
|
||||
|
||||
size_t GetUsablePhysicalHostMemorySize() {
|
||||
struct sysinfo info = {0};
|
||||
if (sysinfo(&info) != 0) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
const size_t physical_size =
|
||||
static_cast<size_t>(info.totalram * info.mem_unit);
|
||||
return std::min(GetUserModeVirtualMemorySize(), physical_size);
|
||||
}
|
||||
|
||||
// Returns the base of the user-mode virtual address range, which this
// implementation reports as address 0.
uintptr_t GetUserModeVirtualMemoryBase() {
  const uintptr_t base_address = (uintptr_t)0;
  return base_address;
}
|
||||
|
||||
// Os event implementation
|
||||
// Backing storage for an EventHandle: a condition variable, the mutex that
// guards it, and the event's flags.
struct EventDescriptor_ {
  // Condition variable waiters block on.
  pthread_cond_t event;
  // Guards state; also the mutex paired with the condition variable.
  pthread_mutex_t mutex;
  // True while the event is signaled.
  bool state;
  // When true, a successful wait clears state again.
  bool auto_reset;
};
typedef struct EventDescriptor_ EventDescriptor;
|
||||
|
||||
// Creates an event object backed by a pthread mutex and condition variable.
//
// auto_reset: when true, a successful wait clears the signaled state.
// init_state: initial signaled state of the event.
//
// Returns the descriptor as an opaque EventHandle, or NULL when allocation
// or initialization of either pthread object fails. The other event
// functions in this file reject a NULL handle with -1. Release the handle
// with DestroyOsEvent.
EventHandle CreateOsEvent(bool auto_reset, bool init_state) {
  EventDescriptor* eventDescrp =
      (EventDescriptor*)malloc(sizeof(EventDescriptor));
  if (eventDescrp == NULL) {
    return NULL;
  }

  if (pthread_mutex_init(&eventDescrp->mutex, NULL) != 0) {
    free(eventDescrp);
    return NULL;
  }

  if (pthread_cond_init(&eventDescrp->event, NULL) != 0) {
    // Undo the mutex initialization before releasing the storage.
    pthread_mutex_destroy(&eventDescrp->mutex);
    free(eventDescrp);
    return NULL;
  }

  eventDescrp->auto_reset = auto_reset;
  eventDescrp->state = init_state;

  return reinterpret_cast<EventHandle>(eventDescrp);
}
|
||||
|
||||
// Destroys the condition variable and mutex of an event and frees its
// descriptor.
//
// Returns -1 for a NULL handle; otherwise the bitwise OR of the results of
// pthread_cond_destroy and pthread_mutex_destroy (0 when both succeed).
int DestroyOsEvent(EventHandle event) {
  if (event == NULL) return -1;

  EventDescriptor* descriptor = reinterpret_cast<EventDescriptor*>(event);
  const int cond_status = pthread_cond_destroy(&descriptor->event);
  const int mutex_status = pthread_mutex_destroy(&descriptor->mutex);
  free(descriptor);

  return cond_status | mutex_status;
}
|
||||
|
||||
// Waits for an event to become signaled.
//
// event:         handle produced by CreateOsEvent.
// milli_seconds: maximum time to wait. 0 polls the event without blocking.
//
// Returns:
//   -1       the handle is NULL.
//   0        the event was signaled; if it is auto-reset, its state has been
//            cleared again.
//   1        zero-timeout poll only: the event is not signaled, or its mutex
//            is currently held by another thread.
//   0x14003  a timed wait expired before the event was signaled.
//   other    error code from the underlying pthread call.
// The two distinct timeout values (1 and 0x14003) are kept exactly as they
// were so existing callers observe the same codes.
int WaitForOsEvent(EventHandle event, unsigned int milli_seconds) {
  if (event == NULL) {
    return -1;
  }

  EventDescriptor* eventDescrp = reinterpret_cast<EventDescriptor*>(event);

  // Acquire the event mutex exactly once. A zero-timeout poll must not block,
  // so it uses trylock and keeps the lock it obtains. Locking a second time
  // after a successful trylock would deadlock the calling thread on a
  // default (non-recursive) mutex.
  if (milli_seconds == 0) {
    const int lock_ret = pthread_mutex_trylock(&eventDescrp->mutex);
    if (lock_ret == EBUSY) {
      // Timeout
      return 1;
    }
    if (lock_ret != 0) {
      return lock_ret;
    }
  } else {
    const int lock_ret = pthread_mutex_lock(&eventDescrp->mutex);
    if (lock_ret != 0) {
      return lock_ret;
    }
  }

  int ret_code = 0;
  if (!eventDescrp->state) {
    if (milli_seconds == 0) {
      // Poll on a non-signaled event: report timeout immediately.
      ret_code = 1;
    } else {
      // Build the absolute deadline: current wall-clock time plus timeout.
      struct timeval now;
      gettimeofday(&now, NULL);

      struct timespec deadline;
      deadline.tv_sec = now.tv_sec + milli_seconds / 1000;
      deadline.tv_nsec = static_cast<long>(now.tv_usec) * 1000 +
                         static_cast<long>(milli_seconds % 1000) * 1000000;

      // tv_nsec must stay below one second; exactly 1000000000 is already
      // out of range, so carry on >=.
      if (deadline.tv_nsec >= 1000000000) {
        deadline.tv_sec += 1;
        deadline.tv_nsec -= 1000000000;
      }

      // A condition wait may return without the event having been set, so
      // re-check the state and keep waiting against the same deadline.
      while (!eventDescrp->state && ret_code == 0) {
        ret_code = pthread_cond_timedwait(&eventDescrp->event,
                                          &eventDescrp->mutex, &deadline);
      }

      // Time out
      if (ret_code == ETIMEDOUT) {
        ret_code = 0x14003;
      }

      if (ret_code == 0 && eventDescrp->auto_reset) {
        eventDescrp->state = false;
      }
    }
  } else if (eventDescrp->auto_reset) {
    // Already signaled: consume the signal for auto-reset events.
    eventDescrp->state = false;
  }
  pthread_mutex_unlock(&eventDescrp->mutex);

  return ret_code;
}
|
||||
|
||||
// Puts an event into the signaled state and wakes waiting threads.
//
// An auto-reset event wakes a single waiter, since the first successful wait
// clears the state again. A manual-reset event stays signaled for every
// waiter, so all threads currently blocked on it are woken.
//
// Returns -1 for a NULL handle, the pthread error code if the event mutex
// cannot be locked (the state is then left untouched), and otherwise the
// bitwise OR of the unlock and wake-up results (0 when both succeed).
int SetOsEvent(EventHandle event) {
  if (event == NULL) {
    return -1;
  }

  EventDescriptor* eventDescrp = reinterpret_cast<EventDescriptor*>(event);

  int ret_code = pthread_mutex_lock(&eventDescrp->mutex);
  if (ret_code != 0) {
    return ret_code;
  }
  eventDescrp->state = true;
  // Read the flag while the descriptor is still locked.
  const bool wake_single_waiter = eventDescrp->auto_reset;
  ret_code = pthread_mutex_unlock(&eventDescrp->mutex);

  if (wake_single_waiter) {
    ret_code |= pthread_cond_signal(&eventDescrp->event);
  } else {
    ret_code |= pthread_cond_broadcast(&eventDescrp->event);
  }

  return ret_code;
}
|
||||
|
||||
// Puts an event into the non-signaled state.
//
// Returns -1 for a NULL handle, the pthread error code if the event mutex
// cannot be locked (the state is then left untouched), and otherwise the
// result of unlocking the mutex (0 on success).
int ResetOsEvent(EventHandle event) {
  if (event == NULL) {
    return -1;
  }

  EventDescriptor* eventDescrp = reinterpret_cast<EventDescriptor*>(event);

  int ret_code = pthread_mutex_lock(&eventDescrp->mutex);
  if (ret_code != 0) {
    // Do not modify the state without holding the lock, and do not let the
    // unlock result hide this failure.
    return ret_code;
  }
  eventDescrp->state = false;
  ret_code = pthread_mutex_unlock(&eventDescrp->mutex);

  return ret_code;
}
|
||||
|
||||
// Reads CLOCK_MONOTONIC_RAW and returns it as a single nanosecond count
// (seconds * 1e9 + nanoseconds).
uint64_t ReadAccurateClock() {
  timespec now;
  int err = clock_gettime(CLOCK_MONOTONIC_RAW, &now);
  assert(err == 0 && "clock_gettime(CLOCK_MONOTONIC_RAW,...) failed");

  const uint64_t seconds_as_ns = uint64_t(now.tv_sec) * 1000000000ull;
  const uint64_t sub_second_ns = uint64_t(now.tv_nsec);
  return seconds_as_ns + sub_second_ns;
}
|
||||
|
||||
// Returns the frequency, in Hz, of CLOCK_MONOTONIC_RAW as derived from its
// reported resolution.
//
// clock_getres reports the clock period in nanoseconds, so the frequency is
// one second (1e9 ns) divided by that period. Multiplying the period by 1e9
// only yields the right answer when the period is exactly 1 ns.
//
// Returns 0 if the resolution cannot be read or is not a positive sub-second
// value (these conditions also trip the asserts in debug builds).
uint64_t AccurateClockFrequency() {
  timespec time;
  int err = clock_getres(CLOCK_MONOTONIC_RAW, &time);
  assert(err == 0 && "clock_getres(CLOCK_MONOTONIC_RAW,...) failed");
  assert(time.tv_sec == 0 &&
         "clock_getres(CLOCK_MONOTONIC_RAW,...) returned very low frequency "
         "(<1Hz).");
  assert(time.tv_nsec < 0xFFFFFFFF &&
         "clock_getres(CLOCK_MONOTONIC_RAW,...) returned very low frequency "
         "(<1Hz).");

  // Guard the division for builds where the asserts are compiled out.
  if (err != 0 || time.tv_sec != 0 || time.tv_nsec <= 0) {
    return 0;
  }

  return 1000000000ull / uint64_t(time.tv_nsec);
}
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -1,136 +0,0 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Library of synchronization primitives - to be added to as needed.
|
||||
|
||||
#ifndef HSA_RUNTIME_CORE_UTIL_LOCKS_H_
|
||||
#define HSA_RUNTIME_CORE_UTIL_LOCKS_H_
|
||||
|
||||
#include "utils.h"
|
||||
#include "os.h"
|
||||
|
||||
/// @brief: A class behaves as a lock in a scope. When trying to enter into the
|
||||
/// critical section, creat a object of this class. After the control path goes
|
||||
/// out of the scope, it will release the lock automatically.
|
||||
template <class LockType>
|
||||
class ScopedAcquire {
|
||||
public:
|
||||
/// @brief: When constructing, acquire the lock.
|
||||
/// @param: lock(Input), pointer to an existing lock.
|
||||
explicit ScopedAcquire(LockType* lock) : lock_(lock) { lock_->Acquire(); }
|
||||
|
||||
/// @brief: when destructing, release the lock.
|
||||
~ScopedAcquire() { lock_->Release(); }
|
||||
|
||||
private:
|
||||
LockType* lock_;
|
||||
/// @brief: Disable copiable and assignable ability.
|
||||
DISALLOW_COPY_AND_ASSIGN(ScopedAcquire);
|
||||
};
|
||||
|
||||
/// @brief: a class represents a kernel mutex.
|
||||
/// Uses the kernel's scheduler to keep the waiting thread from being scheduled
|
||||
/// until the lock is released (Best for long waits, though anything using
|
||||
/// a kernel object is a long wait).
|
||||
class KernelMutex {
|
||||
public:
|
||||
KernelMutex() { lock_ = os::CreateMutex(); }
|
||||
~KernelMutex() { os::DestroyMutex(lock_); }
|
||||
|
||||
bool Try() { return os::TryAcquireMutex(lock_); }
|
||||
bool Acquire() { return os::AcquireMutex(lock_); }
|
||||
void Release() { os::ReleaseMutex(lock_); }
|
||||
|
||||
private:
|
||||
os::Mutex lock_;
|
||||
|
||||
/// @brief: Disable copiable and assignable ability.
|
||||
DISALLOW_COPY_AND_ASSIGN(KernelMutex);
|
||||
};
|
||||
|
||||
/// @brief: represents a spin lock.
|
||||
/// For very short hold durations on the order of the thread scheduling
|
||||
/// quanta or less.
|
||||
class SpinMutex {
|
||||
public:
|
||||
SpinMutex() { lock_ = 0; }
|
||||
|
||||
bool Try() {
|
||||
int old = 0;
|
||||
return lock_.compare_exchange_strong(old, 1);
|
||||
}
|
||||
bool Acquire() {
|
||||
int old = 0;
|
||||
while (!lock_.compare_exchange_strong(old, 1))
|
||||
{
|
||||
old=0;
|
||||
os::YieldThread();
|
||||
}
|
||||
return true;
|
||||
}
|
||||
void Release() { lock_ = 0; }
|
||||
|
||||
private:
|
||||
std::atomic<int> lock_;
|
||||
|
||||
/// @brief: Disable copiable and assignable ability.
|
||||
DISALLOW_COPY_AND_ASSIGN(SpinMutex);
|
||||
};
|
||||
|
||||
class KernelEvent {
|
||||
public:
|
||||
KernelEvent() { evt_ = os::CreateOsEvent(true, true); }
|
||||
~KernelEvent() { os::DestroyOsEvent(evt_); }
|
||||
|
||||
bool IsSet() { return os::WaitForOsEvent(evt_, 0)==0; }
|
||||
bool WaitForSet() { return os::WaitForOsEvent(evt_, 0xFFFFFFFF)==0; }
|
||||
void Set() { os::SetOsEvent(evt_); }
|
||||
void Reset() { os::ResetOsEvent(evt_); }
|
||||
|
||||
private:
|
||||
os::EventHandle evt_;
|
||||
|
||||
/// @brief: Disable copiable and assignable ability.
|
||||
DISALLOW_COPY_AND_ASSIGN(KernelEvent);
|
||||
};
|
||||
|
||||
#endif  // HSA_RUNTIME_CORE_UTIL_LOCKS_H_
|
||||
@@ -1,216 +0,0 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Minimal operating system abstraction interfaces.
|
||||
|
||||
#ifndef HSA_RUNTIME_CORE_UTIL_OS_H_
|
||||
#define HSA_RUNTIME_CORE_UTIL_OS_H_
|
||||
|
||||
#include <string>
|
||||
#include "utils.h"
|
||||
|
||||
namespace os {
|
||||
typedef void* LibHandle;
|
||||
typedef void* Mutex;
|
||||
typedef void* Thread;
|
||||
typedef void* EventHandle;
|
||||
|
||||
enum class os_t { OS_WIN = 0, OS_LINUX, COUNT };
|
||||
/// @brief: Converts an os_t enumerator to its underlying integral value.
/// @param: val(Input), the enumerator to convert.
/// @return: the value of val expressed in os_t's underlying type.
static __forceinline std::underlying_type<os_t>::type os_index(os_t val) {
  typedef std::underlying_type<os_t>::type index_type;
  return static_cast<index_type>(val);
}
|
||||
|
||||
#ifdef _WIN32
|
||||
static const os_t current_os = os_t::OS_WIN;
|
||||
#elif __linux__
|
||||
static const os_t current_os = os_t::OS_LINUX;
|
||||
#else
|
||||
static_assert(false, "Operating System not detected!");
|
||||
#endif
|
||||
|
||||
/// @brief: Loads dynamic library based on file name. Return value will be NULL
|
||||
/// if failed.
|
||||
/// @param: filename(Input), file name of the library.
|
||||
/// @return: LibHandle.
|
||||
LibHandle LoadLib(std::string filename);
|
||||
|
||||
/// @brief: Gets the address of exported symbol. Returns NULL if failed.
|
||||
/// @param: lib(Input), handle of the library to export from.
|
||||
/// @param: export_name(Input), the name of the exported symbol.
|
||||
/// @return: void*.
|
||||
void* GetExportAddress(LibHandle lib, std::string export_name);
|
||||
|
||||
/// @brief: Unloads the dynamic library.
|
||||
/// @param: lib(Input), library handle which will be unloaded.
|
||||
void CloseLib(LibHandle lib);
|
||||
|
||||
/// @brief: Creates a mutex, will return NULL if failed.
|
||||
/// @param: void.
|
||||
/// @return: Mutex.
|
||||
Mutex CreateMutex();
|
||||
|
||||
/// @brief: Tries to acquire the mutex once; returns true if successful.
|
||||
/// @param: lock(Input), handle to the mutex.
|
||||
/// @return: bool.
|
||||
bool TryAcquireMutex(Mutex lock);
|
||||
|
||||
/// @brief: Acquires the mutex; if the mutex is locked, it will wait until it is
|
||||
/// released. If the mutex is acquired successfully, it will return true.
|
||||
/// @param: lock(Input), handle to the mutex.
|
||||
/// @return: bool.
|
||||
bool AcquireMutex(Mutex lock);
|
||||
|
||||
/// @brief: Releases the mutex.
|
||||
/// @param: lock(Input), handle to the mutex.
|
||||
/// @return: void.
|
||||
void ReleaseMutex(Mutex lock);
|
||||
|
||||
/// @brief: Destroys the mutex.
|
||||
/// @param: lock(Input), handle to the mutex.
|
||||
/// @return: void.
|
||||
void DestroyMutex(Mutex lock);
|
||||
|
||||
/// @brief: Puts current thread to sleep.
|
||||
/// @param: delayInMs(Input), time in millisecond for sleeping.
|
||||
/// @return: void.
|
||||
void Sleep(int delayInMs);
|
||||
|
||||
/// @brief: Yields current thread.
|
||||
/// @param: void.
|
||||
/// @return: void.
|
||||
void YieldThread();
|
||||
|
||||
typedef void (*ThreadEntry)(void*);
|
||||
|
||||
/// @brief: Creates a thread; will return NULL if failed.
|
||||
/// @param: entry_function(Input), a pointer to the function which the thread
|
||||
/// starts from.
|
||||
/// @param: entry_argument(Input), a pointer to the argument of the thread
|
||||
/// function.
|
||||
/// @param: stack_size(Input), size of the thread's stack, 0 by default.
|
||||
/// @return: Thread, a handle to thread created.
|
||||
Thread CreateThread(ThreadEntry entry_function, void* entry_argument,
|
||||
uint stack_size = 0);
|
||||
|
||||
/// @brief: Destroys the thread.
|
||||
/// @param: thread(Input), handle of the thread to be destroyed.
|
||||
/// @return: void.
|
||||
void CloseThread(Thread thread);
|
||||
|
||||
/// @brief: Waits for a specific thread to finish; returns true if successful.
|
||||
/// @param: thread(Input), handle to waiting thread.
|
||||
/// @return: bool.
|
||||
bool WaitForThread(Thread thread);
|
||||
|
||||
/// @brief: Waits for multiple threads to finish; returns true if successful.
|
||||
/// @param: threads(Input), a pointer to a list of thread handles.
|
||||
/// @param: thread_count(Input), number of threads to be waited on.
|
||||
/// @return: bool.
|
||||
bool WaitForAllThreads(Thread* threads, uint thread_count);
|
||||
|
||||
/// @brief: Sets the environment value.
|
||||
/// @param: env_var_name(Input), name of the environment value.
|
||||
/// @param: env_var_value(Input), value of the environment value.
|
||||
/// @return: void.
|
||||
void SetEnvVar(std::string env_var_name, std::string env_var_value);
|
||||
|
||||
/// @brief: Gets the value of environment value.
|
||||
/// @param: env_var_name(Input), name of the environment value.
|
||||
/// @return: std::string, value of the environment value, returned as string.
|
||||
std::string GetEnvVar(std::string env_var_name);
|
||||
|
||||
/// @brief: Gets the max virtual memory size accessible to the application.
|
||||
/// @param: void.
|
||||
/// @return: size_t, size of the accessible memory to the application.
|
||||
size_t GetUserModeVirtualMemorySize();
|
||||
|
||||
/// @brief: Gets the max physical host system memory size.
|
||||
/// @param: void.
|
||||
/// @return: size_t, size of the physical host system memory.
|
||||
size_t GetUsablePhysicalHostMemorySize();
|
||||
|
||||
/// @brief: Gets the virtual memory base address. It is hardcoded to 0.
|
||||
/// @param: void.
|
||||
/// @return: uintptr_t, always 0.
|
||||
uintptr_t GetUserModeVirtualMemoryBase();
|
||||
|
||||
/// @brief os event api, create an event
|
||||
/// @param: auto_reset whether an event can reset the status automatically
|
||||
/// @param: init_state initial state of the event
|
||||
/// @return: event handle
|
||||
EventHandle CreateOsEvent(bool auto_reset, bool init_state);
|
||||
|
||||
/// @brief os event api, destroy an event
|
||||
/// @param: event handle
|
||||
/// @return: whether destroy is correct
|
||||
int DestroyOsEvent(EventHandle event);
|
||||
|
||||
/// @brief os event api, wait on event
|
||||
/// @param: event Event handle
|
||||
/// @param: milli_seconds wait time
|
||||
/// @return: Indicate success or timeout
|
||||
int WaitForOsEvent(EventHandle event, unsigned int milli_seconds);
|
||||
|
||||
/// @brief os event api, set event state
|
||||
/// @param: event Event handle
|
||||
/// @return: Whether event set is correct
|
||||
int SetOsEvent(EventHandle event);
|
||||
|
||||
/// @brief os event api, reset event state
|
||||
/// @param: event Event handle
|
||||
/// @return: Whether event reset is correct
|
||||
int ResetOsEvent(EventHandle event);
|
||||
|
||||
/// @brief reads a clock which is deemed to be accurate for elapsed time
|
||||
/// measurements, though not necessarily fast to query
|
||||
/// @return clock counter value
|
||||
uint64_t ReadAccurateClock();
|
||||
|
||||
/// @brief retrieves the frequency in Hz of the unit used in ReadAccurateClock.
|
||||
/// It does not necessarily reflect the resolution of the clock, but is the
|
||||
/// value needed to convert a difference in the clock's counter value to elapsed
|
||||
/// seconds. This frequency does not change at runtime.
|
||||
/// @return returns the frequency
|
||||
uint64_t AccurateClockFrequency();
|
||||
}
|
||||
|
||||
#endif // HSA_RUNTIME_CORE_UTIL_OS_H_
|
||||
@@ -1,174 +0,0 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "small_heap.h"
|
||||
|
||||
// Coalesces two address-adjacent free blocks into one.
//
// `keep` must end exactly where `destroy` begins and both must currently be
// free (checked by assertions). `keep` absorbs the length of `destroy` and
// takes over its forward free-list link; the block that followed `destroy`
// in the free list (if any) is re-pointed back at `keep`. The map entry for
// `destroy` is then erased.
//
// Returns the iterator of the surviving block (`keep`).
SmallHeap::memory_t::iterator SmallHeap::merge(
    SmallHeap::memory_t::iterator& keep,
    SmallHeap::memory_t::iterator& destroy) {
  assert((char*)keep->first + keep->second.len == (char*)destroy->first &&
         "Invalid merge");
  assert(keep->second.isfree() && "Merge with allocated block");
  assert(destroy->second.isfree() && "Merge with allocated block");

  Node& survivor = keep->second;
  const Node& absorbed = destroy->second;

  // Grow the surviving block and splice the absorbed one out of the list.
  survivor.len += absorbed.len;
  survivor.next_free = absorbed.next_free;

  // Only touch a successor when the absorbed block was not the list tail.
  if (!absorbed.islastfree()) {
    memory[absorbed.next_free].prior_free = keep->first;
  }

  memory.erase(destroy);
  return keep;
}
|
||||
|
||||
void SmallHeap::free(void* ptr) {
|
||||
if (ptr == NULL) return;
|
||||
|
||||
auto iterator = memory.find(ptr);
|
||||
|
||||
// Check for illegal free
|
||||
if (iterator == memory.end()) {
|
||||
assert(false && "Illegal free.");
|
||||
return;
|
||||
}
|
||||
|
||||
const auto start_guard = memory.find(0);
|
||||
const auto end_guard = memory.find((void*)0xFFFFFFFFFFFFFFFFull);
|
||||
|
||||
// Return memory to total and link node into free list
|
||||
total_free += iterator->second.len;
|
||||
if (first_free < iterator->first) {
|
||||
auto before = iterator;
|
||||
before--;
|
||||
while (before != start_guard && !before->second.isfree()) before--;
|
||||
assert(before->second.next_free > iterator->first &&
|
||||
"Inconsistency in small heap.");
|
||||
iterator->second.prior_free = before->first;
|
||||
iterator->second.next_free = before->second.next_free;
|
||||
before->second.next_free = iterator->first;
|
||||
if (!iterator->second.islastfree())
|
||||
memory[iterator->second.next_free].prior_free = iterator->first;
|
||||
} else {
|
||||
iterator->second.setfirstfree();
|
||||
iterator->second.next_free = first_free;
|
||||
first_free = iterator->first;
|
||||
if (!iterator->second.islastfree())
|
||||
memory[iterator->second.next_free].prior_free = iterator->first;
|
||||
}
|
||||
|
||||
// Attempt compaction
|
||||
auto before = iterator;
|
||||
before--;
|
||||
if (before != start_guard) {
|
||||
if (before->second.isfree()) {
|
||||
iterator = merge(before, iterator);
|
||||
}
|
||||
}
|
||||
|
||||
auto after = iterator;
|
||||
after++;
|
||||
if (after != end_guard) {
|
||||
if (after->second.isfree()) {
|
||||
iterator = merge(iterator, after);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void* SmallHeap::alloc(size_t bytes) {
|
||||
// Is enough memory available?
|
||||
if ((bytes > total_free) || (bytes == 0)) return NULL;
|
||||
|
||||
memory_t::iterator current;
|
||||
memory_t::iterator prior;
|
||||
|
||||
// Walk the free list and allocate at first fitting location
|
||||
prior = current = memory.find(first_free);
|
||||
while (true) {
|
||||
if (bytes <= current->second.len) {
|
||||
// Decrement from total
|
||||
total_free -= bytes;
|
||||
|
||||
// Is allocation an exact fit?
|
||||
if (bytes == current->second.len) {
|
||||
if (prior == current) {
|
||||
first_free = current->second.next_free;
|
||||
if (!current->second.islastfree())
|
||||
memory[current->second.next_free].setfirstfree();
|
||||
} else {
|
||||
prior->second.next_free = current->second.next_free;
|
||||
if (!current->second.islastfree())
|
||||
memory[current->second.next_free].prior_free = prior->first;
|
||||
}
|
||||
current->second.next_free = NULL;
|
||||
return current->first;
|
||||
} else {
|
||||
// Split current node
|
||||
void* remaining = (char*)current->first + bytes;
|
||||
Node& node = memory[remaining];
|
||||
node.next_free = current->second.next_free;
|
||||
node.prior_free = current->second.prior_free;
|
||||
node.len = current->second.len - bytes;
|
||||
current->second.len = bytes;
|
||||
|
||||
if (prior == current) {
|
||||
first_free = remaining;
|
||||
node.setfirstfree();
|
||||
} else {
|
||||
prior->second.next_free = remaining;
|
||||
node.prior_free = prior->first;
|
||||
}
|
||||
if (!node.islastfree()) memory[node.next_free].prior_free = remaining;
|
||||
|
||||
current->second.next_free = NULL;
|
||||
return current->first;
|
||||
}
|
||||
}
|
||||
|
||||
// End of free list?
|
||||
if (current->second.islastfree()) break;
|
||||
|
||||
prior = current;
|
||||
current = memory.find(current->second.next_free);
|
||||
}
|
||||
|
||||
// Can't service the request due to fragmentation
|
||||
return NULL;
|
||||
}
|
||||
@@ -1,114 +0,0 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// A simple first fit memory allocator with eager compaction. For use with few
|
||||
// items (where list iteration is faster than trees).
|
||||
// Not thread safe!
|
||||
|
||||
#ifndef HSA_RUNTME_CORE_UTIL_SMALL_HEAP_H_
|
||||
#define HSA_RUNTME_CORE_UTIL_SMALL_HEAP_H_
|
||||
|
||||
#include "utils.h"
|
||||
|
||||
#include <map>
|
||||
|
||||
class SmallHeap {
|
||||
public:
|
||||
class Node {
|
||||
public:
|
||||
size_t len;
|
||||
void* next_free;
|
||||
void* prior_free;
|
||||
static const intptr_t END = -1;
|
||||
|
||||
__forceinline bool isfree() const { return next_free != NULL; }
|
||||
__forceinline bool islastfree() const { return intptr_t(next_free) == END; }
|
||||
__forceinline bool isfirstfree() const {
|
||||
return intptr_t(prior_free) == END;
|
||||
}
|
||||
__forceinline void setlastfree() {
|
||||
*reinterpret_cast<intptr_t*>(&next_free) = END;
|
||||
}
|
||||
__forceinline void setfirstfree() {
|
||||
*reinterpret_cast<intptr_t*>(&prior_free) = END;
|
||||
}
|
||||
};
|
||||
|
||||
private:
|
||||
SmallHeap(const SmallHeap& rhs);
|
||||
SmallHeap& operator=(const SmallHeap& rhs);
|
||||
|
||||
void* const pool;
|
||||
const size_t length;
|
||||
|
||||
size_t total_free;
|
||||
void* first_free;
|
||||
std::map<void*, Node> memory;
|
||||
|
||||
typedef decltype(memory) memory_t;
|
||||
memory_t::iterator merge(memory_t::iterator& keep,
|
||||
memory_t::iterator& destroy);
|
||||
|
||||
public:
|
||||
SmallHeap() : pool(NULL), length(0), total_free(0) {}
|
||||
SmallHeap(void* base, size_t length)
|
||||
: pool(base), length(length), total_free(length) {
|
||||
first_free = pool;
|
||||
|
||||
Node& node = memory[first_free];
|
||||
node.len = length;
|
||||
node.setlastfree();
|
||||
node.setfirstfree();
|
||||
|
||||
memory[0].len = 0;
|
||||
memory[(void*)0xFFFFFFFFFFFFFFFFull].len = 0;
|
||||
}
|
||||
|
||||
void* alloc(size_t bytes);
|
||||
void free(void* ptr);
|
||||
|
||||
void* base() const { return pool; }
|
||||
size_t size() const { return length; }
|
||||
size_t remaining() const { return total_free; }
|
||||
};
|
||||
|
||||
#endif
|
||||
@@ -1,105 +0,0 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "core/util/timer.h"
|
||||
|
||||
namespace timer {
|
||||
|
||||
// Queries the accurate clock's tick frequency once and derives the length of
// one tick in nanoseconds from it.
accurate_clock::init::init() {
  const raw_frequency ticks_per_second = os::AccurateClockFrequency();
  freq = ticks_per_second;
  accurate_clock::period_ns = 1e9 / double(ticks_per_second);
}
|
||||
|
||||
// Calibrates the fast clock using the accurate clock.
|
||||
fast_clock::init::init() {
|
||||
typedef accurate_clock clock;
|
||||
clock::duration delay(std::chrono::milliseconds(1));
|
||||
|
||||
// calibrate clock
|
||||
fast_clock::raw_rep min = 0;
|
||||
clock::duration elapsed = clock::duration::max();
|
||||
|
||||
do {
|
||||
for (int t = 0; t < 10; t++) {
|
||||
fast_clock::raw_rep r1, r2;
|
||||
clock::time_point t0, t1, t2, t3;
|
||||
|
||||
t0 = clock::now();
|
||||
std::atomic_signal_fence(std::memory_order_acq_rel);
|
||||
r1 = fast_clock::raw_now();
|
||||
std::atomic_signal_fence(std::memory_order_acq_rel);
|
||||
t1 = clock::now();
|
||||
std::atomic_signal_fence(std::memory_order_acq_rel);
|
||||
|
||||
do {
|
||||
t2 = clock::now();
|
||||
} while (t2 - t1 < delay);
|
||||
|
||||
std::atomic_signal_fence(std::memory_order_acq_rel);
|
||||
r2 = fast_clock::raw_now();
|
||||
std::atomic_signal_fence(std::memory_order_acq_rel);
|
||||
t3 = clock::now();
|
||||
|
||||
// If elapsed time is shorter than last recorded time and both the start
|
||||
// and end times are confirmed correlated then record the clock readings.
|
||||
// This protects against inaccuracy due to thread switching
|
||||
if ((t3 - t1 < elapsed) && ((t1 - t0) * 10 < (t2 - t1)) &&
|
||||
((t3 - t2) * 10 < (t2 - t1))) {
|
||||
elapsed = t3 - t1;
|
||||
min = r2 - r1;
|
||||
}
|
||||
}
|
||||
delay += delay;
|
||||
} while (min < 1000);
|
||||
|
||||
fast_clock::freq = double(min) / duration_in_seconds(elapsed);
|
||||
fast_clock::period_ps = 1e12 / fast_clock::freq;
|
||||
}
|
||||
|
||||
double accurate_clock::period_ns;
|
||||
accurate_clock::raw_frequency accurate_clock::freq;
|
||||
accurate_clock::init accurate_clock::accurate_clock_init;
|
||||
|
||||
double fast_clock::period_ps;
|
||||
fast_clock::raw_frequency fast_clock::freq;
|
||||
fast_clock::init fast_clock::fast_clock_init;
|
||||
}
|
||||
@@ -1,162 +0,0 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef HSA_RUNTIME_CORE_UTIL_TIMER_H_
|
||||
#define HSA_RUNTIME_CORE_UTIL_TIMER_H_
|
||||
|
||||
#include "core/util/utils.h"
|
||||
#include "core/util/os.h"
|
||||
#include <chrono>
|
||||
|
||||
#include <type_traits>
|
||||
|
||||
namespace timer {
|
||||
|
||||
// Needed to patch around a mixed arithmetic bug in MSVC's duration_cast as of
|
||||
// VS 2013.
|
||||
template <bool isFloat, bool isSigned>
|
||||
struct wide_type {
|
||||
typedef double type;
|
||||
};
|
||||
template <>
|
||||
struct wide_type<false, false> {
|
||||
typedef uintmax_t type;
|
||||
};
|
||||
template <>
|
||||
struct wide_type<false, true> {
|
||||
typedef intmax_t type;
|
||||
};
|
||||
|
||||
template <typename To, typename Rep, typename Period>
|
||||
static __forceinline To
|
||||
duration_cast(const std::chrono::duration<Rep, Period>& d) {
|
||||
typedef typename wide_type<std::is_floating_point<Rep>::value,
|
||||
std::is_signed<Rep>::value>::type wide;
|
||||
typedef std::chrono::duration<wide, typename To::period> unit_convert_t;
|
||||
|
||||
unit_convert_t temp = std::chrono::duration_cast<unit_convert_t>(d);
|
||||
return To(static_cast<typename To::rep>(temp.count()));
|
||||
}
|
||||
// End patch
|
||||
|
||||
template <typename Rep, typename Period>
|
||||
static __forceinline double duration_in_seconds(
|
||||
std::chrono::duration<Rep, Period> delta) {
|
||||
typedef std::chrono::duration<double, std::ratio<1, 1>> seconds;
|
||||
return seconds(delta).count();
|
||||
}
|
||||
|
||||
template <typename rep>
|
||||
static __forceinline rep duration_from_seconds(double delta) {
|
||||
typedef std::chrono::duration<double, std::ratio<1, 1>> seconds;
|
||||
return std::chrono::duration_cast<rep>(seconds(delta));
|
||||
}
|
||||
|
||||
// Provices a C++11 standard clock interface to the os::AccurateClock functions
|
||||
class accurate_clock {
|
||||
public:
|
||||
typedef double rep;
|
||||
typedef std::nano period;
|
||||
typedef std::chrono::duration<rep, period> duration;
|
||||
typedef std::chrono::time_point<accurate_clock> time_point;
|
||||
|
||||
static const bool is_steady = true;
|
||||
|
||||
static __forceinline time_point now() {
|
||||
return time_point(duration(raw_now() * period_ns));
|
||||
}
|
||||
|
||||
// These two extra APIs and types let us use clocks without conversion to the
|
||||
// arbitrary period unit
|
||||
typedef uint64_t raw_rep;
|
||||
typedef uint64_t raw_frequency;
|
||||
|
||||
static __forceinline raw_rep raw_now() { return os::ReadAccurateClock(); }
|
||||
static __forceinline raw_frequency raw_freq() { return freq; }
|
||||
|
||||
private:
|
||||
static double period_ns;
|
||||
static raw_frequency freq;
|
||||
|
||||
class init {
|
||||
public:
|
||||
init();
|
||||
};
|
||||
static init accurate_clock_init;
|
||||
};
|
||||
|
||||
// Provices a C++11 standard clock interface to the lowest latency approximate
|
||||
// clock
|
||||
class fast_clock {
|
||||
public:
|
||||
typedef double rep;
|
||||
typedef std::pico period;
|
||||
typedef std::chrono::duration<rep, period> duration;
|
||||
typedef std::chrono::time_point<fast_clock> time_point;
|
||||
|
||||
static const bool is_steady = true;
|
||||
|
||||
static __forceinline time_point now() {
|
||||
return time_point(duration(raw_now() * period_ps));
|
||||
}
|
||||
|
||||
// These two extra APIs and types let us use clocks without conversion to the
|
||||
// arbitrary period unit
|
||||
typedef uint64_t raw_rep;
|
||||
typedef double raw_frequency;
|
||||
|
||||
static __forceinline raw_rep raw_now() { return __rdtsc(); }
|
||||
static __forceinline raw_frequency raw_freq() { return freq; }
|
||||
|
||||
private:
|
||||
static double period_ps;
|
||||
static raw_frequency freq;
|
||||
|
||||
class init {
|
||||
public:
|
||||
init();
|
||||
};
|
||||
static init fast_clock_init;
|
||||
};
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -1,267 +0,0 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Generally useful utility functions
|
||||
|
||||
#ifndef HSA_RUNTIME_CORE_UTIL_UTILS_H_
|
||||
#define HSA_RUNTIME_CORE_UTIL_UTILS_H_
|
||||
|
||||
#include "stdint.h"
|
||||
#include "stddef.h"
|
||||
#include "stdlib.h"
|
||||
#include <assert.h>
|
||||
|
||||
typedef unsigned int uint;
|
||||
typedef uint64_t uint64;
|
||||
|
||||
#if defined(__GNUC__)
|
||||
#include "mm_malloc.h"
|
||||
#if defined(__i386__) || defined(__x86_64__)
|
||||
#include <x86intrin.h>
|
||||
#else
|
||||
#error \
|
||||
"Processor or compiler not identified. " \
|
||||
"Need to provide a lightweight approximate clock interface via function uint64_t __rdtsc() or adapt timer.h to your platform."
|
||||
#endif
|
||||
|
||||
// GCC-compatible compilers: map the MSVC spellings used by the rest of this
// header onto the equivalent GNU attributes and builtins.

// Request unconditional inlining.
#define __forceinline __inline__ __attribute__((always_inline))

// Stand-in for the MSVC intrinsic of the same name; implemented as a trap.
static __forceinline void __debugbreak() {
  __builtin_trap();
}

// __declspec(x) is forwarded verbatim as __attribute__((x)).
#define __declspec(x) __attribute__((x))

// __stdcall expands to nothing here (the attribute form is left disabled).
#undef __stdcall
#define __stdcall  // __attribute__((__stdcall__))

// Minimum alignment, in bytes, for a type or variable.
#define __ALIGNED__(x) __attribute__((aligned(x)))
|
||||
|
||||
/// @brief: MSVC-named wrapper that forwards to _mm_malloc.
/// @param: size(Input), number of bytes requested.
/// @param: alignment(Input), alignment passed through to _mm_malloc.
/// @return: void*, whatever _mm_malloc returns.
static __forceinline void* _aligned_malloc(size_t size, size_t alignment) {
  void* const block = _mm_malloc(size, alignment);
  return block;
}
|
||||
/// @brief: MSVC-named wrapper that forwards to _mm_free.
/// @param: ptr(Input), pointer handed to _mm_free.
/// @return: void.
static __forceinline void _aligned_free(void* ptr) {
  _mm_free(ptr);
}
|
||||
#elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))
|
||||
#include "intrin.h"
|
||||
#define __ALIGNED__(x) __declspec(align(x))
|
||||
#if (_MSC_VER < 1800)
|
||||
/// @brief: Provides strtoull for MSVC versions older than 1800 by forwarding
/// to _strtoui64.
/// @param: str(Input), text to parse.
/// @param: endptr(Output), forwarded to _strtoui64.
/// @param: base(Input), numeric base forwarded to _strtoui64.
/// @return: unsigned long long, the converted value.
static __forceinline unsigned long long int strtoull(const char* str,
                                                     char** endptr, int base) {
  const unsigned long long parsed =
      static_cast<unsigned long long>(_strtoui64(str, endptr, base));
  return parsed;
}
|
||||
#endif
|
||||
#else
|
||||
#error "Compiler and/or processor not identified."
|
||||
#endif
|
||||
|
||||
// Stringify after macro expansion: STRING expands its argument first, then
// STRING2 applies the # operator.
#define STRING2(x) #x
#define STRING(x) STRING2(x)

// Token-paste after macro expansion: PASTE expands both arguments first, then
// PASTE2 applies the ## operator.
#define PASTE2(x, y) x##y
#define PASTE(x, y) PASTE2(x, y)
|
||||
|
||||
// Declares (without defining) the copy constructor, move constructor, copy
// assignment and move assignment operators of TypeName.
// Place it in the private: section of a class so that outside code cannot
// copy, move or assign instances. The expansion already ends with a semicolon.
#define DISALLOW_COPY_AND_ASSIGN(TypeName) \
  TypeName(const TypeName&);               \
  TypeName(TypeName&&);                    \
  void operator=(const TypeName&);         \
  void operator=(TypeName&&);
|
||||
|
||||
template <typename lambda>
|
||||
class ScopeGuard {
|
||||
public:
|
||||
explicit __forceinline ScopeGuard(const lambda& release)
|
||||
: release_(release), dismiss_(false) {}
|
||||
|
||||
ScopeGuard(ScopeGuard& rhs) { *this = rhs; }
|
||||
|
||||
__forceinline ~ScopeGuard() {
|
||||
if (!dismiss_) release_();
|
||||
}
|
||||
__forceinline ScopeGuard& operator=(ScopeGuard& rhs) {
|
||||
dismiss_ = rhs.dismiss_;
|
||||
release_ = rhs.release_;
|
||||
rhs.dismiss_ = true;
|
||||
}
|
||||
__forceinline void Dismiss() { dismiss_ = true; }
|
||||
|
||||
private:
|
||||
lambda release_;
|
||||
bool dismiss_;
|
||||
};
|
||||
|
||||
template <typename lambda>
|
||||
static __forceinline ScopeGuard<lambda> MakeScopeGuard(lambda rel) {
|
||||
return ScopeGuard<lambda>(rel);
|
||||
}
|
||||
|
||||
// Declares two locals: `lname`, holding the callable given as the variadic
// argument, and `sname`, a ScopeGuard constructed from it.
#define MAKE_SCOPE_GUARD_HELPER(lname, sname, ...) \
  auto lname = __VA_ARGS__;                        \
  ScopeGuard<decltype(lname)> sname(lname);

// Anonymous guard: both local names are made unique with __COUNTER__.
#define MAKE_SCOPE_GUARD(...)                                   \
  MAKE_SCOPE_GUARD_HELPER(PASTE(scopeGuardLambda, __COUNTER__), \
                          PASTE(scopeGuard, __COUNTER__), __VA_ARGS__)

// Named guard: the ScopeGuard local is called `name`, so the caller can
// refer to it later (for example to call Dismiss()).
#define MAKE_NAMED_SCOPE_GUARD(name, ...)                             \
  MAKE_SCOPE_GUARD_HELPER(PASTE(scopeGuardLambda, __COUNTER__), name, \
                          __VA_ARGS__)
|
||||
|
||||
/// @brief: Finds out the min one of two inputs, input must support ">"
|
||||
/// operator.
|
||||
/// @param: a(Input), a reference to type T.
|
||||
/// @param: b(Input), a reference to type T.
|
||||
/// @return: T.
|
||||
template <class T>
|
||||
static __forceinline T Min(const T& a, const T& b) {
|
||||
return (a > b) ? b : a;
|
||||
}
|
||||
|
||||
/// @brief: Find out the max one of two inputs, input must support ">" operator.
|
||||
/// @param: a(Input), a reference to type T.
|
||||
/// @param: b(Input), a reference to type T.
|
||||
/// @return: T.
|
||||
template <class T>
|
||||
static __forceinline T Max(const T& a, const T& b) {
|
||||
return (b > a) ? b : a;
|
||||
}
|
||||
|
||||
/// @brief: Function object that applies `delete` to the pointer it is given,
/// for use where a deleter callable is expected.
/// @param: ptr(Input), pointer to an object previously allocated with `new`.
/// @return: void.
struct DeleteObject {
  template <typename T>
  void operator()(const T* ptr) const { delete ptr; }
};
|
||||
|
||||
/// @brief: Checks if a value is power of two, if it is, return true. Be careful
|
||||
/// when passing 0.
|
||||
/// @param: val(Input), the data to be checked.
|
||||
/// @return: bool.
|
||||
template <typename T>
|
||||
static __forceinline bool IsPowerOfTwo(T val) {
|
||||
return (val & (val - 1)) == 0;
|
||||
}
|
||||
|
||||
/// @brief: Calculates the floor value aligned based on parameter of alignment.
|
||||
/// If value is at the boundary of alignment, it is unchanged.
|
||||
/// @param: value(Input), value to be calculated.
|
||||
/// @param: alignment(Input), alignment value.
|
||||
/// @return: T.
|
||||
template <typename T>
|
||||
static __forceinline T AlignDown(T value, size_t alignment) {
|
||||
assert(IsPowerOfTwo(alignment));
|
||||
return (T)(value & ~(alignment - 1));
|
||||
}
|
||||
|
||||
/// @brief: Same as previous one, but first parameter becomes pointer, for more
|
||||
/// info, see the previous desciption.
|
||||
/// @param: value(Input), pointer to type T.
|
||||
/// @param: alignment(Input), alignment value.
|
||||
/// @return: T*, pointer to type T.
|
||||
template <typename T>
|
||||
static __forceinline T* AlignDown(T* value, size_t alignment) {
|
||||
return (T*)AlignDown((intptr_t)value, alignment);
|
||||
}
|
||||
|
||||
/// @brief: Calculates the ceiling value aligned based on parameter of
|
||||
/// alignment.
|
||||
/// If value is at the boundary of alignment, it is unchanged.
|
||||
/// @param: value(Input), value to be calculated.
|
||||
/// @param: alignment(Input), alignment value.
|
||||
/// @param: T.
|
||||
template <typename T>
|
||||
static __forceinline T AlignUp(T value, size_t alignment) {
|
||||
return AlignDown((T)(value + alignment - 1), alignment);
|
||||
}
|
||||
|
||||
/// @brief: Same as previous one, but first parameter becomes pointer, for more
|
||||
/// info, see the previous desciption.
|
||||
/// @param: value(Input), pointer to type T.
|
||||
/// @param: alignment(Input), alignment value.
|
||||
/// @return: T*, pointer to type T.
|
||||
template <typename T>
|
||||
static __forceinline T* AlignUp(T* value, size_t alignment) {
|
||||
return (T*)AlignDown((intptr_t)((uint8_t*)value + alignment - 1), alignment);
|
||||
}
|
||||
|
||||
/// @brief: Checks if the input value is at the boundary of alignment, if it is,
|
||||
/// @return true.
|
||||
/// @param: value(Input), value to be checked.
|
||||
/// @param: alignment(Input), alignment value.
|
||||
/// @return: bool.
|
||||
template <typename T>
|
||||
static __forceinline bool IsMultipleOf(T value, size_t alignment) {
|
||||
return (AlignUp(value, alignment) == value);
|
||||
}
|
||||
|
||||
/// @brief: Same as previous one, but first parameter becomes pointer, for more
|
||||
/// info, see the previous desciption.
|
||||
/// @param: value(Input), pointer to type T.
|
||||
/// @param: alignment(Input), alignment value.
|
||||
/// @return: bool.
|
||||
template <typename T>
|
||||
static __forceinline bool IsMultipleOf(T* value, size_t alignment) {
|
||||
return (AlignUp(value, alignment) == value);
|
||||
}
|
||||
|
||||
/// @brief: Rounds a 32-bit value up to a power of two. Powers of two are
/// returned unchanged and 0 yields 1.
/// @param: value(Input), value to be rounded.
/// @return: uint32_t.
static __forceinline uint32_t NextPow2(uint32_t value) {
  if (value == 0) return 1;
  uint32_t v = value - 1;
  // Smear the highest set bit into every lower position, then add one.
  for (unsigned shift = 1; shift <= 16; shift <<= 1) {
    v |= v >> shift;
  }
  return v + 1;
}
|
||||
|
||||
/// @brief: Rounds a 64-bit value up to a power of two. Powers of two are
/// returned unchanged and 0 yields 1.
/// @param: value(Input), value to be rounded.
/// @return: uint64_t.
static __forceinline uint64_t NextPow2(uint64_t value) {
  if (value == 0) return 1;
  uint64_t v = value - 1;
  // Smear the highest set bit into every lower position, then add one.
  for (unsigned shift = 1; shift <= 32; shift <<= 1) {
    v |= v >> shift;
  }
  return v + 1;
}
|
||||
|
||||
#include "atomic_helpers.h"
|
||||
|
||||
#endif // HSA_RUNTIME_CORE_UTIL_UTILS_H_
|
||||
Diferenças do arquivo suprimidas por serem muito extensas
Carregar Diff
@@ -1,91 +0,0 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// The following set of header files provides definitions for AMD GPU
|
||||
// Architecture:
|
||||
// - amd_hsa_common.h
|
||||
// - amd_hsa_elf.h
|
||||
// - amd_hsa_kernel_code.h
|
||||
// - amd_hsa_queue.h
|
||||
// - amd_hsa_signal.h
|
||||
//
|
||||
// Refer to "HSA Application Binary Interface: AMD GPU Architecture" for more
|
||||
// information.
|
||||
|
||||
#ifndef AMD_HSA_COMMON_H
|
||||
#define AMD_HSA_COMMON_H
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
// Descriptive version of the HSA Application Binary Interface.
|
||||
#define AMD_HSA_ABI_VERSION "AMD GPU Architecture v0.35 (June 25, 2015)"
|
||||
|
||||
// Alignment attribute that specifies a minimum alignment (in bytes) for
// variables of the specified type. It expands to the GNU attribute, to the
// MSVC declspec, or to nothing under the resource compiler; any other
// toolchain is rejected at preprocessing time with an explanatory message.
#if defined(__GNUC__)
#  define __ALIGNED__(x) __attribute__((aligned(x)))
#elif defined(_MSC_VER)
#  define __ALIGNED__(x) __declspec(align(x))
#elif defined(RC_INVOKED)
#  define __ALIGNED__(x)
#else
#  error "amd_hsa_common.h: unsupported compiler, __ALIGNED__ cannot be defined."
#endif
|
||||
|
||||
// Creates enumeration entries for one field of a packed type. For a field
// called `name` it emits three enumerators:
//   name_SHIFT - position of the field's lowest bit,
//   name_WIDTH - number of bits in the field,
//   name       - mask with `width` one-bits starting at `shift`.
// The expansion has no trailing comma, so entries are comma-separated by the
// caller.
#define AMD_HSA_BITS_CREATE_ENUM_ENTRIES(name, shift, width)                   \
  name ## _SHIFT = (shift),                                                    \
  name ## _WIDTH = (width),                                                    \
  name = (((1 << (width)) - 1) << (shift))
|
||||
|
||||
// Gets bits for specified mask from specified src packed instance: src is
// ANDed with the mask and shifted right by the matching mask_SHIFT value.
// `mask` must be the bare enumerator name, because it is token-pasted to form
// mask_SHIFT. Both operands are parenthesized so that an expression argument
// such as `a | b` is masked as a whole.
#define AMD_HSA_BITS_GET(src, mask)                                            \
  (((src) & (mask)) >> mask ## _SHIFT)
|
||||
|
||||
// Sets val bits for specified mask in specified dst packed instance: the
// field is cleared in dst, then val is shifted into position, masked and
// ORed in. `mask` must be the bare enumerator name, because it is
// token-pasted to form mask_SHIFT.
// The two assignments are wrapped in do { } while (0) so the macro behaves as
// a single statement (for example as the body of an unbraced `if`). Invoke it
// with a trailing semicolon, as before.
#define AMD_HSA_BITS_SET(dst, mask, val)                                       \
  do {                                                                         \
    (dst) &= (~(1 << mask ## _SHIFT) & ~(mask));                               \
    (dst) |= (((val) << mask ## _SHIFT) & (mask));                             \
  } while (0)
|
||||
|
||||
#endif // AMD_HSA_COMMON_H
|
||||
@@ -1,295 +0,0 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef AMD_HSA_ELF_H
|
||||
#define AMD_HSA_ELF_H
|
||||
|
||||
#include "amd_hsa_common.h"
|
||||
|
||||
// ELF Header Enumeration Values.
#define EM_AMDGPU                 224
#define ELFOSABI_AMDGPU_HSA       64
#define ELFABIVERSION_AMDGPU_HSA  0
#define EF_AMDGPU_XNACK           0x00000001
#define EF_AMDGPU_TRAP_HANDLER    0x00000002

// ELF Section Header Flag Enumeration Values.
// Each flag is ANDed with SHF_MASKOS, which this header does not define; it
// must be provided by the including translation unit.
#define SHF_AMDGPU_HSA_GLOBAL     (0x00100000 & SHF_MASKOS)
#define SHF_AMDGPU_HSA_READONLY   (0x00200000 & SHF_MASKOS)
#define SHF_AMDGPU_HSA_CODE       (0x00400000 & SHF_MASKOS)
#define SHF_AMDGPU_HSA_AGENT      (0x00800000 & SHF_MASKOS)
|
||||
|
||||
// AMD GPU HSA ELF Segment Enumeration Values. The PT_AMDGPU_HSA_LOAD_* macros
// in this header are formed by adding these values to PT_LOOS.
// AMDGPU_HSA_SEGMENT_LAST is one past the last segment value.
typedef enum {
  AMDGPU_HSA_SEGMENT_GLOBAL_PROGRAM = 0,
  AMDGPU_HSA_SEGMENT_GLOBAL_AGENT   = 1,
  AMDGPU_HSA_SEGMENT_READONLY_AGENT = 2,
  AMDGPU_HSA_SEGMENT_CODE_AGENT     = 3,
  AMDGPU_HSA_SEGMENT_LAST
} amdgpu_hsa_elf_segment_t;
|
||||
|
||||
// ELF Program Header Type Enumeration Values (offsets from PT_LOOS, which
// must be provided by the including translation unit).
#define PT_AMDGPU_HSA_LOAD_GLOBAL_PROGRAM \
  (PT_LOOS + AMDGPU_HSA_SEGMENT_GLOBAL_PROGRAM)
#define PT_AMDGPU_HSA_LOAD_GLOBAL_AGENT \
  (PT_LOOS + AMDGPU_HSA_SEGMENT_GLOBAL_AGENT)
#define PT_AMDGPU_HSA_LOAD_READONLY_AGENT \
  (PT_LOOS + AMDGPU_HSA_SEGMENT_READONLY_AGENT)
#define PT_AMDGPU_HSA_LOAD_CODE_AGENT \
  (PT_LOOS + AMDGPU_HSA_SEGMENT_CODE_AGENT)

// ELF Symbol Type Enumeration Values (offsets from STT_LOOS).
#define STT_AMDGPU_HSA_KERNEL             (STT_LOOS + 0)
#define STT_AMDGPU_HSA_INDIRECT_FUNCTION  (STT_LOOS + 1)
#define STT_AMDGPU_HSA_METADATA           (STT_LOOS + 2)

// ELF Symbol Binding Enumeration Values (offsets from STB_LOOS).
#define STB_AMDGPU_HSA_EXTERNAL           (STB_LOOS + 0)

// ELF Symbol Other Information Creation/Retrieval.
// Layout of the packed value: bits 1:0 hold v, bits 3:2 hold the allocation
// a, and the bits from 4 upward hold the flags f.
#define ELF64_ST_AMDGPU_ALLOCATION(o)   (((o) >> 2) & 0x3)
#define ELF64_ST_AMDGPU_FLAGS(o)        ((o) >> 4)
#define ELF64_ST_AMDGPU_OTHER(f, a, v) \
  (((f) << 4) + (((a) & 0x3) << 2) + ((v) & 0x3))
|
||||
|
||||
// AMD GPU HSA Symbol Allocation Enumeration Values. The STA_AMDGPU_HSA_*
// macros in this header alias these enumerators.
// AMDGPU_HSA_SYMBOL_ALLOCATION_LAST is one past the last allocation value.
typedef enum {
  AMDGPU_HSA_SYMBOL_ALLOCATION_DEFAULT        = 0,
  AMDGPU_HSA_SYMBOL_ALLOCATION_GLOBAL_PROGRAM = 1,
  AMDGPU_HSA_SYMBOL_ALLOCATION_GLOBAL_AGENT   = 2,
  AMDGPU_HSA_SYMBOL_ALLOCATION_READONLY_AGENT = 3,
  AMDGPU_HSA_SYMBOL_ALLOCATION_LAST
} amdgpu_hsa_symbol_allocation_t;
|
||||
|
||||
// ELF Symbol Allocation Enumeration Values: STA_-prefixed aliases for the
// amdgpu_hsa_symbol_allocation_t enumerators.
#define STA_AMDGPU_HSA_DEFAULT \
  AMDGPU_HSA_SYMBOL_ALLOCATION_DEFAULT
#define STA_AMDGPU_HSA_GLOBAL_PROGRAM \
  AMDGPU_HSA_SYMBOL_ALLOCATION_GLOBAL_PROGRAM
#define STA_AMDGPU_HSA_GLOBAL_AGENT \
  AMDGPU_HSA_SYMBOL_ALLOCATION_GLOBAL_AGENT
#define STA_AMDGPU_HSA_READONLY_AGENT \
  AMDGPU_HSA_SYMBOL_ALLOCATION_READONLY_AGENT
|
||||
|
||||
// AMD GPU HSA Symbol Flag Enumeration Values. STF_AMDGPU_HSA_CONST in this
// header aliases AMDGPU_HSA_SYMBOL_FLAG_CONST.
// AMDGPU_HSA_SYMBOL_FLAG_LAST is one past the last flag value.
typedef enum {
  AMDGPU_HSA_SYMBOL_FLAG_DEFAULT = 0,
  AMDGPU_HSA_SYMBOL_FLAG_CONST   = 1,
  AMDGPU_HSA_SYMBOL_FLAG_LAST
} amdgpu_hsa_symbol_flag_t;
|
||||
|
||||
// ELF Symbol Flag Enumeration Values.
#define STF_AMDGPU_HSA_CONST  AMDGPU_HSA_SYMBOL_FLAG_CONST

// AMD GPU Relocation Type Enumeration Values.
#define R_AMDGPU_NONE          0
#define R_AMDGPU_32_LOW        1
#define R_AMDGPU_32_HIGH       2
#define R_AMDGPU_64            3
#define R_AMDGPU_INIT_SAMPLER  4
#define R_AMDGPU_INIT_IMAGE    5

// AMD GPU Note Type Enumeration Values.
#define NT_AMDGPU_HSA_CODE_OBJECT_VERSION  1
#define NT_AMDGPU_HSA_HSAIL                2
#define NT_AMDGPU_HSA_ISA                  3
#define NT_AMDGPU_HSA_PRODUCER             4
#define NT_AMDGPU_HSA_PRODUCER_OPTIONS     5
#define NT_AMDGPU_HSA_EXTENSION            6
#define NT_AMDGPU_HSA_HLDEBUG_DEBUG        101
#define NT_AMDGPU_HSA_HLDEBUG_TARGET       102
|
||||
|
||||
// AMD GPU Metadata Kind Enumeration Values.
// amdgpu_hsa_metadata_kind16_t is the 16-bit type used for the `kind` field
// of the sampler and image descriptors in this header.
typedef uint16_t amdgpu_hsa_metadata_kind16_t;
typedef enum {
  AMDGPU_HSA_METADATA_KIND_NONE       = 0,
  AMDGPU_HSA_METADATA_KIND_INIT_SAMP  = 1,
  AMDGPU_HSA_METADATA_KIND_INIT_ROIMG = 2,
  AMDGPU_HSA_METADATA_KIND_INIT_WOIMG = 3,
  AMDGPU_HSA_METADATA_KIND_INIT_RWIMG = 4
} amdgpu_hsa_metadata_kind_t;
|
||||
|
||||
// AMD GPU Sampler Coordinate Normalization Enumeration Values.
// amdgpu_hsa_sampler_coord8_t is the 8-bit type used for the `coord` field of
// amdgpu_hsa_sampler_descriptor_t.
typedef uint8_t amdgpu_hsa_sampler_coord8_t;
typedef enum {
  AMDGPU_HSA_SAMPLER_COORD_UNNORMALIZED = 0,
  AMDGPU_HSA_SAMPLER_COORD_NORMALIZED   = 1
} amdgpu_hsa_sampler_coord_t;
|
||||
|
||||
// AMD GPU Sampler Filter Enumeration Values.
// amdgpu_hsa_sampler_filter8_t is the 8-bit type used for the `filter` field
// of amdgpu_hsa_sampler_descriptor_t.
typedef uint8_t amdgpu_hsa_sampler_filter8_t;
typedef enum {
  AMDGPU_HSA_SAMPLER_FILTER_NEAREST = 0,
  AMDGPU_HSA_SAMPLER_FILTER_LINEAR  = 1
} amdgpu_hsa_sampler_filter_t;
|
||||
|
||||
// AMD GPU Sampler Addressing Enumeration Values.
// amdgpu_hsa_sampler_addressing8_t is the 8-bit type used for the
// `addressing` field of amdgpu_hsa_sampler_descriptor_t.
typedef uint8_t amdgpu_hsa_sampler_addressing8_t;
typedef enum {
  AMDGPU_HSA_SAMPLER_ADDRESSING_UNDEFINED       = 0,
  AMDGPU_HSA_SAMPLER_ADDRESSING_CLAMP_TO_EDGE   = 1,
  AMDGPU_HSA_SAMPLER_ADDRESSING_CLAMP_TO_BORDER = 2,
  AMDGPU_HSA_SAMPLER_ADDRESSING_REPEAT          = 3,
  AMDGPU_HSA_SAMPLER_ADDRESSING_MIRRORED_REPEAT = 4
} amdgpu_hsa_sampler_addressing_t;
|
||||
|
||||
// AMD GPU Sampler Descriptor.
|
||||
typedef struct amdgpu_hsa_sampler_descriptor_s {
|
||||
uint16_t size;
|
||||
amdgpu_hsa_metadata_kind16_t kind;
|
||||
amdgpu_hsa_sampler_coord8_t coord;
|
||||
amdgpu_hsa_sampler_filter8_t filter;
|
||||
amdgpu_hsa_sampler_addressing8_t addressing;
|
||||
uint8_t reserved1;
|
||||
} amdgpu_hsa_sampler_descriptor_t;
|
||||
|
||||
// AMD GPU Image Geometry Enumeration Values.
// amdgpu_hsa_image_geometry8_t is the 8-bit type used for the `geometry`
// field of amdgpu_hsa_image_descriptor_t.
typedef uint8_t amdgpu_hsa_image_geometry8_t;
typedef enum {
  AMDGPU_HSA_IMAGE_GEOMETRY_1D       = 0,
  AMDGPU_HSA_IMAGE_GEOMETRY_2D       = 1,
  AMDGPU_HSA_IMAGE_GEOMETRY_3D       = 2,
  AMDGPU_HSA_IMAGE_GEOMETRY_1DA      = 3,
  AMDGPU_HSA_IMAGE_GEOMETRY_2DA      = 4,
  AMDGPU_HSA_IMAGE_GEOMETRY_1DB      = 5,
  AMDGPU_HSA_IMAGE_GEOMETRY_2DDEPTH  = 6,
  AMDGPU_HSA_IMAGE_GEOMETRY_2DADEPTH = 7
} amdgpu_hsa_image_geometry_t;
|
||||
|
||||
// AMD GPU Image Channel Order Enumeration Values.
// amdgpu_hsa_image_channel_order8_t is the 8-bit type used for the
// `channel_order` field of amdgpu_hsa_image_descriptor_t.
typedef uint8_t amdgpu_hsa_image_channel_order8_t;
typedef enum {
  AMDGPU_HSA_IMAGE_CHANNEL_ORDER_A             = 0,
  AMDGPU_HSA_IMAGE_CHANNEL_ORDER_R             = 1,
  AMDGPU_HSA_IMAGE_CHANNEL_ORDER_RX            = 2,
  AMDGPU_HSA_IMAGE_CHANNEL_ORDER_RG            = 3,
  AMDGPU_HSA_IMAGE_CHANNEL_ORDER_RGX           = 4,
  AMDGPU_HSA_IMAGE_CHANNEL_ORDER_RA            = 5,
  AMDGPU_HSA_IMAGE_CHANNEL_ORDER_RGB           = 6,
  AMDGPU_HSA_IMAGE_CHANNEL_ORDER_RGBX          = 7,
  AMDGPU_HSA_IMAGE_CHANNEL_ORDER_RGBA          = 8,
  AMDGPU_HSA_IMAGE_CHANNEL_ORDER_BGRA          = 9,
  AMDGPU_HSA_IMAGE_CHANNEL_ORDER_ARGB          = 10,
  AMDGPU_HSA_IMAGE_CHANNEL_ORDER_ABGR          = 11,
  AMDGPU_HSA_IMAGE_CHANNEL_ORDER_SRGB          = 12,
  AMDGPU_HSA_IMAGE_CHANNEL_ORDER_SRGBX         = 13,
  AMDGPU_HSA_IMAGE_CHANNEL_ORDER_SRGBA         = 14,
  AMDGPU_HSA_IMAGE_CHANNEL_ORDER_SBGRA         = 15,
  AMDGPU_HSA_IMAGE_CHANNEL_ORDER_INTENSITY     = 16,
  AMDGPU_HSA_IMAGE_CHANNEL_ORDER_LUMINANCE     = 17,
  AMDGPU_HSA_IMAGE_CHANNEL_ORDER_DEPTH         = 18,
  AMDGPU_HSA_IMAGE_CHANNEL_ORDER_DEPTH_STENCIL = 19
} amdgpu_hsa_image_channel_order_t;
|
||||
|
||||
// AMD GPU Image Channel Type Enumeration Values.
// amdgpu_hsa_image_channel_type8_t is the 8-bit type used for the
// `channel_type` field of amdgpu_hsa_image_descriptor_t.
typedef uint8_t amdgpu_hsa_image_channel_type8_t;
typedef enum {
  AMDGPU_HSA_IMAGE_CHANNEL_TYPE_SNORM_INT8     = 0,
  AMDGPU_HSA_IMAGE_CHANNEL_TYPE_SNORM_INT16    = 1,
  AMDGPU_HSA_IMAGE_CHANNEL_TYPE_UNORM_INT8     = 2,
  AMDGPU_HSA_IMAGE_CHANNEL_TYPE_UNORM_INT16    = 3,
  AMDGPU_HSA_IMAGE_CHANNEL_TYPE_UNORM_INT24    = 4,
  AMDGPU_HSA_IMAGE_CHANNEL_TYPE_SHORT_555      = 5,
  AMDGPU_HSA_IMAGE_CHANNEL_TYPE_SHORT_565      = 6,
  AMDGPU_HSA_IMAGE_CHANNEL_TYPE_INT_101010     = 7,
  AMDGPU_HSA_IMAGE_CHANNEL_TYPE_SIGNED_INT8    = 8,
  AMDGPU_HSA_IMAGE_CHANNEL_TYPE_SIGNED_INT16   = 9,
  AMDGPU_HSA_IMAGE_CHANNEL_TYPE_SIGNED_INT32   = 10,
  AMDGPU_HSA_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8  = 11,
  AMDGPU_HSA_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 = 12,
  AMDGPU_HSA_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32 = 13,
  AMDGPU_HSA_IMAGE_CHANNEL_TYPE_HALF_FLOAT     = 14,
  AMDGPU_HSA_IMAGE_CHANNEL_TYPE_FLOAT          = 15
} amdgpu_hsa_image_channel_type_t;
|
||||
|
||||
// AMD GPU Image Descriptor.
|
||||
typedef struct amdgpu_hsa_image_descriptor_s {
|
||||
uint16_t size;
|
||||
amdgpu_hsa_metadata_kind16_t kind;
|
||||
amdgpu_hsa_image_geometry8_t geometry;
|
||||
amdgpu_hsa_image_channel_order8_t channel_order;
|
||||
amdgpu_hsa_image_channel_type8_t channel_type;
|
||||
uint8_t reserved1;
|
||||
uint64_t width;
|
||||
uint64_t height;
|
||||
uint64_t depth;
|
||||
uint64_t array;
|
||||
} amdgpu_hsa_image_descriptor_t;
|
||||
|
||||
// Code object version note: a major/minor version pair.
// NOTE(review): presumably the payload of NT_AMDGPU_HSA_CODE_OBJECT_VERSION
// -- confirm against the ABI document.
typedef struct amdgpu_hsa_note_code_object_version_s {
  uint32_t major_version;
  uint32_t minor_version;
} amdgpu_hsa_note_code_object_version_t;
|
||||
|
||||
// HSAIL note: HSAIL major/minor version followed by three 8-bit settings.
// NOTE(review): presumably the payload of NT_AMDGPU_HSA_HSAIL -- confirm
// against the ABI document.
typedef struct amdgpu_hsa_note_hsail_s {
  uint32_t hsail_major_version;
  uint32_t hsail_minor_version;
  uint8_t  profile;
  uint8_t  machine_model;
  uint8_t  default_float_round;
} amdgpu_hsa_note_hsail_t;
|
||||
|
||||
// ISA note: two 16-bit name sizes, a major/minor/stepping triple and a
// one-element char array at the end.
// NOTE(review): the trailing array looks like a variable-length tail holding
// the vendor and architecture names -- confirm against the ABI document.
typedef struct amdgpu_hsa_note_isa_s {
  uint16_t vendor_name_size;
  uint16_t architecture_name_size;
  uint32_t major;
  uint32_t minor;
  uint32_t stepping;
  char     vendor_and_architecture_name[1];
} amdgpu_hsa_note_isa_t;
|
||||
|
||||
// Producer note: 16-bit name size, 16 reserved bits, a major/minor version
// pair and a one-element char array at the end.
// NOTE(review): the trailing array looks like a variable-length tail holding
// the producer name -- confirm against the ABI document.
typedef struct amdgpu_hsa_note_producer_s {
  uint16_t producer_name_size;
  uint16_t reserved;
  uint32_t producer_major_version;
  uint32_t producer_minor_version;
  char     producer_name[1];
} amdgpu_hsa_note_producer_t;
|
||||
|
||||
// Producer options note: 16-bit size followed by a one-element char array.
// NOTE(review): the trailing array looks like a variable-length tail holding
// the option string -- confirm against the ABI document.
typedef struct amdgpu_hsa_note_producer_options_s {
  uint16_t producer_options_size;
  char     producer_options[1];
} amdgpu_hsa_note_producer_options_t;
|
||||
|
||||
// AMD GPU HSA ELF Section Enumeration Values: the rodata, data and bss
// groups each list global-program, global-agent and readonly-agent, numbered
// consecutively from 0. AMDGPU_HSA_SECTION_LAST is one past the last value.
typedef enum {
  AMDGPU_HSA_RODATA_GLOBAL_PROGRAM = 0,
  AMDGPU_HSA_RODATA_GLOBAL_AGENT,    // 1
  AMDGPU_HSA_RODATA_READONLY_AGENT,  // 2
  AMDGPU_HSA_DATA_GLOBAL_PROGRAM,    // 3
  AMDGPU_HSA_DATA_GLOBAL_AGENT,      // 4
  AMDGPU_HSA_DATA_READONLY_AGENT,    // 5
  AMDGPU_HSA_BSS_GLOBAL_PROGRAM,     // 6
  AMDGPU_HSA_BSS_GLOBAL_AGENT,       // 7
  AMDGPU_HSA_BSS_READONLY_AGENT,     // 8
  AMDGPU_HSA_SECTION_LAST            // 9
} amdgpu_hsa_elf_section_t;
|
||||
|
||||
#endif // AMD_HSA_ELF_H
|
||||
@@ -1,271 +0,0 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef AMD_HSA_KERNEL_CODE_H
|
||||
#define AMD_HSA_KERNEL_CODE_H
|
||||
|
||||
#include "amd_hsa_common.h"
|
||||
#include "hsa.h"
|
||||
|
||||
// AMD Kernel Code Version Enumeration Values.
// amd_kernel_code_version32_t is the 32-bit type for storing a version
// number; the enumerators give the major and minor version this header
// declares (1.1).
typedef uint32_t amd_kernel_code_version32_t;
enum amd_kernel_code_version_t {
  AMD_KERNEL_CODE_VERSION_MAJOR = 1,
  AMD_KERNEL_CODE_VERSION_MINOR = 1
};
|
||||
|
||||
// AMD Machine Kind Enumeration Values.
// amd_machine_kind16_t is the 16-bit type for storing a machine kind.
typedef uint16_t amd_machine_kind16_t;
enum amd_machine_kind_t {
  AMD_MACHINE_KIND_UNDEFINED = 0,
  AMD_MACHINE_KIND_AMDGPU    = 1
};

// AMD Machine Version: 16-bit type for storing a machine version component.
typedef uint16_t amd_machine_version16_t;
|
||||
|
||||
// AMD Float Round Mode Enumeration Values.
// The four values fit the 2-bit FLOAT_ROUND_MODE fields declared in
// amd_compute_pgm_rsrc_one_t.
enum amd_float_round_mode_t {
  AMD_FLOAT_ROUND_MODE_NEAREST_EVEN   = 0,
  AMD_FLOAT_ROUND_MODE_PLUS_INFINITY  = 1,
  AMD_FLOAT_ROUND_MODE_MINUS_INFINITY = 2,
  AMD_FLOAT_ROUND_MODE_ZERO           = 3
};
|
||||
|
||||
// AMD Float Denorm Mode Enumeration Values.
// The four values fit the 2-bit FLOAT_DENORM_MODE fields declared in
// amd_compute_pgm_rsrc_one_t.
enum amd_float_denorm_mode_t {
  AMD_FLOAT_DENORM_MODE_FLUSH_SOURCE_OUTPUT = 0,
  AMD_FLOAT_DENORM_MODE_FLUSH_OUTPUT        = 1,
  AMD_FLOAT_DENORM_MODE_FLUSH_SOURCE        = 2,
  AMD_FLOAT_DENORM_MODE_NO_FLUSH            = 3
};
|
||||
|
||||
// AMD Compute Program Resource Register One.
|
||||
typedef uint32_t amd_compute_pgm_rsrc_one32_t;
|
||||
enum amd_compute_pgm_rsrc_one_t {
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_GRANULATED_WORKITEM_VGPR_COUNT, 0, 6),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_GRANULATED_WAVEFRONT_SGPR_COUNT, 6, 4),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_PRIORITY, 10, 2),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_FLOAT_ROUND_MODE_32, 12, 2),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_FLOAT_ROUND_MODE_16_64, 14, 2),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_FLOAT_DENORM_MODE_32, 16, 2),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_FLOAT_DENORM_MODE_16_64, 18, 2),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_PRIV, 20, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_ENABLE_DX10_CLAMP, 21, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_DEBUG_MODE, 22, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_ENABLE_IEEE_MODE, 23, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_BULKY, 24, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_CDBG_USER, 25, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_RESERVED1, 26, 6)
|
||||
};
|
||||
|
||||
// AMD System VGPR Workitem ID Enumeration Values.
// The four values fit the 2-bit ENABLE_VGPR_WORKITEM_ID field declared in
// amd_compute_pgm_rsrc_two_t.
enum amd_system_vgpr_workitem_id_t {
  AMD_SYSTEM_VGPR_WORKITEM_ID_X         = 0,
  AMD_SYSTEM_VGPR_WORKITEM_ID_X_Y       = 1,
  AMD_SYSTEM_VGPR_WORKITEM_ID_X_Y_Z     = 2,
  AMD_SYSTEM_VGPR_WORKITEM_ID_UNDEFINED = 3
};
|
||||
|
||||
// AMD Compute Program Resource Register Two.
|
||||
typedef uint32_t amd_compute_pgm_rsrc_two32_t;
|
||||
enum amd_compute_pgm_rsrc_two_t {
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_SGPR_PRIVATE_SEGMENT_WAVE_BYTE_OFFSET, 0, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_USER_SGPR_COUNT, 1, 5),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_TRAP_HANDLER, 6, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_SGPR_WORKGROUP_ID_X, 7, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_SGPR_WORKGROUP_ID_Y, 8, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_SGPR_WORKGROUP_ID_Z, 9, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_SGPR_WORKGROUP_INFO, 10, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_VGPR_WORKITEM_ID, 11, 2),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_ADDRESS_WATCH, 13, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_MEMORY_VIOLATION, 14, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_GRANULATED_LDS_SIZE, 15, 9),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, 24, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 25, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, 26, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 27, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 28, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 29, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_INT_DIVISION_BY_ZERO, 30, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_RESERVED1, 31, 1)
|
||||
};
|
||||
|
||||
// AMD Element Byte Size Enumeration Values.
// Encodes element sizes of 2, 4, 8 and 16 bytes as 0..3; the four values fit
// the 2-bit PRIVATE_ELEMENT_SIZE field of amd_kernel_code_properties_t.
enum amd_element_byte_size_t {
  AMD_ELEMENT_BYTE_SIZE_2  = 0,
  AMD_ELEMENT_BYTE_SIZE_4  = 1,
  AMD_ELEMENT_BYTE_SIZE_8  = 2,
  AMD_ELEMENT_BYTE_SIZE_16 = 3
};
|
||||
|
||||
// AMD Kernel Code Properties.
// amd_kernel_code_properties32_t is the 32-bit storage word; the enum below
// names the fields packed into it.
// AMD_HSA_BITS_CREATE_ENUM_ENTRIES is defined outside this section; its
// arguments read as (field name, first bit, width in bits) -- TODO confirm
// against the macro definition. Read that way, the entries tile bits 0..31
// with no gaps or overlaps:
//   bits 0-9   ten single-bit ENABLE_SGPR_* flags
//   bits 10-15 RESERVED1
//   bit  16    ENABLE_ORDERED_APPEND_GDS
//   bits 17-18 PRIVATE_ELEMENT_SIZE
//   bits 19-22 IS_PTR64, IS_DYNAMIC_CALLSTACK, IS_DEBUG_ENABLED,
//              IS_XNACK_ENABLED
//   bits 23-31 RESERVED2
|
||||
typedef uint32_t amd_kernel_code_properties32_t;
|
||||
enum amd_kernel_code_properties_t {
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 0, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_DISPATCH_PTR, 1, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_QUEUE_PTR, 2, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 3, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_DISPATCH_ID, 4, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_FLAT_SCRATCH_INIT, 5, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 6, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X, 7, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y, 8, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z, 9, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_RESERVED1, 10, 6),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_ORDERED_APPEND_GDS, 16, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_PRIVATE_ELEMENT_SIZE, 17, 2),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_IS_PTR64, 19, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_IS_DYNAMIC_CALLSTACK, 20, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_IS_DEBUG_ENABLED, 21, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_IS_XNACK_ENABLED, 22, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_RESERVED2, 23, 9)
|
||||
};
|
||||
|
||||
// AMD Power Of Two Enumeration Values.
// Each enumerator's value is log2 of the number in its name
// (AMD_POWERTWO_1 = 0 ... AMD_POWERTWO_256 = 8).
// amd_powertwo8_t is the 8-bit storage type; amd_kernel_code_s uses it for
// its three *_segment_alignment fields and for wavefront_size.
|
||||
typedef uint8_t amd_powertwo8_t;
|
||||
enum amd_powertwo_t {
|
||||
AMD_POWERTWO_1 = 0,
|
||||
AMD_POWERTWO_2 = 1,
|
||||
AMD_POWERTWO_4 = 2,
|
||||
AMD_POWERTWO_8 = 3,
|
||||
AMD_POWERTWO_16 = 4,
|
||||
AMD_POWERTWO_32 = 5,
|
||||
AMD_POWERTWO_64 = 6,
|
||||
AMD_POWERTWO_128 = 7,
|
||||
AMD_POWERTWO_256 = 8
|
||||
};
|
||||
|
||||
// AMD Enabled Control Directive Enumeration Values.
// Nine single-bit flags with values 1 << 0 through 1 << 8, so they can be
// OR-ed together. amd_enabled_control_directive64_t is the 64-bit storage
// type and is the type of amd_control_directives_s.enabled_control_directives.
// Apart from REQUIRE_NO_PARTIAL_WORKGROUPS, each flag name matches a field
// of amd_control_directives_s.
// NOTE(review): presumably a set flag marks the matching field as valid --
// confirm with the consumers of that struct.
|
||||
typedef uint64_t amd_enabled_control_directive64_t;
|
||||
enum amd_enabled_control_directive_t {
|
||||
AMD_ENABLED_CONTROL_DIRECTIVE_ENABLE_BREAK_EXCEPTIONS = 1,
|
||||
AMD_ENABLED_CONTROL_DIRECTIVE_ENABLE_DETECT_EXCEPTIONS = 2,
|
||||
AMD_ENABLED_CONTROL_DIRECTIVE_MAX_DYNAMIC_GROUP_SIZE = 4,
|
||||
AMD_ENABLED_CONTROL_DIRECTIVE_MAX_FLAT_GRID_SIZE = 8,
|
||||
AMD_ENABLED_CONTROL_DIRECTIVE_MAX_FLAT_WORKGROUP_SIZE = 16,
|
||||
AMD_ENABLED_CONTROL_DIRECTIVE_REQUIRED_DIM = 32,
|
||||
AMD_ENABLED_CONTROL_DIRECTIVE_REQUIRED_GRID_SIZE = 64,
|
||||
AMD_ENABLED_CONTROL_DIRECTIVE_REQUIRED_WORKGROUP_SIZE = 128,
|
||||
AMD_ENABLED_CONTROL_DIRECTIVE_REQUIRE_NO_PARTIAL_WORKGROUPS = 256
|
||||
};
|
||||
|
||||
// AMD Exception Kind Enumeration Values.
// Five single-bit flags (1, 2, 4, 8, 16) that can be OR-ed together;
// amd_exception_kind16_t is the 16-bit storage type.
// NOTE(review): enable_break_exceptions / enable_detect_exceptions in
// amd_control_directives_s are declared as plain uint16_t, presumably masks
// of these flags -- confirm.
|
||||
typedef uint16_t amd_exception_kind16_t;
|
||||
enum amd_exception_kind_t {
|
||||
AMD_EXCEPTION_KIND_INVALID_OPERATION = 1,
|
||||
AMD_EXCEPTION_KIND_DIVISION_BY_ZERO = 2,
|
||||
AMD_EXCEPTION_KIND_OVERFLOW = 4,
|
||||
AMD_EXCEPTION_KIND_UNDERFLOW = 8,
|
||||
AMD_EXCEPTION_KIND_INEXACT = 16
|
||||
};
|
||||
|
||||
// AMD Control Directives.
// Fixed-layout record; do not reorder or resize members.
// With natural alignment of the fixed-width types the members fall at:
//   0   enabled_control_directives (8)
//   8   enable_break_exceptions (2)    10  enable_detect_exceptions (2)
//   12  max_dynamic_group_size (4)     16  max_flat_grid_size (8)
//   24  max_flat_workgroup_size (4)    28  required_dim (1)
//   29  reserved1 (3)                  32  required_grid_size (3 x 8)
//   56  required_workgroup_size (3 x 4)
//   68  reserved2 (60)
// for a total of 128 bytes, a multiple of AMD_CONTROL_DIRECTIVES_ALIGN_BYTES.
// __ALIGNED__ is defined outside this section; presumably it expands to the
// compiler's alignment attribute -- confirm.
|
||||
#define AMD_CONTROL_DIRECTIVES_ALIGN_BYTES 64
|
||||
#define AMD_CONTROL_DIRECTIVES_ALIGN __ALIGNED__(AMD_CONTROL_DIRECTIVES_ALIGN_BYTES)
|
||||
typedef AMD_CONTROL_DIRECTIVES_ALIGN struct amd_control_directives_s {
|
||||
amd_enabled_control_directive64_t enabled_control_directives;
|
||||
uint16_t enable_break_exceptions;
|
||||
uint16_t enable_detect_exceptions;
|
||||
uint32_t max_dynamic_group_size;
|
||||
uint64_t max_flat_grid_size;
|
||||
uint32_t max_flat_workgroup_size;
|
||||
uint8_t required_dim;
|
||||
uint8_t reserved1[3];
|
||||
uint64_t required_grid_size[3];
|
||||
uint32_t required_workgroup_size[3];
|
||||
uint8_t reserved2[60];
|
||||
} amd_control_directives_t;
|
||||
|
||||
// AMD Kernel Code.
// Fixed-layout kernel header record; do not reorder or resize members.
// AMD_ISA_ALIGN_BYTES is defined here but not used by this declaration.
// Several member types (amd_kernel_code_version32_t, amd_machine_kind16_t,
// amd_machine_version16_t, amd_compute_pgm_rsrc_one32_t,
// amd_compute_pgm_rsrc_two32_t) are declared outside this section.
// NOTE(review): if those typedefs have the bit widths their suffixes suggest,
// the members before control_directives add up to 128 bytes and the embedded
// 128-byte amd_control_directives_t brings the total to 256 -- confirm.
// __ALIGNED__ is defined outside this section.
|
||||
#define AMD_ISA_ALIGN_BYTES 256
|
||||
#define AMD_KERNEL_CODE_ALIGN_BYTES 64
|
||||
#define AMD_KERNEL_CODE_ALIGN __ALIGNED__(AMD_KERNEL_CODE_ALIGN_BYTES)
|
||||
typedef AMD_KERNEL_CODE_ALIGN struct amd_kernel_code_s {
|
||||
// Version of this record and of the target machine.
amd_kernel_code_version32_t amd_kernel_code_version_major;
|
||||
amd_kernel_code_version32_t amd_kernel_code_version_minor;
|
||||
amd_machine_kind16_t amd_machine_kind;
|
||||
amd_machine_version16_t amd_machine_version_major;
|
||||
amd_machine_version16_t amd_machine_version_minor;
|
||||
amd_machine_version16_t amd_machine_version_stepping;
|
||||
// The two *_byte_offset members are signed 64-bit values.
// NOTE(review): presumably relative to the start of this record -- confirm.
int64_t kernel_code_entry_byte_offset;
|
||||
int64_t kernel_code_prefetch_byte_offset;
|
||||
uint64_t kernel_code_prefetch_byte_size;
|
||||
uint64_t max_scratch_backing_memory_byte_size;
|
||||
// Packed bit-field words; kernel_code_properties is laid out by
// amd_kernel_code_properties_t.
amd_compute_pgm_rsrc_one32_t compute_pgm_rsrc1;
|
||||
amd_compute_pgm_rsrc_two32_t compute_pgm_rsrc2;
|
||||
amd_kernel_code_properties32_t kernel_code_properties;
|
||||
// Segment sizes in bytes.
uint32_t workitem_private_segment_byte_size;
|
||||
uint32_t workgroup_group_segment_byte_size;
|
||||
uint32_t gds_segment_byte_size;
|
||||
uint64_t kernarg_segment_byte_size;
|
||||
uint32_t workgroup_fbarrier_count;
|
||||
// Register counts and reserved register ranges.
uint16_t wavefront_sgpr_count;
|
||||
uint16_t workitem_vgpr_count;
|
||||
uint16_t reserved_vgpr_first;
|
||||
uint16_t reserved_vgpr_count;
|
||||
uint16_t reserved_sgpr_first;
|
||||
uint16_t reserved_sgpr_count;
|
||||
uint16_t debug_wavefront_private_segment_offset_sgpr;
|
||||
uint16_t debug_private_segment_buffer_sgpr;
|
||||
// The next four members hold amd_powertwo_t encodings (log2 of the value).
amd_powertwo8_t kernarg_segment_alignment;
|
||||
amd_powertwo8_t group_segment_alignment;
|
||||
amd_powertwo8_t private_segment_alignment;
|
||||
amd_powertwo8_t wavefront_size;
|
||||
int32_t call_convention;
|
||||
uint8_t reserved1[12];
|
||||
uint64_t runtime_loader_kernel_symbol;
|
||||
amd_control_directives_t control_directives;
|
||||
} amd_kernel_code_t;
|
||||
|
||||
// TODO: this struct should be completely gone once debugger designs/implements
|
||||
// Debugger APIs.
// Plain aggregate of debug information; nothing in this header reads or
// writes its fields and their meanings are not documented here.
// NOTE(review): elf_raw/elf_size look like a pointer + length pair for an ELF
// image and gpuva like a GPU virtual address -- inferred from the names only,
// confirm with the loader code that fills this in.
|
||||
typedef struct amd_runtime_loader_debug_info_s {
|
||||
const void* elf_raw;
|
||||
size_t elf_size;
|
||||
const char *kernel_name;
|
||||
const void *owning_segment;
|
||||
hsa_profile_t profile;
|
||||
uint64_t gpuva;
|
||||
} amd_runtime_loader_debug_info_t;
|
||||
|
||||
#endif // AMD_HSA_KERNEL_CODE_H
|
||||
@@ -1,86 +0,0 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef AMD_HSA_QUEUE_H
|
||||
#define AMD_HSA_QUEUE_H
|
||||
|
||||
#include "amd_hsa_common.h"
|
||||
#include "hsa.h"
|
||||
|
||||
// AMD Queue Properties.
// amd_queue_properties32_t is the 32-bit storage word (the type of
// amd_queue_s.queue_properties); the enum below names the fields packed
// into it.
// AMD_HSA_BITS_CREATE_ENUM_ENTRIES comes from outside this file section; its
// arguments read as (field name, first bit, width in bits) -- TODO confirm.
// Read that way: bits 0-3 are four single-bit flags and bits 4-31 are
// reserved, covering all 32 bits.
|
||||
typedef uint32_t amd_queue_properties32_t;
|
||||
enum amd_queue_properties_t {
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_QUEUE_PROPERTIES_ENABLE_TRAP_HANDLER, 0, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_QUEUE_PROPERTIES_IS_PTR64, 1, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_QUEUE_PROPERTIES_ENABLE_TRAP_HANDLER_DEBUG_SGPRS, 2, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_QUEUE_PROPERTIES_ENABLE_PROFILING, 3, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_QUEUE_PROPERTIES_RESERVED1, 4, 28)
|
||||
};
|
||||
|
||||
// AMD Queue.
// Fixed-layout record that begins with the public hsa_queue_t, followed by
// AMD-specific state. Do not reorder or resize members.
// The four members that are volatile-qualified below (write_dispatch_id,
// max_legacy_doorbell_dispatch_id_plus_1, legacy_doorbell_lock,
// read_dispatch_id) are the only ones declared volatile.
// NOTE(review): presumably those are updated concurrently by other
// agents/threads -- confirm. The reservedN arrays look like padding that
// places later members on fixed offsets; the exact offsets depend on
// sizeof(hsa_queue_t) and sizeof(hsa_signal_t), which are declared in hsa.h.
// NOTE(review): the alignment macro is written after `struct` here, whereas
// the kernel-code structs shown earlier in this diff put it before `struct`
// -- check that both forms behave the same on every supported compiler.
|
||||
#define AMD_QUEUE_ALIGN_BYTES 64
|
||||
#define AMD_QUEUE_ALIGN __ALIGNED__(AMD_QUEUE_ALIGN_BYTES)
|
||||
typedef struct AMD_QUEUE_ALIGN amd_queue_s {
|
||||
hsa_queue_t hsa_queue;
|
||||
uint32_t reserved1[4];
|
||||
volatile uint64_t write_dispatch_id;
|
||||
uint32_t group_segment_aperture_base_hi;
|
||||
uint32_t private_segment_aperture_base_hi;
|
||||
uint32_t max_cu_id;
|
||||
uint32_t max_wave_id;
|
||||
volatile uint64_t max_legacy_doorbell_dispatch_id_plus_1;
|
||||
volatile uint32_t legacy_doorbell_lock;
|
||||
uint32_t reserved2[9];
|
||||
volatile uint64_t read_dispatch_id;
|
||||
uint32_t read_dispatch_id_field_base_byte_offset;
|
||||
uint32_t compute_tmpring_size;
|
||||
uint32_t scratch_resource_descriptor[4];
|
||||
uint64_t scratch_backing_memory_location;
|
||||
uint64_t scratch_backing_memory_byte_size;
|
||||
uint32_t scratch_workitem_byte_size;
|
||||
// Bit fields of this word are named by amd_queue_properties_t.
amd_queue_properties32_t queue_properties;
|
||||
uint32_t reserved3[2];
|
||||
hsa_signal_t queue_inactive_signal;
|
||||
uint32_t reserved4[14];
|
||||
} amd_queue_t;
|
||||
|
||||
#endif // AMD_HSA_QUEUE_H
|
||||
@@ -1,80 +0,0 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef AMD_HSA_SIGNAL_H
|
||||
#define AMD_HSA_SIGNAL_H
|
||||
|
||||
#include "amd_hsa_common.h"
|
||||
#include "amd_hsa_queue.h"
|
||||
|
||||
// AMD Signal Kind Enumeration Values.
// amd_signal_kind64_t is the signed 64-bit storage type (the type of
// amd_signal_s.kind). INVALID is 0, USER is positive, and the two doorbell
// kinds are negative, so the sign separates user signals from doorbells.
|
||||
typedef int64_t amd_signal_kind64_t;
|
||||
enum amd_signal_kind_t {
|
||||
AMD_SIGNAL_KIND_INVALID = 0,
|
||||
AMD_SIGNAL_KIND_USER = 1,
|
||||
AMD_SIGNAL_KIND_DOORBELL = -1,
|
||||
AMD_SIGNAL_KIND_LEGACY_DOORBELL = -2
|
||||
};
|
||||
|
||||
// AMD Signal.
// Fixed-layout record; do not reorder or resize members.
// With 8-byte pointers the members add up to 64 bytes (8 + 8 + 8 + 4 + 4 +
// 8 + 8 + 8 + 8), i.e. exactly AMD_SIGNAL_ALIGN_BYTES.
// Both unions are anonymous, so their members are accessed directly on the
// struct. __ALIGNED__ is defined outside this section.
|
||||
#define AMD_SIGNAL_ALIGN_BYTES 64
|
||||
#define AMD_SIGNAL_ALIGN __ALIGNED__(AMD_SIGNAL_ALIGN_BYTES)
|
||||
typedef struct AMD_SIGNAL_ALIGN amd_signal_s {
|
||||
amd_signal_kind64_t kind;
|
||||
// Overlays the signal value with two doorbell pointer types.
// NOTE(review): presumably `kind` selects which member is meaningful
// (USER -> value, DOORBELL / LEGACY_DOORBELL -> the pointers) -- confirm.
union {
|
||||
volatile int64_t value;
|
||||
volatile uint32_t* legacy_hardware_doorbell_ptr;
|
||||
volatile uint64_t* hardware_doorbell_ptr;
|
||||
};
|
||||
uint64_t event_mailbox_ptr;
|
||||
uint32_t event_id;
|
||||
uint32_t reserved1;
|
||||
uint64_t start_ts;
|
||||
uint64_t end_ts;
|
||||
// Overlaying the pointer with a uint64_t keeps this slot 8 bytes wide even
// where pointers are narrower.
union {
|
||||
amd_queue_t* queue_ptr;
|
||||
uint64_t reserved2;
|
||||
};
|
||||
uint32_t reserved3[2];
|
||||
} amd_signal_t;
|
||||
|
||||
#endif // AMD_HSA_SIGNAL_H
|
||||
Diferenças do arquivo suprimidas por serem muito extensas
Carregar Diff
@@ -1,177 +0,0 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef HSA_RUNTIME_INC_HSA_API_TRACE_H
|
||||
#define HSA_RUNTIME_INC_HSA_API_TRACE_H
|
||||
|
||||
#include "hsa.h"
|
||||
#ifdef AMD_INTERNAL_BUILD
|
||||
#include "hsa_ext_image.h"
|
||||
#include "hsa_ext_amd.h"
|
||||
#include "hsa_ext_finalize.h"
|
||||
#else
|
||||
#include "inc/hsa_ext_image.h"
|
||||
#include "inc/hsa_ext_amd.h"
|
||||
#include "inc/hsa_ext_finalize.h"
|
||||
#endif
|
||||
|
||||
// Table of function pointers for the HSA extension entry points.
// Every member is declared as decltype(<function>)*, i.e. a pointer whose
// type is taken from the function's own declaration in the included headers,
// and is named <function>_fn. decltype requires C++11.
// The first six entries are the finalizer program functions
// (hsa_ext_program_*); the remaining ten are the image and sampler functions.
// Member order defines the struct layout -- do not reorder.
struct ExtTable {
|
||||
decltype(hsa_ext_program_create)* hsa_ext_program_create_fn;
|
||||
decltype(hsa_ext_program_destroy)* hsa_ext_program_destroy_fn;
|
||||
decltype(hsa_ext_program_add_module)* hsa_ext_program_add_module_fn;
|
||||
decltype(hsa_ext_program_iterate_modules)* hsa_ext_program_iterate_modules_fn;
|
||||
decltype(hsa_ext_program_get_info)* hsa_ext_program_get_info_fn;
|
||||
decltype(hsa_ext_program_finalize)* hsa_ext_program_finalize_fn;
|
||||
decltype(hsa_ext_image_get_capability)* hsa_ext_image_get_capability_fn;
|
||||
decltype(hsa_ext_image_data_get_info)* hsa_ext_image_data_get_info_fn;
|
||||
decltype(hsa_ext_image_create)* hsa_ext_image_create_fn;
|
||||
decltype(hsa_ext_image_import)* hsa_ext_image_import_fn;
|
||||
decltype(hsa_ext_image_export)* hsa_ext_image_export_fn;
|
||||
decltype(hsa_ext_image_copy)* hsa_ext_image_copy_fn;
|
||||
decltype(hsa_ext_image_clear)* hsa_ext_image_clear_fn;
|
||||
decltype(hsa_ext_image_destroy)* hsa_ext_image_destroy_fn;
|
||||
decltype(hsa_ext_sampler_create)* hsa_ext_sampler_create_fn;
|
||||
decltype(hsa_ext_sampler_destroy)* hsa_ext_sampler_destroy_fn;
|
||||
};
|
||||
|
||||
// Table of function pointers for the core HSA runtime entry points.
// Every member is declared as decltype(<function>)*, i.e. a pointer whose
// type is taken from the function's declaration in hsa.h, and is named
// <function>_fn. decltype requires C++11.
// The final member, std_exts_, points to an ExtTable holding the extension
// entry points.
// Member order defines the struct layout -- do not reorder.
struct ApiTable {
|
||||
// Runtime lifetime, system and agent queries.
decltype(hsa_init)* hsa_init_fn;
|
||||
decltype(hsa_shut_down)* hsa_shut_down_fn;
|
||||
decltype(hsa_system_get_info)* hsa_system_get_info_fn;
|
||||
decltype(hsa_system_extension_supported)* hsa_system_extension_supported_fn;
|
||||
decltype(hsa_system_get_extension_table)* hsa_system_get_extension_table_fn;
|
||||
decltype(hsa_iterate_agents)* hsa_iterate_agents_fn;
|
||||
decltype(hsa_agent_get_info)* hsa_agent_get_info_fn;
|
||||
// Queue creation/destruction and read/write index operations.
decltype(hsa_queue_create)* hsa_queue_create_fn;
|
||||
decltype(hsa_soft_queue_create)* hsa_soft_queue_create_fn;
|
||||
decltype(hsa_queue_destroy)* hsa_queue_destroy_fn;
|
||||
decltype(hsa_queue_inactivate)* hsa_queue_inactivate_fn;
|
||||
decltype(hsa_queue_load_read_index_acquire)* hsa_queue_load_read_index_acquire_fn;
|
||||
decltype(hsa_queue_load_read_index_relaxed)* hsa_queue_load_read_index_relaxed_fn;
|
||||
decltype(hsa_queue_load_write_index_acquire)* hsa_queue_load_write_index_acquire_fn;
|
||||
decltype(hsa_queue_load_write_index_relaxed)* hsa_queue_load_write_index_relaxed_fn;
|
||||
decltype(hsa_queue_store_write_index_relaxed)* hsa_queue_store_write_index_relaxed_fn;
|
||||
decltype(hsa_queue_store_write_index_release)* hsa_queue_store_write_index_release_fn;
|
||||
decltype(hsa_queue_cas_write_index_acq_rel)* hsa_queue_cas_write_index_acq_rel_fn;
|
||||
decltype(hsa_queue_cas_write_index_acquire)* hsa_queue_cas_write_index_acquire_fn;
|
||||
decltype(hsa_queue_cas_write_index_relaxed)* hsa_queue_cas_write_index_relaxed_fn;
|
||||
decltype(hsa_queue_cas_write_index_release)* hsa_queue_cas_write_index_release_fn;
|
||||
decltype(hsa_queue_add_write_index_acq_rel)* hsa_queue_add_write_index_acq_rel_fn;
|
||||
decltype(hsa_queue_add_write_index_acquire)* hsa_queue_add_write_index_acquire_fn;
|
||||
decltype(hsa_queue_add_write_index_relaxed)* hsa_queue_add_write_index_relaxed_fn;
|
||||
decltype(hsa_queue_add_write_index_release)* hsa_queue_add_write_index_release_fn;
|
||||
decltype(hsa_queue_store_read_index_relaxed)* hsa_queue_store_read_index_relaxed_fn;
|
||||
decltype(hsa_queue_store_read_index_release)* hsa_queue_store_read_index_release_fn;
|
||||
// Regions, agent capability queries and memory management.
decltype(hsa_agent_iterate_regions)* hsa_agent_iterate_regions_fn;
|
||||
decltype(hsa_region_get_info)* hsa_region_get_info_fn;
|
||||
decltype(hsa_agent_get_exception_policies)* hsa_agent_get_exception_policies_fn;
|
||||
decltype(hsa_agent_extension_supported)* hsa_agent_extension_supported_fn;
|
||||
decltype(hsa_memory_register)* hsa_memory_register_fn;
|
||||
decltype(hsa_memory_deregister)* hsa_memory_deregister_fn;
|
||||
decltype(hsa_memory_allocate)* hsa_memory_allocate_fn;
|
||||
decltype(hsa_memory_free)* hsa_memory_free_fn;
|
||||
decltype(hsa_memory_copy)* hsa_memory_copy_fn;
|
||||
decltype(hsa_memory_assign_agent)* hsa_memory_assign_agent_fn;
|
||||
// Signals: create/destroy, load/store/wait, then the read-modify-write
// operations, each listed once per memory-order suffix.
decltype(hsa_signal_create)* hsa_signal_create_fn;
|
||||
decltype(hsa_signal_destroy)* hsa_signal_destroy_fn;
|
||||
decltype(hsa_signal_load_relaxed)* hsa_signal_load_relaxed_fn;
|
||||
decltype(hsa_signal_load_acquire)* hsa_signal_load_acquire_fn;
|
||||
decltype(hsa_signal_store_relaxed)* hsa_signal_store_relaxed_fn;
|
||||
decltype(hsa_signal_store_release)* hsa_signal_store_release_fn;
|
||||
decltype(hsa_signal_wait_relaxed)* hsa_signal_wait_relaxed_fn;
|
||||
decltype(hsa_signal_wait_acquire)* hsa_signal_wait_acquire_fn;
|
||||
decltype(hsa_signal_and_relaxed)* hsa_signal_and_relaxed_fn;
|
||||
decltype(hsa_signal_and_acquire)* hsa_signal_and_acquire_fn;
|
||||
decltype(hsa_signal_and_release)* hsa_signal_and_release_fn;
|
||||
decltype(hsa_signal_and_acq_rel)* hsa_signal_and_acq_rel_fn;
|
||||
decltype(hsa_signal_or_relaxed)* hsa_signal_or_relaxed_fn;
|
||||
decltype(hsa_signal_or_acquire)* hsa_signal_or_acquire_fn;
|
||||
decltype(hsa_signal_or_release)* hsa_signal_or_release_fn;
|
||||
decltype(hsa_signal_or_acq_rel)* hsa_signal_or_acq_rel_fn;
|
||||
decltype(hsa_signal_xor_relaxed)* hsa_signal_xor_relaxed_fn;
|
||||
decltype(hsa_signal_xor_acquire)* hsa_signal_xor_acquire_fn;
|
||||
decltype(hsa_signal_xor_release)* hsa_signal_xor_release_fn;
|
||||
decltype(hsa_signal_xor_acq_rel)* hsa_signal_xor_acq_rel_fn;
|
||||
decltype(hsa_signal_exchange_relaxed)* hsa_signal_exchange_relaxed_fn;
|
||||
decltype(hsa_signal_exchange_acquire)* hsa_signal_exchange_acquire_fn;
|
||||
decltype(hsa_signal_exchange_release)* hsa_signal_exchange_release_fn;
|
||||
decltype(hsa_signal_exchange_acq_rel)* hsa_signal_exchange_acq_rel_fn;
|
||||
decltype(hsa_signal_add_relaxed)* hsa_signal_add_relaxed_fn;
|
||||
decltype(hsa_signal_add_acquire)* hsa_signal_add_acquire_fn;
|
||||
decltype(hsa_signal_add_release)* hsa_signal_add_release_fn;
|
||||
decltype(hsa_signal_add_acq_rel)* hsa_signal_add_acq_rel_fn;
|
||||
decltype(hsa_signal_subtract_relaxed)* hsa_signal_subtract_relaxed_fn;
|
||||
decltype(hsa_signal_subtract_acquire)* hsa_signal_subtract_acquire_fn;
|
||||
decltype(hsa_signal_subtract_release)* hsa_signal_subtract_release_fn;
|
||||
decltype(hsa_signal_subtract_acq_rel)* hsa_signal_subtract_acq_rel_fn;
|
||||
decltype(hsa_signal_cas_relaxed)* hsa_signal_cas_relaxed_fn;
|
||||
decltype(hsa_signal_cas_acquire)* hsa_signal_cas_acquire_fn;
|
||||
decltype(hsa_signal_cas_release)* hsa_signal_cas_release_fn;
|
||||
decltype(hsa_signal_cas_acq_rel)* hsa_signal_cas_acq_rel_fn;
|
||||
// ISA, code object and executable management.
decltype(hsa_isa_from_name)* hsa_isa_from_name_fn;
|
||||
decltype(hsa_isa_get_info)* hsa_isa_get_info_fn;
|
||||
decltype(hsa_isa_compatible)* hsa_isa_compatible_fn;
|
||||
decltype(hsa_code_object_serialize)* hsa_code_object_serialize_fn;
|
||||
decltype(hsa_code_object_deserialize)* hsa_code_object_deserialize_fn;
|
||||
decltype(hsa_code_object_destroy)* hsa_code_object_destroy_fn;
|
||||
decltype(hsa_code_object_get_info)* hsa_code_object_get_info_fn;
|
||||
decltype(hsa_code_object_get_symbol)* hsa_code_object_get_symbol_fn;
|
||||
decltype(hsa_code_symbol_get_info)* hsa_code_symbol_get_info_fn;
|
||||
decltype(hsa_code_object_iterate_symbols)* hsa_code_object_iterate_symbols_fn;
|
||||
decltype(hsa_executable_create)* hsa_executable_create_fn;
|
||||
decltype(hsa_executable_destroy)* hsa_executable_destroy_fn;
|
||||
decltype(hsa_executable_load_code_object)* hsa_executable_load_code_object_fn;
|
||||
decltype(hsa_executable_freeze)* hsa_executable_freeze_fn;
|
||||
decltype(hsa_executable_get_info)* hsa_executable_get_info_fn;
|
||||
decltype(hsa_executable_global_variable_define)* hsa_executable_global_variable_define_fn;
|
||||
decltype(hsa_executable_agent_global_variable_define)* hsa_executable_agent_global_variable_define_fn;
|
||||
decltype(hsa_executable_readonly_variable_define)* hsa_executable_readonly_variable_define_fn;
|
||||
decltype(hsa_executable_validate)* hsa_executable_validate_fn;
|
||||
decltype(hsa_executable_get_symbol)* hsa_executable_get_symbol_fn;
|
||||
decltype(hsa_executable_symbol_get_info)* hsa_executable_symbol_get_info_fn;
|
||||
decltype(hsa_executable_iterate_symbols)* hsa_executable_iterate_symbols_fn;
|
||||
decltype(hsa_status_string)* hsa_status_string_fn;
|
||||
|
||||
// Pointer to the companion table of extension entry points.
ExtTable* std_exts_;
|
||||
};
|
||||
|
||||
#endif
|
||||
Diferenças do arquivo suprimidas por serem muito extensas
Carregar Diff
@@ -1,531 +0,0 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef HSA_RUNTIME_INC_HSA_EXT_FINALIZE_H_
|
||||
#define HSA_RUNTIME_INC_HSA_EXT_FINALIZE_H_
|
||||
|
||||
#include "hsa.h"
|
||||
|
||||
// Re-point HSA_API for the declarations in this header: drop whatever
// definition is currently active, then select HSA_API_EXPORT when
// HSA_EXPORT_FINALIZER is defined and HSA_API_IMPORT otherwise.
// HSA_API_EXPORT / HSA_API_IMPORT are not defined in this header.
#undef HSA_API
|
||||
#ifdef HSA_EXPORT_FINALIZER
|
||||
#define HSA_API HSA_API_EXPORT
|
||||
#else
|
||||
#define HSA_API HSA_API_IMPORT
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif // __cplusplus
|
||||
|
||||
// Forward declaration only: BrigModuleHeader stays an incomplete type in this
// header, and BrigModule_t is a pointer to it, so no BRIG definitions need to
// be included here.
struct BrigModuleHeader;
|
||||
typedef struct BrigModuleHeader* BrigModule_t;
|
||||
|
||||
/** \defgroup ext-alt-finalizer-extensions Finalization Extensions
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Enumeration constants added to ::hsa_status_t by this extension.
|
||||
* The constants occupy the contiguous range 0x2000 - 0x2006.
*/
|
||||
enum {
|
||||
/**
|
||||
* The HSAIL program is invalid.
|
||||
*/
|
||||
HSA_EXT_STATUS_ERROR_INVALID_PROGRAM = 0x2000,
|
||||
/**
|
||||
* The HSAIL module is invalid.
|
||||
*/
|
||||
HSA_EXT_STATUS_ERROR_INVALID_MODULE = 0x2001,
|
||||
/**
|
||||
* Machine model or profile of the HSAIL module does not match the machine
|
||||
* model or profile of the HSAIL program.
|
||||
*/
|
||||
HSA_EXT_STATUS_ERROR_INCOMPATIBLE_MODULE = 0x2002,
|
||||
/**
|
||||
* The HSAIL module is already a part of the HSAIL program.
|
||||
*/
|
||||
HSA_EXT_STATUS_ERROR_MODULE_ALREADY_INCLUDED = 0x2003,
|
||||
/**
|
||||
* Compatibility mismatch between symbol declaration and symbol definition.
|
||||
*/
|
||||
HSA_EXT_STATUS_ERROR_SYMBOL_MISMATCH = 0x2004,
|
||||
/**
|
||||
* The finalization encountered an error while finalizing a kernel or
|
||||
* indirect function.
|
||||
*/
|
||||
HSA_EXT_STATUS_ERROR_FINALIZATION_FAILED = 0x2005,
|
||||
/**
|
||||
* Mismatch between a directive in the control directive structure and in
|
||||
* the HSAIL kernel.
|
||||
*/
|
||||
HSA_EXT_STATUS_ERROR_DIRECTIVE_MISMATCH = 0x2006
|
||||
};
|
||||
|
||||
/** @} */
|
||||
|
||||
/** \defgroup ext-alt-finalizer-program Finalization Program
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief HSAIL (BRIG) module. The HSA Programmer's Reference Manual contains
|
||||
* the definition of the BrigModule_t type.
|
||||
*/
|
||||
typedef BrigModule_t hsa_ext_module_t;
|
||||
|
||||
/**
|
||||
* @brief An opaque handle to a HSAIL program, which groups a set of HSAIL
|
||||
* modules that collectively define functions and variables used by kernels and
|
||||
* indirect functions.
|
||||
*
* The struct wraps a single 64-bit value. In this header it is passed by
* value to ::hsa_ext_program_destroy and written through a pointer
* (out-parameter) by ::hsa_ext_program_create.
*/
|
||||
typedef struct hsa_ext_program_s {
|
||||
/**
|
||||
* Opaque handle.
|
||||
*/
|
||||
uint64_t handle;
|
||||
} hsa_ext_program_t;
|
||||
|
||||
/**
|
||||
* @brief Create an empty HSAIL program.
|
||||
*
|
||||
* @param[in] machine_model Machine model used in the HSAIL program.
|
||||
*
|
||||
* @param[in] profile Profile used in the HSAIL program.
|
||||
*
|
||||
* @param[in] default_float_rounding_mode Default float rounding mode used in
|
||||
* the HSAIL program.
|
||||
*
|
||||
* @param[in] options Vendor-specific options. May be NULL.
|
||||
*
|
||||
* @param[out] program Memory location where the HSA runtime stores the newly
|
||||
* created HSAIL program handle.
|
||||
*
|
||||
* @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
|
||||
* initialized.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES There is a failure to allocate
|
||||
* resources required for the operation.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p machine_model is invalid,
|
||||
* @p profile is invalid, @p default_float_rounding_mode is invalid, or
|
||||
* @p program is NULL.
|
||||
*/
|
||||
hsa_status_t HSA_API hsa_ext_program_create(
|
||||
hsa_machine_model_t machine_model,
|
||||
hsa_profile_t profile,
|
||||
hsa_default_float_rounding_mode_t default_float_rounding_mode,
|
||||
const char *options,
|
||||
hsa_ext_program_t *program);
|
||||
|
||||
/**
|
||||
* @brief Destroy a HSAIL program.
|
||||
*
|
||||
* @details The HSAIL program handle becomes invalid after it has been
|
||||
* destroyed. Code object handles produced by ::hsa_ext_program_finalize are
|
||||
* still valid after the HSAIL program has been destroyed, and can be used as
|
||||
* intended. Resources allocated outside and associated with the HSAIL program
|
||||
* (such as HSAIL modules that are added to the HSAIL program) can be released
|
||||
* after the HSAIL program has been destroyed.
|
||||
*
|
||||
* @param[in] program HSAIL program.
|
||||
*
|
||||
* @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
|
||||
* initialized.
|
||||
*
|
||||
* @retval ::HSA_EXT_STATUS_ERROR_INVALID_PROGRAM The HSAIL program is
|
||||
* invalid.
|
||||
*/
|
||||
hsa_status_t HSA_API hsa_ext_program_destroy(
|
||||
hsa_ext_program_t program);
|
||||
|
||||
/**
|
||||
* @brief Add a HSAIL module to an existing HSAIL program.
|
||||
*
|
||||
* @details The HSA runtime does not perform a deep copy of the HSAIL module
|
||||
* upon addition. Instead, it stores a pointer to the HSAIL module. The
|
||||
* ownership of the HSAIL module belongs to the application, which must ensure
|
||||
* that @p module is not released before destroying the HSAIL program.
|
||||
*
|
||||
* The HSAIL module is successfully added to the HSAIL program if @p module is
|
||||
* valid, if all the declarations and definitions for the same symbol are
|
||||
* compatible, and if @p module specifies a machine model and profile that match
|
||||
* the HSAIL program.
|
||||
*
|
||||
* @param[in] program HSAIL program.
|
||||
*
|
||||
* @param[in] module HSAIL module. The application can add the same HSAIL module
|
||||
* to @p program at most once. The HSAIL module must specify the same machine
|
||||
* model and profile as @p program. If the floating-point rounding mode of @p
|
||||
* module is not default, then it should match that of @p program.
|
||||
*
|
||||
* @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
|
||||
* initialized.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES There is a failure to allocate
|
||||
* resources required for the operation.
|
||||
*
|
||||
* @retval ::HSA_EXT_STATUS_ERROR_INVALID_PROGRAM The HSAIL program is invalid.
|
||||
*
|
||||
* @retval ::HSA_EXT_STATUS_ERROR_INVALID_MODULE The HSAIL module is invalid.
|
||||
*
|
||||
* @retval ::HSA_EXT_STATUS_ERROR_INCOMPATIBLE_MODULE The machine model of @p
|
||||
* module does not match machine model of @p program, or the profile of @p
|
||||
* module does not match profile of @p program.
|
||||
*
|
||||
* @retval ::HSA_EXT_STATUS_ERROR_MODULE_ALREADY_INCLUDED The HSAIL module is
|
||||
* already a part of the HSAIL program.
|
||||
*
|
||||
* @retval ::HSA_EXT_STATUS_ERROR_SYMBOL_MISMATCH Symbol declaration and symbol
|
||||
* definition compatibility mismatch. See the symbol compatibility rules in the
|
||||
* HSA Programmer's Reference Manual.
|
||||
*/
|
||||
hsa_status_t HSA_API hsa_ext_program_add_module(
|
||||
hsa_ext_program_t program,
|
||||
hsa_ext_module_t module);
|
||||
|
||||
/**
|
||||
* @brief Iterate over the HSAIL modules in a program, and invoke an
|
||||
* application-defined callback on every iteration.
|
||||
*
|
||||
* @param[in] program HSAIL program.
|
||||
*
|
||||
* @param[in] callback Callback to be invoked once per HSAIL module in the
|
||||
* program. The HSA runtime passes three arguments to the callback: the program,
|
||||
* a HSAIL module, and the application data. If @p callback returns a status
|
||||
* other than ::HSA_STATUS_SUCCESS for a particular iteration, the traversal
|
||||
* stops and ::hsa_ext_program_iterate_modules returns that status value.
|
||||
*
|
||||
* @param[in] data Application data that is passed to @p callback on every
|
||||
* iteration. May be NULL.
|
||||
*
|
||||
* @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
|
||||
* initialized.
|
||||
*
|
||||
* @retval ::HSA_EXT_STATUS_ERROR_INVALID_PROGRAM The program is invalid.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p callback is NULL.
|
||||
*/
|
||||
hsa_status_t HSA_API hsa_ext_program_iterate_modules(
|
||||
hsa_ext_program_t program,
|
||||
hsa_status_t (*callback)(hsa_ext_program_t program, hsa_ext_module_t module,
|
||||
void* data),
|
||||
void* data);
|
||||
|
||||
/**
|
||||
* @brief HSAIL program attributes.
|
||||
*/
|
||||
typedef enum {
|
||||
/**
|
||||
* Machine model specified when the HSAIL program was created. The type
|
||||
* of this attribute is ::hsa_machine_model_t.
|
||||
*/
|
||||
HSA_EXT_PROGRAM_INFO_MACHINE_MODEL = 0,
|
||||
/**
|
||||
* Profile specified when the HSAIL program was created. The type of
|
||||
* this attribute is ::hsa_profile_t.
|
||||
*/
|
||||
HSA_EXT_PROGRAM_INFO_PROFILE = 1,
|
||||
/**
|
||||
* Default float rounding mode specified when the HSAIL program was
|
||||
* created. The type of this attribute is ::hsa_default_float_rounding_mode_t.
|
||||
*/
|
||||
HSA_EXT_PROGRAM_INFO_DEFAULT_FLOAT_ROUNDING_MODE = 2
|
||||
} hsa_ext_program_info_t;
|
||||
|
||||
/**
|
||||
* @brief Get the current value of an attribute for a given HSAIL program.
|
||||
*
|
||||
* @param[in] program HSAIL program.
|
||||
*
|
||||
* @param[in] attribute Attribute to query.
|
||||
*
|
||||
* @param[out] value Pointer to an application-allocated buffer where to store
|
||||
* the value of the attribute. If the buffer passed by the application is not
|
||||
* large enough to hold the value of @p attribute, the behaviour is undefined.
|
||||
*
|
||||
* @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
|
||||
* initialized.
|
||||
*
|
||||
* @retval ::HSA_EXT_STATUS_ERROR_INVALID_PROGRAM The HSAIL program is invalid.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p attribute is an invalid
|
||||
* HSAIL program attribute, or @p value is NULL.
|
||||
*/
|
||||
hsa_status_t HSA_API hsa_ext_program_get_info(
|
||||
hsa_ext_program_t program,
|
||||
hsa_ext_program_info_t attribute,
|
||||
void *value);
|
||||
|
||||
/**
|
||||
* @brief Finalizer-determined call convention.
|
||||
*/
|
||||
typedef enum {
|
||||
/**
|
||||
* Finalizer-determined call convention.
|
||||
*/
|
||||
HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO = -1
|
||||
} hsa_ext_finalizer_call_convention_t;
|
||||
|
||||
/**
|
||||
* @brief Control directives specify low-level information about the
|
||||
* finalization process.
|
||||
*/
|
||||
typedef struct hsa_ext_control_directives_s {
|
||||
/**
|
||||
* Bitset indicating which control directives are enabled. The bit assigned to
|
||||
* a control directive is determined by the corresponding value in
|
||||
* BrigControlDirective.
|
||||
*
|
||||
* If a control directive is disabled, its corresponding field value (if any)
|
||||
* must be 0. Control directives that are only present or absent (such as
|
||||
* partial workgroups) have no corresponding field as the presence of the bit
|
||||
* in this mask is sufficient.
|
||||
*/
|
||||
uint64_t control_directives_mask;
|
||||
/**
|
||||
* Bitset of HSAIL exceptions that must have the BREAK policy enabled. The bit
|
||||
* assigned to an HSAIL exception is determined by the corresponding value
|
||||
* in BrigExceptionsMask. If the kernel contains an enablebreakexceptions
|
||||
* control directive, the finalizer uses the union of the two masks.
|
||||
*/
|
||||
uint16_t break_exceptions_mask;
|
||||
/**
|
||||
* Bitset of HSAIL exceptions that must have the DETECT policy enabled. The
|
||||
* bit assigned to an HSAIL exception is determined by the corresponding value
|
||||
* in BrigExceptionsMask. If the kernel contains an enabledetectexceptions
|
||||
* control directive, the finalizer uses the union of the two masks.
|
||||
*/
|
||||
uint16_t detect_exceptions_mask;
|
||||
/**
|
||||
* Maximum size (in bytes) of dynamic group memory that will be allocated by
|
||||
* the application for any dispatch of the kernel. If the kernel contains a
|
||||
* maxdynamicsize control directive, the two values should match.
|
||||
*/
|
||||
uint32_t max_dynamic_group_size;
|
||||
/**
|
||||
* Maximum number of grid work-items that will be used by the application to
|
||||
* launch the kernel. If the kernel contains a maxflatgridsize control
|
||||
* directive, the value of @a max_flat_grid_size must not be greater than the
|
||||
* value of the directive, and takes precedence.
|
||||
*
|
||||
* The value specified for maximum absolute grid size must be greater than or
|
||||
* equal to the product of the values specified by @a required_grid_size.
|
||||
*
|
||||
* If the bit at position BRIG_CONTROL_MAXFLATGRIDSIZE is set in @a
|
||||
* control_directives_mask, this field must be greater than 0.
|
||||
*/
|
||||
uint64_t max_flat_grid_size;
|
||||
/**
|
||||
* Maximum number of work-group work-items that will be used by the
|
||||
* application to launch the kernel. If the kernel contains a
|
||||
* maxflatworkgroupsize control directive, the value of @a
|
||||
* max_flat_workgroup_size must not be greater than the value of the
|
||||
* directive, and takes precedence.
|
||||
*
|
||||
* The value specified for maximum absolute work-group size must be greater than or
|
||||
* equal to the product of the values specified by @a required_workgroup_size.
|
||||
*
|
||||
* If the bit at position BRIG_CONTROL_MAXFLATWORKGROUPSIZE is set in @a
|
||||
* control_directives_mask, this field must be greater than 0.
|
||||
*/
|
||||
uint32_t max_flat_workgroup_size;
|
||||
/**
|
||||
* Reserved. Must be 0.
|
||||
*/
|
||||
uint32_t reserved1;
|
||||
/**
|
||||
* Grid size that will be used by the application in any dispatch of the
|
||||
* kernel. If the kernel contains a requiredgridsize control directive, the
|
||||
* dimensions should match.
|
||||
*
|
||||
* The specified grid size must be consistent with @a required_workgroup_size
|
||||
* and @a required_dim. Also, the product of the three dimensions must not
|
||||
* exceed @a max_flat_grid_size. Note that the listed invariants must hold
|
||||
* only if all the corresponding control directives are enabled.
|
||||
*
|
||||
* If the bit at position BRIG_CONTROL_REQUIREDGRIDSIZE is set in @a
|
||||
* control_directives_mask, the three dimension values must be greater than 0.
|
||||
*/
|
||||
uint64_t required_grid_size[3];
|
||||
/**
|
||||
* Work-group size that will be used by the application in any dispatch of the
|
||||
* kernel. If the kernel contains a requiredworkgroupsize control directive,
|
||||
* the dimensions should match.
|
||||
*
|
||||
* The specified work-group size must be consistent with @a required_grid_size
|
||||
* and @a required_dim. Also, the product of the three dimensions must not
|
||||
* exceed @a max_flat_workgroup_size. Note that the listed invariants must
|
||||
* hold only if all the corresponding control directives are enabled.
|
||||
*
|
||||
* If the bit at position BRIG_CONTROL_REQUIREDWORKGROUPSIZE is set in @a
|
||||
* control_directives_mask, the three dimension values must be greater than 0.
|
||||
*/
|
||||
hsa_dim3_t required_workgroup_size;
|
||||
/**
|
||||
* Number of dimensions that will be used by the application to launch the
|
||||
* kernel. If the kernel contains a requireddim control directive, the two
|
||||
* values should match.
|
||||
*
|
||||
* The specified dimensions must be consistent with @a required_grid_size and
|
||||
* @a required_workgroup_size. This invariant must hold only if all the
|
||||
* corresponding control directives are enabled.
|
||||
*
|
||||
* If the bit at position BRIG_CONTROL_REQUIREDDIM is set in @a
|
||||
* control_directives_mask, this field must be 1, 2, or 3.
|
||||
*/
|
||||
uint8_t required_dim;
|
||||
/**
|
||||
* Reserved. Must be 0.
|
||||
*/
|
||||
uint8_t reserved2[75];
|
||||
} hsa_ext_control_directives_t;
|
||||
|
||||
/**
|
||||
* @brief Finalize an HSAIL program for a given instruction set architecture.
|
||||
*
|
||||
* @details Finalize all of the kernels and indirect functions that belong to
|
||||
* the same HSAIL program for a specific instruction set architecture (ISA). The
|
||||
* transitive closure of all functions specified by call or scall must be
|
||||
* defined. Kernels and indirect functions that are being finalized must be
|
||||
* defined. Kernels and indirect functions that are referenced in kernels and
|
||||
* indirect functions being finalized may or may not be defined, but must be
|
||||
* declared. All the global/readonly segment variables that are referenced in
|
||||
* kernels and indirect functions being finalized may or may not be defined, but
|
||||
* must be declared.
|
||||
*
|
||||
* @param[in] program HSAIL program.
|
||||
*
|
||||
* @param[in] isa Instruction set architecture to finalize for.
|
||||
*
|
||||
* @param[in] call_convention A call convention used in a finalization. Must
|
||||
* have a value between ::HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO (inclusive)
|
||||
* and the value of the attribute ::HSA_ISA_INFO_CALL_CONVENTION_COUNT in @p
|
||||
* isa (not inclusive).
|
||||
*
|
||||
* @param[in] control_directives Low-level control directives that influence
|
||||
* the finalization process.
|
||||
*
|
||||
* @param[in] options Vendor-specific options. May be NULL.
|
||||
*
|
||||
* @param[in] code_object_type Type of code object to produce.
|
||||
*
|
||||
* @param[out] code_object Code object generated by the Finalizer, which
|
||||
* contains the machine code for the kernels and indirect functions in the HSAIL
|
||||
* program. The code object is independent of the HSAIL module that was used to
|
||||
* generate it.
|
||||
*
|
||||
* @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
|
||||
* initialized.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES There is a failure to allocate
|
||||
* resources required for the operation.
|
||||
*
|
||||
* @retval ::HSA_EXT_STATUS_ERROR_INVALID_PROGRAM The HSAIL program is
|
||||
* invalid.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_INVALID_ISA @p isa is invalid.
|
||||
*
|
||||
* @retval ::HSA_EXT_STATUS_ERROR_DIRECTIVE_MISMATCH The directive in
|
||||
* the control directive structure and in the HSAIL kernel do not match, or the
|
||||
* same directive is used with a different value in one of the functions used by
|
||||
* this kernel.
|
||||
*
|
||||
* @retval ::HSA_EXT_STATUS_ERROR_FINALIZATION_FAILED The Finalizer
|
||||
* encountered an error while compiling a kernel or an indirect function.
|
||||
*/
|
||||
hsa_status_t HSA_API hsa_ext_program_finalize(
|
||||
hsa_ext_program_t program,
|
||||
hsa_isa_t isa,
|
||||
int32_t call_convention,
|
||||
hsa_ext_control_directives_t control_directives,
|
||||
const char *options,
|
||||
hsa_code_object_type_t code_object_type,
|
||||
hsa_code_object_t *code_object);
|
||||
|
||||
/** @} */
|
||||
|
||||
#define hsa_ext_finalizer_1_00
|
||||
|
||||
/**
* @brief Table of function pointers for the finalizer extension. Each member
* has the same name, parameter list, and return type as the corresponding
* hsa_ext_program_* function declared earlier in this header.
*
* NOTE(review): presumably populated by the runtime when the extension is
* queried; confirm against the extension-loading code.
*/
typedef struct hsa_ext_finalizer_1_00_pfn_s {
|
||||
hsa_status_t (*hsa_ext_program_create)(
|
||||
hsa_machine_model_t machine_model, hsa_profile_t profile,
|
||||
hsa_default_float_rounding_mode_t default_float_rounding_mode,
|
||||
const char *options, hsa_ext_program_t *program);
|
||||
|
||||
hsa_status_t (*hsa_ext_program_destroy)(hsa_ext_program_t program);
|
||||
|
||||
hsa_status_t (*hsa_ext_program_add_module)(hsa_ext_program_t program,
|
||||
hsa_ext_module_t module);
|
||||
|
||||
hsa_status_t (*hsa_ext_program_iterate_modules)(
|
||||
hsa_ext_program_t program,
|
||||
hsa_status_t (*callback)(hsa_ext_program_t program,
|
||||
hsa_ext_module_t module, void *data),
|
||||
void *data);
|
||||
|
||||
hsa_status_t (*hsa_ext_program_get_info)(
|
||||
hsa_ext_program_t program, hsa_ext_program_info_t attribute,
|
||||
void *value);
|
||||
|
||||
hsa_status_t (*hsa_ext_program_finalize)(
|
||||
hsa_ext_program_t program, hsa_isa_t isa, int32_t call_convention,
|
||||
hsa_ext_control_directives_t control_directives, const char *options,
|
||||
hsa_code_object_type_t code_object_type, hsa_code_object_t *code_object);
|
||||
} hsa_ext_finalizer_1_00_pfn_t;
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C" block
|
||||
#endif // __cplusplus
|
||||
|
||||
#endif // HSA_RUNTIME_INC_HSA_EXT_FINALIZE_H_
|
||||
@@ -1,964 +0,0 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef HSA_EXT_IMAGE_H
|
||||
#define HSA_EXT_IMAGE_H
|
||||
|
||||
#include "hsa.h"
|
||||
|
||||
/*
* Rebind HSA_API for the declarations in this header: any previous definition
* is discarded, then HSA_API becomes HSA_API_EXPORT when HSA_EXPORT_IMAGES is
* defined and HSA_API_IMPORT otherwise.
* NOTE(review): HSA_API_EXPORT / HSA_API_IMPORT are presumably provided by
* "hsa.h"; confirm.
*/
#undef HSA_API
|
||||
#ifdef HSA_EXPORT_IMAGES
|
||||
#define HSA_API HSA_API_EXPORT
|
||||
#else
|
||||
#define HSA_API HSA_API_IMPORT
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif /*__cplusplus*/
|
||||
|
||||
/** \defgroup ext-images Images and Samplers
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Image handle, populated by ::hsa_ext_image_create. Image
|
||||
* handles are only unique within an agent, not across agents.
|
||||
*
|
||||
*/
|
||||
typedef struct hsa_ext_image_s {
|
||||
/**
|
||||
* Opaque handle.
|
||||
*/
|
||||
uint64_t handle;
|
||||
|
||||
} hsa_ext_image_t;
|
||||
|
||||
/**
|
||||
* @brief Geometry associated with the HSA image (image dimensions allowed in
|
||||
* HSA). The enumeration values match the BRIG type BrigImageGeometry.
|
||||
*/
|
||||
typedef enum {
|
||||
/**
|
||||
* One-dimensional image addressed by width coordinate.
|
||||
*/
|
||||
HSA_EXT_IMAGE_GEOMETRY_1D = 0,
|
||||
|
||||
/**
|
||||
* Two-dimensional image addressed by width and height coordinates.
|
||||
*/
|
||||
HSA_EXT_IMAGE_GEOMETRY_2D = 1,
|
||||
|
||||
/**
|
||||
* Three-dimensional image addressed by width, height, and depth coordinates.
|
||||
*/
|
||||
HSA_EXT_IMAGE_GEOMETRY_3D = 2,
|
||||
|
||||
/**
|
||||
* Array of one-dimensional images with the same size and format. 1D arrays
|
||||
* are addressed by index and width coordinate.
|
||||
*/
|
||||
HSA_EXT_IMAGE_GEOMETRY_1DA = 3,
|
||||
|
||||
/**
|
||||
* Array of two-dimensional images with the same size and format. 2D arrays
|
||||
* are addressed by index and width and height coordinates.
|
||||
*/
|
||||
HSA_EXT_IMAGE_GEOMETRY_2DA = 4,
|
||||
|
||||
/**
|
||||
* One-dimensional image interpreted as a buffer with specific restrictions.
|
||||
*/
|
||||
HSA_EXT_IMAGE_GEOMETRY_1DB = 5,
|
||||
|
||||
/**
|
||||
* Two-dimensional depth image addressed by width and height coordinates.
|
||||
*/
|
||||
HSA_EXT_IMAGE_GEOMETRY_2DDEPTH = 6,
|
||||
|
||||
/**
|
||||
* Array of two-dimensional depth images with the same size and format. 2D
|
||||
* arrays are addressed by index and width and height coordinates.
|
||||
*/
|
||||
HSA_EXT_IMAGE_GEOMETRY_2DADEPTH = 7
|
||||
} hsa_ext_image_geometry_t;
|
||||
|
||||
/**
|
||||
* @brief Channel type associated with the elements of an image. See the Image
|
||||
* section in the HSA Programmer's Reference Manual for definitions of each
|
||||
* component type. The enumeration values match the BRIG type
|
||||
* BrigImageChannelType.
|
||||
*/
|
||||
typedef enum {
|
||||
HSA_EXT_IMAGE_CHANNEL_TYPE_SNORM_INT8 = 0,
|
||||
HSA_EXT_IMAGE_CHANNEL_TYPE_SNORM_INT16 = 1,
|
||||
HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_INT8 = 2,
|
||||
HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_INT16 = 3,
|
||||
HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_INT24 = 4,
|
||||
HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555 = 5,
|
||||
HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565 = 6,
|
||||
HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_SHORT_101010 = 7,
|
||||
HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT8 = 8,
|
||||
HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT16 = 9,
|
||||
HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT32 = 10,
|
||||
HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8 = 11,
|
||||
HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 = 12,
|
||||
HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32 = 13,
|
||||
HSA_EXT_IMAGE_CHANNEL_TYPE_HALF_FLOAT = 14,
|
||||
HSA_EXT_IMAGE_CHANNEL_TYPE_FLOAT = 15
|
||||
} hsa_ext_image_channel_type_t;
|
||||
|
||||
/**
|
||||
*
|
||||
* @brief Channel order associated with the elements of an image. See the
|
||||
* Image section in the HSA Programmer's Reference Manual for definitions of each
|
||||
* component order. The enumeration values match the BRIG type
|
||||
* BrigImageChannelOrder.
|
||||
*/
|
||||
typedef enum {
|
||||
HSA_EXT_IMAGE_CHANNEL_ORDER_A = 0,
|
||||
HSA_EXT_IMAGE_CHANNEL_ORDER_R = 1,
|
||||
HSA_EXT_IMAGE_CHANNEL_ORDER_RX = 2,
|
||||
HSA_EXT_IMAGE_CHANNEL_ORDER_RG = 3,
|
||||
HSA_EXT_IMAGE_CHANNEL_ORDER_RGX = 4,
|
||||
HSA_EXT_IMAGE_CHANNEL_ORDER_RA = 5,
|
||||
HSA_EXT_IMAGE_CHANNEL_ORDER_RGB = 6,
|
||||
HSA_EXT_IMAGE_CHANNEL_ORDER_RGBX = 7,
|
||||
HSA_EXT_IMAGE_CHANNEL_ORDER_RGBA = 8,
|
||||
HSA_EXT_IMAGE_CHANNEL_ORDER_BGRA = 9,
|
||||
HSA_EXT_IMAGE_CHANNEL_ORDER_ARGB = 10,
|
||||
HSA_EXT_IMAGE_CHANNEL_ORDER_ABGR = 11,
|
||||
HSA_EXT_IMAGE_CHANNEL_ORDER_SRGB = 12,
|
||||
HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBX = 13,
|
||||
HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBA = 14,
|
||||
HSA_EXT_IMAGE_CHANNEL_ORDER_SBGRA = 15,
|
||||
HSA_EXT_IMAGE_CHANNEL_ORDER_INTENSITY = 16,
|
||||
HSA_EXT_IMAGE_CHANNEL_ORDER_LUMINANCE = 17,
|
||||
HSA_EXT_IMAGE_CHANNEL_ORDER_DEPTH = 18,
|
||||
HSA_EXT_IMAGE_CHANNEL_ORDER_DEPTH_STENCIL = 19
|
||||
} hsa_ext_image_channel_order_t;
|
||||
|
||||
/**
|
||||
* @brief Image format.
|
||||
*/
|
||||
typedef struct hsa_ext_image_format_s {
|
||||
/**
|
||||
* Channel type.
|
||||
*/
|
||||
hsa_ext_image_channel_type_t channel_type;
|
||||
|
||||
/**
|
||||
* Channel order.
|
||||
*/
|
||||
hsa_ext_image_channel_order_t channel_order;
|
||||
} hsa_ext_image_format_t;
|
||||
|
||||
/**
|
||||
* @brief Implementation-independent image descriptor.
|
||||
*/
|
||||
typedef struct hsa_ext_image_descriptor_s {
|
||||
/**
|
||||
* Image geometry.
|
||||
*/
|
||||
hsa_ext_image_geometry_t geometry;
|
||||
/**
|
||||
* Width of the image, in components.
|
||||
*/
|
||||
size_t width;
|
||||
/**
|
||||
* Height of the image, in components. Only defined if the geometry is 2D or
|
||||
* higher.
|
||||
*/
|
||||
size_t height;
|
||||
/**
|
||||
* Depth of the image, in components. Only defined if @a geometry is
|
||||
* ::HSA_EXT_IMAGE_GEOMETRY_3D. A depth of 0 is the same as a depth of 1.
|
||||
*/
|
||||
size_t depth;
|
||||
/**
|
||||
* Number of images in the image array. Only defined if @a geometry is
|
||||
* ::HSA_EXT_IMAGE_GEOMETRY_1DA, ::HSA_EXT_IMAGE_GEOMETRY_2DA, or
|
||||
* ::HSA_EXT_IMAGE_GEOMETRY_2DADEPTH.
|
||||
*/
|
||||
size_t array_size;
|
||||
/**
|
||||
* Image format.
|
||||
*/
|
||||
hsa_ext_image_format_t format;
|
||||
} hsa_ext_image_descriptor_t;
|
||||
|
||||
/**
|
||||
* @brief Image capability.
|
||||
*/
|
||||
typedef enum {
|
||||
/**
|
||||
* Images of this geometry and format are not supported in the agent.
|
||||
*/
|
||||
HSA_EXT_IMAGE_CAPABILITY_NOT_SUPPORTED = 0x0,
|
||||
/**
|
||||
* Read-only images of this geometry and format are supported by the
|
||||
* agent.
|
||||
*/
|
||||
HSA_EXT_IMAGE_CAPABILITY_READ_ONLY = 0x1,
|
||||
/**
|
||||
* Write-only images of this geometry and format are supported by the
|
||||
* agent.
|
||||
*/
|
||||
HSA_EXT_IMAGE_CAPABILITY_WRITE_ONLY = 0x2,
|
||||
/**
|
||||
* Read-write images of this geometry and format are supported by the
|
||||
* agent.
|
||||
*/
|
||||
HSA_EXT_IMAGE_CAPABILITY_READ_WRITE = 0x4,
|
||||
/**
|
||||
* Images of this geometry and format can be accessed from read-modify-write
|
||||
* operations in the agent.
|
||||
*/
|
||||
HSA_EXT_IMAGE_CAPABILITY_READ_MODIFY_WRITE = 0x8,
|
||||
/**
|
||||
* Images of this geometry and format are guaranteed to have a consistent
|
||||
* data layout regardless of how they are accessed by the associated
|
||||
* agent.
|
||||
*/
|
||||
HSA_EXT_IMAGE_CAPABILITY_ACCESS_INVARIANT_DATA_LAYOUT = 0x10
|
||||
} hsa_ext_image_capability_t;
|
||||
|
||||
/**
|
||||
* @brief Retrieve the supported image capabilities for a given combination of
|
||||
* agent, image format and geometry.
|
||||
*
|
||||
* @param[in] agent Agent to be associated with the image.
|
||||
*
|
||||
* @param[in] geometry Geometry.
|
||||
*
|
||||
* @param[in] image_format Pointer to an image format. Must not be NULL.
|
||||
*
|
||||
* @param[out] capability_mask Pointer to a memory location where the HSA
|
||||
* runtime stores a bit-mask of supported image capability
|
||||
* (::hsa_ext_image_capability_t) values. Must not be NULL.
|
||||
*
|
||||
* @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
|
||||
* initialized.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p geometry is not a valid image
|
||||
* geometry value, @p image_format is NULL, or @p capability_mask is NULL.
|
||||
*/
|
||||
hsa_status_t HSA_API
|
||||
hsa_ext_image_get_capability(hsa_agent_t agent,
|
||||
hsa_ext_image_geometry_t geometry,
|
||||
const hsa_ext_image_format_t *image_format,
|
||||
uint32_t *capability_mask);
|
||||
|
||||
/**
|
||||
* @brief Agent-specific image size and alignment requirements, populated by
|
||||
* ::hsa_ext_image_data_get_info.
|
||||
*/
|
||||
typedef struct hsa_ext_image_data_info_s {
|
||||
/**
|
||||
* Image data size, in bytes.
|
||||
*/
|
||||
size_t size;
|
||||
|
||||
/**
|
||||
* Image data alignment, in bytes.
|
||||
*/
|
||||
size_t alignment;
|
||||
|
||||
} hsa_ext_image_data_info_t;
|
||||
|
||||
/**
|
||||
* @brief Retrieve the image data requirements for a given combination of image
|
||||
* descriptor, access permission, and agent.
|
||||
*
|
||||
* @details The optimal image data size and alignment requirements may vary
|
||||
* depending on the image attributes specified in @p image_descriptor. Also,
|
||||
* different implementations of the HSA runtime may return different requirements
|
||||
* for the same input values.
|
||||
*
|
||||
* The implementation must return the same image data requirements for different
|
||||
* access permissions with exactly the same image descriptor as long as
|
||||
* ::hsa_ext_image_get_capability reports
|
||||
* ::HSA_EXT_IMAGE_CAPABILITY_ACCESS_INVARIANT_DATA_LAYOUT for the geometry
|
||||
* and image format contained in the image descriptor.
|
||||
*
|
||||
* @param[in] agent Agent to be associated with the image.
|
||||
*
|
||||
* @param[in] image_descriptor Pointer to an image descriptor. Must not be NULL.
|
||||
*
|
||||
* @param[in] access_permission Image access mode for @a agent.
|
||||
*
|
||||
* @param[out] image_data_info Memory location where the runtime stores the
|
||||
* size and alignment requirements. Must not be NULL.
|
||||
*
|
||||
* @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
|
||||
* initialized.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
|
||||
*
|
||||
* @retval ::HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED The agent does
|
||||
* not support the image format specified by the descriptor.
|
||||
*
|
||||
* @retval ::HSA_EXT_STATUS_ERROR_IMAGE_SIZE_UNSUPPORTED The agent does
|
||||
* not support the image dimensions specified by the image descriptor.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p image_descriptor is NULL, @p
|
||||
* access_permission is not a valid access permission value, or @p
|
||||
* image_data_info is NULL.
|
||||
*/
|
||||
hsa_status_t HSA_API hsa_ext_image_data_get_info(
|
||||
hsa_agent_t agent, const hsa_ext_image_descriptor_t *image_descriptor,
|
||||
hsa_access_permission_t access_permission,
|
||||
hsa_ext_image_data_info_t *image_data_info);
|
||||
|
||||
/**
|
||||
* @brief Creates an agent-defined image handle from an
|
||||
* implementation-independent image descriptor and agent-specific image
|
||||
* data.
|
||||
*
|
||||
* @details Images created with different access permissions but the same image
|
||||
* descriptor can share the same image data if
|
||||
* ::HSA_EXT_IMAGE_CAPABILITY_ACCESS_INVARIANT_DATA_LAYOUT is reported by
|
||||
* ::hsa_ext_image_get_capability for the image format specified in the image
|
||||
* descriptor. Images with an s-form channel order can share the same image data
|
||||
* with other images that have the corresponding non-s-form channel order,
|
||||
* provided the rest of their image descriptors are identical.
|
||||
*
|
||||
* If necessary, an application can use image operations (import, export, copy,
|
||||
* clear) to prepare the image for the intended use regardless of the access
|
||||
* permissions.
|
||||
*
|
||||
* @param[in] agent Agent to be associated with the image.
|
||||
*
|
||||
* @param[in] image_descriptor Pointer to an image descriptor. Must not be NULL.
|
||||
*
|
||||
* @param[in] image_data Image data buffer that must have been allocated
|
||||
* according to the size and alignment requirements dictated by
|
||||
* ::hsa_ext_image_data_get_info. Must not be NULL.
|
||||
*
|
||||
* Any previous memory contents are preserved upon creation. The application is
|
||||
* responsible for ensuring that the lifetime of the image data exceeds that of
|
||||
* all the associated images.
|
||||
*
|
||||
* @param[in] access_permission Access permission of the image by the
|
||||
* agent. The access permission defines how the agent expects to use the
|
||||
* image and must match the corresponding HSAIL image handle type. The agent
|
||||
* must support the image format specified in @p image_descriptor for the given
|
||||
* permission.
|
||||
*
|
||||
* @param[out] image Pointer to a memory location where the HSA runtime stores
|
||||
* the newly created image handle. Must not be NULL.
|
||||
*
|
||||
* @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
|
||||
* initialized.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
|
||||
*
|
||||
* @retval ::HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED The agent does
|
||||
* not have the capability to support the image format contained in the image
|
||||
* descriptor using the specified access permission.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime cannot create the
|
||||
* image because it is out of resources (for example, the agent does not
|
||||
* support the creation of more image handles with the given access permission).
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p image_descriptor is NULL, @p
|
||||
* image_data is NULL, @p access_permission is not a valid access permission
|
||||
* value, or @p image is NULL.
|
||||
*/
|
||||
hsa_status_t HSA_API
|
||||
hsa_ext_image_create(hsa_agent_t agent,
|
||||
const hsa_ext_image_descriptor_t *image_descriptor,
|
||||
const void *image_data,
|
||||
hsa_access_permission_t access_permission,
|
||||
hsa_ext_image_t *image);
|
||||
|
||||
/**
|
||||
* @brief Destroy an image previously created using ::hsa_ext_image_create.
|
||||
*
|
||||
* @details Destroying the image handle does not free the associated image data,
|
||||
* or modify its contents. The application should not destroy an image while
|
||||
* there are references to it queued for execution or currently being used in a
|
||||
* kernel.
|
||||
*
|
||||
* @param[in] agent Agent associated with the image.
|
||||
*
|
||||
* @param[in] image Image.
|
||||
*
|
||||
* @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
|
||||
* initialized.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
|
||||
*/
|
||||
hsa_status_t HSA_API
|
||||
hsa_ext_image_destroy(hsa_agent_t agent, hsa_ext_image_t image);
|
||||
|
||||
/**
|
||||
* @brief Copies a portion of one image (the source) to another image (the
|
||||
* destination).
|
||||
*
|
||||
* @details The source and destination image formats should match, except if the
|
||||
* channel order of one of the images is the standard form of the channel order
|
||||
* of the other image. For example, it is allowed to copy a source image with a
|
||||
* channel order of HSA_EXT_IMAGE_CHANNEL_ORDER_SRGB to a destination image with
|
||||
* a channel order of HSA_EXT_IMAGE_CHANNEL_ORDER_RGB.
|
||||
*
|
||||
* The source and destination images do not have to be of the same geometry and
|
||||
* appropriate scaling is performed by the HSA runtime. It is possible to copy
|
||||
* subregions between any combinations of source and destination types, provided
|
||||
* that the dimensions of the subregions are the same. For example, it is
|
||||
* allowed to copy a rectangular region from a 2D image to a slice of a 3D
|
||||
* image.
|
||||
*
|
||||
* If the source and destination image data overlap, or the combination of
|
||||
* offset and range references an out-of-bounds element in any of the images,
|
||||
* the behavior is undefined.
|
||||
*
|
||||
* @param[in] agent Agent associated with both images.
|
||||
*
|
||||
* @param[in] src_image Source image. The agent associated with the source
|
||||
* image must be identical to that of the destination image.
|
||||
*
|
||||
* @param[in] src_offset Pointer to the offset within the source image where to
|
||||
* copy the data from. Must not be NULL.
|
||||
*
|
||||
* @param[in] dst_image Destination image.
|
||||
*
|
||||
* @param[in] dst_offset Pointer to the offset within the destination
|
||||
* image where to copy the data. Must not be NULL.
|
||||
*
|
||||
* @param[in] range Dimensions of the image portion to be copied. The HSA
|
||||
* runtime computes the size of the image data to be copied using this
|
||||
* argument. Must not be NULL.
|
||||
*
|
||||
* @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
|
||||
* initialized.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p src_offset is
|
||||
* NULL, @p dst_offset is NULL, or @p range is NULL.
|
||||
*/
|
||||
hsa_status_t HSA_API
|
||||
hsa_ext_image_copy(hsa_agent_t agent, hsa_ext_image_t src_image,
|
||||
const hsa_dim3_t *src_offset, hsa_ext_image_t dst_image,
|
||||
const hsa_dim3_t *dst_offset, const hsa_dim3_t *range);
|
||||
|
||||
/**
|
||||
* @brief Image region.
|
||||
*/
|
||||
typedef struct hsa_ext_image_region_s {
|
||||
/**
|
||||
* Offset within an image (in coordinates).
|
||||
*/
|
||||
hsa_dim3_t offset;
|
||||
|
||||
/**
|
||||
* Dimensions of the image range (in coordinates). The x, y, and z dimensions
|
||||
* correspond to width, height, and depth respectively.
|
||||
*/
|
||||
hsa_dim3_t range;
|
||||
} hsa_ext_image_region_t;
|
||||
|
||||
/**
|
||||
* @brief Import linearly organized image data from memory directly to an
|
||||
* image handle.
|
||||
*
|
||||
* @details This operation updates the image data referenced by the image handle
|
||||
* from the source memory. The size of the data imported from memory is
|
||||
* implicitly derived from the image region.
|
||||
*
|
||||
* If @p src_row_pitch is smaller than the destination region width (in bytes),
|
||||
* then @p src_row_pitch = region width.
|
||||
*
|
||||
* If @p src_slice_pitch is smaller than the destination region width * region
|
||||
* height (in bytes), then @p src_slice_pitch = region width * region height.
|
||||
*
|
||||
* It is the application's responsibility to avoid out of bounds memory access.
|
||||
*
|
||||
* None of the source memory or image data memory in the previously created
|
||||
* ::hsa_ext_image_create image handle can overlap. Overlapping of any
|
||||
* of the source and destination memory within the import operation produces
|
||||
* undefined results.
|
||||
*
|
||||
* @param[in] agent Agent associated with the image.
|
||||
*
|
||||
* @param[in] src_memory Source memory. Must not be NULL.
|
||||
*
|
||||
* @param[in] src_row_pitch Number of bytes in one row of the source memory.
|
||||
*
|
||||
* @param[in] src_slice_pitch Number of bytes in one slice of the source memory.
|
||||
*
|
||||
* @param[in] dst_image Destination image.
|
||||
*
|
||||
* @param[in] image_region Pointer to the image region to be updated. Must not
|
||||
* be NULL.
|
||||
*
|
||||
* @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
|
||||
* initialized.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p src_memory is NULL, or @p
|
||||
* image_region is NULL.
|
||||
*
|
||||
*/
|
||||
hsa_status_t HSA_API
|
||||
hsa_ext_image_import(hsa_agent_t agent, const void *src_memory,
|
||||
size_t src_row_pitch, size_t src_slice_pitch,
|
||||
hsa_ext_image_t dst_image,
|
||||
const hsa_ext_image_region_t *image_region);
|
||||
|
||||
/**
|
||||
* @brief Export the image data to linearly organized memory.
|
||||
*
|
||||
* @details The operation updates the destination memory with the image data of
|
||||
* @p src_image. The size of the data exported to memory is implicitly derived
|
||||
* from the image region.
|
||||
*
|
||||
* If @p dst_row_pitch is smaller than the source region width (in bytes), then
|
||||
* @p dst_row_pitch = region width.
|
||||
*
|
||||
* If @p dst_slice_pitch is smaller than the source region width * region height
|
||||
* (in bytes), then @p dst_slice_pitch = region width * region height.
|
||||
*
|
||||
* It is the application's responsibility to avoid out of bounds memory access.
|
||||
*
|
||||
* None of the destination memory or image data memory in the previously created
|
||||
* ::hsa_ext_image_create image handle can overlap. Overlapping of any of
|
||||
* the source and destination memory within the export operation produces
|
||||
* undefined results.
|
||||
*
|
||||
* @param[in] agent Agent associated with the image.
|
||||
*
|
||||
* @param[in] src_image Source image.
|
||||
*
|
||||
* @param[in] dst_memory Destination memory. Must not be NULL.
|
||||
*
|
||||
* @param[in] dst_row_pitch Number of bytes in one row of the destination
|
||||
* memory.
|
||||
*
|
||||
* @param[in] dst_slice_pitch Number of bytes in one slice of the destination
|
||||
* memory.
|
||||
*
|
||||
* @param[in] image_region Pointer to the image region to be exported. Must not
|
||||
* be NULL.
|
||||
*
|
||||
* @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
|
||||
* initialized.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p dst_memory is NULL, or @p
|
||||
* image_region is NULL.
|
||||
*/
|
||||
hsa_status_t HSA_API
|
||||
hsa_ext_image_export(hsa_agent_t agent, hsa_ext_image_t src_image,
|
||||
void *dst_memory, size_t dst_row_pitch,
|
||||
size_t dst_slice_pitch,
|
||||
const hsa_ext_image_region_t *image_region);
|
||||
|
||||
/**
|
||||
* @brief Clear an image to the specified value.
|
||||
*
|
||||
* @details Clearing an image does not perform any format conversion and the
|
||||
* provided clear data is directly stored regardless of the image format. The
|
||||
* lowest bits of the data (number of bits depending on the image component
|
||||
* type) stored in the cleared image are based on the image component order.
|
||||
*
|
||||
* The number of elements in @p data should match the number of access
|
||||
* components for the channel order of @p image, as determined by the HSA
|
||||
* Programmer's Reference Manual. A single element is required for
|
||||
* HSA_EXT_IMAGE_CHANNEL_ORDER_DEPTH and
|
||||
* HSA_EXT_IMAGE_CHANNEL_ORDER_DEPTH_STENCIL, while any other channel order
|
||||
* requires 4 elements.
|
||||
*
|
||||
* Each element in @p data is a 32-bit value. The type of each element
|
||||
* should match the access type associated with the channel type of @p image,
|
||||
* as determined by the HSA Programmer's Reference Manual:
|
||||
* - HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT8,
|
||||
* HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT16, and
|
||||
* HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT32 map to int32_t.
|
||||
* - HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8,
|
||||
* HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16, and
|
||||
* HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32 map to uint32_t.
|
||||
* - Any other channel type maps to a 32-bit float.
|
||||
*
|
||||
* @param[in] agent Agent associated with the image.
|
||||
*
|
||||
* @param[in] image Image to be cleared.
|
||||
*
|
||||
* @param[in] data Clear value array. Specifying a clear value outside of the
|
||||
* range that can be represented by an image format results in undefined
|
||||
* behavior. Must not be NULL.
|
||||
*
|
||||
* @param[in] image_region Pointer to the image region to clear. Must not be
|
||||
* NULL. If the region references an out-of-bounds element, the behavior is
|
||||
* undefined.
|
||||
*
|
||||
* @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
|
||||
* initialized.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p data is NULL, or @p
|
||||
* image_region is NULL.
|
||||
*/
|
||||
hsa_status_t HSA_API
|
||||
hsa_ext_image_clear(hsa_agent_t agent, hsa_ext_image_t image,
|
||||
const void *data,
|
||||
const hsa_ext_image_region_t *image_region);
|
||||
|
||||
/**
|
||||
* @brief Sampler handle. Samplers are populated by
|
||||
* ::hsa_ext_sampler_create. Sampler handles are only unique within an
|
||||
* agent, not across agents.
|
||||
*/
|
||||
typedef struct hsa_ext_sampler_s {
|
||||
/**
|
||||
* Opaque handle.
|
||||
*/
|
||||
uint64_t handle;
|
||||
} hsa_ext_sampler_t;
|
||||
|
||||
/**
|
||||
* @brief Sampler address modes. The sampler address mode describes the
|
||||
* processing of out-of-range image coordinates. The values match the BRIG
|
||||
* type BrigSamplerAddressing.
|
||||
*/
|
||||
typedef enum {
|
||||
/**
|
||||
* Out-of-range coordinates are not handled.
|
||||
*/
|
||||
HSA_EXT_SAMPLER_ADDRESSING_MODE_UNDEFINED = 0,
|
||||
|
||||
/**
|
||||
* Clamp out-of-range coordinates to the image edge.
|
||||
*/
|
||||
HSA_EXT_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE = 1,
|
||||
|
||||
/**
|
||||
* Clamp out-of-range coordinates to the image border.
|
||||
*/
|
||||
HSA_EXT_SAMPLER_ADDRESSING_MODE_CLAMP_TO_BORDER = 2,
|
||||
|
||||
/**
|
||||
* Wrap out-of-range coordinates back into the valid coordinate range.
|
||||
*/
|
||||
HSA_EXT_SAMPLER_ADDRESSING_MODE_REPEAT = 3,
|
||||
|
||||
/**
|
||||
* Mirror out-of-range coordinates back into the valid coordinate range.
|
||||
*/
|
||||
HSA_EXT_SAMPLER_ADDRESSING_MODE_MIRRORED_REPEAT = 4
|
||||
|
||||
} hsa_ext_sampler_addressing_mode_t;
|
||||
|
||||
/**
|
||||
* @brief Sampler coordinate modes. The enumeration values match the BRIG
|
||||
* BRIG_SAMPLER_COORD bit in BrigSamplerModifier.
|
||||
*/
|
||||
typedef enum {
|
||||
/**
|
||||
* Coordinates are all in the range of 0 to (dimension-1).
|
||||
*/
|
||||
HSA_EXT_SAMPLER_COORDINATE_MODE_UNNORMALIZED = 0,
|
||||
|
||||
/**
|
||||
* Coordinates are all in the range of 0.0 to 1.0.
|
||||
*/
|
||||
HSA_EXT_SAMPLER_COORDINATE_MODE_NORMALIZED = 1
|
||||
|
||||
} hsa_ext_sampler_coordinate_mode_t;
|
||||
|
||||
/**
|
||||
* @brief Sampler filter modes. The enumeration values match the BRIG type
|
||||
* BrigSamplerFilter.
|
||||
*/
|
||||
typedef enum {
|
||||
/**
|
||||
* Filter to the image element nearest (in Manhattan distance) to the
|
||||
* specified coordinate.
|
||||
*/
|
||||
HSA_EXT_SAMPLER_FILTER_MODE_NEAREST = 0,
|
||||
|
||||
/**
|
||||
* Filter to the image element calculated by combining the elements in a 2x2
|
||||
* square block or 2x2x2 cube block around the specified coordinate. The
|
||||
* elements are combined using linear interpolation.
|
||||
*/
|
||||
HSA_EXT_SAMPLER_FILTER_MODE_LINEAR = 1
|
||||
|
||||
} hsa_ext_sampler_filter_mode_t;
|
||||
|
||||
/**
|
||||
* @brief Implementation-independent sampler descriptor.
|
||||
*/
|
||||
typedef struct hsa_ext_sampler_descriptor_s {
|
||||
/**
|
||||
* Sampler coordinate mode describes the normalization of image coordinates.
|
||||
*/
|
||||
hsa_ext_sampler_coordinate_mode_t coordinate_mode;
|
||||
|
||||
/**
|
||||
* Sampler filter type describes the type of sampling performed.
|
||||
*/
|
||||
hsa_ext_sampler_filter_mode_t filter_mode;
|
||||
|
||||
/**
|
||||
* Sampler address mode describes the processing of out-of-range image
|
||||
* coordinates.
|
||||
*/
|
||||
hsa_ext_sampler_addressing_mode_t address_mode;
|
||||
|
||||
} hsa_ext_sampler_descriptor_t;
|
||||
|
||||
/**
|
||||
* @brief Create a kernel agent defined sampler handle for a given combination
|
||||
* of an (agent-independent) sampler descriptor and agent.
|
||||
*
|
||||
* @param[in] agent Agent to be associated with the sampler.
|
||||
*
|
||||
* @param[in] sampler_descriptor Pointer to a sampler descriptor. Must not be
|
||||
* NULL.
|
||||
*
|
||||
* @param[out] sampler Memory location where the HSA runtime stores the newly
|
||||
* created sampler handle. Must not be NULL.
|
||||
*
|
||||
* @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
|
||||
* initialized.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The agent cannot create the
|
||||
* specified handle because it is out of resources.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p sampler_descriptor is NULL, or
|
||||
* @p sampler is NULL.
|
||||
*/
|
||||
hsa_status_t HSA_API hsa_ext_sampler_create(
|
||||
hsa_agent_t agent, const hsa_ext_sampler_descriptor_t *sampler_descriptor,
|
||||
hsa_ext_sampler_t *sampler);
|
||||
|
||||
/**
|
||||
* @brief Destroy a sampler previously created using ::hsa_ext_sampler_create.
|
||||
*
|
||||
* @param[in] agent Agent associated with the sampler.
|
||||
*
|
||||
* @param[in] sampler Sampler. The sampler handle should not be destroyed while
|
||||
* there are references to it queued for execution or currently being used in a
|
||||
* dispatch.
|
||||
*
|
||||
* @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
|
||||
* initialized.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
|
||||
*/
|
||||
hsa_status_t HSA_API
|
||||
hsa_ext_sampler_destroy(hsa_agent_t agent, hsa_ext_sampler_t sampler);
|
||||
|
||||
/**
|
||||
* @brief Enumeration constants added to ::hsa_status_t by this extension.
|
||||
*/
|
||||
enum {
|
||||
/**
|
||||
* Image format is not supported.
|
||||
*/
|
||||
HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED = 0x3000,
|
||||
/**
|
||||
* Image size is not supported.
|
||||
*/
|
||||
HSA_EXT_STATUS_ERROR_IMAGE_SIZE_UNSUPPORTED = 0x3001
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Enumeration constants added to ::hsa_agent_info_t by this
|
||||
* extension. The value of any of these attributes is undefined if the
|
||||
* agent is not a kernel agent, or the implementation does not support images.
|
||||
*/
|
||||
enum {
|
||||
/**
|
||||
* Maximum number of elements in 1D images. Must be at most 16384. The type
|
||||
* of this attribute is uint32_t.
|
||||
*/
|
||||
HSA_EXT_AGENT_INFO_IMAGE_1D_MAX_ELEMENTS = 0x3000,
|
||||
/**
|
||||
* Maximum number of elements in 1DA images. Must be at most 16384. The type
|
||||
* of this attribute is uint32_t.
|
||||
*/
|
||||
HSA_EXT_AGENT_INFO_IMAGE_1DA_MAX_ELEMENTS = 0x3001,
|
||||
/**
|
||||
* Maximum number of elements in 1DB images. Must be at most 65536. The type
|
||||
* of this attribute is uint32_t.
|
||||
*/
|
||||
HSA_EXT_AGENT_INFO_IMAGE_1DB_MAX_ELEMENTS = 0x3002,
|
||||
/**
|
||||
* Maximum dimensions (width, height) of 2D images, in image elements. The X
|
||||
* and Y maximums must be at most 16384. The type of this attribute is
|
||||
* uint32_t[2].
|
||||
*/
|
||||
HSA_EXT_AGENT_INFO_IMAGE_2D_MAX_ELEMENTS = 0x3003,
|
||||
/**
|
||||
* Maximum dimensions (width, height) of 2DA images, in image elements. The X
|
||||
* and Y maximums must be at most 16384. The type of this attribute is
|
||||
* uint32_t[2].
|
||||
*/
|
||||
HSA_EXT_AGENT_INFO_IMAGE_2DA_MAX_ELEMENTS = 0x3004,
|
||||
/**
|
||||
* Maximum dimensions (width, height) of 2DDEPTH images, in image
|
||||
* elements. The X and Y maximums must be at most 16384. The type of this
|
||||
* attribute is uint32_t[2].
|
||||
*/
|
||||
HSA_EXT_AGENT_INFO_IMAGE_2DDEPTH_MAX_ELEMENTS = 0x3005,
|
||||
/**
|
||||
* Maximum dimensions (width, height) of 2DADEPTH images, in image
|
||||
* elements. The X and Y maximums must be at most 16384. The type of this
|
||||
* attribute is uint32_t[2].
|
||||
*/
|
||||
HSA_EXT_AGENT_INFO_IMAGE_2DADEPTH_MAX_ELEMENTS = 0x3006,
|
||||
/**
|
||||
* Maximum dimensions (width, height, depth) of 3D images, in image
|
||||
* elements. The maximum along any dimension cannot exceed 2048. The type of
|
||||
* this attribute is uint32_t[3].
|
||||
*/
|
||||
HSA_EXT_AGENT_INFO_IMAGE_3D_MAX_ELEMENTS = 0x3007,
|
||||
/**
|
||||
* Maximum number of image layers in an image array. Must not exceed 2048. The
|
||||
* type of this attribute is uint32_t.
|
||||
*/
|
||||
HSA_EXT_AGENT_INFO_IMAGE_ARRAY_MAX_LAYERS = 0x3008,
|
||||
/**
|
||||
* Maximum number of read-only image handles that can be created at any one
|
||||
* time. Must be at least 128. The type of this attribute is uint32_t.
|
||||
*/
|
||||
HSA_EXT_AGENT_INFO_MAX_IMAGE_RD_HANDLES = 0x3009,
|
||||
/**
|
||||
* Maximum number of write-only and read-write image handles (combined) that
|
||||
* can be created at any one time. Must be at least 64. The type of this
|
||||
* attribute is uint32_t.
|
||||
*/
|
||||
HSA_EXT_AGENT_INFO_MAX_IMAGE_RORW_HANDLES = 0x300A,
|
||||
/**
|
||||
* Maximum number of sampler handles that can be created at any one
|
||||
* time. Must be at least 16. The type of this attribute is uint32_t.
|
||||
*/
|
||||
HSA_EXT_AGENT_INFO_MAX_SAMPLER_HANDLERS = 0x300B
|
||||
};
|
||||
|
||||
/** @} */
|
||||
|
||||
#define hsa_ext_images_1_00
|
||||
|
||||
/**
 * @brief Table of function pointers for the hsa_ext_images_1_00 interface.
 *
 * Each member is named after the images-extension function it points to. For
 * the members from hsa_ext_image_create through hsa_ext_sampler_destroy the
 * parameter list and return type are the same as the function prototypes
 * declared earlier in this header.
 */
typedef struct hsa_ext_images_1_00_pfn_s {
|
||||
hsa_status_t (*hsa_ext_image_get_capability)(
|
||||
hsa_agent_t agent, hsa_ext_image_geometry_t geometry,
|
||||
const hsa_ext_image_format_t *image_format, uint32_t *capability_mask);
|
||||
|
||||
hsa_status_t (*hsa_ext_image_data_get_info)(
|
||||
hsa_agent_t agent, const hsa_ext_image_descriptor_t *image_descriptor,
|
||||
hsa_access_permission_t access_permission,
|
||||
hsa_ext_image_data_info_t *image_data_info);
|
||||
|
||||
hsa_status_t (*hsa_ext_image_create)(
|
||||
hsa_agent_t agent, const hsa_ext_image_descriptor_t *image_descriptor,
|
||||
const void *image_data, hsa_access_permission_t access_permission,
|
||||
hsa_ext_image_t *image);
|
||||
|
||||
hsa_status_t (*hsa_ext_image_destroy)(hsa_agent_t agent,
|
||||
hsa_ext_image_t image);
|
||||
|
||||
hsa_status_t (*hsa_ext_image_copy)(hsa_agent_t agent,
|
||||
hsa_ext_image_t src_image,
|
||||
const hsa_dim3_t *src_offset,
|
||||
hsa_ext_image_t dst_image,
|
||||
const hsa_dim3_t *dst_offset,
|
||||
const hsa_dim3_t *range);
|
||||
|
||||
hsa_status_t (*hsa_ext_image_import)(
|
||||
hsa_agent_t agent, const void *src_memory, size_t src_row_pitch,
|
||||
size_t src_slice_pitch, hsa_ext_image_t dst_image,
|
||||
const hsa_ext_image_region_t *image_region);
|
||||
|
||||
hsa_status_t (*hsa_ext_image_export)(
|
||||
hsa_agent_t agent, hsa_ext_image_t src_image, void *dst_memory,
|
||||
size_t dst_row_pitch, size_t dst_slice_pitch,
|
||||
const hsa_ext_image_region_t *image_region);
|
||||
|
||||
hsa_status_t (*hsa_ext_image_clear)(
|
||||
hsa_agent_t agent, hsa_ext_image_t image, const void *data,
|
||||
const hsa_ext_image_region_t *image_region);
|
||||
|
||||
hsa_status_t (*hsa_ext_sampler_create)(
|
||||
hsa_agent_t agent, const hsa_ext_sampler_descriptor_t *sampler_descriptor,
|
||||
hsa_ext_sampler_t *sampler);
|
||||
|
||||
hsa_status_t (*hsa_ext_sampler_destroy)(hsa_agent_t agent,
|
||||
hsa_ext_sampler_t sampler);
|
||||
|
||||
} hsa_ext_images_1_00_pfn_t;
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // end extern "C" block
|
||||
#endif /*__cplusplus*/
|
||||
|
||||
#endif
|
||||
@@ -1,181 +0,0 @@
|
||||
//=====================================================================
|
||||
// Copyright 2016 (c), Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
/// \author AMD Developer Tools Team
|
||||
/// \file
|
||||
///
|
||||
//=====================================================================
|
||||
|
||||
#ifndef SP3_ASIC_H
|
||||
#define SP3_ASIC_H
|
||||
|
||||
|
||||
#include "sp3-int.h"
|
||||
#include "sp3-vm.h"
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
// ASIC types
|
||||
|
||||
|
||||
// Backends an ASIC can be assigned to; struct asic_info stores one of these
// in its `backend` field. The enumerators have no explicit values, so they
// count up from 0 and ASIC_MAX_BACKEND equals the number of real backends.
enum asic_backend {
|
||||
ASIC_BACKEND_SI,
|
||||
ASIC_BACKEND_CI,
|
||||
ASIC_BACKEND_GFX8,
|
||||
ASIC_BACKEND_GFX81,
|
||||
ASIC_MAX_BACKEND, // Must be the last entry
|
||||
};
|
||||
|
||||
|
||||
// Numeric ids for ASIC capabilities, numbered from 1. Each id has a
// same-named lowercase field in struct asic_info (ASIC_THREAD_SIZE and
// asic_thread_size, ASIC_FED_INSTRUCTIONS and asic_fed_instructions, ...).
enum asic_cap_id {
|
||||
ASIC_THREAD_SIZE = 1,
|
||||
ASIC_FED_INSTRUCTIONS = 2,
|
||||
ASIC_LEGACY_LOG = 3,
|
||||
ASIC_LARGE_DS_READ = 4,
|
||||
ASIC_32BANK_LDS = 5,
|
||||
};
|
||||
|
||||
|
||||
// Description of one ASIC: its name, the backend it uses, and one int per
// capability. The capability fields appear in the same order as the
// enumerators of enum asic_cap_id. Entries live in the asics[] array
// declared further down in this header.
struct asic_info {
|
||||
const char *name;
|
||||
enum asic_backend backend; // which backend to use
|
||||
int asic_thread_size; // number of threads in a wave
|
||||
int asic_fed_instructions; // FED instructions are available
|
||||
int asic_legacy_log; // Legacy EXP and LOG opcodes are available
|
||||
int asic_large_ds_read; // Large DS read opcodes (96b and 128b) are available
|
||||
int asic_32bank_lds; // Full 32 bank lds P1LL_F16 INTERP instruction available
|
||||
};
|
||||
|
||||
|
||||
// ASIC-specific assembler state.
// NOTE(review): presumably the object S->ap points to, since the `A` macro
// defined after this struct expands to S->ap and the ASIC/ASICNAME macros
// read A->asic -- confirm against struct sp3_state.
struct sp3_asic_state {
|
||||
// One ALU operation; the `bundle` array below holds 5 of these.
struct sp3_asic_aluop {
|
||||
int pos; // original position in code
|
||||
int op, na, nc; // na = number of args, nc = number of consts in args
|
||||
int lds, offset; // lds = is an LDS_IDX_OP subop, offset = LDS offset
|
||||
unsigned dst;
|
||||
unsigned arg[3];
|
||||
unsigned lit[3]; // float literals are no longer float at this point
|
||||
unsigned flags;
|
||||
int scalar;
|
||||
} bundle [5];
|
||||
unsigned lds_lit[2], lds_mask[2];
|
||||
// NOTE(review): presumably the number of bundle[] entries in use -- confirm.
int nbundle;
|
||||
int reorder;
|
||||
int last_reorder, last_po[5];
|
||||
int nscalar; // number of nominally-scalar opcodes in bundle
|
||||
int barrier_after; // require barrier after this clause
|
||||
|
||||
// sp3-r6xx
|
||||
// NOTE(review): presumably the index into asics[] used by the ASIC and
// ASICNAME macros (asics[A->asic]) -- confirm.
int asic;
|
||||
// Relocation record chained to the next one through `next`; da_relocs
// points at a record of this type.
struct da_reloc {
|
||||
unsigned addr, ref;
|
||||
struct da_reloc *next;
|
||||
} *da_relocs;
|
||||
struct cf_reloc **instrels;
|
||||
struct cf_reloc *labels;
|
||||
// NOTE(review): sinstrels/slabels are presumably the sizes of the instrels
// and labels arrays -- confirm.
int sinstrels;
|
||||
int slabels;
|
||||
char unk_name[16];
|
||||
};
|
||||
#define A S->ap
|
||||
|
||||
|
||||
extern struct asic_info asics[];
|
||||
#define ASICNAME asics[A->asic].name
|
||||
#define ASIC asics[A->asic]
|
||||
void set_asic(Sp, int asic);
|
||||
int find_asic(const char *name);
|
||||
|
||||
|
||||
// opcode tables
|
||||
|
||||
void sp3_unbuild_tables(void);
|
||||
void sp3_si_unbuild_tables(void);
|
||||
void sp3_ci_unbuild_tables(void);
|
||||
void sp3_gfx8_unbuild_tables(void);
|
||||
|
||||
void sp3_build_tables(void);
|
||||
void sp3_si_build_tables(void);
|
||||
void sp3_ci_build_tables(void);
|
||||
void sp3_gfx8_build_tables(void);
|
||||
|
||||
|
||||
|
||||
|
||||
// helper functions
|
||||
|
||||
|
||||
// FMT_FMT through FMT_SRF are distinct values held in the upper 16 bits.
// FMT_MASK covers those upper 16 bits and FMT_IMASK covers the lower 16.
// NOTE(review): looks like a tag-in-high-half / index-in-low-half encoding,
// with the tag presumably matching the `cls` argument of the
// *_fmt_to_name() functions -- confirm against the callers.
#define FMT_FMT 0x00000000
|
||||
#define FMT_COMP 0x00010000
|
||||
#define FMT_ENDIAN 0x00020000
|
||||
#define FMT_NUM 0x00030000
|
||||
#define FMT_SRF 0x00040000
|
||||
#define FMT_MASK 0xFFFF0000
|
||||
#define FMT_IMASK 0x0000FFFF
|
||||
|
||||
void mark_sgpr(Sp, unsigned);
|
||||
void mark_vgpr(Sp, unsigned);
|
||||
void mark_global(Sp, unsigned);
|
||||
void mark_ctemp(Sp, unsigned);
|
||||
int is_mod_bool(Sp, pnode *, const char *);
|
||||
int get_mod_bool(Sp, pnode *, const char *);
|
||||
int get_mod_int(Sp, pnode *, int, int);
|
||||
int get_mod_int32(Sp, pnode *);
|
||||
int par_cmask(Sp, pnode *);
|
||||
unsigned reg_csel(Sp, unsigned , int);
|
||||
unsigned reg_msel(Sp, unsigned *, int);
|
||||
|
||||
const char *spec_sel_to_name(Sp, int sel);
|
||||
const char *sp3_fmt_to_name(Sp, int cls, int val);
|
||||
const char *sp3_si_fmt_to_name(Sp, int cls, int val);
|
||||
const char *sp3_ci_fmt_to_name(Sp, int cls, int val);
|
||||
const char *sp3_gfx8_fmt_to_name(Sp, int cls, int val);
|
||||
|
||||
void add_reloc_label(Sp, int li, int blame);
|
||||
void add_reloc_inst(Sp, int ii, int blame);
|
||||
void add_reloc_cf(Sp, int offs);
|
||||
|
||||
int grouping_for_group_size(Sp, int group_size);
|
||||
|
||||
//JENNICA - this block of name_tree will go away, replace
|
||||
//with backend specific.
|
||||
|
||||
// Selector passed as the `whichtree` argument of get_name_tree().
// Apart from NAMETREE_OPCODES, each enumerator shares its name with one of
// the extern name_tree pointers declared after get_name_tree()
// (opcodes_0arg, opcodes_call, vtx_fmts, ...).
enum nametree_enum {
|
||||
NAMETREE_OPCODES,
|
||||
NAMETREE_OPCODES_0ARG,
|
||||
NAMETREE_OPCODES_CALL,
|
||||
NAMETREE_VTX_FMTS,
|
||||
NAMETREE_SPEC_SELS,
|
||||
NAMETREE_SPEC_VEC_SELS,
|
||||
NAMETREE_SGPR_NAME_SELS,
|
||||
NAMETREE_CONSTS,
|
||||
NAMETREE_DEPRECATED,
|
||||
};
|
||||
|
||||
struct name_tree **get_name_tree(struct sp3_state *S, enum nametree_enum whichtree);
|
||||
|
||||
extern struct name_tree *opcodes_0arg;
|
||||
extern struct name_tree *opcodes_call;
|
||||
extern struct name_tree *vtx_fmts;
|
||||
extern struct name_tree *spec_sels;
|
||||
extern struct name_tree *spec_vec_sels;
|
||||
extern struct name_tree *sgpr_name_sels;
|
||||
extern struct name_tree *consts;
|
||||
extern struct name_tree *deprecated;
|
||||
|
||||
extern struct name_tree *asic_names;
|
||||
// Name/id record; the asiccaps[] array declared below holds these.
// NOTE(review): ids presumably come from enum asic_cap_id -- confirm.
// The struct tag `asic_caps` is also the name of a name_tree pointer
// declared a few lines further down.
struct asic_caps{const char *name; int id;};
|
||||
extern struct asic_caps asiccaps[];
|
||||
extern struct name_tree *asic_caps; //JENNICA - this may need to go away.
|
||||
|
||||
void update_sgpr_names(Sp);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@@ -1,553 +0,0 @@
|
||||
//=====================================================================
|
||||
// Copyright 2016 (c), Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
/// \author AMD Developer Tools Team
|
||||
/// \file
|
||||
///
|
||||
//=====================================================================
|
||||
|
||||
#ifndef SP3_INT_H
|
||||
#define SP3_INT_H
|
||||
|
||||
#include "sp3.h"
|
||||
|
||||
|
||||
// MSVC only: map strdup, stricmp and strcasecmp onto the underscore-prefixed
// CRT names (strcasecmp maps to _stricmp), each guarded so that an existing
// macro of the same name is left alone.
#ifdef _MSC_VER
|
||||
#ifndef strdup
|
||||
#define strdup _strdup
|
||||
#endif
|
||||
#ifndef stricmp
|
||||
#define stricmp _stricmp
|
||||
#endif
|
||||
#ifndef strcasecmp
|
||||
#define strcasecmp _stricmp
|
||||
#endif
|
||||
// Disables the listed warnings with no matching push/pop, so the setting
// stays in effect for code that follows the inclusion of this header.
#pragma warning(disable:4090 4204 4245 4296 4389 4701 4702)
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
struct sp3_state;
|
||||
#define Sp struct sp3_state *S
|
||||
|
||||
// clause types
|
||||
|
||||
#define CT_NONE 0
|
||||
#define CT_SHADER 1
|
||||
|
||||
// parse tree
|
||||
|
||||
#define P_NUM 0 // integer
|
||||
#define P_FLT 1 // float
|
||||
#define P_STR 2 // string
|
||||
#define P_REG 3 // register component(s)
|
||||
#define P_RANGE 4 // closed range
|
||||
#define P_RANGEL 5 // right-open range
|
||||
#define P_SLICE 6 // array concatenation (used for slices)
|
||||
#define P_RCAST 7 // integer -> register cast
|
||||
#define P_LIST 8 // list (internal to the parser only)
|
||||
#define P_VAR 9 // variable (with name)
|
||||
#define P_VARE 10 // variable-element (result of lvalue slice)
|
||||
#define P_CL 11 // clause
|
||||
#define P_CLI 12 // clause instructions
|
||||
#define P_WHILE 13 // while loop
|
||||
#define P_REPEAT 14 // repeat-until loop
|
||||
#define P_IF 15 // if or if-else
|
||||
#define P_CFOR 16 // C-style for loop
|
||||
#define P_FOR 17 // vector for loop
|
||||
#define P_RET 18 // return from function
|
||||
#define P_CSLICE 19 // componentwise slice
|
||||
#define P_UREF 20 // unresolved reference
|
||||
#define P_FREF 21 // resolved reference
|
||||
#define P_CALL 22 // function call
|
||||
#define P_PRINT 23 // print to stdout
|
||||
#define P_PAR 24 // function parameters
|
||||
#define P_NF 25 // native function
|
||||
#define P_OMOD 27 // opcode modifier
|
||||
#define P_OMODS 28 // opcode modifiers
|
||||
#define P_OPARS 29 // opcode parameters
|
||||
#define P_OP 30 // opcode
|
||||
#define P_SWIZ0 31 // register swizzles with N components wrapped
|
||||
#define P_SWIZ1 32 // -"-
|
||||
#define P_SWIZ2 33 // -"-
|
||||
#define P_SWIZ3 34 // -"-
|
||||
#define P_SWIZ4 35 // -"-
|
||||
#define P_VTXFMT 36 // vertex formats
|
||||
#define P_LABEL 37 // unique identifier of a label
|
||||
#define P_LINIT 38 // generate label identifiers
|
||||
#define P_MARK 39 // mark a label
|
||||
#define P_OPCALL 40 // opcode that does a clause instantiation on par0
|
||||
#define P_ASIC 41 // ASIC model
|
||||
#define P_ASICCAP 42 // ASIC capability
|
||||
#define P_NCLOS 43 // create closure
|
||||
#define P_CLOS 44 // closure
|
||||
#define P_SH 45 // compiled shader
|
||||
|
||||
#define P_NOT 0x100
|
||||
#define P_BNOT 0x101
|
||||
#define P_NEG 0x102
|
||||
#define P_MUL 0x103
|
||||
#define P_DIV 0x104
|
||||
#define P_MOD 0x105
|
||||
#define P_ADD 0x106
|
||||
#define P_SUB 0x107
|
||||
#define P_SHL 0x108
|
||||
#define P_SHR 0x109
|
||||
#define P_SAR 0x10A
|
||||
#define P_LT 0x10B
|
||||
#define P_GT 0x10C
|
||||
#define P_LEQ 0x10D
|
||||
#define P_GEQ 0x10E
|
||||
#define P_EQ 0x10F
|
||||
#define P_NEQ 0x110
|
||||
#define P_BAND 0x111
|
||||
#define P_BOR 0x112
|
||||
#define P_BXOR 0x113
|
||||
#define P_AND 0x114
|
||||
#define P_OR 0x115
|
||||
#define P_XOR 0x116
|
||||
#define P_SEL 0x117
|
||||
#define P_XDEC 0x118
|
||||
#define P_XINC 0x119
|
||||
#define P_DECX 0x11A
|
||||
#define P_INCX 0x11B
|
||||
#define P_ASGN 0x11C
|
||||
#define P_IND 0x11D
|
||||
#define P_NOP 0x11E
|
||||
#define P_VSUM 0x11F
|
||||
#define P_VPROD 0x120
|
||||
#define P_VBOR 0x121
|
||||
#define P_VBAND 0x122
|
||||
#define P_VBXOR 0x123
|
||||
#define P_VOR 0x124
|
||||
#define P_VAND 0x125
|
||||
#define P_VXOR 0x126
|
||||
#define P_VMIN 0x127
|
||||
#define P_VMAX 0x128
|
||||
#define P_CADD 0x129
|
||||
#define P_CSUB 0x12A
|
||||
#define P_CMUL 0x12B
|
||||
#define P_CDIV 0x12C
|
||||
#define P_CSHL 0x12D
|
||||
#define P_CSHR 0x12E
|
||||
#define P_CSAR 0x12F
|
||||
#define P_CBAND 0x130
|
||||
#define P_CBOR 0x131
|
||||
#define P_CBXOR 0x132
|
||||
#define P_CAND 0x133
|
||||
#define P_COR 0x134
|
||||
#define P_CXOR 0x135
|
||||
#define P_CMIN 0x136
|
||||
#define P_CMAX 0x137
|
||||
#define P_MIN 0x138
|
||||
#define P_MAX 0x139
|
||||
#define P_PROBE 0x13A
|
||||
#define P_BITS 0x13B
|
||||
|
||||
// register types
|
||||
#define R_VGPR 0x00000
|
||||
#define R_OFF 0x04000
|
||||
#define R_SNAME 0x06000
|
||||
#define R_INTERP 0x08000
|
||||
#define R_SPEC 0x0A000
|
||||
#define R_SGPR 0x0C000
|
||||
#define R_EXPBUF 0x0E000
|
||||
#define R_TMASK 0x1E000
|
||||
|
||||
// magic values for R_SPEC
|
||||
#define R_P_CL 3 // used internally only (inline literal)
|
||||
#define R_P_CI_L 0xDB // used internally only
|
||||
#define R_P_LDX_L 0xDB // any LDS inline
|
||||
#define R_P_LDS_L 0xDF // direct LDS inline
|
||||
#define R_P_LDS_H 0xE0
|
||||
#define R_P_LDX_H 0xE0
|
||||
#define R_P_CI_S 0xF3 // end of new R8xx constants
|
||||
#define R_P_CI_H 0xFC
|
||||
#define R_P_NOTLAST 0xFF// notlast operand for export
|
||||
|
||||
// magic values for R_SNAME
|
||||
#define R_S_SCRATCH 1
|
||||
#define R_S_PSVS_STATE 2
|
||||
#define R_S_SO_WRITE_INDEX 3
|
||||
#define R_S_SO_BASE_OFFSET0 4
|
||||
#define R_S_SO_BASE_OFFSET1 5
|
||||
#define R_S_SO_BASE_OFFSET2 6
|
||||
#define R_S_SO_BASE_OFFSET3 7
|
||||
#define R_S_OFFCHIP_LDS 8
|
||||
#define R_S_IS_OFFCHIP 9
|
||||
#define R_S_RING_OFFSET 10
|
||||
#define R_S_GS_WAVE_ID 11
|
||||
#define R_S_TG_SIZE 12
|
||||
#define R_S_TF_BASE 13
|
||||
#define R_S_TGID_X 14
|
||||
#define R_S_TGID_Y 15
|
||||
#define R_S_TGID_Z 16
|
||||
#define R_S_WAVE_CNT 17
|
||||
#define R_S_GLOBAL_WAVE_ID 18
|
||||
|
||||
// register components
|
||||
#define R_CMASK 0x1C00
|
||||
#define R_CSHIFT 10
|
||||
#define R_CX 0x0000
|
||||
#define R_CY 0x0400
|
||||
#define R_CZ 0x0800
|
||||
#define R_CW 0x0C00
|
||||
#define R_CS 0x1000 // used to identify scalar elements
|
||||
#define R_CN 0x1800
|
||||
|
||||
#define R_IMASK 0x03FF
|
||||
|
||||
// source transforms
|
||||
#define R_NEG 0x80000
|
||||
#define R_ABS 0x100000
|
||||
#define R_SEXT 0x200000
|
||||
|
||||
// subencodings for export targets
|
||||
|
||||
#define R_E_TMASK 0x0380
|
||||
#define R_E_MRT 0x0000
|
||||
#define R_E_Z 0x0080
|
||||
#define R_E_POS 0x0100
|
||||
#define R_E_PARAM 0x0180
|
||||
#define R_E_ATTR 0x0280
|
||||
#define R_E_NULL 0x0300
|
||||
|
||||
#define R_E_IMASK 0x007F
|
||||
|
||||
// subencodings for interp
|
||||
|
||||
#define R_I_TMASK 0x0380
|
||||
#define R_I_P10 0x0000
|
||||
#define R_I_P20 0x0080
|
||||
#define R_I_P0 0x0100
|
||||
|
||||
// function parameters
|
||||
#define F_CANY 0x00000000
|
||||
#define F_CNUM 0x01000000
|
||||
#define F_CREG 0x02000000
|
||||
#define F_CTMP 0x03000000
|
||||
#define F_CFPTR 0x04000000
|
||||
#define F_CINT 0x05000000
|
||||
#define F_CMASK 0x07000000
|
||||
#define F_OPT 0x40000000
|
||||
#define F_VEC 0x80000000
|
||||
|
||||
typedef struct pnode {
|
||||
struct pnode *gc_next;
|
||||
int gc_mark;
|
||||
int type;
|
||||
int et; // error reporting tag
|
||||
int ni; // number of items
|
||||
union pnode_item {
|
||||
int num; // integer
|
||||
float flt; // float
|
||||
char *str; // string
|
||||
struct pnode *ptr; // tree item
|
||||
struct {
|
||||
struct pnode *v;
|
||||
int e;
|
||||
} ve; // variable-element pair
|
||||
struct {
|
||||
int p;
|
||||
char *n;
|
||||
} var; // variable (stack offset, name)
|
||||
struct sp3_shader *sh;
|
||||
unsigned int reg; // register components
|
||||
struct pnode *(* nf)(Sp, struct pnode **); // native function
|
||||
} i[1];
|
||||
} pnode;
|
||||
|
||||
pnode *p_str(Sp, char *s); // wrap a string
|
||||
pnode *p_float(Sp, float f); // wrap a float
|
||||
pnode *p_num(Sp, int i); // wrap an integer
|
||||
pnode *p_vec(Sp, int type, int len); // create a vector
|
||||
pnode *p_list(Sp, pnode *list, pnode *item); // append item to P_LIST
|
||||
pnode *p_list_rev(Sp, pnode *list); // reverse the order of the list
|
||||
pnode *p_tree(Sp, int type, int nitems, ...); // create a tree node
|
||||
pnode *p_l2t(Sp, int type, pnode *list); // list to tree
|
||||
pnode *p_l2v(Sp, int type, pnode *list); // list to vector
|
||||
pnode *p_x2x(Sp, int type, pnode *p); // cast to type
|
||||
pnode *p_clause(Sp, int vstk, int lstk, pnode *parlist, pnode *instlist, int type);
|
||||
pnode *p_reg(Sp, int type, int idx); // wrap a register
|
||||
pnode *p_swizzle(Sp, char *str); // parse a swizzle string
|
||||
pnode *p_lv2rv(Sp, pnode *lval); // lvalue to rvalue
|
||||
pnode *p_newlabel(Sp, pnode *t, int tag); // define new label
|
||||
pnode *p_label(Sp, int cnt); // fill with label IDs
|
||||
pnode *p_clone(Sp, pnode *src);
|
||||
|
||||
void print_node(pnode *); // print to stdout
|
||||
|
||||
void mark_gc_storage(Sp); // mark all internal storage of sp3 for gc
|
||||
|
||||
// functions provided by machine driver
|
||||
int is_opcode(struct sp3_state *S, const char *name); // is an opcode (any)
|
||||
int is_opcode_0arg(struct sp3_state *S, const char *name); // is an opcode (0-argument)
|
||||
int is_opcode_call(struct sp3_state *S, const char *name); // is a call op (1st argument is a closure)
|
||||
void sp3_gen_opcode(Sp, const char *op, pnode *par, pnode *mod);
|
||||
void sp3_si_gen_opcode(Sp, const char *op, pnode *par, pnode *mod);
|
||||
void sp3_ci_gen_opcode(Sp, const char *op, pnode *par, pnode *mod);
|
||||
void sp3_gfx8_gen_opcode(Sp, const char *op, pnode *par, pnode *mod);
|
||||
pnode *machine_const(Sp, char *name); // if a machine const, parse it (else NULL)
|
||||
void mark_label(Sp, int li); // "label:"
|
||||
pnode *asic_getcap(Sp, int id); // get ASIC capability #id
|
||||
void mach_cleanup(Sp); // initialize generator state
|
||||
|
||||
// name trees
|
||||
|
||||
#define NT_SEARCH 0
|
||||
#define NT_ADD 1
|
||||
#define NT_ADD_ONLY 2
|
||||
#define NT_ADD_STRDUP 4
|
||||
// Node of a name tree. Nodes are looked up / inserted through
// name_tree_operation() (which takes one of the NT_* mode values above)
// and released with name_tree_delete().
struct name_tree {
|
||||
const char *name; // name string stored in this node
|
||||
int tag; // caller-supplied tag (see the `tag` argument of name_tree_operation)
|
||||
int add; // NOTE(review): presumably the NT_* mode the node was created with - confirm
|
||||
struct name_tree *l, *r; // child links; presumably left/right subtrees - TODO confirm ordering
|
||||
};
|
||||
|
||||
struct name_tree *name_tree_operation(struct name_tree **t, const char *name, int tag, int add);
|
||||
void name_tree_delete(struct name_tree **t);
|
||||
|
||||
// symbol table
|
||||
|
||||
void f_decl(Sp, char *, pnode *);
|
||||
pnode *f_ref(Sp, char *);
|
||||
void f_check(Sp);
|
||||
pnode *f_call(Sp, const char *);
|
||||
|
||||
void f_decl_native(Sp, int, char *, pnode *(*)(Sp, pnode **), int, ...);
|
||||
|
||||
// parse-time variable stack
|
||||
|
||||
void vs_decl(Sp, const char *, int tag);
|
||||
int vs_lookup(Sp, const char *, pnode **, int);
|
||||
char *vs_getname(pnode *);
|
||||
|
||||
void vs_enter_func(Sp);
|
||||
int vs_leave_func(Sp, int *); // returns number of stack allocations &
|
||||
// (through param) number of lstack allocs
|
||||
void vs_enter_block(Sp);
|
||||
void vs_leave_block(Sp);
|
||||
|
||||
int vs_get_topmax(Sp); // returns number of stack allocation for top level
|
||||
|
||||
// runtime variable stack
|
||||
|
||||
void rv_set(Sp, pnode *, pnode *);
|
||||
pnode *rv_get(Sp, pnode *);
|
||||
void rv_alloc(Sp, int);
|
||||
void rv_setpar(Sp, int, pnode *);
|
||||
int rv_enter(Sp, int);
|
||||
void rv_leave(Sp, int);
|
||||
|
||||
int rl_enter(Sp, int);
|
||||
void rl_leave(Sp, int);
|
||||
|
||||
void rv_leave_native(Sp);
|
||||
pnode **rv_getpar_native(Sp);
|
||||
|
||||
// all-in-one variable setter
|
||||
|
||||
void rv_set_by_name(Sp, const char *, pnode *);
|
||||
|
||||
// growable binary buffer
|
||||
|
||||
typedef struct grow_buf {
|
||||
int n, size;
|
||||
unsigned i[1];
|
||||
} grow_buf;
|
||||
|
||||
grow_buf *gb_alloc(int);
|
||||
grow_buf *gb_append(grow_buf *, int, unsigned *);
|
||||
grow_buf *gb_add(grow_buf *, unsigned);
|
||||
grow_buf *gb_reg(grow_buf *, unsigned, unsigned);
|
||||
|
||||
// clause contents
|
||||
|
||||
struct clause_info {
|
||||
unsigned base;
|
||||
grow_buf *data;
|
||||
int type;
|
||||
};
|
||||
|
||||
void start_clause(Sp, int);
|
||||
void cb_emit(Sp, unsigned *, int);
|
||||
int cb_ptr(Sp);
|
||||
void cb_patch(Sp, int, int, unsigned);
|
||||
|
||||
int remap_clauses(Sp);
|
||||
|
||||
struct sp3_shader *gen_output(Sp);
|
||||
void convert_relocs(Sp);
|
||||
void perform_relocs(Sp);
|
||||
|
||||
pnode *shader_clos(Sp, pnode *); // call this to get a binary shader from closure
|
||||
pnode *shader_name(Sp, const char *); // call this to get a binary shader from name
|
||||
|
||||
void set_const(Sp, int idx, unsigned val);
|
||||
int find_const(Sp, unsigned val);
|
||||
|
||||
void set_kbuf(Sp, int kbuf, int idx, unsigned val);
|
||||
|
||||
const char *asic_name(Sp);
|
||||
int asic_id(Sp);
|
||||
int asic_capbyname(int, const char *);
|
||||
int asic_capbyid(int, int);
|
||||
|
||||
// register stream packer
|
||||
int sp3_guess_shader_type(struct sp3_state *S, struct sp3_shader *sh);
|
||||
int sp3_si_guess_shader_type(struct sp3_shader *sh);
|
||||
int sp3_ci_guess_shader_type(struct sp3_shader *sh);
|
||||
int sp3_gfx8_guess_shader_type(struct sp3_shader *sh);
|
||||
void sp3_pack_reg_stream(Sp, int type, struct sp3_shader *sh);
|
||||
void sp3_si_pack_reg_stream(Sp, int type, struct sp3_shader *sh);
|
||||
void sp3_ci_pack_reg_stream(Sp, int type, struct sp3_shader *sh);
|
||||
void sp3_gfx8_pack_reg_stream(Sp, int type, struct sp3_shader *sh);
|
||||
void unpack_reg_stream(Sp, struct sp3_shader *sh);
|
||||
|
||||
// instances
|
||||
|
||||
int new_instance(Sp, pnode *, int);
|
||||
void eval_instances(Sp);
|
||||
int get_instance_clause(Sp, int);
|
||||
int get_instance_type(Sp, int);
|
||||
|
||||
// error reporting
|
||||
|
||||
void et_parse_mode(Sp, int);
|
||||
int et_get_id(Sp);
|
||||
#ifdef _MSC_VER
|
||||
__declspec(noreturn)
|
||||
#endif
|
||||
void et_error(Sp, char *, char *, ...)
|
||||
#ifdef __GNUC__
|
||||
__attribute__ ((__noreturn__))
|
||||
__attribute__ ((format(printf, 3, 4)))
|
||||
#endif
|
||||
;
|
||||
void et_warning(Sp, char *, char *, ...)
|
||||
#ifdef __GNUC__
|
||||
__attribute__ ((format(printf, 3, 4)))
|
||||
#endif
|
||||
;
|
||||
void et_blame(Sp, pnode *);
|
||||
void et_blame_et(Sp, int);
|
||||
void et_print(Sp, pnode *);
|
||||
int et_get_blame(Sp);
|
||||
|
||||
// text buffer for disasm
|
||||
void bprintf(Sp, char *, ...)
|
||||
#ifdef __GNUC__
|
||||
__attribute__ ((format(printf, 2, 3)))
|
||||
#endif
|
||||
;
|
||||
void bcmt(Sp, const char *cmt, const char *start, const char *line, const char *end);
|
||||
void btab(Sp, int);
|
||||
char *bget(Sp);
|
||||
|
||||
// state structure
|
||||
struct sp3_state {
|
||||
// flex
|
||||
void *scanner;
|
||||
void *yystate;
|
||||
|
||||
char *yyfile;
|
||||
int yyline;
|
||||
|
||||
// sp3-gc
|
||||
struct sp3_gc_state *gc;
|
||||
|
||||
// asic private
|
||||
struct sp3_asic_state *ap;
|
||||
|
||||
// sp3-eval
|
||||
int retflag;
|
||||
pnode *retval;
|
||||
|
||||
// sp3-int
|
||||
struct sp3_shader config;
|
||||
|
||||
int clause_id; // counts up during evaluation
|
||||
int clause_type;
|
||||
struct clause_info *clauses;
|
||||
int nclauses, sclauses;
|
||||
|
||||
int memsize, ctsizes[4];
|
||||
int in_shader;
|
||||
|
||||
char *disasm_text;
|
||||
int disasm_column;
|
||||
int disasm_len, disasm_maxlen;
|
||||
|
||||
sp3_vma *comment_map;
|
||||
void *comment_ctx;
|
||||
sp3_comment_cb comment_top, comment_right;
|
||||
|
||||
unsigned const_buf[1024];
|
||||
int const_vld[1024], const_vld_range;
|
||||
|
||||
unsigned *kval[16];
|
||||
int knum[16];
|
||||
|
||||
struct et_record {
|
||||
const char *file;
|
||||
int line;
|
||||
} *et_names;
|
||||
int et_node;
|
||||
int et_parsing;
|
||||
int net_names, set_names;
|
||||
|
||||
char *fname_last;
|
||||
struct name_tree *fnames;
|
||||
struct fsym {
|
||||
char *name;
|
||||
pnode *func;
|
||||
struct fref *refs;
|
||||
struct fsym *l, *r;
|
||||
} *fsymbols;
|
||||
int func_id; // counts up during parsing
|
||||
|
||||
struct instance {
|
||||
int type;
|
||||
int clause_id;
|
||||
pnode *call;
|
||||
} *instances;
|
||||
int ninstances, sinstances;
|
||||
|
||||
struct vstack {
|
||||
char *name;
|
||||
int tag;
|
||||
int vs_sp, vs_level;
|
||||
struct vstack *next;
|
||||
} *var_stack, *lbl_stack;
|
||||
int vs_max, vs_sp, vs_top, vs_topmax;
|
||||
int ls_max, ls_sp;
|
||||
|
||||
pnode **rl_stack;
|
||||
int rl_sp, rl_ss, rl_base, rl_id, rl_size;
|
||||
|
||||
pnode **rv_stack;
|
||||
int rv_sp, rv_ss, rv_base, rv_size;
|
||||
|
||||
int werror, wcount;
|
||||
const char *err_hdr;
|
||||
|
||||
unsigned entry_point_table_size;
|
||||
unsigned entry_point_table_alloc_size;
|
||||
sp3_vmaddr *entry_point_table;
|
||||
};
|
||||
struct sp3_state *sp3_new_state(void);
|
||||
void sp3_asic_attach_state(Sp);
|
||||
void sp3_new_parser(Sp);
|
||||
void sp3_free_parser(Sp);
|
||||
void sp3_free_state(Sp);
|
||||
|
||||
void reg_natives(Sp);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@@ -1,137 +0,0 @@
|
||||
//=====================================================================
|
||||
// Copyright 2016 (c), Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
/// \author AMD Developer Tools Team
|
||||
/// \file
|
||||
///
|
||||
//=====================================================================
|
||||
|
||||
#ifndef SP3_TYPE_H
|
||||
#define SP3_TYPE_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/// @file sp3-type.h
|
||||
/// @brief sp3 types
|
||||
|
||||
/// @brief Shader type identifiers.
///
/// These are the values stored in sp3_shader::type and passed as the
/// shader_type argument of the disassembly / register-stream calls.
enum sp3_shtype {
|
||||
SP3_SHTYPE_NONE = -1, ///< No shader type; the only negative value.
|
||||
SP3_SHTYPE_PS = 0,
|
||||
SP3_SHTYPE_VS = 1,
|
||||
SP3_SHTYPE_GS = 2,
|
||||
SP3_SHTYPE_ES = 3,
|
||||
SP3_SHTYPE_HS = 4,
|
||||
SP3_SHTYPE_LS = 5,
|
||||
SP3_SHTYPE_CS = 6,
|
||||
};
|
||||
|
||||
/// @brief Fixed array sizes used by struct sp3_shader.
enum sp3_count {
|
||||
SP3_NUM_MRT = 8, ///< Length of sp3_shader::cb_masks.
|
||||
SP3_NUM_STRM = 4, ///< Length of sp3_shader::strm_used.
|
||||
};
|
||||
|
||||
/// @brief Disassembly option bits (SP3DIS_*).
///
/// Each value is a distinct bit, so they can be OR-ed together into the
/// `flags` mask accepted by sp3_disasm() and sp3_disasm_inst().
enum sp3_flag {
|
||||
SP3DIS_NO_STATE = 0x01,
|
||||
SP3DIS_NO_BINARY = 0x02,
|
||||
SP3DIS_COMMENTS = 0x04,
|
||||
SP3DIS_NO_GPR_COUNT = 0x08,
|
||||
SP3DIS_FORCEVALID = 0x10, ///< When set, the max_len argument of sp3_disasm() matters.
|
||||
SP3DIS_NO_ASIC = 0x20,
|
||||
};
|
||||
|
||||
/// @brief Shader context. Contains no user-visible fields.
|
||||
struct sp3_context;
|
||||
|
||||
/// @brief Storage entry for register streams.
|
||||
struct sp3_reg {
|
||||
unsigned index; ///< One of the mm* values from chip_enum.h.
|
||||
unsigned value;
|
||||
};
|
||||
|
||||
/// @brief Wrapped shader metadata.
|
||||
///
|
||||
/// After generation, shaders are encapsulated in sp3_shader structures.
|
||||
///
|
||||
/// Those structures contain the shader binary, its register stream,
|
||||
/// constants and constant buffers and metadata needed for SC compatibility.
|
||||
struct sp3_shader {
|
||||
int type; ///< One of the SP3_SHTYPE_* constants.
|
||||
int asic_int; ///< Internal ASIC index. Do not use.
|
||||
const char *asic; ///< ASIC name as a string ("RV870" etc).
|
||||
unsigned size; ///< Size of the compiled shader, in 32-bit words.
|
||||
unsigned nsgprs; ///< Number of scalar GPRs used.
|
||||
unsigned nvgprs; ///< Number of vector GPRs used.
|
||||
unsigned trap_present;
|
||||
unsigned user_sgpr_count;
|
||||
unsigned scratch_en;
|
||||
unsigned dispatch_draw_en;
|
||||
unsigned so_en;
|
||||
unsigned so_base0_en;
|
||||
unsigned so_base1_en;
|
||||
unsigned so_base2_en;
|
||||
unsigned so_base3_en;
|
||||
unsigned oc_lds_en;
|
||||
unsigned tg_size_en;
|
||||
unsigned tidig_comp_cnt; ///< Number of components(-1) enabled for thread id in group
|
||||
unsigned tgid_x_en;
|
||||
unsigned tgid_y_en;
|
||||
unsigned tgid_z_en;
|
||||
unsigned wave_cnt_en;
|
||||
unsigned sgpr_scratch;
|
||||
unsigned sgpr_psvs_state;
|
||||
unsigned sgpr_so_write_index;
|
||||
unsigned sgpr_so_base_offset0;
|
||||
unsigned sgpr_so_base_offset1;
|
||||
unsigned sgpr_so_base_offset2;
|
||||
unsigned sgpr_so_base_offset3;
|
||||
unsigned sgpr_offchip_lds;
|
||||
unsigned sgpr_is_offchip;
|
||||
unsigned sgpr_ring_offset;
|
||||
unsigned sgpr_gs_wave_id;
|
||||
unsigned sgpr_global_wave_id;
|
||||
unsigned sgpr_tg_size;
|
||||
unsigned sgpr_tgid_x;
|
||||
unsigned sgpr_tgid_y;
|
||||
unsigned sgpr_tgid_z;
|
||||
unsigned sgpr_tf_base;
|
||||
unsigned sgpr_wave_cnt;
|
||||
unsigned pc_exports; ///< Range of parameters exported (if VS).
|
||||
unsigned pos_export; ///< Shader executes a position export (if VS).
|
||||
unsigned cb_exports; ///< Range of MRTs exported (if PS).
|
||||
unsigned mrtz_export_format; ///< Export format of the mrtz export.
|
||||
unsigned z_export; ///< Shader executes a Z export (if PS).
|
||||
unsigned pops_en; ///< Shader is POPS (PS)
|
||||
unsigned load_collision_waveid; ///< Shader sets load collision waveid (if PS).
|
||||
unsigned stencil_test_export; ///< Shader exports stencil (if PS).
|
||||
unsigned stencil_op_export; ///< Shader exports stencil (if PS).
|
||||
unsigned kill_used; ///< Shader executes ALU KILL operations.
|
||||
unsigned cb_masks[SP3_NUM_MRT]; ///< Component masks for each MRT exported (if PS).
|
||||
unsigned emit_used; ///< EMIT opcodes used (if GS).
|
||||
unsigned covmask_export; ///< Shader exports coverage mask (if PS).
|
||||
unsigned mask_export; ///< Shader exports mask (if PS).
|
||||
unsigned strm_used[SP3_NUM_STRM]; ///< Streamout operations used (map).
|
||||
unsigned scratch_used; ///< Scratch SMX exports used.
|
||||
unsigned scratch_itemsize; ///< Scratch ring item size.
|
||||
unsigned reduction_used; ///< Reduction SMX exports used.
|
||||
unsigned ring_used; ///< ESGS/GSVS ring SMX exports used.
|
||||
unsigned ring_itemsize; ///< ESGS/GSVS ring item size (for ES/GS respectively).
|
||||
unsigned vertex_size[4]; ///< GSVS ring vertex size (for GS).
|
||||
unsigned mem_used; ///< Raw memory SMX exports used.
|
||||
unsigned rats_used; ///< Mask of RATs (UAVs) used
|
||||
unsigned group_size[3]; ///< Wavefront group size (for ELF files).
|
||||
unsigned alloc_lds; ///< Number of LDS bytes allocated for wave group. (translates to lds_size in CS and LS)
|
||||
unsigned *data; ///< Shader binary data.
|
||||
unsigned nregs; ///< Number of register writes in the stream.
|
||||
struct sp3_reg *regs; ///< Register writes (index-value pairs).
|
||||
};
|
||||
|
||||
/// @brief Comment callback.
|
||||
typedef const char *(*sp3_comment_cb)(void *, int);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@@ -1,119 +0,0 @@
|
||||
//=====================================================================
|
||||
// Copyright 2016 (c), Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
/// \author AMD Developer Tools Team
|
||||
/// \file
|
||||
///
|
||||
//=====================================================================
|
||||
|
||||
#ifndef SP3_VM_H
|
||||
#define SP3_VM_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#if defined (WIN_OS) && !defined(SP3_STATIC_LIB)
|
||||
#if defined(DLL_EXPORT_SP3)
|
||||
#define SP3_EXPORT __declspec(dllexport)
|
||||
#else
|
||||
#define SP3_EXPORT __declspec(dllimport)
|
||||
#endif
|
||||
#else
|
||||
#define SP3_EXPORT
|
||||
#endif
|
||||
|
||||
#ifdef _MSC_VER
|
||||
typedef __int32 int32_t;
|
||||
typedef unsigned __int32 uint32_t;
|
||||
|
||||
typedef __int64 int64_t;
|
||||
typedef unsigned __int64 uint64_t;
|
||||
#else
|
||||
#include <inttypes.h>
|
||||
#endif
|
||||
|
||||
struct sp3_vma;
|
||||
|
||||
/// @file sp3-vm.h
|
||||
/// @brief sp3 VM API
|
||||
///
|
||||
/// The VM API is used to manage virtual memory maps. Those maps are
|
||||
/// used for binary storage for disassembly, as they can naturally
|
||||
/// mirror the GPU's memory map (so no register translation is needed).
|
||||
|
||||
#define SP3_VM_PAGESIZE 64
|
||||
|
||||
/// @brief VM addresses are 64-bit and the address unit is 32 bits
|
||||
///
|
||||
typedef uint64_t sp3_vmaddr;
|
||||
|
||||
/// @brief Callback function that will fill a VMA on demand
|
||||
///
|
||||
/// The VMA to be filled will be specified through the request address.
|
||||
/// The callback should fill the VMA using sp3_vm_write calls.
|
||||
typedef void (* sp3_vmfill)(struct sp3_vma *vm, sp3_vmaddr addr, void *ctx);
|
||||
|
||||
/// @brief VM area
|
||||
///
|
||||
/// VMAs are kept in a sorted list
|
||||
typedef struct sp3_vma {
|
||||
sp3_vmaddr base, len;
|
||||
sp3_vmfill fill;
|
||||
void *fill_ctx;
|
||||
uint32_t *data;
|
||||
struct sp3_vma *prev, *next;
|
||||
} sp3_vma;
|
||||
|
||||
/// @brief Create a new VM that is empty.
|
||||
///
|
||||
SP3_EXPORT
|
||||
sp3_vma *sp3_vm_new(void);
|
||||
|
||||
/// @brief Create a new VM that has a sp3_vmfill callback.
|
||||
///
|
||||
SP3_EXPORT
|
||||
sp3_vma *sp3_vm_new_fill(sp3_vmfill fill, void *ctx);
|
||||
|
||||
/// @brief Create a new VM from an array of words.
|
||||
/// @param base VM address to load array at.
|
||||
/// @param len Number of 32-bit words in the array.
|
||||
/// @param data Pointer to the array.
|
||||
///
|
||||
SP3_EXPORT
|
||||
sp3_vma *sp3_vm_new_ptr(sp3_vmaddr base, sp3_vmaddr len, const uint32_t *data);
|
||||
|
||||
/// @brief Find a VMA, optionally adding it.
|
||||
/// @param vm VM to search in.
|
||||
/// @param addr Address to search for.
|
||||
/// @param add Flag indicating whether a failure should result in adding a new VMA.
|
||||
///
|
||||
SP3_EXPORT
|
||||
sp3_vma *sp3_vm_find(sp3_vma *vm, sp3_vmaddr addr, int add);
|
||||
|
||||
/// @brief Write a word to a VM.
|
||||
///
|
||||
SP3_EXPORT
|
||||
void sp3_vm_write(sp3_vma *vm, sp3_vmaddr addr, uint32_t val);
|
||||
|
||||
/// @brief Read a word from a VM.
|
||||
///
|
||||
SP3_EXPORT
|
||||
uint32_t sp3_vm_read(sp3_vma *vm, sp3_vmaddr addr);
|
||||
|
||||
/// @brief Probe VM for presence.
|
||||
/// @return 1 if the specified address is backed in the VM, 0 otherwise.
|
||||
///
|
||||
SP3_EXPORT
|
||||
int sp3_vm_present(sp3_vma *vm, sp3_vmaddr addr);
|
||||
|
||||
/// @brief Free a VM and all its storage.
|
||||
///
|
||||
SP3_EXPORT
|
||||
void sp3_vm_free(sp3_vma *vm);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@@ -1,198 +0,0 @@
|
||||
//=====================================================================
|
||||
// Copyright 2016 (c), Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
/// \author AMD Developer Tools Team
|
||||
/// \file
|
||||
///
|
||||
//=====================================================================
|
||||
|
||||
#ifndef SP3_H
|
||||
#define SP3_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include "sp3-vm.h"
|
||||
#include "sp3-type.h"
|
||||
|
||||
/// @file sp3.h
|
||||
/// @brief sp3 API
|
||||
|
||||
/// @brief Get version of the sp3 library.
|
||||
///
|
||||
/// @return String containing the version number.
|
||||
///
|
||||
SP3_EXPORT const char *sp3_version(void);
|
||||
|
||||
/// @brief Create a new sp3 context.
|
||||
///
|
||||
SP3_EXPORT struct sp3_context *sp3_new(void);
|
||||
|
||||
/// @brief Set option for sp3.
|
||||
///
|
||||
/// @param state sp3 context.
|
||||
/// @param option Option name. Unknown options will raise an error.
|
||||
/// @param value Option value. NULL is used to represent value-less options.
|
||||
///
|
||||
SP3_EXPORT void sp3_set_option(struct sp3_context *state, const char *option, const char *value);
|
||||
|
||||
/// @brief Parse a file into a context.
|
||||
///
|
||||
/// If 'file' is NULL, parse stdin.
|
||||
///
|
||||
SP3_EXPORT void sp3_parse_file(struct sp3_context *state, const char *file);
|
||||
|
||||
/// @brief Parse a string into a context.
|
||||
///
|
||||
SP3_EXPORT void sp3_parse_string(struct sp3_context *state, const char *string);
|
||||
|
||||
/// @brief Parse a file from the standard library into a context.
|
||||
///
|
||||
SP3_EXPORT void sp3_parse_library(struct sp3_context *state, const char *name);
|
||||
|
||||
/// @brief Call a sp3 function.
|
||||
///
|
||||
SP3_EXPORT void sp3_call(struct sp3_context *state, const char *func);
|
||||
|
||||
/// @brief Call a sp3 CF clause.
|
||||
///
|
||||
/// @param state sp3 context.
|
||||
/// @param cffunc Name of clause to call. By convention, this is "main".
|
||||
///
|
||||
/// @return A compiled and linked shader. Free memory with sp3_free().
|
||||
///
|
||||
SP3_EXPORT struct sp3_shader *sp3_compile(struct sp3_context *state, const char *cffunc);
|
||||
|
||||
/// @brief Free a sp3_shader.
|
||||
///
|
||||
SP3_EXPORT void sp3_free_shader(struct sp3_shader *sh);
|
||||
|
||||
/// @brief Get current ASIC name set for a context.
|
||||
///
|
||||
SP3_EXPORT const char *sp3_getasic(struct sp3_context *state);
|
||||
|
||||
/// @brief Set current ASIC name for a context.
|
||||
///
|
||||
SP3_EXPORT void sp3_setasic(struct sp3_context *state, const char *chip);
|
||||
|
||||
/// @brief Set global variable in context to an integer.
|
||||
///
|
||||
SP3_EXPORT void sp3_set_param_int(struct sp3_context *state, const char *name, int value);
|
||||
|
||||
/// @brief Set global variable in context to an integer vector.
|
||||
///
|
||||
SP3_EXPORT void sp3_set_param_intvec(struct sp3_context *state, const char *name, int size, const int *value);
|
||||
|
||||
/// @brief Set global variable in context to a float.
|
||||
///
|
||||
SP3_EXPORT void sp3_set_param_float(struct sp3_context *state, const char *name, float value);
|
||||
|
||||
/// @brief Set global variable in context to a float vector.
|
||||
///
|
||||
SP3_EXPORT void sp3_set_param_floatvec(struct sp3_context *state, const char *name, int size, const float *value);
|
||||
|
||||
/// @brief Set error message header.
|
||||
///
|
||||
SP3_EXPORT void sp3_set_error_header(struct sp3_context *state, const char *str);
|
||||
|
||||
/// @brief Get ASIC metrics for the ASIC in current state.
|
||||
///
|
||||
/// Used by ELF tools to fill in some CAL fields.
|
||||
///
|
||||
SP3_EXPORT int sp3_asicinfo(struct sp3_context *state, const char *name);
|
||||
|
||||
/// @brief Free a context allocated by sp3_new/open/parse.
|
||||
///
|
||||
SP3_EXPORT void sp3_close(struct sp3_context *state);
|
||||
|
||||
/// @brief Disassemble a shader.
|
||||
///
|
||||
/// This call is likely to change to something that will take a filled sp3_shader structure later on.
|
||||
///
|
||||
/// @param state sp3 context (use sp3_new to allocate and sp3_setasic to set ASIC).
|
||||
/// @param bin Memory map with the opcodes (see sp3-vm.h).
|
||||
/// @param base Start of the shader in the memory map (in VM entries, i.e. 32-bit words).
|
||||
/// @param name Name to give the disassembled shader.
|
||||
/// @param shader_type One of the SP3_SHTYPE_* constants.
|
||||
/// @param include Literal text to include in the CF clause (NULL includes nothing).
|
||||
/// @param max_len Maximum length of CF clause. Matters if SP3DIS_FORCEVALID is set.
|
||||
/// @param flags A mask of SP3DIS_* flags.
|
||||
///
|
||||
/// @return Shader disassembly as a string (allocated with malloc()). Free memory with sp3_free().
|
||||
///
|
||||
SP3_EXPORT char *sp3_disasm(struct sp3_context *state, sp3_vma *bin, sp3_vmaddr base, const char *name, int shader_type, const char *include, unsigned max_len, unsigned flags);
|
||||
|
||||
/// @brief Disassemble a single shader instruction.
|
||||
///
|
||||
/// This call is likely to change to something that will take a filled sp3_shader structure later on.
|
||||
///
|
||||
/// @param state sp3 context (use sp3_new to allocate and sp3_setasic to set ASIC).
|
||||
/// @param inst Pointer to dwords containing instruction (exact number of dwords required depends on instruction).
|
||||
/// @param base Start of the shader in the memory map (in VM entries, i.e. 32-bit words).
|
||||
/// @param addr Address of the instruction being disassembled (in VM entries, i.e. 32-bit words).
|
||||
/// @param shader_type One of the SP3_SHTYPE_* constants.
|
||||
/// @param flags A mask of SP3DIS_* flags.
|
||||
///
|
||||
/// @return Shader disassembly as a string (allocated with malloc()). Free memory with sp3_free().
|
||||
///
|
||||
SP3_EXPORT char *sp3_disasm_inst(struct sp3_context *state, const unsigned inst[2], sp3_vmaddr base, sp3_vmaddr addr, int shader_type, unsigned flags);
|
||||
|
||||
/// @brief Parse a register stream.
|
||||
///
|
||||
/// Can be called before sp3_disasm to preset things like ALU, boolean and loop constants.
|
||||
///
|
||||
/// This call is likely to merge with sp3_disasm later on.
|
||||
///
|
||||
/// @param state sp3 context to fill with state.
|
||||
/// @param nregs Number of register entries.
|
||||
/// @param regs Register stream to parse.
|
||||
/// @param shader_type One of the SP3_SHTYPE_* constants.
|
||||
///
|
||||
SP3_EXPORT void sp3_setregs(struct sp3_context *state, unsigned nregs, const struct sp3_reg *regs, int shader_type);
|
||||
|
||||
|
||||
/// @brief Set shader comments
|
||||
///
|
||||
/// @param state sp3 context.
|
||||
/// @param map Map of comments (0 for no comment, other values will be passed to the callback).
|
||||
/// @param f_top Callback returning comment to place above the opcode.
|
||||
/// @param f_right Callback returning comment to place to the right of the opcode.
|
||||
/// @param ctx Void pointer to pass to comment callbacks.
|
||||
///
|
||||
SP3_EXPORT void sp3_setcomments(struct sp3_context *state, sp3_vma *map, sp3_comment_cb f_top, sp3_comment_cb f_right, void *ctx);
|
||||
|
||||
/// @brief Set alternate shader entry points
|
||||
///
|
||||
/// Used for disassembly; this marks an additional location in memory
|
||||
/// (besides the start address) where shader code may be found. Generally
|
||||
/// required for jump tables and any case where the shader may perform
|
||||
/// indirect jumps to ensure that disassembly locates all shader
|
||||
/// instructions.
|
||||
///
|
||||
/// @param state sp3 context (use sp3_new to allocate and sp3_setasic to set ASIC).
|
||||
/// @param addr Address of the instruction being disassembled (in VM entries, i.e. 32-bit words).
|
||||
///
|
||||
SP3_EXPORT void sp3_setentrypoint(struct sp3_context *state, sp3_vmaddr addr);
|
||||
|
||||
/// @brief Clear alternate shader entry points
|
||||
///
|
||||
/// Clear all entry points previously set with sp3_setentrypoint.
|
||||
///
|
||||
/// @param state sp3 context (use sp3_new to allocate and sp3_setasic to set ASIC).
|
||||
///
|
||||
SP3_EXPORT void sp3_clearentrypoints(struct sp3_context *state);
|
||||
|
||||
/// @brief Free memory allocated by sp3.
|
||||
///
|
||||
/// Windows DLLs that allocate memory have to free it. This function
|
||||
/// should be used to free the result of sp3_disasm, sp3_compile etc.
|
||||
///
|
||||
SP3_EXPORT void sp3_free(void *ptr);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
#endif
|
||||
Referência em uma Nova Issue
Bloquear um usuário