diff --git a/projects/hip/CMakeLists.txt b/projects/hip/CMakeLists.txt index b56c47af30..9c64390d18 100644 --- a/projects/hip/CMakeLists.txt +++ b/projects/hip/CMakeLists.txt @@ -1,5 +1,15 @@ cmake_minimum_required(VERSION 3.4.3) project(hip) +# sample command for hip-vdi, you'll need to have vdi installed +# cmake -DHIP_COMPILER=clang -DHIP_PLATFORM=vdi .. +# cmake -DHIP_COMPILER=clang -DHIP_PLATFORM=vdi -DVDI_DIR=/extra/lmoriche/hip-vdi/vdi -DOPENCL_DIR=/extra/lmoriche/clients/lmoriche_opencl_dev2/drivers/opencl/api/opencl -DLIBVDI_STATIC_DIR=/extra/lmoriche/hip-vdi/build/vdi .. + +set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake") + +############################# +# Options +############################# +option(BUILD_HIPIFY_CLANG "Enable building the CUDA->HIP converter" OFF) ############################# # Setup config generation @@ -100,12 +110,19 @@ add_to_config(_buildInfo HIP_COMPILER) # Determine HIP_RUNTIME # Either HCC or VDI; default is HCC if(NOT DEFINED ENV{HIP_RUNTIME}) + if(HIP_PLATFORM STREQUAL "hcc") set(HIP_RUNTIME "HCC" CACHE STRING "HIP Runtime") -else() - set(HIP_RUNTIME $ENV{HIP_RUNTIME} CACHE STRING "HIP Runtime") + elseif (HIP_PLATFORM STREQUAL "vdi") + set(HIP_RUNTIME "VDI" CACHE STRING "HIP Runtime") + elseif (HIP_PLATFORM STREQUAL "nvcc") + set(HIP_RUNTIME "CUDA" CACHE STRING "HIP Runtime") + endif() endif() add_to_config(_buildInfo HIP_RUNTIME) +if(HIP_PLATFORM STREQUAL "vdi") + set(USE_PROF_API "1") +endif() # If HIP_PLATFORM is hcc, we need HCC_HOME and HSA_PATH to be defined if(HIP_PLATFORM STREQUAL "hcc") @@ -190,12 +207,14 @@ message (STATUS "ROCM Installation path(ROCM_PATH): ${ROCM_PATH}") set(CPACK_SET_DESTDIR ON CACHE BOOL "Installer package will install hip to CMAKE_INSTALL_PREFIX instead of CPACK_PACKAGING_INSTALL_PREFIX") if (NOT CPACK_SET_DESTDIR) set(CPACK_PACKAGING_INSTALL_PREFIX "/opt/rocm/hip" CACHE PATH "Default installation path of hcc installer package") -endif (CPACK_SET_DESTDIR) +endif 
(NOT CPACK_SET_DESTDIR) ############################# # Profiling API support ############################# # Generate profiling API macros/structures header +if(HIP_PLATFORM STREQUAL "hcc") +if(USE_PROF_API EQUAL 1) set(PROF_API_STR "${CMAKE_CURRENT_SOURCE_DIR}/include/hip/hcc_detail/hip_prof_str.h") set(PROF_API_HDR "${CMAKE_CURRENT_SOURCE_DIR}/include/hip/hcc_detail/hip_runtime_api.h") set(PROF_API_SRC "${CMAKE_CURRENT_SOURCE_DIR}/src") @@ -207,7 +226,6 @@ execute_process(COMMAND sh -c "rm -f ${PROF_API_STR}; ${PROF_API_CMD}") set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS ${PROF_API_GEN} ${PROF_API_HDR} ${PROF_API_STR}) # Enable profiling API -if(USE_PROF_API EQUAL 1) find_path(PROF_API_HEADER_DIR prof_protocol.h HINTS ${PROF_API_HEADER_PATH} @@ -224,6 +242,7 @@ if(USE_PROF_API EQUAL 1) MESSAGE(STATUS "Profiling API: ${PROF_API_HEADER_DIR}") endif() endif() +endif() ############################# # Build steps @@ -233,13 +252,48 @@ set(LIB_INSTALL_DIR ${CMAKE_INSTALL_PREFIX}/lib) set(INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_PREFIX}/include) set(CONFIG_PACKAGE_INSTALL_DIR ${LIB_INSTALL_DIR}/cmake/hip) +# Build clang hipify if enabled +if (BUILD_HIPIFY_CLANG) + add_subdirectory(hipify-clang) +endif() + # Build LPL an CA (fat binary generation / fat binary decomposition tools) if # platform is hcc; do this before the ugly hijacking of the compiler, since no # HC code is involved. 
-if (HIP_PLATFORM STREQUAL "hcc") - add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/lpl_ca) -endif () +#if (HIP_PLATFORM STREQUAL "hcc") +# add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/lpl_ca) +#endif () +if(HIP_PLATFORM STREQUAL "vdi") + # Determine HSA_PATH + if(NOT DEFINED HSA_PATH) + if(NOT DEFINED ENV{HSA_PATH}) + set(HSA_PATH "/opt/rocm/hsa" CACHE PATH "Path to which HSA runtime has been installed") + else() + set(HSA_PATH $ENV{HSA_PATH} CACHE PATH "Path to which HSA runtime has been installed") + endif() + endif() + if(IS_ABSOLUTE ${HSA_PATH} AND EXISTS ${HSA_PATH} AND IS_DIRECTORY ${HSA_PATH}) + message(STATUS "Looking for HSA runtime in: " ${HSA_PATH}) + else() + message(FATAL_ERROR "Don't know where to find HSA runtime. Please specify absolute path using -DHSA_PATH") + endif() + + include_directories(${PROJECT_SOURCE_DIR}/include) + add_subdirectory(vdi) + file(WRITE "${PROJECT_BINARY_DIR}/.hipInfo" ${_buildInfo}) + + +# set(VDI_CXX_FLAGS "-hc -fno-gpu-rdc --amdgpu-target=gfx803 --amdgpu-target=gfx900 --amdgpu-target=gfx906 --amdgpu-target=gfx908 ") + set(HIP_VDI_BUILD_FLAGS "${HIP_VDI_BUILD_FLAGS} -fPIC ${VDI_CXX_FLAGS} -I${HSA_PATH}/include") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${HIP_VDI_BUILD_FLAGS}") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${HIP_VDI_BUILD_FLAGS}") + set(HCC_CXX_FLAGS "-hc -fno-gpu-rdc --amdgpu-target=gfx803 --amdgpu-target=gfx900 --amdgpu-target=gfx906 --amdgpu-target=gfx908 ") + set(HIP_HCC_BUILD_FLAGS "${HIP_HCC_BUILD_FLAGS} -fPIC ${HCC_CXX_FLAGS} -I${HSA_PATH}/include") + +endif() + +message(STATUS "\nHSA runtime in: " ${HSA_PATH}) # Build hip_hcc if platform is hcc if(HIP_PLATFORM STREQUAL "hcc") include_directories(${PROJECT_SOURCE_DIR}/include) @@ -300,18 +354,18 @@ if(HIP_PLATFORM STREQUAL "hcc") set_property ( TARGET hip_hcc PROPERTY VERSION "${HIP_LIB_VERSION_STRING}" ) set_property ( TARGET hip_hcc PROPERTY SOVERSION "${HIP_LIB_VERSION_MAJOR}" ) - if(HIP_COMPILER STREQUAL "hcc") - target_link_libraries(hip_hcc PRIVATE 
hc_am) - target_link_libraries(hip_hcc_static PRIVATE hc_am) + target_link_libraries(hip_hcc PRIVATE hc_am) + target_link_libraries(hip_hcc_static PRIVATE hc_am) + + add_library(hiprtc SHARED src/hiprtc.cpp) + target_compile_options(hiprtc PRIVATE -DDISABLE_REDUCED_GPU_BLOB_COPY) + set_property ( TARGET hiprtc PROPERTY VERSION "${HIP_LIB_VERSION_STRING}" ) + set_property ( TARGET hiprtc PROPERTY SOVERSION "${HIP_LIB_VERSION_MAJOR}" ) + + target_include_directories( + hiprtc SYSTEM + PRIVATE ${PROJECT_SOURCE_DIR}/include ${HSA_PATH}/include) - add_library(hiprtc SHARED src/hiprtc.cpp) - target_compile_options(hiprtc PRIVATE -DDISABLE_REDUCED_GPU_BLOB_COPY) - set_property ( TARGET hiprtc PROPERTY VERSION "${HIP_LIB_VERSION_STRING}" ) - set_property ( TARGET hiprtc PROPERTY SOVERSION "${HIP_LIB_VERSION_MAJOR}" ) - target_include_directories( - hiprtc SYSTEM - PRIVATE ${PROJECT_SOURCE_DIR}/include ${HSA_PATH}/include) - endif() set_target_properties(hip_hcc PROPERTIES CXX_VISIBILITY_PRESET hidden) set_target_properties(hip_hcc PROPERTIES VISIBILITY_INLINES_HIDDEN 1) set_target_properties(hiprtc PROPERTIES CXX_VISIBILITY_PRESET hidden) @@ -349,6 +403,9 @@ if(HIP_PLATFORM STREQUAL "hcc") file(WRITE "${PROJECT_BINARY_DIR}/.hipInfo" ${_buildInfo}) endif() +if(HIP_PLATFORM STREQUAL "hcc" OR HIP_PLATFORM STREQUAL "vdi") + add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/lpl_ca) +endif() # Generate .hipVersion file(WRITE "${PROJECT_BINARY_DIR}/.hipVersion" ${_versionInfo}) @@ -377,13 +434,11 @@ endif() ############################# # Install hip_hcc if platform is hcc if(HIP_PLATFORM STREQUAL "hcc") - if(HIP_COMPILER STREQUAL "hcc") - install(TARGETS hip_hcc_static hip_hcc hiprtc DESTINATION lib) - else() - install(TARGETS hip_hcc_static hip_hcc DESTINATION lib) - endif() + install(TARGETS hip_hcc_static hip_hcc hiprtc DESTINATION lib) +endif() - # Install .hipInfo +# Install .hipInfo +if(HIP_PLATFORM STREQUAL "hcc" OR HIP_PLATFORM STREQUAL "vdi") install(FILES 
${PROJECT_BINARY_DIR}/.hipInfo DESTINATION lib) endif() @@ -406,6 +461,9 @@ endif() if(HIP_PLATFORM STREQUAL "hcc") install(TARGETS hip_hcc_static hip_hcc host device EXPORT hip-targets DESTINATION ${LIB_INSTALL_DIR}) install(EXPORT hip-targets DESTINATION ${CONFIG_PACKAGE_INSTALL_DIR} NAMESPACE hip::) +elseif( HIP_PLATFORM STREQUAL "vdi") +# install(TARGETS hip_on_vdi host device EXPORT hip-targets DESTINATION ${LIB_INSTALL_DIR}) +endif() include(CMakePackageConfigHelpers) configure_package_config_file( @@ -427,13 +485,12 @@ if(HIP_PLATFORM STREQUAL "hcc") DESTINATION ${CONFIG_PACKAGE_INSTALL_DIR} ) -endif() ############################# # Packaging steps ############################# # Package: hip_base -set(BUILD_DIR ${CMAKE_CURRENT_BINARY_DIR}/packages/hip_base) +set(BUILD_DIR ${CMAKE_CURRENT_BINARY_DIR}/packages/hip-base) configure_file(packaging/hip-base.txt ${BUILD_DIR}/CMakeLists.txt @ONLY) configure_file(packaging/hip-base.postinst ${BUILD_DIR}/postinst @ONLY) configure_file(packaging/hip-base.prerm ${BUILD_DIR}/prerm @ONLY) @@ -447,12 +504,19 @@ add_custom_target(pkg_hip_base COMMAND ${CMAKE_COMMAND} . WORKING_DIRECTORY ${BUILD_DIR} DEPENDS lpl ca) -# Package: hip_hcc -set(BUILD_DIR ${CMAKE_CURRENT_BINARY_DIR}/packages/hip_hcc) -configure_file(packaging/hip-hcc.txt ${BUILD_DIR}/CMakeLists.txt @ONLY) -configure_file(packaging/hip-hcc.postinst ${BUILD_DIR}/postinst @ONLY) -configure_file(packaging/hip-hcc.prerm ${BUILD_DIR}/prerm @ONLY) -add_custom_target(pkg_hip_hcc COMMAND ${CMAKE_COMMAND} . +# Packaging needs to wait for hipify-clang to build if it's enabled... 
+if (BUILD_HIPIFY_CLANG) + add_dependencies(pkg_hip_base hipify-clang) +endif() + +if(HIP_PLATFORM STREQUAL "hcc") + message("HCC Package\n") + # Package: hip_hcc + set(BUILD_DIR ${CMAKE_CURRENT_BINARY_DIR}/packages/hip_hcc) + configure_file(packaging/hip-hcc.txt ${BUILD_DIR}/CMakeLists.txt @ONLY) + configure_file(packaging/hip-hcc.postinst ${BUILD_DIR}/postinst @ONLY) + configure_file(packaging/hip-hcc.prerm ${BUILD_DIR}/prerm @ONLY) + add_custom_target(pkg_hip_hcc COMMAND ${CMAKE_COMMAND} . COMMAND rm -rf *.deb *.rpm *.tar.gz COMMAND make package COMMAND cp *.deb ${PROJECT_BINARY_DIR} @@ -460,12 +524,23 @@ add_custom_target(pkg_hip_hcc COMMAND ${CMAKE_COMMAND} . COMMAND cp *.tar.gz ${PROJECT_BINARY_DIR} WORKING_DIRECTORY ${BUILD_DIR} DEPENDS hip_hcc hip_hcc_static hiprtc) +else() + set(BUILD_DIR ${CMAKE_CURRENT_BINARY_DIR}/vdi) + configure_file(packaging/hip-vdi.txt ${BUILD_DIR}/CMakeLists.txt @ONLY) + configure_file(packaging/hip-vdi.postinst ${BUILD_DIR}/postinst @ONLY) + configure_file(packaging/hip-vdi.prerm ${BUILD_DIR}/prerm @ONLY) + add_custom_target(hip_on_vdi COMMAND ${CMAKE_COMMAND} . + COMMAND rm -rf *.deb *.rpm *.tar.gz + COMMAND make package + COMMAND cp *.deb ${PROJECT_BINARY_DIR} + COMMAND cp *.rpm ${PROJECT_BINARY_DIR} + COMMAND cp *.tar.gz ${PROJECT_BINARY_DIR} + WORKING_DIRECTORY ${BUILD_DIR} ) +endif() # Package: hip_nvcc -set(BUILD_DIR ${CMAKE_CURRENT_BINARY_DIR}/packages/hip_nvcc) +set(BUILD_DIR ${CMAKE_CURRENT_BINARY_DIR}/packages/hip-nvcc) configure_file(packaging/hip-nvcc.txt ${BUILD_DIR}/CMakeLists.txt @ONLY) -configure_file(packaging/hip-nvcc.postinst ${BUILD_DIR}/postinst @ONLY) -configure_file(packaging/hip-nvcc.prerm ${BUILD_DIR}/prerm @ONLY) add_custom_target(pkg_hip_nvcc COMMAND ${CMAKE_COMMAND} . COMMAND rm -rf *.deb *.rpm *.tar.gz COMMAND make package @@ -475,7 +550,7 @@ add_custom_target(pkg_hip_nvcc COMMAND ${CMAKE_COMMAND} . 
WORKING_DIRECTORY ${BUILD_DIR}) # Package: hip_doc -set(BUILD_DIR ${CMAKE_CURRENT_BINARY_DIR}/packages/hip_doc) +set(BUILD_DIR ${CMAKE_CURRENT_BINARY_DIR}/packages/hip-doc) configure_file(packaging/hip-doc.txt ${BUILD_DIR}/CMakeLists.txt @ONLY) add_custom_target(pkg_hip_doc COMMAND ${CMAKE_COMMAND} . COMMAND rm -rf *.deb *.rpm *.tar.gz @@ -496,6 +571,7 @@ add_custom_target(pkg_hip_samples COMMAND ${CMAKE_COMMAND} . COMMAND cp *.tar.gz ${PROJECT_BINARY_DIR} WORKING_DIRECTORY ${BUILD_DIR}) + # Package: all if(POLICY CMP0037) cmake_policy(PUSH) @@ -505,10 +581,18 @@ file(GENERATE OUTPUT ${PROJECT_BINARY_DIR}/fixnames CONTENT "pwd; for i in *.deb; do mv \"\$i\" \"\${i/.deb/-amd64.deb}\" ; done for i in *.rpm ; do mv \$i \${i/.rpm/.x86_64.rpm} ; done ") -add_custom_target(package +if(HIP_PLATFORM STREQUAL "hcc") + add_custom_target(package COMMAND bash ${PROJECT_BINARY_DIR}/fixnames WORKING_DIRECTORY ${PROJECT_BINARY_DIR} DEPENDS pkg_hip_base pkg_hip_hcc pkg_hip_nvcc pkg_hip_doc pkg_hip_samples) +elseif(HIP_PLATFORM STREQUAL "vdi") + add_custom_target(package + COMMAND bash ${PROJECT_BINARY_DIR}/fixnames + WORKING_DIRECTORY ${PROJECT_BINARY_DIR} + DEPENDS pkg_hip_base hip_on_vdi pkg_hip_nvcc pkg_hip_doc pkg_hip_samples) +endif() + if(POLICY CMP0037) cmake_policy(POP) endif() diff --git a/projects/hip/LICENSE b/projects/hip/LICENSE.txt similarity index 85% rename from projects/hip/LICENSE rename to projects/hip/LICENSE.txt index 586fbd5a39..e44ba39fd0 100644 --- a/projects/hip/LICENSE +++ b/projects/hip/LICENSE.txt @@ -1,5 +1,4 @@ -/* -Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2008-2020 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -13,11 +12,9 @@ all copies or substantial portions of the Software. 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - diff --git a/projects/hip/amdocl/CL/cl.h b/projects/hip/amdocl/CL/cl.h new file mode 100644 index 0000000000..cea6dc2405 --- /dev/null +++ b/projects/hip/amdocl/CL/cl.h @@ -0,0 +1,1836 @@ +/******************************************************************************* + * Copyright (c) 2008-2019 The Khronos Group Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and/or associated documentation files (the + * "Materials"), to deal in the Materials without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Materials, and to + * permit persons to whom the Materials are furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Materials. + * + * MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS + * KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS + * SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT + * https://www.khronos.org/registry/ + * + * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. + ******************************************************************************/ + +#ifndef __OPENCL_CL_H +#define __OPENCL_CL_H + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/******************************************************************************/ + +typedef struct _cl_platform_id * cl_platform_id; +typedef struct _cl_device_id * cl_device_id; +typedef struct _cl_context * cl_context; +typedef struct _cl_command_queue * cl_command_queue; +typedef struct _cl_mem * cl_mem; +typedef struct _cl_program * cl_program; +typedef struct _cl_kernel * cl_kernel; +typedef struct _cl_event * cl_event; +typedef struct _cl_sampler * cl_sampler; + +typedef cl_uint cl_bool; /* WARNING! Unlike cl_ types in cl_platform.h, cl_bool is not guaranteed to be the same size as the bool in kernels. 
*/ +typedef cl_ulong cl_bitfield; +typedef cl_bitfield cl_device_type; +typedef cl_uint cl_platform_info; +typedef cl_uint cl_device_info; +typedef cl_bitfield cl_device_fp_config; +typedef cl_uint cl_device_mem_cache_type; +typedef cl_uint cl_device_local_mem_type; +typedef cl_bitfield cl_device_exec_capabilities; +#ifdef CL_VERSION_2_0 +typedef cl_bitfield cl_device_svm_capabilities; +#endif +typedef cl_bitfield cl_command_queue_properties; +#ifdef CL_VERSION_1_2 +typedef intptr_t cl_device_partition_property; +typedef cl_bitfield cl_device_affinity_domain; +#endif + +typedef intptr_t cl_context_properties; +typedef cl_uint cl_context_info; +#ifdef CL_VERSION_2_0 +typedef cl_bitfield cl_queue_properties; +#endif +typedef cl_uint cl_command_queue_info; +typedef cl_uint cl_channel_order; +typedef cl_uint cl_channel_type; +typedef cl_bitfield cl_mem_flags; +#ifdef CL_VERSION_2_0 +typedef cl_bitfield cl_svm_mem_flags; +#endif +typedef cl_uint cl_mem_object_type; +typedef cl_uint cl_mem_info; +#ifdef CL_VERSION_1_2 +typedef cl_bitfield cl_mem_migration_flags; +#endif +typedef cl_uint cl_image_info; +#ifdef CL_VERSION_1_1 +typedef cl_uint cl_buffer_create_type; +#endif +typedef cl_uint cl_addressing_mode; +typedef cl_uint cl_filter_mode; +typedef cl_uint cl_sampler_info; +typedef cl_bitfield cl_map_flags; +#ifdef CL_VERSION_2_0 +typedef intptr_t cl_pipe_properties; +typedef cl_uint cl_pipe_info; +#endif +typedef cl_uint cl_program_info; +typedef cl_uint cl_program_build_info; +#ifdef CL_VERSION_1_2 +typedef cl_uint cl_program_binary_type; +#endif +typedef cl_int cl_build_status; +typedef cl_uint cl_kernel_info; +#ifdef CL_VERSION_1_2 +typedef cl_uint cl_kernel_arg_info; +typedef cl_uint cl_kernel_arg_address_qualifier; +typedef cl_uint cl_kernel_arg_access_qualifier; +typedef cl_bitfield cl_kernel_arg_type_qualifier; +#endif +typedef cl_uint cl_kernel_work_group_info; +#ifdef CL_VERSION_2_1 +typedef cl_uint cl_kernel_sub_group_info; +#endif +typedef cl_uint 
cl_event_info; +typedef cl_uint cl_command_type; +typedef cl_uint cl_profiling_info; +#ifdef CL_VERSION_2_0 +typedef cl_bitfield cl_sampler_properties; +typedef cl_uint cl_kernel_exec_info; +#endif +#ifdef CL_EXPERIMENTAL +typedef cl_bitfield cl_device_atomic_capabilities; +typedef cl_uint cl_khronos_vendor_id; +#endif + +typedef struct _cl_image_format { + cl_channel_order image_channel_order; + cl_channel_type image_channel_data_type; +} cl_image_format; + +#ifdef CL_VERSION_1_2 + +typedef struct _cl_image_desc { + cl_mem_object_type image_type; + size_t image_width; + size_t image_height; + size_t image_depth; + size_t image_array_size; + size_t image_row_pitch; + size_t image_slice_pitch; + cl_uint num_mip_levels; + cl_uint num_samples; +#ifdef CL_VERSION_2_0 +#ifdef __GNUC__ + __extension__ /* Prevents warnings about anonymous union in -pedantic builds */ +#endif +#ifdef _MSC_VER +#pragma warning( push ) +#pragma warning( disable : 4201 ) /* Prevents warning about nameless struct/union in /W4 /Za builds */ +#endif + union { +#endif + cl_mem buffer; +#ifdef CL_VERSION_2_0 + cl_mem mem_object; + }; +#ifdef _MSC_VER +#pragma warning( pop ) +#endif +#endif +} cl_image_desc; + +#endif + +#ifdef CL_VERSION_1_1 + +typedef struct _cl_buffer_region { + size_t origin; + size_t size; +} cl_buffer_region; + +#endif + +/******************************************************************************/ + +/* Error Codes */ +#define CL_SUCCESS 0 +#define CL_DEVICE_NOT_FOUND -1 +#define CL_DEVICE_NOT_AVAILABLE -2 +#define CL_COMPILER_NOT_AVAILABLE -3 +#define CL_MEM_OBJECT_ALLOCATION_FAILURE -4 +#define CL_OUT_OF_RESOURCES -5 +#define CL_OUT_OF_HOST_MEMORY -6 +#define CL_PROFILING_INFO_NOT_AVAILABLE -7 +#define CL_MEM_COPY_OVERLAP -8 +#define CL_IMAGE_FORMAT_MISMATCH -9 +#define CL_IMAGE_FORMAT_NOT_SUPPORTED -10 +#define CL_BUILD_PROGRAM_FAILURE -11 +#define CL_MAP_FAILURE -12 +#ifdef CL_VERSION_1_1 +#define CL_MISALIGNED_SUB_BUFFER_OFFSET -13 +#define 
CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST -14 +#endif +#ifdef CL_VERSION_1_2 +#define CL_COMPILE_PROGRAM_FAILURE -15 +#define CL_LINKER_NOT_AVAILABLE -16 +#define CL_LINK_PROGRAM_FAILURE -17 +#define CL_DEVICE_PARTITION_FAILED -18 +#define CL_KERNEL_ARG_INFO_NOT_AVAILABLE -19 +#endif + +#define CL_INVALID_VALUE -30 +#define CL_INVALID_DEVICE_TYPE -31 +#define CL_INVALID_PLATFORM -32 +#define CL_INVALID_DEVICE -33 +#define CL_INVALID_CONTEXT -34 +#define CL_INVALID_QUEUE_PROPERTIES -35 +#define CL_INVALID_COMMAND_QUEUE -36 +#define CL_INVALID_HOST_PTR -37 +#define CL_INVALID_MEM_OBJECT -38 +#define CL_INVALID_IMAGE_FORMAT_DESCRIPTOR -39 +#define CL_INVALID_IMAGE_SIZE -40 +#define CL_INVALID_SAMPLER -41 +#define CL_INVALID_BINARY -42 +#define CL_INVALID_BUILD_OPTIONS -43 +#define CL_INVALID_PROGRAM -44 +#define CL_INVALID_PROGRAM_EXECUTABLE -45 +#define CL_INVALID_KERNEL_NAME -46 +#define CL_INVALID_KERNEL_DEFINITION -47 +#define CL_INVALID_KERNEL -48 +#define CL_INVALID_ARG_INDEX -49 +#define CL_INVALID_ARG_VALUE -50 +#define CL_INVALID_ARG_SIZE -51 +#define CL_INVALID_KERNEL_ARGS -52 +#define CL_INVALID_WORK_DIMENSION -53 +#define CL_INVALID_WORK_GROUP_SIZE -54 +#define CL_INVALID_WORK_ITEM_SIZE -55 +#define CL_INVALID_GLOBAL_OFFSET -56 +#define CL_INVALID_EVENT_WAIT_LIST -57 +#define CL_INVALID_EVENT -58 +#define CL_INVALID_OPERATION -59 +#define CL_INVALID_GL_OBJECT -60 +#define CL_INVALID_BUFFER_SIZE -61 +#define CL_INVALID_MIP_LEVEL -62 +#define CL_INVALID_GLOBAL_WORK_SIZE -63 +#ifdef CL_VERSION_1_1 +#define CL_INVALID_PROPERTY -64 +#endif +#ifdef CL_VERSION_1_2 +#define CL_INVALID_IMAGE_DESCRIPTOR -65 +#define CL_INVALID_COMPILER_OPTIONS -66 +#define CL_INVALID_LINKER_OPTIONS -67 +#define CL_INVALID_DEVICE_PARTITION_COUNT -68 +#endif +#ifdef CL_VERSION_2_0 +#define CL_INVALID_PIPE_SIZE -69 +#define CL_INVALID_DEVICE_QUEUE -70 +#endif +#ifdef CL_VERSION_2_2 +#define CL_INVALID_SPEC_ID -71 +#define CL_MAX_SIZE_RESTRICTION_EXCEEDED -72 +#endif + + +/* cl_bool 
*/ +#define CL_FALSE 0 +#define CL_TRUE 1 +#ifdef CL_VERSION_1_2 +#define CL_BLOCKING CL_TRUE +#define CL_NON_BLOCKING CL_FALSE +#endif + +/* cl_platform_info */ +#define CL_PLATFORM_PROFILE 0x0900 +#define CL_PLATFORM_VERSION 0x0901 +#define CL_PLATFORM_NAME 0x0902 +#define CL_PLATFORM_VENDOR 0x0903 +#define CL_PLATFORM_EXTENSIONS 0x0904 +#ifdef CL_VERSION_2_1 +#define CL_PLATFORM_HOST_TIMER_RESOLUTION 0x0905 +#endif + +/* cl_device_type - bitfield */ +#define CL_DEVICE_TYPE_DEFAULT (1 << 0) +#define CL_DEVICE_TYPE_CPU (1 << 1) +#define CL_DEVICE_TYPE_GPU (1 << 2) +#define CL_DEVICE_TYPE_ACCELERATOR (1 << 3) +#ifdef CL_VERSION_1_2 +#define CL_DEVICE_TYPE_CUSTOM (1 << 4) +#endif +#define CL_DEVICE_TYPE_ALL 0xFFFFFFFF + +/* cl_device_info */ +#define CL_DEVICE_TYPE 0x1000 +#define CL_DEVICE_VENDOR_ID 0x1001 +#define CL_DEVICE_MAX_COMPUTE_UNITS 0x1002 +#define CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS 0x1003 +#define CL_DEVICE_MAX_WORK_GROUP_SIZE 0x1004 +#define CL_DEVICE_MAX_WORK_ITEM_SIZES 0x1005 +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR 0x1006 +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT 0x1007 +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT 0x1008 +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG 0x1009 +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT 0x100A +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE 0x100B +#define CL_DEVICE_MAX_CLOCK_FREQUENCY 0x100C +#define CL_DEVICE_ADDRESS_BITS 0x100D +#define CL_DEVICE_MAX_READ_IMAGE_ARGS 0x100E +#define CL_DEVICE_MAX_WRITE_IMAGE_ARGS 0x100F +#define CL_DEVICE_MAX_MEM_ALLOC_SIZE 0x1010 +#define CL_DEVICE_IMAGE2D_MAX_WIDTH 0x1011 +#define CL_DEVICE_IMAGE2D_MAX_HEIGHT 0x1012 +#define CL_DEVICE_IMAGE3D_MAX_WIDTH 0x1013 +#define CL_DEVICE_IMAGE3D_MAX_HEIGHT 0x1014 +#define CL_DEVICE_IMAGE3D_MAX_DEPTH 0x1015 +#define CL_DEVICE_IMAGE_SUPPORT 0x1016 +#define CL_DEVICE_MAX_PARAMETER_SIZE 0x1017 +#define CL_DEVICE_MAX_SAMPLERS 0x1018 +#define CL_DEVICE_MEM_BASE_ADDR_ALIGN 0x1019 +#define CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE 0x101A 
+#define CL_DEVICE_SINGLE_FP_CONFIG 0x101B +#define CL_DEVICE_GLOBAL_MEM_CACHE_TYPE 0x101C +#define CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE 0x101D +#define CL_DEVICE_GLOBAL_MEM_CACHE_SIZE 0x101E +#define CL_DEVICE_GLOBAL_MEM_SIZE 0x101F +#define CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE 0x1020 +#define CL_DEVICE_MAX_CONSTANT_ARGS 0x1021 +#define CL_DEVICE_LOCAL_MEM_TYPE 0x1022 +#define CL_DEVICE_LOCAL_MEM_SIZE 0x1023 +#define CL_DEVICE_ERROR_CORRECTION_SUPPORT 0x1024 +#define CL_DEVICE_PROFILING_TIMER_RESOLUTION 0x1025 +#define CL_DEVICE_ENDIAN_LITTLE 0x1026 +#define CL_DEVICE_AVAILABLE 0x1027 +#define CL_DEVICE_COMPILER_AVAILABLE 0x1028 +#define CL_DEVICE_EXECUTION_CAPABILITIES 0x1029 +#define CL_DEVICE_QUEUE_PROPERTIES 0x102A /* deprecated */ +#ifdef CL_VERSION_2_0 +#define CL_DEVICE_QUEUE_ON_HOST_PROPERTIES 0x102A +#endif +#define CL_DEVICE_NAME 0x102B +#define CL_DEVICE_VENDOR 0x102C +#define CL_DRIVER_VERSION 0x102D +#define CL_DEVICE_PROFILE 0x102E +#define CL_DEVICE_VERSION 0x102F +#define CL_DEVICE_EXTENSIONS 0x1030 +#define CL_DEVICE_PLATFORM 0x1031 +#ifdef CL_VERSION_1_2 +#define CL_DEVICE_DOUBLE_FP_CONFIG 0x1032 +#endif +/* 0x1033 reserved for CL_DEVICE_HALF_FP_CONFIG which is already defined in "cl_ext.h" */ +#ifdef CL_VERSION_1_1 +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF 0x1034 +#define CL_DEVICE_HOST_UNIFIED_MEMORY 0x1035 /* deprecated */ +#define CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR 0x1036 +#define CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT 0x1037 +#define CL_DEVICE_NATIVE_VECTOR_WIDTH_INT 0x1038 +#define CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG 0x1039 +#define CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT 0x103A +#define CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE 0x103B +#define CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF 0x103C +#define CL_DEVICE_OPENCL_C_VERSION 0x103D +#endif +#ifdef CL_VERSION_1_2 +#define CL_DEVICE_LINKER_AVAILABLE 0x103E +#define CL_DEVICE_BUILT_IN_KERNELS 0x103F +#define CL_DEVICE_IMAGE_MAX_BUFFER_SIZE 0x1040 +#define CL_DEVICE_IMAGE_MAX_ARRAY_SIZE 0x1041 +#define 
CL_DEVICE_PARENT_DEVICE 0x1042 +#define CL_DEVICE_PARTITION_MAX_SUB_DEVICES 0x1043 +#define CL_DEVICE_PARTITION_PROPERTIES 0x1044 +#define CL_DEVICE_PARTITION_AFFINITY_DOMAIN 0x1045 +#define CL_DEVICE_PARTITION_TYPE 0x1046 +#define CL_DEVICE_REFERENCE_COUNT 0x1047 +#define CL_DEVICE_PREFERRED_INTEROP_USER_SYNC 0x1048 +#define CL_DEVICE_PRINTF_BUFFER_SIZE 0x1049 +#endif +#ifdef CL_VERSION_2_0 +#define CL_DEVICE_IMAGE_PITCH_ALIGNMENT 0x104A +#define CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT 0x104B +#define CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS 0x104C +#define CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE 0x104D +#define CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES 0x104E +#define CL_DEVICE_QUEUE_ON_DEVICE_PREFERRED_SIZE 0x104F +#define CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE 0x1050 +#define CL_DEVICE_MAX_ON_DEVICE_QUEUES 0x1051 +#define CL_DEVICE_MAX_ON_DEVICE_EVENTS 0x1052 +#define CL_DEVICE_SVM_CAPABILITIES 0x1053 +#define CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE 0x1054 +#define CL_DEVICE_MAX_PIPE_ARGS 0x1055 +#define CL_DEVICE_PIPE_MAX_ACTIVE_RESERVATIONS 0x1056 +#define CL_DEVICE_PIPE_MAX_PACKET_SIZE 0x1057 +#define CL_DEVICE_PREFERRED_PLATFORM_ATOMIC_ALIGNMENT 0x1058 +#define CL_DEVICE_PREFERRED_GLOBAL_ATOMIC_ALIGNMENT 0x1059 +#define CL_DEVICE_PREFERRED_LOCAL_ATOMIC_ALIGNMENT 0x105A +#endif +#ifdef CL_VERSION_2_1 +#define CL_DEVICE_IL_VERSION 0x105B +#define CL_DEVICE_MAX_NUM_SUB_GROUPS 0x105C +#define CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS 0x105D +#endif + +/* cl_device_fp_config - bitfield */ +#define CL_FP_DENORM (1 << 0) +#define CL_FP_INF_NAN (1 << 1) +#define CL_FP_ROUND_TO_NEAREST (1 << 2) +#define CL_FP_ROUND_TO_ZERO (1 << 3) +#define CL_FP_ROUND_TO_INF (1 << 4) +#define CL_FP_FMA (1 << 5) +#ifdef CL_VERSION_1_1 +#define CL_FP_SOFT_FLOAT (1 << 6) +#endif +#ifdef CL_VERSION_1_2 +#define CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT (1 << 7) +#endif + +/* cl_device_mem_cache_type */ +#define CL_NONE 0x0 +#define CL_READ_ONLY_CACHE 0x1 +#define CL_READ_WRITE_CACHE 0x2 + +/* 
cl_device_local_mem_type */ +#define CL_LOCAL 0x1 +#define CL_GLOBAL 0x2 + +/* cl_device_exec_capabilities - bitfield */ +#define CL_EXEC_KERNEL (1 << 0) +#define CL_EXEC_NATIVE_KERNEL (1 << 1) + +/* cl_command_queue_properties - bitfield */ +#define CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE (1 << 0) +#define CL_QUEUE_PROFILING_ENABLE (1 << 1) +#ifdef CL_VERSION_2_0 +#define CL_QUEUE_ON_DEVICE (1 << 2) +#define CL_QUEUE_ON_DEVICE_DEFAULT (1 << 3) +#endif + +/* cl_context_info */ +#define CL_CONTEXT_REFERENCE_COUNT 0x1080 +#define CL_CONTEXT_DEVICES 0x1081 +#define CL_CONTEXT_PROPERTIES 0x1082 +#ifdef CL_VERSION_1_1 +#define CL_CONTEXT_NUM_DEVICES 0x1083 +#endif + +/* cl_context_properties */ +#define CL_CONTEXT_PLATFORM 0x1084 +#ifdef CL_VERSION_1_2 +#define CL_CONTEXT_INTEROP_USER_SYNC 0x1085 +#endif + +#ifdef CL_VERSION_1_2 + +/* cl_device_partition_property */ +#define CL_DEVICE_PARTITION_EQUALLY 0x1086 +#define CL_DEVICE_PARTITION_BY_COUNTS 0x1087 +#define CL_DEVICE_PARTITION_BY_COUNTS_LIST_END 0x0 +#define CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN 0x1088 + +#endif + +#ifdef CL_VERSION_1_2 + +/* cl_device_affinity_domain */ +#define CL_DEVICE_AFFINITY_DOMAIN_NUMA (1 << 0) +#define CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE (1 << 1) +#define CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE (1 << 2) +#define CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE (1 << 3) +#define CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE (1 << 4) +#define CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE (1 << 5) + +#endif + +#ifdef CL_VERSION_2_0 + +/* cl_device_svm_capabilities */ +#define CL_DEVICE_SVM_COARSE_GRAIN_BUFFER (1 << 0) +#define CL_DEVICE_SVM_FINE_GRAIN_BUFFER (1 << 1) +#define CL_DEVICE_SVM_FINE_GRAIN_SYSTEM (1 << 2) +#define CL_DEVICE_SVM_ATOMICS (1 << 3) + +#endif + +/* cl_command_queue_info */ +#define CL_QUEUE_CONTEXT 0x1090 +#define CL_QUEUE_DEVICE 0x1091 +#define CL_QUEUE_REFERENCE_COUNT 0x1092 +#define CL_QUEUE_PROPERTIES 0x1093 +#ifdef CL_VERSION_2_0 +#define CL_QUEUE_SIZE 0x1094 +#endif +#ifdef CL_VERSION_2_1 
+#define CL_QUEUE_DEVICE_DEFAULT 0x1095 +#endif + +/* cl_mem_flags and cl_svm_mem_flags - bitfield */ +#define CL_MEM_READ_WRITE (1 << 0) +#define CL_MEM_WRITE_ONLY (1 << 1) +#define CL_MEM_READ_ONLY (1 << 2) +#define CL_MEM_USE_HOST_PTR (1 << 3) +#define CL_MEM_ALLOC_HOST_PTR (1 << 4) +#define CL_MEM_COPY_HOST_PTR (1 << 5) +/* reserved (1 << 6) */ +#ifdef CL_VERSION_1_2 +#define CL_MEM_HOST_WRITE_ONLY (1 << 7) +#define CL_MEM_HOST_READ_ONLY (1 << 8) +#define CL_MEM_HOST_NO_ACCESS (1 << 9) +#endif +#ifdef CL_VERSION_2_0 +#define CL_MEM_SVM_FINE_GRAIN_BUFFER (1 << 10) /* used by cl_svm_mem_flags only */ +#define CL_MEM_SVM_ATOMICS (1 << 11) /* used by cl_svm_mem_flags only */ +#define CL_MEM_KERNEL_READ_AND_WRITE (1 << 12) +#endif + +#ifdef CL_VERSION_1_2 + +/* cl_mem_migration_flags - bitfield */ +#define CL_MIGRATE_MEM_OBJECT_HOST (1 << 0) +#define CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED (1 << 1) + +#endif + +/* cl_channel_order */ +#define CL_R 0x10B0 +#define CL_A 0x10B1 +#define CL_RG 0x10B2 +#define CL_RA 0x10B3 +#define CL_RGB 0x10B4 +#define CL_RGBA 0x10B5 +#define CL_BGRA 0x10B6 +#define CL_ARGB 0x10B7 +#define CL_INTENSITY 0x10B8 +#define CL_LUMINANCE 0x10B9 +#ifdef CL_VERSION_1_1 +#define CL_Rx 0x10BA +#define CL_RGx 0x10BB +#define CL_RGBx 0x10BC +#endif +#ifdef CL_VERSION_1_2 +#define CL_DEPTH 0x10BD +#define CL_DEPTH_STENCIL 0x10BE +#endif +#ifdef CL_VERSION_2_0 +#define CL_sRGB 0x10BF +#define CL_sRGBx 0x10C0 +#define CL_sRGBA 0x10C1 +#define CL_sBGRA 0x10C2 +#define CL_ABGR 0x10C3 +#endif + +/* cl_channel_type */ +#define CL_SNORM_INT8 0x10D0 +#define CL_SNORM_INT16 0x10D1 +#define CL_UNORM_INT8 0x10D2 +#define CL_UNORM_INT16 0x10D3 +#define CL_UNORM_SHORT_565 0x10D4 +#define CL_UNORM_SHORT_555 0x10D5 +#define CL_UNORM_INT_101010 0x10D6 +#define CL_SIGNED_INT8 0x10D7 +#define CL_SIGNED_INT16 0x10D8 +#define CL_SIGNED_INT32 0x10D9 +#define CL_UNSIGNED_INT8 0x10DA +#define CL_UNSIGNED_INT16 0x10DB +#define CL_UNSIGNED_INT32 0x10DC +#define 
CL_HALF_FLOAT 0x10DD +#define CL_FLOAT 0x10DE +#ifdef CL_VERSION_1_2 +#define CL_UNORM_INT24 0x10DF +#endif +#ifdef CL_VERSION_2_1 +#define CL_UNORM_INT_101010_2 0x10E0 +#endif + +/* cl_mem_object_type */ +#define CL_MEM_OBJECT_BUFFER 0x10F0 +#define CL_MEM_OBJECT_IMAGE2D 0x10F1 +#define CL_MEM_OBJECT_IMAGE3D 0x10F2 +#ifdef CL_VERSION_1_2 +#define CL_MEM_OBJECT_IMAGE2D_ARRAY 0x10F3 +#define CL_MEM_OBJECT_IMAGE1D 0x10F4 +#define CL_MEM_OBJECT_IMAGE1D_ARRAY 0x10F5 +#define CL_MEM_OBJECT_IMAGE1D_BUFFER 0x10F6 +#endif +#ifdef CL_VERSION_2_0 +#define CL_MEM_OBJECT_PIPE 0x10F7 +#endif + +/* cl_mem_info */ +#define CL_MEM_TYPE 0x1100 +#define CL_MEM_FLAGS 0x1101 +#define CL_MEM_SIZE 0x1102 +#define CL_MEM_HOST_PTR 0x1103 +#define CL_MEM_MAP_COUNT 0x1104 +#define CL_MEM_REFERENCE_COUNT 0x1105 +#define CL_MEM_CONTEXT 0x1106 +#ifdef CL_VERSION_1_1 +#define CL_MEM_ASSOCIATED_MEMOBJECT 0x1107 +#define CL_MEM_OFFSET 0x1108 +#endif +#ifdef CL_VERSION_2_0 +#define CL_MEM_USES_SVM_POINTER 0x1109 +#endif + +/* cl_image_info */ +#define CL_IMAGE_FORMAT 0x1110 +#define CL_IMAGE_ELEMENT_SIZE 0x1111 +#define CL_IMAGE_ROW_PITCH 0x1112 +#define CL_IMAGE_SLICE_PITCH 0x1113 +#define CL_IMAGE_WIDTH 0x1114 +#define CL_IMAGE_HEIGHT 0x1115 +#define CL_IMAGE_DEPTH 0x1116 +#ifdef CL_VERSION_1_2 +#define CL_IMAGE_ARRAY_SIZE 0x1117 +#define CL_IMAGE_BUFFER 0x1118 +#define CL_IMAGE_NUM_MIP_LEVELS 0x1119 +#define CL_IMAGE_NUM_SAMPLES 0x111A +#endif + +#ifdef CL_VERSION_2_0 + +/* cl_pipe_info */ +#define CL_PIPE_PACKET_SIZE 0x1120 +#define CL_PIPE_MAX_PACKETS 0x1121 + +#endif + +/* cl_addressing_mode */ +#define CL_ADDRESS_NONE 0x1130 +#define CL_ADDRESS_CLAMP_TO_EDGE 0x1131 +#define CL_ADDRESS_CLAMP 0x1132 +#define CL_ADDRESS_REPEAT 0x1133 +#ifdef CL_VERSION_1_1 +#define CL_ADDRESS_MIRRORED_REPEAT 0x1134 +#endif + +/* cl_filter_mode */ +#define CL_FILTER_NEAREST 0x1140 +#define CL_FILTER_LINEAR 0x1141 + +/* cl_sampler_info */ +#define CL_SAMPLER_REFERENCE_COUNT 0x1150 +#define CL_SAMPLER_CONTEXT 
0x1151 +#define CL_SAMPLER_NORMALIZED_COORDS 0x1152 +#define CL_SAMPLER_ADDRESSING_MODE 0x1153 +#define CL_SAMPLER_FILTER_MODE 0x1154 +#ifdef CL_VERSION_2_0 +/* These enumerants are for the cl_khr_mipmap_image extension. + They have since been added to cl_ext.h with an appropriate + KHR suffix, but are left here for backwards compatibility. */ +#define CL_SAMPLER_MIP_FILTER_MODE 0x1155 +#define CL_SAMPLER_LOD_MIN 0x1156 +#define CL_SAMPLER_LOD_MAX 0x1157 +#endif + +/* cl_map_flags - bitfield */ +#define CL_MAP_READ (1 << 0) +#define CL_MAP_WRITE (1 << 1) +#ifdef CL_VERSION_1_2 +#define CL_MAP_WRITE_INVALIDATE_REGION (1 << 2) +#endif + +/* cl_program_info */ +#define CL_PROGRAM_REFERENCE_COUNT 0x1160 +#define CL_PROGRAM_CONTEXT 0x1161 +#define CL_PROGRAM_NUM_DEVICES 0x1162 +#define CL_PROGRAM_DEVICES 0x1163 +#define CL_PROGRAM_SOURCE 0x1164 +#define CL_PROGRAM_BINARY_SIZES 0x1165 +#define CL_PROGRAM_BINARIES 0x1166 +#ifdef CL_VERSION_1_2 +#define CL_PROGRAM_NUM_KERNELS 0x1167 +#define CL_PROGRAM_KERNEL_NAMES 0x1168 +#endif +#ifdef CL_VERSION_2_1 +#define CL_PROGRAM_IL 0x1169 +#endif +#ifdef CL_VERSION_2_2 +#define CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT 0x116A +#define CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT 0x116B +#endif + +/* cl_program_build_info */ +#define CL_PROGRAM_BUILD_STATUS 0x1181 +#define CL_PROGRAM_BUILD_OPTIONS 0x1182 +#define CL_PROGRAM_BUILD_LOG 0x1183 +#ifdef CL_VERSION_1_2 +#define CL_PROGRAM_BINARY_TYPE 0x1184 +#endif +#ifdef CL_VERSION_2_0 +#define CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE 0x1185 +#endif + +#ifdef CL_VERSION_1_2 + +/* cl_program_binary_type */ +#define CL_PROGRAM_BINARY_TYPE_NONE 0x0 +#define CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT 0x1 +#define CL_PROGRAM_BINARY_TYPE_LIBRARY 0x2 +#define CL_PROGRAM_BINARY_TYPE_EXECUTABLE 0x4 + +#endif + +/* cl_build_status */ +#define CL_BUILD_SUCCESS 0 +#define CL_BUILD_NONE -1 +#define CL_BUILD_ERROR -2 +#define CL_BUILD_IN_PROGRESS -3 + +/* cl_kernel_info */ +#define CL_KERNEL_FUNCTION_NAME 
0x1190 +#define CL_KERNEL_NUM_ARGS 0x1191 +#define CL_KERNEL_REFERENCE_COUNT 0x1192 +#define CL_KERNEL_CONTEXT 0x1193 +#define CL_KERNEL_PROGRAM 0x1194 +#ifdef CL_VERSION_1_2 +#define CL_KERNEL_ATTRIBUTES 0x1195 +#endif + +#ifdef CL_VERSION_1_2 + +/* cl_kernel_arg_info */ +#define CL_KERNEL_ARG_ADDRESS_QUALIFIER 0x1196 +#define CL_KERNEL_ARG_ACCESS_QUALIFIER 0x1197 +#define CL_KERNEL_ARG_TYPE_NAME 0x1198 +#define CL_KERNEL_ARG_TYPE_QUALIFIER 0x1199 +#define CL_KERNEL_ARG_NAME 0x119A + +#endif + +#ifdef CL_VERSION_1_2 + +/* cl_kernel_arg_address_qualifier */ +#define CL_KERNEL_ARG_ADDRESS_GLOBAL 0x119B +#define CL_KERNEL_ARG_ADDRESS_LOCAL 0x119C +#define CL_KERNEL_ARG_ADDRESS_CONSTANT 0x119D +#define CL_KERNEL_ARG_ADDRESS_PRIVATE 0x119E + +#endif + +#ifdef CL_VERSION_1_2 + +/* cl_kernel_arg_access_qualifier */ +#define CL_KERNEL_ARG_ACCESS_READ_ONLY 0x11A0 +#define CL_KERNEL_ARG_ACCESS_WRITE_ONLY 0x11A1 +#define CL_KERNEL_ARG_ACCESS_READ_WRITE 0x11A2 +#define CL_KERNEL_ARG_ACCESS_NONE 0x11A3 + +#endif + +#ifdef CL_VERSION_1_2 + +/* cl_kernel_arg_type_qualifier */ +#define CL_KERNEL_ARG_TYPE_NONE 0 +#define CL_KERNEL_ARG_TYPE_CONST (1 << 0) +#define CL_KERNEL_ARG_TYPE_RESTRICT (1 << 1) +#define CL_KERNEL_ARG_TYPE_VOLATILE (1 << 2) +#ifdef CL_VERSION_2_0 +#define CL_KERNEL_ARG_TYPE_PIPE (1 << 3) +#endif + +#endif + +/* cl_kernel_work_group_info */ +#define CL_KERNEL_WORK_GROUP_SIZE 0x11B0 +#define CL_KERNEL_COMPILE_WORK_GROUP_SIZE 0x11B1 +#define CL_KERNEL_LOCAL_MEM_SIZE 0x11B2 +#define CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE 0x11B3 +#define CL_KERNEL_PRIVATE_MEM_SIZE 0x11B4 +#ifdef CL_VERSION_1_2 +#define CL_KERNEL_GLOBAL_WORK_SIZE 0x11B5 +#endif + +#ifdef CL_VERSION_2_1 + +/* cl_kernel_sub_group_info */ +#define CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE 0x2033 +#define CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE 0x2034 +#define CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT 0x11B8 +#define CL_KERNEL_MAX_NUM_SUB_GROUPS 0x11B9 +#define CL_KERNEL_COMPILE_NUM_SUB_GROUPS 0x11BA 
+ +#endif + +#ifdef CL_VERSION_2_0 + +/* cl_kernel_exec_info */ +#define CL_KERNEL_EXEC_INFO_SVM_PTRS 0x11B6 +#define CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM 0x11B7 + +#endif + +/* cl_event_info */ +#define CL_EVENT_COMMAND_QUEUE 0x11D0 +#define CL_EVENT_COMMAND_TYPE 0x11D1 +#define CL_EVENT_REFERENCE_COUNT 0x11D2 +#define CL_EVENT_COMMAND_EXECUTION_STATUS 0x11D3 +#ifdef CL_VERSION_1_1 +#define CL_EVENT_CONTEXT 0x11D4 +#endif + +/* cl_command_type */ +#define CL_COMMAND_NDRANGE_KERNEL 0x11F0 +#define CL_COMMAND_TASK 0x11F1 +#define CL_COMMAND_NATIVE_KERNEL 0x11F2 +#define CL_COMMAND_READ_BUFFER 0x11F3 +#define CL_COMMAND_WRITE_BUFFER 0x11F4 +#define CL_COMMAND_COPY_BUFFER 0x11F5 +#define CL_COMMAND_READ_IMAGE 0x11F6 +#define CL_COMMAND_WRITE_IMAGE 0x11F7 +#define CL_COMMAND_COPY_IMAGE 0x11F8 +#define CL_COMMAND_COPY_IMAGE_TO_BUFFER 0x11F9 +#define CL_COMMAND_COPY_BUFFER_TO_IMAGE 0x11FA +#define CL_COMMAND_MAP_BUFFER 0x11FB +#define CL_COMMAND_MAP_IMAGE 0x11FC +#define CL_COMMAND_UNMAP_MEM_OBJECT 0x11FD +#define CL_COMMAND_MARKER 0x11FE +#define CL_COMMAND_ACQUIRE_GL_OBJECTS 0x11FF +#define CL_COMMAND_RELEASE_GL_OBJECTS 0x1200 +#ifdef CL_VERSION_1_1 +#define CL_COMMAND_READ_BUFFER_RECT 0x1201 +#define CL_COMMAND_WRITE_BUFFER_RECT 0x1202 +#define CL_COMMAND_COPY_BUFFER_RECT 0x1203 +#define CL_COMMAND_USER 0x1204 +#endif +#ifdef CL_VERSION_1_2 +#define CL_COMMAND_BARRIER 0x1205 +#define CL_COMMAND_MIGRATE_MEM_OBJECTS 0x1206 +#define CL_COMMAND_FILL_BUFFER 0x1207 +#define CL_COMMAND_FILL_IMAGE 0x1208 +#endif +#ifdef CL_VERSION_2_0 +#define CL_COMMAND_SVM_FREE 0x1209 +#define CL_COMMAND_SVM_MEMCPY 0x120A +#define CL_COMMAND_SVM_MEMFILL 0x120B +#define CL_COMMAND_SVM_MAP 0x120C +#define CL_COMMAND_SVM_UNMAP 0x120D +#endif + +/* command execution status */ +#define CL_COMPLETE 0x0 +#define CL_RUNNING 0x1 +#define CL_SUBMITTED 0x2 +#define CL_QUEUED 0x3 + +#ifdef CL_VERSION_1_1 + +/* cl_buffer_create_type */ +#define CL_BUFFER_CREATE_TYPE_REGION 0x1220 + +#endif + +/* 
cl_profiling_info */ +#define CL_PROFILING_COMMAND_QUEUED 0x1280 +#define CL_PROFILING_COMMAND_SUBMIT 0x1281 +#define CL_PROFILING_COMMAND_START 0x1282 +#define CL_PROFILING_COMMAND_END 0x1283 +#ifdef CL_VERSION_2_0 +#define CL_PROFILING_COMMAND_COMPLETE 0x1284 +#endif + +#ifdef CL_EXPERIMENTAL + +/* cl_device_atomic_capabilities - bitfield */ +#define CL_DEVICE_ATOMIC_ORDER_RELAXED (1 << 0) +#define CL_DEVICE_ATOMIC_ORDER_ACQ_REL (1 << 1) +#define CL_DEVICE_ATOMIC_ORDER_SEQ_CST (1 << 2) +#define CL_DEVICE_ATOMIC_SCOPE_WORK_ITEM (1 << 3) +#define CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP (1 << 4) +#define CL_DEVICE_ATOMIC_SCOPE_DEVICE (1 << 5) +#define CL_DEVICE_ATOMIC_SCOPE_ALL_SVM_DEVICES (1 << 6) + +/* cl_device_info */ +#define CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES 0x1063 +#define CL_DEVICE_ATOMIC_FENCE_CAPABILITIES 0x1064 +#define CL_DEVICE_NON_UNIFORM_WORK_GROUP_SUPPORT 0x1065 +#define CL_DEVICE_OPENCL_C_VERSIONS 0x1066 +#define CL_DEVICE_MAX_WRITE_IMAGE3D_ARGS 0x1067 +#define CL_DEVICE_WORK_GROUP_COLLECTIVE_FUNCTIONS_SUPPORT 0x1068 +#define CL_DEVICE_GENERIC_ADDRESS_SPACE_SUPPORT 0x1069 +/* 0x106A to 0x106E - Reserved for upcoming KHR extension */ +#define CL_DEVICE_OPENCL_C_FEATURES 0x106F + +/* cl_command_type */ +#define CL_COMMAND_SVM_MIGRATE_MEM 0x120E + +#endif + +/* cl_khronos_vendor_id */ +#define CL_KHRONOS_VENDOR_ID_CODEPLAY 0x10004 + +/********************************************************************************************************/ + +/* Platform API */ +extern CL_API_ENTRY cl_int CL_API_CALL +clGetPlatformIDs(cl_uint num_entries, + cl_platform_id * platforms, + cl_uint * num_platforms) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetPlatformInfo(cl_platform_id platform, + cl_platform_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +/* Device APIs */ +extern CL_API_ENTRY cl_int CL_API_CALL +clGetDeviceIDs(cl_platform_id platform, + 
cl_device_type device_type, + cl_uint num_entries, + cl_device_id * devices, + cl_uint * num_devices) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetDeviceInfo(cl_device_id device, + cl_device_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_2 + +extern CL_API_ENTRY cl_int CL_API_CALL +clCreateSubDevices(cl_device_id in_device, + const cl_device_partition_property * properties, + cl_uint num_devices, + cl_device_id * out_devices, + cl_uint * num_devices_ret) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainDevice(cl_device_id device) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseDevice(cl_device_id device) CL_API_SUFFIX__VERSION_1_2; + +#endif + +#ifdef CL_VERSION_2_1 + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetDefaultDeviceCommandQueue(cl_context context, + cl_device_id device, + cl_command_queue command_queue) CL_API_SUFFIX__VERSION_2_1; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetDeviceAndHostTimer(cl_device_id device, + cl_ulong* device_timestamp, + cl_ulong* host_timestamp) CL_API_SUFFIX__VERSION_2_1; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetHostTimer(cl_device_id device, + cl_ulong * host_timestamp) CL_API_SUFFIX__VERSION_2_1; + +#endif + +/* Context APIs */ +extern CL_API_ENTRY cl_context CL_API_CALL +clCreateContext(const cl_context_properties * properties, + cl_uint num_devices, + const cl_device_id * devices, + void (CL_CALLBACK * pfn_notify)(const char * errinfo, + const void * private_info, + size_t cb, + void * user_data), + void * user_data, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_context CL_API_CALL +clCreateContextFromType(const cl_context_properties * properties, + cl_device_type device_type, + void (CL_CALLBACK * pfn_notify)(const char * errinfo, + const void * private_info, + size_t cb, + void * 
user_data), + void * user_data, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainContext(cl_context context) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseContext(cl_context context) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetContextInfo(cl_context context, + cl_context_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +/* Command Queue APIs */ + +#ifdef CL_VERSION_2_0 + +extern CL_API_ENTRY cl_command_queue CL_API_CALL +clCreateCommandQueueWithProperties(cl_context context, + cl_device_id device, + const cl_queue_properties * properties, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_2_0; + +#endif + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainCommandQueue(cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseCommandQueue(cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetCommandQueueInfo(cl_command_queue command_queue, + cl_command_queue_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +/* Memory Object APIs */ +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateBuffer(cl_context context, + cl_mem_flags flags, + size_t size, + void * host_ptr, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_1 + +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateSubBuffer(cl_mem buffer, + cl_mem_flags flags, + cl_buffer_create_type buffer_create_type, + const void * buffer_create_info, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_1; + +#endif + +#ifdef CL_VERSION_1_2 + +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateImage(cl_context context, + cl_mem_flags flags, + const cl_image_format * image_format, + const cl_image_desc * image_desc, + void * host_ptr, + 
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2; + +#endif + +#ifdef CL_VERSION_2_0 + +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreatePipe(cl_context context, + cl_mem_flags flags, + cl_uint pipe_packet_size, + cl_uint pipe_max_packets, + const cl_pipe_properties * properties, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_2_0; + +#endif + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainMemObject(cl_mem memobj) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseMemObject(cl_mem memobj) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetSupportedImageFormats(cl_context context, + cl_mem_flags flags, + cl_mem_object_type image_type, + cl_uint num_entries, + cl_image_format * image_formats, + cl_uint * num_image_formats) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetMemObjectInfo(cl_mem memobj, + cl_mem_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetImageInfo(cl_mem image, + cl_image_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_2_0 + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetPipeInfo(cl_mem pipe, + cl_pipe_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_2_0; + +#endif + +#ifdef CL_VERSION_1_1 + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetMemObjectDestructorCallback(cl_mem memobj, + void (CL_CALLBACK * pfn_notify)(cl_mem memobj, + void * user_data), + void * user_data) CL_API_SUFFIX__VERSION_1_1; + +#endif + +/* SVM Allocation APIs */ + +#ifdef CL_VERSION_2_0 + +extern CL_API_ENTRY void * CL_API_CALL +clSVMAlloc(cl_context context, + cl_svm_mem_flags flags, + size_t size, + cl_uint alignment) CL_API_SUFFIX__VERSION_2_0; + +extern CL_API_ENTRY void CL_API_CALL 
+clSVMFree(cl_context context, + void * svm_pointer) CL_API_SUFFIX__VERSION_2_0; + +#endif + +/* Sampler APIs */ + +#ifdef CL_VERSION_2_0 + +extern CL_API_ENTRY cl_sampler CL_API_CALL +clCreateSamplerWithProperties(cl_context context, + const cl_sampler_properties * sampler_properties, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_2_0; + +#endif + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainSampler(cl_sampler sampler) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseSampler(cl_sampler sampler) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetSamplerInfo(cl_sampler sampler, + cl_sampler_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +/* Program Object APIs */ +extern CL_API_ENTRY cl_program CL_API_CALL +clCreateProgramWithSource(cl_context context, + cl_uint count, + const char ** strings, + const size_t * lengths, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_program CL_API_CALL +clCreateProgramWithBinary(cl_context context, + cl_uint num_devices, + const cl_device_id * device_list, + const size_t * lengths, + const unsigned char ** binaries, + cl_int * binary_status, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_2 + +extern CL_API_ENTRY cl_program CL_API_CALL +clCreateProgramWithBuiltInKernels(cl_context context, + cl_uint num_devices, + const cl_device_id * device_list, + const char * kernel_names, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2; + +#endif + +#ifdef CL_VERSION_2_1 + +extern CL_API_ENTRY cl_program CL_API_CALL +clCreateProgramWithIL(cl_context context, + const void* il, + size_t length, + cl_int* errcode_ret) CL_API_SUFFIX__VERSION_2_1; + +#endif + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainProgram(cl_program program) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseProgram(cl_program program) 
CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clBuildProgram(cl_program program, + cl_uint num_devices, + const cl_device_id * device_list, + const char * options, + void (CL_CALLBACK * pfn_notify)(cl_program program, + void * user_data), + void * user_data) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_2 + +extern CL_API_ENTRY cl_int CL_API_CALL +clCompileProgram(cl_program program, + cl_uint num_devices, + const cl_device_id * device_list, + const char * options, + cl_uint num_input_headers, + const cl_program * input_headers, + const char ** header_include_names, + void (CL_CALLBACK * pfn_notify)(cl_program program, + void * user_data), + void * user_data) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_program CL_API_CALL +clLinkProgram(cl_context context, + cl_uint num_devices, + const cl_device_id * device_list, + const char * options, + cl_uint num_input_programs, + const cl_program * input_programs, + void (CL_CALLBACK * pfn_notify)(cl_program program, + void * user_data), + void * user_data, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2; + +#endif + +#ifdef CL_VERSION_2_2 + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetProgramReleaseCallback(cl_program program, + void (CL_CALLBACK * pfn_notify)(cl_program program, + void * user_data), + void * user_data) CL_API_SUFFIX__VERSION_2_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetProgramSpecializationConstant(cl_program program, + cl_uint spec_id, + size_t spec_size, + const void* spec_value) CL_API_SUFFIX__VERSION_2_2; + +#endif + +#ifdef CL_VERSION_1_2 + +extern CL_API_ENTRY cl_int CL_API_CALL +clUnloadPlatformCompiler(cl_platform_id platform) CL_API_SUFFIX__VERSION_1_2; + +#endif + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetProgramInfo(cl_program program, + cl_program_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL 
+clGetProgramBuildInfo(cl_program program, + cl_device_id device, + cl_program_build_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +/* Kernel Object APIs */ +extern CL_API_ENTRY cl_kernel CL_API_CALL +clCreateKernel(cl_program program, + const char * kernel_name, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clCreateKernelsInProgram(cl_program program, + cl_uint num_kernels, + cl_kernel * kernels, + cl_uint * num_kernels_ret) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_2_1 + +extern CL_API_ENTRY cl_kernel CL_API_CALL +clCloneKernel(cl_kernel source_kernel, + cl_int* errcode_ret) CL_API_SUFFIX__VERSION_2_1; + +#endif + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainKernel(cl_kernel kernel) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseKernel(cl_kernel kernel) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetKernelArg(cl_kernel kernel, + cl_uint arg_index, + size_t arg_size, + const void * arg_value) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_2_0 + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetKernelArgSVMPointer(cl_kernel kernel, + cl_uint arg_index, + const void * arg_value) CL_API_SUFFIX__VERSION_2_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetKernelExecInfo(cl_kernel kernel, + cl_kernel_exec_info param_name, + size_t param_value_size, + const void * param_value) CL_API_SUFFIX__VERSION_2_0; + +#endif + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetKernelInfo(cl_kernel kernel, + cl_kernel_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_2 + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetKernelArgInfo(cl_kernel kernel, + cl_uint arg_indx, + cl_kernel_arg_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) 
CL_API_SUFFIX__VERSION_1_2; + +#endif + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetKernelWorkGroupInfo(cl_kernel kernel, + cl_device_id device, + cl_kernel_work_group_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_2_1 + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetKernelSubGroupInfo(cl_kernel kernel, + cl_device_id device, + cl_kernel_sub_group_info param_name, + size_t input_value_size, + const void* input_value, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret) CL_API_SUFFIX__VERSION_2_1; + +#endif + +/* Event Object APIs */ +extern CL_API_ENTRY cl_int CL_API_CALL +clWaitForEvents(cl_uint num_events, + const cl_event * event_list) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetEventInfo(cl_event event, + cl_event_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_1 + +extern CL_API_ENTRY cl_event CL_API_CALL +clCreateUserEvent(cl_context context, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_1; + +#endif + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainEvent(cl_event event) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseEvent(cl_event event) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_1 + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetUserEventStatus(cl_event event, + cl_int execution_status) CL_API_SUFFIX__VERSION_1_1; + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetEventCallback(cl_event event, + cl_int command_exec_callback_type, + void (CL_CALLBACK * pfn_notify)(cl_event event, + cl_int event_command_status, + void * user_data), + void * user_data) CL_API_SUFFIX__VERSION_1_1; + +#endif + +/* Profiling APIs */ +extern CL_API_ENTRY cl_int CL_API_CALL +clGetEventProfilingInfo(cl_event event, + cl_profiling_info param_name, + size_t param_value_size, + void * 
param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +/* Flush and Finish APIs */ +extern CL_API_ENTRY cl_int CL_API_CALL +clFlush(cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clFinish(cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_0; + +/* Enqueued Commands APIs */ +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueReadBuffer(cl_command_queue command_queue, + cl_mem buffer, + cl_bool blocking_read, + size_t offset, + size_t size, + void * ptr, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_1 + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueReadBufferRect(cl_command_queue command_queue, + cl_mem buffer, + cl_bool blocking_read, + const size_t * buffer_offset, + const size_t * host_offset, + const size_t * region, + size_t buffer_row_pitch, + size_t buffer_slice_pitch, + size_t host_row_pitch, + size_t host_slice_pitch, + void * ptr, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_1; + +#endif + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueWriteBuffer(cl_command_queue command_queue, + cl_mem buffer, + cl_bool blocking_write, + size_t offset, + size_t size, + const void * ptr, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_1 + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueWriteBufferRect(cl_command_queue command_queue, + cl_mem buffer, + cl_bool blocking_write, + const size_t * buffer_offset, + const size_t * host_offset, + const size_t * region, + size_t buffer_row_pitch, + size_t buffer_slice_pitch, + size_t host_row_pitch, + size_t host_slice_pitch, + const void * ptr, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_1; + +#endif + +#ifdef 
CL_VERSION_1_2 + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueFillBuffer(cl_command_queue command_queue, + cl_mem buffer, + const void * pattern, + size_t pattern_size, + size_t offset, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_2; + +#endif + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueCopyBuffer(cl_command_queue command_queue, + cl_mem src_buffer, + cl_mem dst_buffer, + size_t src_offset, + size_t dst_offset, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_1 + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueCopyBufferRect(cl_command_queue command_queue, + cl_mem src_buffer, + cl_mem dst_buffer, + const size_t * src_origin, + const size_t * dst_origin, + const size_t * region, + size_t src_row_pitch, + size_t src_slice_pitch, + size_t dst_row_pitch, + size_t dst_slice_pitch, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_1; + +#endif + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueReadImage(cl_command_queue command_queue, + cl_mem image, + cl_bool blocking_read, + const size_t * origin, + const size_t * region, + size_t row_pitch, + size_t slice_pitch, + void * ptr, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueWriteImage(cl_command_queue command_queue, + cl_mem image, + cl_bool blocking_write, + const size_t * origin, + const size_t * region, + size_t input_row_pitch, + size_t input_slice_pitch, + const void * ptr, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_2 + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueFillImage(cl_command_queue command_queue, + cl_mem 
image, + const void * fill_color, + const size_t * origin, + const size_t * region, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_2; + +#endif + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueCopyImage(cl_command_queue command_queue, + cl_mem src_image, + cl_mem dst_image, + const size_t * src_origin, + const size_t * dst_origin, + const size_t * region, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueCopyImageToBuffer(cl_command_queue command_queue, + cl_mem src_image, + cl_mem dst_buffer, + const size_t * src_origin, + const size_t * region, + size_t dst_offset, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueCopyBufferToImage(cl_command_queue command_queue, + cl_mem src_buffer, + cl_mem dst_image, + size_t src_offset, + const size_t * dst_origin, + const size_t * region, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY void * CL_API_CALL +clEnqueueMapBuffer(cl_command_queue command_queue, + cl_mem buffer, + cl_bool blocking_map, + cl_map_flags map_flags, + size_t offset, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY void * CL_API_CALL +clEnqueueMapImage(cl_command_queue command_queue, + cl_mem image, + cl_bool blocking_map, + cl_map_flags map_flags, + const size_t * origin, + const size_t * region, + size_t * image_row_pitch, + size_t * image_slice_pitch, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int 
CL_API_CALL +clEnqueueUnmapMemObject(cl_command_queue command_queue, + cl_mem memobj, + void * mapped_ptr, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_2 + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueMigrateMemObjects(cl_command_queue command_queue, + cl_uint num_mem_objects, + const cl_mem * mem_objects, + cl_mem_migration_flags flags, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_2; + +#endif + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueNDRangeKernel(cl_command_queue command_queue, + cl_kernel kernel, + cl_uint work_dim, + const size_t * global_work_offset, + const size_t * global_work_size, + const size_t * local_work_size, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueNativeKernel(cl_command_queue command_queue, + void (CL_CALLBACK * user_func)(void *), + void * args, + size_t cb_args, + cl_uint num_mem_objects, + const cl_mem * mem_list, + const void ** args_mem_loc, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_2 + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueMarkerWithWaitList(cl_command_queue command_queue, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueBarrierWithWaitList(cl_command_queue command_queue, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_2; + +#endif + +#ifdef CL_VERSION_2_0 + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueSVMFree(cl_command_queue command_queue, + cl_uint num_svm_pointers, + void * svm_pointers[], + void (CL_CALLBACK * 
pfn_free_func)(cl_command_queue queue, + cl_uint num_svm_pointers, + void * svm_pointers[], + void * user_data), + void * user_data, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_2_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueSVMMemcpy(cl_command_queue command_queue, + cl_bool blocking_copy, + void * dst_ptr, + const void * src_ptr, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_2_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueSVMMemFill(cl_command_queue command_queue, + void * svm_ptr, + const void * pattern, + size_t pattern_size, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_2_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueSVMMap(cl_command_queue command_queue, + cl_bool blocking_map, + cl_map_flags flags, + void * svm_ptr, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_2_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueSVMUnmap(cl_command_queue command_queue, + void * svm_ptr, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_2_0; + +#endif + +#ifdef CL_VERSION_2_1 + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueSVMMigrateMem(cl_command_queue command_queue, + cl_uint num_svm_pointers, + const void ** svm_pointers, + const size_t * sizes, + cl_mem_migration_flags flags, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_2_1; + +#endif + +#ifdef CL_VERSION_1_2 + +/* Extension function access + * + * Returns the extension function address for the given function name, + * or NULL if a valid function can not be found. 
The client must + * check to make sure the address is not NULL, before using or + * calling the returned function address. + */ +extern CL_API_ENTRY void * CL_API_CALL +clGetExtensionFunctionAddressForPlatform(cl_platform_id platform, + const char * func_name) CL_API_SUFFIX__VERSION_1_2; + +#endif + +#ifdef CL_USE_DEPRECATED_OPENCL_1_0_APIS + /* + * WARNING: + * This API introduces mutable state into the OpenCL implementation. It has been REMOVED + * to better facilitate thread safety. The 1.0 API is not thread safe. It is not tested by the + * OpenCL 1.1 conformance test, and consequently may not work or may not work dependably. + * It is likely to be non-performant. Use of this API is not advised. Use at your own risk. + * + * Software developers previously relying on this API are instructed to set the command queue + * properties when creating the queue, instead. + */ + extern CL_API_ENTRY cl_int CL_API_CALL + clSetCommandQueueProperty(cl_command_queue command_queue, + cl_command_queue_properties properties, + cl_bool enable, + cl_command_queue_properties * old_properties) CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED; +#endif /* CL_USE_DEPRECATED_OPENCL_1_0_APIS */ + +/* Deprecated OpenCL 1.1 APIs */ +extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL +clCreateImage2D(cl_context context, + cl_mem_flags flags, + const cl_image_format * image_format, + size_t image_width, + size_t image_height, + size_t image_row_pitch, + void * host_ptr, + cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + +extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL +clCreateImage3D(cl_context context, + cl_mem_flags flags, + const cl_image_format * image_format, + size_t image_width, + size_t image_height, + size_t image_depth, + size_t image_row_pitch, + size_t image_slice_pitch, + void * host_ptr, + cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + +extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int 
CL_API_CALL +clEnqueueMarker(cl_command_queue command_queue, + cl_event * event) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + +extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL +clEnqueueWaitForEvents(cl_command_queue command_queue, + cl_uint num_events, + const cl_event * event_list) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + +extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL +clEnqueueBarrier(cl_command_queue command_queue) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + +extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL +clUnloadCompiler(void) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + +extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED void * CL_API_CALL +clGetExtensionFunctionAddress(const char * func_name) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + +/* Deprecated OpenCL 2.0 APIs */ +extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_2_DEPRECATED cl_command_queue CL_API_CALL +clCreateCommandQueue(cl_context context, + cl_device_id device, + cl_command_queue_properties properties, + cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_2_DEPRECATED; + +extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_2_DEPRECATED cl_sampler CL_API_CALL +clCreateSampler(cl_context context, + cl_bool normalized_coords, + cl_addressing_mode addressing_mode, + cl_filter_mode filter_mode, + cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_2_DEPRECATED; + +extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_2_DEPRECATED cl_int CL_API_CALL +clEnqueueTask(cl_command_queue command_queue, + cl_kernel kernel, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_EXT_SUFFIX__VERSION_1_2_DEPRECATED; + +#ifdef __cplusplus +} +#endif + +#endif /* __OPENCL_CL_H */ diff --git a/projects/hip/amdocl/CL/cl_egl.h b/projects/hip/amdocl/CL/cl_egl.h new file mode 100644 index 0000000000..bc4d998eb3 --- /dev/null +++ b/projects/hip/amdocl/CL/cl_egl.h @@ -0,0 +1,132 @@ 
+/******************************************************************************* + * Copyright (c) 2008-2019 The Khronos Group Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and/or associated documentation files (the + * "Materials"), to deal in the Materials without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Materials, and to + * permit persons to whom the Materials are furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Materials. + * + * MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS + * KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS + * SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT + * https://www.khronos.org/registry/ + * + * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. 
+ ******************************************************************************/ + +#ifndef __OPENCL_CL_EGL_H +#define __OPENCL_CL_EGL_H + +#include <CL/cl.h> + +#ifdef __cplusplus +extern "C" { +#endif + + +/* Command type for events created with clEnqueueAcquireEGLObjectsKHR */ +#define CL_COMMAND_EGL_FENCE_SYNC_OBJECT_KHR 0x202F +#define CL_COMMAND_ACQUIRE_EGL_OBJECTS_KHR 0x202D +#define CL_COMMAND_RELEASE_EGL_OBJECTS_KHR 0x202E + +/* Error type for clCreateFromEGLImageKHR */ +#define CL_INVALID_EGL_OBJECT_KHR -1093 +#define CL_EGL_RESOURCE_NOT_ACQUIRED_KHR -1092 + +/* CLeglImageKHR is an opaque handle to an EGLImage */ +typedef void* CLeglImageKHR; + +/* CLeglDisplayKHR is an opaque handle to an EGLDisplay */ +typedef void* CLeglDisplayKHR; + +/* CLeglSyncKHR is an opaque handle to an EGLSync object */ +typedef void* CLeglSyncKHR; + +/* properties passed to clCreateFromEGLImageKHR */ +typedef intptr_t cl_egl_image_properties_khr; + + +#define cl_khr_egl_image 1 + +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateFromEGLImageKHR(cl_context context, + CLeglDisplayKHR egldisplay, + CLeglImageKHR eglimage, + cl_mem_flags flags, + const cl_egl_image_properties_khr * properties, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromEGLImageKHR_fn)( + cl_context context, + CLeglDisplayKHR egldisplay, + CLeglImageKHR eglimage, + cl_mem_flags flags, + const cl_egl_image_properties_khr * properties, + cl_int * errcode_ret); + + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueAcquireEGLObjectsKHR(cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem * mem_objects, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireEGLObjectsKHR_fn)( + cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem * mem_objects, + cl_uint num_events_in_wait_list, + const cl_event * 
event_wait_list, + cl_event * event); + + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueReleaseEGLObjectsKHR(cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem * mem_objects, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseEGLObjectsKHR_fn)( + cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem * mem_objects, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event); + + +#define cl_khr_egl_event 1 + +extern CL_API_ENTRY cl_event CL_API_CALL +clCreateEventFromEGLSyncKHR(cl_context context, + CLeglSyncKHR sync, + CLeglDisplayKHR display, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_event (CL_API_CALL *clCreateEventFromEGLSyncKHR_fn)( + cl_context context, + CLeglSyncKHR sync, + CLeglDisplayKHR display, + cl_int * errcode_ret); + +#ifdef __cplusplus +} +#endif + +#endif /* __OPENCL_CL_EGL_H */ diff --git a/projects/hip/amdocl/CL/cl_ext.h b/projects/hip/amdocl/CL/cl_ext.h new file mode 100644 index 0000000000..4d6d8c093a --- /dev/null +++ b/projects/hip/amdocl/CL/cl_ext.h @@ -0,0 +1,1051 @@ +/******************************************************************************* + * Copyright (c) 2008-2019 The Khronos Group Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and/or associated documentation files (the + * "Materials"), to deal in the Materials without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Materials, and to + * permit persons to whom the Materials are furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Materials. 
+ * + * MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS + * KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS + * SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT + * https://www.khronos.org/registry/ + * + * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. + ******************************************************************************/ + +/* cl_ext.h contains OpenCL extensions which don't have external */ +/* (OpenGL, D3D) dependencies. */ + +#ifndef __CL_EXT_H +#define __CL_EXT_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include <CL/cl.h> + +/* cl_khr_fp64 extension - no extension #define since it has no functions */ +/* CL_DEVICE_DOUBLE_FP_CONFIG is defined in CL.h for OpenCL >= 120 */ + +#if CL_TARGET_OPENCL_VERSION <= 110 +#define CL_DEVICE_DOUBLE_FP_CONFIG 0x1032 +#endif + +/* cl_khr_fp16 extension - no extension #define since it has no functions */ +#define CL_DEVICE_HALF_FP_CONFIG 0x1033 + +/* Memory object destruction + * + * Apple extension for use to manage externally allocated buffers used with cl_mem objects with CL_MEM_USE_HOST_PTR + * + * Registers a user callback function that will be called when the memory object is deleted and its resources + * freed. Each call to clSetMemObjectCallbackFn registers the specified user callback function on a callback + * stack associated with memobj. The registered user callback functions are called in the reverse order in + * which they were registered. 
The user callback functions are called and then the memory object is deleted + * and its resources freed. This provides a mechanism for the application (and libraries) using memobj to be + * notified when the memory referenced by host_ptr, specified when the memory object is created and used as + * the storage bits for the memory object, can be reused or freed. + * + * The application may not call CL api's with the cl_mem object passed to the pfn_notify. + * + * Please check for the "cl_APPLE_SetMemObjectDestructor" extension using clGetDeviceInfo(CL_DEVICE_EXTENSIONS) + * before using. + */ +#define cl_APPLE_SetMemObjectDestructor 1 +cl_int CL_API_ENTRY clSetMemObjectDestructorAPPLE( cl_mem memobj, + void (* pfn_notify)(cl_mem memobj, void * user_data), + void * user_data) CL_EXT_SUFFIX__VERSION_1_0; + + +/* Context Logging Functions + * + * The next three convenience functions are intended to be used as the pfn_notify parameter to clCreateContext(). + * Please check for the "cl_APPLE_ContextLoggingFunctions" extension using clGetDeviceInfo(CL_DEVICE_EXTENSIONS) + * before using. 
+ * + * clLogMessagesToSystemLogAPPLE forwards on all log messages to the Apple System Logger + */ +#define cl_APPLE_ContextLoggingFunctions 1 +extern void CL_API_ENTRY clLogMessagesToSystemLogAPPLE( const char * errstr, + const void * private_info, + size_t cb, + void * user_data) CL_EXT_SUFFIX__VERSION_1_0; + +/* clLogMessagesToStdoutAPPLE sends all log messages to the file descriptor stdout */ +extern void CL_API_ENTRY clLogMessagesToStdoutAPPLE( const char * errstr, + const void * private_info, + size_t cb, + void * user_data) CL_EXT_SUFFIX__VERSION_1_0; + +/* clLogMessagesToStderrAPPLE sends all log messages to the file descriptor stderr */ +extern void CL_API_ENTRY clLogMessagesToStderrAPPLE( const char * errstr, + const void * private_info, + size_t cb, + void * user_data) CL_EXT_SUFFIX__VERSION_1_0; + + +/************************ +* cl_khr_icd extension * +************************/ +#define cl_khr_icd 1 + +/* cl_platform_info */ +#define CL_PLATFORM_ICD_SUFFIX_KHR 0x0920 + +/* Additional Error Codes */ +#define CL_PLATFORM_NOT_FOUND_KHR -1001 + +extern CL_API_ENTRY cl_int CL_API_CALL +clIcdGetPlatformIDsKHR(cl_uint num_entries, + cl_platform_id * platforms, + cl_uint * num_platforms); + +typedef CL_API_ENTRY cl_int +(CL_API_CALL *clIcdGetPlatformIDsKHR_fn)(cl_uint num_entries, + cl_platform_id * platforms, + cl_uint * num_platforms); + + +/******************************* + * cl_khr_il_program extension * + *******************************/ +#define cl_khr_il_program 1 + +/* New property to clGetDeviceInfo for retrieving supported intermediate + * languages + */ +#define CL_DEVICE_IL_VERSION_KHR 0x105B + +/* New property to clGetProgramInfo for retrieving the IL of a + * program + */ +#define CL_PROGRAM_IL_KHR 0x1169 + +extern CL_API_ENTRY cl_program CL_API_CALL +clCreateProgramWithILKHR(cl_context context, + const void * il, + size_t length, + cl_int * errcode_ret); + +typedef CL_API_ENTRY cl_program +(CL_API_CALL *clCreateProgramWithILKHR_fn)(cl_context 
context, + const void * il, + size_t length, + cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_2; + +/* Extension: cl_khr_image2d_from_buffer + * + * This extension allows a 2D image to be created from a cl_mem buffer without + * a copy. The type associated with a 2D image created from a buffer in an + * OpenCL program is image2d_t. Both the sampler and sampler-less read_image + * built-in functions are supported for 2D images and 2D images created from + * a buffer. Similarly, the write_image built-ins are also supported for 2D + * images created from a buffer. + * + * When the 2D image from buffer is created, the client must specify the + * width, height, image format (i.e. channel order and channel data type) + * and optionally the row pitch. + * + * The pitch specified must be a multiple of + * CL_DEVICE_IMAGE_PITCH_ALIGNMENT_KHR pixels. + * The base address of the buffer must be aligned to + * CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT_KHR pixels. + */ + +#define CL_DEVICE_IMAGE_PITCH_ALIGNMENT_KHR 0x104A +#define CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT_KHR 0x104B + + +/************************************** + * cl_khr_initialize_memory extension * + **************************************/ + +#define CL_CONTEXT_MEMORY_INITIALIZE_KHR 0x2030 + + +/************************************** + * cl_khr_terminate_context extension * + **************************************/ + +#define CL_DEVICE_TERMINATE_CAPABILITY_KHR 0x2031 +#define CL_CONTEXT_TERMINATE_KHR 0x2032 + +#define cl_khr_terminate_context 1 +extern CL_API_ENTRY cl_int CL_API_CALL +clTerminateContextKHR(cl_context context) CL_EXT_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_int +(CL_API_CALL *clTerminateContextKHR_fn)(cl_context context) CL_EXT_SUFFIX__VERSION_1_2; + + +/* + * Extension: cl_khr_spir + * + * This extension adds support to create an OpenCL program object from a + * Standard Portable Intermediate Representation (SPIR) instance + */ + +#define CL_DEVICE_SPIR_VERSIONS 0x40E0 +#define 
CL_PROGRAM_BINARY_TYPE_INTERMEDIATE 0x40E1 + + +/***************************************** + * cl_khr_create_command_queue extension * + *****************************************/ +#define cl_khr_create_command_queue 1 + +typedef cl_bitfield cl_queue_properties_khr; + +extern CL_API_ENTRY cl_command_queue CL_API_CALL +clCreateCommandQueueWithPropertiesKHR(cl_context context, + cl_device_id device, + const cl_queue_properties_khr* properties, + cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_command_queue +(CL_API_CALL *clCreateCommandQueueWithPropertiesKHR_fn)(cl_context context, + cl_device_id device, + const cl_queue_properties_khr* properties, + cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_2; + + +/****************************************** +* cl_nv_device_attribute_query extension * +******************************************/ + +/* cl_nv_device_attribute_query extension - no extension #define since it has no functions */ +#define CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV 0x4000 +#define CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV 0x4001 +#define CL_DEVICE_REGISTERS_PER_BLOCK_NV 0x4002 +#define CL_DEVICE_WARP_SIZE_NV 0x4003 +#define CL_DEVICE_GPU_OVERLAP_NV 0x4004 +#define CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV 0x4005 +#define CL_DEVICE_INTEGRATED_MEMORY_NV 0x4006 + +/********************************* +* cl_amd_device_memory_flags * +*********************************/ +#define cl_amd_device_memory_flags 1 +#define CL_MEM_USE_PERSISTENT_MEM_AMD (1 << 6) // Alloc from GPU's CPU visible heap + +/* cl_device_info */ +#define CL_DEVICE_MAX_ATOMIC_COUNTERS_EXT 0x4032 + +/********************************* +* cl_amd_device_attribute_query * +*********************************/ + +#define CL_DEVICE_PROFILING_TIMER_OFFSET_AMD 0x4036 +#define CL_DEVICE_TOPOLOGY_AMD 0x4037 +#define CL_DEVICE_BOARD_NAME_AMD 0x4038 +#define CL_DEVICE_GLOBAL_FREE_MEMORY_AMD 0x4039 +#define CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD 0x4040 +#define CL_DEVICE_SIMD_WIDTH_AMD 0x4041 +#define 
CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD 0x4042 +#define CL_DEVICE_WAVEFRONT_WIDTH_AMD 0x4043 +#define CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD 0x4044 +#define CL_DEVICE_GLOBAL_MEM_CHANNEL_BANKS_AMD 0x4045 +#define CL_DEVICE_GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD 0x4046 +#define CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD 0x4047 +#define CL_DEVICE_LOCAL_MEM_BANKS_AMD 0x4048 +#define CL_DEVICE_THREAD_TRACE_SUPPORTED_AMD 0x4049 +#define CL_DEVICE_GFXIP_MAJOR_AMD 0x404A +#define CL_DEVICE_GFXIP_MINOR_AMD 0x404B +#define CL_DEVICE_AVAILABLE_ASYNC_QUEUES_AMD 0x404C +#define CL_DEVICE_PREFERRED_WORK_GROUP_SIZE_AMD 0x4030 +#define CL_DEVICE_MAX_WORK_GROUP_SIZE_AMD 0x4031 +#define CL_DEVICE_PREFERRED_CONSTANT_BUFFER_SIZE_AMD 0x4033 +#define CL_DEVICE_PCIE_ID_AMD 0x4034 + +typedef union +{ + struct { cl_uint type; cl_uint data[5]; } raw; + struct { cl_uint type; cl_uchar unused[17]; cl_uchar bus; cl_uchar device; cl_uchar function; } pcie; +} cl_device_topology_amd; + +#define CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD 1 + +/************************** +* cl_amd_offline_devices * +**************************/ +#define CL_CONTEXT_OFFLINE_DEVICES_AMD 0x403F + +/******************************** +* cl_amd_bus_addressable_memory * +********************************/ + +/* cl_mem flag - bitfield */ +#define CL_MEM_BUS_ADDRESSABLE_AMD (1<<30) +#define CL_MEM_EXTERNAL_PHYSICAL_AMD (1<<31) + +#define CL_COMMAND_WAIT_SIGNAL_AMD 0x4080 +#define CL_COMMAND_WRITE_SIGNAL_AMD 0x4081 +#define CL_COMMAND_MAKE_BUFFERS_RESIDENT_AMD 0x4082 + +typedef struct _cl_bus_address_amd +{ + cl_ulong surface_bus_address; + cl_ulong marker_bus_address; +} cl_bus_address_amd; + +typedef CL_API_ENTRY cl_int +(CL_API_CALL * clEnqueueWaitSignalAMD_fn)( cl_command_queue /*command_queue*/, + cl_mem /*mem_object*/, + cl_uint /*value*/, + cl_uint /*num_events*/, + const cl_event * /*event_wait_list*/, + cl_event * /*event*/) CL_EXT_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_int +(CL_API_CALL * clEnqueueWriteSignalAMD_fn)( 
cl_command_queue /*command_queue*/, + cl_mem /*mem_object*/, + cl_uint /*value*/, + cl_ulong /*offset*/, + cl_uint /*num_events*/, + const cl_event * /*event_list*/, + cl_event * /*event*/) CL_EXT_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_int +(CL_API_CALL * clEnqueueMakeBuffersResidentAMD_fn)( cl_command_queue /*command_queue*/, + cl_uint /*num_mem_objs*/, + cl_mem * /*mem_objects*/, + cl_bool /*blocking_make_resident*/, + cl_bus_address_amd * /*bus_addresses*/, + cl_uint /*num_events*/, + const cl_event * /*event_list*/, + cl_event * /*event*/) CL_EXT_SUFFIX__VERSION_1_2; + +/********************** +* cl_amd_liquid_flash * +***********************/ +#define cl_amd_liquid_flash 1 + +#define CL_COMMAND_READ_SSG_FILE_AMD 0x4083 +#define CL_COMMAND_WRITE_SSG_FILE_AMD 0x4087 + +#define CL_INVALID_FILE_OBJECT_AMD 0x4084 + +typedef struct _cl_file_amd * cl_file_amd; + +typedef cl_uint cl_file_flags_amd; +#define CL_FILE_READ_ONLY_AMD (1 << 0) +#define CL_FILE_WRITE_ONLY_AMD (1 << 1) +#define CL_FILE_READ_WRITE_AMD (1 << 2) + +typedef cl_uint cl_file_info_amd; +#define CL_FILE_BLOCK_SIZE_AMD 0x4085 +#define CL_FILE_SIZE_AMD 0x4086 + +typedef CL_API_ENTRY cl_file_amd +(CL_API_CALL * clCreateSsgFileObjectAMD_fn)(cl_context /*context*/, + cl_file_flags_amd /*flags*/, + const wchar_t * /*file_name*/, + cl_int * /*errcode_ret*/) CL_EXT_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_int +(CL_API_CALL * clGetSsgFileObjectInfoAMD_fn)(cl_file_amd /* file */, + cl_file_info_amd /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_EXT_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_int +(CL_API_CALL * clRetainSsgFileObjectAMD_fn)( cl_file_amd /*file*/) CL_EXT_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_int +(CL_API_CALL * clReleaseSsgFileObjectAMD_fn)( cl_file_amd /*file*/) CL_EXT_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_int +(CL_API_CALL * clEnqueueReadSsgFileAMD_fn)(cl_command_queue 
/*command_queue*/, + cl_mem /*buffer*/, + cl_bool /*blocking_write*/, + size_t /*buffer_offset*/, + size_t /*cb*/, + cl_file_amd /*file*/, + size_t /*file_offset*/, + cl_uint /*num_events_in_wait_list*/, + const cl_event * /*event_wait_list*/, + cl_event * /*event*/) CL_EXT_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_int +(CL_API_CALL * clEnqueueWriteSsgFileAMD_fn)(cl_command_queue /*command_queue*/, + cl_mem /*buffer*/, + cl_bool /*blocking_read*/, + size_t /*buffer_offset*/, + size_t /*cb*/, + cl_file_amd /*file*/, + size_t /*file_offset*/, + cl_uint /*num_events_in_wait_list*/, + const cl_event * /*event_wait_list*/, + cl_event * /*event*/) CL_EXT_SUFFIX__VERSION_1_2; + +/************************* +* cl_amd_copy_buffer_p2p * +**************************/ +#define CL_DEVICE_NUM_P2P_DEVICES_AMD 0x4088 +#define CL_DEVICE_P2P_DEVICES_AMD 0x4089 + +#define cl_amd_copy_buffer_p2p 1 + +typedef CL_API_ENTRY cl_int +(CL_API_CALL * clEnqueueCopyBufferP2PAMD_fn)(cl_command_queue /*command_queue*/, + cl_mem /*src_buffer*/, + cl_mem /*dst_buffer*/, + size_t /*src_offset*/, + size_t /*dst_offset*/, + size_t /*cb*/, + cl_uint /*num_events_in_wait_list*/, + const cl_event* /*event_wait_list*/, + cl_event* /*event*/) CL_EXT_SUFFIX__VERSION_1_2; + +/*********************************** +* cl_amd_assembly_program extension * +***********************************/ +#define cl_amd_assembly_program 1 + +typedef CL_API_ENTRY cl_program (CL_API_CALL * clCreateProgramWithAssemblyAMD_fn) ( + cl_context /* context */, + cl_uint /* count */, + const char** /* strings */, + const size_t* /* lengths */, + cl_int* /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_2; + +#ifdef CL_VERSION_2_0 +/******************************** +* cl_amd_planar_yuv * +********************************/ + +/* cl_mem flag - bitfield */ +#define CL_YUV_IMAGE_Y_PLANE_AMD 0x0 +#define CL_YUV_IMAGE_UV_PLANE_AMD 0x1 + +typedef CL_API_ENTRY cl_mem +(CL_API_CALL * clGetPlaneFromImageAMD_fn)(cl_context /*context*/, + cl_mem 
/*mem*/, + cl_uint /*plane*/, + cl_int * /*errcode_ret*/) CL_EXT_SUFFIX__VERSION_2_0; +#endif + +// +/************************** +* cl_amd_command_queue_info * +**************************/ +#define CL_QUEUE_THREAD_HANDLE_AMD 0x403E + +/* cl_kernel_exec_info for DVR DOPP texture support */ +#define CL_KERNEL_EXEC_INFO_NEW_VCOP_AMD 0x4120 +#define CL_KERNEL_EXEC_INFO_PFPA_VCOP_AMD 0x4121 + +/************************* +* cl_amd_object_metadata * +**************************/ +#define cl_amd_object_metadata 1 + +typedef size_t cl_key_amd; + +#define CL_INVALID_OBJECT_AMD 0x403A +#define CL_INVALID_KEY_AMD 0x403B +#define CL_PLATFORM_MAX_KEYS_AMD 0x403C + +typedef CL_API_ENTRY cl_key_amd (CL_API_CALL * clCreateKeyAMD_fn)( + cl_platform_id /* platform */, + void (CL_CALLBACK * /* destructor */)( void* /* old_value */), + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_1; + +typedef CL_API_ENTRY cl_int (CL_API_CALL * clObjectGetValueForKeyAMD_fn)( + void * /* object */, + cl_key_amd /* key */, + void ** /* ret_val */) CL_API_SUFFIX__VERSION_1_1; + +typedef CL_API_ENTRY cl_int (CL_API_CALL * clObjectSetValueForKeyAMD_fn)( + void * /* object */, + cl_key_amd /* key */, + void * /* value */) CL_API_SUFFIX__VERSION_1_1; +// + + +/********************************* +* cl_arm_printf extension +*********************************/ + +#define CL_PRINTF_CALLBACK_ARM 0x40B0 +#define CL_PRINTF_BUFFERSIZE_ARM 0x40B1 + + +/*********************************** +* cl_ext_device_fission extension +***********************************/ +#define cl_ext_device_fission 1 + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseDeviceEXT(cl_device_id device) CL_EXT_SUFFIX__VERSION_1_1; + +typedef CL_API_ENTRY cl_int +(CL_API_CALL *clReleaseDeviceEXT_fn)(cl_device_id device) CL_EXT_SUFFIX__VERSION_1_1; + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainDeviceEXT(cl_device_id device) CL_EXT_SUFFIX__VERSION_1_1; + +typedef CL_API_ENTRY cl_int +(CL_API_CALL *clRetainDeviceEXT_fn)(cl_device_id device) 
CL_EXT_SUFFIX__VERSION_1_1; + +typedef cl_ulong cl_device_partition_property_ext; +extern CL_API_ENTRY cl_int CL_API_CALL +clCreateSubDevicesEXT(cl_device_id in_device, + const cl_device_partition_property_ext * properties, + cl_uint num_entries, + cl_device_id * out_devices, + cl_uint * num_devices) CL_EXT_SUFFIX__VERSION_1_1; + +typedef CL_API_ENTRY cl_int +(CL_API_CALL * clCreateSubDevicesEXT_fn)(cl_device_id in_device, + const cl_device_partition_property_ext * properties, + cl_uint num_entries, + cl_device_id * out_devices, + cl_uint * num_devices) CL_EXT_SUFFIX__VERSION_1_1; + +/* cl_device_partition_property_ext */ +#define CL_DEVICE_PARTITION_EQUALLY_EXT 0x4050 +#define CL_DEVICE_PARTITION_BY_COUNTS_EXT 0x4051 +#define CL_DEVICE_PARTITION_BY_NAMES_EXT 0x4052 +#define CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN_EXT 0x4053 + +/* clDeviceGetInfo selectors */ +#define CL_DEVICE_PARENT_DEVICE_EXT 0x4054 +#define CL_DEVICE_PARTITION_TYPES_EXT 0x4055 +#define CL_DEVICE_AFFINITY_DOMAINS_EXT 0x4056 +#define CL_DEVICE_REFERENCE_COUNT_EXT 0x4057 +#define CL_DEVICE_PARTITION_STYLE_EXT 0x4058 + +/* clGetImageInfo enum */ +#define CL_IMAGE_BYTE_PITCH_AMD 0x4059 + +/* error codes */ +#define CL_DEVICE_PARTITION_FAILED_EXT -1057 +#define CL_INVALID_PARTITION_COUNT_EXT -1058 +#define CL_INVALID_PARTITION_NAME_EXT -1059 + +/* CL_AFFINITY_DOMAINs */ +#define CL_AFFINITY_DOMAIN_L1_CACHE_EXT 0x1 +#define CL_AFFINITY_DOMAIN_L2_CACHE_EXT 0x2 +#define CL_AFFINITY_DOMAIN_L3_CACHE_EXT 0x3 +#define CL_AFFINITY_DOMAIN_L4_CACHE_EXT 0x4 +#define CL_AFFINITY_DOMAIN_NUMA_EXT 0x10 +#define CL_AFFINITY_DOMAIN_NEXT_FISSIONABLE_EXT 0x100 + +/* cl_device_partition_property_ext list terminators */ +#define CL_PROPERTIES_LIST_END_EXT ((cl_device_partition_property_ext) 0) +#define CL_PARTITION_BY_COUNTS_LIST_END_EXT ((cl_device_partition_property_ext) 0) +#define CL_PARTITION_BY_NAMES_LIST_END_EXT ((cl_device_partition_property_ext) 0 - 1) + + +/*********************************** + * 
cl_ext_migrate_memobject extension definitions + ***********************************/ +#define cl_ext_migrate_memobject 1 + +typedef cl_bitfield cl_mem_migration_flags_ext; + +#define CL_MIGRATE_MEM_OBJECT_HOST_EXT 0x1 + +#define CL_COMMAND_MIGRATE_MEM_OBJECT_EXT 0x4040 + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueMigrateMemObjectEXT(cl_command_queue command_queue, + cl_uint num_mem_objects, + const cl_mem * mem_objects, + cl_mem_migration_flags_ext flags, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event); + +typedef CL_API_ENTRY cl_int +(CL_API_CALL *clEnqueueMigrateMemObjectEXT_fn)(cl_command_queue command_queue, + cl_uint num_mem_objects, + const cl_mem * mem_objects, + cl_mem_migration_flags_ext flags, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event); + + +/********************************* +* cl_qcom_ext_host_ptr extension +*********************************/ +#define cl_qcom_ext_host_ptr 1 + +#define CL_MEM_EXT_HOST_PTR_QCOM (1 << 29) + +#define CL_DEVICE_EXT_MEM_PADDING_IN_BYTES_QCOM 0x40A0 +#define CL_DEVICE_PAGE_SIZE_QCOM 0x40A1 +#define CL_IMAGE_ROW_ALIGNMENT_QCOM 0x40A2 +#define CL_IMAGE_SLICE_ALIGNMENT_QCOM 0x40A3 +#define CL_MEM_HOST_UNCACHED_QCOM 0x40A4 +#define CL_MEM_HOST_WRITEBACK_QCOM 0x40A5 +#define CL_MEM_HOST_WRITETHROUGH_QCOM 0x40A6 +#define CL_MEM_HOST_WRITE_COMBINING_QCOM 0x40A7 + +typedef cl_uint cl_image_pitch_info_qcom; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetDeviceImageInfoQCOM(cl_device_id device, + size_t image_width, + size_t image_height, + const cl_image_format *image_format, + cl_image_pitch_info_qcom param_name, + size_t param_value_size, + void *param_value, + size_t *param_value_size_ret); + +typedef struct _cl_mem_ext_host_ptr +{ + /* Type of external memory allocation. */ + /* Legal values will be defined in layered extensions. */ + cl_uint allocation_type; + + /* Host cache policy for this external memory allocation. 
*/ + cl_uint host_cache_policy; + +} cl_mem_ext_host_ptr; + + +/******************************************* +* cl_qcom_ext_host_ptr_iocoherent extension +********************************************/ + +/* Cache policy specifying io-coherence */ +#define CL_MEM_HOST_IOCOHERENT_QCOM 0x40A9 + + +/********************************* +* cl_qcom_ion_host_ptr extension +*********************************/ + +#define CL_MEM_ION_HOST_PTR_QCOM 0x40A8 + +typedef struct _cl_mem_ion_host_ptr +{ + /* Type of external memory allocation. */ + /* Must be CL_MEM_ION_HOST_PTR_QCOM for ION allocations. */ + cl_mem_ext_host_ptr ext_host_ptr; + + /* ION file descriptor */ + int ion_filedesc; + + /* Host pointer to the ION allocated memory */ + void* ion_hostptr; + +} cl_mem_ion_host_ptr; + + +/********************************* +* cl_qcom_android_native_buffer_host_ptr extension +*********************************/ + +#define CL_MEM_ANDROID_NATIVE_BUFFER_HOST_PTR_QCOM 0x40C6 + +typedef struct _cl_mem_android_native_buffer_host_ptr +{ + /* Type of external memory allocation. */ + /* Must be CL_MEM_ANDROID_NATIVE_BUFFER_HOST_PTR_QCOM for Android native buffers. 
*/ + cl_mem_ext_host_ptr ext_host_ptr; + + /* Virtual pointer to the android native buffer */ + void* anb_ptr; + +} cl_mem_android_native_buffer_host_ptr; + + +/****************************************** + * cl_img_yuv_image extension * + ******************************************/ + +/* Image formats used in clCreateImage */ +#define CL_NV21_IMG 0x40D0 +#define CL_YV12_IMG 0x40D1 + + +/****************************************** + * cl_img_cached_allocations extension * + ******************************************/ + +/* Flag values used by clCreateBuffer */ +#define CL_MEM_USE_UNCACHED_CPU_MEMORY_IMG (1 << 26) +#define CL_MEM_USE_CACHED_CPU_MEMORY_IMG (1 << 27) + + +/****************************************** + * cl_img_use_gralloc_ptr extension * + ******************************************/ +#define cl_img_use_gralloc_ptr 1 + +/* Flag values used by clCreateBuffer */ +#define CL_MEM_USE_GRALLOC_PTR_IMG (1 << 28) + +/* To be used by clGetEventInfo: */ +#define CL_COMMAND_ACQUIRE_GRALLOC_OBJECTS_IMG 0x40D2 +#define CL_COMMAND_RELEASE_GRALLOC_OBJECTS_IMG 0x40D3 + +/* Error code from clEnqueueReleaseGrallocObjectsIMG */ +#define CL_GRALLOC_RESOURCE_NOT_ACQUIRED_IMG 0x40D4 + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueAcquireGrallocObjectsIMG(cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem * mem_objects, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_EXT_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueReleaseGrallocObjectsIMG(cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem * mem_objects, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_EXT_SUFFIX__VERSION_1_2; + + +/********************************* +* cl_khr_subgroups extension +*********************************/ +#define cl_khr_subgroups 1 + +#if !defined(CL_VERSION_2_1) +/* For OpenCL 2.1 and newer, cl_kernel_sub_group_info is declared in CL.h. 
+ In hindsight, there should have been a khr suffix on this type for + the extension, but keeping it un-suffixed to maintain backwards + compatibility. */ +typedef cl_uint cl_kernel_sub_group_info; +#endif + +/* cl_kernel_sub_group_info */ +#define CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR 0x2033 +#define CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR 0x2034 + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetKernelSubGroupInfoKHR(cl_kernel in_kernel, + cl_device_id in_device, + cl_kernel_sub_group_info param_name, + size_t input_value_size, + const void * input_value, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_EXT_SUFFIX__VERSION_2_0_DEPRECATED; + +typedef CL_API_ENTRY cl_int +(CL_API_CALL * clGetKernelSubGroupInfoKHR_fn)(cl_kernel in_kernel, + cl_device_id in_device, + cl_kernel_sub_group_info param_name, + size_t input_value_size, + const void * input_value, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_EXT_SUFFIX__VERSION_2_0_DEPRECATED; + + +/********************************* +* cl_khr_mipmap_image extension +*********************************/ + +/* cl_sampler_properties */ +#define CL_SAMPLER_MIP_FILTER_MODE_KHR 0x1155 +#define CL_SAMPLER_LOD_MIN_KHR 0x1156 +#define CL_SAMPLER_LOD_MAX_KHR 0x1157 + + +/********************************* +* cl_khr_priority_hints extension +*********************************/ +/* This extension define is for backwards compatibility. + It shouldn't be required since this extension has no new functions. 
*/ +#define cl_khr_priority_hints 1 + +typedef cl_uint cl_queue_priority_khr; + +/* cl_command_queue_properties */ +#define CL_QUEUE_PRIORITY_KHR 0x1096 + +/* cl_queue_priority_khr */ +#define CL_QUEUE_PRIORITY_HIGH_KHR (1<<0) +#define CL_QUEUE_PRIORITY_MED_KHR (1<<1) +#define CL_QUEUE_PRIORITY_LOW_KHR (1<<2) + + +/********************************* +* cl_khr_throttle_hints extension +*********************************/ +/* This extension define is for backwards compatibility. + It shouldn't be required since this extension has no new functions. */ +#define cl_khr_throttle_hints 1 + +typedef cl_uint cl_queue_throttle_khr; + +/* cl_command_queue_properties */ +#define CL_QUEUE_THROTTLE_KHR 0x1097 + +/* cl_queue_throttle_khr */ +#define CL_QUEUE_THROTTLE_HIGH_KHR (1<<0) +#define CL_QUEUE_THROTTLE_MED_KHR (1<<1) +#define CL_QUEUE_THROTTLE_LOW_KHR (1<<2) + + +/********************************* +* cl_khr_subgroup_named_barrier +*********************************/ +/* This extension define is for backwards compatibility. + It shouldn't be required since this extension has no new functions. 
*/ +#define cl_khr_subgroup_named_barrier 1 + +/* cl_device_info */ +#define CL_DEVICE_MAX_NAMED_BARRIER_COUNT_KHR 0x2035 + + +/********************************* +* cl_khr_extended_versioning +*********************************/ + +#define CL_VERSION_MAJOR_BITS_KHR (10) +#define CL_VERSION_MINOR_BITS_KHR (10) +#define CL_VERSION_PATCH_BITS_KHR (12) + +#define CL_VERSION_MAJOR_MASK_KHR ((1 << CL_VERSION_MAJOR_BITS_KHR) - 1) +#define CL_VERSION_MINOR_MASK_KHR ((1 << CL_VERSION_MINOR_BITS_KHR) - 1) +#define CL_VERSION_PATCH_MASK_KHR ((1 << CL_VERSION_PATCH_BITS_KHR) - 1) + +#define CL_VERSION_MAJOR_KHR(version) ((version) >> (CL_VERSION_MINOR_BITS_KHR + CL_VERSION_PATCH_BITS_KHR)) +#define CL_VERSION_MINOR_KHR(version) (((version) >> CL_VERSION_PATCH_BITS_KHR) & CL_VERSION_MINOR_MASK_KHR) +#define CL_VERSION_PATCH_KHR(version) ((version) & CL_VERSION_PATCH_MASK_KHR) + +#define CL_MAKE_VERSION_KHR(major, minor, patch) \ + ((((major) & CL_VERSION_MAJOR_MASK_KHR) << (CL_VERSION_MINOR_BITS_KHR + CL_VERSION_PATCH_BITS_KHR)) | \ + (((minor) & CL_VERSION_MINOR_MASK_KHR) << CL_VERSION_PATCH_BITS_KHR) | \ + ((patch) & CL_VERSION_PATCH_MASK_KHR)) + +typedef cl_uint cl_version_khr; + +#define CL_NAME_VERSION_MAX_NAME_SIZE_KHR 64 + +typedef struct _cl_name_version_khr +{ + cl_version_khr version; + char name[CL_NAME_VERSION_MAX_NAME_SIZE_KHR]; +} cl_name_version_khr; + +/* cl_platform_info */ +#define CL_PLATFORM_NUMERIC_VERSION_KHR 0x0906 +#define CL_PLATFORM_EXTENSIONS_WITH_VERSION_KHR 0x0907 + +/* cl_device_info */ +#define CL_DEVICE_NUMERIC_VERSION_KHR 0x105E +#define CL_DEVICE_OPENCL_C_NUMERIC_VERSION_KHR 0x105F +#define CL_DEVICE_EXTENSIONS_WITH_VERSION_KHR 0x1060 +#define CL_DEVICE_ILS_WITH_VERSION_KHR 0x1061 +#define CL_DEVICE_BUILT_IN_KERNELS_WITH_VERSION_KHR 0x1062 + + +/********************************** + * cl_arm_import_memory extension * + **********************************/ +#define cl_arm_import_memory 1 + +typedef intptr_t cl_import_properties_arm; + +/* Default 
and valid properties name for cl_arm_import_memory */ +#define CL_IMPORT_TYPE_ARM 0x40B2 + +/* Host process memory type default value for CL_IMPORT_TYPE_ARM property */ +#define CL_IMPORT_TYPE_HOST_ARM 0x40B3 + +/* DMA BUF memory type value for CL_IMPORT_TYPE_ARM property */ +#define CL_IMPORT_TYPE_DMA_BUF_ARM 0x40B4 + +/* Protected memory property */ +#define CL_IMPORT_TYPE_PROTECTED_ARM 0x40B5 + +/* Android hardware buffer type value for CL_IMPORT_TYPE_ARM property */ +#define CL_IMPORT_TYPE_ANDROID_HARDWARE_BUFFER_ARM 0x41E2 + +/* Data consistency with host property */ +#define CL_IMPORT_DMA_BUF_DATA_CONSISTENCY_WITH_HOST_ARM 0x41E3 + +/* Import memory size value to indicate a size for the whole buffer */ +#define CL_IMPORT_MEMORY_WHOLE_ALLOCATION_ARM SIZE_MAX + +/* This extension adds a new function that allows for direct memory import into + * OpenCL via the clImportMemoryARM function. + * + * Memory imported through this interface will be mapped into the device's page + * tables directly, providing zero copy access. It will never fall back to copy + * operations and aliased buffers. + * + * Types of memory supported for import are specified as additional extension + * strings. + * + * This extension produces cl_mem allocations which are compatible with all other + * users of cl_mem in the standard API. + * + * This extension maps pages with the same properties as the normal buffer creation + * function clCreateBuffer. 
+ */ +extern CL_API_ENTRY cl_mem CL_API_CALL +clImportMemoryARM( cl_context context, + cl_mem_flags flags, + const cl_import_properties_arm *properties, + void *memory, + size_t size, + cl_int *errcode_ret) CL_EXT_SUFFIX__VERSION_1_0; + + +/****************************************** + * cl_arm_shared_virtual_memory extension * + ******************************************/ +#define cl_arm_shared_virtual_memory 1 + +/* Used by clGetDeviceInfo */ +#define CL_DEVICE_SVM_CAPABILITIES_ARM 0x40B6 + +/* Used by clGetMemObjectInfo */ +#define CL_MEM_USES_SVM_POINTER_ARM 0x40B7 + +/* Used by clSetKernelExecInfoARM: */ +#define CL_KERNEL_EXEC_INFO_SVM_PTRS_ARM 0x40B8 +#define CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM_ARM 0x40B9 + +/* To be used by clGetEventInfo: */ +#define CL_COMMAND_SVM_FREE_ARM 0x40BA +#define CL_COMMAND_SVM_MEMCPY_ARM 0x40BB +#define CL_COMMAND_SVM_MEMFILL_ARM 0x40BC +#define CL_COMMAND_SVM_MAP_ARM 0x40BD +#define CL_COMMAND_SVM_UNMAP_ARM 0x40BE + +/* Flag values returned by clGetDeviceInfo with CL_DEVICE_SVM_CAPABILITIES_ARM as the param_name. 
*/ +#define CL_DEVICE_SVM_COARSE_GRAIN_BUFFER_ARM (1 << 0) +#define CL_DEVICE_SVM_FINE_GRAIN_BUFFER_ARM (1 << 1) +#define CL_DEVICE_SVM_FINE_GRAIN_SYSTEM_ARM (1 << 2) +#define CL_DEVICE_SVM_ATOMICS_ARM (1 << 3) + +/* Flag values used by clSVMAllocARM: */ +#define CL_MEM_SVM_FINE_GRAIN_BUFFER_ARM (1 << 10) +#define CL_MEM_SVM_ATOMICS_ARM (1 << 11) + +typedef cl_bitfield cl_svm_mem_flags_arm; +typedef cl_uint cl_kernel_exec_info_arm; +typedef cl_bitfield cl_device_svm_capabilities_arm; + +extern CL_API_ENTRY void * CL_API_CALL +clSVMAllocARM(cl_context context, + cl_svm_mem_flags_arm flags, + size_t size, + cl_uint alignment) CL_EXT_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY void CL_API_CALL +clSVMFreeARM(cl_context context, + void * svm_pointer) CL_EXT_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueSVMFreeARM(cl_command_queue command_queue, + cl_uint num_svm_pointers, + void * svm_pointers[], + void (CL_CALLBACK * pfn_free_func)(cl_command_queue queue, + cl_uint num_svm_pointers, + void * svm_pointers[], + void * user_data), + void * user_data, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_EXT_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueSVMMemcpyARM(cl_command_queue command_queue, + cl_bool blocking_copy, + void * dst_ptr, + const void * src_ptr, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_EXT_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueSVMMemFillARM(cl_command_queue command_queue, + void * svm_ptr, + const void * pattern, + size_t pattern_size, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_EXT_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueSVMMapARM(cl_command_queue command_queue, + cl_bool blocking_map, + cl_map_flags flags, + void * svm_ptr, + size_t size, + cl_uint 
num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_EXT_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueSVMUnmapARM(cl_command_queue command_queue, + void * svm_ptr, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_EXT_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetKernelArgSVMPointerARM(cl_kernel kernel, + cl_uint arg_index, + const void * arg_value) CL_EXT_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetKernelExecInfoARM(cl_kernel kernel, + cl_kernel_exec_info_arm param_name, + size_t param_value_size, + const void * param_value) CL_EXT_SUFFIX__VERSION_1_2; + +/******************************** + * cl_arm_get_core_id extension * + ********************************/ + +#ifdef CL_VERSION_1_2 + +#define cl_arm_get_core_id 1 + +/* Device info property for bitfield of cores present */ +#define CL_DEVICE_COMPUTE_UNITS_BITFIELD_ARM 0x40BF + +#endif /* CL_VERSION_1_2 */ + +/********************************* +* cl_arm_job_slot_selection +*********************************/ + +#define cl_arm_job_slot_selection 1 + +/* cl_device_info */ +#define CL_DEVICE_JOB_SLOTS_ARM 0x41E0 + +/* cl_command_queue_properties */ +#define CL_QUEUE_JOB_SLOT_ARM 0x41E1 + +#ifdef __cplusplus +} +#endif + + +#endif /* __CL_EXT_H */ diff --git a/projects/hip/amdocl/CL/cl_gl.h b/projects/hip/amdocl/CL/cl_gl.h new file mode 100644 index 0000000000..fbdaf62977 --- /dev/null +++ b/projects/hip/amdocl/CL/cl_gl.h @@ -0,0 +1,171 @@ +/********************************************************************************** + * Copyright (c) 2008-2019 The Khronos Group Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and/or associated documentation files (the + * "Materials"), to deal in the Materials without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Materials, and to + * permit persons to whom the Materials are furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Materials. + * + * MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS + * KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS + * SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT + * https://www.khronos.org/registry/ + * + * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. 
+ **********************************************************************************/ + +#ifndef __OPENCL_CL_GL_H +#define __OPENCL_CL_GL_H + +#include <CL/cl.h> + +#ifdef __cplusplus +extern "C" { +#endif + +typedef cl_uint cl_gl_object_type; +typedef cl_uint cl_gl_texture_info; +typedef cl_uint cl_gl_platform_info; +typedef struct __GLsync *cl_GLsync; + +/* cl_gl_object_type = 0x2000 - 0x200F enum values are currently taken */ +#define CL_GL_OBJECT_BUFFER 0x2000 +#define CL_GL_OBJECT_TEXTURE2D 0x2001 +#define CL_GL_OBJECT_TEXTURE3D 0x2002 +#define CL_GL_OBJECT_RENDERBUFFER 0x2003 +#ifdef CL_VERSION_1_2 +#define CL_GL_OBJECT_TEXTURE2D_ARRAY 0x200E +#define CL_GL_OBJECT_TEXTURE1D 0x200F +#define CL_GL_OBJECT_TEXTURE1D_ARRAY 0x2010 +#define CL_GL_OBJECT_TEXTURE_BUFFER 0x2011 +#endif + +/* cl_gl_texture_info */ +#define CL_GL_TEXTURE_TARGET 0x2004 +#define CL_GL_MIPMAP_LEVEL 0x2005 +#ifdef CL_VERSION_1_2 +#define CL_GL_NUM_SAMPLES 0x2012 +#endif + + +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateFromGLBuffer(cl_context context, + cl_mem_flags flags, + cl_GLuint bufobj, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_2 + +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateFromGLTexture(cl_context context, + cl_mem_flags flags, + cl_GLenum target, + cl_GLint miplevel, + cl_GLuint texture, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2; + +#endif + +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateFromGLRenderbuffer(cl_context context, + cl_mem_flags flags, + cl_GLuint renderbuffer, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetGLObjectInfo(cl_mem memobj, + cl_gl_object_type * gl_object_type, + cl_GLuint * gl_object_name) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetGLTextureInfo(cl_mem memobj, + cl_gl_texture_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int 
CL_API_CALL +clEnqueueAcquireGLObjects(cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem * mem_objects, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueReleaseGLObjects(cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem * mem_objects, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_0; + + +/* Deprecated OpenCL 1.1 APIs */ +extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL +clCreateFromGLTexture2D(cl_context context, + cl_mem_flags flags, + cl_GLenum target, + cl_GLint miplevel, + cl_GLuint texture, + cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + +extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL +clCreateFromGLTexture3D(cl_context context, + cl_mem_flags flags, + cl_GLenum target, + cl_GLint miplevel, + cl_GLuint texture, + cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + +/* cl_khr_gl_sharing extension */ + +#define cl_khr_gl_sharing 1 + +typedef cl_uint cl_gl_context_info; + +/* Additional Error Codes */ +#define CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR -1000 + +/* cl_gl_context_info */ +#define CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR 0x2006 +#define CL_DEVICES_FOR_GL_CONTEXT_KHR 0x2007 + +/* Additional cl_context_properties */ +#define CL_GL_CONTEXT_KHR 0x2008 +#define CL_EGL_DISPLAY_KHR 0x2009 +#define CL_GLX_DISPLAY_KHR 0x200A +#define CL_WGL_HDC_KHR 0x200B +#define CL_CGL_SHAREGROUP_KHR 0x200C + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetGLContextInfoKHR(const cl_context_properties * properties, + cl_gl_context_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetGLContextInfoKHR_fn)( + const cl_context_properties * properties, + 
cl_gl_context_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret); + +#ifdef __cplusplus +} +#endif + +#endif /* __OPENCL_CL_GL_H */ diff --git a/projects/hip/amdocl/CL/cl_gl_ext.h b/projects/hip/amdocl/CL/cl_gl_ext.h new file mode 100644 index 0000000000..c26d31abed --- /dev/null +++ b/projects/hip/amdocl/CL/cl_gl_ext.h @@ -0,0 +1,52 @@ +/********************************************************************************** + * Copyright (c) 2008-2019 The Khronos Group Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and/or associated documentation files (the + * "Materials"), to deal in the Materials without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Materials, and to + * permit persons to whom the Materials are furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Materials. + * + * MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS + * KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS + * SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT + * https://www.khronos.org/registry/ + * + * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. 
+ **********************************************************************************/ + +#ifndef __OPENCL_CL_GL_EXT_H +#define __OPENCL_CL_GL_EXT_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include <CL/cl_gl.h> + +/* + * cl_khr_gl_event extension + */ +#define CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR 0x200D + +extern CL_API_ENTRY cl_event CL_API_CALL +clCreateEventFromGLsyncKHR(cl_context context, + cl_GLsync cl_GLsync, + cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_1; + +#ifdef __cplusplus +} +#endif + +#endif /* __OPENCL_CL_GL_EXT_H */ diff --git a/projects/hip/amdocl/CL/cl_icd.h b/projects/hip/amdocl/CL/cl_icd.h new file mode 100644 index 0000000000..2be64719b6 --- /dev/null +++ b/projects/hip/amdocl/CL/cl_icd.h @@ -0,0 +1,1269 @@ +/******************************************************************************* + * Copyright (c) 2019 The Khronos Group Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and/or associated documentation files (the + * "Materials"), to deal in the Materials without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Materials, and to + * permit persons to whom the Materials are furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Materials. + * + * MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS + * KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS + * SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT + * https://www.khronos.org/registry/ + * + * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. + ******************************************************************************/ + +#ifndef OPENCL_CL_ICD_H +#define OPENCL_CL_ICD_H + +#include <CL/cl.h> +#include <CL/cl_egl.h> +#include <CL/cl_ext.h> +#include <CL/cl_gl.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * This file contains pointer type definitions for each of the CL API calls as + * well as a type definition for the dispatch table used by the Khronos ICD + * loader (see cl_khr_icd extension specification for background). + */ + +/* API function pointer definitions */ + +// Platform APIs +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetPlatformIDs)( + cl_uint num_entries, cl_platform_id *platforms, + cl_uint *num_platforms) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetPlatformInfo)( + cl_platform_id platform, cl_platform_info param_name, + size_t param_value_size, void *param_value, + size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +// Device APIs +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetDeviceIDs)( + cl_platform_id platform, cl_device_type device_type, cl_uint num_entries, + cl_device_id *devices, cl_uint *num_devices) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetDeviceInfo)( + cl_device_id device, cl_device_info param_name, size_t param_value_size, + void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_2 + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clCreateSubDevices)( + cl_device_id in_device, + const cl_device_partition_property *partition_properties, + cl_uint num_entries, cl_device_id *out_devices, cl_uint *num_devices); + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clRetainDevice)( + cl_device_id 
device) CL_API_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clReleaseDevice)( + cl_device_id device) CL_API_SUFFIX__VERSION_1_2; + +#else + +typedef void *cl_api_clCreateSubDevices; +typedef void *cl_api_clRetainDevice; +typedef void *cl_api_clReleaseDevice; + +#endif + +// Context APIs +typedef CL_API_ENTRY cl_context(CL_API_CALL *cl_api_clCreateContext)( + const cl_context_properties *properties, cl_uint num_devices, + const cl_device_id *devices, + void(CL_CALLBACK *pfn_notify)(const char *, const void *, size_t, void *), + void *user_data, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_context(CL_API_CALL *cl_api_clCreateContextFromType)( + const cl_context_properties *properties, cl_device_type device_type, + void(CL_CALLBACK *pfn_notify)(const char *, const void *, size_t, void *), + void *user_data, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clRetainContext)( + cl_context context) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clReleaseContext)( + cl_context context) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetContextInfo)( + cl_context context, cl_context_info param_name, size_t param_value_size, + void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +// Command Queue APIs +typedef CL_API_ENTRY cl_command_queue(CL_API_CALL *cl_api_clCreateCommandQueue)( + cl_context context, cl_device_id device, + cl_command_queue_properties properties, + cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_2_0 + +typedef CL_API_ENTRY +cl_command_queue(CL_API_CALL *cl_api_clCreateCommandQueueWithProperties)( + cl_context /* context */, cl_device_id /* device */, + const cl_queue_properties * /* properties */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_2_0; + +#else + +typedef void *cl_api_clCreateCommandQueueWithProperties; + +#endif + 
+typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clRetainCommandQueue)( + cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clReleaseCommandQueue)( + cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetCommandQueueInfo)( + cl_command_queue command_queue, cl_command_queue_info param_name, + size_t param_value_size, void *param_value, + size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +// Memory Object APIs +typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateBuffer)( + cl_context context, cl_mem_flags flags, size_t size, void *host_ptr, + cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_2 + +typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateImage)( + cl_context context, cl_mem_flags flags, const cl_image_format *image_format, + const cl_image_desc *image_desc, void *host_ptr, + cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_2; + +#else + +typedef void *cl_api_clCreateImage; + +#endif + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clRetainMemObject)( + cl_mem memobj) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clReleaseMemObject)( + cl_mem memobj) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetSupportedImageFormats)( + cl_context context, cl_mem_flags flags, cl_mem_object_type image_type, + cl_uint num_entries, cl_image_format *image_formats, + cl_uint *num_image_formats) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetMemObjectInfo)( + cl_mem memobj, cl_mem_info param_name, size_t param_value_size, + void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetImageInfo)( + cl_mem image, cl_image_info param_name, size_t param_value_size, + void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + 
+#ifdef CL_VERSION_2_0 + +typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreatePipe)( + cl_context /* context */, cl_mem_flags /* flags */, + cl_uint /* pipe_packet_size */, cl_uint /* pipe_max_packets */, + const cl_pipe_properties * /* properties */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_2_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetPipeInfo)( + cl_mem /* pipe */, cl_pipe_info /* param_name */, + size_t /* param_value_size */, void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_2_0; + +typedef CL_API_ENTRY void *(CL_API_CALL *cl_api_clSVMAlloc)( + cl_context /* context */, cl_svm_mem_flags /* flags */, size_t /* size */, + unsigned int /* alignment */)CL_API_SUFFIX__VERSION_2_0; + +typedef CL_API_ENTRY void(CL_API_CALL *cl_api_clSVMFree)( + cl_context /* context */, + void * /* svm_pointer */) CL_API_SUFFIX__VERSION_2_0; + +#else + +typedef void *cl_api_clCreatePipe; +typedef void *cl_api_clGetPipeInfo; +typedef void *cl_api_clSVMAlloc; +typedef void *cl_api_clSVMFree; + +#endif + +// Sampler APIs +typedef CL_API_ENTRY cl_sampler(CL_API_CALL *cl_api_clCreateSampler)( + cl_context context, cl_bool normalized_coords, + cl_addressing_mode addressing_mode, cl_filter_mode filter_mode, + cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clRetainSampler)( + cl_sampler sampler) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clReleaseSampler)( + cl_sampler sampler) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetSamplerInfo)( + cl_sampler sampler, cl_sampler_info param_name, size_t param_value_size, + void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_2_0 + +typedef CL_API_ENTRY +cl_sampler(CL_API_CALL *cl_api_clCreateSamplerWithProperties)( + cl_context /* context */, + const cl_sampler_properties * /* sampler_properties */, + cl_int * /* 
errcode_ret */) CL_API_SUFFIX__VERSION_2_0; + +#else + +typedef void *cl_api_clCreateSamplerWithProperties; + +#endif + +// Program Object APIs +typedef CL_API_ENTRY cl_program(CL_API_CALL *cl_api_clCreateProgramWithSource)( + cl_context context, cl_uint count, const char **strings, + const size_t *lengths, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_program(CL_API_CALL *cl_api_clCreateProgramWithBinary)( + cl_context context, cl_uint num_devices, const cl_device_id *device_list, + const size_t *lengths, const unsigned char **binaries, + cl_int *binary_status, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_2 + +typedef CL_API_ENTRY +cl_program(CL_API_CALL *cl_api_clCreateProgramWithBuiltInKernels)( + cl_context context, cl_uint num_devices, const cl_device_id *device_list, + const char *kernel_names, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_2; + +#else + +typedef void *cl_api_clCreateProgramWithBuiltInKernels; + +#endif + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clRetainProgram)( + cl_program program) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clReleaseProgram)( + cl_program program) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clBuildProgram)( + cl_program program, cl_uint num_devices, const cl_device_id *device_list, + const char *options, + void(CL_CALLBACK *pfn_notify)(cl_program program, void *user_data), + void *user_data) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_2 + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clCompileProgram)( + cl_program program, cl_uint num_devices, const cl_device_id *device_list, + const char *options, cl_uint num_input_headers, + const cl_program *input_headers, const char **header_include_names, + void(CL_CALLBACK *pfn_notify)(cl_program program, void *user_data), + void *user_data) CL_API_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_program(CL_API_CALL *cl_api_clLinkProgram)( + 
cl_context context, cl_uint num_devices, const cl_device_id *device_list, + const char *options, cl_uint num_input_programs, + const cl_program *input_programs, + void(CL_CALLBACK *pfn_notify)(cl_program program, void *user_data), + void *user_data, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_2; + +#else + +typedef void *cl_api_clCompileProgram; +typedef void *cl_api_clLinkProgram; + +#endif + +#ifdef CL_VERSION_2_2 + +typedef CL_API_ENTRY +cl_int(CL_API_CALL *cl_api_clSetProgramSpecializationConstant)( + cl_program program, cl_uint spec_id, size_t spec_size, + const void *spec_value) CL_API_SUFFIX__VERSION_2_2; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clSetProgramReleaseCallback)( + cl_program program, + void(CL_CALLBACK *pfn_notify)(cl_program program, void *user_data), + void *user_data) CL_API_SUFFIX__VERSION_2_2; + +#else + +typedef void *cl_api_clSetProgramSpecializationConstant; +typedef void *cl_api_clSetProgramReleaseCallback; + +#endif + +#ifdef CL_VERSION_1_2 + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clUnloadPlatformCompiler)( + cl_platform_id platform) CL_API_SUFFIX__VERSION_1_2; + +#else + +typedef void *cl_api_clUnloadPlatformCompiler; + +#endif + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetProgramInfo)( + cl_program program, cl_program_info param_name, size_t param_value_size, + void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetProgramBuildInfo)( + cl_program program, cl_device_id device, cl_program_build_info param_name, + size_t param_value_size, void *param_value, + size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +// Kernel Object APIs +typedef CL_API_ENTRY cl_kernel(CL_API_CALL *cl_api_clCreateKernel)( + cl_program program, const char *kernel_name, + cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clCreateKernelsInProgram)( + cl_program program, cl_uint num_kernels, 
cl_kernel *kernels, + cl_uint *num_kernels_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clRetainKernel)( + cl_kernel kernel) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clReleaseKernel)( + cl_kernel kernel) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clSetKernelArg)( + cl_kernel kernel, cl_uint arg_index, size_t arg_size, + const void *arg_value) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetKernelInfo)( + cl_kernel kernel, cl_kernel_info param_name, size_t param_value_size, + void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_2 + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetKernelArgInfo)( + cl_kernel kernel, cl_uint arg_indx, cl_kernel_arg_info param_name, + size_t param_value_size, void *param_value, + size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_2; + +#else + +typedef void *cl_api_clGetKernelArgInfo; + +#endif + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetKernelWorkGroupInfo)( + cl_kernel kernel, cl_device_id device, cl_kernel_work_group_info param_name, + size_t param_value_size, void *param_value, + size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_2_0 + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clSetKernelArgSVMPointer)( + cl_kernel /* kernel */, cl_uint /* arg_index */, + const void * /* arg_value */) CL_API_SUFFIX__VERSION_2_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clSetKernelExecInfo)( + cl_kernel /* kernel */, cl_kernel_exec_info /* param_name */, + size_t /* param_value_size */, + const void * /* param_value */) CL_API_SUFFIX__VERSION_2_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetKernelSubGroupInfoKHR)( + cl_kernel /* in_kernel */, cl_device_id /*in_device*/, + cl_kernel_sub_group_info /* param_name */, size_t /*input_value_size*/, + const void * /*input_value*/, size_t 
/*param_value_size*/, + void * /*param_value*/, + size_t * /*param_value_size_ret*/) CL_EXT_SUFFIX__VERSION_2_0; + +#else + +typedef void *cl_api_clSetKernelArgSVMPointer; +typedef void *cl_api_clSetKernelExecInfo; +typedef void *cl_api_clGetKernelSubGroupInfoKHR; + +#endif + +// Event Object APIs +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clWaitForEvents)( + cl_uint num_events, const cl_event *event_list) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetEventInfo)( + cl_event event, cl_event_info param_name, size_t param_value_size, + void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clRetainEvent)(cl_event event) + CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clReleaseEvent)(cl_event event) + CL_API_SUFFIX__VERSION_1_0; + +// Profiling APIs +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetEventProfilingInfo)( + cl_event event, cl_profiling_info param_name, size_t param_value_size, + void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +// Flush and Finish APIs +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clFlush)( + cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clFinish)( + cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_0; + +// Enqueued Commands APIs +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueReadBuffer)( + cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_read, + size_t offset, size_t cb, void *ptr, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_1 + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueReadBufferRect)( + cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_read, + const size_t *buffer_origin, const size_t *host_origin, + const size_t *region, size_t 
buffer_row_pitch, size_t buffer_slice_pitch, + size_t host_row_pitch, size_t host_slice_pitch, void *ptr, + cl_uint num_events_in_wait_list, const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_1; + +#else + +typedef void *cl_api_clEnqueueReadBufferRect; + +#endif + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueWriteBuffer)( + cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_write, + size_t offset, size_t cb, const void *ptr, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_1 + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueWriteBufferRect)( + cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_read, + const size_t *buffer_origin, const size_t *host_origin, + const size_t *region, size_t buffer_row_pitch, size_t buffer_slice_pitch, + size_t host_row_pitch, size_t host_slice_pitch, const void *ptr, + cl_uint num_events_in_wait_list, const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_1; + +#else + +typedef void *cl_api_clEnqueueWriteBufferRect; + +#endif + +#ifdef CL_VERSION_1_2 + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueFillBuffer)( + cl_command_queue command_queue, cl_mem buffer, const void *pattern, + size_t pattern_size, size_t offset, size_t cb, + cl_uint num_events_in_wait_list, const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_2; + +#else + +typedef void *cl_api_clEnqueueFillBuffer; + +#endif + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueCopyBuffer)( + cl_command_queue command_queue, cl_mem src_buffer, cl_mem dst_buffer, + size_t src_offset, size_t dst_offset, size_t cb, + cl_uint num_events_in_wait_list, const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_1 + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueCopyBufferRect)( + cl_command_queue command_queue, 
cl_mem src_buffer, cl_mem dst_buffer, + const size_t *src_origin, const size_t *dst_origin, const size_t *region, + size_t src_row_pitch, size_t src_slice_pitch, size_t dst_row_pitch, + size_t dst_slice_pitch, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_1; + +#else + +typedef void *cl_api_clEnqueueCopyBufferRect; + +#endif + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueReadImage)( + cl_command_queue command_queue, cl_mem image, cl_bool blocking_read, + const size_t *origin, const size_t *region, size_t row_pitch, + size_t slice_pitch, void *ptr, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueWriteImage)( + cl_command_queue command_queue, cl_mem image, cl_bool blocking_write, + const size_t *origin, const size_t *region, size_t input_row_pitch, + size_t input_slice_pitch, const void *ptr, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_2 + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueFillImage)( + cl_command_queue command_queue, cl_mem image, const void *fill_color, + const size_t origin[3], const size_t region[3], + cl_uint num_events_in_wait_list, const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_2; + +#else + +typedef void *cl_api_clEnqueueFillImage; + +#endif + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueCopyImage)( + cl_command_queue command_queue, cl_mem src_image, cl_mem dst_image, + const size_t *src_origin, const size_t *dst_origin, const size_t *region, + cl_uint num_events_in_wait_list, const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueCopyImageToBuffer)( + cl_command_queue command_queue, cl_mem src_image, cl_mem 
dst_buffer, + const size_t *src_origin, const size_t *region, size_t dst_offset, + cl_uint num_events_in_wait_list, const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueCopyBufferToImage)( + cl_command_queue command_queue, cl_mem src_buffer, cl_mem dst_image, + size_t src_offset, const size_t *dst_origin, const size_t *region, + cl_uint num_events_in_wait_list, const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY void *(CL_API_CALL *cl_api_clEnqueueMapBuffer)( + cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_map, + cl_map_flags map_flags, size_t offset, size_t cb, + cl_uint num_events_in_wait_list, const cl_event *event_wait_list, + cl_event *event, cl_int *errcode_ret)CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY void *(CL_API_CALL *cl_api_clEnqueueMapImage)( + cl_command_queue command_queue, cl_mem image, cl_bool blocking_map, + cl_map_flags map_flags, const size_t *origin, const size_t *region, + size_t *image_row_pitch, size_t *image_slice_pitch, + cl_uint num_events_in_wait_list, const cl_event *event_wait_list, + cl_event *event, cl_int *errcode_ret)CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueUnmapMemObject)( + cl_command_queue command_queue, cl_mem memobj, void *mapped_ptr, + cl_uint num_events_in_wait_list, const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_2 + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueMigrateMemObjects)( + cl_command_queue command_queue, cl_uint num_mem_objects, + const cl_mem *mem_objects, cl_mem_migration_flags flags, + cl_uint num_events_in_wait_list, const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_2; + +#else + +typedef void *cl_api_clEnqueueMigrateMemObjects; + +#endif + +typedef CL_API_ENTRY cl_int(CL_API_CALL 
*cl_api_clEnqueueNDRangeKernel)( + cl_command_queue command_queue, cl_kernel kernel, cl_uint work_dim, + const size_t *global_work_offset, const size_t *global_work_size, + const size_t *local_work_size, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueTask)( + cl_command_queue command_queue, cl_kernel kernel, + cl_uint num_events_in_wait_list, const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueNativeKernel)( + cl_command_queue command_queue, void(CL_CALLBACK *user_func)(void *), + void *args, size_t cb_args, cl_uint num_mem_objects, const cl_mem *mem_list, + const void **args_mem_loc, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_2 + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueMarkerWithWaitList)( + cl_command_queue command_queue, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueBarrierWithWaitList)( + cl_command_queue command_queue, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY void *( + CL_API_CALL *cl_api_clGetExtensionFunctionAddressForPlatform)( + cl_platform_id platform, + const char *function_name)CL_API_SUFFIX__VERSION_1_2; + +#else + +typedef void *cl_api_clEnqueueMarkerWithWaitList; +typedef void *cl_api_clEnqueueBarrierWithWaitList; +typedef void *cl_api_clGetExtensionFunctionAddressForPlatform; + +#endif + +// Shared Virtual Memory APIs + +#ifdef CL_VERSION_2_0 + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueSVMFree)( + cl_command_queue /* command_queue */, cl_uint /* num_svm_pointers */, + void ** /* 
svm_pointers */, + void(CL_CALLBACK *pfn_free_func)(cl_command_queue /* queue */, + cl_uint /* num_svm_pointers */, + void ** /* svm_pointers[] */, + void * /* user_data */), + void * /* user_data */, cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_2_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueSVMMemcpy)( + cl_command_queue /* command_queue */, cl_bool /* blocking_copy */, + void * /* dst_ptr */, const void * /* src_ptr */, size_t /* size */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_2_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueSVMMemFill)( + cl_command_queue /* command_queue */, void * /* svm_ptr */, + const void * /* pattern */, size_t /* pattern_size */, size_t /* size */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_2_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueSVMMap)( + cl_command_queue /* command_queue */, cl_bool /* blocking_map */, + cl_map_flags /* map_flags */, void * /* svm_ptr */, size_t /* size */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_2_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueSVMUnmap)( + cl_command_queue /* command_queue */, void * /* svm_ptr */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_2_0; + +#else + +typedef void *cl_api_clEnqueueSVMFree; +typedef void *cl_api_clEnqueueSVMMemcpy; +typedef void *cl_api_clEnqueueSVMMemFill; +typedef void *cl_api_clEnqueueSVMMap; +typedef void *cl_api_clEnqueueSVMUnmap; + +#endif + +// Deprecated APIs +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clSetCommandQueueProperty)( + cl_command_queue 
command_queue, cl_command_queue_properties properties, + cl_bool enable, cl_command_queue_properties *old_properties) + CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED; + +typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateImage2D)( + cl_context context, cl_mem_flags flags, const cl_image_format *image_format, + size_t image_width, size_t image_height, size_t image_row_pitch, + void *host_ptr, cl_int *errcode_ret) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + +typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateImage3D)( + cl_context context, cl_mem_flags flags, const cl_image_format *image_format, + size_t image_width, size_t image_height, size_t image_depth, + size_t image_row_pitch, size_t image_slice_pitch, void *host_ptr, + cl_int *errcode_ret) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clUnloadCompiler)(void) + CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueMarker)( + cl_command_queue command_queue, + cl_event *event) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueWaitForEvents)( + cl_command_queue command_queue, cl_uint num_events, + const cl_event *event_list) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueBarrier)( + cl_command_queue command_queue) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + +typedef CL_API_ENTRY void *(CL_API_CALL *cl_api_clGetExtensionFunctionAddress)( + const char *function_name)CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + +// GL and other APIs +typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromGLBuffer)( + cl_context context, cl_mem_flags flags, cl_GLuint bufobj, + int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromGLTexture)( + cl_context context, cl_mem_flags flags, cl_GLenum target, cl_GLint miplevel, + cl_GLuint texture, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_2; + 
+typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromGLTexture2D)( + cl_context context, cl_mem_flags flags, cl_GLenum target, cl_GLint miplevel, + cl_GLuint texture, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromGLTexture3D)( + cl_context context, cl_mem_flags flags, cl_GLenum target, cl_GLint miplevel, + cl_GLuint texture, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromGLRenderbuffer)( + cl_context context, cl_mem_flags flags, cl_GLuint renderbuffer, + cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetGLObjectInfo)( + cl_mem memobj, cl_gl_object_type *gl_object_type, + cl_GLuint *gl_object_name) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetGLTextureInfo)( + cl_mem memobj, cl_gl_texture_info param_name, size_t param_value_size, + void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueAcquireGLObjects)( + cl_command_queue command_queue, cl_uint num_objects, + const cl_mem *mem_objects, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueReleaseGLObjects)( + cl_command_queue command_queue, cl_uint num_objects, + const cl_mem *mem_objects, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_0; + +/* cl_khr_gl_sharing */ +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetGLContextInfoKHR)( + const cl_context_properties *properties, cl_gl_context_info param_name, + size_t param_value_size, void *param_value, size_t *param_value_size_ret); + +/* cl_khr_gl_event */ +typedef CL_API_ENTRY cl_event(CL_API_CALL *cl_api_clCreateEventFromGLsyncKHR)( + cl_context context, cl_GLsync 
sync, cl_int *errcode_ret); + +#if defined(_WIN32) + +/* cl_khr_d3d10_sharing */ + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetDeviceIDsFromD3D10KHR)( + cl_platform_id platform, cl_d3d10_device_source_khr d3d_device_source, + void *d3d_object, cl_d3d10_device_set_khr d3d_device_set, + cl_uint num_entries, cl_device_id *devices, + cl_uint *num_devices) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromD3D10BufferKHR)( + cl_context context, cl_mem_flags flags, ID3D10Buffer *resource, + cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromD3D10Texture2DKHR)( + cl_context context, cl_mem_flags flags, ID3D10Texture2D *resource, + UINT subresource, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromD3D10Texture3DKHR)( + cl_context context, cl_mem_flags flags, ID3D10Texture3D *resource, + UINT subresource, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY +cl_int(CL_API_CALL *cl_api_clEnqueueAcquireD3D10ObjectsKHR)( + cl_command_queue command_queue, cl_uint num_objects, + const cl_mem *mem_objects, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY +cl_int(CL_API_CALL *cl_api_clEnqueueReleaseD3D10ObjectsKHR)( + cl_command_queue command_queue, cl_uint num_objects, + const cl_mem *mem_objects, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL clGetDeviceIDsFromD3D10KHR( + cl_platform_id platform, cl_d3d10_device_source_khr d3d_device_source, + void *d3d_object, cl_d3d10_device_set_khr d3d_device_set, + cl_uint num_entries, cl_device_id *devices, cl_uint *num_devices); + +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateFromD3D10BufferKHR(cl_context context, cl_mem_flags flags, + ID3D10Buffer 
*resource, cl_int *errcode_ret); + +extern CL_API_ENTRY cl_mem CL_API_CALL clCreateFromD3D10Texture2DKHR( + cl_context context, cl_mem_flags flags, ID3D10Texture2D *resource, + UINT subresource, cl_int *errcode_ret); + +extern CL_API_ENTRY cl_mem CL_API_CALL clCreateFromD3D10Texture3DKHR( + cl_context context, cl_mem_flags flags, ID3D10Texture3D *resource, + UINT subresource, cl_int *errcode_ret); + +extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueAcquireD3D10ObjectsKHR( + cl_command_queue command_queue, cl_uint num_objects, + const cl_mem *mem_objects, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, cl_event *event); + +extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueReleaseD3D10ObjectsKHR( + cl_command_queue command_queue, cl_uint num_objects, + const cl_mem *mem_objects, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, cl_event *event); + +/* cl_khr_d3d11_sharing */ +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetDeviceIDsFromD3D11KHR)( + cl_platform_id platform, cl_d3d11_device_source_khr d3d_device_source, + void *d3d_object, cl_d3d11_device_set_khr d3d_device_set, + cl_uint num_entries, cl_device_id *devices, + cl_uint *num_devices) CL_API_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromD3D11BufferKHR)( + cl_context context, cl_mem_flags flags, ID3D11Buffer *resource, + cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromD3D11Texture2DKHR)( + cl_context context, cl_mem_flags flags, ID3D11Texture2D *resource, + UINT subresource, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromD3D11Texture3DKHR)( + cl_context context, cl_mem_flags flags, ID3D11Texture3D *resource, + UINT subresource, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY +cl_int(CL_API_CALL *cl_api_clEnqueueAcquireD3D11ObjectsKHR)( + cl_command_queue command_queue, cl_uint 
num_objects, + const cl_mem *mem_objects, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY +cl_int(CL_API_CALL *cl_api_clEnqueueReleaseD3D11ObjectsKHR)( + cl_command_queue command_queue, cl_uint num_objects, + const cl_mem *mem_objects, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_2; + +/* cl_khr_dx9_media_sharing */ +typedef CL_API_ENTRY +cl_int(CL_API_CALL *cl_api_clGetDeviceIDsFromDX9MediaAdapterKHR)( + cl_platform_id platform, cl_uint num_media_adapters, + cl_dx9_media_adapter_type_khr *media_adapters_type, void *media_adapters, + cl_dx9_media_adapter_set_khr media_adapter_set, cl_uint num_entries, + cl_device_id *devices, cl_uint *num_devices) CL_API_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromDX9MediaSurfaceKHR)( + cl_context context, cl_mem_flags flags, + cl_dx9_media_adapter_type_khr adapter_type, void *surface_info, + cl_uint plane, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY +cl_int(CL_API_CALL *cl_api_clEnqueueAcquireDX9MediaSurfacesKHR)( + cl_command_queue command_queue, cl_uint num_objects, + const cl_mem *mem_objects, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY +cl_int(CL_API_CALL *cl_api_clEnqueueReleaseDX9MediaSurfacesKHR)( + cl_command_queue command_queue, cl_uint num_objects, + const cl_mem *mem_objects, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_2; + +/* cl_khr_d3d11_sharing */ +extern CL_API_ENTRY cl_int CL_API_CALL clGetDeviceIDsFromD3D11KHR( + cl_platform_id platform, cl_d3d11_device_source_khr d3d_device_source, + void *d3d_object, cl_d3d11_device_set_khr d3d_device_set, + cl_uint num_entries, cl_device_id *devices, cl_uint *num_devices); + +extern 
CL_API_ENTRY cl_mem CL_API_CALL +clCreateFromD3D11BufferKHR(cl_context context, cl_mem_flags flags, + ID3D11Buffer *resource, cl_int *errcode_ret); + +extern CL_API_ENTRY cl_mem CL_API_CALL clCreateFromD3D11Texture2DKHR( + cl_context context, cl_mem_flags flags, ID3D11Texture2D *resource, + UINT subresource, cl_int *errcode_ret); + +extern CL_API_ENTRY cl_mem CL_API_CALL clCreateFromD3D11Texture3DKHR( + cl_context context, cl_mem_flags flags, ID3D11Texture3D *resource, + UINT subresource, cl_int *errcode_ret); + +extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueAcquireD3D11ObjectsKHR( + cl_command_queue command_queue, cl_uint num_objects, + const cl_mem *mem_objects, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, cl_event *event); + +extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueReleaseD3D11ObjectsKHR( + cl_command_queue command_queue, cl_uint num_objects, + const cl_mem *mem_objects, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, cl_event *event); + +/* cl_khr_dx9_media_sharing */ +extern CL_API_ENTRY cl_int CL_API_CALL clGetDeviceIDsFromDX9MediaAdapterKHR( + cl_platform_id platform, cl_uint num_media_adapters, + cl_dx9_media_adapter_type_khr *media_adapter_type, void *media_adapters, + cl_dx9_media_adapter_set_khr media_adapter_set, cl_uint num_entries, + cl_device_id *devices, cl_uint *num_devices); + +extern CL_API_ENTRY cl_mem CL_API_CALL clCreateFromDX9MediaSurfaceKHR( + cl_context context, cl_mem_flags flags, + cl_dx9_media_adapter_type_khr adapter_type, void *surface_info, + cl_uint plane, cl_int *errcode_ret); + +extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueAcquireDX9MediaSurfacesKHR( + cl_command_queue command_queue, cl_uint num_objects, + const cl_mem *mem_objects, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, cl_event *event); + +extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueReleaseDX9MediaSurfacesKHR( + cl_command_queue command_queue, cl_uint num_objects, + const cl_mem *mem_objects, 
cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, cl_event *event); + +#else + +/* cl_khr_d3d10_sharing */ +typedef void *cl_api_clGetDeviceIDsFromD3D10KHR; +typedef void *cl_api_clCreateFromD3D10BufferKHR; +typedef void *cl_api_clCreateFromD3D10Texture2DKHR; +typedef void *cl_api_clCreateFromD3D10Texture3DKHR; +typedef void *cl_api_clEnqueueAcquireD3D10ObjectsKHR; +typedef void *cl_api_clEnqueueReleaseD3D10ObjectsKHR; + +/* cl_khr_d3d11_sharing */ +typedef void *cl_api_clGetDeviceIDsFromD3D11KHR; +typedef void *cl_api_clCreateFromD3D11BufferKHR; +typedef void *cl_api_clCreateFromD3D11Texture2DKHR; +typedef void *cl_api_clCreateFromD3D11Texture3DKHR; +typedef void *cl_api_clEnqueueAcquireD3D11ObjectsKHR; +typedef void *cl_api_clEnqueueReleaseD3D11ObjectsKHR; + +/* cl_khr_dx9_media_sharing */ +typedef void *cl_api_clCreateFromDX9MediaSurfaceKHR; +typedef void *cl_api_clEnqueueAcquireDX9MediaSurfacesKHR; +typedef void *cl_api_clEnqueueReleaseDX9MediaSurfacesKHR; +typedef void *cl_api_clGetDeviceIDsFromDX9MediaAdapterKHR; + +#endif + +/* OpenCL 1.1 */ + +#ifdef CL_VERSION_1_1 + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clSetEventCallback)( + cl_event /* event */, cl_int /* command_exec_callback_type */, + void(CL_CALLBACK * /* pfn_notify */)(cl_event, cl_int, void *), + void * /* user_data */) CL_API_SUFFIX__VERSION_1_1; + +typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateSubBuffer)( + cl_mem /* buffer */, cl_mem_flags /* flags */, + cl_buffer_create_type /* buffer_create_type */, + const void * /* buffer_create_info */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_1; + +typedef CL_API_ENTRY +cl_int(CL_API_CALL *cl_api_clSetMemObjectDestructorCallback)( + cl_mem /* memobj */, + void(CL_CALLBACK * /*pfn_notify*/)(cl_mem /* memobj */, + void * /*user_data*/), + void * /*user_data */) CL_API_SUFFIX__VERSION_1_1; + +typedef CL_API_ENTRY cl_event(CL_API_CALL *cl_api_clCreateUserEvent)( + cl_context /* context */, + cl_int * /* 
errcode_ret */) CL_API_SUFFIX__VERSION_1_1; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clSetUserEventStatus)( + cl_event /* event */, + cl_int /* execution_status */) CL_API_SUFFIX__VERSION_1_1; + +#else + +typedef void *cl_api_clSetEventCallback; +typedef void *cl_api_clCreateSubBuffer; +typedef void *cl_api_clSetMemObjectDestructorCallback; +typedef void *cl_api_clCreateUserEvent; +typedef void *cl_api_clSetUserEventStatus; + +#endif + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clCreateSubDevicesEXT)( + cl_device_id in_device, + const cl_device_partition_property_ext *partition_properties, + cl_uint num_entries, cl_device_id *out_devices, cl_uint *num_devices); + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clRetainDeviceEXT)( + cl_device_id device) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clReleaseDeviceEXT)( + cl_device_id device) CL_API_SUFFIX__VERSION_1_0; + +/* cl_khr_egl_image */ +typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromEGLImageKHR)( + cl_context context, CLeglDisplayKHR display, CLeglImageKHR image, + cl_mem_flags flags, const cl_egl_image_properties_khr *properties, + cl_int *errcode_ret); + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueAcquireEGLObjectsKHR)( + cl_command_queue command_queue, cl_uint num_objects, + const cl_mem *mem_objects, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, cl_event *event); + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueReleaseEGLObjectsKHR)( + cl_command_queue command_queue, cl_uint num_objects, + const cl_mem *mem_objects, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, cl_event *event); + +/* cl_khr_egl_event */ +typedef CL_API_ENTRY cl_event(CL_API_CALL *cl_api_clCreateEventFromEGLSyncKHR)( + cl_context context, CLeglSyncKHR sync, CLeglDisplayKHR display, + cl_int *errcode_ret); + +#ifdef CL_VERSION_2_1 + +typedef CL_API_ENTRY cl_int(CL_API_CALL 
*cl_api_clSetDefaultDeviceCommandQueue)( + cl_context context, cl_device_id device, + cl_command_queue command_queue) CL_API_SUFFIX__VERSION_2_1; + +typedef CL_API_ENTRY cl_program(CL_API_CALL *cl_api_clCreateProgramWithIL)( + cl_context context, const void *il, size_t length, + cl_int *errcode_ret) CL_API_SUFFIX__VERSION_2_1; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetKernelSubGroupInfo)( + cl_kernel kernel, cl_device_id device, cl_kernel_sub_group_info param_name, + size_t input_value_size, const void *input_value, size_t param_value_size, + void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_2_1; + +typedef CL_API_ENTRY cl_kernel(CL_API_CALL *cl_api_clCloneKernel)( + cl_kernel source_kernel, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_2_1; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueSVMMigrateMem)( + cl_command_queue command_queue, cl_uint num_svm_pointers, + const void **svm_pointers, const size_t *sizes, + cl_mem_migration_flags flags, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_2_1; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetDeviceAndHostTimer)( + cl_device_id device, cl_ulong *device_timestamp, + cl_ulong *host_timestamp) CL_API_SUFFIX__VERSION_2_1; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetHostTimer)( + cl_device_id device, cl_ulong *host_timestamp) CL_API_SUFFIX__VERSION_2_1; + +#else + +typedef void *cl_api_clSetDefaultDeviceCommandQueue; +typedef void *cl_api_clCreateProgramWithIL; +typedef void *cl_api_clGetKernelSubGroupInfo; +typedef void *cl_api_clCloneKernel; +typedef void *cl_api_clEnqueueSVMMigrateMem; +typedef void *cl_api_clGetDeviceAndHostTimer; +typedef void *cl_api_clGetHostTimer; + +#endif + +/* Vendor dispatch table structure */ + +typedef struct _cl_icd_dispatch { + /* OpenCL 1.0 */ + cl_api_clGetPlatformIDs clGetPlatformIDs; + cl_api_clGetPlatformInfo clGetPlatformInfo; + cl_api_clGetDeviceIDs 
clGetDeviceIDs; + cl_api_clGetDeviceInfo clGetDeviceInfo; + cl_api_clCreateContext clCreateContext; + cl_api_clCreateContextFromType clCreateContextFromType; + cl_api_clRetainContext clRetainContext; + cl_api_clReleaseContext clReleaseContext; + cl_api_clGetContextInfo clGetContextInfo; + cl_api_clCreateCommandQueue clCreateCommandQueue; + cl_api_clRetainCommandQueue clRetainCommandQueue; + cl_api_clReleaseCommandQueue clReleaseCommandQueue; + cl_api_clGetCommandQueueInfo clGetCommandQueueInfo; + cl_api_clSetCommandQueueProperty clSetCommandQueueProperty; + cl_api_clCreateBuffer clCreateBuffer; + cl_api_clCreateImage2D clCreateImage2D; + cl_api_clCreateImage3D clCreateImage3D; + cl_api_clRetainMemObject clRetainMemObject; + cl_api_clReleaseMemObject clReleaseMemObject; + cl_api_clGetSupportedImageFormats clGetSupportedImageFormats; + cl_api_clGetMemObjectInfo clGetMemObjectInfo; + cl_api_clGetImageInfo clGetImageInfo; + cl_api_clCreateSampler clCreateSampler; + cl_api_clRetainSampler clRetainSampler; + cl_api_clReleaseSampler clReleaseSampler; + cl_api_clGetSamplerInfo clGetSamplerInfo; + cl_api_clCreateProgramWithSource clCreateProgramWithSource; + cl_api_clCreateProgramWithBinary clCreateProgramWithBinary; + cl_api_clRetainProgram clRetainProgram; + cl_api_clReleaseProgram clReleaseProgram; + cl_api_clBuildProgram clBuildProgram; + cl_api_clUnloadCompiler clUnloadCompiler; + cl_api_clGetProgramInfo clGetProgramInfo; + cl_api_clGetProgramBuildInfo clGetProgramBuildInfo; + cl_api_clCreateKernel clCreateKernel; + cl_api_clCreateKernelsInProgram clCreateKernelsInProgram; + cl_api_clRetainKernel clRetainKernel; + cl_api_clReleaseKernel clReleaseKernel; + cl_api_clSetKernelArg clSetKernelArg; + cl_api_clGetKernelInfo clGetKernelInfo; + cl_api_clGetKernelWorkGroupInfo clGetKernelWorkGroupInfo; + cl_api_clWaitForEvents clWaitForEvents; + cl_api_clGetEventInfo clGetEventInfo; + cl_api_clRetainEvent clRetainEvent; + cl_api_clReleaseEvent clReleaseEvent; + 
cl_api_clGetEventProfilingInfo clGetEventProfilingInfo; + cl_api_clFlush clFlush; + cl_api_clFinish clFinish; + cl_api_clEnqueueReadBuffer clEnqueueReadBuffer; + cl_api_clEnqueueWriteBuffer clEnqueueWriteBuffer; + cl_api_clEnqueueCopyBuffer clEnqueueCopyBuffer; + cl_api_clEnqueueReadImage clEnqueueReadImage; + cl_api_clEnqueueWriteImage clEnqueueWriteImage; + cl_api_clEnqueueCopyImage clEnqueueCopyImage; + cl_api_clEnqueueCopyImageToBuffer clEnqueueCopyImageToBuffer; + cl_api_clEnqueueCopyBufferToImage clEnqueueCopyBufferToImage; + cl_api_clEnqueueMapBuffer clEnqueueMapBuffer; + cl_api_clEnqueueMapImage clEnqueueMapImage; + cl_api_clEnqueueUnmapMemObject clEnqueueUnmapMemObject; + cl_api_clEnqueueNDRangeKernel clEnqueueNDRangeKernel; + cl_api_clEnqueueTask clEnqueueTask; + cl_api_clEnqueueNativeKernel clEnqueueNativeKernel; + cl_api_clEnqueueMarker clEnqueueMarker; + cl_api_clEnqueueWaitForEvents clEnqueueWaitForEvents; + cl_api_clEnqueueBarrier clEnqueueBarrier; + cl_api_clGetExtensionFunctionAddress clGetExtensionFunctionAddress; + cl_api_clCreateFromGLBuffer clCreateFromGLBuffer; + cl_api_clCreateFromGLTexture2D clCreateFromGLTexture2D; + cl_api_clCreateFromGLTexture3D clCreateFromGLTexture3D; + cl_api_clCreateFromGLRenderbuffer clCreateFromGLRenderbuffer; + cl_api_clGetGLObjectInfo clGetGLObjectInfo; + cl_api_clGetGLTextureInfo clGetGLTextureInfo; + cl_api_clEnqueueAcquireGLObjects clEnqueueAcquireGLObjects; + cl_api_clEnqueueReleaseGLObjects clEnqueueReleaseGLObjects; + cl_api_clGetGLContextInfoKHR clGetGLContextInfoKHR; + + /* cl_khr_d3d10_sharing */ + cl_api_clGetDeviceIDsFromD3D10KHR clGetDeviceIDsFromD3D10KHR; + cl_api_clCreateFromD3D10BufferKHR clCreateFromD3D10BufferKHR; + cl_api_clCreateFromD3D10Texture2DKHR clCreateFromD3D10Texture2DKHR; + cl_api_clCreateFromD3D10Texture3DKHR clCreateFromD3D10Texture3DKHR; + cl_api_clEnqueueAcquireD3D10ObjectsKHR clEnqueueAcquireD3D10ObjectsKHR; + cl_api_clEnqueueReleaseD3D10ObjectsKHR clEnqueueReleaseD3D10ObjectsKHR; 
+ + /* OpenCL 1.1 */ + cl_api_clSetEventCallback clSetEventCallback; + cl_api_clCreateSubBuffer clCreateSubBuffer; + cl_api_clSetMemObjectDestructorCallback clSetMemObjectDestructorCallback; + cl_api_clCreateUserEvent clCreateUserEvent; + cl_api_clSetUserEventStatus clSetUserEventStatus; + cl_api_clEnqueueReadBufferRect clEnqueueReadBufferRect; + cl_api_clEnqueueWriteBufferRect clEnqueueWriteBufferRect; + cl_api_clEnqueueCopyBufferRect clEnqueueCopyBufferRect; + + /* cl_ext_device_fission */ + cl_api_clCreateSubDevicesEXT clCreateSubDevicesEXT; + cl_api_clRetainDeviceEXT clRetainDeviceEXT; + cl_api_clReleaseDeviceEXT clReleaseDeviceEXT; + + /* cl_khr_gl_event */ + cl_api_clCreateEventFromGLsyncKHR clCreateEventFromGLsyncKHR; + + /* OpenCL 1.2 */ + cl_api_clCreateSubDevices clCreateSubDevices; + cl_api_clRetainDevice clRetainDevice; + cl_api_clReleaseDevice clReleaseDevice; + cl_api_clCreateImage clCreateImage; + cl_api_clCreateProgramWithBuiltInKernels clCreateProgramWithBuiltInKernels; + cl_api_clCompileProgram clCompileProgram; + cl_api_clLinkProgram clLinkProgram; + cl_api_clUnloadPlatformCompiler clUnloadPlatformCompiler; + cl_api_clGetKernelArgInfo clGetKernelArgInfo; + cl_api_clEnqueueFillBuffer clEnqueueFillBuffer; + cl_api_clEnqueueFillImage clEnqueueFillImage; + cl_api_clEnqueueMigrateMemObjects clEnqueueMigrateMemObjects; + cl_api_clEnqueueMarkerWithWaitList clEnqueueMarkerWithWaitList; + cl_api_clEnqueueBarrierWithWaitList clEnqueueBarrierWithWaitList; + cl_api_clGetExtensionFunctionAddressForPlatform + clGetExtensionFunctionAddressForPlatform; + cl_api_clCreateFromGLTexture clCreateFromGLTexture; + + /* cl_khr_d3d11_sharing */ + cl_api_clGetDeviceIDsFromD3D11KHR clGetDeviceIDsFromD3D11KHR; + cl_api_clCreateFromD3D11BufferKHR clCreateFromD3D11BufferKHR; + cl_api_clCreateFromD3D11Texture2DKHR clCreateFromD3D11Texture2DKHR; + cl_api_clCreateFromD3D11Texture3DKHR clCreateFromD3D11Texture3DKHR; + cl_api_clCreateFromDX9MediaSurfaceKHR 
clCreateFromDX9MediaSurfaceKHR; + cl_api_clEnqueueAcquireD3D11ObjectsKHR clEnqueueAcquireD3D11ObjectsKHR; + cl_api_clEnqueueReleaseD3D11ObjectsKHR clEnqueueReleaseD3D11ObjectsKHR; + + /* cl_khr_dx9_media_sharing */ + cl_api_clGetDeviceIDsFromDX9MediaAdapterKHR + clGetDeviceIDsFromDX9MediaAdapterKHR; + cl_api_clEnqueueAcquireDX9MediaSurfacesKHR + clEnqueueAcquireDX9MediaSurfacesKHR; + cl_api_clEnqueueReleaseDX9MediaSurfacesKHR + clEnqueueReleaseDX9MediaSurfacesKHR; + + /* cl_khr_egl_image */ + cl_api_clCreateFromEGLImageKHR clCreateFromEGLImageKHR; + cl_api_clEnqueueAcquireEGLObjectsKHR clEnqueueAcquireEGLObjectsKHR; + cl_api_clEnqueueReleaseEGLObjectsKHR clEnqueueReleaseEGLObjectsKHR; + + /* cl_khr_egl_event */ + cl_api_clCreateEventFromEGLSyncKHR clCreateEventFromEGLSyncKHR; + + /* OpenCL 2.0 */ + cl_api_clCreateCommandQueueWithProperties clCreateCommandQueueWithProperties; + cl_api_clCreatePipe clCreatePipe; + cl_api_clGetPipeInfo clGetPipeInfo; + cl_api_clSVMAlloc clSVMAlloc; + cl_api_clSVMFree clSVMFree; + cl_api_clEnqueueSVMFree clEnqueueSVMFree; + cl_api_clEnqueueSVMMemcpy clEnqueueSVMMemcpy; + cl_api_clEnqueueSVMMemFill clEnqueueSVMMemFill; + cl_api_clEnqueueSVMMap clEnqueueSVMMap; + cl_api_clEnqueueSVMUnmap clEnqueueSVMUnmap; + cl_api_clCreateSamplerWithProperties clCreateSamplerWithProperties; + cl_api_clSetKernelArgSVMPointer clSetKernelArgSVMPointer; + cl_api_clSetKernelExecInfo clSetKernelExecInfo; + + /* cl_khr_sub_groups */ + cl_api_clGetKernelSubGroupInfoKHR clGetKernelSubGroupInfoKHR; + + /* OpenCL 2.1 */ + cl_api_clCloneKernel clCloneKernel; + cl_api_clCreateProgramWithIL clCreateProgramWithIL; + cl_api_clEnqueueSVMMigrateMem clEnqueueSVMMigrateMem; + cl_api_clGetDeviceAndHostTimer clGetDeviceAndHostTimer; + cl_api_clGetHostTimer clGetHostTimer; + cl_api_clGetKernelSubGroupInfo clGetKernelSubGroupInfo; + cl_api_clSetDefaultDeviceCommandQueue clSetDefaultDeviceCommandQueue; + + /* OpenCL 2.2 */ + cl_api_clSetProgramReleaseCallback 
clSetProgramReleaseCallback; + cl_api_clSetProgramSpecializationConstant clSetProgramSpecializationConstant; +} cl_icd_dispatch; + +#ifdef __cplusplus +} +#endif + +#endif /* #ifndef OPENCL_CL_ICD_H */ diff --git a/projects/hip/amdocl/CL/cl_platform.h b/projects/hip/amdocl/CL/cl_platform.h new file mode 100644 index 0000000000..7f4ddea5b3 --- /dev/null +++ b/projects/hip/amdocl/CL/cl_platform.h @@ -0,0 +1,1384 @@ +/********************************************************************************** + * Copyright (c) 2008-2018 The Khronos Group Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and/or associated documentation files (the + * "Materials"), to deal in the Materials without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Materials, and to + * permit persons to whom the Materials are furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Materials. + * + * MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS + * KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS + * SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT + * https://www.khronos.org/registry/ + * + * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. 
+ **********************************************************************************/ + +#ifndef __CL_PLATFORM_H +#define __CL_PLATFORM_H + +#include <CL/cl_version.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#if defined(_WIN32) + #define CL_API_ENTRY + #define CL_API_CALL __stdcall + #define CL_CALLBACK __stdcall +#else + #define CL_API_ENTRY + #define CL_API_CALL + #define CL_CALLBACK +#endif + +/* + * Deprecation flags refer to the last version of the header in which the + * feature was not deprecated. + * + * E.g. VERSION_1_1_DEPRECATED means the feature is present in 1.1 without + * deprecation but is deprecated in versions later than 1.1. + */ + +#define CL_EXTENSION_WEAK_LINK +#define CL_API_SUFFIX__VERSION_1_0 +#define CL_EXT_SUFFIX__VERSION_1_0 +#define CL_API_SUFFIX__VERSION_1_1 +#define CL_EXT_SUFFIX__VERSION_1_1 +#define CL_API_SUFFIX__VERSION_1_2 +#define CL_EXT_SUFFIX__VERSION_1_2 +#define CL_API_SUFFIX__VERSION_2_0 +#define CL_EXT_SUFFIX__VERSION_2_0 +#define CL_API_SUFFIX__VERSION_2_1 +#define CL_EXT_SUFFIX__VERSION_2_1 +#define CL_API_SUFFIX__VERSION_2_2 +#define CL_EXT_SUFFIX__VERSION_2_2 + + +#ifdef __GNUC__ + #define CL_EXT_SUFFIX_DEPRECATED __attribute__((deprecated)) + #define CL_EXT_PREFIX_DEPRECATED +#elif defined(_WIN32) + #define CL_EXT_SUFFIX_DEPRECATED + #define CL_EXT_PREFIX_DEPRECATED __declspec(deprecated) +#else + #define CL_EXT_SUFFIX_DEPRECATED + #define CL_EXT_PREFIX_DEPRECATED +#endif + +#ifdef CL_USE_DEPRECATED_OPENCL_1_0_APIS + #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED + #define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED +#else + #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED CL_EXT_SUFFIX_DEPRECATED + #define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED CL_EXT_PREFIX_DEPRECATED +#endif + +#ifdef CL_USE_DEPRECATED_OPENCL_1_1_APIS + #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED + #define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED +#else + #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED CL_EXT_SUFFIX_DEPRECATED + #define
CL_EXT_PREFIX__VERSION_1_1_DEPRECATED CL_EXT_PREFIX_DEPRECATED +#endif + +#ifdef CL_USE_DEPRECATED_OPENCL_1_2_APIS + #define CL_EXT_SUFFIX__VERSION_1_2_DEPRECATED + #define CL_EXT_PREFIX__VERSION_1_2_DEPRECATED +#else + #define CL_EXT_SUFFIX__VERSION_1_2_DEPRECATED CL_EXT_SUFFIX_DEPRECATED + #define CL_EXT_PREFIX__VERSION_1_2_DEPRECATED CL_EXT_PREFIX_DEPRECATED + #endif + +#ifdef CL_USE_DEPRECATED_OPENCL_2_0_APIS + #define CL_EXT_SUFFIX__VERSION_2_0_DEPRECATED + #define CL_EXT_PREFIX__VERSION_2_0_DEPRECATED +#else + #define CL_EXT_SUFFIX__VERSION_2_0_DEPRECATED CL_EXT_SUFFIX_DEPRECATED + #define CL_EXT_PREFIX__VERSION_2_0_DEPRECATED CL_EXT_PREFIX_DEPRECATED +#endif + +#ifdef CL_USE_DEPRECATED_OPENCL_2_1_APIS + #define CL_EXT_SUFFIX__VERSION_2_1_DEPRECATED + #define CL_EXT_PREFIX__VERSION_2_1_DEPRECATED +#else + #define CL_EXT_SUFFIX__VERSION_2_1_DEPRECATED CL_EXT_SUFFIX_DEPRECATED + #define CL_EXT_PREFIX__VERSION_2_1_DEPRECATED CL_EXT_PREFIX_DEPRECATED +#endif + +#if (defined (_WIN32) && defined(_MSC_VER)) + +/* scalar types */ +typedef signed __int8 cl_char; +typedef unsigned __int8 cl_uchar; +typedef signed __int16 cl_short; +typedef unsigned __int16 cl_ushort; +typedef signed __int32 cl_int; +typedef unsigned __int32 cl_uint; +typedef signed __int64 cl_long; +typedef unsigned __int64 cl_ulong; + +typedef unsigned __int16 cl_half; +typedef float cl_float; +typedef double cl_double; + +/* Macro names and corresponding values defined by OpenCL */ +#define CL_CHAR_BIT 8 +#define CL_SCHAR_MAX 127 +#define CL_SCHAR_MIN (-127-1) +#define CL_CHAR_MAX CL_SCHAR_MAX +#define CL_CHAR_MIN CL_SCHAR_MIN +#define CL_UCHAR_MAX 255 +#define CL_SHRT_MAX 32767 +#define CL_SHRT_MIN (-32767-1) +#define CL_USHRT_MAX 65535 +#define CL_INT_MAX 2147483647 +#define CL_INT_MIN (-2147483647-1) +#define CL_UINT_MAX 0xffffffffU +#define CL_LONG_MAX ((cl_long) 0x7FFFFFFFFFFFFFFFLL) +#define CL_LONG_MIN ((cl_long) -0x7FFFFFFFFFFFFFFFLL - 1LL) +#define CL_ULONG_MAX ((cl_ulong) 
0xFFFFFFFFFFFFFFFFULL) + +#define CL_FLT_DIG 6 +#define CL_FLT_MANT_DIG 24 +#define CL_FLT_MAX_10_EXP +38 +#define CL_FLT_MAX_EXP +128 +#define CL_FLT_MIN_10_EXP -37 +#define CL_FLT_MIN_EXP -125 +#define CL_FLT_RADIX 2 +#define CL_FLT_MAX 340282346638528859811704183484516925440.0f +#define CL_FLT_MIN 1.175494350822287507969e-38f +#define CL_FLT_EPSILON 1.1920928955078125e-7f + +#define CL_HALF_DIG 3 +#define CL_HALF_MANT_DIG 11 +#define CL_HALF_MAX_10_EXP +4 +#define CL_HALF_MAX_EXP +16 +#define CL_HALF_MIN_10_EXP -4 +#define CL_HALF_MIN_EXP -13 +#define CL_HALF_RADIX 2 +#define CL_HALF_MAX 65504.0f +#define CL_HALF_MIN 6.103515625e-05f +#define CL_HALF_EPSILON 9.765625e-04f + +#define CL_DBL_DIG 15 +#define CL_DBL_MANT_DIG 53 +#define CL_DBL_MAX_10_EXP +308 +#define CL_DBL_MAX_EXP +1024 +#define CL_DBL_MIN_10_EXP -307 +#define CL_DBL_MIN_EXP -1021 +#define CL_DBL_RADIX 2 +#define CL_DBL_MAX 1.7976931348623158e+308 +#define CL_DBL_MIN 2.225073858507201383090e-308 +#define CL_DBL_EPSILON 2.220446049250313080847e-16 + +#define CL_M_E 2.7182818284590452354 +#define CL_M_LOG2E 1.4426950408889634074 +#define CL_M_LOG10E 0.43429448190325182765 +#define CL_M_LN2 0.69314718055994530942 +#define CL_M_LN10 2.30258509299404568402 +#define CL_M_PI 3.14159265358979323846 +#define CL_M_PI_2 1.57079632679489661923 +#define CL_M_PI_4 0.78539816339744830962 +#define CL_M_1_PI 0.31830988618379067154 +#define CL_M_2_PI 0.63661977236758134308 +#define CL_M_2_SQRTPI 1.12837916709551257390 +#define CL_M_SQRT2 1.41421356237309504880 +#define CL_M_SQRT1_2 0.70710678118654752440 + +#define CL_M_E_F 2.718281828f +#define CL_M_LOG2E_F 1.442695041f +#define CL_M_LOG10E_F 0.434294482f +#define CL_M_LN2_F 0.693147181f +#define CL_M_LN10_F 2.302585093f +#define CL_M_PI_F 3.141592654f +#define CL_M_PI_2_F 1.570796327f +#define CL_M_PI_4_F 0.785398163f +#define CL_M_1_PI_F 0.318309886f +#define CL_M_2_PI_F 0.636619772f +#define CL_M_2_SQRTPI_F 1.128379167f +#define CL_M_SQRT2_F 1.414213562f 
+#define CL_M_SQRT1_2_F 0.707106781f + +#define CL_NAN (CL_INFINITY - CL_INFINITY) +#define CL_HUGE_VALF ((cl_float) 1e50) +#define CL_HUGE_VAL ((cl_double) 1e500) +#define CL_MAXFLOAT CL_FLT_MAX +#define CL_INFINITY CL_HUGE_VALF + +#else + +#include <stdint.h> + +/* scalar types */ +typedef int8_t cl_char; +typedef uint8_t cl_uchar; +typedef int16_t cl_short; +typedef uint16_t cl_ushort; +typedef int32_t cl_int; +typedef uint32_t cl_uint; +typedef int64_t cl_long; +typedef uint64_t cl_ulong; + +typedef uint16_t cl_half; +typedef float cl_float; +typedef double cl_double; + +/* Macro names and corresponding values defined by OpenCL */ +#define CL_CHAR_BIT 8 +#define CL_SCHAR_MAX 127 +#define CL_SCHAR_MIN (-127-1) +#define CL_CHAR_MAX CL_SCHAR_MAX +#define CL_CHAR_MIN CL_SCHAR_MIN +#define CL_UCHAR_MAX 255 +#define CL_SHRT_MAX 32767 +#define CL_SHRT_MIN (-32767-1) +#define CL_USHRT_MAX 65535 +#define CL_INT_MAX 2147483647 +#define CL_INT_MIN (-2147483647-1) +#define CL_UINT_MAX 0xffffffffU +#define CL_LONG_MAX ((cl_long) 0x7FFFFFFFFFFFFFFFLL) +#define CL_LONG_MIN ((cl_long) -0x7FFFFFFFFFFFFFFFLL - 1LL) +#define CL_ULONG_MAX ((cl_ulong) 0xFFFFFFFFFFFFFFFFULL) + +#define CL_FLT_DIG 6 +#define CL_FLT_MANT_DIG 24 +#define CL_FLT_MAX_10_EXP +38 +#define CL_FLT_MAX_EXP +128 +#define CL_FLT_MIN_10_EXP -37 +#define CL_FLT_MIN_EXP -125 +#define CL_FLT_RADIX 2 +#define CL_FLT_MAX 340282346638528859811704183484516925440.0f +#define CL_FLT_MIN 1.175494350822287507969e-38f +#define CL_FLT_EPSILON 1.1920928955078125e-7f + +#define CL_HALF_DIG 3 +#define CL_HALF_MANT_DIG 11 +#define CL_HALF_MAX_10_EXP +4 +#define CL_HALF_MAX_EXP +16 +#define CL_HALF_MIN_10_EXP -4 +#define CL_HALF_MIN_EXP -13 +#define CL_HALF_RADIX 2 +#define CL_HALF_MAX 65504.0f +#define CL_HALF_MIN 6.103515625e-05f +#define CL_HALF_EPSILON 9.765625e-04f + +#define CL_DBL_DIG 15 +#define CL_DBL_MANT_DIG 53 +#define CL_DBL_MAX_10_EXP +308 +#define CL_DBL_MAX_EXP +1024 +#define CL_DBL_MIN_10_EXP -307 +#define CL_DBL_MIN_EXP
-1021 +#define CL_DBL_RADIX 2 +#define CL_DBL_MAX 179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.0 +#define CL_DBL_MIN 2.225073858507201383090e-308 +#define CL_DBL_EPSILON 2.220446049250313080847e-16 + +#define CL_M_E 2.7182818284590452354 +#define CL_M_LOG2E 1.4426950408889634074 +#define CL_M_LOG10E 0.43429448190325182765 +#define CL_M_LN2 0.69314718055994530942 +#define CL_M_LN10 2.30258509299404568402 +#define CL_M_PI 3.14159265358979323846 +#define CL_M_PI_2 1.57079632679489661923 +#define CL_M_PI_4 0.78539816339744830962 +#define CL_M_1_PI 0.31830988618379067154 +#define CL_M_2_PI 0.63661977236758134308 +#define CL_M_2_SQRTPI 1.12837916709551257390 +#define CL_M_SQRT2 1.41421356237309504880 +#define CL_M_SQRT1_2 0.70710678118654752440 + +#define CL_M_E_F 2.718281828f +#define CL_M_LOG2E_F 1.442695041f +#define CL_M_LOG10E_F 0.434294482f +#define CL_M_LN2_F 0.693147181f +#define CL_M_LN10_F 2.302585093f +#define CL_M_PI_F 3.141592654f +#define CL_M_PI_2_F 1.570796327f +#define CL_M_PI_4_F 0.785398163f +#define CL_M_1_PI_F 0.318309886f +#define CL_M_2_PI_F 0.636619772f +#define CL_M_2_SQRTPI_F 1.128379167f +#define CL_M_SQRT2_F 1.414213562f +#define CL_M_SQRT1_2_F 0.707106781f + +#if defined( __GNUC__ ) + #define CL_HUGE_VALF __builtin_huge_valf() + #define CL_HUGE_VAL __builtin_huge_val() + #define CL_NAN __builtin_nanf( "" ) +#else + #define CL_HUGE_VALF ((cl_float) 1e50) + #define CL_HUGE_VAL ((cl_double) 1e500) + float nanf( const char * ); + #define CL_NAN nanf( "" ) +#endif +#define CL_MAXFLOAT CL_FLT_MAX +#define CL_INFINITY CL_HUGE_VALF + +#endif + +#include <stddef.h> + +/* Mirror types to GL types. Mirror types allow us to avoid deciding which headers to load based on whether we are using GL or GLES here.
*/ +typedef unsigned int cl_GLuint; +typedef int cl_GLint; +typedef unsigned int cl_GLenum; + +/* + * Vector types + * + * Note: OpenCL requires that all types be naturally aligned. + * This means that vector types must be naturally aligned. + * For example, a vector of four floats must be aligned to + * a 16 byte boundary (calculated as 4 * the natural 4-byte + * alignment of the float). The alignment qualifiers here + * will only function properly if your compiler supports them + * and if you don't actively work to defeat them. For example, + * in order for a cl_float4 to be 16 byte aligned in a struct, + * the start of the struct must itself be 16-byte aligned. + * + * Maintaining proper alignment is the user's responsibility. + */ + +/* Define basic vector types */ +#if defined( __VEC__ ) + #include <altivec.h> /* may be omitted depending on compiler. AltiVec spec provides no way to detect whether the header is required. */ + typedef __vector unsigned char __cl_uchar16; + typedef __vector signed char __cl_char16; + typedef __vector unsigned short __cl_ushort8; + typedef __vector signed short __cl_short8; + typedef __vector unsigned int __cl_uint4; + typedef __vector signed int __cl_int4; + typedef __vector float __cl_float4; + #define __CL_UCHAR16__ 1 + #define __CL_CHAR16__ 1 + #define __CL_USHORT8__ 1 + #define __CL_SHORT8__ 1 + #define __CL_UINT4__ 1 + #define __CL_INT4__ 1 + #define __CL_FLOAT4__ 1 +#endif + +#if defined( __SSE__ ) + #if defined( __MINGW64__ ) + #include <intrin.h> + #else + #include <xmmintrin.h> + #endif + #if defined( __GNUC__ ) + typedef float __cl_float4 __attribute__((vector_size(16))); + #else + typedef __m128 __cl_float4; + #endif + #define __CL_FLOAT4__ 1 +#endif + +#if defined( __SSE2__ ) + #if defined( __MINGW64__ ) + #include <intrin.h> + #else + #include <emmintrin.h> + #endif + #if defined( __GNUC__ ) + typedef cl_uchar __cl_uchar16 __attribute__((vector_size(16))); + typedef cl_char __cl_char16 __attribute__((vector_size(16))); + typedef cl_ushort __cl_ushort8
__attribute__((vector_size(16))); + typedef cl_short __cl_short8 __attribute__((vector_size(16))); + typedef cl_uint __cl_uint4 __attribute__((vector_size(16))); + typedef cl_int __cl_int4 __attribute__((vector_size(16))); + typedef cl_ulong __cl_ulong2 __attribute__((vector_size(16))); + typedef cl_long __cl_long2 __attribute__((vector_size(16))); + typedef cl_double __cl_double2 __attribute__((vector_size(16))); + #else + typedef __m128i __cl_uchar16; + typedef __m128i __cl_char16; + typedef __m128i __cl_ushort8; + typedef __m128i __cl_short8; + typedef __m128i __cl_uint4; + typedef __m128i __cl_int4; + typedef __m128i __cl_ulong2; + typedef __m128i __cl_long2; + typedef __m128d __cl_double2; + #endif + #define __CL_UCHAR16__ 1 + #define __CL_CHAR16__ 1 + #define __CL_USHORT8__ 1 + #define __CL_SHORT8__ 1 + #define __CL_INT4__ 1 + #define __CL_UINT4__ 1 + #define __CL_ULONG2__ 1 + #define __CL_LONG2__ 1 + #define __CL_DOUBLE2__ 1 +#endif + +#if defined( __MMX__ ) + #include <mmintrin.h> + #if defined( __GNUC__ ) + typedef cl_uchar __cl_uchar8 __attribute__((vector_size(8))); + typedef cl_char __cl_char8 __attribute__((vector_size(8))); + typedef cl_ushort __cl_ushort4 __attribute__((vector_size(8))); + typedef cl_short __cl_short4 __attribute__((vector_size(8))); + typedef cl_uint __cl_uint2 __attribute__((vector_size(8))); + typedef cl_int __cl_int2 __attribute__((vector_size(8))); + typedef cl_ulong __cl_ulong1 __attribute__((vector_size(8))); + typedef cl_long __cl_long1 __attribute__((vector_size(8))); + typedef cl_float __cl_float2 __attribute__((vector_size(8))); + #else + typedef __m64 __cl_uchar8; + typedef __m64 __cl_char8; + typedef __m64 __cl_ushort4; + typedef __m64 __cl_short4; + typedef __m64 __cl_uint2; + typedef __m64 __cl_int2; + typedef __m64 __cl_ulong1; + typedef __m64 __cl_long1; + typedef __m64 __cl_float2; + #endif + #define __CL_UCHAR8__ 1 + #define __CL_CHAR8__ 1 + #define __CL_USHORT4__ 1 + #define __CL_SHORT4__ 1 + #define __CL_INT2__ 1 + #define
__CL_UINT2__ 1 + #define __CL_ULONG1__ 1 + #define __CL_LONG1__ 1 + #define __CL_FLOAT2__ 1 +#endif + +#if defined( __AVX__ ) + #if defined( __MINGW64__ ) + #include <intrin.h> + #else + #include <immintrin.h> + #endif + #if defined( __GNUC__ ) + typedef cl_float __cl_float8 __attribute__((vector_size(32))); + typedef cl_double __cl_double4 __attribute__((vector_size(32))); + #else + typedef __m256 __cl_float8; + typedef __m256d __cl_double4; + #endif + #define __CL_FLOAT8__ 1 + #define __CL_DOUBLE4__ 1 +#endif + +/* Define capabilities for anonymous struct members. */ +#if !defined(__cplusplus) && defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L +#define __CL_HAS_ANON_STRUCT__ 1 +#define __CL_ANON_STRUCT__ +#elif defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) +#define __CL_HAS_ANON_STRUCT__ 1 +#define __CL_ANON_STRUCT__ __extension__ +#elif defined( _WIN32) && defined(_MSC_VER) + #if _MSC_VER >= 1500 + /* Microsoft Developer Studio 2008 supports anonymous structs, but + * complains by default. */ + #define __CL_HAS_ANON_STRUCT__ 1 + #define __CL_ANON_STRUCT__ + /* Disable warning C4201: nonstandard extension used : nameless + * struct/union */ + #pragma warning( push ) + #pragma warning( disable : 4201 ) + #endif +#else +#define __CL_HAS_ANON_STRUCT__ 0 +#define __CL_ANON_STRUCT__ +#endif + +/* Define alignment keys */ +#if defined( __GNUC__ ) + #define CL_ALIGNED(_x) __attribute__ ((aligned(_x))) +#elif defined( _WIN32) && (_MSC_VER) + /* Alignment keys neutered on windows because MSVC can't swallow function arguments with alignment requirements */ + /* http://msdn.microsoft.com/en-us/library/373ak2y1%28VS.71%29.aspx */ + /* #include <crtdefs.h> */ + /* #define CL_ALIGNED(_x) _CRT_ALIGN(_x) */ + #define CL_ALIGNED(_x) +#else + #warning Need to implement some method to align data here + #define CL_ALIGNED(_x) +#endif + +/* Indicate whether .xyzw, .s0123 and .hi.lo are supported */ +#if __CL_HAS_ANON_STRUCT__ + /* .xyzw and .s0123...{f|F} are supported */ + #define
CL_HAS_NAMED_VECTOR_FIELDS 1 + /* .hi and .lo are supported */ + #define CL_HAS_HI_LO_VECTOR_FIELDS 1 +#endif + +/* Define cl_vector types */ + +/* ---- cl_charn ---- */ +typedef union +{ + cl_char CL_ALIGNED(2) s[2]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_char x, y; }; + __CL_ANON_STRUCT__ struct{ cl_char s0, s1; }; + __CL_ANON_STRUCT__ struct{ cl_char lo, hi; }; +#endif +#if defined( __CL_CHAR2__) + __cl_char2 v2; +#endif +}cl_char2; + +typedef union +{ + cl_char CL_ALIGNED(4) s[4]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_char x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_char s0, s1, s2, s3; }; + __CL_ANON_STRUCT__ struct{ cl_char2 lo, hi; }; +#endif +#if defined( __CL_CHAR2__) + __cl_char2 v2[2]; +#endif +#if defined( __CL_CHAR4__) + __cl_char4 v4; +#endif +}cl_char4; + +/* cl_char3 is identical in size, alignment and behavior to cl_char4. See section 6.1.5. */ +typedef cl_char4 cl_char3; + +typedef union +{ + cl_char CL_ALIGNED(8) s[8]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_char x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_char s0, s1, s2, s3, s4, s5, s6, s7; }; + __CL_ANON_STRUCT__ struct{ cl_char4 lo, hi; }; +#endif +#if defined( __CL_CHAR2__) + __cl_char2 v2[4]; +#endif +#if defined( __CL_CHAR4__) + __cl_char4 v4[2]; +#endif +#if defined( __CL_CHAR8__ ) + __cl_char8 v8; +#endif +}cl_char8; + +typedef union +{ + cl_char CL_ALIGNED(16) s[16]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_char x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __CL_ANON_STRUCT__ struct{ cl_char s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __CL_ANON_STRUCT__ struct{ cl_char8 lo, hi; }; +#endif +#if defined( __CL_CHAR2__) + __cl_char2 v2[8]; +#endif +#if defined( __CL_CHAR4__) + __cl_char4 v4[4]; +#endif +#if defined( __CL_CHAR8__ ) + __cl_char8 v8[2]; +#endif +#if defined( __CL_CHAR16__ ) + __cl_char16 v16; +#endif 
+}cl_char16; + + +/* ---- cl_ucharn ---- */ +typedef union +{ + cl_uchar CL_ALIGNED(2) s[2]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_uchar x, y; }; + __CL_ANON_STRUCT__ struct{ cl_uchar s0, s1; }; + __CL_ANON_STRUCT__ struct{ cl_uchar lo, hi; }; +#endif +#if defined( __CL_UCHAR2__) + __cl_uchar2 v2; +#endif +}cl_uchar2; + +typedef union +{ + cl_uchar CL_ALIGNED(4) s[4]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_uchar x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_uchar s0, s1, s2, s3; }; + __CL_ANON_STRUCT__ struct{ cl_uchar2 lo, hi; }; +#endif +#if defined( __CL_UCHAR2__) + __cl_uchar2 v2[2]; +#endif +#if defined( __CL_UCHAR4__) + __cl_uchar4 v4; +#endif +}cl_uchar4; + +/* cl_uchar3 is identical in size, alignment and behavior to cl_uchar4. See section 6.1.5. */ +typedef cl_uchar4 cl_uchar3; + +typedef union +{ + cl_uchar CL_ALIGNED(8) s[8]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_uchar x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_uchar s0, s1, s2, s3, s4, s5, s6, s7; }; + __CL_ANON_STRUCT__ struct{ cl_uchar4 lo, hi; }; +#endif +#if defined( __CL_UCHAR2__) + __cl_uchar2 v2[4]; +#endif +#if defined( __CL_UCHAR4__) + __cl_uchar4 v4[2]; +#endif +#if defined( __CL_UCHAR8__ ) + __cl_uchar8 v8; +#endif +}cl_uchar8; + +typedef union +{ + cl_uchar CL_ALIGNED(16) s[16]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_uchar x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __CL_ANON_STRUCT__ struct{ cl_uchar s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __CL_ANON_STRUCT__ struct{ cl_uchar8 lo, hi; }; +#endif +#if defined( __CL_UCHAR2__) + __cl_uchar2 v2[8]; +#endif +#if defined( __CL_UCHAR4__) + __cl_uchar4 v4[4]; +#endif +#if defined( __CL_UCHAR8__ ) + __cl_uchar8 v8[2]; +#endif +#if defined( __CL_UCHAR16__ ) + __cl_uchar16 v16; +#endif +}cl_uchar16; + + +/* ---- cl_shortn ---- */ +typedef union +{ + cl_short CL_ALIGNED(4)
s[2]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_short x, y; }; + __CL_ANON_STRUCT__ struct{ cl_short s0, s1; }; + __CL_ANON_STRUCT__ struct{ cl_short lo, hi; }; +#endif +#if defined( __CL_SHORT2__) + __cl_short2 v2; +#endif +}cl_short2; + +typedef union +{ + cl_short CL_ALIGNED(8) s[4]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_short x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_short s0, s1, s2, s3; }; + __CL_ANON_STRUCT__ struct{ cl_short2 lo, hi; }; +#endif +#if defined( __CL_SHORT2__) + __cl_short2 v2[2]; +#endif +#if defined( __CL_SHORT4__) + __cl_short4 v4; +#endif +}cl_short4; + +/* cl_short3 is identical in size, alignment and behavior to cl_short4. See section 6.1.5. */ +typedef cl_short4 cl_short3; + +typedef union +{ + cl_short CL_ALIGNED(16) s[8]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_short x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_short s0, s1, s2, s3, s4, s5, s6, s7; }; + __CL_ANON_STRUCT__ struct{ cl_short4 lo, hi; }; +#endif +#if defined( __CL_SHORT2__) + __cl_short2 v2[4]; +#endif +#if defined( __CL_SHORT4__) + __cl_short4 v4[2]; +#endif +#if defined( __CL_SHORT8__ ) + __cl_short8 v8; +#endif +}cl_short8; + +typedef union +{ + cl_short CL_ALIGNED(32) s[16]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_short x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __CL_ANON_STRUCT__ struct{ cl_short s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __CL_ANON_STRUCT__ struct{ cl_short8 lo, hi; }; +#endif +#if defined( __CL_SHORT2__) + __cl_short2 v2[8]; +#endif +#if defined( __CL_SHORT4__) + __cl_short4 v4[4]; +#endif +#if defined( __CL_SHORT8__ ) + __cl_short8 v8[2]; +#endif +#if defined( __CL_SHORT16__ ) + __cl_short16 v16; +#endif +}cl_short16; + + +/* ---- cl_ushortn ---- */ +typedef union +{ + cl_ushort CL_ALIGNED(4) s[2]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_ushort x, y; }; + 
__CL_ANON_STRUCT__ struct{ cl_ushort s0, s1; }; + __CL_ANON_STRUCT__ struct{ cl_ushort lo, hi; }; +#endif +#if defined( __CL_USHORT2__) + __cl_ushort2 v2; +#endif +}cl_ushort2; + +typedef union +{ + cl_ushort CL_ALIGNED(8) s[4]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_ushort x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_ushort s0, s1, s2, s3; }; + __CL_ANON_STRUCT__ struct{ cl_ushort2 lo, hi; }; +#endif +#if defined( __CL_USHORT2__) + __cl_ushort2 v2[2]; +#endif +#if defined( __CL_USHORT4__) + __cl_ushort4 v4; +#endif +}cl_ushort4; + +/* cl_ushort3 is identical in size, alignment and behavior to cl_ushort4. See section 6.1.5. */ +typedef cl_ushort4 cl_ushort3; + +typedef union +{ + cl_ushort CL_ALIGNED(16) s[8]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_ushort x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_ushort s0, s1, s2, s3, s4, s5, s6, s7; }; + __CL_ANON_STRUCT__ struct{ cl_ushort4 lo, hi; }; +#endif +#if defined( __CL_USHORT2__) + __cl_ushort2 v2[4]; +#endif +#if defined( __CL_USHORT4__) + __cl_ushort4 v4[2]; +#endif +#if defined( __CL_USHORT8__ ) + __cl_ushort8 v8; +#endif +}cl_ushort8; + +typedef union +{ + cl_ushort CL_ALIGNED(32) s[16]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_ushort x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __CL_ANON_STRUCT__ struct{ cl_ushort s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __CL_ANON_STRUCT__ struct{ cl_ushort8 lo, hi; }; +#endif +#if defined( __CL_USHORT2__) + __cl_ushort2 v2[8]; +#endif +#if defined( __CL_USHORT4__) + __cl_ushort4 v4[4]; +#endif +#if defined( __CL_USHORT8__ ) + __cl_ushort8 v8[2]; +#endif +#if defined( __CL_USHORT16__ ) + __cl_ushort16 v16; +#endif +}cl_ushort16; + + +/* ---- cl_halfn ---- */ +typedef union +{ + cl_half CL_ALIGNED(4) s[2]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_half x, y; }; + __CL_ANON_STRUCT__ struct{ cl_half s0, s1; }; + 
__CL_ANON_STRUCT__ struct{ cl_half lo, hi; }; +#endif +#if defined( __CL_HALF2__) + __cl_half2 v2; +#endif +}cl_half2; + +typedef union +{ + cl_half CL_ALIGNED(8) s[4]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_half x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_half s0, s1, s2, s3; }; + __CL_ANON_STRUCT__ struct{ cl_half2 lo, hi; }; +#endif +#if defined( __CL_HALF2__) + __cl_half2 v2[2]; +#endif +#if defined( __CL_HALF4__) + __cl_half4 v4; +#endif +}cl_half4; + +/* cl_half3 is identical in size, alignment and behavior to cl_half4. See section 6.1.5. */ +typedef cl_half4 cl_half3; + +typedef union +{ + cl_half CL_ALIGNED(16) s[8]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_half x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_half s0, s1, s2, s3, s4, s5, s6, s7; }; + __CL_ANON_STRUCT__ struct{ cl_half4 lo, hi; }; +#endif +#if defined( __CL_HALF2__) + __cl_half2 v2[4]; +#endif +#if defined( __CL_HALF4__) + __cl_half4 v4[2]; +#endif +#if defined( __CL_HALF8__ ) + __cl_half8 v8; +#endif +}cl_half8; + +typedef union +{ + cl_half CL_ALIGNED(32) s[16]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_half x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __CL_ANON_STRUCT__ struct{ cl_half s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __CL_ANON_STRUCT__ struct{ cl_half8 lo, hi; }; +#endif +#if defined( __CL_HALF2__) + __cl_half2 v2[8]; +#endif +#if defined( __CL_HALF4__) + __cl_half4 v4[4]; +#endif +#if defined( __CL_HALF8__ ) + __cl_half8 v8[2]; +#endif +#if defined( __CL_HALF16__ ) + __cl_half16 v16; +#endif +}cl_half16; + +/* ---- cl_intn ---- */ +typedef union +{ + cl_int CL_ALIGNED(8) s[2]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_int x, y; }; + __CL_ANON_STRUCT__ struct{ cl_int s0, s1; }; + __CL_ANON_STRUCT__ struct{ cl_int lo, hi; }; +#endif +#if defined( __CL_INT2__) + __cl_int2 v2; +#endif +}cl_int2; + +typedef union +{ + 
cl_int CL_ALIGNED(16) s[4]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_int x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_int s0, s1, s2, s3; }; + __CL_ANON_STRUCT__ struct{ cl_int2 lo, hi; }; +#endif +#if defined( __CL_INT2__) + __cl_int2 v2[2]; +#endif +#if defined( __CL_INT4__) + __cl_int4 v4; +#endif +}cl_int4; + +/* cl_int3 is identical in size, alignment and behavior to cl_int4. See section 6.1.5. */ +typedef cl_int4 cl_int3; + +typedef union +{ + cl_int CL_ALIGNED(32) s[8]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_int x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_int s0, s1, s2, s3, s4, s5, s6, s7; }; + __CL_ANON_STRUCT__ struct{ cl_int4 lo, hi; }; +#endif +#if defined( __CL_INT2__) + __cl_int2 v2[4]; +#endif +#if defined( __CL_INT4__) + __cl_int4 v4[2]; +#endif +#if defined( __CL_INT8__ ) + __cl_int8 v8; +#endif +}cl_int8; + +typedef union +{ + cl_int CL_ALIGNED(64) s[16]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_int x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __CL_ANON_STRUCT__ struct{ cl_int s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __CL_ANON_STRUCT__ struct{ cl_int8 lo, hi; }; +#endif +#if defined( __CL_INT2__) + __cl_int2 v2[8]; +#endif +#if defined( __CL_INT4__) + __cl_int4 v4[4]; +#endif +#if defined( __CL_INT8__ ) + __cl_int8 v8[2]; +#endif +#if defined( __CL_INT16__ ) + __cl_int16 v16; +#endif +}cl_int16; + + +/* ---- cl_uintn ---- */ +typedef union +{ + cl_uint CL_ALIGNED(8) s[2]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_uint x, y; }; + __CL_ANON_STRUCT__ struct{ cl_uint s0, s1; }; + __CL_ANON_STRUCT__ struct{ cl_uint lo, hi; }; +#endif +#if defined( __CL_UINT2__) + __cl_uint2 v2; +#endif +}cl_uint2; + +typedef union +{ + cl_uint CL_ALIGNED(16) s[4]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_uint x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_uint s0, s1, s2, s3; }; + 
__CL_ANON_STRUCT__ struct{ cl_uint2 lo, hi; }; +#endif +#if defined( __CL_UINT2__) + __cl_uint2 v2[2]; +#endif +#if defined( __CL_UINT4__) + __cl_uint4 v4; +#endif +}cl_uint4; + +/* cl_uint3 is identical in size, alignment and behavior to cl_uint4. See section 6.1.5. */ +typedef cl_uint4 cl_uint3; + +typedef union +{ + cl_uint CL_ALIGNED(32) s[8]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_uint x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_uint s0, s1, s2, s3, s4, s5, s6, s7; }; + __CL_ANON_STRUCT__ struct{ cl_uint4 lo, hi; }; +#endif +#if defined( __CL_UINT2__) + __cl_uint2 v2[4]; +#endif +#if defined( __CL_UINT4__) + __cl_uint4 v4[2]; +#endif +#if defined( __CL_UINT8__ ) + __cl_uint8 v8; +#endif +}cl_uint8; + +typedef union +{ + cl_uint CL_ALIGNED(64) s[16]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_uint x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __CL_ANON_STRUCT__ struct{ cl_uint s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __CL_ANON_STRUCT__ struct{ cl_uint8 lo, hi; }; +#endif +#if defined( __CL_UINT2__) + __cl_uint2 v2[8]; +#endif +#if defined( __CL_UINT4__) + __cl_uint4 v4[4]; +#endif +#if defined( __CL_UINT8__ ) + __cl_uint8 v8[2]; +#endif +#if defined( __CL_UINT16__ ) + __cl_uint16 v16; +#endif +}cl_uint16; + +/* ---- cl_longn ---- */ +typedef union +{ + cl_long CL_ALIGNED(16) s[2]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_long x, y; }; + __CL_ANON_STRUCT__ struct{ cl_long s0, s1; }; + __CL_ANON_STRUCT__ struct{ cl_long lo, hi; }; +#endif +#if defined( __CL_LONG2__) + __cl_long2 v2; +#endif +}cl_long2; + +typedef union +{ + cl_long CL_ALIGNED(32) s[4]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_long x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_long s0, s1, s2, s3; }; + __CL_ANON_STRUCT__ struct{ cl_long2 lo, hi; }; +#endif +#if defined( __CL_LONG2__) + __cl_long2 v2[2]; +#endif +#if defined( 
__CL_LONG4__) + __cl_long4 v4; +#endif +}cl_long4; + +/* cl_long3 is identical in size, alignment and behavior to cl_long4. See section 6.1.5. */ +typedef cl_long4 cl_long3; + +typedef union +{ + cl_long CL_ALIGNED(64) s[8]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_long x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_long s0, s1, s2, s3, s4, s5, s6, s7; }; + __CL_ANON_STRUCT__ struct{ cl_long4 lo, hi; }; +#endif +#if defined( __CL_LONG2__) + __cl_long2 v2[4]; +#endif +#if defined( __CL_LONG4__) + __cl_long4 v4[2]; +#endif +#if defined( __CL_LONG8__ ) + __cl_long8 v8; +#endif +}cl_long8; + +typedef union +{ + cl_long CL_ALIGNED(128) s[16]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_long x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __CL_ANON_STRUCT__ struct{ cl_long s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __CL_ANON_STRUCT__ struct{ cl_long8 lo, hi; }; +#endif +#if defined( __CL_LONG2__) + __cl_long2 v2[8]; +#endif +#if defined( __CL_LONG4__) + __cl_long4 v4[4]; +#endif +#if defined( __CL_LONG8__ ) + __cl_long8 v8[2]; +#endif +#if defined( __CL_LONG16__ ) + __cl_long16 v16; +#endif +}cl_long16; + + +/* ---- cl_ulongn ---- */ +typedef union +{ + cl_ulong CL_ALIGNED(16) s[2]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_ulong x, y; }; + __CL_ANON_STRUCT__ struct{ cl_ulong s0, s1; }; + __CL_ANON_STRUCT__ struct{ cl_ulong lo, hi; }; +#endif +#if defined( __CL_ULONG2__) + __cl_ulong2 v2; +#endif +}cl_ulong2; + +typedef union +{ + cl_ulong CL_ALIGNED(32) s[4]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_ulong x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_ulong s0, s1, s2, s3; }; + __CL_ANON_STRUCT__ struct{ cl_ulong2 lo, hi; }; +#endif +#if defined( __CL_ULONG2__) + __cl_ulong2 v2[2]; +#endif +#if defined( __CL_ULONG4__) + __cl_ulong4 v4; +#endif +}cl_ulong4; + +/* cl_ulong3 is identical in size, alignment and behavior 
to cl_ulong4. See section 6.1.5. */ +typedef cl_ulong4 cl_ulong3; + +typedef union +{ + cl_ulong CL_ALIGNED(64) s[8]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_ulong x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_ulong s0, s1, s2, s3, s4, s5, s6, s7; }; + __CL_ANON_STRUCT__ struct{ cl_ulong4 lo, hi; }; +#endif +#if defined( __CL_ULONG2__) + __cl_ulong2 v2[4]; +#endif +#if defined( __CL_ULONG4__) + __cl_ulong4 v4[2]; +#endif +#if defined( __CL_ULONG8__ ) + __cl_ulong8 v8; +#endif +}cl_ulong8; + +typedef union +{ + cl_ulong CL_ALIGNED(128) s[16]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_ulong x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __CL_ANON_STRUCT__ struct{ cl_ulong s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __CL_ANON_STRUCT__ struct{ cl_ulong8 lo, hi; }; +#endif +#if defined( __CL_ULONG2__) + __cl_ulong2 v2[8]; +#endif +#if defined( __CL_ULONG4__) + __cl_ulong4 v4[4]; +#endif +#if defined( __CL_ULONG8__ ) + __cl_ulong8 v8[2]; +#endif +#if defined( __CL_ULONG16__ ) + __cl_ulong16 v16; +#endif +}cl_ulong16; + + +/* --- cl_floatn ---- */ + +typedef union +{ + cl_float CL_ALIGNED(8) s[2]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_float x, y; }; + __CL_ANON_STRUCT__ struct{ cl_float s0, s1; }; + __CL_ANON_STRUCT__ struct{ cl_float lo, hi; }; +#endif +#if defined( __CL_FLOAT2__) + __cl_float2 v2; +#endif +}cl_float2; + +typedef union +{ + cl_float CL_ALIGNED(16) s[4]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_float x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_float s0, s1, s2, s3; }; + __CL_ANON_STRUCT__ struct{ cl_float2 lo, hi; }; +#endif +#if defined( __CL_FLOAT2__) + __cl_float2 v2[2]; +#endif +#if defined( __CL_FLOAT4__) + __cl_float4 v4; +#endif +}cl_float4; + +/* cl_float3 is identical in size, alignment and behavior to cl_float4. See section 6.1.5. 
*/ +typedef cl_float4 cl_float3; + +typedef union +{ + cl_float CL_ALIGNED(32) s[8]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_float x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_float s0, s1, s2, s3, s4, s5, s6, s7; }; + __CL_ANON_STRUCT__ struct{ cl_float4 lo, hi; }; +#endif +#if defined( __CL_FLOAT2__) + __cl_float2 v2[4]; +#endif +#if defined( __CL_FLOAT4__) + __cl_float4 v4[2]; +#endif +#if defined( __CL_FLOAT8__ ) + __cl_float8 v8; +#endif +}cl_float8; + +typedef union +{ + cl_float CL_ALIGNED(64) s[16]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_float x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __CL_ANON_STRUCT__ struct{ cl_float s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __CL_ANON_STRUCT__ struct{ cl_float8 lo, hi; }; +#endif +#if defined( __CL_FLOAT2__) + __cl_float2 v2[8]; +#endif +#if defined( __CL_FLOAT4__) + __cl_float4 v4[4]; +#endif +#if defined( __CL_FLOAT8__ ) + __cl_float8 v8[2]; +#endif +#if defined( __CL_FLOAT16__ ) + __cl_float16 v16; +#endif +}cl_float16; + +/* --- cl_doublen ---- */ + +typedef union +{ + cl_double CL_ALIGNED(16) s[2]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_double x, y; }; + __CL_ANON_STRUCT__ struct{ cl_double s0, s1; }; + __CL_ANON_STRUCT__ struct{ cl_double lo, hi; }; +#endif +#if defined( __CL_DOUBLE2__) + __cl_double2 v2; +#endif +}cl_double2; + +typedef union +{ + cl_double CL_ALIGNED(32) s[4]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_double x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_double s0, s1, s2, s3; }; + __CL_ANON_STRUCT__ struct{ cl_double2 lo, hi; }; +#endif +#if defined( __CL_DOUBLE2__) + __cl_double2 v2[2]; +#endif +#if defined( __CL_DOUBLE4__) + __cl_double4 v4; +#endif +}cl_double4; + +/* cl_double3 is identical in size, alignment and behavior to cl_double4. See section 6.1.5. 
*/ +typedef cl_double4 cl_double3; + +typedef union +{ + cl_double CL_ALIGNED(64) s[8]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_double x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_double s0, s1, s2, s3, s4, s5, s6, s7; }; + __CL_ANON_STRUCT__ struct{ cl_double4 lo, hi; }; +#endif +#if defined( __CL_DOUBLE2__) + __cl_double2 v2[4]; +#endif +#if defined( __CL_DOUBLE4__) + __cl_double4 v4[2]; +#endif +#if defined( __CL_DOUBLE8__ ) + __cl_double8 v8; +#endif +}cl_double8; + +typedef union +{ + cl_double CL_ALIGNED(128) s[16]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_double x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __CL_ANON_STRUCT__ struct{ cl_double s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __CL_ANON_STRUCT__ struct{ cl_double8 lo, hi; }; +#endif +#if defined( __CL_DOUBLE2__) + __cl_double2 v2[8]; +#endif +#if defined( __CL_DOUBLE4__) + __cl_double4 v4[4]; +#endif +#if defined( __CL_DOUBLE8__ ) + __cl_double8 v8[2]; +#endif +#if defined( __CL_DOUBLE16__ ) + __cl_double16 v16; +#endif +}cl_double16; + +/* Macro to facilitate debugging + * Usage: + * Place CL_PROGRAM_STRING_DEBUG_INFO on the line before the first line of your source. + * The first line ends with: CL_PROGRAM_STRING_DEBUG_INFO \" + * Each line thereafter of OpenCL C source must end with: \n\ + * The last line ends in "; + * + * Example: + * + * const char *my_program = CL_PROGRAM_STRING_DEBUG_INFO "\ + * kernel void foo( int a, float * b ) \n\ + * { \n\ + * // my comment \n\ + * *b[ get_global_id(0)] = a; \n\ + * } \n\ + * "; + * + * This should correctly set up the line, (column) and file information for your source + * string so you can do source level debugging. 
+ */ +#define __CL_STRINGIFY( _x ) # _x +#define _CL_STRINGIFY( _x ) __CL_STRINGIFY( _x ) +#define CL_PROGRAM_STRING_DEBUG_INFO "#line " _CL_STRINGIFY(__LINE__) " \"" __FILE__ "\" \n\n" + +#ifdef __cplusplus +} +#endif + +#undef __CL_HAS_ANON_STRUCT__ +#undef __CL_ANON_STRUCT__ +#if defined( _WIN32) && defined(_MSC_VER) + #if _MSC_VER >=1500 + #pragma warning( pop ) + #endif +#endif + +#endif /* __CL_PLATFORM_H */ diff --git a/projects/hip/amdocl/CL/cl_version.h b/projects/hip/amdocl/CL/cl_version.h new file mode 100644 index 0000000000..bb766cb9bb --- /dev/null +++ b/projects/hip/amdocl/CL/cl_version.h @@ -0,0 +1,86 @@ +/******************************************************************************* + * Copyright (c) 2018 The Khronos Group Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and/or associated documentation files (the + * "Materials"), to deal in the Materials without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Materials, and to + * permit persons to whom the Materials are furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Materials. + * + * MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS + * KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS + * SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT + * https://www.khronos.org/registry/ + * + * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. + ******************************************************************************/ + +#ifndef __CL_VERSION_H +#define __CL_VERSION_H + +/* Detect which version to target */ +#if !defined(CL_TARGET_OPENCL_VERSION) +#pragma message("cl_version.h: CL_TARGET_OPENCL_VERSION is not defined. Defaulting to 220 (OpenCL 2.2)") +#define CL_TARGET_OPENCL_VERSION 220 +#endif +#if CL_TARGET_OPENCL_VERSION != 100 && \ + CL_TARGET_OPENCL_VERSION != 110 && \ + CL_TARGET_OPENCL_VERSION != 120 && \ + CL_TARGET_OPENCL_VERSION != 200 && \ + CL_TARGET_OPENCL_VERSION != 210 && \ + CL_TARGET_OPENCL_VERSION != 220 +#pragma message("cl_version: CL_TARGET_OPENCL_VERSION is not a valid value (100, 110, 120, 200, 210, 220). Defaulting to 220 (OpenCL 2.2)") +#undef CL_TARGET_OPENCL_VERSION +#define CL_TARGET_OPENCL_VERSION 220 +#endif + + +/* OpenCL Version */ +#if CL_TARGET_OPENCL_VERSION >= 220 && !defined(CL_VERSION_2_2) +#define CL_VERSION_2_2 1 +#endif +#if CL_TARGET_OPENCL_VERSION >= 210 && !defined(CL_VERSION_2_1) +#define CL_VERSION_2_1 1 +#endif +#if CL_TARGET_OPENCL_VERSION >= 200 && !defined(CL_VERSION_2_0) +#define CL_VERSION_2_0 1 +#endif +#if CL_TARGET_OPENCL_VERSION >= 120 && !defined(CL_VERSION_1_2) +#define CL_VERSION_1_2 1 +#endif +#if CL_TARGET_OPENCL_VERSION >= 110 && !defined(CL_VERSION_1_1) +#define CL_VERSION_1_1 1 +#endif +#if CL_TARGET_OPENCL_VERSION >= 100 && !defined(CL_VERSION_1_0) +#define CL_VERSION_1_0 1 +#endif + +/* Allow deprecated APIs for older OpenCL versions. 
*/ +#if CL_TARGET_OPENCL_VERSION <= 210 && !defined(CL_USE_DEPRECATED_OPENCL_2_1_APIS) +#define CL_USE_DEPRECATED_OPENCL_2_1_APIS +#endif +#if CL_TARGET_OPENCL_VERSION <= 200 && !defined(CL_USE_DEPRECATED_OPENCL_2_0_APIS) +#define CL_USE_DEPRECATED_OPENCL_2_0_APIS +#endif +#if CL_TARGET_OPENCL_VERSION <= 120 && !defined(CL_USE_DEPRECATED_OPENCL_1_2_APIS) +#define CL_USE_DEPRECATED_OPENCL_1_2_APIS +#endif +#if CL_TARGET_OPENCL_VERSION <= 110 && !defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) +#define CL_USE_DEPRECATED_OPENCL_1_1_APIS +#endif +#if CL_TARGET_OPENCL_VERSION <= 100 && !defined(CL_USE_DEPRECATED_OPENCL_1_0_APIS) +#define CL_USE_DEPRECATED_OPENCL_1_0_APIS +#endif + +#endif /* __CL_VERSION_H */ diff --git a/projects/hip/amdocl/CL/opencl.h b/projects/hip/amdocl/CL/opencl.h new file mode 100644 index 0000000000..143d1d2dc6 --- /dev/null +++ b/projects/hip/amdocl/CL/opencl.h @@ -0,0 +1,47 @@ +/******************************************************************************* + * Copyright (c) 2008-2015 The Khronos Group Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and/or associated documentation files (the + * "Materials"), to deal in the Materials without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Materials, and to + * permit persons to whom the Materials are furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Materials. + * + * MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS + * KHRONOS STANDARDS. 
THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS + * SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT + * https://www.khronos.org/registry/ + * + * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. + ******************************************************************************/ + +/* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */ + +#ifndef __OPENCL_H +#define __OPENCL_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include <CL/cl.h> +#include <CL/cl_gl.h> +#include <CL/cl_gl_ext.h> +#include <CL/cl_ext.h> + +#ifdef __cplusplus +} +#endif + +#endif /* __OPENCL_H */ diff --git a/projects/hip/amdocl/EGL/egl.h b/projects/hip/amdocl/EGL/egl.h new file mode 100644 index 0000000000..99ea342a47 --- /dev/null +++ b/projects/hip/amdocl/EGL/egl.h @@ -0,0 +1,329 @@ +/* -*- mode: c; tab-width: 8; -*- */ +/* vi: set sw=4 ts=8: */ +/* Reference version of egl.h for EGL 1.4. + * $Revision: 9356 $ on $Date: 2009-10-21 02:52:25 -0700 (Wed, 21 Oct 2009) $ + */ + +/* +** Copyright (c) 2007-2009 The Khronos Group Inc. 
+** +** Permission is hereby granted, free of charge, to any person obtaining a +** copy of this software and/or associated documentation files (the +** "Materials"), to deal in the Materials without restriction, including +** without limitation the rights to use, copy, modify, merge, publish, +** distribute, sublicense, and/or sell copies of the Materials, and to +** permit persons to whom the Materials are furnished to do so, subject to +** the following conditions: +** +** The above copyright notice and this permission notice shall be included +** in all copies or substantial portions of the Materials. +** +** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +** MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. +*/ + +#ifndef __egl_h_ +#define __egl_h_ + +/* All platform-dependent types and macro boilerplate (such as EGLAPI + * and EGLAPIENTRY) should go in eglplatform.h. + */ +#include <EGL/eglplatform.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* EGL Types */ +/* EGLint is defined in eglplatform.h */ +typedef unsigned int EGLBoolean; +typedef unsigned int EGLenum; +typedef void *EGLConfig; +typedef void *EGLContext; +typedef void *EGLDisplay; +typedef void *EGLSurface; +typedef void *EGLClientBuffer; + +/* EGL Versioning */ +#define EGL_VERSION_1_0 1 +#define EGL_VERSION_1_1 1 +#define EGL_VERSION_1_2 1 +#define EGL_VERSION_1_3 1 +#define EGL_VERSION_1_4 1 + +/* EGL Enumerants. Bitmasks and other exceptional cases aside, most + * enums are assigned unique values starting at 0x3000. 
+ */ + +/* EGL aliases */ +#define EGL_FALSE 0 +#define EGL_TRUE 1 + +/* Out-of-band handle values */ +#define EGL_DEFAULT_DISPLAY ((EGLNativeDisplayType)0) +#define EGL_NO_CONTEXT ((EGLContext)0) +#define EGL_NO_DISPLAY ((EGLDisplay)0) +#define EGL_NO_SURFACE ((EGLSurface)0) + +/* Out-of-band attribute value */ +#define EGL_DONT_CARE ((EGLint)-1) + +/* Errors / GetError return values */ +#define EGL_SUCCESS 0x3000 +#define EGL_NOT_INITIALIZED 0x3001 +#define EGL_BAD_ACCESS 0x3002 +#define EGL_BAD_ALLOC 0x3003 +#define EGL_BAD_ATTRIBUTE 0x3004 +#define EGL_BAD_CONFIG 0x3005 +#define EGL_BAD_CONTEXT 0x3006 +#define EGL_BAD_CURRENT_SURFACE 0x3007 +#define EGL_BAD_DISPLAY 0x3008 +#define EGL_BAD_MATCH 0x3009 +#define EGL_BAD_NATIVE_PIXMAP 0x300A +#define EGL_BAD_NATIVE_WINDOW 0x300B +#define EGL_BAD_PARAMETER 0x300C +#define EGL_BAD_SURFACE 0x300D +#define EGL_CONTEXT_LOST 0x300E /* EGL 1.1 - IMG_power_management */ + +/* Reserved 0x300F-0x301F for additional errors */ + +/* Config attributes */ +#define EGL_BUFFER_SIZE 0x3020 +#define EGL_ALPHA_SIZE 0x3021 +#define EGL_BLUE_SIZE 0x3022 +#define EGL_GREEN_SIZE 0x3023 +#define EGL_RED_SIZE 0x3024 +#define EGL_DEPTH_SIZE 0x3025 +#define EGL_STENCIL_SIZE 0x3026 +#define EGL_CONFIG_CAVEAT 0x3027 +#define EGL_CONFIG_ID 0x3028 +#define EGL_LEVEL 0x3029 +#define EGL_MAX_PBUFFER_HEIGHT 0x302A +#define EGL_MAX_PBUFFER_PIXELS 0x302B +#define EGL_MAX_PBUFFER_WIDTH 0x302C +#define EGL_NATIVE_RENDERABLE 0x302D +#define EGL_NATIVE_VISUAL_ID 0x302E +#define EGL_NATIVE_VISUAL_TYPE 0x302F +#define EGL_SAMPLES 0x3031 +#define EGL_SAMPLE_BUFFERS 0x3032 +#define EGL_SURFACE_TYPE 0x3033 +#define EGL_TRANSPARENT_TYPE 0x3034 +#define EGL_TRANSPARENT_BLUE_VALUE 0x3035 +#define EGL_TRANSPARENT_GREEN_VALUE 0x3036 +#define EGL_TRANSPARENT_RED_VALUE 0x3037 +#define EGL_NONE 0x3038 /* Attrib list terminator */ +#define EGL_BIND_TO_TEXTURE_RGB 0x3039 +#define EGL_BIND_TO_TEXTURE_RGBA 0x303A +#define EGL_MIN_SWAP_INTERVAL 0x303B +#define 
EGL_MAX_SWAP_INTERVAL 0x303C +#define EGL_LUMINANCE_SIZE 0x303D +#define EGL_ALPHA_MASK_SIZE 0x303E +#define EGL_COLOR_BUFFER_TYPE 0x303F +#define EGL_RENDERABLE_TYPE 0x3040 +#define EGL_MATCH_NATIVE_PIXMAP 0x3041 /* Pseudo-attribute (not queryable) */ +#define EGL_CONFORMANT 0x3042 + +/* Reserved 0x3041-0x304F for additional config attributes */ + +/* Config attribute values */ +#define EGL_SLOW_CONFIG 0x3050 /* EGL_CONFIG_CAVEAT value */ +#define EGL_NON_CONFORMANT_CONFIG 0x3051 /* EGL_CONFIG_CAVEAT value */ +#define EGL_TRANSPARENT_RGB 0x3052 /* EGL_TRANSPARENT_TYPE value */ +#define EGL_RGB_BUFFER 0x308E /* EGL_COLOR_BUFFER_TYPE value */ +#define EGL_LUMINANCE_BUFFER 0x308F /* EGL_COLOR_BUFFER_TYPE value */ + +/* More config attribute values, for EGL_TEXTURE_FORMAT */ +#define EGL_NO_TEXTURE 0x305C +#define EGL_TEXTURE_RGB 0x305D +#define EGL_TEXTURE_RGBA 0x305E +#define EGL_TEXTURE_2D 0x305F + +/* Config attribute mask bits */ +#define EGL_PBUFFER_BIT 0x0001 /* EGL_SURFACE_TYPE mask bits */ +#define EGL_PIXMAP_BIT 0x0002 /* EGL_SURFACE_TYPE mask bits */ +#define EGL_WINDOW_BIT 0x0004 /* EGL_SURFACE_TYPE mask bits */ +#define EGL_VG_COLORSPACE_LINEAR_BIT 0x0020 /* EGL_SURFACE_TYPE mask bits */ +#define EGL_VG_ALPHA_FORMAT_PRE_BIT 0x0040 /* EGL_SURFACE_TYPE mask bits */ +#define EGL_MULTISAMPLE_RESOLVE_BOX_BIT 0x0200 /* EGL_SURFACE_TYPE mask bits */ +#define EGL_SWAP_BEHAVIOR_PRESERVED_BIT 0x0400 /* EGL_SURFACE_TYPE mask bits */ + +#define EGL_OPENGL_ES_BIT 0x0001 /* EGL_RENDERABLE_TYPE mask bits */ +#define EGL_OPENVG_BIT 0x0002 /* EGL_RENDERABLE_TYPE mask bits */ +#define EGL_OPENGL_ES2_BIT 0x0004 /* EGL_RENDERABLE_TYPE mask bits */ +#define EGL_OPENGL_BIT 0x0008 /* EGL_RENDERABLE_TYPE mask bits */ + +/* QueryString targets */ +#define EGL_VENDOR 0x3053 +#define EGL_VERSION 0x3054 +#define EGL_EXTENSIONS 0x3055 +#define EGL_CLIENT_APIS 0x308D + +/* QuerySurface / SurfaceAttrib / CreatePbufferSurface targets */ +#define EGL_HEIGHT 0x3056 +#define EGL_WIDTH 
0x3057 +#define EGL_LARGEST_PBUFFER 0x3058 +#define EGL_TEXTURE_FORMAT 0x3080 +#define EGL_TEXTURE_TARGET 0x3081 +#define EGL_MIPMAP_TEXTURE 0x3082 +#define EGL_MIPMAP_LEVEL 0x3083 +#define EGL_RENDER_BUFFER 0x3086 +#define EGL_VG_COLORSPACE 0x3087 +#define EGL_VG_ALPHA_FORMAT 0x3088 +#define EGL_HORIZONTAL_RESOLUTION 0x3090 +#define EGL_VERTICAL_RESOLUTION 0x3091 +#define EGL_PIXEL_ASPECT_RATIO 0x3092 +#define EGL_SWAP_BEHAVIOR 0x3093 +#define EGL_MULTISAMPLE_RESOLVE 0x3099 + +/* EGL_RENDER_BUFFER values / BindTexImage / ReleaseTexImage buffer targets */ +#define EGL_BACK_BUFFER 0x3084 +#define EGL_SINGLE_BUFFER 0x3085 + +/* OpenVG color spaces */ +#define EGL_VG_COLORSPACE_sRGB 0x3089 /* EGL_VG_COLORSPACE value */ +#define EGL_VG_COLORSPACE_LINEAR 0x308A /* EGL_VG_COLORSPACE value */ + +/* OpenVG alpha formats */ +#define EGL_VG_ALPHA_FORMAT_NONPRE 0x308B /* EGL_ALPHA_FORMAT value */ +#define EGL_VG_ALPHA_FORMAT_PRE 0x308C /* EGL_ALPHA_FORMAT value */ + +/* Constant scale factor by which fractional display resolutions & + * aspect ratio are scaled when queried as integer values. 
+ */ +#define EGL_DISPLAY_SCALING 10000 + +/* Unknown display resolution/aspect ratio */ +#define EGL_UNKNOWN ((EGLint)-1) + +/* Back buffer swap behaviors */ +#define EGL_BUFFER_PRESERVED 0x3094 /* EGL_SWAP_BEHAVIOR value */ +#define EGL_BUFFER_DESTROYED 0x3095 /* EGL_SWAP_BEHAVIOR value */ + +/* CreatePbufferFromClientBuffer buffer types */ +#define EGL_OPENVG_IMAGE 0x3096 + +/* QueryContext targets */ +#define EGL_CONTEXT_CLIENT_TYPE 0x3097 + +/* CreateContext attributes */ +#define EGL_CONTEXT_CLIENT_VERSION 0x3098 + +/* Multisample resolution behaviors */ +#define EGL_MULTISAMPLE_RESOLVE_DEFAULT 0x309A /* EGL_MULTISAMPLE_RESOLVE value */ +#define EGL_MULTISAMPLE_RESOLVE_BOX 0x309B /* EGL_MULTISAMPLE_RESOLVE value */ + +/* BindAPI/QueryAPI targets */ +#define EGL_OPENGL_ES_API 0x30A0 +#define EGL_OPENVG_API 0x30A1 +#define EGL_OPENGL_API 0x30A2 + +/* GetCurrentSurface targets */ +#define EGL_DRAW 0x3059 +#define EGL_READ 0x305A + +/* WaitNative engines */ +#define EGL_CORE_NATIVE_ENGINE 0x305B + +/* EGL 1.2 tokens renamed for consistency in EGL 1.3 */ +#define EGL_COLORSPACE EGL_VG_COLORSPACE +#define EGL_ALPHA_FORMAT EGL_VG_ALPHA_FORMAT +#define EGL_COLORSPACE_sRGB EGL_VG_COLORSPACE_sRGB +#define EGL_COLORSPACE_LINEAR EGL_VG_COLORSPACE_LINEAR +#define EGL_ALPHA_FORMAT_NONPRE EGL_VG_ALPHA_FORMAT_NONPRE +#define EGL_ALPHA_FORMAT_PRE EGL_VG_ALPHA_FORMAT_PRE + +/* EGL extensions must request enum blocks from the Khronos + * API Registrar, who maintains the enumerant registry. Submit + * a bug in Khronos Bugzilla against task "Registry". 
+ */ + + + +/* EGL Functions */ + +EGLAPI EGLint EGLAPIENTRY eglGetError(void); + +EGLAPI EGLDisplay EGLAPIENTRY eglGetDisplay(EGLNativeDisplayType display_id); +EGLAPI EGLBoolean EGLAPIENTRY eglInitialize(EGLDisplay dpy, EGLint *major, EGLint *minor); +EGLAPI EGLBoolean EGLAPIENTRY eglTerminate(EGLDisplay dpy); + +EGLAPI const char * EGLAPIENTRY eglQueryString(EGLDisplay dpy, EGLint name); + +EGLAPI EGLBoolean EGLAPIENTRY eglGetConfigs(EGLDisplay dpy, EGLConfig *configs, + EGLint config_size, EGLint *num_config); +EGLAPI EGLBoolean EGLAPIENTRY eglChooseConfig(EGLDisplay dpy, const EGLint *attrib_list, + EGLConfig *configs, EGLint config_size, + EGLint *num_config); +EGLAPI EGLBoolean EGLAPIENTRY eglGetConfigAttrib(EGLDisplay dpy, EGLConfig config, + EGLint attribute, EGLint *value); + +EGLAPI EGLSurface EGLAPIENTRY eglCreateWindowSurface(EGLDisplay dpy, EGLConfig config, + EGLNativeWindowType win, + const EGLint *attrib_list); +EGLAPI EGLSurface EGLAPIENTRY eglCreatePbufferSurface(EGLDisplay dpy, EGLConfig config, + const EGLint *attrib_list); +EGLAPI EGLSurface EGLAPIENTRY eglCreatePixmapSurface(EGLDisplay dpy, EGLConfig config, + EGLNativePixmapType pixmap, + const EGLint *attrib_list); +EGLAPI EGLBoolean EGLAPIENTRY eglDestroySurface(EGLDisplay dpy, EGLSurface surface); +EGLAPI EGLBoolean EGLAPIENTRY eglQuerySurface(EGLDisplay dpy, EGLSurface surface, + EGLint attribute, EGLint *value); + +EGLAPI EGLBoolean EGLAPIENTRY eglBindAPI(EGLenum api); +EGLAPI EGLenum EGLAPIENTRY eglQueryAPI(void); + +EGLAPI EGLBoolean EGLAPIENTRY eglWaitClient(void); + +EGLAPI EGLBoolean EGLAPIENTRY eglReleaseThread(void); + +EGLAPI EGLSurface EGLAPIENTRY eglCreatePbufferFromClientBuffer( + EGLDisplay dpy, EGLenum buftype, EGLClientBuffer buffer, + EGLConfig config, const EGLint *attrib_list); + +EGLAPI EGLBoolean EGLAPIENTRY eglSurfaceAttrib(EGLDisplay dpy, EGLSurface surface, + EGLint attribute, EGLint value); +EGLAPI EGLBoolean EGLAPIENTRY eglBindTexImage(EGLDisplay dpy, EGLSurface 
surface, EGLint buffer); +EGLAPI EGLBoolean EGLAPIENTRY eglReleaseTexImage(EGLDisplay dpy, EGLSurface surface, EGLint buffer); + + +EGLAPI EGLBoolean EGLAPIENTRY eglSwapInterval(EGLDisplay dpy, EGLint interval); + + +EGLAPI EGLContext EGLAPIENTRY eglCreateContext(EGLDisplay dpy, EGLConfig config, + EGLContext share_context, + const EGLint *attrib_list); +EGLAPI EGLBoolean EGLAPIENTRY eglDestroyContext(EGLDisplay dpy, EGLContext ctx); +EGLAPI EGLBoolean EGLAPIENTRY eglMakeCurrent(EGLDisplay dpy, EGLSurface draw, + EGLSurface read, EGLContext ctx); + +EGLAPI EGLContext EGLAPIENTRY eglGetCurrentContext(void); +EGLAPI EGLSurface EGLAPIENTRY eglGetCurrentSurface(EGLint readdraw); +EGLAPI EGLDisplay EGLAPIENTRY eglGetCurrentDisplay(void); +EGLAPI EGLBoolean EGLAPIENTRY eglQueryContext(EGLDisplay dpy, EGLContext ctx, + EGLint attribute, EGLint *value); + +EGLAPI EGLBoolean EGLAPIENTRY eglWaitGL(void); +EGLAPI EGLBoolean EGLAPIENTRY eglWaitNative(EGLint engine); +EGLAPI EGLBoolean EGLAPIENTRY eglSwapBuffers(EGLDisplay dpy, EGLSurface surface); +EGLAPI EGLBoolean EGLAPIENTRY eglCopyBuffers(EGLDisplay dpy, EGLSurface surface, + EGLNativePixmapType target); + +/* This is a generic function pointer type, whose name indicates it must + * be cast to the proper type *and calling convention* before use. + */ +typedef void (*__eglMustCastToProperFunctionPointerType)(void); + +/* Now, define eglGetProcAddress using the generic function ptr. type */ +EGLAPI __eglMustCastToProperFunctionPointerType EGLAPIENTRY + eglGetProcAddress(const char *procname); + +#ifdef __cplusplus +} +#endif + +#endif /* __egl_h_ */ diff --git a/projects/hip/amdocl/EGL/eglext.h b/projects/hip/amdocl/EGL/eglext.h new file mode 100644 index 0000000000..2317b0cf45 --- /dev/null +++ b/projects/hip/amdocl/EGL/eglext.h @@ -0,0 +1,645 @@ +#ifndef __eglext_h_ +#define __eglext_h_ 1 + +#ifdef __cplusplus +extern "C" { +#endif + +/* +** Copyright (c) 2013 The Khronos Group Inc. 
+** +** Permission is hereby granted, free of charge, to any person obtaining a +** copy of this software and/or associated documentation files (the +** "Materials"), to deal in the Materials without restriction, including +** without limitation the rights to use, copy, modify, merge, publish, +** distribute, sublicense, and/or sell copies of the Materials, and to +** permit persons to whom the Materials are furnished to do so, subject to +** the following conditions: +** +** The above copyright notice and this permission notice shall be included +** in all copies or substantial portions of the Materials. +** +** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +** MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. +*/ +/* +** This header is generated from the Khronos OpenGL / OpenGL ES XML +** API Registry. 
The current version of the Registry, generator scripts +** used to make the header, and the header can be found at +** http://www.opengl.org/registry/ +** +** Khronos $Revision: 24350 $ on $Date: 2013-12-04 12:46:23 -0800 (Wed, 04 Dec 2013) $ +*/ + +#include <EGL/eglplatform.h> + +#define EGL_EGLEXT_VERSION 20131204 + +/* Generated C header for: + * API: egl + * Versions considered: .* + * Versions emitted: _nomatch_^ + * Default extensions included: egl + * Additional extensions included: _nomatch_^ + * Extensions removed: _nomatch_^ + */ + +#ifndef EGL_KHR_cl_event +#define EGL_KHR_cl_event 1 +#define EGL_CL_EVENT_HANDLE_KHR 0x309C +#define EGL_SYNC_CL_EVENT_KHR 0x30FE +#define EGL_SYNC_CL_EVENT_COMPLETE_KHR 0x30FF +#endif /* EGL_KHR_cl_event */ + +#ifndef EGL_KHR_cl_event2 +#define EGL_KHR_cl_event2 1 +typedef void *EGLSyncKHR; +typedef intptr_t EGLAttribKHR; +typedef EGLSyncKHR (EGLAPIENTRYP PFNEGLCREATESYNC64KHRPROC) (EGLDisplay dpy, EGLenum type, const EGLAttribKHR *attrib_list); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLSyncKHR EGLAPIENTRY eglCreateSync64KHR (EGLDisplay dpy, EGLenum type, const EGLAttribKHR *attrib_list); +#endif +#endif /* EGL_KHR_cl_event2 */ + +#ifndef EGL_KHR_client_get_all_proc_addresses +#define EGL_KHR_client_get_all_proc_addresses 1 +#endif /* EGL_KHR_client_get_all_proc_addresses */ + +#ifndef EGL_KHR_config_attribs +#define EGL_KHR_config_attribs 1 +#define EGL_CONFORMANT_KHR 0x3042 +#define EGL_VG_COLORSPACE_LINEAR_BIT_KHR 0x0020 +#define EGL_VG_ALPHA_FORMAT_PRE_BIT_KHR 0x0040 +#endif /* EGL_KHR_config_attribs */ + +#ifndef EGL_KHR_create_context +#define EGL_KHR_create_context 1 +#define EGL_CONTEXT_MAJOR_VERSION_KHR 0x3098 +#define EGL_CONTEXT_MINOR_VERSION_KHR 0x30FB +#define EGL_CONTEXT_FLAGS_KHR 0x30FC +#define EGL_CONTEXT_OPENGL_PROFILE_MASK_KHR 0x30FD +#define EGL_CONTEXT_OPENGL_RESET_NOTIFICATION_STRATEGY_KHR 0x31BD +#define EGL_NO_RESET_NOTIFICATION_KHR 0x31BE +#define EGL_LOSE_CONTEXT_ON_RESET_KHR 0x31BF +#define 
EGL_CONTEXT_OPENGL_DEBUG_BIT_KHR 0x00000001 +#define EGL_CONTEXT_OPENGL_FORWARD_COMPATIBLE_BIT_KHR 0x00000002 +#define EGL_CONTEXT_OPENGL_ROBUST_ACCESS_BIT_KHR 0x00000004 +#define EGL_CONTEXT_OPENGL_CORE_PROFILE_BIT_KHR 0x00000001 +#define EGL_CONTEXT_OPENGL_COMPATIBILITY_PROFILE_BIT_KHR 0x00000002 +#define EGL_OPENGL_ES3_BIT_KHR 0x00000040 +#endif /* EGL_KHR_create_context */ + +#ifndef EGL_KHR_fence_sync +#define EGL_KHR_fence_sync 1 +#ifdef KHRONOS_SUPPORT_INT64 +#define EGL_SYNC_PRIOR_COMMANDS_COMPLETE_KHR 0x30F0 +#define EGL_SYNC_CONDITION_KHR 0x30F8 +#define EGL_SYNC_FENCE_KHR 0x30F9 +#endif /* KHRONOS_SUPPORT_INT64 */ +#endif /* EGL_KHR_fence_sync */ + +#ifndef EGL_KHR_get_all_proc_addresses +#define EGL_KHR_get_all_proc_addresses 1 +#endif /* EGL_KHR_get_all_proc_addresses */ + +#ifndef EGL_KHR_gl_renderbuffer_image +#define EGL_KHR_gl_renderbuffer_image 1 +#define EGL_GL_RENDERBUFFER_KHR 0x30B9 +#endif /* EGL_KHR_gl_renderbuffer_image */ + +#ifndef EGL_KHR_gl_texture_2D_image +#define EGL_KHR_gl_texture_2D_image 1 +#define EGL_GL_TEXTURE_2D_KHR 0x30B1 +#define EGL_GL_TEXTURE_LEVEL_KHR 0x30BC +#endif /* EGL_KHR_gl_texture_2D_image */ + +#ifndef EGL_KHR_gl_texture_3D_image +#define EGL_KHR_gl_texture_3D_image 1 +#define EGL_GL_TEXTURE_3D_KHR 0x30B2 +#define EGL_GL_TEXTURE_ZOFFSET_KHR 0x30BD +#endif /* EGL_KHR_gl_texture_3D_image */ + +#ifndef EGL_KHR_gl_texture_cubemap_image +#define EGL_KHR_gl_texture_cubemap_image 1 +#define EGL_GL_TEXTURE_CUBE_MAP_POSITIVE_X_KHR 0x30B3 +#define EGL_GL_TEXTURE_CUBE_MAP_NEGATIVE_X_KHR 0x30B4 +#define EGL_GL_TEXTURE_CUBE_MAP_POSITIVE_Y_KHR 0x30B5 +#define EGL_GL_TEXTURE_CUBE_MAP_NEGATIVE_Y_KHR 0x30B6 +#define EGL_GL_TEXTURE_CUBE_MAP_POSITIVE_Z_KHR 0x30B7 +#define EGL_GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_KHR 0x30B8 +#endif /* EGL_KHR_gl_texture_cubemap_image */ + +#ifndef EGL_KHR_image +#define EGL_KHR_image 1 +typedef void *EGLImageKHR; +#define EGL_NATIVE_PIXMAP_KHR 0x30B0 +#define EGL_NO_IMAGE_KHR ((EGLImageKHR)0) +typedef 
EGLImageKHR (EGLAPIENTRYP PFNEGLCREATEIMAGEKHRPROC) (EGLDisplay dpy, EGLContext ctx, EGLenum target, EGLClientBuffer buffer, const EGLint *attrib_list); +typedef EGLBoolean (EGLAPIENTRYP PFNEGLDESTROYIMAGEKHRPROC) (EGLDisplay dpy, EGLImageKHR image); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLImageKHR EGLAPIENTRY eglCreateImageKHR (EGLDisplay dpy, EGLContext ctx, EGLenum target, EGLClientBuffer buffer, const EGLint *attrib_list); +EGLAPI EGLBoolean EGLAPIENTRY eglDestroyImageKHR (EGLDisplay dpy, EGLImageKHR image); +#endif +#endif /* EGL_KHR_image */ + +#ifndef EGL_KHR_image_base +#define EGL_KHR_image_base 1 +#define EGL_IMAGE_PRESERVED_KHR 0x30D2 +#endif /* EGL_KHR_image_base */ + +#ifndef EGL_KHR_image_pixmap +#define EGL_KHR_image_pixmap 1 +#endif /* EGL_KHR_image_pixmap */ + +#ifndef EGL_KHR_lock_surface +#define EGL_KHR_lock_surface 1 +#define EGL_READ_SURFACE_BIT_KHR 0x0001 +#define EGL_WRITE_SURFACE_BIT_KHR 0x0002 +#define EGL_LOCK_SURFACE_BIT_KHR 0x0080 +#define EGL_OPTIMAL_FORMAT_BIT_KHR 0x0100 +#define EGL_MATCH_FORMAT_KHR 0x3043 +#define EGL_FORMAT_RGB_565_EXACT_KHR 0x30C0 +#define EGL_FORMAT_RGB_565_KHR 0x30C1 +#define EGL_FORMAT_RGBA_8888_EXACT_KHR 0x30C2 +#define EGL_FORMAT_RGBA_8888_KHR 0x30C3 +#define EGL_MAP_PRESERVE_PIXELS_KHR 0x30C4 +#define EGL_LOCK_USAGE_HINT_KHR 0x30C5 +#define EGL_BITMAP_POINTER_KHR 0x30C6 +#define EGL_BITMAP_PITCH_KHR 0x30C7 +#define EGL_BITMAP_ORIGIN_KHR 0x30C8 +#define EGL_BITMAP_PIXEL_RED_OFFSET_KHR 0x30C9 +#define EGL_BITMAP_PIXEL_GREEN_OFFSET_KHR 0x30CA +#define EGL_BITMAP_PIXEL_BLUE_OFFSET_KHR 0x30CB +#define EGL_BITMAP_PIXEL_ALPHA_OFFSET_KHR 0x30CC +#define EGL_BITMAP_PIXEL_LUMINANCE_OFFSET_KHR 0x30CD +#define EGL_LOWER_LEFT_KHR 0x30CE +#define EGL_UPPER_LEFT_KHR 0x30CF +typedef EGLBoolean (EGLAPIENTRYP PFNEGLLOCKSURFACEKHRPROC) (EGLDisplay dpy, EGLSurface surface, const EGLint *attrib_list); +typedef EGLBoolean (EGLAPIENTRYP PFNEGLUNLOCKSURFACEKHRPROC) (EGLDisplay dpy, EGLSurface surface); +#ifdef 
EGL_EGLEXT_PROTOTYPES +EGLAPI EGLBoolean EGLAPIENTRY eglLockSurfaceKHR (EGLDisplay dpy, EGLSurface surface, const EGLint *attrib_list); +EGLAPI EGLBoolean EGLAPIENTRY eglUnlockSurfaceKHR (EGLDisplay dpy, EGLSurface surface); +#endif +#endif /* EGL_KHR_lock_surface */ + +#ifndef EGL_KHR_lock_surface2 +#define EGL_KHR_lock_surface2 1 +#define EGL_BITMAP_PIXEL_SIZE_KHR 0x3110 +#endif /* EGL_KHR_lock_surface2 */ + +#ifndef EGL_KHR_lock_surface3 +#define EGL_KHR_lock_surface3 1 +typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYSURFACE64KHRPROC) (EGLDisplay dpy, EGLSurface surface, EGLint attribute, EGLAttribKHR *value); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLBoolean EGLAPIENTRY eglQuerySurface64KHR (EGLDisplay dpy, EGLSurface surface, EGLint attribute, EGLAttribKHR *value); +#endif +#endif /* EGL_KHR_lock_surface3 */ + +#ifndef EGL_KHR_reusable_sync +#define EGL_KHR_reusable_sync 1 +typedef khronos_utime_nanoseconds_t EGLTimeKHR; +#ifdef KHRONOS_SUPPORT_INT64 +#define EGL_SYNC_STATUS_KHR 0x30F1 +#define EGL_SIGNALED_KHR 0x30F2 +#define EGL_UNSIGNALED_KHR 0x30F3 +#define EGL_TIMEOUT_EXPIRED_KHR 0x30F5 +#define EGL_CONDITION_SATISFIED_KHR 0x30F6 +#define EGL_SYNC_TYPE_KHR 0x30F7 +#define EGL_SYNC_REUSABLE_KHR 0x30FA +#define EGL_SYNC_FLUSH_COMMANDS_BIT_KHR 0x0001 +#define EGL_FOREVER_KHR 0xFFFFFFFFFFFFFFFFull +#define EGL_NO_SYNC_KHR ((EGLSyncKHR)0) +typedef EGLSyncKHR (EGLAPIENTRYP PFNEGLCREATESYNCKHRPROC) (EGLDisplay dpy, EGLenum type, const EGLint *attrib_list); +typedef EGLBoolean (EGLAPIENTRYP PFNEGLDESTROYSYNCKHRPROC) (EGLDisplay dpy, EGLSyncKHR sync); +typedef EGLint (EGLAPIENTRYP PFNEGLCLIENTWAITSYNCKHRPROC) (EGLDisplay dpy, EGLSyncKHR sync, EGLint flags, EGLTimeKHR timeout); +typedef EGLBoolean (EGLAPIENTRYP PFNEGLSIGNALSYNCKHRPROC) (EGLDisplay dpy, EGLSyncKHR sync, EGLenum mode); +typedef EGLBoolean (EGLAPIENTRYP PFNEGLGETSYNCATTRIBKHRPROC) (EGLDisplay dpy, EGLSyncKHR sync, EGLint attribute, EGLint *value); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLSyncKHR 
EGLAPIENTRY eglCreateSyncKHR (EGLDisplay dpy, EGLenum type, const EGLint *attrib_list); +EGLAPI EGLBoolean EGLAPIENTRY eglDestroySyncKHR (EGLDisplay dpy, EGLSyncKHR sync); +EGLAPI EGLint EGLAPIENTRY eglClientWaitSyncKHR (EGLDisplay dpy, EGLSyncKHR sync, EGLint flags, EGLTimeKHR timeout); +EGLAPI EGLBoolean EGLAPIENTRY eglSignalSyncKHR (EGLDisplay dpy, EGLSyncKHR sync, EGLenum mode); +EGLAPI EGLBoolean EGLAPIENTRY eglGetSyncAttribKHR (EGLDisplay dpy, EGLSyncKHR sync, EGLint attribute, EGLint *value); +#endif +#endif /* KHRONOS_SUPPORT_INT64 */ +#endif /* EGL_KHR_reusable_sync */ + +#ifndef EGL_KHR_stream +#define EGL_KHR_stream 1 +typedef void *EGLStreamKHR; +typedef khronos_uint64_t EGLuint64KHR; +#ifdef KHRONOS_SUPPORT_INT64 +#define EGL_NO_STREAM_KHR ((EGLStreamKHR)0) +#define EGL_CONSUMER_LATENCY_USEC_KHR 0x3210 +#define EGL_PRODUCER_FRAME_KHR 0x3212 +#define EGL_CONSUMER_FRAME_KHR 0x3213 +#define EGL_STREAM_STATE_KHR 0x3214 +#define EGL_STREAM_STATE_CREATED_KHR 0x3215 +#define EGL_STREAM_STATE_CONNECTING_KHR 0x3216 +#define EGL_STREAM_STATE_EMPTY_KHR 0x3217 +#define EGL_STREAM_STATE_NEW_FRAME_AVAILABLE_KHR 0x3218 +#define EGL_STREAM_STATE_OLD_FRAME_AVAILABLE_KHR 0x3219 +#define EGL_STREAM_STATE_DISCONNECTED_KHR 0x321A +#define EGL_BAD_STREAM_KHR 0x321B +#define EGL_BAD_STATE_KHR 0x321C +typedef EGLStreamKHR (EGLAPIENTRYP PFNEGLCREATESTREAMKHRPROC) (EGLDisplay dpy, const EGLint *attrib_list); +typedef EGLBoolean (EGLAPIENTRYP PFNEGLDESTROYSTREAMKHRPROC) (EGLDisplay dpy, EGLStreamKHR stream); +typedef EGLBoolean (EGLAPIENTRYP PFNEGLSTREAMATTRIBKHRPROC) (EGLDisplay dpy, EGLStreamKHR stream, EGLenum attribute, EGLint value); +typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYSTREAMKHRPROC) (EGLDisplay dpy, EGLStreamKHR stream, EGLenum attribute, EGLint *value); +typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYSTREAMU64KHRPROC) (EGLDisplay dpy, EGLStreamKHR stream, EGLenum attribute, EGLuint64KHR *value); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLStreamKHR EGLAPIENTRY 
eglCreateStreamKHR (EGLDisplay dpy, const EGLint *attrib_list); +EGLAPI EGLBoolean EGLAPIENTRY eglDestroyStreamKHR (EGLDisplay dpy, EGLStreamKHR stream); +EGLAPI EGLBoolean EGLAPIENTRY eglStreamAttribKHR (EGLDisplay dpy, EGLStreamKHR stream, EGLenum attribute, EGLint value); +EGLAPI EGLBoolean EGLAPIENTRY eglQueryStreamKHR (EGLDisplay dpy, EGLStreamKHR stream, EGLenum attribute, EGLint *value); +EGLAPI EGLBoolean EGLAPIENTRY eglQueryStreamu64KHR (EGLDisplay dpy, EGLStreamKHR stream, EGLenum attribute, EGLuint64KHR *value); +#endif +#endif /* KHRONOS_SUPPORT_INT64 */ +#endif /* EGL_KHR_stream */ + +#ifndef EGL_KHR_stream_consumer_gltexture +#define EGL_KHR_stream_consumer_gltexture 1 +#ifdef EGL_KHR_stream +#define EGL_CONSUMER_ACQUIRE_TIMEOUT_USEC_KHR 0x321E +typedef EGLBoolean (EGLAPIENTRYP PFNEGLSTREAMCONSUMERGLTEXTUREEXTERNALKHRPROC) (EGLDisplay dpy, EGLStreamKHR stream); +typedef EGLBoolean (EGLAPIENTRYP PFNEGLSTREAMCONSUMERACQUIREKHRPROC) (EGLDisplay dpy, EGLStreamKHR stream); +typedef EGLBoolean (EGLAPIENTRYP PFNEGLSTREAMCONSUMERRELEASEKHRPROC) (EGLDisplay dpy, EGLStreamKHR stream); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLBoolean EGLAPIENTRY eglStreamConsumerGLTextureExternalKHR (EGLDisplay dpy, EGLStreamKHR stream); +EGLAPI EGLBoolean EGLAPIENTRY eglStreamConsumerAcquireKHR (EGLDisplay dpy, EGLStreamKHR stream); +EGLAPI EGLBoolean EGLAPIENTRY eglStreamConsumerReleaseKHR (EGLDisplay dpy, EGLStreamKHR stream); +#endif +#endif /* EGL_KHR_stream */ +#endif /* EGL_KHR_stream_consumer_gltexture */ + +#ifndef EGL_KHR_stream_cross_process_fd +#define EGL_KHR_stream_cross_process_fd 1 +typedef int EGLNativeFileDescriptorKHR; +#ifdef EGL_KHR_stream +#define EGL_NO_FILE_DESCRIPTOR_KHR ((EGLNativeFileDescriptorKHR)(-1)) +typedef EGLNativeFileDescriptorKHR (EGLAPIENTRYP PFNEGLGETSTREAMFILEDESCRIPTORKHRPROC) (EGLDisplay dpy, EGLStreamKHR stream); +typedef EGLStreamKHR (EGLAPIENTRYP PFNEGLCREATESTREAMFROMFILEDESCRIPTORKHRPROC) (EGLDisplay dpy, EGLNativeFileDescriptorKHR 
file_descriptor); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLNativeFileDescriptorKHR EGLAPIENTRY eglGetStreamFileDescriptorKHR (EGLDisplay dpy, EGLStreamKHR stream); +EGLAPI EGLStreamKHR EGLAPIENTRY eglCreateStreamFromFileDescriptorKHR (EGLDisplay dpy, EGLNativeFileDescriptorKHR file_descriptor); +#endif +#endif /* EGL_KHR_stream */ +#endif /* EGL_KHR_stream_cross_process_fd */ + +#ifndef EGL_KHR_stream_fifo +#define EGL_KHR_stream_fifo 1 +#ifdef EGL_KHR_stream +#define EGL_STREAM_FIFO_LENGTH_KHR 0x31FC +#define EGL_STREAM_TIME_NOW_KHR 0x31FD +#define EGL_STREAM_TIME_CONSUMER_KHR 0x31FE +#define EGL_STREAM_TIME_PRODUCER_KHR 0x31FF +typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYSTREAMTIMEKHRPROC) (EGLDisplay dpy, EGLStreamKHR stream, EGLenum attribute, EGLTimeKHR *value); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLBoolean EGLAPIENTRY eglQueryStreamTimeKHR (EGLDisplay dpy, EGLStreamKHR stream, EGLenum attribute, EGLTimeKHR *value); +#endif +#endif /* EGL_KHR_stream */ +#endif /* EGL_KHR_stream_fifo */ + +#ifndef EGL_KHR_stream_producer_aldatalocator +#define EGL_KHR_stream_producer_aldatalocator 1 +#ifdef EGL_KHR_stream +#endif /* EGL_KHR_stream */ +#endif /* EGL_KHR_stream_producer_aldatalocator */ + +#ifndef EGL_KHR_stream_producer_eglsurface +#define EGL_KHR_stream_producer_eglsurface 1 +#ifdef EGL_KHR_stream +#define EGL_STREAM_BIT_KHR 0x0800 +typedef EGLSurface (EGLAPIENTRYP PFNEGLCREATESTREAMPRODUCERSURFACEKHRPROC) (EGLDisplay dpy, EGLConfig config, EGLStreamKHR stream, const EGLint *attrib_list); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLSurface EGLAPIENTRY eglCreateStreamProducerSurfaceKHR (EGLDisplay dpy, EGLConfig config, EGLStreamKHR stream, const EGLint *attrib_list); +#endif +#endif /* EGL_KHR_stream */ +#endif /* EGL_KHR_stream_producer_eglsurface */ + +#ifndef EGL_KHR_surfaceless_context +#define EGL_KHR_surfaceless_context 1 +#endif /* EGL_KHR_surfaceless_context */ + +#ifndef EGL_KHR_vg_parent_image +#define EGL_KHR_vg_parent_image 1 +#define 
EGL_VG_PARENT_IMAGE_KHR 0x30BA +#endif /* EGL_KHR_vg_parent_image */ + +#ifndef EGL_KHR_wait_sync +#define EGL_KHR_wait_sync 1 +typedef EGLint (EGLAPIENTRYP PFNEGLWAITSYNCKHRPROC) (EGLDisplay dpy, EGLSyncKHR sync, EGLint flags); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLint EGLAPIENTRY eglWaitSyncKHR (EGLDisplay dpy, EGLSyncKHR sync, EGLint flags); +#endif +#endif /* EGL_KHR_wait_sync */ + +#ifndef EGL_ANDROID_blob_cache +#define EGL_ANDROID_blob_cache 1 +typedef khronos_ssize_t EGLsizeiANDROID; +typedef void (*EGLSetBlobFuncANDROID) (const void *key, EGLsizeiANDROID keySize, const void *value, EGLsizeiANDROID valueSize); +typedef EGLsizeiANDROID (*EGLGetBlobFuncANDROID) (const void *key, EGLsizeiANDROID keySize, void *value, EGLsizeiANDROID valueSize); +typedef void (EGLAPIENTRYP PFNEGLSETBLOBCACHEFUNCSANDROIDPROC) (EGLDisplay dpy, EGLSetBlobFuncANDROID set, EGLGetBlobFuncANDROID get); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI void EGLAPIENTRY eglSetBlobCacheFuncsANDROID (EGLDisplay dpy, EGLSetBlobFuncANDROID set, EGLGetBlobFuncANDROID get); +#endif +#endif /* EGL_ANDROID_blob_cache */ + +#ifndef EGL_ANDROID_framebuffer_target +#define EGL_ANDROID_framebuffer_target 1 +#define EGL_FRAMEBUFFER_TARGET_ANDROID 0x3147 +#endif /* EGL_ANDROID_framebuffer_target */ + +#ifndef EGL_ANDROID_image_native_buffer +#define EGL_ANDROID_image_native_buffer 1 +#define EGL_NATIVE_BUFFER_ANDROID 0x3140 +#endif /* EGL_ANDROID_image_native_buffer */ + +#ifndef EGL_ANDROID_native_fence_sync +#define EGL_ANDROID_native_fence_sync 1 +#define EGL_SYNC_NATIVE_FENCE_ANDROID 0x3144 +#define EGL_SYNC_NATIVE_FENCE_FD_ANDROID 0x3145 +#define EGL_SYNC_NATIVE_FENCE_SIGNALED_ANDROID 0x3146 +#define EGL_NO_NATIVE_FENCE_FD_ANDROID -1 +typedef EGLint (EGLAPIENTRYP PFNEGLDUPNATIVEFENCEFDANDROIDPROC) (EGLDisplay dpy, EGLSyncKHR sync); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLint EGLAPIENTRY eglDupNativeFenceFDANDROID (EGLDisplay dpy, EGLSyncKHR sync); +#endif +#endif /* EGL_ANDROID_native_fence_sync */ + 
+#ifndef EGL_ANDROID_recordable +#define EGL_ANDROID_recordable 1 +#define EGL_RECORDABLE_ANDROID 0x3142 +#endif /* EGL_ANDROID_recordable */ + +#ifndef EGL_ANGLE_d3d_share_handle_client_buffer +#define EGL_ANGLE_d3d_share_handle_client_buffer 1 +#define EGL_D3D_TEXTURE_2D_SHARE_HANDLE_ANGLE 0x3200 +#endif /* EGL_ANGLE_d3d_share_handle_client_buffer */ + +#ifndef EGL_ANGLE_query_surface_pointer +#define EGL_ANGLE_query_surface_pointer 1 +typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYSURFACEPOINTERANGLEPROC) (EGLDisplay dpy, EGLSurface surface, EGLint attribute, void **value); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLBoolean EGLAPIENTRY eglQuerySurfacePointerANGLE (EGLDisplay dpy, EGLSurface surface, EGLint attribute, void **value); +#endif +#endif /* EGL_ANGLE_query_surface_pointer */ + +#ifndef EGL_ANGLE_surface_d3d_texture_2d_share_handle +#define EGL_ANGLE_surface_d3d_texture_2d_share_handle 1 +#endif /* EGL_ANGLE_surface_d3d_texture_2d_share_handle */ + +#ifndef EGL_ARM_pixmap_multisample_discard +#define EGL_ARM_pixmap_multisample_discard 1 +#define EGL_DISCARD_SAMPLES_ARM 0x3286 +#endif /* EGL_ARM_pixmap_multisample_discard */ + +#ifndef EGL_EXT_buffer_age +#define EGL_EXT_buffer_age 1 +#define EGL_BUFFER_AGE_EXT 0x313D +#endif /* EGL_EXT_buffer_age */ + +#ifndef EGL_EXT_client_extensions +#define EGL_EXT_client_extensions 1 +#endif /* EGL_EXT_client_extensions */ + +#ifndef EGL_EXT_create_context_robustness +#define EGL_EXT_create_context_robustness 1 +#define EGL_CONTEXT_OPENGL_ROBUST_ACCESS_EXT 0x30BF +#define EGL_CONTEXT_OPENGL_RESET_NOTIFICATION_STRATEGY_EXT 0x3138 +#define EGL_NO_RESET_NOTIFICATION_EXT 0x31BE +#define EGL_LOSE_CONTEXT_ON_RESET_EXT 0x31BF +#endif /* EGL_EXT_create_context_robustness */ + +#ifndef EGL_EXT_image_dma_buf_import +#define EGL_EXT_image_dma_buf_import 1 +#define EGL_LINUX_DMA_BUF_EXT 0x3270 +#define EGL_LINUX_DRM_FOURCC_EXT 0x3271 +#define EGL_DMA_BUF_PLANE0_FD_EXT 0x3272 +#define EGL_DMA_BUF_PLANE0_OFFSET_EXT 0x3273 +#define 
EGL_DMA_BUF_PLANE0_PITCH_EXT 0x3274 +#define EGL_DMA_BUF_PLANE1_FD_EXT 0x3275 +#define EGL_DMA_BUF_PLANE1_OFFSET_EXT 0x3276 +#define EGL_DMA_BUF_PLANE1_PITCH_EXT 0x3277 +#define EGL_DMA_BUF_PLANE2_FD_EXT 0x3278 +#define EGL_DMA_BUF_PLANE2_OFFSET_EXT 0x3279 +#define EGL_DMA_BUF_PLANE2_PITCH_EXT 0x327A +#define EGL_YUV_COLOR_SPACE_HINT_EXT 0x327B +#define EGL_SAMPLE_RANGE_HINT_EXT 0x327C +#define EGL_YUV_CHROMA_HORIZONTAL_SITING_HINT_EXT 0x327D +#define EGL_YUV_CHROMA_VERTICAL_SITING_HINT_EXT 0x327E +#define EGL_ITU_REC601_EXT 0x327F +#define EGL_ITU_REC709_EXT 0x3280 +#define EGL_ITU_REC2020_EXT 0x3281 +#define EGL_YUV_FULL_RANGE_EXT 0x3282 +#define EGL_YUV_NARROW_RANGE_EXT 0x3283 +#define EGL_YUV_CHROMA_SITING_0_EXT 0x3284 +#define EGL_YUV_CHROMA_SITING_0_5_EXT 0x3285 +#endif /* EGL_EXT_image_dma_buf_import */ + +#ifndef EGL_EXT_multiview_window +#define EGL_EXT_multiview_window 1 +#define EGL_MULTIVIEW_VIEW_COUNT_EXT 0x3134 +#endif /* EGL_EXT_multiview_window */ + +#ifndef EGL_EXT_platform_base +#define EGL_EXT_platform_base 1 +typedef EGLDisplay (EGLAPIENTRYP PFNEGLGETPLATFORMDISPLAYEXTPROC) (EGLenum platform, void *native_display, const EGLint *attrib_list); +typedef EGLSurface (EGLAPIENTRYP PFNEGLCREATEPLATFORMWINDOWSURFACEEXTPROC) (EGLDisplay dpy, EGLConfig config, void *native_window, const EGLint *attrib_list); +typedef EGLSurface (EGLAPIENTRYP PFNEGLCREATEPLATFORMPIXMAPSURFACEEXTPROC) (EGLDisplay dpy, EGLConfig config, void *native_pixmap, const EGLint *attrib_list); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLDisplay EGLAPIENTRY eglGetPlatformDisplayEXT (EGLenum platform, void *native_display, const EGLint *attrib_list); +EGLAPI EGLSurface EGLAPIENTRY eglCreatePlatformWindowSurfaceEXT (EGLDisplay dpy, EGLConfig config, void *native_window, const EGLint *attrib_list); +EGLAPI EGLSurface EGLAPIENTRY eglCreatePlatformPixmapSurfaceEXT (EGLDisplay dpy, EGLConfig config, void *native_pixmap, const EGLint *attrib_list); +#endif +#endif /* EGL_EXT_platform_base */ + 
+#ifndef EGL_EXT_platform_wayland +#define EGL_EXT_platform_wayland 1 +#define EGL_PLATFORM_WAYLAND_EXT 0x31D8 +#endif /* EGL_EXT_platform_wayland */ + +#ifndef EGL_EXT_platform_x11 +#define EGL_EXT_platform_x11 1 +#define EGL_PLATFORM_X11_EXT 0x31D5 +#define EGL_PLATFORM_X11_SCREEN_EXT 0x31D6 +#endif /* EGL_EXT_platform_x11 */ + +#ifndef EGL_EXT_swap_buffers_with_damage +#define EGL_EXT_swap_buffers_with_damage 1 +typedef EGLBoolean (EGLAPIENTRYP PFNEGLSWAPBUFFERSWITHDAMAGEEXTPROC) (EGLDisplay dpy, EGLSurface surface, EGLint *rects, EGLint n_rects); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLBoolean EGLAPIENTRY eglSwapBuffersWithDamageEXT (EGLDisplay dpy, EGLSurface surface, EGLint *rects, EGLint n_rects); +#endif +#endif /* EGL_EXT_swap_buffers_with_damage */ + +#ifndef EGL_HI_clientpixmap +#define EGL_HI_clientpixmap 1 +struct EGLClientPixmapHI { + void *pData; + EGLint iWidth; + EGLint iHeight; + EGLint iStride; +}; +#define EGL_CLIENT_PIXMAP_POINTER_HI 0x8F74 +typedef EGLSurface (EGLAPIENTRYP PFNEGLCREATEPIXMAPSURFACEHIPROC) (EGLDisplay dpy, EGLConfig config, struct EGLClientPixmapHI *pixmap); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLSurface EGLAPIENTRY eglCreatePixmapSurfaceHI (EGLDisplay dpy, EGLConfig config, struct EGLClientPixmapHI *pixmap); +#endif +#endif /* EGL_HI_clientpixmap */ + +#ifndef EGL_HI_colorformats +#define EGL_HI_colorformats 1 +#define EGL_COLOR_FORMAT_HI 0x8F70 +#define EGL_COLOR_RGB_HI 0x8F71 +#define EGL_COLOR_RGBA_HI 0x8F72 +#define EGL_COLOR_ARGB_HI 0x8F73 +#endif /* EGL_HI_colorformats */ + +#ifndef EGL_IMG_context_priority +#define EGL_IMG_context_priority 1 +#define EGL_CONTEXT_PRIORITY_LEVEL_IMG 0x3100 +#define EGL_CONTEXT_PRIORITY_HIGH_IMG 0x3101 +#define EGL_CONTEXT_PRIORITY_MEDIUM_IMG 0x3102 +#define EGL_CONTEXT_PRIORITY_LOW_IMG 0x3103 +#endif /* EGL_IMG_context_priority */ + +#ifndef EGL_MESA_drm_image +#define EGL_MESA_drm_image 1 +#define EGL_DRM_BUFFER_FORMAT_MESA 0x31D0 +#define EGL_DRM_BUFFER_USE_MESA 0x31D1 +#define 
EGL_DRM_BUFFER_FORMAT_ARGB32_MESA 0x31D2 +#define EGL_DRM_BUFFER_MESA 0x31D3 +#define EGL_DRM_BUFFER_STRIDE_MESA 0x31D4 +#define EGL_DRM_BUFFER_USE_SCANOUT_MESA 0x00000001 +#define EGL_DRM_BUFFER_USE_SHARE_MESA 0x00000002 +typedef EGLImageKHR (EGLAPIENTRYP PFNEGLCREATEDRMIMAGEMESAPROC) (EGLDisplay dpy, const EGLint *attrib_list); +typedef EGLBoolean (EGLAPIENTRYP PFNEGLEXPORTDRMIMAGEMESAPROC) (EGLDisplay dpy, EGLImageKHR image, EGLint *name, EGLint *handle, EGLint *stride); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLImageKHR EGLAPIENTRY eglCreateDRMImageMESA (EGLDisplay dpy, const EGLint *attrib_list); +EGLAPI EGLBoolean EGLAPIENTRY eglExportDRMImageMESA (EGLDisplay dpy, EGLImageKHR image, EGLint *name, EGLint *handle, EGLint *stride); +#endif +#endif /* EGL_MESA_drm_image */ + +#ifndef EGL_MESA_platform_gbm +#define EGL_MESA_platform_gbm 1 +#define EGL_PLATFORM_GBM_MESA 0x31D7 +#endif /* EGL_MESA_platform_gbm */ + +#ifndef EGL_NV_3dvision_surface +#define EGL_NV_3dvision_surface 1 +#define EGL_AUTO_STEREO_NV 0x3136 +#endif /* EGL_NV_3dvision_surface */ + +#ifndef EGL_NV_coverage_sample +#define EGL_NV_coverage_sample 1 +#define EGL_COVERAGE_BUFFERS_NV 0x30E0 +#define EGL_COVERAGE_SAMPLES_NV 0x30E1 +#endif /* EGL_NV_coverage_sample */ + +#ifndef EGL_NV_coverage_sample_resolve +#define EGL_NV_coverage_sample_resolve 1 +#define EGL_COVERAGE_SAMPLE_RESOLVE_NV 0x3131 +#define EGL_COVERAGE_SAMPLE_RESOLVE_DEFAULT_NV 0x3132 +#define EGL_COVERAGE_SAMPLE_RESOLVE_NONE_NV 0x3133 +#endif /* EGL_NV_coverage_sample_resolve */ + +#ifndef EGL_NV_depth_nonlinear +#define EGL_NV_depth_nonlinear 1 +#define EGL_DEPTH_ENCODING_NV 0x30E2 +#define EGL_DEPTH_ENCODING_NONE_NV 0 +#define EGL_DEPTH_ENCODING_NONLINEAR_NV 0x30E3 +#endif /* EGL_NV_depth_nonlinear */ + +#ifndef EGL_NV_native_query +#define EGL_NV_native_query 1 +typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYNATIVEDISPLAYNVPROC) (EGLDisplay dpy, EGLNativeDisplayType *display_id); +typedef EGLBoolean (EGLAPIENTRYP 
PFNEGLQUERYNATIVEWINDOWNVPROC) (EGLDisplay dpy, EGLSurface surf, EGLNativeWindowType *window); +typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYNATIVEPIXMAPNVPROC) (EGLDisplay dpy, EGLSurface surf, EGLNativePixmapType *pixmap); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLBoolean EGLAPIENTRY eglQueryNativeDisplayNV (EGLDisplay dpy, EGLNativeDisplayType *display_id); +EGLAPI EGLBoolean EGLAPIENTRY eglQueryNativeWindowNV (EGLDisplay dpy, EGLSurface surf, EGLNativeWindowType *window); +EGLAPI EGLBoolean EGLAPIENTRY eglQueryNativePixmapNV (EGLDisplay dpy, EGLSurface surf, EGLNativePixmapType *pixmap); +#endif +#endif /* EGL_NV_native_query */ + +#ifndef EGL_NV_post_convert_rounding +#define EGL_NV_post_convert_rounding 1 +#endif /* EGL_NV_post_convert_rounding */ + +#ifndef EGL_NV_post_sub_buffer +#define EGL_NV_post_sub_buffer 1 +#define EGL_POST_SUB_BUFFER_SUPPORTED_NV 0x30BE +typedef EGLBoolean (EGLAPIENTRYP PFNEGLPOSTSUBBUFFERNVPROC) (EGLDisplay dpy, EGLSurface surface, EGLint x, EGLint y, EGLint width, EGLint height); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLBoolean EGLAPIENTRY eglPostSubBufferNV (EGLDisplay dpy, EGLSurface surface, EGLint x, EGLint y, EGLint width, EGLint height); +#endif +#endif /* EGL_NV_post_sub_buffer */ + +#ifndef EGL_NV_stream_sync +#define EGL_NV_stream_sync 1 +#define EGL_SYNC_NEW_FRAME_NV 0x321F +typedef EGLSyncKHR (EGLAPIENTRYP PFNEGLCREATESTREAMSYNCNVPROC) (EGLDisplay dpy, EGLStreamKHR stream, EGLenum type, const EGLint *attrib_list); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLSyncKHR EGLAPIENTRY eglCreateStreamSyncNV (EGLDisplay dpy, EGLStreamKHR stream, EGLenum type, const EGLint *attrib_list); +#endif +#endif /* EGL_NV_stream_sync */ + +#ifndef EGL_NV_sync +#define EGL_NV_sync 1 +typedef void *EGLSyncNV; +typedef khronos_utime_nanoseconds_t EGLTimeNV; +#ifdef KHRONOS_SUPPORT_INT64 +#define EGL_SYNC_PRIOR_COMMANDS_COMPLETE_NV 0x30E6 +#define EGL_SYNC_STATUS_NV 0x30E7 +#define EGL_SIGNALED_NV 0x30E8 +#define EGL_UNSIGNALED_NV 0x30E9 +#define 
EGL_SYNC_FLUSH_COMMANDS_BIT_NV 0x0001 +#define EGL_FOREVER_NV 0xFFFFFFFFFFFFFFFFull +#define EGL_ALREADY_SIGNALED_NV 0x30EA +#define EGL_TIMEOUT_EXPIRED_NV 0x30EB +#define EGL_CONDITION_SATISFIED_NV 0x30EC +#define EGL_SYNC_TYPE_NV 0x30ED +#define EGL_SYNC_CONDITION_NV 0x30EE +#define EGL_SYNC_FENCE_NV 0x30EF +#define EGL_NO_SYNC_NV ((EGLSyncNV)0) +typedef EGLSyncNV (EGLAPIENTRYP PFNEGLCREATEFENCESYNCNVPROC) (EGLDisplay dpy, EGLenum condition, const EGLint *attrib_list); +typedef EGLBoolean (EGLAPIENTRYP PFNEGLDESTROYSYNCNVPROC) (EGLSyncNV sync); +typedef EGLBoolean (EGLAPIENTRYP PFNEGLFENCENVPROC) (EGLSyncNV sync); +typedef EGLint (EGLAPIENTRYP PFNEGLCLIENTWAITSYNCNVPROC) (EGLSyncNV sync, EGLint flags, EGLTimeNV timeout); +typedef EGLBoolean (EGLAPIENTRYP PFNEGLSIGNALSYNCNVPROC) (EGLSyncNV sync, EGLenum mode); +typedef EGLBoolean (EGLAPIENTRYP PFNEGLGETSYNCATTRIBNVPROC) (EGLSyncNV sync, EGLint attribute, EGLint *value); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLSyncNV EGLAPIENTRY eglCreateFenceSyncNV (EGLDisplay dpy, EGLenum condition, const EGLint *attrib_list); +EGLAPI EGLBoolean EGLAPIENTRY eglDestroySyncNV (EGLSyncNV sync); +EGLAPI EGLBoolean EGLAPIENTRY eglFenceNV (EGLSyncNV sync); +EGLAPI EGLint EGLAPIENTRY eglClientWaitSyncNV (EGLSyncNV sync, EGLint flags, EGLTimeNV timeout); +EGLAPI EGLBoolean EGLAPIENTRY eglSignalSyncNV (EGLSyncNV sync, EGLenum mode); +EGLAPI EGLBoolean EGLAPIENTRY eglGetSyncAttribNV (EGLSyncNV sync, EGLint attribute, EGLint *value); +#endif +#endif /* KHRONOS_SUPPORT_INT64 */ +#endif /* EGL_NV_sync */ + +#ifndef EGL_NV_system_time +#define EGL_NV_system_time 1 +typedef khronos_utime_nanoseconds_t EGLuint64NV; +#ifdef KHRONOS_SUPPORT_INT64 +typedef EGLuint64NV (EGLAPIENTRYP PFNEGLGETSYSTEMTIMEFREQUENCYNVPROC) (void); +typedef EGLuint64NV (EGLAPIENTRYP PFNEGLGETSYSTEMTIMENVPROC) (void); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLuint64NV EGLAPIENTRY eglGetSystemTimeFrequencyNV (void); +EGLAPI EGLuint64NV EGLAPIENTRY eglGetSystemTimeNV (void); 
+#endif +#endif /* KHRONOS_SUPPORT_INT64 */ +#endif /* EGL_NV_system_time */ + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/projects/hip/amdocl/EGL/eglplatform.h b/projects/hip/amdocl/EGL/eglplatform.h new file mode 100644 index 0000000000..3ab8844f09 --- /dev/null +++ b/projects/hip/amdocl/EGL/eglplatform.h @@ -0,0 +1,125 @@ +#ifndef __eglplatform_h_ +#define __eglplatform_h_ + +/* +** Copyright (c) 2007-2013 The Khronos Group Inc. +** +** Permission is hereby granted, free of charge, to any person obtaining a +** copy of this software and/or associated documentation files (the +** "Materials"), to deal in the Materials without restriction, including +** without limitation the rights to use, copy, modify, merge, publish, +** distribute, sublicense, and/or sell copies of the Materials, and to +** permit persons to whom the Materials are furnished to do so, subject to +** the following conditions: +** +** The above copyright notice and this permission notice shall be included +** in all copies or substantial portions of the Materials. +** +** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +** MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. +*/ + +/* Platform-specific types and definitions for egl.h + * $Revision: 23432 $ on $Date: 2013-10-09 00:57:24 -0700 (Wed, 09 Oct 2013) $ + * + * Adopters may modify khrplatform.h and this file to suit their platform. + * You are encouraged to submit all modifications to the Khronos group so that + * they can be included in future versions of this file. 
Please submit changes + * by sending them to the public Khronos Bugzilla (http://khronos.org/bugzilla) + * by filing a bug against product "EGL" component "Registry". + */ + +#include + +/* Macros used in EGL function prototype declarations. + * + * EGL functions should be prototyped as: + * + * EGLAPI return-type EGLAPIENTRY eglFunction(arguments); + * typedef return-type (EXPAPIENTRYP PFNEGLFUNCTIONPROC) (arguments); + * + * KHRONOS_APICALL and KHRONOS_APIENTRY are defined in KHR/khrplatform.h + */ + +#ifndef EGLAPI +#define EGLAPI KHRONOS_APICALL +#endif + +#ifndef EGLAPIENTRY +#define EGLAPIENTRY KHRONOS_APIENTRY +#endif +#define EGLAPIENTRYP EGLAPIENTRY* + +/* The types NativeDisplayType, NativeWindowType, and NativePixmapType + * are aliases of window-system-dependent types, such as X Display * or + * Windows Device Context. They must be defined in platform-specific + * code below. The EGL-prefixed versions of Native*Type are the same + * types, renamed in EGL 1.3 so all types in the API start with "EGL". + * + * Khronos STRONGLY RECOMMENDS that you use the default definitions + * provided below, since these changes affect both binary and source + * portability of applications using EGL running on different EGL + * implementations. 
+ */ + +#if defined(_WIN32) || defined(__VC32__) && !defined(__CYGWIN__) && !defined(__SCITECH_SNAP__) /* Win32 and WinCE */ +#ifndef WIN32_LEAN_AND_MEAN +#define WIN32_LEAN_AND_MEAN 1 +#endif +#include + +typedef HDC EGLNativeDisplayType; +typedef HBITMAP EGLNativePixmapType; +typedef HWND EGLNativeWindowType; + +#elif defined(__WINSCW__) || defined(__SYMBIAN32__) /* Symbian */ + +typedef int EGLNativeDisplayType; +typedef void *EGLNativeWindowType; +typedef void *EGLNativePixmapType; + +#elif defined(__ANDROID__) || defined(ANDROID) + +#include + +struct egl_native_pixmap_t; + +typedef struct ANativeWindow* EGLNativeWindowType; +typedef struct egl_native_pixmap_t* EGLNativePixmapType; +typedef void* EGLNativeDisplayType; + +#elif defined(__unix__) + +/* X11 (tentative) */ +#include +#include + +typedef Display *EGLNativeDisplayType; +typedef Pixmap EGLNativePixmapType; +typedef Window EGLNativeWindowType; + +#else +#error "Platform not recognized" +#endif + +/* EGL 1.2 types, renamed for consistency in EGL 1.3 */ +typedef EGLNativeDisplayType NativeDisplayType; +typedef EGLNativePixmapType NativePixmapType; +typedef EGLNativeWindowType NativeWindowType; + + +/* Define EGLint. This must be a signed integral type large enough to contain + * all legal attribute names and values passed into and out of EGL, whether + * their type is boolean, bitmask, enumerant (symbolic constant), integer, + * handle, or other. While in general a 32-bit integer will suffice, if + * handles are 64 bit types, then EGLint should be defined as a signed 64-bit + * integer type. + */ +typedef khronos_int32_t EGLint; + +#endif /* __eglplatform_h */ diff --git a/projects/hip/amdocl/KHR/khrplatform.h b/projects/hip/amdocl/KHR/khrplatform.h new file mode 100644 index 0000000000..c9e6f17d34 --- /dev/null +++ b/projects/hip/amdocl/KHR/khrplatform.h @@ -0,0 +1,282 @@ +#ifndef __khrplatform_h_ +#define __khrplatform_h_ + +/* +** Copyright (c) 2008-2009 The Khronos Group Inc. 
+** +** Permission is hereby granted, free of charge, to any person obtaining a +** copy of this software and/or associated documentation files (the +** "Materials"), to deal in the Materials without restriction, including +** without limitation the rights to use, copy, modify, merge, publish, +** distribute, sublicense, and/or sell copies of the Materials, and to +** permit persons to whom the Materials are furnished to do so, subject to +** the following conditions: +** +** The above copyright notice and this permission notice shall be included +** in all copies or substantial portions of the Materials. +** +** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +** MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. +*/ + +/* Khronos platform-specific types and definitions. + * + * $Revision: 23298 $ on $Date: 2013-09-30 17:07:13 -0700 (Mon, 30 Sep 2013) $ + * + * Adopters may modify this file to suit their platform. Adopters are + * encouraged to submit platform specific modifications to the Khronos + * group so that they can be included in future versions of this file. + * Please submit changes by sending them to the public Khronos Bugzilla + * (http://khronos.org/bugzilla) by filing a bug against product + * "Khronos (general)" component "Registry". + * + * A predefined template which fills in some of the bug fields can be + * reached using http://tinyurl.com/khrplatform-h-bugreport, but you + * must create a Bugzilla login first. 
+ * + * + * See the Implementer's Guidelines for information about where this file + * should be located on your system and for more details of its use: + * http://www.khronos.org/registry/implementers_guide.pdf + * + * This file should be included as + * #include + * by Khronos client API header files that use its types and defines. + * + * The types in khrplatform.h should only be used to define API-specific types. + * + * Types defined in khrplatform.h: + * khronos_int8_t signed 8 bit + * khronos_uint8_t unsigned 8 bit + * khronos_int16_t signed 16 bit + * khronos_uint16_t unsigned 16 bit + * khronos_int32_t signed 32 bit + * khronos_uint32_t unsigned 32 bit + * khronos_int64_t signed 64 bit + * khronos_uint64_t unsigned 64 bit + * khronos_intptr_t signed same number of bits as a pointer + * khronos_uintptr_t unsigned same number of bits as a pointer + * khronos_ssize_t signed size + * khronos_usize_t unsigned size + * khronos_float_t signed 32 bit floating point + * khronos_time_ns_t unsigned 64 bit time in nanoseconds + * khronos_utime_nanoseconds_t unsigned time interval or absolute time in + * nanoseconds + * khronos_stime_nanoseconds_t signed time interval in nanoseconds + * khronos_boolean_enum_t enumerated boolean type. This should + * only be used as a base type when a client API's boolean type is + * an enum. Client APIs which use an integer or other type for + * booleans cannot use this as the base type for their boolean. + * + * Tokens defined in khrplatform.h: + * + * KHRONOS_FALSE, KHRONOS_TRUE Enumerated boolean false/true values. + * + * KHRONOS_SUPPORT_INT64 is 1 if 64 bit integers are supported; otherwise 0. + * KHRONOS_SUPPORT_FLOAT is 1 if floats are supported; otherwise 0. 
+ * + * Calling convention macros defined in this file: + * KHRONOS_APICALL + * KHRONOS_APIENTRY + * KHRONOS_APIATTRIBUTES + * + * These may be used in function prototypes as: + * + * KHRONOS_APICALL void KHRONOS_APIENTRY funcname( + * int arg1, + * int arg2) KHRONOS_APIATTRIBUTES; + */ + +/*------------------------------------------------------------------------- + * Definition of KHRONOS_APICALL + *------------------------------------------------------------------------- + * This precedes the return type of the function in the function prototype. + */ +#if defined(_WIN32) && !defined(__SCITECH_SNAP__) +# define KHRONOS_APICALL __declspec(dllimport) +#elif defined (__SYMBIAN32__) +# define KHRONOS_APICALL IMPORT_C +#else +# define KHRONOS_APICALL +#endif + +/*------------------------------------------------------------------------- + * Definition of KHRONOS_APIENTRY + *------------------------------------------------------------------------- + * This follows the return type of the function and precedes the function + * name in the function prototype. + */ +#if defined(_WIN32) && !defined(_WIN32_WCE) && !defined(__SCITECH_SNAP__) + /* Win32 but not WinCE */ +# define KHRONOS_APIENTRY __stdcall +#else +# define KHRONOS_APIENTRY +#endif + +/*------------------------------------------------------------------------- + * Definition of KHRONOS_APIATTRIBUTES + *------------------------------------------------------------------------- + * This follows the closing parenthesis of the function prototype arguments. 
+ */ +#if defined (__ARMCC_2__) +#define KHRONOS_APIATTRIBUTES __softfp +#else +#define KHRONOS_APIATTRIBUTES +#endif + +/*------------------------------------------------------------------------- + * basic type definitions + *-----------------------------------------------------------------------*/ +#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || defined(__GNUC__) || defined(__SCO__) || defined(__USLC__) + + +/* + * Using + */ +#include +typedef int32_t khronos_int32_t; +typedef uint32_t khronos_uint32_t; +typedef int64_t khronos_int64_t; +typedef uint64_t khronos_uint64_t; +#define KHRONOS_SUPPORT_INT64 1 +#define KHRONOS_SUPPORT_FLOAT 1 + +#elif defined(__VMS ) || defined(__sgi) + +/* + * Using + */ +#include +typedef int32_t khronos_int32_t; +typedef uint32_t khronos_uint32_t; +typedef int64_t khronos_int64_t; +typedef uint64_t khronos_uint64_t; +#define KHRONOS_SUPPORT_INT64 1 +#define KHRONOS_SUPPORT_FLOAT 1 + +#elif defined(_WIN32) && !defined(__SCITECH_SNAP__) + +/* + * Win32 + */ +typedef __int32 khronos_int32_t; +typedef unsigned __int32 khronos_uint32_t; +typedef __int64 khronos_int64_t; +typedef unsigned __int64 khronos_uint64_t; +#define KHRONOS_SUPPORT_INT64 1 +#define KHRONOS_SUPPORT_FLOAT 1 + +#elif defined(__sun__) || defined(__digital__) + +/* + * Sun or Digital + */ +typedef int khronos_int32_t; +typedef unsigned int khronos_uint32_t; +#if defined(__arch64__) || defined(_LP64) +typedef long int khronos_int64_t; +typedef unsigned long int khronos_uint64_t; +#else +typedef long long int khronos_int64_t; +typedef unsigned long long int khronos_uint64_t; +#endif /* __arch64__ */ +#define KHRONOS_SUPPORT_INT64 1 +#define KHRONOS_SUPPORT_FLOAT 1 + +#elif 0 + +/* + * Hypothetical platform with no float or int64 support + */ +typedef int khronos_int32_t; +typedef unsigned int khronos_uint32_t; +#define KHRONOS_SUPPORT_INT64 0 +#define KHRONOS_SUPPORT_FLOAT 0 + +#else + +/* + * Generic fallback + */ +#include +typedef int32_t 
khronos_int32_t; +typedef uint32_t khronos_uint32_t; +typedef int64_t khronos_int64_t; +typedef uint64_t khronos_uint64_t; +#define KHRONOS_SUPPORT_INT64 1 +#define KHRONOS_SUPPORT_FLOAT 1 + +#endif + + +/* + * Types that are (so far) the same on all platforms + */ +typedef signed char khronos_int8_t; +typedef unsigned char khronos_uint8_t; +typedef signed short int khronos_int16_t; +typedef unsigned short int khronos_uint16_t; + +/* + * Types that differ between LLP64 and LP64 architectures - in LLP64, + * pointers are 64 bits, but 'long' is still 32 bits. Win64 appears + * to be the only LLP64 architecture in current use. + */ +#ifdef _WIN64 +typedef signed long long int khronos_intptr_t; +typedef unsigned long long int khronos_uintptr_t; +typedef signed long long int khronos_ssize_t; +typedef unsigned long long int khronos_usize_t; +#else +typedef signed long int khronos_intptr_t; +typedef unsigned long int khronos_uintptr_t; +typedef signed long int khronos_ssize_t; +typedef unsigned long int khronos_usize_t; +#endif + +#if KHRONOS_SUPPORT_FLOAT +/* + * Float type + */ +typedef float khronos_float_t; +#endif + +#if KHRONOS_SUPPORT_INT64 +/* Time types + * + * These types can be used to represent a time interval in nanoseconds or + * an absolute Unadjusted System Time. Unadjusted System Time is the number + * of nanoseconds since some arbitrary system event (e.g. since the last + * time the system booted). The Unadjusted System Time is an unsigned + * 64 bit value that wraps back to 0 every 584 years. Time intervals + * may be either signed or unsigned. + */ +typedef khronos_uint64_t khronos_utime_nanoseconds_t; +typedef khronos_int64_t khronos_stime_nanoseconds_t; +#endif + +/* + * Dummy value used to pad enum types to 32 bits. + */ +#ifndef KHRONOS_MAX_ENUM +#define KHRONOS_MAX_ENUM 0x7FFFFFFF +#endif + +/* + * Enumerated boolean type + * + * Values other than zero should be considered to be true. 
Therefore + * comparisons should not be made against KHRONOS_TRUE. + */ +typedef enum { + KHRONOS_FALSE = 0, + KHRONOS_TRUE = 1, + KHRONOS_BOOLEAN_ENUM_FORCE_SIZE = KHRONOS_MAX_ENUM +} khronos_boolean_enum_t; + +#endif /* __khrplatform_h_ */ diff --git a/projects/hip/amdocl/cl_common.hpp b/projects/hip/amdocl/cl_common.hpp new file mode 100644 index 0000000000..a88a06f498 --- /dev/null +++ b/projects/hip/amdocl/cl_common.hpp @@ -0,0 +1,301 @@ +/* Copyright (c) 2008-present Advanced Micro Devices, Inc. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. */ + +#ifndef CL_COMMON_HPP_ +#define CL_COMMON_HPP_ + +#include "top.hpp" +#include "platform/runtime.hpp" +#include "platform/command.hpp" +#include "platform/memory.hpp" +#include "thread/thread.hpp" +#include "platform/commandqueue.hpp" + +#include +#include + +//! 
\cond ignore +namespace amd { + +template +class NotNullWrapper +{ +private: + T* const ptrOrNull_; + +protected: + explicit NotNullWrapper(T* ptrOrNull) + : ptrOrNull_(ptrOrNull) + { } + +public: + void operator = (T value) const + { + if (ptrOrNull_ != NULL) { + *ptrOrNull_ = value; + } + } +}; + +template +class NotNullReference : protected NotNullWrapper +{ +public: + explicit NotNullReference(T* ptrOrNull) + : NotNullWrapper(ptrOrNull) + { } + + const NotNullWrapper& operator * () const { return *this; } +}; + +} // namespace amd + +template +inline amd::NotNullReference +not_null(T* ptrOrNull) +{ + return amd::NotNullReference(ptrOrNull); +} + +#define CL_CHECK_THREAD(thread) \ + (thread != NULL || ((thread = new amd::HostThread()) != NULL \ + && thread == amd::Thread::current())) + +#define RUNTIME_ENTRY_RET(ret, func, args) \ +CL_API_ENTRY ret CL_API_CALL \ +func args \ +{ \ + amd::Thread* thread = amd::Thread::current(); \ + if (!CL_CHECK_THREAD(thread)) { \ + *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; \ + return (ret) 0; \ + } + +#define RUNTIME_ENTRY_RET_NOERRCODE(ret, func, args) \ +CL_API_ENTRY ret CL_API_CALL \ +func args \ +{ \ + amd::Thread* thread = amd::Thread::current(); \ + if (!CL_CHECK_THREAD(thread)) { \ + return (ret) 0; \ + } + +#define RUNTIME_ENTRY(ret, func, args) \ +CL_API_ENTRY ret CL_API_CALL \ +func args \ +{ \ + amd::Thread* thread = amd::Thread::current(); \ + if (!CL_CHECK_THREAD(thread)) { \ + return CL_OUT_OF_HOST_MEMORY; \ + } + +#define RUNTIME_ENTRY_VOID(ret, func, args) \ +CL_API_ENTRY ret CL_API_CALL \ +func args \ +{ \ + amd::Thread* thread = amd::Thread::current(); \ + if (!CL_CHECK_THREAD(thread)) { \ + return; \ + } + +#define RUNTIME_EXIT \ + /* FIXME_lmoriche: we should check to thread->lastError here! */ \ +} + +//! 
Helper function to check "properties" parameter in various functions
+int checkContextProperties(
+    const cl_context_properties *properties,
+    bool* offlineDevices);
+
+namespace amd {
+
+namespace detail {
+
+//! Maps a queried field to the (pointer, byte count) pair that clGetInfo
+//! copies out. Generic case: the object itself, sizeof(T) bytes.
+template <typename T>
+struct ParamInfo
+{
+    static inline std::pair<const void*, size_t> get(const T& param) {
+        return std::pair<const void*, size_t>(&param, sizeof(T));
+    }
+};
+
+//! C strings: the characters plus the terminating NUL.
+template <>
+struct ParamInfo<const char*>
+{
+    static inline std::pair<const void*, size_t> get(const char* param) {
+        return std::pair<const void*, size_t>(param, strlen(param) + 1);
+    }
+};
+
+//! Character arrays: sized like a C string (up to and including the NUL).
+template <int N>
+struct ParamInfo<char[N]>
+{
+    static inline std::pair<const void*, size_t> get(const char* param) {
+        return std::pair<const void*, size_t>(param, strlen(param) + 1);
+    }
+};
+
+} // namespace detail
+
+//! Copy \a field into a caller supplied clGet*Info style buffer.
+//!
+//! \param field                 value to report
+//! \param param_value_size      size in bytes of \a param_value
+//! \param param_value           destination buffer, may be NULL
+//! \param param_value_size_ret  receives the full size of the value when
+//!                              not NULL
+//!
+//! \return CL_SUCCESS, or CL_INVALID_VALUE when \a param_value is not NULL
+//! and too small. A too small buffer still receives a NUL terminated prefix
+//! when T is a char pointer and the buffer holds at least one byte.
+template <typename T>
+static inline cl_int
+clGetInfo(
+    T& field,
+    size_t param_value_size,
+    void* param_value,
+    size_t* param_value_size_ret)
+{
+    const void *valuePtr;
+    size_t valueSize;
+
+    std::tie(valuePtr, valueSize)
+        = detail::ParamInfo<typename std::remove_const<T>::type>::get(field);
+
+    *not_null(param_value_size_ret) = valueSize;
+
+    cl_int ret = CL_SUCCESS;
+    if (param_value != NULL && param_value_size < valueSize) {
+        // A zero sized buffer cannot even hold the terminating NUL: bail out
+        // before --valueSize below wraps around and memcpy overruns.
+        if (param_value_size == 0 || !std::is_pointer<T>()
+                || !std::is_same<typename std::remove_const<
+                    typename std::remove_pointer<T>::type>::type, char>()) {
+            return CL_INVALID_VALUE;
+        }
+        // For char* params, we will at least fill up to
+        // param_value_size, then return an error.
+        // NOTE(review): std::is_pointer<T> is false for array types, so
+        // char[N] fields take the early return above - confirm intended.
+        valueSize = param_value_size;
+        static_cast<char*>(param_value)[--valueSize] = '\0';
+        ret = CL_INVALID_VALUE;
+    }
+
+    if (param_value != NULL) {
+        ::memcpy(param_value, valuePtr, valueSize);
+        if (param_value_size > valueSize) {
+            // Zero the unused tail of the caller's buffer.
+            ::memset(static_cast<char*>(param_value) + valueSize,
+                '\0', param_value_size - valueSize);
+        }
+    }
+
+    return ret;
+}
+
+//! Validate an OpenCL event wait list and append its events to
+//! \a eventWaitList.
+//!
+//! \return CL_INVALID_EVENT_WAIT_LIST when the count and the pointer
+//! disagree, when an event is not valid, or when an event belongs to a
+//! different queue and its notifyCmdQueue() call returns false;
+//! CL_INVALID_CONTEXT when an event's context differs from the queue's;
+//! CL_SUCCESS otherwise. Events checked before a failure stay appended.
+static inline cl_int
+clSetEventWaitList(
+    Command::EventWaitList& eventWaitList,
+    const amd::HostQueue& hostQueue,
+    cl_uint num_events_in_wait_list,
+    const cl_event* event_wait_list)
+{
+    if ((num_events_in_wait_list == 0 && event_wait_list != NULL)
+            || (num_events_in_wait_list != 0 && event_wait_list == NULL)) {
+        return CL_INVALID_EVENT_WAIT_LIST;
+    }
+
+    while (num_events_in_wait_list-- > 0) {
+        cl_event event = *event_wait_list++;
+        Event* amdEvent = as_amd(event);
+        if (!is_valid(event)) {
+            return CL_INVALID_EVENT_WAIT_LIST;
+        }
+        if (&hostQueue.context() != &amdEvent->context()) {
+            return CL_INVALID_CONTEXT;
+        }
+        if ((amdEvent->command().queue() != &hostQueue) && !amdEvent->notifyCmdQueue()) {
+            return CL_INVALID_EVENT_WAIT_LIST;
+        }
+        eventWaitList.push_back(amdEvent);
+    }
+    return CL_SUCCESS;
+}
+
+//! Common function declarations for CL-external graphics API interop
+cl_int clEnqueueAcquireExtObjectsAMD(cl_command_queue command_queue,
+    cl_uint num_objects, const cl_mem* mem_objects,
+    cl_uint num_events_in_wait_list, const cl_event* event_wait_list,
+    cl_event* event, cl_command_type cmd_type);
+cl_int clEnqueueReleaseExtObjectsAMD(cl_command_queue command_queue,
+    cl_uint num_objects, const cl_mem* mem_objects,
+    cl_uint num_events_in_wait_list, const cl_event* event_wait_list,
+    cl_event* event, cl_command_type cmd_type);
+
+// This may need moving somewhere tidier...
+ +struct PlatformIDS { const struct KHRicdVendorDispatchRec* dispatch_; }; +class PlatformID { +public: + static PlatformIDS Platform; +}; +#define AMD_PLATFORM (reinterpret_cast(&amd::PlatformID::Platform)) + +} // namespace amd + +extern "C" { + +extern CL_API_ENTRY cl_key_amd CL_API_CALL +clCreateKeyAMD( + cl_platform_id platform, + void (CL_CALLBACK * destructor)( void * ), + cl_int * errcode_ret); + +extern CL_API_ENTRY cl_int CL_API_CALL +clObjectGetValueForKeyAMD( + void * object, + cl_key_amd key, + void ** ret_val); + +extern CL_API_ENTRY cl_int CL_API_CALL +clObjectSetValueForKeyAMD( + void * object, + cl_key_amd key, + void * value); + +#if defined(CL_VERSION_1_1) +extern CL_API_ENTRY cl_int CL_API_CALL +clSetCommandQueueProperty( + cl_command_queue command_queue, + cl_command_queue_properties properties, + cl_bool enable, + cl_command_queue_properties *old_properties) CL_API_SUFFIX__VERSION_1_0; +#endif // CL_VERSION_1_1 + +extern CL_API_ENTRY cl_mem CL_API_CALL +clConvertImageAMD( + cl_context context, + cl_mem image, + const cl_image_format * image_format, + cl_int * errcode_ret); + +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateBufferFromImageAMD( + cl_context context, + cl_mem image, + cl_int * errcode_ret); + +extern CL_API_ENTRY cl_program CL_API_CALL +clCreateProgramWithAssemblyAMD( + cl_context context, + cl_uint count, + const char ** strings, + const size_t * lengths, + cl_int * errcode_ret); + +} // extern "C" + +//! \endcond + +#endif /*CL_COMMON_HPP_*/ diff --git a/projects/hip/amdocl/cl_debugger_amd.h b/projects/hip/amdocl/cl_debugger_amd.h new file mode 100644 index 0000000000..1e9fe29e3a --- /dev/null +++ b/projects/hip/amdocl/cl_debugger_amd.h @@ -0,0 +1,694 @@ +/* Copyright (c) 2014-present Advanced Micro Devices, Inc. 
+ + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. */ + +#ifndef __CL_DEBUGGER_AMD_H +#define __CL_DEBUGGER_AMD_H + +#ifdef __APPLE__ +#include +#else +#include +#endif + +/****************************************** +* Private AMD extension cl_dbg * +******************************************/ +#ifdef __cplusplus +extern "C" { +#endif /*__cplusplus*/ + +#define CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD -80 +#define CL_DEBUGGER_REGISTER_FAILURE_AMD -81 +#define CL_TRAP_HANDLER_NOT_DEFINED_AMD -82 +#define CL_EVENT_TIMEOUT_AMD -83 + + +typedef uintptr_t cl_dbg_event_amd; //! debug event + +/*! \brief Trap Handler Type + * + * The trap handler for each support type. + */ +enum cl_dbg_trap_type_amd { + CL_DBG_DEBUG_TRAP = 0, //! HW debug + CL_DBG_MAX_TRAP +}; + +/*! \brief Wave actions used to control the wave execution on the hardware + * + * The wave action enumerations are used to specify the desired + * behavior when calling the wave control function. 
Overall, there are + * five types of operations that can be specified. + */ +enum cl_dbg_waves_action_amd { + CL_DBG_WAVES_DONT_USE_ZERO = 0, //! NOT USED + CL_DBG_WAVES_HALT = 1, //! halt wave + CL_DBG_WAVES_RESUME = 2, //! resume wave + CL_DBG_WAVES_KILL = 3, //! kill wave + CL_DBG_WAVES_DEBUG = 4, //! debug wave + CL_DBG_WAVES_TRAP = 5, //! trap + CL_DBG_WAVES_MAX +}; + +/*! \brief Host actions when encountering an exception in the kernel. + * + * The host action enumeration is used to specify the desired host + * response in the event that a device kernel exception is encountered. + */ +enum cl_dbg_host_action_amd { + CL_DBG_HOST_IGNORE = 1, //! ignore the kernel exception + CL_DBG_HOST_EXIT = 2, //! exit the host application on a kernel exception + CL_DBG_HOST_NOTIFY = 4 //! report the kernel exception +}; + +/*! \brief Mode of the wave action when calling the wave control function + * + * The wave mode enumerations are used to specify the desired + * broadcast level when calling the wave control function. + */ +enum cl_dbg_wave_mode_amd { + CL_DBG_WAVEMODE_SINGLE = 0, //! send command to single wave + CL_DBG_WAVEMODE_BROADCAST = 2, //! send command to wave with matching VMID + CL_DBG_WAVEMODE_BROADCAST_CU = 3, //! send command to wave with matching VMID with specific CU + CL_DBG_WAVEMODE_MAX +}; + +/*! \brief Enumeration of address watch mode + * + * This enumeration indicates the different modes of address watch. + */ +enum cl_dbg_address_watch_mode_amd { + CL_DBG_ADDR_WATCH_MODE_READ = 0, //! Read operations only + CL_DBG_ADDR_WATCH_MODE_NONREAD = 1, //! Write or Atomic operations only + CL_DBG_ADDR_WATCH_MODE_ATOMIC = 2, //! Atomic Operations only + CL_DBG_ADDR_WATCH_MODE_ALL = 3, //! Read, Write or Atomic operations + CL_DBG_ADDR_WATCH_MODE_MAX //! Number of address watch modes +}; + +/*! 
\brief Dispatch exception policy descriptor + * + * The dispatch exception policy descriptor is used to define the + * expected exception policy in the event an exception is encountered + * on the associated dispatch. + */ +typedef struct _cl_dbg_exception_policy_amd { + cl_uint exceptionMask; //! exception mask + cl_dbg_waves_action_amd waveAction; //! wave action + cl_dbg_host_action_amd hostAction; //! host action + cl_dbg_wave_mode_amd waveMode; //! wave mode +} cl_dbg_exception_policy_amd; + +/*! \brief Kernel execution mode + * + * This structure is used to control the kernel execution mode. The + * following aspects are included in this structure: + * 1. Regular execution or debug mode (0: regular execution (default), + * 1: debug mode) + * 2. SQ debugger mode on/off + * 3. Disable L1 scalar cache (0: enable (default), 1: disable) + * 4. Disable L1 vector cache (0: enable (default), 1: disable) + * 5. Disable L2 cache (0: enable (default), 1: disable) + * 6. Num of CUs reserved for display (0 (default), 7: max) + */ +typedef struct _cl_dbg_kernel_exec_mode_amd { + union { + struct { + cl_uint monitorMode : 1; + cl_uint gpuSingleStepMode : 1; + cl_uint disableL1Scalar : 1; + cl_uint disableL1Vector : 1; + cl_uint disableL2Cache : 1; + cl_uint reservedCuNum : 3; + cl_uint reserved : 24; + }; + cl_uint ui32All; + }; +} cl_dbg_kernel_exec_mode_amd; + +/*! \brief GPU cache mask + * + * This structure is used to specify the GPU cache to be flushed/invalidated + */ +typedef struct _cl_dbg_gpu_cache_mask_amd { + union { + struct { + cl_uint sqICache : 1; //! instruction cache + cl_uint sqKCache : 1; //! data cache + cl_uint tcL1 : 1; //! tcL1 cache + cl_uint tcL2 : 1; //! tcL2 cache + cl_uint reserved : 28; + }; + cl_uint ui32All; + }; +} cl_dbg_gpu_cache_mask_amd; + +/*! 
\brief Dispatch Debug Info + * + * This structure is used to store the scratch and global memory descriptors + */ +typedef struct _cl_dispatch_debug_info_amd { + cl_uint scratchMemoryDescriptor[4]; //! Scratch memory descriptors + cl_uint globalMemoryDescriptor[4]; //! Global memory descriptors +} cl_dispatch_debug_info_amd; + +/*! \brief AQL Packet Info + * + * This structure is used to store AQL packet information for kernel dispatch + */ +typedef struct _cl_aql_packet_info_amd { + cl_uint trapReservedVgprIndex; //! VGPR index reserved for trap + //! value is -1 when kernel was not compiled + //! in debug mode. + cl_uint scratchBufferWaveOffset; //! scratch buffer wave offset + //! value is -1 when kernel was not compiled + //! in debug mode or scratch buffer is not enabled + void* pointerToIsaBuffer; //! Pointer to buffer containing ISA + size_t sizeOfIsaBuffer; //! Size of the ISA buffer + + cl_uint numberOfVgprs; //! Number of VGPRs used by the kernel + cl_uint numberOfSgprs; //! Number of SGPRs used by the kernel + size_t sizeOfStaticGroupMemory; //! Static local memory used by the kernel +} cl_aql_packet_info_amd; + +/*! \brief Wave address + * + * This structure specifies the wave for the SQ control command + */ +typedef struct _cl_dbg_wave_addr_amd { + cl_uint shaderEngine : 2; //! Shader engine + cl_uint shaderArray : 1; //! Shader array + cl_uint computeUnit : 4; //! Compute unit + cl_uint simd : 2; //! SIMD id + cl_uint wave : 4; //! Wave id + cl_uint vmid : 4; //! VMID + cl_uint reserved : 15; + +} cl_dbg_wave_addr_amd; + +/*! \brief Pre-dispatch call back function signature + * + * This is the signature of the call back function before the kernel + * dispatch. The call back function is to indicate the start of + * the kernel launch. It is used by the debugger. + */ +typedef void* (*cl_PreDispatchCallBackFunctionAMD)(cl_device_id device, void* ocl_event_handle, + const void* aql_packet, void* acl_binary, + void* user_args); + +/*! 
\brief Post-dispatch call back function signature + * + * This is the signature of the call back function after the kernel + * dispatch. The call back function is to indicate the completion of + * the kernel launch. It is used by the debugger. + */ +typedef void* (*cl_PostDispatchCallBackFunctionAMD)(cl_device_id device, cl_ulong event, + void* user_args); + +/*! \brief Set up the dispatch call back function pointers + * + * \param device specifies the device to be used + * + * \param preDispatchFunction is the function to be called before dispatching the kernel + * + * \param postDispatchFunction is the function to be called after kernel execution + * + * \return One of the following values: + * - CL_SUCCESS if the function is executed successfully + * - CL_INVALID_DEVICE if the device is not valid + * - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager + */ +extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgSetCallBackFunctionsAMD( + cl_device_id /* device */, cl_PreDispatchCallBackFunctionAMD /* preDispatchFunction */, + cl_PostDispatchCallBackFunctionAMD /* postDispatchFunction */ + ) CL_API_SUFFIX__VERSION_2_0; + + +/*! \brief Set up the arguments of the dispatch call back function + * + * \param device specifies the device to be used + * + * \param preDispatchArgs is the arguments for the pre-dispatch callback function + * + * \param postDispatchArgs is the arguments for the post-dispatch callback function + * + * \return One of the following values: + * - CL_SUCCESS if the function is executed successfully + * - CL_INVALID_DEVICE if the device is not valid + * - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager + */ +extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgSetCallBackArgumentsAMD(cl_device_id /* device */, + void* /* preDispatchArgs */, + void* /* postDispatchArgs */ + ) CL_API_SUFFIX__VERSION_2_0; + + +/*! 
+ * + * \param device specifies the device to be used + * + * \param mask is the mask to specify which cache to be flush/invalidate + * + * \return One of the following values: + * - CL_SUCCESS if the function is executed successfully + * - CL_INVALID_DEVICE if the device is not valid + * - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager + */ +extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgFlushCacheAMD(cl_device_id /* device */, + cl_dbg_gpu_cache_mask_amd /* mask */ + ) CL_API_SUFFIX__VERSION_2_0; + + +/*! \brief Set up an exception policy in the trap handler object + * + * \param device specifies the device to be used + * + * \param policy specifies the exception policy, which includes the exception mask, + * wave action, host action, wave mode. + * + * \return One of the following values: + * - CL_SUCCESS if the function is executed successfully + * - CL_INVALID_DEVICE if the device is not valid + * - CL_INVALID_VALUE if the policy is not specified (NULL) + * - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager + */ +extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgSetExceptionPolicyAMD( + cl_device_id /* device */, cl_dbg_exception_policy_amd* /* policy */ + ) CL_API_SUFFIX__VERSION_2_0; + +/*! \brief Get the exception policy in the trap handler object + * + * \param device specifies the device to be used + * + * \param policy is a pointer to the memory where the policy is returned + * + * \return One of the following values: + * - CL_SUCCESS if the function is executed successfully + * - CL_INVALID_DEVICE if the device is not valid + * - CL_INVALID_VALUE if the policy storage is not specified + * - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager + */ +extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgGetExceptionPolicyAMD( + cl_device_id /* device */, cl_dbg_exception_policy_amd* /* policy */ + ) CL_API_SUFFIX__VERSION_2_0; + +/*! 
\brief Set up the kernel execution mode in the trap handler object + * + * \param device specifies the device to be used + * + * \param mode specifies the kernel execution mode, which indicates whether single + * step mode is used and how many CUs are reserved. + * + * \return One of the following values: + * - CL_SUCCESS if the function is executed successfully + * - CL_INVALID_DEVICE if the device is not valid + * - CL_INVALID_VALUE if the mode is not specified, ie, has a NULL value + * - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager + */ +extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgSetKernelExecutionModeAMD( + cl_device_id /* device */, cl_dbg_kernel_exec_mode_amd* /* mode */ + ) CL_API_SUFFIX__VERSION_2_0; + + +/*! \brief Get the kernel execution mode in the trap handler object + * + * \param device specifies the device to be used + * + * \param mode is a pointer to the memory where the execution mode is returned + * + * \return One of the following values: + * - CL_SUCCESS if the function is executed successfully + * - CL_INVALID_DEVICE if the device is not valid + * - CL_INVALID_VALUE if the mode storage is not specified, ie, has a NULL value + * - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager + */ +extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgGetKernelExecutionModeAMD( + cl_device_id /* device */, cl_dbg_kernel_exec_mode_amd* /* mode */ + ) CL_API_SUFFIX__VERSION_2_0; + + +/*! 
\brief Create a debug event + * + * \param device specifies the device to be used + * + * \param autoReset is the auto reset flag + * + * \param pDebugEvent returns the debug event to be used for exception notification + * + * \param pEventId is the event ID, which is not used at this moment + * + * \return One of the following values: + * - CL_SUCCESS if the function is executed successfully + * - CL_INVALID_DEVICE if the device is not valid + * - CL_INVALID_VALUE if the pDebugEvent value is NULL + * - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager + * - CL_OUT_OF_RESOURCES if fails to create the event + */ +extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgCreateEventAMD(cl_device_id /* device */, + bool /* autoReset */, + cl_dbg_event_amd* /* pDebugEvent */, + cl_uint* /* pEventId */ + ) CL_API_SUFFIX__VERSION_2_0; + +/*! \brief Wait for a debug event to be signaled + * + * \param device specifies the device to be used + * + * \param pDebugEvent is the debug event to be waited for + * + * \param pEventId is the event ID, which is not used at this moment + * + * \param timeOut is the duration for waiting + * + * \return One of the following values: + * - CL_SUCCESS if the event occurs before the timeout + * - CL_INVALID_DEVICE if the device is not valid + * - CL_INVALID_VALUE if the pDebugEvent value is NULL + * - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager + * - CL_EVENT_TIMEOUT_AMD if timeout occurs + */ +extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgWaitEventAMD(cl_device_id /* device */, + cl_dbg_event_amd /* pDebugEvent */, + cl_uint /* pEventId */, + cl_uint /* timeOut */ + ) CL_API_SUFFIX__VERSION_2_0; + +/*! 
\brief Destroy a debug event + * + * \param device specifies the device to be used + * + * \param pDebugEvent is the debug event to be destroyed + * + * \param pEventId is the event ID, which is not used at this moment + * + * \return One of the following values: + * - CL_SUCCESS if the function is executed successfully + * - CL_INVALID_DEVICE if the device is not valid + * - CL_INVALID_VALUE if the pDebugEvent value is NULL + * - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager + */ +extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgDestroyEventAMD(cl_device_id /* device */, + cl_dbg_event_amd* /* pDebugEvent */, + cl_uint* /* pEventId */ + ) CL_API_SUFFIX__VERSION_2_0; + + +/*! \brief Register the debugger on a device + * + * \param context specifies the context for the debugger + * + * \param device specifies the device to be used + * + * \param pMessageStorage specifies the memory for trap message passing between KMD and OCL runtime + * + * \return One of the following values: + * - CL_SUCCESS if the function is executed successfully + * - CL_INVALID_CONTEXT if the context is not valid + * - CL_INVALID_DEVICE if the device is not valid + * - CL_INVALID_VALUE if the pMessageStorage value is NULL + * - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager + * - CL_OUT_OF_RESOURCES if a host queue cannot be created for the debugger + */ +extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgRegisterDebuggerAMD( + cl_context /* context */, cl_device_id /* device */, volatile void* /* pMessageStorage */ + ) CL_API_SUFFIX__VERSION_2_0; + + +/*! 
\brief Unregister the debugger on a device + * + * \param device specifies the device to be used + * + * \return One of the following values: + * - CL_SUCCESS if the function is executed successfully + * - CL_INVALID_DEVICE if the device is not valid + * - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager + */ +extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgUnregisterDebuggerAMD(cl_device_id /* device */ + ) CL_API_SUFFIX__VERSION_2_0; + +/*! \brief Setup the pointer of the acl_binary to be used by the debugger + * + * \param device specifies the device to be used + * + * \param aclBinary specifies the ACL binary to be used + * + * \return One of the following values: + * - CL_SUCCESS if the function is executed successfully + * - CL_INVALID_DEVICE if the device is not valid + * - CL_INVALID_VALUE if the aclBinary is not provided + * - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager + */ +extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgSetAclBinaryAMD(cl_device_id /* device */, + void* /* aclBinary */ + ) CL_API_SUFFIX__VERSION_2_0; + + +/*! 
\brief Control the execution of wavefront on the GPU + * + * \param device specifies the device to be used + * + * \param action specifies the wave action - halt, resume, kill, debug + * + * \param mode specifies the wave mode + * + * \param trapID specifies the trap ID, which should be 0x7 + * + * \param waveAddress specifies the wave address for the wave control + * + * \return One of the following values: + * - CL_SUCCESS if the event occurs before the timeout + * - CL_INVALID_DEVICE if the device is not valid + * - CL_INVALID_VALUE if the waveMsg is not provided, invalid action or mode value + * - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager + */ +extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgWaveControlAMD(cl_device_id /* device */, + cl_dbg_waves_action_amd /* action */, + cl_dbg_wave_mode_amd /* mode */, + cl_uint /* trapId */, + cl_dbg_wave_addr_amd /* waveAddress */ + ) CL_API_SUFFIX__VERSION_2_0; + +/*! \brief Set watch points on memory address ranges to generate exception events + * + * \param device specifies the device to be used + * + * \param numWatchPoints specifies the number of watch points + * + * \param watchMode is the array of watch mode for the watch points + * + * \param watchAddress is the array of watch address for the watch points + * + * \param watchMask is the array of mask for the watch points + * + * \param watchEvent is the array of event for the watch points + * + * \return One of the following values: + * - CL_SUCCESS if the event occurs before the timeout + * - CL_INVALID_DEVICE if the device is not valid + * - CL_INVALID_VALUE if the number of points <= 0, or other parameters is not specified + * - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager + */ +extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgAddressWatchAMD( + cl_device_id /* device */, cl_uint /* numWatchPoints */, + cl_dbg_address_watch_mode_amd* /* watchMode */, void** /* watchAddress */, + cl_ulong* /* watchMask */, 
cl_dbg_event_amd* /* watchEvent */ + ) CL_API_SUFFIX__VERSION_2_0; + +/*! \brief Get the packet information for kernel execution + * + * \param device specifies the device to be used + * + * \param aqlCodeInfo specifies the kernel code and its size + * + * \param packetInfo points to the memory for the packet information to be returned + * + * \return One of the following values: + * - CL_SUCCESS if the function is executed successfully + * - CL_INVALID_DEVICE if the device is not valid + * - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager + */ +extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgGetAqlPacketInfoAMD( + cl_device_id /* device */, const void* /* aqlCodeInfo */, + cl_aql_packet_info_amd* /* packetInfo */ + ) CL_API_SUFFIX__VERSION_2_0; + + +/*! \brief Get the dispatch debug information + * + * \param device specifies the device to be used + * + * \param debugInfo points to the memory for the debug information to be returned + * + * \return One of the following values: + * - CL_SUCCESS if the function is executed successfully + * - CL_INVALID_DEVICE if the device is not valid + * - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager + */ +extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgGetDispatchDebugInfoAMD( + cl_device_id /* device */, cl_dispatch_debug_info_amd* /* debugInfo */ + ) CL_API_SUFFIX__VERSION_2_0; + + +/*! 
\brief Map the video memory for the kernel code to allow host access + * + * \param device specifies the device to be used + * + * \param aqlCodeAddress points to the returned host memory address for the kernel + * code (NOTE(review): not in the declaration, which takes only aqlCodeInfo - confirm) + * + * \param aqlCodeSize returns the size of the kernel code (NOTE(review): not in the declaration - confirm) + * + * \return One of the following values: + * - CL_SUCCESS if the function is executed successfully + * - CL_INVALID_DEVICE if the device is not valid + * - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager + */ +extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgMapKernelCodeAMD(cl_device_id /* device */, + void* /* aqlCodeInfo */ + ) CL_API_SUFFIX__VERSION_2_0; + + +/*! \brief Unmap the video memory for the kernel code + * + * \param device specifies the device to be used (not needed, just to be consistent) + * + * \param aqlCodeAddress points to the mapped memory address for the kernel code + * + * \return One of the following values: + * - CL_SUCCESS if the function is executed successfully + * - CL_INVALID_DEVICE if the device is not valid + * - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager + */ +extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgUnmapKernelCodeAMD(cl_device_id /* device */, + cl_ulong* /* aqlCodeAddress */ + ) CL_API_SUFFIX__VERSION_2_0; + + +/*! 
\brief Map the shader scratch ring's video memory to allow CPU access + * + * \param device specifies the device to be used + * + * \param scratchRingAddr points to the returned host memory address for the scratch + * ring + * + * \param scratchRingSize returns the size of the scratch ring + * + * \return One of the following values: + * - CL_SUCCESS if the function is executed successfully + * - CL_INVALID_DEVICE if the device is not valid + * - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager + */ +extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgMapScratchRingAMD(cl_device_id /* device */, + cl_ulong* /* scratchRingAddr */, + cl_uint* /* scratchRingSize */ + ) CL_API_SUFFIX__VERSION_2_0; + +/*! \brief Unmap the shader scratch ring's video memory + * + * \param device specifies the device to be used (not needed, just to be consistent) + * + * \param scratchRingAddr points to the mapped memory address for the scratch ring + * + * \return One of the following values: + * - CL_SUCCESS if the function is executed successfully + * - CL_INVALID_DEVICE if the device is not valid + * - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager + */ +extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgUnmapScratchRingAMD(cl_device_id /* device */, + cl_ulong* /* scratchRingAddr */ + ) CL_API_SUFFIX__VERSION_2_0; + +/*! 
\brief Get the memory object associated with the kernel parameter + * + * \param device specifies the device to be used + * + * \param paramIdx is the index of the kernel argument + * + * \param paramMem is a pointer to the returned memory object associated with the kernel argument + * + * \return One of the following values: + * - CL_SUCCESS if the function is executed successfully + * - CL_INVALID_DEVICE if the device is not valid + * - CL_INVALID_VALUE if paramMem is NULL (the original text also lists paramIdx < 0, impossible for a cl_uint) + * - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager + * - CL_INVALID_KERNEL_ARGS if it fails to get the memory object for the kernel argument + */ +extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgGetKernelParamMemAMD(cl_device_id /* device */, + cl_uint /* paramIdx */, + cl_mem* /* paramMem */ + ) CL_API_SUFFIX__VERSION_2_0; + +/*! \brief Set value of a global memory object + * + * \param device specifies the device to be used + * + * \param memObject is the memory object handle to be assigned the value specified in srcMem. + * + * \param offset is the offset into the memory object + * + * \param srcMem points to the memory which contains the values to be assigned to the memory + * + * \param size size (in bytes) of the srcMem + * + * \return One of the following values: + * - CL_SUCCESS if the function is executed successfully + * - CL_INVALID_DEVICE if the device is not valid + * - CL_INVALID_VALUE if memObject or srcMem is NULL, or size is 0 (originally "size <= 0 or offset < 0"; both are cl_uint) + * - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager + */ +extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgSetGlobalMemoryAMD(cl_device_id /* device */, + cl_mem /* memObject */, + cl_uint /* offset */, + void* /* srcMem */, + cl_uint /* size */ + ) CL_API_SUFFIX__VERSION_2_0; + + +/*! 
\brief Install the trap handler of a given type + * + * \param device specifies the device to be used + * + * \param trapType is the type of trap handler + * + * \param trapHandler is the memory object (cl_mem) for the trap handler (TBA) + * + * \param trapBuffer is the memory object (cl_mem) for the trap handler buffer (TMA) + * + * \param trapHandlerSize size (in bytes) of the trap handler (NOTE(review): not a parameter of the declaration - confirm) + * + * \param trapBufferSize size (in bytes) of the trap handler buffer (NOTE(review): not a parameter of the declaration - confirm) + * + * \return One of the following values: + * - CL_SUCCESS if the function is executed successfully + * - CL_INVALID_DEVICE if the device is not valid + * - CL_INVALID_VALUE if trapHandler is NULL or trapHandlerSize <= 0 + * - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager + */ +extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgInstallTrapAMD(cl_device_id /* device */, + cl_dbg_trap_type_amd /* trapType */, + cl_mem /* trapHandler */, + cl_mem /* trapBuffer */ + ) CL_API_SUFFIX__VERSION_2_0; + + +#ifdef __cplusplus +} /*extern "C"*/ +#endif /*__cplusplus*/ + +#endif /*__CL_DEBUGGER_AMD_H*/ diff --git a/projects/hip/amdocl/cl_icd.cpp b/projects/hip/amdocl/cl_icd.cpp new file mode 100644 index 0000000000..ec2cb48d7d --- /dev/null +++ b/projects/hip/amdocl/cl_icd.cpp @@ -0,0 +1,293 @@ +/* Copyright (c) 2008-present Advanced Micro Devices, Inc. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. 
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. */ + +#include "cl_common.hpp" +#include "vdi_common.hpp" +#ifdef _WIN32 +#include +#include "cl_d3d9_amd.hpp" +#include "cl_d3d10_amd.hpp" +#include "cl_d3d11_amd.hpp" +#endif //_WIN32 + +#include + +#include + +amd::PlatformIDS amd::PlatformID::Platform = //{ NULL }; + {amd::ICDDispatchedObject::icdVendorDispatch_}; + +static cl_int CL_API_CALL icdGetPlatformInfo(cl_platform_id platform, cl_platform_info param_name, + size_t param_value_size, void* param_value, + size_t* param_value_size_ret) { + return clGetPlatformInfo(NULL, param_name, param_value_size, param_value, param_value_size_ret); +} + +static cl_int CL_API_CALL icdGetDeviceIDs(cl_platform_id platform, cl_device_type device_type, + cl_uint num_entries, cl_device_id* devices, + cl_uint* num_devices) { + return clGetDeviceIDs(NULL, device_type, num_entries, devices, num_devices); +} + +static cl_int CL_API_CALL icdGetDeviceInfo(cl_device_id device, cl_device_info param_name, + size_t param_value_size, void* param_value, + size_t* param_value_size_ret) { + if (param_name == CL_DEVICE_PLATFORM) { + // Return the ICD platform instead of the default NULL platform. 
+ cl_platform_id platform = reinterpret_cast(&amd::PlatformID::Platform); + return amd::clGetInfo(platform, param_value_size, param_value, param_value_size_ret); + } + + return clGetDeviceInfo(device, param_name, param_value_size, param_value, param_value_size_ret); +} + +cl_icd_dispatch amd::ICDDispatchedObject::icdVendorDispatch_[] = { + {NULL /* should not get called */, icdGetPlatformInfo, icdGetDeviceIDs, icdGetDeviceInfo, + clCreateContext, clCreateContextFromType, clRetainContext, clReleaseContext, clGetContextInfo, + clCreateCommandQueue, clRetainCommandQueue, clReleaseCommandQueue, clGetCommandQueueInfo, + clSetCommandQueueProperty, clCreateBuffer, clCreateImage2D, clCreateImage3D, clRetainMemObject, + clReleaseMemObject, clGetSupportedImageFormats, clGetMemObjectInfo, clGetImageInfo, + clCreateSampler, clRetainSampler, clReleaseSampler, clGetSamplerInfo, + clCreateProgramWithSource, clCreateProgramWithBinary, clRetainProgram, clReleaseProgram, + clBuildProgram, clUnloadCompiler, clGetProgramInfo, clGetProgramBuildInfo, clCreateKernel, + clCreateKernelsInProgram, clRetainKernel, clReleaseKernel, clSetKernelArg, clGetKernelInfo, + clGetKernelWorkGroupInfo, clWaitForEvents, clGetEventInfo, clRetainEvent, clReleaseEvent, + clGetEventProfilingInfo, clFlush, clFinish, clEnqueueReadBuffer, clEnqueueWriteBuffer, + clEnqueueCopyBuffer, clEnqueueReadImage, clEnqueueWriteImage, clEnqueueCopyImage, + clEnqueueCopyImageToBuffer, clEnqueueCopyBufferToImage, clEnqueueMapBuffer, clEnqueueMapImage, + clEnqueueUnmapMemObject, clEnqueueNDRangeKernel, clEnqueueTask, clEnqueueNativeKernel, + clEnqueueMarker, clEnqueueWaitForEvents, clEnqueueBarrier, clGetExtensionFunctionAddress, + clCreateFromGLBuffer, clCreateFromGLTexture2D, clCreateFromGLTexture3D, + clCreateFromGLRenderbuffer, clGetGLObjectInfo, clGetGLTextureInfo, clEnqueueAcquireGLObjects, + clEnqueueReleaseGLObjects, clGetGLContextInfoKHR, + WINDOWS_SWITCH(clGetDeviceIDsFromD3D10KHR, NULL), + 
WINDOWS_SWITCH(clCreateFromD3D10BufferKHR, NULL), + WINDOWS_SWITCH(clCreateFromD3D10Texture2DKHR, NULL), + WINDOWS_SWITCH(clCreateFromD3D10Texture3DKHR, NULL), + WINDOWS_SWITCH(clEnqueueAcquireD3D10ObjectsKHR, NULL), + WINDOWS_SWITCH(clEnqueueReleaseD3D10ObjectsKHR, NULL), clSetEventCallback, clCreateSubBuffer, + clSetMemObjectDestructorCallback, clCreateUserEvent, clSetUserEventStatus, + clEnqueueReadBufferRect, clEnqueueWriteBufferRect, clEnqueueCopyBufferRect, + NULL, NULL, NULL, clCreateEventFromGLsyncKHR, + + /* OpenCL 1.2*/ + clCreateSubDevices, clRetainDevice, clReleaseDevice, clCreateImage, + clCreateProgramWithBuiltInKernels, clCompileProgram, clLinkProgram, clUnloadPlatformCompiler, + clGetKernelArgInfo, clEnqueueFillBuffer, clEnqueueFillImage, clEnqueueMigrateMemObjects, + clEnqueueMarkerWithWaitList, clEnqueueBarrierWithWaitList, + clGetExtensionFunctionAddressForPlatform, clCreateFromGLTexture, + + WINDOWS_SWITCH(clGetDeviceIDsFromD3D11KHR, NULL), + WINDOWS_SWITCH(clCreateFromD3D11BufferKHR, NULL), + WINDOWS_SWITCH(clCreateFromD3D11Texture2DKHR, NULL), + WINDOWS_SWITCH(clCreateFromD3D11Texture3DKHR, NULL), + WINDOWS_SWITCH(clCreateFromDX9MediaSurfaceKHR, NULL), + WINDOWS_SWITCH(clEnqueueAcquireD3D11ObjectsKHR, NULL), + WINDOWS_SWITCH(clEnqueueReleaseD3D11ObjectsKHR, NULL), + + WINDOWS_SWITCH(clGetDeviceIDsFromDX9MediaAdapterKHR, + NULL), // KHRpfn_clGetDeviceIDsFromDX9MediaAdapterKHR + // clGetDeviceIDsFromDX9MediaAdapterKHR; + WINDOWS_SWITCH( + clEnqueueAcquireDX9MediaSurfacesKHR, + NULL), // KHRpfn_clEnqueueAcquireDX9MediaSurfacesKHR clEnqueueAcquireDX9MediaSurfacesKHR; + WINDOWS_SWITCH( + clEnqueueReleaseDX9MediaSurfacesKHR, + NULL), // KHRpfn_clEnqueueReleaseDX9MediaSurfacesKHR clEnqueueReleaseDX9MediaSurfacesKHR; + + NULL, + NULL, NULL, NULL, + + clCreateCommandQueueWithProperties, clCreatePipe, clGetPipeInfo, clSVMAlloc, clSVMFree, + clEnqueueSVMFree, clEnqueueSVMMemcpy, clEnqueueSVMMemFill, clEnqueueSVMMap, clEnqueueSVMUnmap, + 
clCreateSamplerWithProperties, clSetKernelArgSVMPointer, clSetKernelExecInfo, + clGetKernelSubGroupInfo, + clCloneKernel, + clCreateProgramWithIL, + clEnqueueSVMMigrateMem, + clGetDeviceAndHostTimer, + clGetHostTimer, + clGetKernelSubGroupInfo, + clSetDefaultDeviceCommandQueue, + + clSetProgramReleaseCallback, + clSetProgramSpecializationConstant }}; + +#if defined(ATI_OS_WIN) +#include + +#pragma comment(lib, "shlwapi.lib") + +static bool ShouldLoadPlatform() { + // Get the OpenCL ICD registry values + HKEY platformsKey = NULL; + if (RegOpenKeyExA(HKEY_LOCAL_MACHINE, "SOFTWARE\\Khronos\\OpenCL\\Vendors", 0, KEY_READ, + &platformsKey) != ERROR_SUCCESS) + return true; + + std::vector registryValues; + DWORD dwIndex = 0; + while (true) { + char cszLibraryName[1024] = {0}; + DWORD dwLibraryNameSize = sizeof(cszLibraryName); + DWORD dwLibraryNameType = 0; + DWORD dwValue = 0; + DWORD dwValueSize = sizeof(dwValue); + + if (RegEnumValueA(platformsKey, dwIndex++, cszLibraryName, &dwLibraryNameSize, NULL, + &dwLibraryNameType, (LPBYTE)&dwValue, &dwValueSize) != ERROR_SUCCESS) + break; + // Require that the value be a DWORD and equal zero + if (dwLibraryNameType != REG_DWORD || dwValue != 0) { + continue; + } + registryValues.push_back(cszLibraryName); + } + RegCloseKey(platformsKey); + + HMODULE hm = NULL; + if (!GetModuleHandleExA( + GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS | GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT, + (LPCSTR)&ShouldLoadPlatform, &hm)) + return true; + + char cszDllPath[1024] = {0}; + if (!GetModuleFileNameA(hm, cszDllPath, sizeof(cszDllPath))) return true; + + // If we are loaded from the DriverStore, then there should be a registry + // value matching our current module absolute path. 
+ if (std::find(registryValues.begin(), registryValues.end(), cszDllPath) == registryValues.end()) + return true; + + LPSTR cszFileName; + char buffer[1024] = {0}; + if (!GetFullPathNameA(cszDllPath, sizeof(buffer), buffer, &cszFileName)) return true; + + // We found an absolute path in the registry that matched this DLL, now + // check if there is also an entry with the same filename. + if (std::find(registryValues.begin(), registryValues.end(), cszFileName) == registryValues.end()) + return true; + + // Lastly, check if there is a DLL with the same name in the System folder. + char cszSystemPath[1024] = {0}; +#if defined(ATI_BITS_32) + if (!GetSystemWow64DirectoryA(cszSystemPath, sizeof(cszSystemPath))) +#endif // defined(ATI_BITS_32) + if (!GetSystemDirectoryA(cszSystemPath, sizeof(cszSystemPath))) return true; + + std::string systemDllPath; + systemDllPath.append(cszSystemPath).append("\\").append(cszFileName); + if (!PathFileExistsA(systemDllPath.c_str())) { + return true; + } + + // If we get here, then all 3 conditions are true: + // - An entry in the registry with an absolute path matches the current DLL + // - An entry in the registry with a relative path matches the current DLL + // - A DLL with the same name was found in the system directory + // + // We should not load this platform! + + return false; +} + +#else + +#include + +// If there is only one platform, load it. 
+// If there is more than one platform, only load platforms that have visible devices +// If all platforms have no devices available, only load the PAL platform +static bool ShouldLoadPlatform() { + bool shouldLoad = true; + + if (!amd::Runtime::initialized()) { + amd::Runtime::init(); + } + const int numDevices = amd::Device::numDevices(CL_DEVICE_TYPE_GPU, false); + + void *otherPlatform = nullptr; + if (amd::IS_LEGACY) { + otherPlatform = dlopen("libamdocl64.so", RTLD_LAZY); + if (otherPlatform != nullptr) { // Present platform exists + shouldLoad = numDevices > 0; + } + } else { + otherPlatform = dlopen("libamdocl-orca64.so", RTLD_LAZY); + if (otherPlatform != nullptr) { // Legacy platform exists + // gcc4.8 doesn't support casting void* to a function pointer + // Work around this by creating a typedef untill we upgrade the compiler + typedef void*(*clGetFunctionAddress_t)(const char *); + typedef cl_int(*clIcdGetPlatformIDs_t)(cl_uint, cl_platform_id *, cl_uint *); + + clGetFunctionAddress_t legacyGetFunctionAddress = + reinterpret_cast(dlsym(otherPlatform, "clGetExtensionFunctionAddress")); + clIcdGetPlatformIDs_t legacyGetPlatformIDs = + reinterpret_cast(legacyGetFunctionAddress("clIcdGetPlatformIDsKHR")); + + cl_uint numLegacyPlatforms = 0; + legacyGetPlatformIDs(0, nullptr, &numLegacyPlatforms); + + shouldLoad = (numDevices > 0) || (numLegacyPlatforms == 0); + } + } + + if (otherPlatform != nullptr) { + dlclose(otherPlatform); + } + + return shouldLoad; +} + +#endif // defined(ATI_OS_WIN) + +CL_API_ENTRY cl_int CL_API_CALL clIcdGetPlatformIDsKHR(cl_uint num_entries, + cl_platform_id* platforms, + cl_uint* num_platforms) { + if (((num_entries > 0 || num_platforms == NULL) && platforms == NULL) || + (num_entries == 0 && platforms != NULL)) { + return CL_INVALID_VALUE; + } + + static bool shouldLoad = true; + + static std::once_flag initOnce; + std::call_once(initOnce, [](){ shouldLoad = ShouldLoadPlatform(); }); + + if (!shouldLoad) { + 
*not_null(num_platforms) = 0; + return CL_SUCCESS; + } + + if (!amd::Runtime::initialized()) { + amd::Runtime::init(); + } + + if (num_platforms != NULL && platforms == NULL) { + *num_platforms = 1; + return CL_SUCCESS; + } + + assert(platforms != NULL && "check the code above"); + *platforms = reinterpret_cast(&amd::PlatformID::Platform); + + *not_null(num_platforms) = 1; + return CL_SUCCESS; +} diff --git a/projects/hip/amdocl/cl_icd_amd.h b/projects/hip/amdocl/cl_icd_amd.h new file mode 100644 index 0000000000..69408e75ac --- /dev/null +++ b/projects/hip/amdocl/cl_icd_amd.h @@ -0,0 +1,739 @@ +/******************************************************************************* + * Copyright (c) 2008-2010 The Khronos Group Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and/or associated documentation files (the + * "Materials"), to deal in the Materials without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Materials, and to + * permit persons to whom the Materials are furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Materials. + * + * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. 
+ ******************************************************************************/ + +#ifndef __OPENCL_CL_ICD_H +#define __OPENCL_CL_ICD_H + +#include +#include + +#define cl_khr_icd 1 + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +typedef cl_int(CL_API_CALL* clGetPlatformIDs_fn)( + cl_uint /* num_entries */, cl_platform_id* /* platforms */, + cl_uint* /* num_platforms */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clGetPlatformInfo_fn)( + cl_platform_id /* platform */, cl_platform_info /* param_name */, size_t /* param_value_size */, + void* /* param_value */, size_t* /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clGetDeviceIDs_fn)( + cl_platform_id /* platform */, cl_device_type /* device_type */, cl_uint /* num_entries */, + cl_device_id* /* devices */, cl_uint* /* num_devices */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clGetDeviceInfo_fn)( + cl_device_id /* device */, cl_device_info /* param_name */, size_t /* param_value_size */, + void* /* param_value */, size_t* /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_context(CL_API_CALL* clCreateContext_fn)( + const cl_context_properties* /* properties */, cl_uint /* num_devices */, + const cl_device_id* /* devices */, + void(CL_CALLBACK* /* pfn_notify */)(const char*, const void*, size_t, void*), + void* /* user_data */, cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_context(CL_API_CALL* clCreateContextFromType_fn)( + const cl_context_properties* /* properties */, cl_device_type /* device_type */, + void(CL_CALLBACK* /* pfn_notify*/)(const char*, const void*, size_t, void*), + void* /* user_data */, cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clRetainContext_fn)(cl_context /* context */) + CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clReleaseContext_fn)(cl_context /* context */) + CL_API_SUFFIX__VERSION_1_0; + +typedef 
cl_int(CL_API_CALL* clGetContextInfo_fn)( + cl_context /* context */, cl_context_info /* param_name */, size_t /* param_value_size */, + void* /* param_value */, size_t* /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_command_queue(CL_API_CALL* clCreateCommandQueue_fn)( + cl_context /* context */, cl_device_id /* device */, + cl_command_queue_properties /* properties */, + cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clRetainCommandQueue_fn)(cl_command_queue /* command_queue */) + CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clReleaseCommandQueue_fn)(cl_command_queue /* command_queue */) + CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clGetCommandQueueInfo_fn)( + cl_command_queue /* command_queue */, cl_command_queue_info /* param_name */, + size_t /* param_value_size */, void* /* param_value */, + size_t* /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clSetCommandQueueProperty_fn)( + cl_command_queue /* command_queue */, cl_command_queue_properties /* properties */, + cl_bool /* enable */, + cl_command_queue_properties* /* old_properties */) /*CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED*/; + +typedef cl_mem(CL_API_CALL* clCreateBuffer_fn)( + cl_context /* context */, cl_mem_flags /* flags */, size_t /* size */, void* /* host_ptr */, + cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_mem(CL_API_CALL* clCreateSubBuffer_fn)( + cl_mem /* buffer */, cl_mem_flags /* flags */, cl_buffer_create_type /* buffer_create_type */, + const void* /* buffer_create_info */, cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_1_1; + +typedef cl_mem(CL_API_CALL* clCreateImage2D_fn)( + cl_context /* context */, cl_mem_flags /* flags */, const cl_image_format* /* image_format */, + size_t /* image_width */, size_t /* image_height */, size_t /* image_row_pitch */, + void* /* host_ptr */, cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + 
+typedef cl_mem(CL_API_CALL* clCreateImage3D_fn)( + cl_context /* context */, cl_mem_flags /* flags */, const cl_image_format* /* image_format */, + size_t /* image_width */, size_t /* image_height */, size_t /* image_depth */, + size_t /* image_row_pitch */, size_t /* image_slice_pitch */, void* /* host_ptr */, + cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clRetainMemObject_fn)(cl_mem /* memobj */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clReleaseMemObject_fn)(cl_mem /* memobj */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clGetSupportedImageFormats_fn)( + cl_context /* context */, cl_mem_flags /* flags */, cl_mem_object_type /* image_type */, + cl_uint /* num_entries */, cl_image_format* /* image_formats */, + cl_uint* /* num_image_formats */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clGetMemObjectInfo_fn)( + cl_mem /* memobj */, cl_mem_info /* param_name */, size_t /* param_value_size */, + void* /* param_value */, size_t* /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clGetImageInfo_fn)( + cl_mem /* image */, cl_image_info /* param_name */, size_t /* param_value_size */, + void* /* param_value */, size_t* /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clSetMemObjectDestructorCallback_fn)( + cl_mem /* memobj */, + void(CL_CALLBACK* /*pfn_notify*/)(cl_mem /* memobj */, void* /*user_data*/), + void* /*user_data */) CL_API_SUFFIX__VERSION_1_1; + +/* Sampler APIs */ +typedef cl_sampler(CL_API_CALL* clCreateSampler_fn)( + cl_context /* context */, cl_bool /* normalized_coords */, + cl_addressing_mode /* addressing_mode */, cl_filter_mode /* filter_mode */, + cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clRetainSampler_fn)(cl_sampler /* sampler */) + CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clReleaseSampler_fn)(cl_sampler /* sampler */) + 
CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clGetSamplerInfo_fn)( + cl_sampler /* sampler */, cl_sampler_info /* param_name */, size_t /* param_value_size */, + void* /* param_value */, size_t* /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +/* Program Object APIs */ +typedef cl_program(CL_API_CALL* clCreateProgramWithSource_fn)( + cl_context /* context */, cl_uint /* count */, const char** /* strings */, + const size_t* /* lengths */, cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_program CL_API_CALL +clCreateProgramWithIL(cl_context /* context */, + const void * /* strings */, size_t /* lengths */, + cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_2_0; + +typedef cl_program(CL_API_CALL* clCreateProgramWithILKHR_fn)( + cl_context /* context */, const void* /* il */, size_t /* length */, + cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2; + +typedef cl_program(CL_API_CALL* clCreateProgramWithBinary_fn)( + cl_context /* context */, cl_uint /* num_devices */, const cl_device_id* /* device_list */, + const size_t* /* lengths */, const unsigned char** /* binaries */, cl_int* /* binary_status */, + cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clRetainProgram_fn)(cl_program /* program */) + CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clReleaseProgram_fn)(cl_program /* program */) + CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clBuildProgram_fn)( + cl_program /* program */, cl_uint /* num_devices */, const cl_device_id* /* device_list */, + const char* /* options */, + void(CL_CALLBACK* /* pfn_notify */)(cl_program /* program */, void* /* user_data */), + void* /* user_data */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clUnloadCompiler_fn)(void) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clGetProgramInfo_fn)( + cl_program /* program */, cl_program_info /* param_name */, size_t /* param_value_size */, + 
void* /* param_value */, size_t* /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clGetProgramBuildInfo_fn)( + cl_program /* program */, cl_device_id /* device */, cl_program_build_info /* param_name */, + size_t /* param_value_size */, void* /* param_value */, + size_t* /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +/* Kernel Object APIs */ +typedef cl_kernel(CL_API_CALL* clCreateKernel_fn)( + cl_program /* program */, const char* /* kernel_name */, + cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clCreateKernelsInProgram_fn)( + cl_program /* program */, cl_uint /* num_kernels */, cl_kernel* /* kernels */, + cl_uint* /* num_kernels_ret */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clRetainKernel_fn)(cl_kernel /* kernel */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clReleaseKernel_fn)(cl_kernel /* kernel */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clSetKernelArg_fn)(cl_kernel /* kernel */, cl_uint /* arg_index */, + size_t /* arg_size */, const void* /* arg_value */) + CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clGetKernelInfo_fn)( + cl_kernel /* kernel */, cl_kernel_info /* param_name */, size_t /* param_value_size */, + void* /* param_value */, size_t* /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clGetKernelWorkGroupInfo_fn)( + cl_kernel /* kernel */, cl_device_id /* device */, cl_kernel_work_group_info /* param_name */, + size_t /* param_value_size */, void* /* param_value */, + size_t* /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +/* Event Object APIs */ +typedef cl_int(CL_API_CALL* clWaitForEvents_fn)( + cl_uint /* num_events */, const cl_event* /* event_list */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clGetEventInfo_fn)( + cl_event /* event */, cl_event_info /* param_name */, size_t /* param_value_size */, + void* /* param_value 
*/, size_t* /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_event(CL_API_CALL* clCreateUserEvent_fn)( + cl_context /* context */, cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_1_1; + +typedef cl_int(CL_API_CALL* clRetainEvent_fn)(cl_event /* event */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clReleaseEvent_fn)(cl_event /* event */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clSetUserEventStatus_fn)( + cl_event /* event */, cl_int /* execution_status */) CL_API_SUFFIX__VERSION_1_1; + +typedef cl_int(CL_API_CALL* clSetEventCallback_fn)( + cl_event /* event */, cl_int /* command_exec_callback_type */, + void(CL_CALLBACK* /* pfn_notify */)(cl_event, cl_int, void*), + void* /* user_data */) CL_API_SUFFIX__VERSION_1_1; + +/* Profiling APIs */ +typedef cl_int(CL_API_CALL* clGetEventProfilingInfo_fn)( + cl_event /* event */, cl_profiling_info /* param_name */, size_t /* param_value_size */, + void* /* param_value */, size_t* /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +/* Flush and Finish APIs */ +typedef cl_int(CL_API_CALL* clFlush_fn)(cl_command_queue /* command_queue */) + CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clFinish_fn)(cl_command_queue /* command_queue */) + CL_API_SUFFIX__VERSION_1_0; + +/* Enqueued Commands APIs */ +typedef cl_int(CL_API_CALL* clEnqueueReadBuffer_fn)( + cl_command_queue /* command_queue */, cl_mem /* buffer */, cl_bool /* blocking_read */, + size_t /* offset */, size_t /* cb */, void* /* ptr */, cl_uint /* num_events_in_wait_list */, + const cl_event* /* event_wait_list */, cl_event* /* event */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clEnqueueReadBufferRect_fn)( + cl_command_queue /* command_queue */, cl_mem /* buffer */, cl_bool /* blocking_read */, + const size_t* /* buffer_offset */, const size_t* /* host_offset */, const size_t* /* region */, + size_t /* buffer_row_pitch */, size_t /* buffer_slice_pitch */, size_t /* 
host_row_pitch */, + size_t /* host_slice_pitch */, void* /* ptr */, cl_uint /* num_events_in_wait_list */, + const cl_event* /* event_wait_list */, cl_event* /* event */) CL_API_SUFFIX__VERSION_1_1; + +typedef cl_int(CL_API_CALL* clEnqueueWriteBuffer_fn)( + cl_command_queue /* command_queue */, cl_mem /* buffer */, cl_bool /* blocking_write */, + size_t /* offset */, size_t /* cb */, const void* /* ptr */, + cl_uint /* num_events_in_wait_list */, const cl_event* /* event_wait_list */, + cl_event* /* event */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clEnqueueWriteBufferRect_fn)( + cl_command_queue /* command_queue */, cl_mem /* buffer */, cl_bool /* blocking_write */, + const size_t* /* buffer_offset */, const size_t* /* host_offset */, const size_t* /* region */, + size_t /* buffer_row_pitch */, size_t /* buffer_slice_pitch */, size_t /* host_row_pitch */, + size_t /* host_slice_pitch */, const void* /* ptr */, cl_uint /* num_events_in_wait_list */, + const cl_event* /* event_wait_list */, cl_event* /* event */) CL_API_SUFFIX__VERSION_1_1; + +typedef cl_int(CL_API_CALL* clEnqueueCopyBuffer_fn)( + cl_command_queue /* command_queue */, cl_mem /* src_buffer */, cl_mem /* dst_buffer */, + size_t /* src_offset */, size_t /* dst_offset */, size_t /* cb */, + cl_uint /* num_events_in_wait_list */, const cl_event* /* event_wait_list */, + cl_event* /* event */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clEnqueueCopyBufferRect_fn)( + cl_command_queue /* command_queue */, cl_mem /* src_buffer */, cl_mem /* dst_buffer */, + const size_t* /* src_origin */, const size_t* /* dst_origin */, const size_t* /* region */, + size_t /* src_row_pitch */, size_t /* src_slice_pitch */, size_t /* dst_row_pitch */, + size_t /* dst_slice_pitch */, cl_uint /* num_events_in_wait_list */, + const cl_event* /* event_wait_list */, cl_event* /* event */) CL_API_SUFFIX__VERSION_1_1; + +typedef cl_int(CL_API_CALL* clEnqueueReadImage_fn)( + cl_command_queue /* 
command_queue */, cl_mem /* image */, cl_bool /* blocking_read */, + const size_t* /* origin[3] */, const size_t* /* region[3] */, size_t /* row_pitch */, + size_t /* slice_pitch */, void* /* ptr */, cl_uint /* num_events_in_wait_list */, + const cl_event* /* event_wait_list */, cl_event* /* event */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clEnqueueWriteImage_fn)( + cl_command_queue /* command_queue */, cl_mem /* image */, cl_bool /* blocking_write */, + const size_t* /* origin[3] */, const size_t* /* region[3] */, size_t /* input_row_pitch */, + size_t /* input_slice_pitch */, const void* /* ptr */, cl_uint /* num_events_in_wait_list */, + const cl_event* /* event_wait_list */, cl_event* /* event */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clEnqueueCopyImage_fn)( + cl_command_queue /* command_queue */, cl_mem /* src_image */, cl_mem /* dst_image */, + const size_t* /* src_origin[3] */, const size_t* /* dst_origin[3] */, + const size_t* /* region[3] */, cl_uint /* num_events_in_wait_list */, + const cl_event* /* event_wait_list */, cl_event* /* event */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clEnqueueCopyImageToBuffer_fn)( + cl_command_queue /* command_queue */, cl_mem /* src_image */, cl_mem /* dst_buffer */, + const size_t* /* src_origin[3] */, const size_t* /* region[3] */, size_t /* dst_offset */, + cl_uint /* num_events_in_wait_list */, const cl_event* /* event_wait_list */, + cl_event* /* event */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clEnqueueCopyBufferToImage_fn)( + cl_command_queue /* command_queue */, cl_mem /* src_buffer */, cl_mem /* dst_image */, + size_t /* src_offset */, const size_t* /* dst_origin[3] */, const size_t* /* region[3] */, + cl_uint /* num_events_in_wait_list */, const cl_event* /* event_wait_list */, + cl_event* /* event */) CL_API_SUFFIX__VERSION_1_0; + +typedef void*(CL_API_CALL* clEnqueueMapBuffer_fn)( + cl_command_queue /* command_queue */, cl_mem /* 
buffer */, cl_bool /* blocking_map */, + cl_map_flags /* map_flags */, size_t /* offset */, size_t /* cb */, + cl_uint /* num_events_in_wait_list */, const cl_event* /* event_wait_list */, + cl_event* /* event */, cl_int* /* errcode_ret */)CL_API_SUFFIX__VERSION_1_0; + +typedef void*(CL_API_CALL* clEnqueueMapImage_fn)( + cl_command_queue /* command_queue */, cl_mem /* image */, cl_bool /* blocking_map */, + cl_map_flags /* map_flags */, const size_t* /* origin[3] */, const size_t* /* region[3] */, + size_t* /* image_row_pitch */, size_t* /* image_slice_pitch */, + cl_uint /* num_events_in_wait_list */, const cl_event* /* event_wait_list */, + cl_event* /* event */, cl_int* /* errcode_ret */)CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clEnqueueUnmapMemObject_fn)( + cl_command_queue /* command_queue */, cl_mem /* memobj */, void* /* mapped_ptr */, + cl_uint /* num_events_in_wait_list */, const cl_event* /* event_wait_list */, + cl_event* /* event */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clEnqueueNDRangeKernel_fn)( + cl_command_queue /* command_queue */, cl_kernel /* kernel */, cl_uint /* work_dim */, + const size_t* /* global_work_offset */, const size_t* /* global_work_size */, + const size_t* /* local_work_size */, cl_uint /* num_events_in_wait_list */, + const cl_event* /* event_wait_list */, cl_event* /* event */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clEnqueueTask_fn)(cl_command_queue /* command_queue */, + cl_kernel /* kernel */, + cl_uint /* num_events_in_wait_list */, + const cl_event* /* event_wait_list */, + cl_event* /* event */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clEnqueueNativeKernel_fn)( + cl_command_queue /* command_queue */, void(CL_CALLBACK* user_func)(void*), void* /* args */, + size_t /* cb_args */, cl_uint /* num_mem_objects */, const cl_mem* /* mem_list */, + const void** /* args_mem_loc */, cl_uint /* num_events_in_wait_list */, + const cl_event* /* event_wait_list 
*/, cl_event* /* event */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clEnqueueMarker_fn)(cl_command_queue /* command_queue */, + cl_event* /* event */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clEnqueueWaitForEvents_fn)( + cl_command_queue /* command_queue */, cl_uint /* num_events */, + const cl_event* /* event_list */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clEnqueueBarrier_fn)(cl_command_queue /* command_queue */) + CL_API_SUFFIX__VERSION_1_0; + +typedef void*(CL_API_CALL* clGetExtensionFunctionAddress_fn)(const char* /* func_name */) + CL_API_SUFFIX__VERSION_1_0; + +typedef cl_mem(CL_API_CALL* clCreateFromGLBuffer_fn)( + cl_context /* context */, cl_mem_flags /* flags */, cl_GLuint /* bufobj */, + int* /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_mem(CL_API_CALL* clCreateFromGLTexture2D_fn)( + cl_context /* context */, cl_mem_flags /* flags */, cl_GLenum /* target */, + cl_GLint /* miplevel */, cl_GLuint /* texture */, + cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_mem(CL_API_CALL* clCreateFromGLTexture3D_fn)( + cl_context /* context */, cl_mem_flags /* flags */, cl_GLenum /* target */, + cl_GLint /* miplevel */, cl_GLuint /* texture */, + cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_mem(CL_API_CALL* clCreateFromGLRenderbuffer_fn)( + cl_context /* context */, cl_mem_flags /* flags */, cl_GLuint /* renderbuffer */, + cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clGetGLObjectInfo_fn)( + cl_mem /* memobj */, cl_gl_object_type* /* gl_object_type */, + cl_GLuint* /* gl_object_name */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clGetGLTextureInfo_fn)( + cl_mem /* memobj */, cl_gl_texture_info /* param_name */, size_t /* param_value_size */, + void* /* param_value */, size_t* /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_event(CL_API_CALL* clCreateEventFromGLsyncKHR_fn)( 
+ cl_context /* context */, cl_GLsync /* cl_GLsync */, + cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_1_1; + +typedef cl_int(CL_API_CALL* clEnqueueAcquireGLObjects_fn)( + cl_command_queue /* command_queue */, cl_uint /* num_objects */, + const cl_mem* /* mem_objects */, cl_uint /* num_events_in_wait_list */, + const cl_event* /* event_wait_list */, cl_event* /* event */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clEnqueueReleaseGLObjects_fn)( + cl_command_queue /* command_queue */, cl_uint /* num_objects */, + const cl_mem* /* mem_objects */, cl_uint /* num_events_in_wait_list */, + const cl_event* /* event_wait_list */, cl_event* /* event */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clCreateSubDevices_fn)( + cl_device_id /* in_device */, const cl_device_partition_property* /* properties */, + cl_uint /* num_entries */, cl_device_id* /* out_devices */, + cl_uint* /* num_devices */) CL_API_SUFFIX__VERSION_1_2; + +typedef cl_int(CL_API_CALL* clRetainDevice_fn)(cl_device_id /* device */) + CL_API_SUFFIX__VERSION_1_2; + +typedef cl_int(CL_API_CALL* clReleaseDevice_fn)(cl_device_id /* device */) + CL_API_SUFFIX__VERSION_1_2; + +typedef cl_mem(CL_API_CALL* clCreateImage_fn)(cl_context /* context */, cl_mem_flags /* flags */, + const cl_image_format* /* image_format*/, + const cl_image_desc* /* image_desc*/, + void* /* host_ptr */, + cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2; + +typedef cl_program(CL_API_CALL* clCreateProgramWithBuiltInKernels_fn)( + cl_context /* context */, cl_uint /* num_devices */, const cl_device_id* /* device_list */, + const char* /* kernel_names */, cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2; + +typedef cl_int(CL_API_CALL* clCompileProgram_fn)( + cl_program /* program */, cl_uint /* num_devices */, const cl_device_id* /* device_list */, + const char* /* options */, cl_uint /* num_input_headers */, + const cl_program* /* input_headers */, const char** /* header_include_names */, + 
void(CL_CALLBACK* pfn_notify)(cl_program program, void* user_data), + void* /* user_data */) CL_API_SUFFIX__VERSION_1_2; + +typedef cl_program(CL_API_CALL* clLinkProgram_fn)( + cl_context /* context */, cl_uint /* num_devices */, const cl_device_id* /* device_list */, + const char* /* options */, cl_uint /* num_input_programs */, + const cl_program* /* input_programs */, + void(CL_CALLBACK* pfn_notify)(cl_program program, void* user_data), void* /* user_data */, + cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2; + +typedef cl_int(CL_API_CALL* clUnloadPlatformCompiler_fn)(cl_platform_id /* platform */) + CL_API_SUFFIX__VERSION_1_2; + +typedef cl_int(CL_API_CALL* clGetKernelArgInfo_fn)( + cl_kernel /* kernel */, cl_uint /* arg_indx */, cl_kernel_arg_info /* param_name */, + size_t /* param_value_size */, void* /* param_value */, + size_t* /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_2; + +typedef cl_int(CL_API_CALL* clEnqueueFillBuffer_fn)( + cl_command_queue /* command_queue */, cl_mem /* buffer */, const void* /* pattern */, + size_t /* pattern_size */, size_t /* offset */, size_t /* size */, + cl_uint /* num_events_in_wait_list */, const cl_event* /* event_wait_list */, + cl_event* /* event */) CL_API_SUFFIX__VERSION_1_2; + +typedef cl_int(CL_API_CALL* clEnqueueFillImage_fn)( + cl_command_queue /* command_queue */, cl_mem /* image */, const void* /* fill_color */, + const size_t* /* origin */, const size_t* /* region */, cl_uint /* num_events_in_wait_list */, + const cl_event* /* event_wait_list */, cl_event* /* event */) CL_API_SUFFIX__VERSION_1_2; + +typedef cl_int(CL_API_CALL* clEnqueueMigrateMemObjects_fn)( + cl_command_queue /* command_queue */, cl_uint /* num_mem_objects */, + const cl_mem* /* mem_objects */, cl_mem_migration_flags /* flags */, + cl_uint /* num_events_in_wait_list */, const cl_event* /* event_wait_list */, + cl_event* /* event */) CL_API_SUFFIX__VERSION_1_2; + +typedef cl_int(CL_API_CALL* clEnqueueMarkerWithWaitList_fn)( + 
cl_command_queue /* command_queue */, cl_uint /* num_events_in_wait_list */, + const cl_event* /* event_wait_list */, cl_event* /* event */) CL_API_SUFFIX__VERSION_1_2; + +typedef cl_int(CL_API_CALL* clEnqueueBarrierWithWaitList_fn)( + cl_command_queue /* command_queue */, cl_uint /* num_events_in_wait_list */, + const cl_event* /* event_wait_list */, cl_event* /* event */) CL_API_SUFFIX__VERSION_1_2; + +typedef void*(CL_API_CALL* clGetExtensionFunctionAddressForPlatform_fn)( + cl_platform_id /* platform */, const char* /* funcname */)CL_API_SUFFIX__VERSION_1_2; + +typedef cl_mem(CL_API_CALL* clCreateFromGLTexture_fn)( + cl_context /* context */, cl_mem_flags /* flags */, cl_GLenum /* texture_target */, + cl_GLint /* miplevel */, cl_GLuint /* texture */, + cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2; + +typedef cl_command_queue(CL_API_CALL* clCreateCommandQueueWithProperties_fn)( + cl_context /* context */, cl_device_id /* device */, + const cl_queue_properties* /* properties */, + cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_2_0; + +typedef cl_sampler(CL_API_CALL* clCreateSamplerWithProperties_fn)( + cl_context /* context */, const cl_sampler_properties* /* properties */, + cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_2_0; + +typedef void*(CL_API_CALL* clSVMAlloc_fn)(cl_context /* context */, cl_svm_mem_flags /* flags */, + size_t /* size */, + cl_uint /* alignment */)CL_API_SUFFIX__VERSION_2_0; + +typedef void(CL_API_CALL* clSVMFree_fn)(cl_context /* context */, + void* /* svm_pointer */) CL_API_SUFFIX__VERSION_2_0; + +typedef cl_int(CL_API_CALL* clSetKernelArgSVMPointer_fn)( + cl_kernel /* kernel */, cl_uint /* arg_index */, + const void* /* arg_value */) CL_API_SUFFIX__VERSION_2_0; + +typedef cl_int(CL_API_CALL* clSetKernelExecInfo_fn)( + cl_kernel /* kernel */, cl_kernel_exec_info /* param_name */, size_t /* param_value_size */, + const void* /* param_value */) CL_API_SUFFIX__VERSION_2_0; + +typedef cl_int(CL_API_CALL* clEnqueueSVMFree_fn)( 
+ cl_command_queue /* command_queue */, cl_uint /* num_svm_pointers */, + void* [] /* svm_pointers */, + void(CL_CALLBACK* /* pfn_free_func */)(cl_command_queue /* queue */, + cl_uint /* num_svm_pointers */, + void* [] /* svm_pointers */, void* /* user_data */), + void* /* user_data */, cl_uint /* num_events_in_wait_list */, + const cl_event* /* event_wait_list */, cl_event* /* event */) CL_API_SUFFIX__VERSION_2_0; + +typedef cl_int(CL_API_CALL* clEnqueueSVMMemcpy_fn)( + cl_command_queue /* command_queue */, cl_bool /* blocking_copy */, void* /* dst_ptr */, + const void* /* src_ptr */, size_t /* size */, cl_uint /* num_events_in_wait_list */, + const cl_event* /* event_wait_list */, cl_event* /* event */) CL_API_SUFFIX__VERSION_2_0; + +typedef cl_int(CL_API_CALL* clEnqueueSVMMemFill_fn)( + cl_command_queue /* command_queue */, void* /* svm_ptr */, const void* /* pattern */, + size_t /* pattern_size */, size_t /* size */, cl_uint /* num_events_in_wait_list */, + const cl_event* /* event_wait_list */, cl_event* /* event */) CL_API_SUFFIX__VERSION_2_0; + +typedef cl_int(CL_API_CALL* clEnqueueSVMMap_fn)( + cl_command_queue /* command_queue */, cl_bool /* blocking_map */, cl_map_flags /* flags */, + void* /* svm_ptr */, size_t /* size */, cl_uint /* num_events_in_wait_list */, + const cl_event* /* event_wait_list */, cl_event* /* event */) CL_API_SUFFIX__VERSION_2_0; + +typedef cl_int(CL_API_CALL* clEnqueueSVMUnmap_fn)(cl_command_queue /* command_queue */, + void* /* svm_ptr */, + cl_uint /* num_events_in_wait_list */, + const cl_event* /* event_wait_list */, + cl_event* /* event */) CL_API_SUFFIX__VERSION_2_0; + +typedef cl_mem(CL_API_CALL* clCreatePipe_fn)(cl_context /* context */, cl_mem_flags /* flags */, + cl_uint /* pipe_packet_size */, + cl_uint /* pipe_max_packets */, + const cl_pipe_properties* /* properties */, + cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_2_0; + +typedef cl_int(CL_API_CALL* clGetPipeInfo_fn)( + cl_mem /* pipe */, cl_pipe_info /* 
param_name */, size_t /* param_value_size */, + void* /* param_value */, size_t* /* param_value_size_ret */) CL_API_SUFFIX__VERSION_2_0; + +typedef cl_int(CL_API_CALL* clGetKernelSubGroupInfoKHR_fn)( + cl_kernel /* kernel */, cl_device_id /* device */, cl_kernel_sub_group_info /* param_name */, + size_t /* input_value_size */, const void* /* input_value */, size_t /* param_value_size */, + void* /* param_value */, size_t* /* param_value_size_ret */) CL_API_SUFFIX__VERSION_2_0; + + +typedef cl_int(CL_API_CALL* clSetDefaultDeviceCommandQueue_fn)( + cl_context /* context */, cl_device_id /* device */, + cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_2_1; + +typedef cl_kernel(CL_API_CALL* clCloneKernel_fn)( + cl_kernel /* source_kernel */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_2_1; + +typedef cl_int (CL_API_CALL* clEnqueueSVMMigrateMem_fn)( + cl_command_queue /* command_queue */, cl_uint /* num_svm_pointers */, + const void ** /* svm_pointers */, const size_t * /* sizes */, + cl_mem_migration_flags /* flags */, cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_2_1; + +typedef cl_int (CL_API_CALL* clGetDeviceAndHostTimer_fn)( + cl_device_id /* device */, cl_ulong * /* device_timestamp */, + cl_ulong * /* host_timestamp */) CL_API_SUFFIX__VERSION_2_1; + +typedef cl_int (CL_API_CALL* clGetHostTimer_fn)( + cl_device_id /* device */, cl_ulong * /* host_timestamp */) CL_API_SUFFIX__VERSION_2_1; + +typedef cl_int (CL_API_CALL* clSetProgramSpecializationConstant_fn)( + cl_program /* program */, cl_uint /* spec_id */, size_t /* spec_size */, + const void* /* spec_value */) CL_API_SUFFIX__VERSION_2_2; + +typedef cl_int (CL_API_CALL* clSetProgramReleaseCallback_fn)( + cl_program /* program */, + void (CL_CALLBACK * /* pfn_notify */)(cl_program program, void * user_data), + void * /* user_data */) CL_API_SUFFIX__VERSION_2_2; + +typedef struct _cl_icd_dispatch_table { /* Table of entry-point function pointers, one member per *_fn typedef above, grouped by the OpenCL version or extension named in each section comment; the _reservedFor* arrays are untyped void* slots for extensions not declared in this header. NOTE(review): member order is presumably ABI-visible to the ICD loader - confirm before inserting, removing or reordering members. */ + /* 
OpenCL 1.0 */ + clGetPlatformIDs_fn GetPlatformIDs; + clGetPlatformInfo_fn GetPlatformInfo; + clGetDeviceIDs_fn GetDeviceIDs; + clGetDeviceInfo_fn GetDeviceInfo; + clCreateContext_fn CreateContext; + clCreateContextFromType_fn CreateContextFromType; + clRetainContext_fn RetainContext; + clReleaseContext_fn ReleaseContext; + clGetContextInfo_fn GetContextInfo; + clCreateCommandQueue_fn CreateCommandQueue; + clRetainCommandQueue_fn RetainCommandQueue; + clReleaseCommandQueue_fn ReleaseCommandQueue; + clGetCommandQueueInfo_fn GetCommandQueueInfo; + clSetCommandQueueProperty_fn SetCommandQueueProperty; + clCreateBuffer_fn CreateBuffer; + clCreateImage2D_fn CreateImage2D; + clCreateImage3D_fn CreateImage3D; + clRetainMemObject_fn RetainMemObject; + clReleaseMemObject_fn ReleaseMemObject; + clGetSupportedImageFormats_fn GetSupportedImageFormats; + clGetMemObjectInfo_fn GetMemObjectInfo; + clGetImageInfo_fn GetImageInfo; + clCreateSampler_fn CreateSampler; + clRetainSampler_fn RetainSampler; + clReleaseSampler_fn ReleaseSampler; + clGetSamplerInfo_fn GetSamplerInfo; + clCreateProgramWithSource_fn CreateProgramWithSource; + clCreateProgramWithBinary_fn CreateProgramWithBinary; + clRetainProgram_fn RetainProgram; + clReleaseProgram_fn ReleaseProgram; + clBuildProgram_fn BuildProgram; + clUnloadCompiler_fn UnloadCompiler; + clGetProgramInfo_fn GetProgramInfo; + clGetProgramBuildInfo_fn GetProgramBuildInfo; + clCreateKernel_fn CreateKernel; + clCreateKernelsInProgram_fn CreateKernelsInProgram; + clRetainKernel_fn RetainKernel; + clReleaseKernel_fn ReleaseKernel; + clSetKernelArg_fn SetKernelArg; + clGetKernelInfo_fn GetKernelInfo; + clGetKernelWorkGroupInfo_fn GetKernelWorkGroupInfo; + clWaitForEvents_fn WaitForEvents; + clGetEventInfo_fn GetEventInfo; + clRetainEvent_fn RetainEvent; + clReleaseEvent_fn ReleaseEvent; + clGetEventProfilingInfo_fn GetEventProfilingInfo; + clFlush_fn Flush; + clFinish_fn Finish; + clEnqueueReadBuffer_fn EnqueueReadBuffer; + 
clEnqueueWriteBuffer_fn EnqueueWriteBuffer; + clEnqueueCopyBuffer_fn EnqueueCopyBuffer; + clEnqueueReadImage_fn EnqueueReadImage; + clEnqueueWriteImage_fn EnqueueWriteImage; + clEnqueueCopyImage_fn EnqueueCopyImage; + clEnqueueCopyImageToBuffer_fn EnqueueCopyImageToBuffer; + clEnqueueCopyBufferToImage_fn EnqueueCopyBufferToImage; + clEnqueueMapBuffer_fn EnqueueMapBuffer; + clEnqueueMapImage_fn EnqueueMapImage; + clEnqueueUnmapMemObject_fn EnqueueUnmapMemObject; + clEnqueueNDRangeKernel_fn EnqueueNDRangeKernel; + clEnqueueTask_fn EnqueueTask; + clEnqueueNativeKernel_fn EnqueueNativeKernel; + clEnqueueMarker_fn EnqueueMarker; + clEnqueueWaitForEvents_fn EnqueueWaitForEvents; + clEnqueueBarrier_fn EnqueueBarrier; + clGetExtensionFunctionAddress_fn GetExtensionFunctionAddress; + clCreateFromGLBuffer_fn CreateFromGLBuffer; + clCreateFromGLTexture2D_fn CreateFromGLTexture2D; + clCreateFromGLTexture3D_fn CreateFromGLTexture3D; + clCreateFromGLRenderbuffer_fn CreateFromGLRenderbuffer; + clGetGLObjectInfo_fn GetGLObjectInfo; + clGetGLTextureInfo_fn GetGLTextureInfo; + clEnqueueAcquireGLObjects_fn EnqueueAcquireGLObjects; + clEnqueueReleaseGLObjects_fn EnqueueReleaseGLObjects; + clGetGLContextInfoKHR_fn GetGLContextInfoKHR; + void* _reservedForD3D10KHR[6]; + + /* OpenCL 1.1 */ + clSetEventCallback_fn SetEventCallback; + clCreateSubBuffer_fn CreateSubBuffer; + clSetMemObjectDestructorCallback_fn SetMemObjectDestructorCallback; + clCreateUserEvent_fn CreateUserEvent; + clSetUserEventStatus_fn SetUserEventStatus; + clEnqueueReadBufferRect_fn EnqueueReadBufferRect; + clEnqueueWriteBufferRect_fn EnqueueWriteBufferRect; + clEnqueueCopyBufferRect_fn EnqueueCopyBufferRect; + + void* _reservedForDeviceFissionEXT[3]; + clCreateEventFromGLsyncKHR_fn CreateEventFromGLsyncKHR; + + /* OpenCL 1.2 */ + clCreateSubDevices_fn CreateSubDevices; + clRetainDevice_fn RetainDevice; + clReleaseDevice_fn ReleaseDevice; + clCreateImage_fn CreateImage; + clCreateProgramWithBuiltInKernels_fn 
CreateProgramWithBuiltInKernels; + clCompileProgram_fn CompileProgram; + clLinkProgram_fn LinkProgram; + clUnloadPlatformCompiler_fn UnloadPlatformCompiler; + clGetKernelArgInfo_fn GetKernelArgInfo; + clEnqueueFillBuffer_fn EnqueueFillBuffer; + clEnqueueFillImage_fn EnqueueFillImage; + clEnqueueMigrateMemObjects_fn EnqueueMigrateMemObjects; + clEnqueueMarkerWithWaitList_fn EnqueueMarkerWithWaitList; + clEnqueueBarrierWithWaitList_fn EnqueueBarrierWithWaitList; + clGetExtensionFunctionAddressForPlatform_fn GetExtensionFunctionAddressForPlatform; + clCreateFromGLTexture_fn CreateFromGLTexture; + + /* cl_khr_d3d11_sharing, cl_khr_dx9_media_sharing */ + void* _reservedForD3DExtensions[10]; + + /* cl_khr_egl_image, cl_khr_egl_event */ + void* _reservedForEGLExtensions[4]; + + /* OpenCL 2.0 */ + clCreateCommandQueueWithProperties_fn CreateCommandQueueWithProperties; + clCreatePipe_fn CreatePipe; + clGetPipeInfo_fn GetPipeInfo; + clSVMAlloc_fn SVMAlloc; + clSVMFree_fn SVMFree; + clEnqueueSVMFree_fn EnqueueSVMFree; + clEnqueueSVMMemcpy_fn EnqueueSVMMemcpy; + clEnqueueSVMMemFill_fn EnqueueSVMMemFill; + clEnqueueSVMMap_fn EnqueueSVMMap; + clEnqueueSVMUnmap_fn EnqueueSVMUnmap; + clCreateSamplerWithProperties_fn CreateSamplerWithProperties; + clSetKernelArgSVMPointer_fn SetKernelArgSVMPointer; + clSetKernelExecInfo_fn SetKernelExecInfo; + /* cl_khr_sub_groups */ + clGetKernelSubGroupInfoKHR_fn GetKernelSubGroupInfoKHR; + + /* OpenCL 2.1 */ + clCloneKernel_fn CloneKernel; + clCreateProgramWithILKHR_fn CreateProgramWithILKHR; + clEnqueueSVMMigrateMem_fn EnqueueSVMMigrateMem; + clGetDeviceAndHostTimer_fn GetDeviceAndHostTimer; + clGetHostTimer_fn GetHostTimer; + clGetKernelSubGroupInfoKHR_fn GetKernelSubGroupInfo; + clSetDefaultDeviceCommandQueue_fn SetDefaultDeviceCommandQueue; + + /* OpenCL 2.2 */ + clSetProgramReleaseCallback_fn SetProgramReleaseCallback; + clSetProgramSpecializationConstant_fn SetProgramSpecializationConstant; + +} cl_icd_dispatch_table; + +#ifdef __cplusplus 
+} +#endif /* __cplusplus */ + +#endif /* __OPENCL_CL_ICD_H */ diff --git a/projects/hip/amdocl/cl_kernel.h b/projects/hip/amdocl/cl_kernel.h new file mode 100644 index 0000000000..e0c960d3ea --- /dev/null +++ b/projects/hip/amdocl/cl_kernel.h @@ -0,0 +1,165 @@ +/* Copyright (c) 2012-present Advanced Micro Devices, Inc. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. */ + +#ifndef CL_KERNEL_H_ +#define CL_KERNEL_H_ + +struct clk_builtins_t; + +// This must be a multiple of sizeof(cl_ulong16) +#define __CPU_SCRATCH_SIZE 128 + +#define CLK_PRIVATE_MEMORY_SIZE (16 * 1024) + +struct clk_thread_info_block_t { + // Warning! 
The size of this struct needs to be a multiple + // of 16 when compiling 64 bit + + struct clk_builtins_t const* builtins; + void* local_mem_base; + void* local_scratch; + const void* table_base; + size_t pad; + + uint work_dim; + size_t global_offset[4]; /*dim0,dim1,dim2,invalid(dim<0||dim>2)*/ + size_t global_size[4]; /*dim0,dim1,dim2,invalid(dim<0||dim>2)*/ + + size_t enqueued_local_size[4]; + size_t local_size[4]; /*dim0,dim1,dim2,invalid(dim<0||dim>2)*/ + size_t local_id[4]; /*dim0,dim1,dim2,invalid(dim<0||dim>2)*/ + size_t group_id[4]; /*dim0,dim1,dim2,invalid(dim<0||dim>2)*/ +}; + +typedef enum clk_value_type_t { + T_VOID, + T_CHAR, + T_SHORT, + T_INT, + T_LONG, + T_FLOAT, + T_DOUBLE, + T_POINTER, + T_CHAR2, + T_CHAR3, + T_CHAR4, + T_CHAR8, + T_CHAR16, + T_SHORT2, + T_SHORT3, + T_SHORT4, + T_SHORT8, + T_SHORT16, + T_INT2, + T_INT3, + T_INT4, + T_INT8, + T_INT16, + T_LONG2, + T_LONG3, + T_LONG4, + T_LONG8, + T_LONG16, + T_FLOAT2, + T_FLOAT3, + T_FLOAT4, + T_FLOAT8, + T_FLOAT16, + T_DOUBLE2, + T_DOUBLE3, + T_DOUBLE4, + T_DOUBLE8, + T_DOUBLE16, + T_SAMPLER, + T_SEMA, + T_STRUCT, + T_QUEUE, + T_PAD +} clk_value_type_t; + +typedef enum clk_address_space_t { + A_PRIVATE, + A_LOCAL, + A_CONSTANT, + A_GLOBAL, + A_REGION +} clk_address_space_t; + +// kernel arg access qualifier and type qualifier +typedef enum clk_arg_qualifier_t { + Q_NONE = 0, + + // for image type only, access qualifier + Q_READ = 1, + Q_WRITE = 2, + + // for pointer type only + Q_CONST = 4, // pointee + Q_RESTRICT = 8, + Q_VOLATILE = 16, // pointee + Q_PIPE = 32 // pipe + +} clk_arg_qualifier_t; + +#pragma pack(push, 4) +struct clk_parameter_descriptor_t { + clk_value_type_t type; + clk_address_space_t space; + uint qualifier; + const char* name; +}; +#pragma pack(pop) + +//#define CLK_LOCAL_MEM_FENCE (1 << 0) +//#define CLK_GLOBAL_MEM_FENCE (1 << 1) + +struct clk_builtins_t { + /* Synchronization functions */ + void (*barrier_ptr)(cl_mem_fence_flags flags); + + /* AMD Only builtins: 
FIXME_lmoriche (extension) */ + void* reserved; + int (*printf_ptr)(const char* format, ...); +}; + +enum clk_natures_t { KN_HAS_BARRIER = 1 << 0, KN_WG_LEVEL = 1 << 1 }; + +#if defined(_MSC_VER) +#pragma warning(push) +#pragma warning(disable : 4200) +#endif + +#if !defined(__OPENCL_VERSION__) || __OPENCL_VERSION__ >= 200 + +typedef struct clk_pipe_t { /* Pipe bookkeeping header followed by the packet storage. */ + size_t read_idx; + size_t write_idx; + size_t end_idx; + char padding[128 - 3 * sizeof(size_t)]; /* sized so the three indices plus this padding total 128 bytes ahead of packets[] */ + char packets[]; /* flexible array member; this is the construct MSVC warning C4200 reports, hence the warning(disable : 4200) wrapped around this struct */ +} clk_pipe_t; + +#endif + +#if defined(_MSC_VER) +#pragma warning(pop) +#endif + +#endif /*CL_KERNEL_H_*/ diff --git a/projects/hip/amdocl/cl_profile_amd.h b/projects/hip/amdocl/cl_profile_amd.h new file mode 100644 index 0000000000..7adca946e0 --- /dev/null +++ b/projects/hip/amdocl/cl_profile_amd.h @@ -0,0 +1,189 @@ +/* Copyright (c) 2009-present Advanced Micro Devices, Inc. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. 
*/ + +#ifndef __CL_PROFILE_AMD_H +#define __CL_PROFILE_AMD_H + +#include "CL/cl_platform.h" + +#ifdef __cplusplus +extern "C" { +#endif /*__cplusplus*/ + +typedef struct _cl_perfcounter_amd* cl_perfcounter_amd; +typedef cl_ulong cl_perfcounter_property; +typedef cl_uint cl_perfcounter_info; + +/* cl_perfcounter_info */ +enum PerfcounterInfo { + CL_PERFCOUNTER_NONE = 0x0, + CL_PERFCOUNTER_REFERENCE_COUNT = 0x1, + CL_PERFCOUNTER_DATA = 0x2, + CL_PERFCOUNTER_GPU_BLOCK_INDEX = 0x3, + CL_PERFCOUNTER_GPU_COUNTER_INDEX = 0x4, + CL_PERFCOUNTER_GPU_EVENT_INDEX = 0x5, + CL_PERFCOUNTER_LAST +}; + +/********************************* +* Set device clock mode data +*********************************/ +enum cl_DeviceClockMode_AMD { + CL_DEVICE_CLOCK_MODE_DEFAULT_AMD = 0x0, /*Device clocks and other power settings are restored to default*/ + CL_DEVICE_CLOCK_MODE_QUERY_AMD = 0x1, /*Queries the current device clock ratios. Leaves the clock mode of the device unchanged*/ + CL_DEVICE_CLOCK_MODE_PROFILING_AMD = 0x2, /*Scale down from peak ratio*/ + CL_DEVICE_CLOCK_MODE_MINIMUMMEMORY_AMD = 0x3, /* Memory clock is set to the lowest available level*/ + CL_DEVICE_CLOCK_MODE_MINIMUMENGINE_AMD = 0x4, /*Engine clock is set to the lowest available level*/ + CL_DEVICE_CLOCK_MODE_PEAK_AMD = 0x5, /*Clocks set to maximum when possible. Fan set to maximum.*/ + CL_DEVICE_CLOCK_MODE_QUERYPROFILING_AMD = 0x6, /*Queries the profiling device clock ratios. 
Leaves the clock mode of the device unchanged*/ + CL_DEVICE_CLOCK_MODE_QUERYPEAK_AMD = 0x7, /*Queries the peak device clock ratios. Leaves the clock mode of the device unchanged*/ + CL_DEVICE_CLOCK_MODE_COUNT_AMD = 0x8, /*Maximum count of device clock mode*/ +}; + +typedef struct _cl_set_device_clock_mode_input_amd +{ + /* specify the clock mode for AMD GPU device; the explicit 'enum' tag keeps this extern "C" header valid when compiled as C */ + enum cl_DeviceClockMode_AMD clock_mode; +} cl_set_device_clock_mode_input_amd; + +typedef struct _cl_set_device_clock_mode_output_amd +{ + /*Ratio of current mem clock to peak clock as obtained from DeviceProperties::maxGpuClock*/ + cl_float memory_clock_ratio_to_peak; + /*Ratio of current gpu core clock to peak clock as obtained from DeviceProperties::maxGpuClock*/ + cl_float engine_clock_ratio_to_peak; +} cl_set_device_clock_mode_output_amd; + +/*! \brief Creates a new HW performance counter + * for the specified OpenCL context. + * + * \param device must be a valid OpenCL device. + * + * \param properties the list of properties of the hardware counter + * + * \param errcode_ret A non zero value if OpenCL failed to create PerfCounter + * - CL_SUCCESS if the function is executed successfully. + * - CL_INVALID_CONTEXT if the specified context is invalid. + * - CL_OUT_OF_RESOURCES if we couldn't create the object + * + * \return the created perfcounter object + */ +extern CL_API_ENTRY cl_perfcounter_amd CL_API_CALL clCreatePerfCounterAMD( + cl_device_id /* device */, cl_perfcounter_property* /* properties */, cl_int* /* errcode_ret */ + ) CL_API_SUFFIX__VERSION_1_0; + +/*! \brief Destroy a performance counter object. + * + * \param perf_counter the perfcounter object for release + * + * \return A non zero value if OpenCL failed to release PerfCounter + * - CL_SUCCESS if the function is executed successfully.
+ * - CL_INVALID_OPERATION if we failed to release the object + */ +extern CL_API_ENTRY cl_int CL_API_CALL clReleasePerfCounterAMD(cl_perfcounter_amd /* perf_counter */ + ) CL_API_SUFFIX__VERSION_1_0; + +/*! \brief Increments the perfcounter object reference count. + * + * \param perf_counter the perfcounter object for retain + * + * \return A non zero value if OpenCL failed to retain PerfCounter + * - CL_SUCCESS if the function is executed successfully. + * - CL_INVALID_OPERATION if we failed to retain the object + */ +extern CL_API_ENTRY cl_int CL_API_CALL clRetainPerfCounterAMD(cl_perfcounter_amd /* perf_counter */ + ) CL_API_SUFFIX__VERSION_1_0; + +/*! \brief Enqueues the begin command for the specified counters. + * + * \param command_queue must be a valid OpenCL command queue. + * + * \param num_perf_counters the number of perfcounter objects in the array. + * + * \param perf_counters specifies an array of perfcounter objects. + * + * \return A non zero value if OpenCL failed to enqueue the begin command + * - CL_SUCCESS if the function is executed successfully. + * - CL_INVALID_OPERATION if we failed to enqueue the begin operation + */ +extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueBeginPerfCounterAMD( + cl_command_queue /* command_queue */, cl_uint /* num_perf_counters */, + cl_perfcounter_amd* /* perf_counters */, cl_uint /* num_events_in_wait_list */, + const cl_event* /* event_wait_list */, cl_event* /* event */ + ) CL_API_SUFFIX__VERSION_1_0; + +/*! \brief Enqueues the end command for the specified counters. + * + * \param command_queue must be a valid OpenCL command queue. + * + * \param num_perf_counters the number of perfcounter objects in the array. + * + * \param perf_counters specifies an array of perfcounter objects. + * + * \param event the event object associated with the end operation. + * + * \return A non zero value if OpenCL failed to enqueue the end command + * - CL_SUCCESS if the function is executed successfully.
+ * - CL_INVALID_OPERATION if we failed to enqueue the end operation + */ +extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueEndPerfCounterAMD( + cl_command_queue /* command_queue */, cl_uint /* num_perf_counters */, + cl_perfcounter_amd* /* perf_counters */, cl_uint /* num_events_in_wait_list */, + const cl_event* /* event_wait_list */, cl_event* /* event */ + ) CL_API_SUFFIX__VERSION_1_0; + +/*! \brief Retrieves the results from the counter objects. + * + * \param perf_counter specifies a perfcounter objects for query. + * + * \param param_name specifies the information to query. + * + * \param param_value is a pointer to memory where the appropriate result + * being queried is returned. If \a param_value is NULL, it is ignored. + * + * \param param_value_size is used to specify the size in bytes of memory + * pointed to by \a param_value. This size must be >= size of return type. + * + * \param param_value_size_ret returns the actual size in bytes of data copied + * to \a param_value. If \a param_value_size_ret is NULL, it is ignored. + * + * \param values must be a valid pointer to an array of 64-bit values + * and the array size must be equal to num_perf_counters. + * + * \return + * - CL_SUCCESS if the function is executed successfully. + * - CL_PROFILING_INFO_NOT_AVAILABLE if event isn't finished.
+ * - CL_INVALID_OPERATION if we failed to get the data + */ +extern CL_API_ENTRY cl_int CL_API_CALL clGetPerfCounterInfoAMD( + cl_perfcounter_amd /* perf_counter */, cl_perfcounter_info /* param_name */, + size_t /* param_value_size */, void* /* param_value */, size_t* /* param_value_size_ret */ + ) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL clSetDeviceClockModeAMD( + cl_device_id /* device*/, cl_set_device_clock_mode_input_amd /* Clock_Mode_Input */, + cl_set_device_clock_mode_output_amd* /* Clock_Mode_Output */ + ) CL_API_SUFFIX__VERSION_1_0; + +#ifdef __cplusplus +} /*extern "C"*/ +#endif /*__cplusplus*/ + +#endif /*__CL_PROFILE_AMD_H*/ diff --git a/projects/hip/amdocl/cl_thread_trace_amd.h b/projects/hip/amdocl/cl_thread_trace_amd.h new file mode 100644 index 0000000000..fe9aed6f34 --- /dev/null +++ b/projects/hip/amdocl/cl_thread_trace_amd.h @@ -0,0 +1,363 @@ +/* Copyright (c) 2012-present Advanced Micro Devices, Inc. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. */ + +#ifndef __CL_THREAD_TRACE_AMD_H +#define __CL_THREAD_TRACE_AMD_H + +#include "CL/cl_platform.h" + +#ifdef __cplusplus +extern "C" { +#endif /*__cplusplus*/ + +typedef struct _cl_threadtrace_amd* cl_threadtrace_amd; +typedef cl_uint cl_thread_trace_param; +typedef cl_uint cl_threadtrace_info; + +/* cl_command_type */ +#define CL_COMMAND_THREAD_TRACE_MEM 0x4500 +#define CL_COMMAND_THREAD_TRACE 0x4501 + +/* cl_threadtrace_command_name_amd enumeration */ +typedef enum _cl_threadtrace_command_name_amd { + CL_THREAD_TRACE_BEGIN_COMMAND, + CL_THREAD_TRACE_END_COMMAND, + CL_THREAD_TRACE_PAUSE_COMMAND, + CL_THREAD_TRACE_RESUME_COMMAND +} cl_threadtrace_command_name_amd; + +// Thread trace parameters +enum ThreadTraceParameter { + CL_THREAD_TRACE_PARAM_TOKEN_MASK, + CL_THREAD_TRACE_PARAM_REG_MASK, + CL_THREAD_TRACE_PARAM_COMPUTE_UNIT_TARGET, + CL_THREAD_TRACE_PARAM_SHADER_ARRAY_TARGET, + CL_THREAD_TRACE_PARAM_SIMD_MASK, + CL_THREAD_TRACE_PARAM_VM_ID_MASK, + CL_THREAD_TRACE_PARAM_RANDOM_SEED, + CL_THREAD_TRACE_PARAM_CAPTURE_MODE, + CL_THREAD_TRACE_PARAM_INSTRUCTION_MASK, + CL_THREAD_TRACE_PARAM_USER_DATA, + CL_THREAD_TRACE_PARAM_IS_WRAPPED +}; + +// CL_THREAD_TRACE_PARAM_TOKEN_MASK data selects for SI +enum CL_THREAD_TRACE_TOKEN_MASK { + // Time passed + CL_THREAD_TRACE_TOKEN_MASK_TIME_SI = 0x00000001, + // Resync the timestamp + CL_THREAD_TRACE_TOKEN_MASK_TIMESTAMP_SI = 0x00000002, + // A register write has occurred + CL_THREAD_TRACE_TOKEN_MASK_REG_SI = 0x00000004, + // A wavefront has started + CL_THREAD_TRACE_TOKEN_MASK_WAVE_START_SI = 0x00000008, + // Output space has been allocated for color/Z [Should be used for cl-gl] + CL_THREAD_TRACE_TOKEN_MASK_WAVE_PS_ALLOC_SI = 0x00000010, + // Output space has 
been allocated for vertex position [Should be used for cl-gl] + CL_THREAD_TRACE_TOKEN_MASK_WAVE_VS_ALLOC_SI = 0x00000020, + // Wavefront completion + CL_THREAD_TRACE_TOKEN_MASK_WAVE_END_SI = 0x00000040, + // An event has reached the top of a shader stage. In-order with WAVE_START + CL_THREAD_TRACE_TOKEN_MASK_EVENT_SI = 0x00000080, + // An event has reached the top of a compute shader stage. In-order with WAVE_START + CL_THREAD_TRACE_TOKEN_MASK_EVENT_CS_SI = 0x00000100, + // An event has reached the top of a shader stage for the second GFX pipe. In-order with + // WAVE_START. + //[Should be used for cl-gl] + CL_THREAD_TRACE_TOKEN_MASK_EVENT_GFX_SI = 0x00000200, + // The kernel has executed an instruction + CL_THREAD_TRACE_TOKEN_MASK_INST_SI = 0x00000400, + // The kernel has explicitly written the PC value + CL_THREAD_TRACE_TOKEN_MASK_INST_PC_SI = 0x00000800, + // The kernel has written user data into the thread trace buffer + CL_THREAD_TRACE_TOKEN_MASK_INST_USERDATA_SI = 0x00001000, + // Provides information about instruction scheduling + CL_THREAD_TRACE_TOKEN_MASK_ISSUE_SI = 0x00002000, + // The performance counter delta has been updated + CL_THREAD_TRACE_TOKEN_MASK_PERF_SI = 0x00004000, + // A miscellaneous event has been sent + CL_THREAD_TRACE_TOKEN_MASK_MISC_SI = 0x00008000, + // All possible tokens + CL_THREAD_TRACE_TOKEN_MASK_ALL_SI = 0x0000ffff, +}; + +// CL_THREAD_TRACE_PARAM_REG_MASK data selects +enum CL_THREAD_TRACE_REG_MASK { + // Event initiator + CL_THREAD_TRACE_REG_MASK_EVENT_SI = 0x00000001, + // Draw initiator [Should be used for cl-gl] + CL_THREAD_TRACE_REG_MASK_DRAW_SI = 0x00000002, + // Dispatch initiator + CL_THREAD_TRACE_REG_MASK_DISPATCH_SI = 0x00000004, + // User data from host + CL_THREAD_TRACE_REG_MASK_USERDATA_SI = 0x00000008, + // GFXDEC register (8-state) [Should be used for cl-gl] + CL_THREAD_TRACE_REG_MASK_GFXDEC_SI = 0x00000020, + // SHDEC register (many state) + CL_THREAD_TRACE_REG_MASK_SHDEC_SI = 0x00000040, + // Other registers + 
CL_THREAD_TRACE_REG_MASK_OTHER_SI = 0x00000080, + // All possible registers types + CL_THREAD_TRACE_REG_MASK_ALL_SI = 0x000000ff, +}; + +// CL_THREAD_TRACE_PARAM_VM_ID_MASK data selects +enum CL_THREAD_TRACE_VM_ID_MASK { + // Capture only data from the VM_ID used to write {SQTT}_BASE + CL_THREAD_TRACE_VM_ID_MASK_SINGLE = 0, + // Capture all data from all VM_IDs + CL_THREAD_TRACE_VM_ID_MASK_ALL = 1, + // Capture all data but only get target (a.k.a. detail) data from VM_ID used to write {SQTT}_BASE + CL_THREAD_TRACE_VM_ID_MASK_SINGLE_DETAIL = 2 +}; + +// CL_THREAD_TRACE_PARAM_CAPTURE_MODE data +enum CL_THREAD_TRACE_CAPTURE_MODE { + // Capture all data in the thread trace buffer + CL_THREAD_TRACE_CAPTURE_ALL = 0, + // Capture only data between THREAD_TRACE_START and THREAD_TRACE_STOP events + CL_THREAD_TRACE_CAPTURE_SELECT = 1, + // Capture data between THREAD_TRACE_START and THREAD_TRACE_/STOP events, + // and global/reference data at all times + CL_THREAD_TRACE_CAPTURE_SELECT_DETAIL = 2 +}; + +// CL_THREAD_TRACE_PARAM_INSTRUCTION_MASK data selects +enum CL_THREAD_TRACE_INSTRUCTION_MASK { + // Generate {SQTT}_TOKEN_INST tokens for all instructions + CL_THREAD_TRACE_INST_MASK_ALL, + // Generate {SQTT}_TOKEN_INST tokens for stalled instructions only + CL_THREAD_TRACE_INST_MASK_STALLED, + // Generate {SQTT}_TOKEN_INST messages for stalled and other (no op/wait/set prio/etc) + // instructions + CL_THREAD_TRACE_INST_MASK_STALLED_AND_IMMEDIATE, + // Generate {SQTT}_TOKEN_INST messages for immediate instructions only only [ Should be used only + // for CI] + CL_THREAD_TRACE_INST_MASK_IMMEDIATE_CI, +}; + +enum ThreadTraceInfo { + CL_THREAD_TRACE_SE, + CL_THREAD_TRACE_BUFFERS_FILLED, + CL_THREAD_TRACE_BUFFERS_SIZE +}; + + +/*! \brief Creates a new cl_threadtrace_amd object + * + * \param device must be a valid OpenCL device. + * + * \param errcode_ret A non zero value if OpenCL failed to create threadTrace + * -CL_INVALID_DEVICE if devices contains an invalid device. 
+ * -CL_DEVICE_NOT_AVAILABLE if a device is currently not available even + * though the device was returned by clGetDeviceIDs. + * -CL_OUT_OF_RESOURCES if there is a failure to allocate resources required by the + * OpenCL implementation on the device. + * -CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required by the + OpenCL implementation on the host. + * + * \return the created threadTrace object + */ +extern CL_API_ENTRY cl_threadtrace_amd CL_API_CALL clCreateThreadTraceAMD( + cl_device_id /* device */, cl_int* /* errcode_ret */ + ) CL_API_SUFFIX__VERSION_1_0; + +/*! \brief Destroys a cl_threadtrace_amd object. + * + * \param threadTrace the cl_threadtrace_amd object for release + * + * \return A non zero value if OpenCL failed to release threadTrace + * -CL_INVALID_VALUE if the thread_trace is not a valid OpenCL thread trace object + (cl_threadtrace_amd) . + * -CL_OUT_OF_RESOURCES if there is a failure to allocate resources required by the + * OpenCL implementation on the device. + * -CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required by the + OpenCL implementation on the host. + */ +extern CL_API_ENTRY cl_int CL_API_CALL clReleaseThreadTraceAMD(cl_threadtrace_amd /* threadTrace */ + ) CL_API_SUFFIX__VERSION_1_0; + +/*! \brief Increments the cl_threadtrace_amd object reference count. + * + * \param threadTrace the cl_threadtrace_amd object for retain + * + * \return A non zero value if OpenCL failed to retain threadTrace + * -CL_INVALID_VALUE if the thread_trace is not a valid thread trace object (cl_threadtrace_amd) . + * -CL_OUT_OF_RESOURCES if there is a failure to allocate resources required by the + OpenCL implementation on the device. + * -CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required by the + OpenCL implementation on the host. + */ +extern CL_API_ENTRY cl_int CL_API_CALL clRetainThreadTraceAMD(cl_threadtrace_amd /* threadTrace */ + ) CL_API_SUFFIX__VERSION_1_0; + +/*! 
\brief Sets the cl_threadtrace_amd object configuration parameter. + * + * \param thread_trace the cl_threadtrace_amd object to set configuration parameter + * + * \param config_param the cl_thread_trace_param + * + * \param param_value corresponding to configParam + * + * \return A non zero value if OpenCL failed to set threadTrace buffer parameter + * - CL_INVALID_VALUE if the thread_trace is invalid thread trace object. + * - CL_INVALID_VALUE if the invalid config_param or param_value enum values , are used. + * - CL_INVALID_EVENT_WAIT_LIST if event_wait_list is NULL and num_events_in_wait_list > 0, or + event_wait_list is not NULL and num_events_in_wait_list is 0, + * - or if event objects in event_wait_list are not valid events. + * - CL_OUT_OF_RESOURCES if there is a failure to allocate resources required by the OpenCL + implementation on the device. + * - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required by the + OpenCL implementation on the host. + */ + +extern CL_API_ENTRY cl_int CL_API_CALL clSetThreadTraceParamAMD( + cl_threadtrace_amd /*thread_trace*/, cl_thread_trace_param /*config_param*/, + cl_uint /*param_value*/ + ) CL_API_SUFFIX__VERSION_1_0; + +/*! \brief Enqueues the binding command to bind cl_threadtrace_amd to cl_mem object for trace + * recording. + * + * \param command_queue must be a valid OpenCL command queue. + * + * \param thread_trace specifies the cl_threadtrace_amd object. + * + * \param mem_objects the cl_mem objects for trace recording + * + * \param mem_objects_num the number of cl_mem objects in the mem_objects + * + * \param buffer_size the size of each cl_mem object from mem_objects + * + * \param event_wait_list specify [is a pointer to] events that need to + * complete before this particular command can be executed. + * If \a event_wait_list is NULL, then this particular command does not wait + * on any event to complete. If \a event_wait_list is NULL, + * \a num_events_in_wait_list must be 0.
If \a event_wait_list is not NULL, + * the list of events pointed to by \a event_wait_list must be valid and + * \a num_events_in_wait_list must be greater than 0. The events specified in + * \a event_wait_list act as synchronization points. + * + * \param num_events_in_wait_list specify the number of events in + * \a event_wait_list. It must be 0 if \a event_wait_list is NULL. It must be + * greater than 0 if \a event_wait_list is not NULL. + * + * \param event returns an event object that identifies this particular + * command and can be used to query or queue a wait for this particular + * command to complete. \a event can be NULL in which case it will not be + * possible for the application to query the status of this command or queue a + * wait for this command to complete. + * \return A non zero value if OpenCL failed to enqueue the binding command + * - CL_INVALID_COMMAND_QUEUE if command_queue is not a valid command-queue. + * - CL_INVALID_CONTEXT if the context associated with command_queue and events in event_wait_list + * are not the same. + * - CL_INVALID_VALUE if the thread_trace is invalid thread trace object. + * - CL_INVALID_VALUE if the buffer_size is negative or zero. + * - CL_INVALID_VALUE if the sub_buffers_num is less than 1. + * - CL_INVALID_OPERATION if the mem_objects_num is not equal to the number of Shader Engines of + * the [GPU] device. + * - CL_INVALID_MEM_OBJECT if one of the memory objects in the mem_objects array is not a valid memory + * object or memory_objects is NULL. + * - CL_MEM_OBJECT_ALLOCATION_FAILURE if there is a failure to allocate memory for the data store + * associated with the memory objects of the mem_objects array. + * - CL_INVALID_EVENT_WAIT_LIST if event_wait_list is NULL and num_events_in_wait_list > 0, or + * event_wait_list is not NULL and num_events_in_wait_list is 0, or if event objects in + * event_wait_list are not valid events.
+ * - CL_OUT_OF_RESOURCES if there is a failure to allocate resources required by the OpenCL + * implementation on the device. + * - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required by the + * OpenCL implementation on the host. + */ +extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueBindThreadTraceBufferAMD( + cl_command_queue command_queue, cl_threadtrace_amd /*thread_trace*/, cl_mem* /*mem_objects*/, + cl_uint /*mem_objects_num*/, cl_uint /*buffer_size*/, cl_uint /*num_events_in_wait_list*/, + const cl_event* /*event_wait_list*/, cl_event* /*event*/ + ) CL_API_SUFFIX__VERSION_1_0; + +/*! \brief Get specific information about the OpenCL Thread Trace. + * + * \param thread_trace_info_param is an enum that identifies the Thread Trace information being + * queried. + * + * \param param_value is a pointer to memory location where appropriate values + * for a given \a threadTrace_info_param will be returned. If \a param_value is NULL, + * it is ignored. + * + * \param param_value_size specifies the size in bytes of memory pointed to by + * \a param_value. This size in bytes must be >= size of return type. + * + * \param param_value_size_ret returns the actual size in bytes of data being + * queried by param_value. If \a param_value_size_ret is NULL, it is ignored. + * + * \return One of the following values: + * CL_INVALID_OPERATION if cl_threadtrace_amd object is not valid + * - CL_INVALID_VALUE if \a param_name is not one of the supported + * values or if size in bytes specified by \a param_value_size is < size of + * return type and \a param_value is not a NULL value. + * CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required by the + * OpenCL implementation on the host. + * CL_SUCCESS if the function is executed successfully.
+ */ +extern CL_API_ENTRY cl_int CL_API_CALL clGetThreadTraceInfoAMD( + cl_threadtrace_amd /* thread_trace */, cl_threadtrace_info /*thread_trace_info_param*/, + size_t /*param_value_size*/, void* /*param_value*/, size_t* /*param_value_size_ret*/ + ) CL_API_SUFFIX__VERSION_1_0; + +/*! \brief Enqueues the thread trace command for the specified thread trace object. + * + * \param command_queue must be a valid OpenCL command queue. + * + * \param thread_trace specifies the cl_threadtrace_amd object. + * + * \return A non zero value if OpenCL failed to enqueue the thread trace command + * - CL_INVALID_COMMAND_QUEUE if command_queue is not a valid command-queue. + * - CL_INVALID_CONTEXT if the context associated with command_queue and events in event_wait_list + * are not the same. + * - CL_INVALID_VALUE if the thread_trace is invalid thread trace object. + * - CL_INVALID_VALUE if the invalid command name enum value, not described in the + * cl_threadtrace_command_name_amd, is used. + * - CL_INVALID_OPERATION if the command enqueue failed. It can happen in the following cases: + * o BEGIN_COMMAND is queued for thread trace object for which memory object/s was/were not + * bound. + * o END_COMMAND is queued for thread trace object, for which BEGIN_COMMAND was not queued. + * o PAUSE_COMMAND is queued for thread trace object, for which BEGIN_COMMAND was not + * queued. + * o RESUME_COMMAND is queued for thread trace object, for which PAUSE_COMMAND was not + * queued. + * - CL_INVALID_EVENT_WAIT_LIST if event_wait_list is NULL and num_events_in_wait_list > 0, or + * event_wait_list is not NULL and num_events_in_wait_list is 0, or if event objects in + * event_wait_list are not valid events. + * - CL_OUT_OF_RESOURCES if there is a failure to allocate resources required by the OpenCL + * implementation on the device. + * - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required by the OpenCL + * implementation on the host.
+ */ +extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueThreadTraceCommandAMD( + cl_command_queue /*command_queue*/, cl_threadtrace_amd /*thread_trace*/, + cl_threadtrace_command_name_amd /*command_name*/, cl_uint /*num_events_in_wait_list*/, + const cl_event* /*event_wait_list*/, cl_event* /*event*/ + ) CL_API_SUFFIX__VERSION_1_0; + + +#ifdef __cplusplus +} /*extern "C"*/ +#endif /*__cplusplus*/ + +#endif /*__CL_THREAD_TRACE_AMD_H*/ diff --git a/projects/hip/amdocl/gl_functions.hpp b/projects/hip/amdocl/gl_functions.hpp new file mode 100644 index 0000000000..2d184bc2e6 --- /dev/null +++ b/projects/hip/amdocl/gl_functions.hpp @@ -0,0 +1,64 @@ +/* Copyright (c) 2010-present Advanced Micro Devices, Inc. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE.
*/ + +GLPREFIX(GLubyte*, glGetString, (GLenum name)) + +GLPREFIX(void, glBindBuffer, (GLenum target, GLuint buffer)) +//GLPREFIX(void, glBindFramebufferEXT, (GLenum target, GLuint framebuffer)) +GLPREFIX(void, glBindRenderbuffer, (GLenum target, GLuint renderbuffer)) +GLPREFIX(void, glBindTexture, (GLenum target, GLuint texture)) +GLPREFIX(void, glBufferData, (GLenum target, GLsizeiptr size, const GLvoid* data, GLenum usage)) + +GLPREFIX(GLenum, glCheckFramebufferStatusEXT, (GLenum target)) + +GLPREFIX(void, glDeleteBuffers, (GLsizei n, const GLuint* buffers)) +GLPREFIX(void, glDrawPixels, (GLsizei width, GLsizei height, GLenum format, GLenum type, const GLvoid *pixels)) + +//GLPREFIX(void, glFramebufferRenderbufferEXT, (GLenum target, GLenum attachment, GLenum renderbuffertarget, GLuint renderbuffer)) + +GLPREFIX(void, glGenBuffers, (GLsizei n, GLuint* buffers)) +//GLPREFIX(void, glGenFramebuffersEXT, (GLsizei n, GLuint* framebuffers)) +//10 +GLPREFIX(void, glGetBufferParameteriv, (GLenum target, GLenum pname, GLint* params)) +GLPREFIX(GLenum, glGetError, (void)) +GLPREFIX(void, glFinish, (void)) +GLPREFIX(void, glFlush, (void)) +GLPREFIX(GLenum, glClientWaitSync, (GLsync sync, GLbitfield flags, GLuint64 timeout)) +GLPREFIX(void, glGetIntegerv, (GLenum pname, GLint *params)) +GLPREFIX(void, glGetRenderbufferParameterivEXT, (GLenum target, GLenum pname, GLint* params)) +//GLPREFIX(GLubyte*, glGetString, (GLenum name)) +GLPREFIX(void, glGetTexImage, (GLenum target, GLint level, GLenum format, GLenum type, GLvoid *pixels)) +GLPREFIX(void, glGetTexLevelParameteriv, (GLenum target, GLint level, GLenum pname, GLint *params)) +GLPREFIX(void, glGetTexParameteriv, (GLenum target, GLenum pname, GLint *params)) + +GLPREFIX(GLboolean, glIsBuffer, (GLuint buffer)) +GLPREFIX(GLboolean, glIsRenderbufferEXT, (GLuint renderbuffer)) +GLPREFIX(GLboolean, glIsTexture, (GLuint texture)) +//20 +GLPREFIX(GLvoid*, glMapBuffer, (GLenum target, GLenum access)) + +GLPREFIX(void, 
glReadPixels, (GLint x, GLint y, GLsizei width, GLsizei height, GLenum format, GLenum type, GLvoid *pixels)) + +GLPREFIX(void, glTexImage2D, (GLenum target, GLint level, GLint internalformat, GLsizei width, GLsizei height, GLint border, GLenum format, GLenum type, const GLvoid *pixels)) +GLPREFIX(void, glTexImage3D, (GLenum target, GLint level, GLint internalformat, GLsizei width, GLsizei height, GLint border, GLenum format, GLenum type, const GLvoid *pixels)) + +GLPREFIX(GLboolean, glUnmapBuffer, (GLenum target)) + +#undef GLPREFIX diff --git a/projects/hip/amdocl/icd/loader/icd_dispatch.h b/projects/hip/amdocl/icd/loader/icd_dispatch.h new file mode 100644 index 0000000000..84a3e305a7 --- /dev/null +++ b/projects/hip/amdocl/icd/loader/icd_dispatch.h @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2016-2019 The Khronos Group Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * OpenCL is a trademark of Apple Inc. used under license by Khronos. 
+ */ + +#ifndef _ICD_DISPATCH_H_ +#define _ICD_DISPATCH_H_ + +#ifndef CL_USE_DEPRECATED_OPENCL_1_0_APIS +#define CL_USE_DEPRECATED_OPENCL_1_0_APIS +#endif + +#ifndef CL_USE_DEPRECATED_OPENCL_1_1_APIS +#define CL_USE_DEPRECATED_OPENCL_1_1_APIS +#endif + +#ifndef CL_USE_DEPRECATED_OPENCL_1_2_APIS +#define CL_USE_DEPRECATED_OPENCL_1_2_APIS +#endif + +#ifndef CL_USE_DEPRECATED_OPENCL_2_0_APIS +#define CL_USE_DEPRECATED_OPENCL_2_0_APIS +#endif + +// cl.h +#include + +// cl_gl.h and required files +#ifdef _WIN32 +#include +#include +#include +#include +#include +#include +#endif +#include +#include +#include +#include +#include + +/* + * + * vendor dispatch table structure + * + */ + +struct _cl_platform_id +{ + cl_icd_dispatch *dispatch; +}; + +struct _cl_device_id +{ + cl_icd_dispatch *dispatch; +}; + +struct _cl_context +{ + cl_icd_dispatch *dispatch; +}; + +struct _cl_command_queue +{ + cl_icd_dispatch *dispatch; +}; + +struct _cl_mem +{ + cl_icd_dispatch *dispatch; +}; + +struct _cl_program +{ + cl_icd_dispatch *dispatch; +}; + +struct _cl_kernel +{ + cl_icd_dispatch *dispatch; +}; + +struct _cl_event +{ + cl_icd_dispatch *dispatch; +}; + +struct _cl_sampler +{ + cl_icd_dispatch *dispatch; +}; + +#endif // _ICD_DISPATCH_H_ + diff --git a/projects/hip/bin/hipcc b/projects/hip/bin/hipcc index 74ca844629..8214e508a6 100755 --- a/projects/hip/bin/hipcc +++ b/projects/hip/bin/hipcc @@ -222,12 +222,12 @@ if ($HIP_PLATFORM eq "clang") { $HIPCXXFLAGS .= " -Xclang -fallow-half-arguments-and-returns -D__HIP_HCC_COMPAT_MODE__=1"; } - if ($HIP_RUNTIME eq "HCC" ) { - $HSA_PATH=$ENV{'HSA_PATH'} // "$ROCM_PATH/hsa"; - $HIPCXXFLAGS .= " -isystem $HSA_PATH/include"; - $HIPCFLAGS .= " -isystem $HSA_PATH/include"; - } else { - $HIPCXXFLAGS .= " -fhip-new-launch-api"; + $HSA_PATH=$ENV{'HSA_PATH'} // "$ROCM_PATH/hsa"; + $HIPCXXFLAGS .= " -isystem $HSA_PATH/include"; + $HIPCFLAGS .= " -isystem $HSA_PATH/include"; + if (!($HIP_RUNTIME eq "HCC")) { + $HIPCXXFLAGS .= " -D__HIP_VDI__ 
-fhip-new-launch-api"; + $HIPCFLAGS .= " -D__HIP_VDI__ -fhip-new-launch-api"; } } elsif ($HIP_PLATFORM eq "hcc") { @@ -245,6 +245,9 @@ if ($HIP_PLATFORM eq "clang") { $HCC_VERSION_MAJOR=$HCC_VERSION; $HCC_VERSION_MAJOR=~s/\..*//; + $HIP_ATP_MARKER=$ENV{'HIP_ATP_MARKER'} // 1; + $marker_path = "$ROCM_PATH/profiler/CXLActivityLogger"; + # HCC* may be used to compile src/hip_hcc.o (and also feed the HIPCXXFLAGS below) $HCC = "$HCC_HOME/bin/hcc"; $HCCFLAGS = "-hc -D__HIPCC__ -isystem $HCC_HOME/include "; @@ -292,6 +295,20 @@ if ($HIP_PLATFORM eq "clang") { $HIPLDFLAGS .= " -L$HSA_PATH/lib -L$ROCM_PATH/lib -lhsa-runtime64 -lhc_am "; # $HIPLDFLAGS .= " -L$HCC_HOME/compiler/lib -lLLVMAMDGPUDesc -lLLVMAMDGPUUtils -lLLVMMC -lLLVMCore -lLLVMSupport "; + # Add trace marker library: + # TODO - once we cleanly separate the HIP API headers from HIP library headers this logic should move to CMakebuild option - apps do not need to see the marker library. + if ($HIP_ATP_MARKER) { + $marker_inc_path = "$marker_path/include"; + if (-e $marker_inc_path) { + $HIPCXXFLAGS .= " -isystem $marker_inc_path"; + } + } + + $marker_lib_path = "$marker_path/bin/x86_64"; + if (-e $marker_lib_path) { + $HIPLDFLAGS .= " -L$marker_lib_path -lCXLActivityLogger -Wl,--rpath=$marker_lib_path"; + } + if (not $isWindows) { $HIPLDFLAGS .= " -lm"; } @@ -336,7 +353,10 @@ my $runCmd = 1; my $buildDeps = 0; my $linkType = 1; my $setLinkType = 0; -my $coFormatv3 = 0; +my $coFormatv3 = 1; +if(defined $HIP_COMPILER and $HIP_COMPILER eq "hcc") { + $coFormatv3 = 0; +} my $funcSupp = 0; # enable function support my @options = (); @@ -474,13 +494,6 @@ foreach $arg (@ARGV) $optArg = $arg; } - ## This is a temporary workaround for CMake detection of OpenMP support. - ## It should be removed when the OpenMP detection c++ test in CMake is updated - ## and corrected CMake version is available. 
- if((defined $HIP_COMPILER) and ($HIP_COMPILER eq "clang") and ($arg eq '-fopenmp')) { - $HIPCXXFLAGS .= " -D_OPENMP " - } - ## process linker response file for hip-clang ## extract object files from static library and pass them directly to ## hip-clang in command line. diff --git a/projects/hip/bin/hipify-perl b/projects/hip/bin/hipify-perl index baaca2ae71..83e72ee711 100755 --- a/projects/hip/bin/hipify-perl +++ b/projects/hip/bin/hipify-perl @@ -223,6 +223,10 @@ sub simpleSubstitutions { $ft{'memory'} += s/\bcuMemcpy2DAsync\b/hipMemcpyParam2DAsync/g; $ft{'memory'} += s/\bcuMemcpy2DAsync_v2\b/hipMemcpyParam2DAsync/g; $ft{'memory'} += s/\bcuMemcpy2D_v2\b/hipMemcpyParam2D/g; + $ft{'memory'} += s/\bcuMemcpy3D\b/hipDrvMemcpy3D/g; + $ft{'memory'} += s/\bcuMemcpy3DAsync\b/hipDrvMemcpy3DAsync/g; + $ft{'memory'} += s/\bcuMemcpy3D_v2\b/hipDrvMemcpy3D/g; + $ft{'memory'} += s/\bcuMemcpy3DAsync_v2\b/hipDrvMemcpy3DAsync/g; $ft{'memory'} += s/\bcuMemcpyAtoH\b/hipMemcpyAtoH/g; $ft{'memory'} += s/\bcuMemcpyAtoH_v2\b/hipMemcpyAtoH/g; $ft{'memory'} += s/\bcuMemcpyDtoD\b/hipMemcpyDtoD/g; @@ -979,6 +983,8 @@ sub simpleSubstitutions { $ft{'type'} += s/\bCUDA_ARRAY_DESCRIPTOR_st\b/HIP_ARRAY_DESCRIPTOR/g; $ft{'type'} += s/\bCUDA_MEMCPY2D\b/hip_Memcpy2D/g; $ft{'type'} += s/\bCUDA_MEMCPY2D_st\b/hip_Memcpy2D/g; + $ft{'type'} += s/\bCUDA_MEMCPY3D\b/HIP_MEMCPY3D/g; + $ft{'type'} += s/\bCUDA_MEMCPY3D_st\b/HIP_MEMCPY3D/g; $ft{'type'} += s/\bCUaddress_mode\b/hipTextureAddressMode/g; $ft{'type'} += s/\bCUaddress_mode_enum\b/hipTextureAddressMode/g; $ft{'type'} += s/\bCUarray\b/hipArray */g; diff --git a/projects/hip/cmake/FindROCR.cmake b/projects/hip/cmake/FindROCR.cmake new file mode 100644 index 0000000000..2b198dcf8f --- /dev/null +++ b/projects/hip/cmake/FindROCR.cmake @@ -0,0 +1,16 @@ +# Try to find ROCR (Radeon Open Compute Runtime) +# +# Once found, this will define: +# - ROCR_FOUND - ROCR status (found or not found) +# - ROCR_INCLUDES - Required ROCR include directories +# - 
ROCR_LIBRARIES - Required ROCR libraries +find_path(FIND_ROCR_INCLUDES hsa.h HINTS /opt/rocm/include /opt/rocm/hsa/include PATH_SUFFIXES hsa) +find_library(FIND_ROCR_LIBRARIES hsa-runtime64 HINTS /opt/rocm/lib /opt/rocm/hsa/lib) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(ROCR DEFAULT_MSG + FIND_ROCR_INCLUDES FIND_ROCR_LIBRARIES) +mark_as_advanced(FIND_ROCR_INCLUDES FIND_ROCR_LIBRARIES) + +set(ROCR_INCLUDES ${FIND_ROCR_INCLUDES}) +set(ROCR_LIBRARIES ${FIND_ROCR_LIBRARIES}) diff --git a/projects/hip/cmake/FindROCT.cmake b/projects/hip/cmake/FindROCT.cmake new file mode 100644 index 0000000000..37f08fcff7 --- /dev/null +++ b/projects/hip/cmake/FindROCT.cmake @@ -0,0 +1,16 @@ +# Try to find ROCT (Radeon Open Compute Thunk) +# +# Once found, this will define: +# - ROCT_FOUND - ROCT status (found or not found) +# - ROCT_INCLUDES - Required ROCT include directories +# - ROCT_LIBRARIES - Required ROCT libraries +find_path(FIND_ROCT_INCLUDES hsakmt.h HINTS /opt/rocm/include) +find_library(FIND_ROCT_LIBRARIES hsakmt HINTS /opt/rocm/lib) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(ROCT DEFAULT_MSG + FIND_ROCT_INCLUDES FIND_ROCT_LIBRARIES) +mark_as_advanced(FIND_ROCT_INCLUDES FIND_ROCT_LIBRARIES) + +set(ROCT_INCLUDES ${FIND_ROCT_INCLUDES}) +set(ROCT_LIBRARIES ${FIND_ROCT_LIBRARIES}) diff --git a/projects/hip/configure b/projects/hip/configure new file mode 100644 index 0000000000..e69de29bb2 diff --git a/projects/hip/docs/markdown/hip_profiling.md b/projects/hip/docs/markdown/hip_profiling.md new file mode 100644 index 0000000000..28ed37e321 --- /dev/null +++ b/projects/hip/docs/markdown/hip_profiling.md @@ -0,0 +1,279 @@ +# Profiling HIP Code + +This section describes the profiling and debugging capabilities that HIP provides. +Profiling information can be viewed in the CodeXL visualization tool or printed directly to stderr as the application runs. 
+This document starts with some of the general capabilities of CodeXL and then describes some of the additional HIP marker and debug features. + + + +- [CodeXL Profiling](#codexl-profiling) + * [Collecting and Viewing Traces](#collecting-and-viewing-traces) + + [Using rocm-profiler timestamp profiling](#using-rocm-profiler-timestamp-profiling) + + [Using rocm-profiler performance counter collection:](#using-rocm-profiler-performance-counter-collection) + + [Using CodeXL to view profiling results:](#using-codexl-to-view-profiling-results) + + [More information on CodeXL](#more-information-on-codexl) + * [HIP Markers](#hip-markers) + + [Profiling HIP APIs](#profiling-hip-apis) + + [Adding markers to applications](#adding-markers-to-applications) + * [Additional HIP Profiling Features](#additional-hip-profiling-features) + + [Demangling C++ Kernel Names](#demangling-c-kernel-names) + + [Controlling when profiling starts and ends](#controlling-when-profiling-starts-and-ends) + + [Reducing timeline trace output file size](#reducing-timeline-trace-output-file-size) + + [How to enable profiling at HIP build time](#how-to-enable-profiling-at-hip-build-time) +- [Tracing and Debug](#tracing-and-debug) + * [Tracing HIP APIs](#tracing-hip-apis) + + [Color](#color) + + + +## CodeXL Profiling + +### Collecting and Viewing Traces + +#### Using rocm-profiler timestamp profiling +rocm-profiler is a command-line tool for tracing any application that uses ROCr API, including HCC and HIP. +rocm-profiler's timeline trace will show the beginning and end for all kernel commands, data transfer commands, and HSA Runtime (ROCr) API calls. The trace results are saved into a file, which by convention uses the "atp" extension. 
Here is an example that shows how to run the command-line profiler: +```shell +$ /opt/rocm/bin/rocm-profiler -o -A -T +``` + +#### Using rocm-profiler performance counter collection: +rocm-profiler can record performance counter information to provide greater insight inside a kernel, such as the memory bandwidth, ALU busy percentage, and cache statistics. +Collecting the common set of useful counters requires passing the counter configuration files for two passes: +``` +$ /opt/rocm/bin/rocm-profiler -C -O --counterfile /opt/rocm/profiler/counterfiles/counters_HSA_Fiji_pass1 --counterfile /opt/rocm/profiler/counterfiles/counters_HSA_Fiji_pass2 +``` + + +#### Using CodeXL to view profiling results: +The trace can be loaded and viewed in the CodeXL visualization tool: + +- Open the CodeXL GUI, create a new project, and switch to "Profile Mode": + - $ CodeXL & + - [File->New Project, leave fields as is, just click "OK"] + - [Profile->Switch to Profile Mode] +- Load timestamp tracing results into a timeline view: + - Right click on the project in the CodeXL Explorer view + - Click "Import Session..." + - Select $HOME/apitrace.atp (or appropriate .atp file if you used another file name) + +- Load the performance counter results + - Right click on the project in the CodeXL Explorer view + - Click "Import Session..." + - Select $HOME/Session1.csv (or appropriate .csv file if you used another file name) + + +#### More information on CodeXL +rocm-profiler --help will show additional options and usage guidelines. + +See this [blog](http://gpuopen.com/getting-up-to-speed-with-the-codexl-gpu-profiler-and-radeon-open-compute/) for more information on profiling ROCm apps (including HIP) with CodeXL. + +The 2.2 version of Windows CodeXL does not correctly handle Linux line-endings. If you are collecting a trace on Linux and then viewing it with the 2.2 Windows CodeXL, first convert the line endings in the .atp file to Windows-style line endings. 
+ +### HIP Markers +#### Profiling HIP APIs +HIP can generate markers at function beginning and end which are displayed on the CodeXL timeline view. +HIP 1.0 compiles marker support by default, and you can enable it by setting the HIP_PROFILE_API environment variable and then running the rocm-profiler: + +```shell + +# Use profile to generate timeline view: +export HIP_PROFILE_API=1 +$ /opt/rocm/bin/rocm-profiler -A -T + +Or +$ /opt/rocm/bin/rocm-profiler -e HIP_PROFILE_API=1 -A -T +``` + +HIP_PROFILE_API supports two levels of information. +- HIP_PROFILE_API=1 : Short format. Print name of API but no arguments. For example: +`hipMemcpy` +- HIP_PROFILE_API=2 : Long format. Print name of API + values of all function arguments. For example: +`hipMemcpy (0x7f32154db010, 0x50446e000, 4000000, hipMemcpyDeviceToHost)` + +#### Adding markers to applications + +Markers can be used to define application-specific events that will be recorded in the ATP file and displayed in the CodeXL GUI. +This can be particularly useful for visualizing how the higher-level phases of application behavior relate to the lower-level HIP APIs, kernel launches, and data transfers. +For example, an instrumented machine learning framework could show the beginning and ending of each layer in the network. + +Markers have a specific begin and end time, and can be nested. Nested calls are displayed hierarchically in the CodeXL GUI, with each level of the hierarchy occupying a different row. + +The HIP APIs are defined in "hip_profile.h": +``` +#include <hip/hip_profile.h> + +HIP_BEGIN_MARKER(const char *markerName, const char *groupName); +HIP_END_MARKER(); + +HIP_BEGIN_MARKER("Setup", "MyAppGroup"); +// ... +// application code for setup +// ... +HIP_END_MARKER(); +``` + +For C++ codes, HIP also provides a scoped marker which records the start time when constructed and the end time when the scoped marker is destructed at the end of the scope. 
This provides a convenient, single-line mechanism to record an event that neatly corresponds to a region of code. + +```cxx +void FunctionFoo(...) +{ + HIP_SCOPED_MARKER("FunctionFoo", "MyAppGroup"); // Marker starts recording here. + + // ... + // Function implementation + // ... + + // Marker destroyed here and records end time stamp. +}; +``` + +The HIP marker API is only supported on the ROCm platform. The marker macros are defined on CUDA platforms and will compile, but are silently ignored at runtime. + +This [HIP sample](https://github.com/ROCm-Developer-Tools/HIP/tree/master/samples/2_Cookbook/2_Profiler) shows the profiler marker API used in a small application. + +More information on the marker API can be found in the profiler header file and PDF in a ROCm installation: +- /opt/rocm/profiler/CXLActivityLogger/include/CXLActivityLogger.h +- /opt/rocm/profiler/CXLActivityLogger/doc/CXLActivityLogger.pdf + +### Additional HIP Profiling Features +#### Demangling C++ Kernel Names +HIP includes the `hipdemangleatp` tool which can post-process an ATP file to "demangle" C++ names. +Mangled kernel names encode the C++ arguments and other information, and are guaranteed to be unique even for cases such as operator overloading. However, the mangled names can be quite verbose. For example: + +`ZZ39gemm_NoTransA_MICRO_NBK_M_N_K_TS16XMTS4RN2hc16accelerator_viewEPKflS3_lPfliiiiiiffEN3_EC__719__cxxamp_trampolineElililiiiiiiS3_iS3_S4_ff` + +`hipdemangleatp` will convert this into the more readable: +`gemm_NoTransA_MICRO_NBK_M_N_K_TS16XMTS4` + +The `hipdemangleatp` tool operates on the ATP file "in-place" and thus replaces the input file with the demangled version. + +``` +$ hipdemangleatp myfile.atp +``` + +The kernel name is also shown in some of the summary html files (Top10 kernels). 
These can be regenerated from the demangled ATP file by re-running rocm-profiler: +``` +$ rocm-profiler -T --atpfile myfile.atp +``` + +A future version of CodeXL may directly integrate demangle functionality. + + +#### Controlling when profiling starts and ends +hipProfilerStart() and hipProfilerStop() can be inserted into an application to control which phases of the application are profiled. +These APIs can be used to skip initialization code or to focus profiling on a desired region, and are particularly useful for large long-running applications. +See the API documentation for more information. These APIs work on both ROCm and CUDA paths. + +On ROCm, the following environment variables can be used to control when profiling occurs: + +``` +HIP_DB_START_API : Comma-separated list of tid.api_seq_num for when to start debug and profiling. +HIP_DB_STOP_API : Comma-separated list of tid.api_seq_num for when to stop debug and profiling. +``` + +HIP/ROCm assigns a monotonically increasing sequence number to the APIs called from each thread. The thread and API sequence number can be used in the above environment variables to control when tracing starts and stops. These flags also control the HIP_DB messages (described below). + +When using these options, start the profiler with profiling disabled: +``` +# ROCm: +$ rocm-profiler --startdisabled ... + +# CUDA: +$ nvprof --profile-from-start-off ... +``` + +This feature is under development. + +#### Reducing timeline trace output file size +If the application is already recording the HIP APIs, the HSA APIs are somewhat redundant and the ATP file size can be substantially reduced by not recording these APIs. HIP includes a text file that lists all of the HSA APIs and can assist in this filtering: + +``` +$ rocm-profiler -F hip/bin/hsa-api-filter-cxl.txt +``` + +This file can be copied and edited to provide more selective HSA event recording. 
+ + +#### How to enable profiling at HIP build time +Pre-built packages of HIP are not built with profiling support enabled. You must enable marker support manually when compiling HIP. + +1. Build HIP with ATP markers enabled +HIP pre-built packages are enabled with ATP marker support by default. +To enable ATP marker support when building HIP from source, use the option ```-DCOMPILE_HIP_ATP_MARKER=1``` during the cmake configure step. Build and install HIP. +```shell +$ mkdir build && cd build +$ cmake .. -DCOMPILE_HIP_ATP_MARKER=1 +$ make install +``` + +2. Install ROCm-Profiler +Installing HIP from the [rocm](http://gpuopen.com/getting-started-with-boltzmann-components-platforms-installation/) pre-built packages installs the ROCm-Profiler as well. +Alternatively, you can build ROCm-Profiler using the instructions [here](https://github.com/RadeonOpenCompute/ROCm-Profiler#building-the-rocm-profiler). + +3. Recompile the target application + +Then follow the steps above to collect a marker-enabled trace. + + +## Tracing and Debug + +### Tracing HIP APIs +The HIP runtime can print the HIP function strings to stderr using the HIP_TRACE_API environment variable. +The trace prints two messages for each API - one at the beginning of the API call (line starts with "<<") and one at the end of the API call (line ends with ">>"). +Here's an example for one API followed by a description for the sections of the trace: + +``` +<> +``` + +- `<> +info: running on device gfx803 +info: allocate host mem ( 7.63 MB) +info: allocate device mem ( 7.63 MB) +<> +<> +info: copy Host2Device +<> +info: launch 'vector_square' kernel +1.5 hipLaunchKernel 'HIP_KERNEL_NAME(vector_square)' gridDim:{512,1,1} groupDim:{256,1,1} sharedMem:+0 stream#0.0 +info: copy Device2Host +<> +info: check result +PASSED! +``` + +HIP_TRACE_API supports multiple levels of debug information: + - 0x1 = print all HIP APIs. This is the most verbose setting; the flags below allow selecting a subset. 
+ - 0x2 = print HIP APIs which initiate GPU kernel commands. Includes hipLaunchKernel, hipLaunchModuleKernel + - 0x4 = print HIP APIs which initiate GPU memory commands. Includes hipMemcpy*, hipMemset*. + - 0x8 = print HIP APIs which allocate or free memory. Includes hipMalloc, hipHostMalloc, hipFree, hipHostFree. + +These can be combined. For example, HIP_TRACE_API=6 shows a concise view of the HIP commands (both kernel and memory) that are sent to the GPU. + + +#### Color +Note this trace mode uses colors. "less -r" can handle raw control characters and will display the debug output in proper colors. +You can change the color used for the trace mode with the HIP_TRACE_API_COLOR environment variable. Possible values are None/Red/Green/Yellow/Blue/Magenta/Cyan/White. +None will disable use of color control codes for both the opening and closing and may be useful when saving the trace file or when a pure text trace is desired. + + + diff --git a/projects/hip/hip-config.cmake.in b/projects/hip/hip-config.cmake.in index ccfbf2b04f..baa7c1607f 100644 --- a/projects/hip/hip-config.cmake.in +++ b/projects/hip/hip-config.cmake.in @@ -137,11 +137,11 @@ if(HIP_COMPILER STREQUAL "clang") ) set_property(TARGET hip::device APPEND PROPERTY - INTERFACE_INCLUDE_DIRECTORIES "${HIP_CLANG_INCLUDE_PATH}" + INTERFACE_INCLUDE_DIRECTORIES "${HIP_CLANG_INCLUDE_PATH}/.." ) set_property(TARGET hip::device APPEND PROPERTY - INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "${HIP_CLANG_INCLUDE_PATH}" + INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "${HIP_CLANG_INCLUDE_PATH}/.." ) foreach(GPU_TARGET ${GPU_TARGETS}) diff --git a/projects/hip/include/hip/hcc_detail/channel_descriptor.h b/projects/hip/include/hip/hcc_detail/channel_descriptor.h index 38acff9951..a69558c8e4 100644 --- a/projects/hip/include/hip/hcc_detail/channel_descriptor.h +++ b/projects/hip/include/hip/hcc_detail/channel_descriptor.h @@ -29,8 +29,14 @@ THE SOFTWARE. 
#ifdef __cplusplus +#if __HIP_VDI__ +extern "C" { +#endif HIP_PUBLIC_API hipChannelFormatDesc hipCreateChannelDesc(int x, int y, int z, int w, hipChannelFormatKind f); +#if __HIP_VDI__ +} +#endif static inline hipChannelFormatDesc hipCreateChannelDescHalf() { int e = (int)sizeof(unsigned short) * 8; diff --git a/projects/hip/include/hip/hcc_detail/driver_types.h b/projects/hip/include/hip/hcc_detail/driver_types.h index 1941f44617..ae8e8b1757 100644 --- a/projects/hip/include/hip/hcc_detail/driver_types.h +++ b/projects/hip/include/hip/hcc_detail/driver_types.h @@ -135,6 +135,47 @@ typedef enum hipResourceType { hipResourceTypePitch2D = 0x03 }hipResourceType; +typedef enum HIPresourcetype_enum { + HIP_RESOURCE_TYPE_ARRAY = 0x00, /**< Array resoure */ + HIP_RESOURCE_TYPE_MIPMAPPED_ARRAY = 0x01, /**< Mipmapped array resource */ + HIP_RESOURCE_TYPE_LINEAR = 0x02, /**< Linear resource */ + HIP_RESOURCE_TYPE_PITCH2D = 0x03 /**< Pitch 2D resource */ +} HIPresourcetype; + +/** + * hip address modes + */ +typedef enum HIPaddress_mode_enum { + HIP_TR_ADDRESS_MODE_WRAP = 0, + HIP_TR_ADDRESS_MODE_CLAMP = 1, + HIP_TR_ADDRESS_MODE_MIRROR = 2, + HIP_TR_ADDRESS_MODE_BORDER = 3 +} HIPaddress_mode; + +/** + * hip filter modes + */ +typedef enum HIPfilter_mode_enum { + HIP_TR_FILTER_MODE_POINT = 0, + HIP_TR_FILTER_MODE_LINEAR = 1 +} HIPfilter_mode; + +/** + * Texture descriptor + */ +typedef struct HIP_TEXTURE_DESC_st { + HIPaddress_mode addressMode[3]; /**< Address modes */ + HIPfilter_mode filterMode; /**< Filter mode */ + unsigned int flags; /**< Flags */ + unsigned int maxAnisotropy; /**< Maximum anisotropy ratio */ + HIPfilter_mode mipmapFilterMode; /**< Mipmap filter mode */ + float mipmapLevelBias; /**< Mipmap level bias */ + float minMipmapLevelClamp; /**< Mipmap minimum level clamp */ + float maxMipmapLevelClamp; /**< Mipmap maximum level clamp */ + float borderColor[4]; /**< Border Color */ + int reserved[12]; +} HIP_TEXTURE_DESC; + /** * hip texture resource view formats 
*/ @@ -176,6 +217,45 @@ typedef enum hipResourceViewFormat { hipResViewFormatUnsignedBlockCompressed7 = 0x22 }hipResourceViewFormat; +typedef enum HIPresourceViewFormat_enum +{ + HIP_RES_VIEW_FORMAT_NONE = 0x00, /**< No resource view format (use underlying resource format) */ + HIP_RES_VIEW_FORMAT_UINT_1X8 = 0x01, /**< 1 channel unsigned 8-bit integers */ + HIP_RES_VIEW_FORMAT_UINT_2X8 = 0x02, /**< 2 channel unsigned 8-bit integers */ + HIP_RES_VIEW_FORMAT_UINT_4X8 = 0x03, /**< 4 channel unsigned 8-bit integers */ + HIP_RES_VIEW_FORMAT_SINT_1X8 = 0x04, /**< 1 channel signed 8-bit integers */ + HIP_RES_VIEW_FORMAT_SINT_2X8 = 0x05, /**< 2 channel signed 8-bit integers */ + HIP_RES_VIEW_FORMAT_SINT_4X8 = 0x06, /**< 4 channel signed 8-bit integers */ + HIP_RES_VIEW_FORMAT_UINT_1X16 = 0x07, /**< 1 channel unsigned 16-bit integers */ + HIP_RES_VIEW_FORMAT_UINT_2X16 = 0x08, /**< 2 channel unsigned 16-bit integers */ + HIP_RES_VIEW_FORMAT_UINT_4X16 = 0x09, /**< 4 channel unsigned 16-bit integers */ + HIP_RES_VIEW_FORMAT_SINT_1X16 = 0x0a, /**< 1 channel signed 16-bit integers */ + HIP_RES_VIEW_FORMAT_SINT_2X16 = 0x0b, /**< 2 channel signed 16-bit integers */ + HIP_RES_VIEW_FORMAT_SINT_4X16 = 0x0c, /**< 4 channel signed 16-bit integers */ + HIP_RES_VIEW_FORMAT_UINT_1X32 = 0x0d, /**< 1 channel unsigned 32-bit integers */ + HIP_RES_VIEW_FORMAT_UINT_2X32 = 0x0e, /**< 2 channel unsigned 32-bit integers */ + HIP_RES_VIEW_FORMAT_UINT_4X32 = 0x0f, /**< 4 channel unsigned 32-bit integers */ + HIP_RES_VIEW_FORMAT_SINT_1X32 = 0x10, /**< 1 channel signed 32-bit integers */ + HIP_RES_VIEW_FORMAT_SINT_2X32 = 0x11, /**< 2 channel signed 32-bit integers */ + HIP_RES_VIEW_FORMAT_SINT_4X32 = 0x12, /**< 4 channel signed 32-bit integers */ + HIP_RES_VIEW_FORMAT_FLOAT_1X16 = 0x13, /**< 1 channel 16-bit floating point */ + HIP_RES_VIEW_FORMAT_FLOAT_2X16 = 0x14, /**< 2 channel 16-bit floating point */ + HIP_RES_VIEW_FORMAT_FLOAT_4X16 = 0x15, /**< 4 channel 16-bit floating point */ + 
HIP_RES_VIEW_FORMAT_FLOAT_1X32 = 0x16, /**< 1 channel 32-bit floating point */ + HIP_RES_VIEW_FORMAT_FLOAT_2X32 = 0x17, /**< 2 channel 32-bit floating point */ + HIP_RES_VIEW_FORMAT_FLOAT_4X32 = 0x18, /**< 4 channel 32-bit floating point */ + HIP_RES_VIEW_FORMAT_UNSIGNED_BC1 = 0x19, /**< Block compressed 1 */ + HIP_RES_VIEW_FORMAT_UNSIGNED_BC2 = 0x1a, /**< Block compressed 2 */ + HIP_RES_VIEW_FORMAT_UNSIGNED_BC3 = 0x1b, /**< Block compressed 3 */ + HIP_RES_VIEW_FORMAT_UNSIGNED_BC4 = 0x1c, /**< Block compressed 4 unsigned */ + HIP_RES_VIEW_FORMAT_SIGNED_BC4 = 0x1d, /**< Block compressed 4 signed */ + HIP_RES_VIEW_FORMAT_UNSIGNED_BC5 = 0x1e, /**< Block compressed 5 unsigned */ + HIP_RES_VIEW_FORMAT_SIGNED_BC5 = 0x1f, /**< Block compressed 5 signed */ + HIP_RES_VIEW_FORMAT_UNSIGNED_BC6H = 0x20, /**< Block compressed 6 unsigned half-float */ + HIP_RES_VIEW_FORMAT_SIGNED_BC6H = 0x21, /**< Block compressed 6 signed half-float */ + HIP_RES_VIEW_FORMAT_UNSIGNED_BC7 = 0x22 /**< Block compressed 7 */ +} HIPresourceViewFormat; + /** * HIP resource descriptor */ @@ -204,6 +284,39 @@ typedef struct hipResourceDesc { } res; }hipResourceDesc; +typedef struct HIP_RESOURCE_DESC_st +{ + HIPresourcetype resType; /**< Resource type */ + + union { + struct { + hipArray_t hArray; /**< HIP array */ + } array; + struct { + hipMipmappedArray_t hMipmappedArray; /**< HIP mipmapped array */ + } mipmap; + struct { + hipDeviceptr_t devPtr; /**< Device pointer */ + hipArray_Format format; /**< Array format */ + unsigned int numChannels; /**< Channels per array element */ + size_t sizeInBytes; /**< Size in bytes */ + } linear; + struct { + hipDeviceptr_t devPtr; /**< Device pointer */ + hipArray_Format format; /**< Array format */ + unsigned int numChannels; /**< Channels per array element */ + size_t width; /**< Width of the array in elements */ + size_t height; /**< Height of the array in elements */ + size_t pitchInBytes; /**< Pitch between two rows in bytes */ + } pitch2D; + struct { + int 
reserved[32]; + } reserved; + } res; + + unsigned int flags; /**< Flags (must be zero) */ +} HIP_RESOURCE_DESC; + /** * hip resource view descriptor */ @@ -218,6 +331,22 @@ struct hipResourceViewDesc { unsigned int lastLayer; }; +/** + * Resource view descriptor + */ +typedef struct HIP_RESOURCE_VIEW_DESC_st +{ + HIPresourceViewFormat format; /**< Resource view format */ + size_t width; /**< Width of the resource view */ + size_t height; /**< Height of the resource view */ + size_t depth; /**< Depth of the resource view */ + unsigned int firstMipmapLevel; /**< First defined mipmap level */ + unsigned int lastMipmapLevel; /**< Last defined mipmap level */ + unsigned int firstLayer; /**< First layer index */ + unsigned int lastLayer; /**< Last layer index */ + unsigned int reserved[16]; +} HIP_RESOURCE_VIEW_DESC; + /** * Memory copy types * @@ -263,26 +392,29 @@ typedef struct hipMemcpy3DParms { } hipMemcpy3DParms; typedef struct HIP_MEMCPY3D { - size_t Depth; - size_t Height; - size_t WidthInBytes; - hipDeviceptr_t dstDevice; - size_t dstHeight; - void* dstHost; - size_t dstLOD; - hipMemoryType dstMemoryType; - size_t dstPitch; - size_t dstXInBytes; - size_t dstY; - size_t dstZ; - void* reserved0; - void* reserved1; - hipDeviceptr_t srcDevice; - size_t srcHeight; - const void* srcHost; - size_t srcLOD; - hipMemoryType srcMemoryType; - size_t srcPitch; + unsigned int srcXInBytes; + unsigned int srcY; + unsigned int srcZ; + unsigned int srcLOD; + hipMemoryType srcMemoryType; + const void* srcHost; + hipDeviceptr_t srcDevice; + hipArray_t srcArray; + unsigned int srcPitch; + unsigned int srcHeight; + unsigned int dstXInBytes; + unsigned int dstY; + unsigned int dstZ; + unsigned int dstLOD; + hipMemoryType dstMemoryType; + void* dstHost; + hipDeviceptr_t dstDevice; + hipArray_t dstArray; + unsigned int dstPitch; + unsigned int dstHeight; + unsigned int WidthInBytes; + unsigned int Height; + unsigned int Depth; } HIP_MEMCPY3D; static inline struct hipPitchedPtr 
make_hipPitchedPtr(void* d, size_t p, size_t xsz, diff --git a/projects/hip/include/hip/hcc_detail/functional_grid_launch.hpp b/projects/hip/include/hip/hcc_detail/functional_grid_launch.hpp index 8f07e48d46..efe6a60197 100644 --- a/projects/hip/include/hip/hcc_detail/functional_grid_launch.hpp +++ b/projects/hip/include/hip/hcc_detail/functional_grid_launch.hpp @@ -192,16 +192,6 @@ void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks, stream, &config[0]); } -inline -__attribute__((visibility("hidden"))) -hipError_t hipExtLaunchMultiKernelMultiDevice(hipLaunchParams* launchParamsList, - int numDevices, unsigned int flags) { - hip_impl::hip_init(); - auto& ps = hip_impl::get_program_state(); - return ihipExtLaunchMultiKernelMultiDevice(launchParamsList, numDevices, flags, ps); - -} - template inline __attribute__((visibility("hidden"))) diff --git a/projects/hip/include/hip/hcc_detail/hip_fp16.h b/projects/hip/include/hip/hcc_detail/hip_fp16.h index 77a7bba60d..6fa86e94b9 100644 --- a/projects/hip/include/hip/hcc_detail/hip_fp16.h +++ b/projects/hip/include/hip/hcc_detail/hip_fp16.h @@ -229,7 +229,7 @@ THE SOFTWARE. __host__ __device__ operator __half_raw() const { return __half_raw{data}; } __host__ __device__ - operator volatile __half_raw() const volatile + operator __half_raw() const volatile { return __half_raw{data}; } diff --git a/projects/hip/include/hip/hcc_detail/hip_runtime.h b/projects/hip/include/hip/hcc_detail/hip_runtime.h index 582e0cdefa..28d3ae7051 100644 --- a/projects/hip/include/hip/hcc_detail/hip_runtime.h +++ b/projects/hip/include/hip/hcc_detail/hip_runtime.h @@ -108,9 +108,12 @@ extern int HIP_TRACE_API; #include #include #include -#include #if __HCC__ #include + #include +#else + #include + #include #endif // TODO-HCC remove old definitions ; ~1602 hcc supports __HCC_ACCELERATOR__ define. 
#if defined(__KALMAR_ACCELERATOR__) && !defined(__HCC_ACCELERATOR__) @@ -385,7 +388,7 @@ extern void ihipPostLaunchKernel(const char* kernelName, hipStream_t stream, gri #elif defined(__clang__) && defined(__HIP__) #define HIP_KERNEL_NAME(...) __VA_ARGS__ -#define HIP_SYMBOL(X) #X +#define HIP_SYMBOL(X) X typedef int hipLaunchParm; diff --git a/projects/hip/include/hip/hcc_detail/hip_runtime_api.h b/projects/hip/include/hip/hcc_detail/hip_runtime_api.h index b0d1c3570d..81e241e362 100644 --- a/projects/hip/include/hip/hcc_detail/hip_runtime_api.h +++ b/projects/hip/include/hip/hcc_detail/hip_runtime_api.h @@ -1482,18 +1482,18 @@ hipError_t hipMemcpyDtoDAsync(hipDeviceptr_t dst, hipDeviceptr_t src, size_t siz hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes, hipModule_t hmod, const char* name); -hipError_t hipGetSymbolAddress(void** devPtr, const void* symbolName); -hipError_t hipGetSymbolSize(size_t* size, const void* symbolName); -hipError_t hipMemcpyToSymbol(const void* symbolName, const void* src, +hipError_t hipGetSymbolAddress(void** devPtr, const void* symbol); +hipError_t hipGetSymbolSize(size_t* size, const void* symbol); +hipError_t hipMemcpyToSymbol(const void* symbol, const void* src, size_t sizeBytes, size_t offset __dparm(0), hipMemcpyKind kind __dparm(hipMemcpyHostToDevice)); -hipError_t hipMemcpyToSymbolAsync(const void* symbolName, const void* src, +hipError_t hipMemcpyToSymbolAsync(const void* symbol, const void* src, size_t sizeBytes, size_t offset, hipMemcpyKind kind, hipStream_t stream __dparm(0)); -hipError_t hipMemcpyFromSymbol(void* dst, const void* symbolName, +hipError_t hipMemcpyFromSymbol(void* dst, const void* symbol, size_t sizeBytes, size_t offset __dparm(0), hipMemcpyKind kind __dparm(hipMemcpyDeviceToHost)); -hipError_t hipMemcpyFromSymbolAsync(void* dst, const void* symbolName, +hipError_t hipMemcpyFromSymbolAsync(void* dst, const void* symbol, size_t sizeBytes, size_t offset, hipMemcpyKind kind, hipStream_t stream 
__dparm(0)); @@ -1933,6 +1933,15 @@ hipError_t hipMalloc3D(hipPitchedPtr* pitchedDevPtr, hipExtent extent); */ hipError_t hipFreeArray(hipArray* array); +/** + * @brief Frees a mipmapped array on the device + * + * @param[in] mipmappedArray - Pointer to mipmapped array to free + * + * @return #hipSuccess, #hipErrorInvalidValue + */ +hipError_t hipFreeMipmappedArray(hipMipmappedArray_t mipmappedArray); + /** * @brief Allocate an array on the device. * @@ -1947,6 +1956,39 @@ hipError_t hipFreeArray(hipArray* array); hipError_t hipMalloc3DArray(hipArray** array, const struct hipChannelFormatDesc* desc, struct hipExtent extent, unsigned int flags); + +/** + * @brief Allocate a mipmapped array on the device + * + * @param[out] mipmappedArray - Pointer to allocated mipmapped array in device memory + * @param[in] desc - Requested channel format + * @param[in] extent - Requested allocation size (width field in elements) + * @param[in] numLevels - Number of mipmap levels to allocate + * @param[in] flags - Flags for extensions + * + * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryAllocation + */ +hipError_t hipMallocMipmappedArray( + hipMipmappedArray_t *mipmappedArray, + const struct hipChannelFormatDesc* desc, + struct hipExtent extent, + unsigned int numLevels, + unsigned int flags __dparm(0)); + +/** + * @brief Gets a mipmap level of a HIP mipmapped array + * + * @param[out] levelArray - Returned mipmap level HIP array + * @param[in] mipmappedArray - HIP mipmapped array + * @param[in] level - Mipmap level + * + * @return #hipSuccess, #hipErrorInvalidValue + */ +hipError_t hipGetMipmappedArrayLevel( + hipArray_t *levelArray, + hipMipmappedArray_const_t mipmappedArray, + unsigned int level); + /** * @brief Copies data between host and device. 
* @@ -2159,6 +2201,31 @@ hipError_t hipMemcpy3D(const struct hipMemcpy3DParms* p); */ hipError_t hipMemcpy3DAsync(const struct hipMemcpy3DParms* p, hipStream_t stream __dparm(0)); +/** + * @brief Copies data between host and device. + * + * @param[in] pCopy 3D memory copy parameters + * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue, + * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection + * + * @see hipMemcpy, hipMemcpy2DToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol, + * hipMemcpyAsync + */ +hipError_t hipDrvMemcpy3D(const HIP_MEMCPY3D* pCopy); + +/** + * @brief Copies data between host and device asynchronously. + * + * @param[in] pCopy 3D memory copy parameters + * @param[in] stream Stream to use + * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue, + * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection + * + * @see hipMemcpy, hipMemcpy2DToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol, + * hipMemcpyAsync + */ +hipError_t hipDrvMemcpy3DAsync(const HIP_MEMCPY3D* pCopy, hipStream_t stream); + // doxygen end Memory /** * @} @@ -2957,17 +3024,6 @@ hipError_t hipModuleOccupancyMaxPotentialBlockSizeWithFlags(int* gridSize, int* hipFunction_t f, size_t dynSharedMemPerBlk, int blockSizeLimit, unsigned int flags); -/** - * @brief Returns occupancy for a device function. - * - * @param [out] numBlocks Returned occupancy - * @param [in] func Kernel function for which occupancy is calulated - * @param [in] blockSize Block size the kernel is intended to be launched with - * @param [in] dynSharedMemPerBlk dynamic shared memory usage (in bytes) intended for each block - */ -hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor( - int* numBlocks, const void* f, int blockSize, size_t dynSharedMemPerBlk); - /** * @brief Returns occupancy for a device function. 
* @@ -2979,6 +3035,29 @@ hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor( hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessor( int* numBlocks, hipFunction_t f, int blockSize, size_t dynSharedMemPerBlk); +/** + * @brief Returns occupancy for a device function. + * + * @param [out] numBlocks Returned occupancy + * @param [in] f Kernel function(hipFunction_t) for which occupancy is calulated + * @param [in] blockSize Block size the kernel is intended to be launched with + * @param [in] dynSharedMemPerBlk dynamic shared memory usage (in bytes) intended for each block + * @param [in] flags Extra flags for occupancy calculation (only default supported) + */ +hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( + int* numBlocks, hipFunction_t f, int blockSize, size_t dynSharedMemPerBlk, unsigned int flags); + +/** + * @brief Returns occupancy for a device function. + * + * @param [out] numBlocks Returned occupancy + * @param [in] func Kernel function for which occupancy is calulated + * @param [in] blockSize Block size the kernel is intended to be launched with + * @param [in] dynSharedMemPerBlk dynamic shared memory usage (in bytes) intended for each block + */ +hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor( + int* numBlocks, const void* f, int blockSize, size_t dynSharedMemPerBlk); + /** * @brief Returns occupancy for a device function. * @@ -2992,18 +3071,20 @@ hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( int* numBlocks, const void* f, int blockSize, size_t dynSharedMemPerBlk, unsigned int flags __dparm(hipOccupancyDefault)); /** - * @brief Returns occupancy for a device function. 
+ * @brief determine the grid and block sizes to achieve maximum occupancy for a kernel * - * @param [out] numBlocks Returned occupancy - * @param [in] f Kernel function(hipFunction_t) for which occupancy is calulated - * @param [in] blockSize Block size the kernel is intended to be launched with + * @param [out] gridSize minimum grid size for maximum potential occupancy + * @param [out] blockSize block size for maximum potential occupancy + * @param [in] f kernel function for which occupancy is calculated * @param [in] dynSharedMemPerBlk dynamic shared memory usage (in bytes) intended for each block - * @param [in] flags Extra flags for occupancy calculation (only default supported) + * @param [in] blockSizeLimit the maximum block size for the kernel, use 0 for no limit + * + * @returns hipSuccess, hipErrorInvalidDevice, hipErrorInvalidValue */ -hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( - int* numBlocks, hipFunction_t f, int blockSize, size_t dynSharedMemPerBlk, unsigned int flags); +hipError_t hipOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize, + const void* f, size_t dynSharedMemPerBlk, + int blockSizeLimit); -#if __HIP_VDI__ && !defined(__HCC__) /** * @brief Launches kernels on multiple devices and guarantees all specified kernels are dispatched * on respective streams before enqueuing any other work on the specified streams from any other threads @@ -3018,7 +3099,6 @@ hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( hipError_t hipExtLaunchMultiKernelMultiDevice(hipLaunchParams* launchParamsList, int numDevices, unsigned int flags); -#endif // doxygen end Version Management /** @@ -3260,6 +3340,206 @@ hipError_t hipLaunchKernel(const void* function_address, size_t sharedMemBytes __dparm(0), hipStream_t stream __dparm(0)); +#if __HIP_VDI__ +hipError_t hipBindTexture( + size_t* offset, + const textureReference* tex, + const void* devPtr, + const hipChannelFormatDesc* desc, + size_t size = UINT_MAX); + 
+hipError_t hipBindTexture2D( + size_t* offset, + const textureReference* tex, + const void* devPtr, + const hipChannelFormatDesc* desc, + size_t width, + size_t height, + size_t pitch); + +hipError_t hipBindTextureToArray( + const textureReference* tex, + hipArray_const_t array, + const hipChannelFormatDesc* desc); + +hipError_t hipBindTextureToMipmappedArray( + const textureReference* tex, + hipMipmappedArray_const_t mipmappedArray, + const hipChannelFormatDesc* desc); + +hipError_t hipGetTextureAlignmentOffset( + size_t* offset, + const textureReference* texref); + +hipError_t hipGetTextureReference( + const textureReference** texref, + const void* symbol); + +hipError_t hipUnbindTexture(const textureReference* tex); + +hipError_t hipCreateTextureObject( + hipTextureObject_t* pTexObject, + const hipResourceDesc* pResDesc, + const hipTextureDesc* pTexDesc, + const hipResourceViewDesc* pResViewDesc); + +hipError_t hipDestroyTextureObject(hipTextureObject_t textureObject); + +hipError_t hipGetChannelDesc( + hipChannelFormatDesc* desc, + hipArray_const_t array); + +hipError_t hipGetTextureObjectResourceDesc( + hipResourceDesc* pResDesc, + hipTextureObject_t textureObject); + +hipError_t hipGetTextureObjectResourceViewDesc( + hipResourceViewDesc* pResViewDesc, + hipTextureObject_t textureObject); + +hipError_t hipGetTextureObjectTextureDesc( + hipTextureDesc* pTexDesc, + hipTextureObject_t textureObject); + +hipError_t hipTexRefGetAddress( + hipDeviceptr_t* dev_ptr, + const textureReference* texRef); + +hipError_t hipTexRefGetAddressMode( + hipTextureAddressMode* pam, + const textureReference* texRef, + int dim); + +hipError_t hipTexRefGetFilterMode( + hipTextureFilterMode* pfm, + const textureReference* texRef); + +hipError_t hipTexRefGetFlags( + unsigned int* pFlags, + const textureReference* texRef); + +hipError_t hipTexRefGetFormat( + hipArray_Format* pFormat, + int* pNumChannels, + const textureReference* texRef); + +hipError_t hipTexRefGetMaxAnisotropy( + int* 
pmaxAnsio, + const textureReference* texRef); + +hipError_t hipTexRefGetMipmapFilterMode( + hipTextureFilterMode* pfm, + const textureReference* texRef); + +hipError_t hipTexRefGetMipmapLevelBias( + float* pbias, + const textureReference* texRef); + +hipError_t hipTexRefGetMipmapLevelClamp( + float* pminMipmapLevelClamp, + float* pmaxMipmapLevelClamp, + const textureReference* texRef); + +hipError_t hipTexRefGetMipMappedArray( + hipMipmappedArray_t* pArray, + const textureReference* texRef); + +hipError_t hipTexRefSetAddress( + size_t* ByteOffset, + textureReference* texRef, + hipDeviceptr_t dptr, + size_t bytes); + +hipError_t hipTexRefSetAddress2D( + textureReference* texRef, + const HIP_ARRAY_DESCRIPTOR* desc, + hipDeviceptr_t dptr, + size_t Pitch); + +hipError_t hipTexRefSetAddressMode( + textureReference* texRef, + int dim, + hipTextureAddressMode am); + +hipError_t hipTexRefSetArray( + textureReference* tex, + hipArray_const_t array, + unsigned int flags); + +hipError_t hipTexRefSetBorderColor( + textureReference* texRef, + float* pBorderColor); + +hipError_t hipTexRefSetFilterMode( + textureReference* texRef, + hipTextureFilterMode fm); + +hipError_t hipTexRefSetFlags( + textureReference* texRef, + unsigned int Flags); + +hipError_t hipTexRefSetFormat( + textureReference* texRef, + hipArray_Format fmt, + int NumPackedComponents); + +hipError_t hipTexRefSetMaxAnisotropy( + textureReference* texRef, + unsigned int maxAniso); + +hipError_t hipTexRefSetMipmapFilterMode( + textureReference* texRef, + hipTextureFilterMode fm); + +hipError_t hipTexRefSetMipmapLevelBias( + textureReference* texRef, + float bias); + +hipError_t hipTexRefSetMipmapLevelClamp( + textureReference* texRef, + float minMipMapLevelClamp, + float maxMipMapLevelClamp); + +hipError_t hipTexRefSetMipmappedArray( + textureReference* texRef, + hipMipmappedArray* mipmappedArray, + unsigned int Flags); + +hipError_t hipMipmappedArrayCreate( + hipMipmappedArray_t* pHandle, + HIP_ARRAY3D_DESCRIPTOR* 
pMipmappedArrayDesc, + unsigned int numMipmapLevels); + +hipError_t hipMipmappedArrayDestroy( + hipMipmappedArray_t hMipmappedArray); + +hipError_t hipMipmappedArrayGetLevel( + hipArray_t* pLevelArray, + hipMipmappedArray_t hMipMappedArray, + unsigned int level); + +hipError_t hipTexObjectCreate( + hipTextureObject_t* pTexObject, + const HIP_RESOURCE_DESC* pResDesc, + const HIP_TEXTURE_DESC* pTexDesc, + const HIP_RESOURCE_VIEW_DESC* pResViewDesc); + +hipError_t hipTexObjectDestroy( + hipTextureObject_t texObject); + +hipError_t hipTexObjectGetResourceDesc( + HIP_RESOURCE_DESC* pResDesc, + hipTextureObject_t texObject); + +hipError_t hipTexObjectGetResourceViewDesc( + HIP_RESOURCE_VIEW_DESC* pResViewDesc, + hipTextureObject_t texObject); + +hipError_t hipTexObjectGetTextureDesc( + HIP_TEXTURE_DESC* pTexDesc, + hipTextureObject_t texObject); +#endif + /** * @} */ @@ -3269,6 +3549,60 @@ hipError_t hipLaunchKernel(const void* function_address, } /* extern "c" */ #endif +#if defined(__cplusplus) && !defined(__HCC__) && defined(__clang__) && defined(__HIP__) +template +static hipError_t __host__ inline hipOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize, + T f, size_t dynSharedMemPerBlk = 0, int blockSizeLimit = 0) { + return hipOccupancyMaxPotentialBlockSize(gridSize, blockSize, reinterpret_cast(f),dynSharedMemPerBlk,blockSizeLimit); +} + +template +static hipError_t __host__ inline hipOccupancyMaxPotentialBlockSizeWithFlags(int* gridSize, int* blockSize, + T f, size_t dynSharedMemPerBlk = 0, int blockSizeLimit = 0, unsigned int flags = 0 ) { + return hipOccupancyMaxPotentialBlockSize(gridSize, blockSize, reinterpret_cast(f),dynSharedMemPerBlk,blockSizeLimit); +} +#endif // defined(__cplusplus) && !defined(__HCC__) && defined(__clang__) && defined(__HIP__) + +#if defined(__cplusplus) && !defined(__HCC__) + +template +hipError_t hipGetSymbolAddress(void** devPtr, const T &symbol) { + return ::hipGetSymbolAddress(devPtr, (const void *)&symbol); +} + +template 
+hipError_t hipGetSymbolSize(size_t* size, const T &symbol) { + return ::hipGetSymbolSize(size, (const void *)&symbol); +} + +template +hipError_t hipMemcpyToSymbol(const T& symbol, const void* src, size_t sizeBytes, + size_t offset __dparm(0), + hipMemcpyKind kind __dparm(hipMemcpyHostToDevice)) { + return ::hipMemcpyToSymbol((const void*)&symbol, src, sizeBytes, offset, kind); +} + +template +hipError_t hipMemcpyToSymbolAsync(const T& symbol, const void* src, size_t sizeBytes, size_t offset, + hipMemcpyKind kind, hipStream_t stream __dparm(0)) { + return ::hipMemcpyToSymbolAsync((const void*)&symbol, src, sizeBytes, offset, kind, stream); +} + +template +hipError_t hipMemcpyFromSymbol(void* dst, const T &symbol, + size_t sizeBytes, size_t offset __dparm(0), + hipMemcpyKind kind __dparm(hipMemcpyDeviceToHost)) { + return ::hipMemcpyFromSymbol(dst, (const void*)&symbol, sizeBytes, offset, kind); +} + +template +hipError_t hipMemcpyFromSymbolAsync(void* dst, const T& symbol, size_t sizeBytes, size_t offset, + hipMemcpyKind kind, hipStream_t stream __dparm(0)) { + return ::hipMemcpyFromSymbolAsync(dst, (const void*)&symbol, sizeBytes, offset, kind, stream); +} + +#endif + #if USE_PROF_API #include #endif @@ -3307,12 +3641,16 @@ inline hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( class TlsData; +#if !__HIP_VDI__ hipError_t hipBindTexture(size_t* offset, textureReference* tex, const void* devPtr, const hipChannelFormatDesc* desc, size_t size = UINT_MAX); +#endif +#if !__HIP_VDI__ hipError_t ihipBindTextureImpl(TlsData *tls, int dim, enum hipTextureReadMode readMode, size_t* offset, const void* devPtr, const struct hipChannelFormatDesc* desc, size_t size, textureReference* tex); +#endif /* * @brief hipBindTexture Binds size bytes of the memory area pointed to by @p devPtr to the texture @@ -3329,11 +3667,13 @@ hipError_t ihipBindTextureImpl(TlsData *tls, int dim, enum hipTextureReadMode re * @param[in] size - Size of the memory area pointed to by 
devPtr * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree, #hipErrorUnknown **/ +#if !__HIP_VDI__ template hipError_t hipBindTexture(size_t* offset, struct texture& tex, const void* devPtr, const struct hipChannelFormatDesc& desc, size_t size = UINT_MAX) { return ihipBindTextureImpl(nullptr, dim, readMode, offset, devPtr, &desc, size, &tex); } +#endif /* * @brief hipBindTexture Binds size bytes of the memory area pointed to by @p devPtr to the texture @@ -3349,81 +3689,114 @@ hipError_t hipBindTexture(size_t* offset, struct texture& tex, * @param[in] size - Size of the memory area pointed to by devPtr * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree, #hipErrorUnknown **/ +#if !__HIP_VDI__ template hipError_t hipBindTexture(size_t* offset, struct texture& tex, const void* devPtr, size_t size = UINT_MAX) { return ihipBindTextureImpl(nullptr, dim, readMode, offset, devPtr, &(tex.channelDesc), size, &tex); } +#endif // C API +#if !__HIP_VDI__ hipError_t hipBindTexture2D(size_t* offset, textureReference* tex, const void* devPtr, const hipChannelFormatDesc* desc, size_t width, size_t height, size_t pitch); +#endif +#if !__HIP_VDI__ hipError_t ihipBindTexture2DImpl(int dim, enum hipTextureReadMode readMode, size_t* offset, const void* devPtr, const struct hipChannelFormatDesc* desc, size_t width, size_t height, textureReference* tex, size_t pitch); +#endif +#if !__HIP_VDI__ template hipError_t hipBindTexture2D(size_t* offset, struct texture& tex, const void* devPtr, size_t width, size_t height, size_t pitch) { return ihipBindTexture2DImpl(dim, readMode, offset, devPtr, &(tex.channelDesc), width, height, &tex); } +#endif +#if !__HIP_VDI__ template hipError_t hipBindTexture2D(size_t* offset, struct texture& tex, const void* devPtr, const struct hipChannelFormatDesc& desc, size_t width, size_t height, size_t pitch) { return ihipBindTexture2DImpl(dim, readMode, offset, devPtr, &desc, width, height, &tex); } +#endif // C API +#if !__HIP_VDI__ 
hipError_t hipBindTextureToArray(textureReference* tex, hipArray_const_t array, const hipChannelFormatDesc* desc); +#endif +#if !__HIP_VDI__ hipError_t ihipBindTextureToArrayImpl(TlsData *tls, int dim, enum hipTextureReadMode readMode, hipArray_const_t array, const struct hipChannelFormatDesc& desc, textureReference* tex); +#endif +#if !__HIP_VDI__ template hipError_t hipBindTextureToArray(struct texture& tex, hipArray_const_t array) { return ihipBindTextureToArrayImpl(nullptr, dim, readMode, array, tex.channelDesc, &tex); } +#endif +#if !__HIP_VDI__ template hipError_t hipBindTextureToArray(struct texture& tex, hipArray_const_t array, const struct hipChannelFormatDesc& desc) { return ihipBindTextureToArrayImpl(nullptr, dim, readMode, array, desc, &tex); } +#endif +#if !__HIP_VDI__ template inline static hipError_t hipBindTextureToArray(struct texture *tex, hipArray_const_t array, const struct hipChannelFormatDesc* desc) { return ihipBindTextureToArrayImpl(nullptr, dim, readMode, array, *desc, tex); } +#endif // C API +#if !__HIP_VDI__ hipError_t hipBindTextureToMipmappedArray(const textureReference* tex, hipMipmappedArray_const_t mipmappedArray, const hipChannelFormatDesc* desc); +#endif +#if !__HIP_VDI__ template hipError_t hipBindTextureToMipmappedArray(const texture& tex, hipMipmappedArray_const_t mipmappedArray) { return hipSuccess; } +#endif +#if !__HIP_VDI__ template hipError_t hipBindTextureToMipmappedArray(const texture& tex, hipMipmappedArray_const_t mipmappedArray, const hipChannelFormatDesc& desc) { return hipSuccess; } +#endif #if __HIP_VDI__ && !defined(__HCC__) + +template +inline hipError_t hipOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize, + F kernel, size_t dynSharedMemPerBlk, uint32_t blockSizeLimit) { +return hipOccupancyMaxPotentialBlockSize(gridSize, blockSize,(hipFunction_t)kernel, dynSharedMemPerBlk, blockSizeLimit); +} + template inline hipError_t hipLaunchCooperativeKernel(T f, dim3 gridDim, dim3 blockDim, void** 
kernelParams, unsigned int sharedMemBytes, hipStream_t stream) { @@ -3453,15 +3826,22 @@ inline hipError_t hipExtLaunchMultiKernelMultiDevice(hipLaunchParams* launchPara * * @return #hipSuccess **/ +#if !__HIP_VDI__ hipError_t hipUnbindTexture(const textureReference* tex); +#endif +#if !__HIP_VDI__ extern hipError_t ihipUnbindTextureImpl(const hipTextureObject_t& textureObject); +#endif +#if !__HIP_VDI__ template hipError_t hipUnbindTexture(struct texture& tex) { return ihipUnbindTextureImpl(tex.textureObject); } +#endif +#if !__HIP_VDI__ hipError_t hipGetChannelDesc(hipChannelFormatDesc* desc, hipArray_const_t array); hipError_t hipGetTextureAlignmentOffset(size_t* offset, const textureReference* texref); hipError_t hipGetTextureReference(const textureReference** texref, const void* symbol); @@ -3499,11 +3879,110 @@ hipError_t hipTexRefGetAddress(hipDeviceptr_t* dev_ptr, textureReference tex); hipError_t hipTexRefSetAddress2D(textureReference* tex, const HIP_ARRAY_DESCRIPTOR* desc, hipDeviceptr_t devPtr, size_t pitch); +#endif hipError_t hipCreateSurfaceObject(hipSurfaceObject_t* pSurfObject, const hipResourceDesc* pResDesc); hipError_t hipDestroySurfaceObject(hipSurfaceObject_t surfaceObject); +#if __HIP_VDI__ +template +static inline hipError_t hipBindTexture( + size_t *offset, + const struct texture &tex, + const void *devPtr, + size_t size = UINT_MAX) +{ + return hipBindTexture(offset, tex, devPtr, tex.channelDesc, size); +} + +template +static inline hipError_t hipBindTexture( + size_t *offset, + const struct texture &tex, + const void *devPtr, + const struct hipChannelFormatDesc &desc, + size_t size = UINT_MAX) +{ + return hipBindTexture(offset, &tex, devPtr, &desc, size); +} + +template +static inline hipError_t hipBindTexture2D( + size_t *offset, + const struct texture &tex, + const void *devPtr, + size_t width, + size_t height, + size_t pitch) +{ + return hipBindTexture2D(offset, &tex, devPtr, &tex.channelDesc, width, height, pitch); +} + +template 
+static inline hipError_t hipBindTexture2D( + size_t *offset, + const struct texture &tex, + const void *devPtr, + const struct hipChannelFormatDesc &desc, + size_t width, + size_t height, + size_t pitch) +{ + return hipBindTexture2D(offset, &tex, devPtr, &desc, width, height, pitch); +} + +template +static inline hipError_t hipBindTextureToArray( + const struct texture &tex, + hipArray_const_t array) +{ + struct cudaChannelFormatDesc desc; + hipError_t err = hipGetChannelDesc(&desc, array); + return (err == hipSuccess) ? hipBindTextureToArray(tex, array, desc) : err; +} + +template +static inline hipError_t hipBindTextureToArray( + const struct texture &tex, + hipArray_const_t array, + const struct hipChannelFormatDesc &desc) +{ + return hipBindTextureToArray(&tex, array, &desc); +} + +template +static inline hipError_t hipBindTextureToMipmappedArray( + const struct texture &tex, + hipMipmappedArray_const_t mipmappedArray) +{ + struct hipChannelFormatDesc desc; + hipArray_t levelArray; + hipError_t err = hipGetMipmappedArrayLevel(&levelArray, mipmappedArray, 0); + if (err != hipSuccess) { + return err; + } + err = hipGetChannelDesc(&desc, levelArray); + return (err == hipSuccess) ? 
hipBindTextureToMipmappedArray(tex, mipmappedArray, desc) : err; +} + +template +static inline hipError_t hipBindTextureToMipmappedArray( + const struct texture &tex, + hipMipmappedArray_const_t mipmappedArray, + const struct cudaChannelFormatDesc &desc) +{ + return hipBindTextureToMipmappedArray(&tex, mipmappedArray, &desc); +} + +template +static inline hipError_t hipUnbindTexture( + const struct texture &tex) +{ + return hipUnbindTexture(&tex); +} +#endif + // doxygen end Texture /** * @} diff --git a/projects/hip/include/hip/hcc_detail/hip_texture_types.h b/projects/hip/include/hip/hcc_detail/hip_texture_types.h index fcd6d69dbe..e92babfd5a 100644 --- a/projects/hip/include/hip/hcc_detail/hip_texture_types.h +++ b/projects/hip/include/hip/hcc_detail/hip_texture_types.h @@ -57,25 +57,27 @@ struct __HIP_TEXTURE_ATTRIB texture : public textureReference { texture(int norm = 0, enum hipTextureFilterMode fMode = hipFilterModePoint, enum hipTextureAddressMode aMode = hipAddressModeClamp) { normalized = norm; - readMode = hipReadModeNormalizedFloat; + readMode = mode; filterMode = fMode; addressMode[0] = aMode; addressMode[1] = aMode; addressMode[2] = aMode; channelDesc = hipCreateChannelDesc(); sRGB = 0; + textureObject = nullptr; } texture(int norm, enum hipTextureFilterMode fMode, enum hipTextureAddressMode aMode, struct hipChannelFormatDesc desc) { normalized = norm; - readMode = hipReadModeNormalizedFloat; + readMode = mode; filterMode = fMode; addressMode[0] = aMode; addressMode[1] = aMode; addressMode[2] = aMode; channelDesc = desc; sRGB = 0; + textureObject = nullptr; } }; diff --git a/projects/hip/include/hip/hcc_detail/hiprtc.h b/projects/hip/include/hip/hcc_detail/hiprtc.h index ec9c85716a..fecea75340 100644 --- a/projects/hip/include/hip/hcc_detail/hiprtc.h +++ b/projects/hip/include/hip/hcc_detail/hiprtc.h @@ -28,7 +28,9 @@ extern "C" { #include +#if !defined(_WIN32) #pragma GCC visibility push (default) +#endif enum hiprtcResult { HIPRTC_SUCCESS = 0, @@ 
-81,7 +83,9 @@ hiprtcResult hiprtcGetCode(hiprtcProgram prog, char* code); hiprtcResult hiprtcGetCodeSize(hiprtcProgram prog, size_t* codeSizeRet); +#if !defined(_WIN32) #pragma GCC visibility pop +#endif #ifdef __cplusplus } diff --git a/projects/hip/include/hip/hcc_detail/ockl_image.h b/projects/hip/include/hip/hcc_detail/ockl_image.h new file mode 100644 index 0000000000..b32b23fda0 --- /dev/null +++ b/projects/hip/include/hip/hcc_detail/ockl_image.h @@ -0,0 +1,135 @@ +/* +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#pragma once + +#include + +extern "C" { + +#define ADDRESS_SPACE_CONSTANT __attribute__((address_space(4))) + +__device__ float4::Native_vec_ __ockl_image_load_1D(unsigned int ADDRESS_SPACE_CONSTANT*i, int c); + +__device__ float4::Native_vec_ __ockl_image_load_1Db(unsigned int ADDRESS_SPACE_CONSTANT*i, int c); + +__device__ float4::Native_vec_ __ockl_image_load_1Da(unsigned int ADDRESS_SPACE_CONSTANT*i, int2::Native_vec_ c); + +__device__ float4::Native_vec_ __ockl_image_load_2D(unsigned int ADDRESS_SPACE_CONSTANT*i, int2::Native_vec_ c); + +__device__ float4::Native_vec_ __ockl_image_load_2Da(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c); + +__device__ float4::Native_vec_ __ockl_image_load_3D(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c); + +__device__ float4::Native_vec_ __ockl_image_load_CM(unsigned int ADDRESS_SPACE_CONSTANT*i, int2::Native_vec_ c, int f); + +__device__ float4::Native_vec_ __ockl_image_load_CMa(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, int f); + +__device__ float4::Native_vec_ __ockl_image_load_lod_1D(unsigned int ADDRESS_SPACE_CONSTANT*i, int c, int l); + +__device__ float4::Native_vec_ __ockl_image_load_lod_1Da(unsigned int ADDRESS_SPACE_CONSTANT*i, int2::Native_vec_ c, int l); + +__device__ float4::Native_vec_ __ockl_image_load_lod_2D(unsigned int ADDRESS_SPACE_CONSTANT*i, int2::Native_vec_ c, int l); + +__device__ float4::Native_vec_ __ockl_image_load_lod_2Da(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, int l); + +__device__ float4::Native_vec_ __ockl_image_load_lod_3D(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, int l); + +__device__ float4::Native_vec_ __ockl_image_load_lod_CM(unsigned int ADDRESS_SPACE_CONSTANT*i, int2::Native_vec_ c, int f, int l); + +__device__ float4::Native_vec_ __ockl_image_load_lod_CMa(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, int f, int l); + +__device__ void __ockl_image_store_1D(unsigned int 
ADDRESS_SPACE_CONSTANT*i, int c, float4::Native_vec_ p); + +__device__ void __ockl_image_store_1Da(unsigned int ADDRESS_SPACE_CONSTANT*i, int2::Native_vec_ c, float4::Native_vec_ p); + +__device__ void __ockl_image_store_2D(unsigned int ADDRESS_SPACE_CONSTANT*i, int2::Native_vec_ c, float4::Native_vec_ p); + +__device__ void __ockl_image_store_2Da(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, float4::Native_vec_ p); + +__device__ void __ockl_image_store_3D(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, float4::Native_vec_ p); + +__device__ void __ockl_image_store_CM(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, float4::Native_vec_ p); + +__device__ void __ockl_image_store_CMa(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, float4::Native_vec_ p); + +__device__ void __ockl_image_store_lod_1D(unsigned int ADDRESS_SPACE_CONSTANT*i, int c, int l, float4::Native_vec_ p); + +__device__ void __ockl_image_store_lod_1Da(unsigned int ADDRESS_SPACE_CONSTANT*i, int2::Native_vec_ c, int l, float4::Native_vec_ p); + +__device__ void __ockl_image_store_lod_2D(unsigned int ADDRESS_SPACE_CONSTANT*i, int2::Native_vec_ c, int l, float4::Native_vec_ p); + +__device__ void __ockl_image_store_lod_2Da(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, int l, float4::Native_vec_ p); + +__device__ void __ockl_image_store_lod_3D(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, int l, float4::Native_vec_ p); + +__device__ void __ockl_image_store_lod_CM(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, int l, float4::Native_vec_ p); + +__device__ void __ockl_image_store_lod_CMa(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, int l, float4::Native_vec_ p); + +__device__ float4::Native_vec_ __ockl_image_sample_1D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float c); + +__device__ float4::Native_vec_ __ockl_image_sample_1Da(unsigned int ADDRESS_SPACE_CONSTANT*i, 
unsigned int ADDRESS_SPACE_CONSTANT*s, float2::Native_vec_ c); + +__device__ float4::Native_vec_ __ockl_image_sample_2D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float2::Native_vec_ c); + +__device__ float4::Native_vec_ __ockl_image_sample_2Da(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float4::Native_vec_ c); + +__device__ float4::Native_vec_ __ockl_image_sample_3D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float4::Native_vec_ c); + +__device__ float4::Native_vec_ __ockl_image_sample_CM(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float4::Native_vec_ c); + +__device__ float4::Native_vec_ __ockl_image_sample_CMa(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float4::Native_vec_ c); + +__device__ float4::Native_vec_ __ockl_image_sample_grad_1D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float c, float dx, float dy); + +__device__ float4::Native_vec_ __ockl_image_sample_grad_1Da(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float2::Native_vec_ c, float dx, float dy); + +__device__ float4::Native_vec_ __ockl_image_sample_grad_2D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float2::Native_vec_ c, float2::Native_vec_ dx, float2::Native_vec_ dy); + +__device__ float4::Native_vec_ __ockl_image_sample_grad_2Da(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float4::Native_vec_ c, float2::Native_vec_ dx, float2::Native_vec_ dy); + +__device__ float4::Native_vec_ __ockl_image_sample_grad_3D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float4::Native_vec_ c, float4::Native_vec_ dx, float4::Native_vec_ dy); + +__device__ float4::Native_vec_ __ockl_image_sample_lod_1D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float c, 
float l); + +__device__ float4::Native_vec_ __ockl_image_sample_lod_1Da(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float2::Native_vec_ c, float l); + +__device__ float4::Native_vec_ __ockl_image_sample_lod_2D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float2::Native_vec_ c, float l); + +__device__ float4::Native_vec_ __ockl_image_sample_lod_2Da(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float4::Native_vec_ c, float l); + +__device__ float4::Native_vec_ __ockl_image_sample_lod_3D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float4::Native_vec_ c, float l); + +__device__ float4::Native_vec_ __ockl_image_sample_lod_CM(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float4::Native_vec_ c, float l); + +__device__ float4::Native_vec_ __ockl_image_sample_lod_CMa(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float4::Native_vec_ c, float l); + +__device__ float4::Native_vec_ __ockl_image_gather4r_2D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float2::Native_vec_ c); + +__device__ float4::Native_vec_ __ockl_image_gather4g_2D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float2::Native_vec_ c); + +__device__ float4::Native_vec_ __ockl_image_gather4b_2D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float2::Native_vec_ c); + +__device__ float4::Native_vec_ __ockl_image_gather4a_2D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float2::Native_vec_ c); + +}; \ No newline at end of file diff --git a/projects/hip/include/hip/hcc_detail/texture_fetch_functions.h b/projects/hip/include/hip/hcc_detail/texture_fetch_functions.h new file mode 100644 index 0000000000..03c1780030 --- /dev/null +++ b/projects/hip/include/hip/hcc_detail/texture_fetch_functions.h @@ 
-0,0 +1,386 @@ +/* +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#pragma once + +#if defined(__cplusplus) + +#include +#include +#include + +#include + +#define TEXTURE_PARAMETERS_INIT \ + unsigned int ADDRESS_SPACE_CONSTANT* i = (unsigned int ADDRESS_SPACE_CONSTANT*)t.textureObject; \ + unsigned int ADDRESS_SPACE_CONSTANT* s = i + HIP_SAMPLER_OBJECT_OFFSET_DWORD; + +template +struct __hip_is_tex_channel_type +{ + static constexpr bool value = + std::is_same::value || + std::is_same::value || + std::is_same::value || + std::is_same::value || + std::is_same::value || + std::is_same::value || + std::is_same::value; +}; + +template< + typename T, + unsigned int rank> +struct __hip_is_tex_channel_type> +{ + static constexpr bool value = + __hip_is_tex_channel_type::value && + ((rank == 1) || + (rank == 2) || + (rank == 4)); +}; + +template +struct __hip_is_tex_normalized_channel_type +{ + static constexpr bool value = + std::is_same::value || + std::is_same::value || + std::is_same::value || + std::is_same::value; +}; + +template< + typename T, + unsigned int rank> +struct __hip_is_tex_normalized_channel_type> +{ + static constexpr bool value = + __hip_is_tex_normalized_channel_type::value && + ((rank == 1) || + (rank == 2) || + (rank == 4)); +}; + +template < + typename T, + hipTextureReadMode readMode, + typename Enable = void> +struct __hip_tex_ret +{ + static_assert(std::is_same::value, "Invalid channel type!"); +}; + +template < + typename T, + hipTextureReadMode readMode> +using __hip_tex_ret_t = typename __hip_tex_ret::type; + +template +struct __hip_tex_ret< + T, + hipReadModeElementType, + typename std::enable_if<__hip_is_tex_channel_type::value, bool>::type> +{ + using type = T; +}; + +template< + typename T, + unsigned int rank> +struct __hip_tex_ret< + HIP_vector_type, + hipReadModeElementType, + typename std::enable_if<__hip_is_tex_channel_type>::value, bool>::type> +{ + using type = HIP_vector_type<__hip_tex_ret_t, rank>; +}; + +template +struct __hip_tex_ret< + T, + hipReadModeNormalizedFloat, + typename 
std::enable_if<__hip_is_tex_normalized_channel_type::value, bool>::type> +{ + using type = float; +}; + +template< + typename T, + unsigned int rank> +struct __hip_tex_ret< + HIP_vector_type, + hipReadModeNormalizedFloat, + typename std::enable_if<__hip_is_tex_normalized_channel_type>::value, bool>::type> +{ + using type = HIP_vector_type<__hip_tex_ret_t, rank>; +}; + +template +static __forceinline__ __device__ __hip_tex_ret_t tex1Dfetch(texture t, int x) +{ + TEXTURE_PARAMETERS_INIT; + auto tmp = __ockl_image_load_1Db(i, x); + return *reinterpret_cast<__hip_tex_ret_t*>(&tmp); +} + +template +static __forceinline__ __device__ __hip_tex_ret_t tex1D(texture t, float x) +{ + TEXTURE_PARAMETERS_INIT; + auto tmp = __ockl_image_sample_1D(i, s, x); + return *reinterpret_cast<__hip_tex_ret_t*>(&tmp); +} + +template +static __forceinline__ __device__ __hip_tex_ret_t tex2D(texture t, float x, float y) +{ + TEXTURE_PARAMETERS_INIT; + auto tmp = __ockl_image_sample_2D(i, s, float2(x, y).data); + return *reinterpret_cast<__hip_tex_ret_t*>(&tmp); +} + +template +static __forceinline__ __device__ __hip_tex_ret_t tex1DLayered(texture t, float x, int layer) +{ + TEXTURE_PARAMETERS_INIT; + auto tmp = __ockl_image_sample_1Da(i, s, float2(x, layer).data); + return *reinterpret_cast<__hip_tex_ret_t*>(&tmp); +} + +template +static __forceinline__ __device__ __hip_tex_ret_t tex2DLayered(texture t, float x, float y, int layer) +{ + TEXTURE_PARAMETERS_INIT; + auto tmp = __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + return *reinterpret_cast<__hip_tex_ret_t*>(&tmp); +} + +template +static __forceinline__ __device__ __hip_tex_ret_t tex3D(texture t, float x, float y, float z) +{ + TEXTURE_PARAMETERS_INIT; + auto tmp = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); + return *reinterpret_cast<__hip_tex_ret_t*>(&tmp); +} + +template +static __forceinline__ __device__ __hip_tex_ret_t texCubemap(texture t, float x, float y, float z) +{ + TEXTURE_PARAMETERS_INIT; + 
auto tmp = __ockl_image_sample_CM(i, s, float4(x, y, z, 0.0f).data); + return *reinterpret_cast<__hip_tex_ret_t*>(&tmp); +} + +template +static __forceinline__ __device__ __hip_tex_ret_t tex1DLod(texture t, float x, float level) +{ + TEXTURE_PARAMETERS_INIT; + auto tmp = __ockl_image_sample_lod_1D(i, s, x, level); + return *reinterpret_cast<__hip_tex_ret_t*>(&tmp); +} + +template +static __forceinline__ __device__ __hip_tex_ret_t tex2DLod(texture t, float x, float y, float level) +{ + TEXTURE_PARAMETERS_INIT; + auto tmp = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); + return *reinterpret_cast<__hip_tex_ret_t*>(&tmp); +} + +template +static __forceinline__ __device__ __hip_tex_ret_t tex1DLayeredLod(texture t, float x, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + auto tmp = __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); + return *reinterpret_cast<__hip_tex_ret_t*>(&tmp); +} + +template +static __forceinline__ __device__ __hip_tex_ret_t tex2DLayeredLod(texture t, float x, float y, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + auto tmp = __ockl_image_sample_lod_2Da(i, s, float4(x, y, layer, 0.0f).data, level); + return *reinterpret_cast<__hip_tex_ret_t*>(&tmp); +} + +template +static __forceinline__ __device__ __hip_tex_ret_t tex3DLod(texture t, float x, float y, float z, float level) +{ + TEXTURE_PARAMETERS_INIT; + auto tmp = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, level); + return *reinterpret_cast<__hip_tex_ret_t*>(&tmp); +} + +template +static __forceinline__ __device__ __hip_tex_ret_t texCubemapLod(texture t, float x, float y, float z, float level) +{ + TEXTURE_PARAMETERS_INIT; + auto tmp = __ockl_image_sample_lod_CM(i, s, float4(x, y, z, 0.0f).data, level); + return *reinterpret_cast<__hip_tex_ret_t*>(&tmp); +} + +template +static __forceinline__ __device__ __hip_tex_ret_t texCubemapLayered(texture t, float x, float y, float z, int layer) +{ + TEXTURE_PARAMETERS_INIT; + auto tmp = 
__ockl_image_sample_CMa(i, s, float4(x, y, z, layer).data); + return *reinterpret_cast<__hip_tex_ret_t*>(&tmp); +} + +template +static __forceinline__ __device__ __hip_tex_ret_t texCubemapLayeredLod(texture t, float x, float y, float z, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + auto tmp = __ockl_image_sample_lod_CMa(i, s, float4(x, y, z, layer).data, level); + return *reinterpret_cast<__hip_tex_ret_t*>(&tmp); +} + +template +static __forceinline__ __device__ __hip_tex_ret_t texCubemapGrad(texture t, float x, float y, float z, float4 dPdx, float4 dPdy) +{ + TEXTURE_PARAMETERS_INIT; + // TODO missing in device libs. + // auto tmp = __ockl_image_sample_grad_CM(i, s, float4(x, y, z, 0.0f).data, float4(dPdx.x, dPdx.y, dPdx.z, 0.0f).data, float4(dPdy.x, dPdy.y, dPdy.z, 0.0f).data); + // return *reinterpret_cast<__hip_tex_ret_t*>(&tmp); + return {}; +} + +template +static __forceinline__ __device__ __hip_tex_ret_t texCubemapLayeredGrad(texture t, float x, float y, float z, int layer, float4 dPdx, float4 dPdy) +{ + TEXTURE_PARAMETERS_INIT; + // TODO missing in device libs. 
+ // auto tmp = __ockl_image_sample_grad_CMa(i, s, float4(x, y, z, layer).data, float4(dPdx.x, dPdx.y, dPdx.z, 0.0f).data, float4(dPdy.x, dPdy.y, dPdy.z, 0.0f).data); + // return *reinterpret_cast<__hip_tex_ret_t*>(&tmp); + return {}; +} + +template +static __forceinline__ __device__ __hip_tex_ret_t tex1DGrad(texture t, float x, float dPdx, float dPdy) +{ + TEXTURE_PARAMETERS_INIT; + auto tmp = __ockl_image_sample_grad_1D(i, s, x, dPdx, dPdy); + return *reinterpret_cast<__hip_tex_ret_t*>(&tmp); +} + +template +static __forceinline__ __device__ __hip_tex_ret_t tex2DGrad(texture t, float x, float y, float2 dPdx, float2 dPdy) +{ + TEXTURE_PARAMETERS_INIT; + auto tmp = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, float2(dPdx.x, dPdx.y).data, float2(dPdy.x, dPdy.y).data); + return *reinterpret_cast<__hip_tex_ret_t*>(&tmp); +} + +template +static __forceinline__ __device__ __hip_tex_ret_t tex1DLayeredGrad(texture t, float x, int layer, float dPdx, float dPdy) +{ + TEXTURE_PARAMETERS_INIT; + auto tmp = __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dPdx, dPdy); + return *reinterpret_cast<__hip_tex_ret_t*>(&tmp); +} + +template +static __forceinline__ __device__ __hip_tex_ret_t tex2DLayeredGrad(texture t, float x, float y, int layer, float2 dPdx, float2 dPdy) +{ + TEXTURE_PARAMETERS_INIT; + auto tmp = __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, float2(dPdx.x, dPdx.y).data, float2(dPdy.x, dPdy.y).data); + return *reinterpret_cast<__hip_tex_ret_t*>(&tmp); +} + +template +static __forceinline__ __device__ __hip_tex_ret_t tex3DGrad(texture t, float x, float y, float z, float4 dPdx, float4 dPdy) +{ + TEXTURE_PARAMETERS_INIT; + auto tmp = __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, float4(dPdx.x, dPdx.y, dPdx.z, 0.0f).data, float4(dPdy.x, dPdy.y, dPdy.z, 0.0f).data); + return *reinterpret_cast<__hip_tex_ret_t*>(&tmp); +} + +template < + typename T, + hipTextureReadMode readMode, + typename Enable = void> +struct 
__hip_tex2dgather_ret +{ + static_assert(std::is_same::value, "Invalid channel type!"); +}; + +template < + typename T, + hipTextureReadMode readMode> +using __hip_tex2dgather_ret_t = typename __hip_tex2dgather_ret::type; + +template +struct __hip_tex2dgather_ret< + T, + hipReadModeElementType, + typename std::enable_if<__hip_is_tex_channel_type::value, bool>::type> +{ + using type = HIP_vector_type; +}; + +template< + typename T, + unsigned int rank> +struct __hip_tex2dgather_ret< + HIP_vector_type, + hipReadModeElementType, + typename std::enable_if<__hip_is_tex_channel_type>::value, bool>::type> +{ + using type = HIP_vector_type; +}; + +template +struct __hip_tex2dgather_ret< + T, + hipReadModeNormalizedFloat, + typename std::enable_if<__hip_is_tex_normalized_channel_type::value, bool>::type> +{ + using type = float4; +}; + +template +static __forceinline__ __device__ __hip_tex2dgather_ret_t tex2Dgather(texture t, float x, float y, int comp=0) +{ + TEXTURE_PARAMETERS_INIT; + switch (comp) { + case 1: { + auto tmp = __ockl_image_gather4g_2D(i, s, float2(x, y).data); + return *reinterpret_cast<__hip_tex2dgather_ret_t*>(&tmp); + } + case 2: { + auto tmp = __ockl_image_gather4b_2D(i, s, float2(x, y).data); + return *reinterpret_cast<__hip_tex2dgather_ret_t*>(&tmp); + } + case 3: { + auto tmp = __ockl_image_gather4a_2D(i, s, float2(x, y).data); + return *reinterpret_cast<__hip_tex2dgather_ret_t*>(&tmp); + } + default: { + auto tmp = __ockl_image_gather4r_2D(i, s, float2(x, y).data); + return *reinterpret_cast<__hip_tex2dgather_ret_t*>(&tmp); + } + } + return {}; +} + +#endif diff --git a/projects/hip/include/hip/hcc_detail/texture_indirect_functions.h b/projects/hip/include/hip/hcc_detail/texture_indirect_functions.h new file mode 100644 index 0000000000..2fe33f3ede --- /dev/null +++ b/projects/hip/include/hip/hcc_detail/texture_indirect_functions.h @@ -0,0 +1,501 @@ +/* +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#pragma once + +#if defined(__cplusplus) + +#include +#include +#include + +#include + +#define TEXTURE_OBJECT_PARAMETERS_INIT \ + unsigned int ADDRESS_SPACE_CONSTANT* i = (unsigned int ADDRESS_SPACE_CONSTANT*)textureObject; \ + unsigned int ADDRESS_SPACE_CONSTANT* s = i + HIP_SAMPLER_OBJECT_OFFSET_DWORD; + +template +struct __hip_is_itex_channel_type +{ + static constexpr bool value = + std::is_same::value || + std::is_same::value || + std::is_same::value || + std::is_same::value || + std::is_same::value || + std::is_same::value || + std::is_same::value; +}; + +template< + typename T, + unsigned int rank> +struct __hip_is_itex_channel_type> +{ + static constexpr bool value = + __hip_is_itex_channel_type::value && + ((rank == 1) || + (rank == 2) || + (rank == 4)); +}; + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ T tex1Dfetch(hipTextureObject_t textureObject, int x) +{ + TEXTURE_OBJECT_PARAMETERS_INIT + auto tmp = __ockl_image_load_1Db(i, x); + return *reinterpret_cast(&tmp); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ void tex1Dfetch(T *ptr, hipTextureObject_t textureObject, int x) +{ + *ptr = tex1Dfetch(textureObject, x); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ T tex1D(hipTextureObject_t textureObject, float x) +{ + TEXTURE_OBJECT_PARAMETERS_INIT + auto tmp = __ockl_image_sample_1D(i, s, x); + return *reinterpret_cast(&tmp); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ void tex1D(T *ptr, hipTextureObject_t textureObject, float x) +{ + *ptr = tex1D(textureObject, x); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ T tex2D(hipTextureObject_t 
textureObject, float x, float y) +{ + TEXTURE_OBJECT_PARAMETERS_INIT + auto tmp = __ockl_image_sample_2D(i, s, float2(x, y).data); + return *reinterpret_cast(&tmp); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ void tex2D(T *ptr, hipTextureObject_t textureObject, float x, float y) +{ + *ptr = tex2D(textureObject, x, y); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ T tex3D(hipTextureObject_t textureObject, float x, float y, float z) +{ + TEXTURE_OBJECT_PARAMETERS_INIT + auto tmp = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); + return *reinterpret_cast(&tmp); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ void tex3D(T *ptr, hipTextureObject_t textureObject, float x, float y, float z) +{ + *ptr = tex3D(textureObject, x, y, z); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ T tex1DLayered(hipTextureObject_t textureObject, float x, int layer) +{ + TEXTURE_OBJECT_PARAMETERS_INIT + auto tmp = __ockl_image_sample_1Da(i, s, float2(x, layer).data); + return *reinterpret_cast(&tmp); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ void tex1DLayered(T *ptr, hipTextureObject_t textureObject, float x, int layer) +{ + *ptr = tex1DLayered(textureObject, x, layer); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ T tex2DLayered(hipTextureObject_t textureObject, float x, float y, int layer) +{ + TEXTURE_OBJECT_PARAMETERS_INIT + auto tmp = __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + return *reinterpret_cast(&tmp); +} + +template < + typename T, + typename 
std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ void tex2DLayered(T *ptr, hipTextureObject_t textureObject, float x, float y, int layer) +{ + *ptr = tex1DLayered(textureObject, x, y, layer); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ T texCubemap(hipTextureObject_t textureObject, float x, float y, float z) +{ + TEXTURE_OBJECT_PARAMETERS_INIT + auto tmp = __ockl_image_sample_CM(i, s, float4(x, y, z, 0.0f).data); + return *reinterpret_cast(&tmp); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ void texCubemap(T *ptr, hipTextureObject_t textureObject, float x, float y, float z) +{ + *ptr = texCubemap(textureObject, x, y, z); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ T texCubemapLayered(hipTextureObject_t textureObject, float x, float y, float z, int layer) +{ + TEXTURE_OBJECT_PARAMETERS_INIT + auto tmp = __ockl_image_sample_CMa(i, s, float4(x, y, z, layer).data); + return *reinterpret_cast(&tmp); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ void texCubemapLayered(T *ptr, hipTextureObject_t textureObject, float x, float y, float z, int layer) +{ + *ptr = texCubemapLayered(textureObject, x, y, z, layer); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ T tex2Dgather(hipTextureObject_t textureObject, float x, float y, int comp = 0) +{ + TEXTURE_OBJECT_PARAMETERS_INIT + switch (comp) { + case 1: { + auto tmp = __ockl_image_gather4r_2D(i, s, float2(x, y).data); + return *reinterpret_cast(&tmp); + break; + } + case 2: { + auto tmp = __ockl_image_gather4g_2D(i, s, float2(x, y).data); + return *reinterpret_cast(&tmp); + break; 
+ } + case 3: { + auto tmp = __ockl_image_gather4b_2D(i, s, float2(x, y).data); + return *reinterpret_cast(&tmp); + break; + } + default: { + auto tmp = __ockl_image_gather4a_2D(i, s, float2(x, y).data); + return *reinterpret_cast(&tmp); + break; + } + }; + return {}; +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ void tex2Dgather(T *ptr, hipTextureObject_t textureObject, float x, float y, int comp = 0) +{ + *ptr = texCubemapLayered(textureObject, x, y, comp); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ T tex1DLod(hipTextureObject_t textureObject, float x, float level) +{ + TEXTURE_OBJECT_PARAMETERS_INIT + auto tmp = __ockl_image_sample_lod_1D(i, s, x, level); + return *reinterpret_cast(&tmp); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ void tex1DLod(T *ptr, hipTextureObject_t textureObject, float x, float level) +{ + *ptr = tex1DLod(textureObject, x, level); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ T tex2DLod(hipTextureObject_t textureObject, float x, float y, float level) +{ + TEXTURE_OBJECT_PARAMETERS_INIT + auto tmp = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); + return *reinterpret_cast(&tmp); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ void tex2DLod(T *ptr, hipTextureObject_t textureObject, float x, float y, float level) +{ + *ptr = tex2DLod(textureObject, x, y, level); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ T tex3DLod(hipTextureObject_t textureObject, float x, float y, float z, float level) +{ + TEXTURE_OBJECT_PARAMETERS_INIT + auto tmp 
= __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, level);
+    return *reinterpret_cast<T*>(&tmp);
+}
+
+template <
+    typename T,
+    typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
+static __device__ void tex3DLod(T *ptr, hipTextureObject_t textureObject, float x, float y, float z, float level)
+{
+    *ptr = tex3DLod<T>(textureObject, x, y, z, level);
+}
+
+template <
+    typename T,
+    typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
+static __device__ T tex1DLayeredLod(hipTextureObject_t textureObject, float x, int layer, float level)
+{
+    TEXTURE_OBJECT_PARAMETERS_INIT
+    auto tmp = __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);  // LOD sampler, so `level` is honoured
+    return *reinterpret_cast<T*>(&tmp);
+}
+
+template <
+    typename T,
+    typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
+static __device__ void tex1DLayeredLod(T *ptr, hipTextureObject_t textureObject, float x, int layer, float level)
+{
+    *ptr = tex1DLayeredLod<T>(textureObject, x, layer, level);
+}
+
+template <
+    typename T,
+    typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
+static __device__ T tex2DLayeredLod(hipTextureObject_t textureObject, float x, float y, int layer, float level)
+{
+    TEXTURE_OBJECT_PARAMETERS_INIT
+    auto tmp = __ockl_image_sample_lod_2Da(i, s, float4(x, y, layer, 0.0f).data, level);  // LOD sampler, so `level` is honoured
+    return *reinterpret_cast<T*>(&tmp);
+}
+
+template <
+    typename T,
+    typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
+static __device__ void tex2DLayeredLod(T *ptr, hipTextureObject_t textureObject, float x, float y, int layer, float level)
+{
+    *ptr = tex2DLayeredLod<T>(textureObject, x, y, layer, level);
+}
+
+template <
+    typename T,
+    typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
+static __device__ T texCubemapLod(hipTextureObject_t textureObject, float x, float y, float z, float level)
+{
+    TEXTURE_OBJECT_PARAMETERS_INIT
+    auto tmp = __ockl_image_sample_lod_CM(i, s, float4(x,
y, z, 0.0f).data, level); + return *reinterpret_cast(&tmp); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ void texCubemapLod(T *ptr, hipTextureObject_t textureObject, float x, float y, float z, float level) +{ + *ptr = texCubemapLod(textureObject, x, y, z, level); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ T texCubemapGrad(hipTextureObject_t textureObject, float x, float y, float z, float4 dPdx, float4 dPdy) +{ + TEXTURE_OBJECT_PARAMETERS_INIT + // TODO missing in device libs. + // auto tmp = __ockl_image_sample_grad_CM(i, s, float4(x, y, z, 0.0f).data, float4(dPdx.x, dPdx.y, dPdx.z, 0.0f).data, float4(dPdy.x, dPdy.y, dPdy.z, 0.0f).data); + // return *reinterpret_cast(&tmp); + return {}; +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ void texCubemapGrad(T *ptr, hipTextureObject_t textureObject, float x, float y, float z, float4 dPdx, float4 dPdy) +{ + *ptr = texCubemapGrad(textureObject, x, y, z, dPdx, dPdy); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ T texCubemapLayeredLod(hipTextureObject_t textureObject, float x, float y, float z, int layer, float level) +{ + TEXTURE_OBJECT_PARAMETERS_INIT + auto tmp = __ockl_image_sample_lod_CMa(i, s, float4(x, y, z, layer).data, level); + return *reinterpret_cast(&tmp); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ void texCubemapLayeredLod(T *ptr, hipTextureObject_t textureObject, float x, float y, float z, int layer, float level) +{ + *ptr = texCubemapLayeredLod(textureObject, x, y, z, layer, level); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> 
+static __device__ T tex1DGrad(hipTextureObject_t textureObject, float x, float dPdx, float dPdy) +{ + TEXTURE_OBJECT_PARAMETERS_INIT + auto tmp = __ockl_image_sample_grad_1D(i, s, x, dPdx, dPdy); + return *reinterpret_cast(&tmp); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ void tex1DGrad(T *ptr, hipTextureObject_t textureObject, float x, float dPdx, float dPdy) +{ + *ptr = tex1DGrad(textureObject, x, dPdx, dPdy); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ T tex2DGrad(hipTextureObject_t textureObject, float x, float y, float2 dPdx, float2 dPdy) +{ + TEXTURE_OBJECT_PARAMETERS_INIT + auto tmp = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, float2(dPdx.x, dPdx.y).data, float2(dPdy.x, dPdy.y).data); + return *reinterpret_cast(&tmp); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ void tex2DGrad(T *ptr, hipTextureObject_t textureObject, float x, float y, float2 dPdx, float2 dPdy) +{ + *ptr = tex2DGrad(textureObject, x, y, dPdx, dPdy); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ T tex3DGrad(hipTextureObject_t textureObject, float x, float y, float z, float4 dPdx, float4 dPdy) +{ + TEXTURE_OBJECT_PARAMETERS_INIT + auto tmp = __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, float4(dPdx.x, dPdx.y, dPdx.z, 0.0f).data, float4(dPdy.x, dPdy.y, dPdy.z, 0.0f).data); + return *reinterpret_cast(&tmp); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ void tex3DGrad(T *ptr, hipTextureObject_t textureObject, float x, float y, float z, float4 dPdx, float4 dPdy) +{ + *ptr = tex3DGrad(textureObject, x, y, z, dPdx, dPdy); +} + +template < + typename T, + 
typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ T tex1DLayeredGrad(hipTextureObject_t textureObject, float x, int layer, float dPdx, float dPdy) +{ + TEXTURE_OBJECT_PARAMETERS_INIT + auto tmp = __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dPdx, dPdy); + return *reinterpret_cast(&tmp); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ void tex1DLayeredGrad(T *ptr, hipTextureObject_t textureObject, float x, int layer, float dPdx, float dPdy) +{ + *ptr = tex1DLayeredGrad(textureObject, x, layer, dPdx, dPdy); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ T tex2DLayeredGrad(hipTextureObject_t textureObject, float x, float y, int layer, float2 dPdx, float2 dPdy) +{ + TEXTURE_OBJECT_PARAMETERS_INIT + auto tmp = __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, float2(dPdx.x, dPdx.y).data, float2(dPdy.x, dPdy.y).data); + return *reinterpret_cast(&tmp); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ void tex2DLayeredGrad(T *ptr, hipTextureObject_t textureObject, float x, float y, int layer, float2 dPdx, float2 dPdy) +{ + *ptr = tex2DLayeredGrad(textureObject, x, y, layer, dPdx, dPdy); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ T texCubemapLayeredGrad(hipTextureObject_t textureObject, float x, float y, float z, int layer, float4 dPdx, float4 dPdy) +{ + TEXTURE_OBJECT_PARAMETERS_INIT + // TODO missing in device libs. 
+ // auto tmp = __ockl_image_sample_grad_CMa(i, s, float4(x, y, z, layer).data, float4(dPdx.x, dPdx.y, dPdx.z, 0.0f).data, float4(dPdy.x, dPdy.y, dPdy.z, 0.0f).data); + // return *reinterpret_cast(&tmp); + return {}; +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ void texCubemapLayeredGrad(T *ptr, hipTextureObject_t textureObject, float x, float y, float z, int layer, float4 dPdx, float4 dPdy) +{ + *ptr = texCubemapLayeredGrad(textureObject, x, y, z, layer, dPdx, dPdy); +} + +#endif diff --git a/projects/hip/include/hip/hip_ext.h b/projects/hip/include/hip/hip_ext.h index a618462d6e..9b54f7fa57 100644 --- a/projects/hip/include/hip/hip_ext.h +++ b/projects/hip/include/hip/hip_ext.h @@ -109,7 +109,9 @@ hipError_t hipHccModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX, hipEvent_t stopEvent = nullptr) __attribute__((deprecated("use hipExtModuleLaunchKernel instead"))); -#if !__HIP_VDI__ && defined(__cplusplus) +//#if !__HIP_VDI__ && defined(__cplusplus) +#if defined(__HIP_PLATFORM_HCC__) && GENERIC_GRID_LAUNCH == 1 && defined(__HCC__) +//kernel_descriptor and hip_impl::make_kernarg are in "grid_launch_GGL.hpp" namespace hip_impl { inline diff --git a/projects/hip/include/hip/hip_runtime_api.h b/projects/hip/include/hip/hip_runtime_api.h index 3d76e92632..b0974aeef6 100644 --- a/projects/hip/include/hip/hip_runtime_api.h +++ b/projects/hip/include/hip/hip_runtime_api.h @@ -336,7 +336,7 @@ typedef enum hipDeviceAttribute_t { hipDeviceAttributeTexturePitchAlignment, /// /dev/null +} + +popd () { + command popd "$@" > /dev/null +} + + +ROCMDIR=@ROCM_PATH@ +ROCMLIBDIR=$ROCMDIR/lib +HIPDIR=$ROCMDIR/hip +HIPLIBDIR=$ROCMDIR/hip/lib + +# Soft-link to library files +HIPLIBFILES=$(ls -A $HIPLIBDIR | grep -v [-/$]) +mkdir -p $ROCMLIBDIR +mkdir -p $ROCMLIBDIR/cmake +pushd $ROCMLIBDIR +for f in $HIPLIBFILES +do + ln -s -r -f $HIPLIBDIR/$f $(basename $f) +done +# Make the hip cmake 
directory link. +pushd cmake +ln -s -r -f $HIPLIBDIR/cmake/hip hip +popd +popd diff --git a/projects/hip/packaging/hip-vdi.prerm b/projects/hip/packaging/hip-vdi.prerm new file mode 100755 index 0000000000..bf2be36523 --- /dev/null +++ b/projects/hip/packaging/hip-vdi.prerm @@ -0,0 +1,31 @@ +#!/bin/bash + +pushd () { + command pushd "$@" > /dev/null +} + +popd () { + command popd "$@" > /dev/null +} + +ROCMDIR=@ROCM_PATH@ +ROCMLIBDIR=$ROCMDIR/lib +HIPDIR=$ROCMDIR/hip +HIPLIBDIR=$ROCMDIR/hip/lib +([ ! -d $ROCMDIR ] || [ ! -d $HIPDIR ]) && exit 0 +([ ! -d $ROCMLIBDIR ] || [ ! -d $HIPLIBDIR ]) && exit 0 + +# Remove soft-links to libraries +HIPLIBFILES=$(ls -A $HIPLIBDIR | grep -v [-/$]) +pushd $ROCMLIBDIR +for f in $HIPLIBFILES; do + [ -e $f ] || continue + rm $(basename $f) +done +pushd cmake +unlink hip +popd +rmdir --ignore-fail-on-non-empty cmake +popd +rmdir --ignore-fail-on-non-empty $ROCMLIBDIR + diff --git a/projects/hip/packaging/hip-vdi.txt b/projects/hip/packaging/hip-vdi.txt new file mode 100644 index 0000000000..2bbe4331d0 --- /dev/null +++ b/projects/hip/packaging/hip-vdi.txt @@ -0,0 +1,57 @@ +cmake_minimum_required(VERSION 2.8.3) +project(hip_vdi) + +install(FILES @PROJECT_BINARY_DIR@/lib/libamdhip64.so DESTINATION lib) +install(FILES @PROJECT_BINARY_DIR@/lib/libamdhip64_static.a DESTINATION lib) +install(FILES @PROJECT_BINARY_DIR@/lib/libhip_hcc.so DESTINATION lib) +install(FILES @PROJECT_BINARY_DIR@/lib/libhiprtc.so DESTINATION lib) + +install(FILES @PROJECT_BINARY_DIR@/.hipInfo DESTINATION lib) +install(FILES @PROJECT_BINARY_DIR@/hip-config.cmake @PROJECT_BINARY_DIR@/hip-config-version.cmake DESTINATION lib/cmake/hip) + +############################# +# Packaging steps +############################# +set(CPACK_SET_DESTDIR TRUE) +set(CPACK_INSTALL_PREFIX "/opt/rocm/hip") + +## cmake generated target files contains IMPORTED_LOCATION_RELEASE etc. 
which +## is installation path when building the project, which may be different from +## the intallation path for packaging. These paths have to be replaced by +## the package installation path, otherwise apps using pkg-config will fail. +file(GLOB _target_files @CONFIG_PACKAGE_INSTALL_DIR@/hip-targets*.cmake) +foreach(_target_file ${_target_files}) + execute_process(COMMAND sed -i s:@CMAKE_INSTALL_PREFIX@:${CPACK_INSTALL_PREFIX}:g ${_target_file}) +endforeach() +install(FILES ${_target_files} DESTINATION lib/cmake/hip) + +set(CPACK_PACKAGE_NAME "hip-vdi") +set(HCC_PACKAGE_NAME "vdi") +set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "HIP: Heterogenous-computing Interface for Portability [VDI]") +set(CPACK_PACKAGE_VENDOR "Advanced Micro Devices, Inc.") +set(CPACK_PACKAGE_CONTACT "Maneesh Gupta ") +set(CPACK_PACKAGE_VERSION @HIP_VERSION_MAJOR@.@HIP_VERSION_MINOR@.@HIP_VERSION_PATCH@) +set(CPACK_PACKAGE_VERSION_MAJOR @HIP_VERSION_MAJOR@) +set(CPACK_PACKAGE_VERSION_MINOR @HIP_VERSION_MINOR@) +set(CPACK_PACKAGE_VERSION_PATCH @HIP_VERSION_PATCH@) +set(CPACK_PACKAGE_FILE_NAME ${CPACK_PACKAGE_NAME}-${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}.${CPACK_PACKAGE_VERSION_PATCH}) +set(CPACK_GENERATOR "TGZ;DEB;RPM") +set(CPACK_BINARY_DEB "ON") +set(CPACK_DEBIAN_PACKAGE_CONTROL_EXTRA "${PROJECT_BINARY_DIR}/postinst;${PROJECT_BINARY_DIR}/prerm") +set(CPACK_DEBIAN_PACKAGE_DEPENDS "hsa-rocr-dev, hsa-ext-rocr-dev, rocm-utils, hip-base (= ${CPACK_PACKAGE_VERSION}), comgr (>= 1.1), llvm-amdgpu") +set(CPACK_DEBIAN_PACKAGE_PROVIDES "hip_vdi, hip-hcc (= ${CPACK_PACKAGE_VERSION})") +set(CPACK_DEBIAN_PACKAGE_REPLACES "hip_vdi") +set(CPACK_DEBIAN_PACKAGE_CONFLICTS "hip_vdi") +set(CPACK_BINARY_RPM "ON") +set(CPACK_RPM_PACKAGE_ARCHITECTURE "${CMAKE_SYSTEM_PROCESSOR}") +set(CPACK_RPM_POST_INSTALL_SCRIPT_FILE "${PROJECT_BINARY_DIR}/postinst") +set(CPACK_RPM_PRE_UNINSTALL_SCRIPT_FILE "${PROJECT_BINARY_DIR}/prerm") +set(CPACK_RPM_PACKAGE_AUTOREQPROV " no") +string(REPLACE "-" "_" 
HIP_BASE_VERSION ${CPACK_PACKAGE_VERSION}) +set(CPACK_RPM_PACKAGE_REQUIRES "hsa-rocr-dev, hsa-ext-rocr-dev, rocm-utils, hip-base = ${HIP_BASE_VERSION}, comgr >= 1.1, llvm-amdgpu") +set(CPACK_RPM_PACKAGE_PROVIDES "hip_vdi, hip-hcc = ${HIP_BASE_VERSION}") +set(CPACK_RPM_PACKAGE_OBSOLETES "hip_vdi") +set(CPACK_RPM_PACKAGE_CONFLICTS "hip_vdi") +set(CPACK_RPM_EXCLUDE_FROM_AUTO_FILELIST_ADDITION "/opt") +set(CPACK_SOURCE_GENERATOR "TGZ") +include(CPack) diff --git a/projects/hip/samples/1_Utils/hipDispatchLatency/hipDispatchLatency.cpp b/projects/hip/samples/1_Utils/hipDispatchLatency/hipDispatchLatency.cpp index 625d8cd742..b528b0c75d 100644 --- a/projects/hip/samples/1_Utils/hipDispatchLatency/hipDispatchLatency.cpp +++ b/projects/hip/samples/1_Utils/hipDispatchLatency/hipDispatchLatency.cpp @@ -109,7 +109,7 @@ int main() { /***********************************************************************************/ //Timing directly the dispatch -#ifdef __HIP_PLATFORM_HCC__ +#if defined(__HIP_PLATFORM_HCC__) && GENERIC_GRID_LAUNCH == 1 && defined(__HCC__) for (auto i = 0; i < TOTAL_RUN_COUNT; ++i) { hipExtLaunchKernelGGL((EmptyKernel), dim3(NUM_GROUPS), dim3(GROUP_SIZE), 0, stream0, start, stop, 0); hipEventSynchronize(stop); diff --git a/projects/hip/samples/2_Cookbook/13_occupancy/occupancy.cpp b/projects/hip/samples/2_Cookbook/13_occupancy/occupancy.cpp index 01fa7aafed..e772e82b1d 100644 --- a/projects/hip/samples/2_Cookbook/13_occupancy/occupancy.cpp +++ b/projects/hip/samples/2_Cookbook/13_occupancy/occupancy.cpp @@ -44,6 +44,8 @@ void multiplyCPU(float* C, float* A, float* B, int N){ } } +#if defined(__HIP_PLATFORM_HCC__) && GENERIC_GRID_LAUNCH == 1 && defined(__HCC__) + void launchKernel(float* C, float* A, float* B, bool manual){ hipDeviceProp_t devProp; @@ -93,8 +95,10 @@ void launchKernel(float* C, float* A, float* B, bool manual){ std::cout << "Theoretical Occupancy is " << (double)numBlock* blockSize/devProp.maxThreadsPerMultiProcessor * 100 << "%" << std::endl; 
} } +#endif int main() { +#if defined(__HIP_PLATFORM_HCC__) && GENERIC_GRID_LAUNCH == 1 && defined(__HCC__) float *A, *B, *C0, *C1, *cpuC; float *Ad, *Bd, *C0d, *C1d; int errors=0; @@ -173,4 +177,8 @@ int main() { free(C0); free(C1); free(cpuC); +#else + std::cout <<"hipOccupancyMaxPotentialBlockSize template not support for Clang compiler"< + +// hip header file +#include "hip/hip_runtime.h" +#include "hip/hip_profile.h" + +#define WIDTH 1024 + +#define NUM (WIDTH * WIDTH) + +#define THREADS_PER_BLOCK_X 4 +#define THREADS_PER_BLOCK_Y 4 +#define THREADS_PER_BLOCK_Z 1 + +#define ITERATIONS 10 + +// Cmdline parms to control start and stop triggers +int startTriggerIteration = -1; +int stopTriggerIteration = -1; + +// Device (Kernel) function, it must be void +__global__ void matrixTranspose(float* out, float* in, const int width) { + int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; + int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y; + + out[y * width + x] = in[x * width + y]; +} + +// CPU implementation of matrix transpose +void matrixTransposeCPUReference(float* output, float* input, const unsigned int width) { + for (unsigned int j = 0; j < width; j++) { + for (unsigned int i = 0; i < width; i++) { + output[i * width + j] = input[j * width + i]; + } + } +} + + +// Use a separate function to demonstrate how to use function name as part of scoped marker: +void runGPU(float* Matrix, float* TransposeMatrix, float* gpuMatrix, float* gpuTransposeMatrix) { + // __func__ is a standard C++ macro which expands to the name of the function, in this case + // "runGPU" + HIP_SCOPED_MARKER(__func__, "MyGroup"); + + for (int i = 0; i < ITERATIONS; i++) { + if (i == startTriggerIteration) { + hipProfilerStart(); + } + if (i == stopTriggerIteration) { + hipProfilerStop(); + } + + float eventMs = 0.0f; + + hipEvent_t start, stop; + hipEventCreate(&start); + hipEventCreate(&stop); + + + // Record the start event + hipEventRecord(start, NULL); + + // Memory transfer from 
host to device + hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice); + + // Record the stop event + hipEventRecord(stop, NULL); + hipEventSynchronize(stop); + + hipEventElapsedTime(&eventMs, start, stop); + + printf("hipMemcpyHostToDevice time taken = %6.3fms\n", eventMs); + + // Record the start event + hipEventRecord(start, NULL); + + // Lauching kernel from host + hipLaunchKernelGGL(matrixTranspose, + dim3(WIDTH / THREADS_PER_BLOCK_X, WIDTH / THREADS_PER_BLOCK_Y), + dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y), 0, 0, gpuTransposeMatrix, + gpuMatrix, WIDTH); + + // Record the stop event + hipEventRecord(stop, NULL); + hipEventSynchronize(stop); + hipEventElapsedTime(&eventMs, start, stop); + + printf("kernel Execution time = %6.3fms\n", eventMs); + + // Record the start event + hipEventRecord(start, NULL); + + // Memory transfer from device to host + hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost); + + // Record the stop event + hipEventRecord(stop, NULL); + hipEventSynchronize(stop); + + hipEventElapsedTime(&eventMs, start, stop); + + printf("hipMemcpyDeviceToHost time taken = %6.3fms\n", eventMs); + } +}; + + +int main(int argc, char* argv[]) { + if (argc >= 2) { + startTriggerIteration = atoi(argv[1]); + printf("info : will start tracing at iteration:%d\n", startTriggerIteration); + } + if (argc >= 3) { + stopTriggerIteration = atoi(argv[2]); + printf("info : will stop tracing at iteration:%d\n", stopTriggerIteration); + } + + float* Matrix; + float* TransposeMatrix; + float* cpuTransposeMatrix; + + float* gpuMatrix; + float* gpuTransposeMatrix; + + hipDeviceProp_t devProp; + hipGetDeviceProperties(&devProp, 0); + + std::cout << "Device name " << devProp.name << std::endl; + + { + // Show example of how to create a "scoped marker". 
+ // The scoped marker records the time spent inside the { scope } of the marker - the begin + // timestamp is at the beginning of the code scope, and the end is recorded when the SCOPE + // exits. This can be viewed in CodeXL timeline relative to other GPU and CPU events. This + // marker captures the time spent in setup including host allocation, initialization, and + // device memory allocation. + HIP_SCOPED_MARKER("Setup", "MyGroup"); + + + Matrix = (float*)malloc(NUM * sizeof(float)); + TransposeMatrix = (float*)malloc(NUM * sizeof(float)); + cpuTransposeMatrix = (float*)malloc(NUM * sizeof(float)); + + // initialize the input data + for (int i = 0; i < NUM; i++) { + Matrix[i] = (float)i * 10.0f; + } + + + // allocate the memory on the device side + hipMalloc((void**)&gpuMatrix, NUM * sizeof(float)); + hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float)); + + // FYI, the scoped-marker will be destroyed here when the scope exits, and will record its + // "end" timestamp. + } + + runGPU(Matrix, TransposeMatrix, gpuMatrix, gpuTransposeMatrix); + + + // show how to use explicit begin/end markers: + // We begin the timed region with HIP_BEGIN_MARKER, passing in the markerName and group: + // The region will stop when HIP_END_MARKER is called + // This is another way to mark begin/end - as an alternative to scoped markers. 
+ HIP_BEGIN_MARKER("Check&TearDown", "MyGroup"); + + int errors = 0; + + // CPU MatrixTranspose computation + matrixTransposeCPUReference(cpuTransposeMatrix, Matrix, WIDTH); + + // verify the results + double eps = 1.0E-6; + for (int i = 0; i < NUM; i++) { + if (std::abs(TransposeMatrix[i] - cpuTransposeMatrix[i]) > eps) { + errors++; + } + } + if (errors != 0) { + printf("FAILED: %d errors\n", errors); + } else { + printf("PASSED!\n"); + } + + // free the resources on device side + hipFree(gpuMatrix); + hipFree(gpuTransposeMatrix); + + // free the resources on host side + free(Matrix); + free(TransposeMatrix); + free(cpuTransposeMatrix); + + // This ends the last marker started in this thread, in this case "Check&TearDown" + HIP_END_MARKER(); + + return errors; +} diff --git a/projects/hip/samples/2_Cookbook/2_Profiler/Readme.md b/projects/hip/samples/2_Cookbook/2_Profiler/Readme.md new file mode 100644 index 0000000000..8b32beb348 --- /dev/null +++ b/projects/hip/samples/2_Cookbook/2_Profiler/Readme.md @@ -0,0 +1,47 @@ +## Using hipEvents to measure performance ### + +This tutorial is follow-up of the previous two tutorial where we learn how to write our first hip program, in which we compute Matrix Transpose and in second one, we added feature to measure time taken for memory transfer and kernel execution. In this tutorial, we'll explain how to use the codexl/rocm-profiler for hip timeline tracing. Also, we will augment the source code with additional markers so we can see the high-level application flow alongside the information that CodeXL automatically collects. + + +## Introduction: + +CodeXL and rocm-profiler are the tool used for profiling the application, which is of prominent use in optimizing the application by means of finding the memory bottlenecks and etc. 
+ +## Requirement: +[CodeXL Installation](http://gpuopen.com/compute-product/codexl/) + +## Prerequisite knowledge: + +Programmers familiar with CUDA, OpenCL will be able to quickly learn and start coding with the HIP API. In case you are not, don't worry. You chose to start with the best one. We'll be explaining everything assuming you are completely new to GPGPU programming. + +## Simple Matrix Transpose + +We will be using the Simple Matrix Transpose source code from the previous tutorial as it is. + +## Using CodeXL markers for HIP Functions + +HIP can generate markers at function begin/end which are displayed on the CodeXL timeline view. To do this, you need to install ROCm-Profiler and enable HIP to generate the markers: + +1. Install ROCm-Profiler. Installing HIP from the ROCm pre-built packages installs the ROCm-Profiler as well. Alternatively, you can build ROCm-Profiler using the instructions given below. + + +2. Run with profiler enabled to generate ATP file. +(These steps are also captured in the Makefile) +The HIP_PROFILE_API enables display of the HIP APIs on the CodeXL timeline view. +`/opt/rocm/bin/rocm-profiler -o -A -e HIP_PROFILE_API=1 ` + +## Using HIP_TRACE_API + +You can also print the HIP function strings to stderr using the HIP_TRACE_API environment variable. This can also be combined with the more detailed debug information provided by the HIP_DB switch. For example: +`HIP_TRACE_API=1 HIP_DB=0x2 ./myHipApp` +Note this trace mode uses colors. "less -r" can handle raw control characters and will display the debug output in proper colors. 
+ +## More Info: +- [HIP FAQ](https://github.com/ROCm-Developer-Tools/HIP/blob/master/docs/markdown/hip_faq.md) +- [HIP Kernel Language](https://github.com/ROCm-Developer-Tools/HIP/blob/master/docs/markdown/hip_kernel_language.md) +- [HIP Runtime API (Doxygen)](http://rocm-developer-tools.github.io/HIP) +- [HIP Porting Guide](https://github.com/ROCm-Developer-Tools/HIP/blob/master/docs/markdown/hip_porting_guide.md) +- [HIP Terminology](https://github.com/ROCm-Developer-Tools/HIP/blob/master/docs/markdown/hip_terms.md) (including Rosetta Stone of GPU computing terms across CUDA/HIP/HC/AMP/OpenL) +- [HIPIFY](https://github.com/ROCm-Developer-Tools/HIP/blob/master/hipify-clang/README.md) +- [Developer/CONTRIBUTING Info](https://github.com/ROCm-Developer-Tools/HIP/blob/master/CONTRIBUTING.md) +- [Release Notes](https://github.com/ROCm-Developer-Tools/HIP/blob/master/RELEASE.md) diff --git a/projects/hip/src/h2f.cpp b/projects/hip/src/h2f.cpp index a8c60e7c48..84d067166f 100644 --- a/projects/hip/src/h2f.cpp +++ b/projects/hip/src/h2f.cpp @@ -57,12 +57,14 @@ static inline std::uint32_t __convert_float_to_half(float a) noexcept { // On machines without fp16 instructions, clang lowers llvm.convert.from.fp16 // to call of this function. -extern "C" float __gnu_h2f_ieee(unsigned short h){ +extern "C" __attribute__((visibility("default"))) +float __gnu_h2f_ieee(unsigned short h){ return __convert_half_to_float((std::uint32_t) h); } // On machines without fp16 instructions, clang lowers llvm.convert.to.fp16 // to call of this function. -extern "C" unsigned short __gnu_f2h_ieee(float f){ +extern "C" __attribute__((visibility("default"))) +unsigned short __gnu_f2h_ieee(float f){ return (unsigned short)__convert_float_to_half(f); } diff --git a/projects/hip/src/hip_clang.cpp b/projects/hip/src/hip_clang.cpp index 93f5a82a2f..85aa0ad810 100644 --- a/projects/hip/src/hip_clang.cpp +++ b/projects/hip/src/hip_clang.cpp @@ -28,6 +28,7 @@ THE SOFTWARE. 
#include "hip_hcc_internal.h" #include "hip_fatbin.h" #include "trace_helper.h" +#include "program_state.inl" #ifdef __GNUC__ #pragma GCC visibility push (default) @@ -94,8 +95,10 @@ __hipRegisterFatBinary(const void* data) module->executable, agent); if (module->executable.handle) { - modules->at(deviceId) = module; - tprintf(DB_FB, "Loaded code object for %s\n", name); + hip_impl::program_state_impl::read_kernarg_metadata(image, module->kernargs); + modules->at(deviceId) = module; + + tprintf(DB_FB, "Loaded code object for %s, args size=%ld\n", name, module->kernargs.size()); } else { fprintf(stderr, "Failed to load code object for %s\n", name); abort(); @@ -157,16 +160,215 @@ extern "C" void __hipRegisterFunction( g_functions.insert(std::make_pair(hostFunction, std::move(functions))); } +static inline const char* hsa_strerror(hsa_status_t status) { + const char* str = nullptr; + if (hsa_status_string(status, &str) == HSA_STATUS_SUCCESS) { + return str; + } + return "Unknown error"; +} + +struct RegisteredVar { +public: + RegisteredVar(): size_(0), devicePtr_(nullptr) {} + ~RegisteredVar() {} + + static inline const char* hsa_strerror(hsa_status_t status) { + const char* str = nullptr; + if (hsa_status_string(status, &str) == HSA_STATUS_SUCCESS) { + return str; + } + return "Unknown error"; +} + +hipDeviceptr_t getdeviceptr() const { return devicePtr_; }; + size_t getvarsize() const { return size_; }; + + size_t size_; // Size of the variable + hipDeviceptr_t devicePtr_; //Device Memory Address of the variable. 
+}; + +struct DeviceVar { + void* shadowVptr; + std::string hostVar; + size_t size; + std::vector* modules; + std::vector rvars; + bool dyn_undef; +}; + +std::unordered_multimap g_vars; + +//The logic follows PlatformState::getGlobalVar in VDI RT +static DeviceVar* findVar(std::string hostVar, int deviceId, hipModule_t hmod) { + DeviceVar* dvar = nullptr; + if (hmod != nullptr) { + // If module is provided, then get the var only from that module + auto var_range = g_vars.equal_range(hostVar); + for (auto it = var_range.first; it != var_range.second; ++it) { + if ((*it->second.modules)[deviceId] == hmod) { + dvar = &(it->second); + break; + } + } + } else { + // If var count is < 2, return the var + if (g_vars.count(hostVar) < 2) { + auto it = g_vars.find(hostVar); + dvar = ((it == g_vars.end()) ? nullptr : &(it->second)); + } else { + // If var count is > 2, return the original var, + // if original var count != 1, return g_vars.end()/Invalid + size_t orig_global_count = 0; + auto var_range = g_vars.equal_range(hostVar); + for (auto it = var_range.first; it != var_range.second; ++it) { + // when dyn_undef is set, it is a shadow var + if (it->second.dyn_undef == false) { + ++orig_global_count; + dvar = &(it->second); + } + } + dvar = ((orig_global_count == 1) ? 
dvar : nullptr); + } + } + return dvar; +} + +hipError_t ihipGetGlobalVar(hipDeviceptr_t* dev_ptr, size_t* size_ptr, + const char* hostVar, hipModule_t hmod) { + GET_TLS(); + auto ctx = ihipGetTlsDefaultCtx(); + + if (!ctx) return hipErrorInvalidValue; + + auto device = ctx->getDevice(); + + if (!device) return hipErrorInvalidValue; + + ihipDevice_t* currentDevice = ihipGetDevice(device->_deviceId); + + if (!currentDevice) return hipErrorInvalidValue; + + int deviceId = device->_deviceId; + + DeviceVar* dvar = findVar(std::string(hostVar), deviceId, hmod); + if (dvar == nullptr) return hipErrorInvalidValue; + + if (dvar->rvars[deviceId].getdeviceptr() == nullptr) return hipErrorInvalidValue; + + *size_ptr = dvar->rvars[deviceId].getvarsize(); + *dev_ptr = dvar->rvars[deviceId].getdeviceptr(); + return hipSuccess; +} + +static bool createGlobalVarObj(const hsa_executable_t& hsaExecutable, const hsa_agent_t& hasAgent, + const char* global_name, void** device_pptr, size_t* bytes) { + hsa_status_t status = HSA_STATUS_SUCCESS; + hsa_symbol_kind_t sym_type; + hsa_executable_symbol_t global_symbol; + std::string buildLog; + + /* Find HSA Symbol by name */ + status = hsa_executable_get_symbol_by_name(hsaExecutable, global_name, &hasAgent, + &global_symbol); + if (status != HSA_STATUS_SUCCESS) { + buildLog += "Error: Failed to find the Symbol by Name: "; + buildLog += hsa_strerror(status); + tprintf(DB_FB, "createGlobalVarObj: %s\n", buildLog.c_str()); + return false; + } + + /* Find HSA Symbol Type */ + status = hsa_executable_symbol_get_info(global_symbol, HSA_EXECUTABLE_SYMBOL_INFO_TYPE, + &sym_type); + if (status != HSA_STATUS_SUCCESS) { + buildLog += "Error: Failed to find the Symbol Type : "; + buildLog += hsa_strerror(status); + tprintf(DB_FB, "createGlobalVarObj: %s\n", buildLog.c_str()); + return false; + } + + /* Make sure symbol type is VARIABLE */ + if (sym_type != HSA_SYMBOL_KIND_VARIABLE) { + buildLog += "Error: Symbol is not of type VARIABLE : "; + buildLog 
+= hsa_strerror(status); + tprintf(DB_FB, "createGlobalVarObj: %s\n", buildLog.c_str()); + return false; + } + + /* Retrieve the size of the variable */ + status = hsa_executable_symbol_get_info(global_symbol, HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_SIZE, bytes); + + if (status != HSA_STATUS_SUCCESS) { + buildLog += "Error: Failed to retrieve the Symbol Size : "; + buildLog += hsa_strerror(status); + tprintf(DB_FB, "createGlobalVarObj: %s\n", buildLog.c_str()); + return false; + } + + /* Find HSA Symbol Address */ + status = hsa_executable_symbol_get_info(global_symbol, + HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ADDRESS, device_pptr); + if (status != HSA_STATUS_SUCCESS) { + buildLog += "Error: Failed to find the Symbol Address : "; + buildLog += hsa_strerror(status); + tprintf(DB_FB, "createGlobalVarObj: %s\n", buildLog.c_str()); + return false; + } else { + tprintf(DB_FB, "createGlobalVarObj: var %s : device=%p, size=%zu\n", global_name, *device_pptr, *bytes); + } + + return true; +} + +// Registers a device-side global variable. +// For each global variable in device code, there is a corresponding shadow +// global variable in host code. The shadow host variable is used to keep +// track of the value of the device side global variable between kernel +// executions. +// The basic logic is taken from VDI RT, but there is much difference. 
extern "C" void __hipRegisterVar( - std::vector* modules, - char* hostVar, - char* deviceVar, - const char* deviceName, - int ext, - int size, - int constant, - int global) + std::vector* modules, // The device modules containing code object + char* var, // The shadow variable in host code + char* hostVar, // Variable name in host code + const char* deviceVar, // Variable name in device code + int ext, // Whether this variable is external + int size, // Size of the variable + int constant, // Whether this variable is constant + int global) // Unknown, always 0 { + HIP_INIT_API(__hipRegisterVar, modules, var, hostVar, deviceVar, ext, size, constant, global); + + DeviceVar dvar{var, std::string{ hostVar }, static_cast(size), modules, + std::vector{ g_deviceCnt }, false }; + + for (int deviceId = 0; deviceId < g_deviceCnt; deviceId++) { + auto device = ihipGetDevice(deviceId); + if(!device) { + continue; + } + hsa_executable_t& executable = (*modules)[deviceId]->executable; + hsa_agent_t& agent = g_allAgents[deviceId + 1]; + size_t bytes = 0; + hipDeviceptr_t devicePtr = nullptr; + + bool success = createGlobalVarObj(executable, agent, hostVar, &devicePtr, &bytes); + if(!success) { + return; + } + dvar.rvars[deviceId].devicePtr_ = devicePtr; + dvar.rvars[deviceId].size_ = bytes; + + hc::AmPointerInfo ptrInfo(nullptr, devicePtr, devicePtr, bytes, device->_acc, true, false); + hc::am_memtracker_add(devicePtr, ptrInfo); + + #if USE_APP_PTR_FOR_CTX + hc::am_memtracker_update(devicePtr, device->_deviceId, 0u, ihipGetTlsDefaultCtx()); + #else + hc::am_memtracker_update(devicePtr, device->_deviceId, 0u); + #endif + } + g_vars.insert(std::make_pair(std::string(hostVar), dvar)); } extern "C" void __hipUnregisterFatBinary(std::vector* modules) @@ -226,6 +428,41 @@ extern "C" hipError_t __hipPopCallConfiguration( return hipSuccess; } +int getCurrentDeviceId() +{ + GET_TLS(); + + int deviceId = 0; + auto ctx = ihipGetTlsDefaultCtx(); + + if(!ctx) return deviceId; + + 
LockedAccessor_CtxCrit_t crit(ctx->criticalData()); + + if(crit->_execStack.size() != 0) + { + auto &exec = crit->_execStack.top(); + + if (exec._hStream) { + deviceId = exec._hStream->getDevice()->_deviceId; + } else if (ctx->getDevice()) { + deviceId = ctx->getDevice()->_deviceId; + } + } else if (ctx->getDevice()) { + deviceId = ctx->getDevice()->_deviceId; + } + return deviceId; +} + +hipFunction_t ihipGetDeviceFunction(const void *hostFunction) +{ + int deviceId = getCurrentDeviceId(); + auto it = g_functions.find(hostFunction); + if (it == g_functions.end() || !it->second[deviceId]) { + return nullptr; + } + return it->second[deviceId]; +} hipError_t hipSetupArgument( const void *arg, diff --git a/projects/hip/src/hip_hcc_internal.h b/projects/hip/src/hip_hcc_internal.h index b1777955aa..803abe28e2 100644 --- a/projects/hip/src/hip_hcc_internal.h +++ b/projects/hip/src/hip_hcc_internal.h @@ -33,7 +33,7 @@ THE SOFTWARE. #include "hip_prof_api.h" #include "hip_util.h" #include "env.h" - +#include #if (__hcc_workweek__ < 16354) #error("This version of HIP requires a newer version of HCC."); @@ -1009,6 +1009,18 @@ hipError_t hipModuleGetFunctionEx(hipFunction_t* hfunc, hipModule_t hmod, hipStream_t ihipSyncAndResolveStream(hipStream_t, bool lockAcquired = 0); hipError_t ihipStreamSynchronize(TlsData *tls, hipStream_t stream); +/** + * @brief Copies the memory address and size of symbol @p symbolName + * + * @param[in] symbolName - Symbol on device + * @param[out] devPtr - Pointer to a pointer to the memory referred to by the symbol + * @param[out] size - Pointer to the size of the symbol + * @return #hipSuccess, #hipErrorNotInitialized, #hipErrorNotFound, #hipErrorInvalidValue + * + */ +hipError_t ihipGetGlobalVar(hipDeviceptr_t* dev_ptr, size_t* size_ptr, const char* hostVar, + hipModule_t hmod = nullptr); + // Stream printf functions: inline std::ostream& operator<<(std::ostream& os, const ihipStream_t& s) { os << "stream:"; @@ -1080,4 +1092,14 @@ static inline 
ihipCtx_t* iihipGetTlsDefaultCtx(TlsData* tls) { return tls->defaultCtx; } +/** + * @brief Get device function from host kernel function pointer + * Needed only for clang + HIP-HCC RT + * + * @param [in] hostFunction host kernel function pointer + * + * @returns hipFuntion_t, nullptr + */ +hipFunction_t ihipGetDeviceFunction(const void *hostFunction); + #endif diff --git a/projects/hip/src/hip_module.cpp b/projects/hip/src/hip_module.cpp index e98afa3294..0f608d9843 100644 --- a/projects/hip/src/hip_module.cpp +++ b/projects/hip/src/hip_module.cpp @@ -344,6 +344,8 @@ hipError_t ihipExtLaunchMultiKernelMultiDevice(hipLaunchParams* launchParamsList if (kds[i] == nullptr) { return hipErrorInvalidValue; } + if (!kds[i]->_kernarg_layout.empty()) continue; + hip_impl::kernargs_size_align kargs = ps.get_kernargs_size_align( reinterpret_cast(lp.func)); kds[i]->_kernarg_layout = *reinterpret_cast>*>( @@ -397,6 +399,14 @@ hipError_t ihipExtLaunchMultiKernelMultiDevice(hipLaunchParams* launchParamsList return result; } +__attribute__((visibility("default"))) +hipError_t hipExtLaunchMultiKernelMultiDevice(hipLaunchParams* launchParamsList, + int numDevices, unsigned int flags) { + HIP_INIT_API(hipExtLaunchMultiKernelMultiDevice, launchParamsList, numDevices, flags); + auto& ps = hip_impl::get_program_state(); + return ihipExtLaunchMultiKernelMultiDevice(launchParamsList, numDevices, flags, ps); +} + void getGprsLdsUsage(hipFunction_t f, size_t* usedVGPRS, size_t* usedSGPRS, size_t* usedLDS) { if (f->_is_code_object_v3) { @@ -736,7 +746,6 @@ hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsL mg_sync *mg_sync_ptr = 0; vector mg_info_ptr; - result = hip_internal::ihipHostMalloc(tls, (void **)&mg_sync_ptr, sizeof(mg_sync), hipHostMallocDefault, true); if (result != hipSuccess) { return hipErrorInvalidValue; @@ -1091,7 +1100,12 @@ namespace hip_impl { hipError_t agent_globals::read_agent_global_from_process(hipDeviceptr_t* dptr, size_t* bytes, const 
char* name) { - return impl->read_agent_global_from_process(dptr, bytes, name); + hipError_t result = impl->read_agent_global_from_process(dptr, bytes, name); + if(result != hipSuccess) { + // For Clang Compiler + Hcc Rt + result = ihipGetGlobalVar(dptr, bytes, name); + } + return result; } } // Namespace hip_impl. @@ -1259,19 +1273,34 @@ hipError_t ihipModuleGetFunction(TlsData *tls, hipFunction_t* func, hipModule_t if (!*func) return hipErrorInvalidValue; std::string name_str(name); + std::string namekd_str(name_str + ".kd"); + bool kernel_by_namekd = false; + auto kernel = find_kernel_by_name(hmod->executable, name_str.c_str(), agent); if (kernel.handle == 0u) { - name_str.append(".kd"); - kernel = find_kernel_by_name(hmod->executable, name_str.c_str(), agent); + kernel_by_namekd = true; //Find kernel by namekd_str + kernel = find_kernel_by_name(hmod->executable, namekd_str.c_str(), agent); } if (kernel.handle == 0u) return hipErrorNotFound; + //For hipModuleLoad(), hmod->kernargs must contain an args with key + //name_str or namekd_str. + //For hipLaunchKernelGGL(), hmod->kernargs is empty, thus we need + //insert hmod->kernargs[name_str] + auto it = hmod->kernargs.find(name_str); //Look up args from the original name + if (it == hmod->kernargs.end()) { + it = hmod->kernargs.find(namekd_str); //Look up args from .kd name + } + // TODO: refactor the whole ihipThisThat, which is a mess and yields the // below, due to hipFunction_t being a pointer to ihipModuleSymbol_t. + func[0][0] = *static_cast( - Kernel_descriptor{kernel_object(kernel), name_str, hmod->kernargs[name_str]}); + Kernel_descriptor{kernel_object(kernel), + kernel_by_namekd ? namekd_str : name_str, + it != hmod->kernargs.end() ? 
it->second : hmod->kernargs[name_str]}); return hipSuccess; } diff --git a/projects/hip/src/hiprtc.cpp b/projects/hip/src/hiprtc.cpp index 4efdbad653..a11207f337 100644 --- a/projects/hip/src/hiprtc.cpp +++ b/projects/hip/src/hiprtc.cpp @@ -250,7 +250,7 @@ struct _hiprtcProgram { const auto it{find_if(reader.sections.begin(), reader.sections.end(), [](const section* x) { - return x->get_name() == ".kernel"; + return (x->get_name() == ".hip_fatbin") || (x->get_name() == ".kernel"); })}; if (it == reader.sections.end()) return false; @@ -513,7 +513,7 @@ extern "C" hiprtcResult hiprtcCompileProgram(hiprtcProgram p, int n, const char* const auto src{p->writeTemporaryFiles(tmp.path())}; - vector args{hipcc, "-shared"}; + vector args{hipcc, "-fPIC -shared"}; if (n) args.insert(args.cend(), o, o + n); handleTarget(args); diff --git a/projects/hip/src/program_state.inl b/projects/hip/src/program_state.inl index 760dafea22..0314c7d4ed 100644 --- a/projects/hip/src/program_state.inl +++ b/projects/hip/src/program_state.inl @@ -19,6 +19,8 @@ #include #include #include "hc.hpp" +#include "hip_hcc_internal.h" +#include "trace_helper.h" #include @@ -734,6 +736,27 @@ public: != AMD_COMGR_STATUS_SUCCESS) return; + //Look up “.value_kind” to decide whether to ignore it + //See http://llvm.org/docs/AMDGPUUsage.html#code-object-v3-metadata-mattr-code-object-v3 + amd_comgr_metadata_node_t arg_value_kind_md; + if (amd_comgr_metadata_lookup(arg_md, ".value_kind", &arg_value_kind_md) + != AMD_COMGR_STATUS_SUCCESS) + return; + + std::string arg_value_kind{ metadata_to_string(arg_value_kind_md) }; + + if (amd_comgr_destroy_metadata(arg_value_kind_md) + != AMD_COMGR_STATUS_SUCCESS) + return; + + if (arg_value_kind.find("hidden_") == 0) { + if (amd_comgr_destroy_metadata(arg_md) + != AMD_COMGR_STATUS_SUCCESS) + return; + + continue; //Ignore hidden arg + } + amd_comgr_metadata_node_t arg_size_md; if (amd_comgr_metadata_lookup(arg_md, ".size", &arg_size_md) != AMD_COMGR_STATUS_SUCCESS) @@ 
-937,14 +960,16 @@ public: auto it0 = get_functions(agent).find(function_address); - if (it0 == get_functions(agent).cend()) { - hip_throw(std::runtime_error{ + if (it0 != get_functions(agent).cend()) return it0->second; + + // For hip-clang compiler + Hcc RT + hipFunction_t f = ihipGetDeviceFunction((const void*)function_address); + if (f) return reinterpret_cast(*f); + + hip_throw(std::runtime_error{ "No device code available for function: " + std::string(name(function_address)) + ", for agent: " + name(agent)}); - } - - return it0->second; } const std::vector>& diff --git a/projects/hip/tests/hip_tests.txt b/projects/hip/tests/hip_tests.txt index f3ea49a0f9..67a4238520 100644 --- a/projects/hip/tests/hip_tests.txt +++ b/projects/hip/tests/hip_tests.txt @@ -2,8 +2,8 @@ cmake_minimum_required(VERSION 2.8.3) project(hip_tests) # Setup -set(HIP_PATH @CMAKE_INSTALL_PREFIX@) -set(ENV{HIP_PATH} ${HIP_PATH}) +#set(HIP_PATH @CMAKE_INSTALL_PREFIX@) +#set(ENV{HIP_PATH} ${HIP_PATH}) set(HIP_SRC_PATH @hip_SOURCE_DIR@) set(CMAKE_MODULE_PATH "${HIP_PATH}/cmake" ${CMAKE_MODULE_PATH}) include(${HIP_SRC_PATH}/tests/hit/HIT.cmake) diff --git a/projects/hip/tests/hit/HIT.cmake b/projects/hip/tests/hit/HIT.cmake index 828fbe2cc7..1c7f7ff464 100644 --- a/projects/hip/tests/hit/HIT.cmake +++ b/projects/hip/tests/hit/HIT.cmake @@ -109,11 +109,11 @@ macro(PARSE_BUILD_COMMAND _target _sources _hipcc_options _hcc_options _nvcc_opt elseif(_link_options_found) list(APPEND ${_link_options} ${arg}) elseif(_exclude_platforms_found) - set(${_exclude_platforms} ${arg}) + list(APPEND ${_exclude_platforms} ${arg}) elseif(_exclude_runtime_found) - set(${_exclude_runtime} ${arg}) + list(APPEND ${_exclude_runtime} ${arg}) elseif(_exclude_compiler_found) - set(${_exclude_compiler} ${arg}) + list(APPEND ${_exclude_compiler} ${arg}) elseif(_depends_found) list(APPEND ${_depends} ${arg}) else() @@ -160,11 +160,11 @@ macro(PARSE_CUSTOMBUILD_COMMAND _target _buildcmd _exclude_platforms _exclude_ru 
set(_depends_found TRUE) else() if(_exclude_platforms_found) - set(${_exclude_platforms} ${arg}) + list(APPEND ${_exclude_platforms} ${arg}) elseif(_exclude_runtime_found) - set(${_exclude_runtime} ${arg}) + list(APPEND ${_exclude_runtime} ${arg}) elseif(_exclude_compiler_found) - set(${_exclude_compiler} ${arg}) + list(APPEND ${_exclude_compiler} ${arg}) elseif(_depends_found) list(APPEND ${_depends} ${arg}) else() @@ -203,11 +203,11 @@ macro(PARSE_TEST_COMMAND _target _arguments _exclude_platforms _exclude_runtime set(_exclude_compiler_found TRUE) else() if(_exclude_platforms_found) - set(${_exclude_platforms} ${arg}) + list(APPEND ${_exclude_platforms} ${arg}) elseif(_exclude_runtime_found) - set(${_exclude_runtime} ${arg}) + list(APPEND ${_exclude_runtime} ${arg}) elseif(_exclude_compiler_found) - set(${_exclude_compiler} ${arg}) + list(APPEND ${_exclude_compiler} ${arg}) else() list(APPEND ${_arguments} ${arg}) endif() @@ -246,11 +246,11 @@ macro(PARSE_TEST_NAMED_COMMAND _target _testname _arguments _exclude_platforms _ set(_exclude_compiler_found TRUE) else() if(_exclude_platforms_found) - set(${_exclude_platforms} ${arg}) + list(APPEND ${_exclude_platforms} ${arg}) elseif(_exclude_runtime_found) - set(${_exclude_runtime} ${arg}) + list(APPEND ${_exclude_runtime} ${arg}) elseif(_exclude_compiler_found) - set(${_exclude_compiler} ${arg}) + list(APPEND ${_exclude_compiler} ${arg}) else() list(APPEND ${_arguments} ${arg}) endif() @@ -297,13 +297,13 @@ macro(HIT_ADD_FILES _dir _label _parent) string(REGEX REPLACE " " ";" _cmd "${_cmd}") parse_build_command(_target _sources _hipcc_options _hcc_options _nvcc_options _link_options _exclude_platforms _exclude_runtime _exclude_compiler _depends ${_dir} ${_cmd}) string(REGEX REPLACE "/" "." 
target ${_label}/${_target}) - if(_exclude_platforms STREQUAL "all" OR _exclude_platforms STREQUAL ${HIP_PLATFORM}) + if("all" IN_LIST _exclude_platforms OR ${HIP_PLATFORM} IN_LIST _exclude_platforms) insert_into_map("_exclude" "${target}" TRUE) - elseif(NOT _exclude_runtime AND _exclude_compiler STREQUAL ${HIP_COMPILER}) + elseif(NOT _exclude_runtime AND ${HIP_COMPILER} IN_LIST _exclude_compiler) insert_into_map("_exclude" "${target}" TRUE) - elseif(NOT _exclude_compiler AND _exclude_runtime STREQUAL ${HIP_RUNTIME}) + elseif(NOT _exclude_compiler AND ${HIP_RUNTIME} IN_LIST _exclude_runtime) insert_into_map("_exclude" "${target}" TRUE) - elseif(_exclude_runtime STREQUAL ${HIP_RUNTIME} AND _exclude_compiler STREQUAL ${HIP_COMPILER}) + elseif(_exclude_runtime STREQUAL ${HIP_RUNTIME} AND ${HIP_COMPILER} IN_LIST _exclude_compiler) insert_into_map("_exclude" "${target}" TRUE) else() set_source_files_properties(${_sources} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1) @@ -336,13 +336,13 @@ macro(HIT_ADD_FILES _dir _label _parent) string(REGEX REPLACE " " ";" _cmd "${_cmd}") parse_custombuild_command(_target _buildcmd _exclude_platforms _exclude_runtime _exclude_compiler _depends ${_cmd}) string(REGEX REPLACE "/" "." 
target ${_label}/${_target}) - if(_exclude_platforms STREQUAL "all" OR _exclude_platforms STREQUAL ${HIP_PLATFORM}) + if("all" IN_LIST _exclude_platforms OR ${HIP_PLATFORM} IN_LIST _exclude_platforms) insert_into_map("_exclude" "${target}" TRUE) - elseif(NOT _exclude_runtime AND _exclude_compiler STREQUAL ${HIP_COMPILER}) + elseif(NOT _exclude_runtime AND ${HIP_COMPILER} IN_LIST _exclude_compiler) insert_into_map("_exclude" "${target}" TRUE) - elseif(NOT _exclude_compiler AND _exclude_runtime STREQUAL ${HIP_RUNTIME}) + elseif(NOT _exclude_compiler AND ${HIP_RUNTIME} IN_LIST _exclude_runtime) insert_into_map("_exclude" "${target}" TRUE) - elseif(_exclude_runtime STREQUAL ${HIP_RUNTIME} AND _exclude_compiler STREQUAL ${HIP_COMPILER}) + elseif(_exclude_runtime STREQUAL ${HIP_RUNTIME} AND ${HIP_COMPILER} IN_LIST _exclude_compiler) insert_into_map("_exclude" "${target}" TRUE) else() string(REGEX REPLACE ";" " " _buildcmd "${_buildcmd}") @@ -370,10 +370,10 @@ macro(HIT_ADD_FILES _dir _label _parent) parse_test_command(_target _arguments _exclude_platforms _exclude_runtime _exclude_compiler ${_cmd}) string(REGEX REPLACE "/" "." 
target ${_label}/${_target}) read_from_map("_exclude" "${target}" _exclude_test_from_build) - if(_exclude_platforms STREQUAL "all" OR _exclude_platforms STREQUAL ${HIP_PLATFORM}) - elseif(NOT _exclude_runtime AND _exclude_compiler STREQUAL ${HIP_COMPILER}) - elseif(NOT _exclude_compiler AND _exclude_runtime STREQUAL ${HIP_RUNTIME}) - elseif(_exclude_runtime STREQUAL ${HIP_RUNTIME} AND _exclude_compiler STREQUAL ${HIP_COMPILER}) + if("all" IN_LIST _exclude_platforms OR ${HIP_PLATFORM} IN_LIST _exclude_platforms) + elseif(NOT _exclude_runtime AND ${HIP_COMPILER} IN_LIST _exclude_compiler) + elseif(NOT _exclude_compiler AND ${HIP_RUNTIME} IN_LIST _exclude_runtime) + elseif(${HIP_RUNTIME} IN_LIST _exclude_runtime AND ${HIP_COMPILER} IN_LIST _exclude_compiler) elseif(_exclude_test_from_build STREQUAL TRUE) else() make_test(${_label}/${_target} ${_arguments}) @@ -392,10 +392,10 @@ macro(HIT_ADD_FILES _dir _label _parent) parse_test_named_command(_target _testname _arguments _exclude_platforms _exclude_runtime _exclude_compiler ${_cmd}) string(REGEX REPLACE "/" "." 
target ${_label}/${_target}) read_from_map("_exclude" "${target}" _exclude_test_from_build) - if(_exclude_platforms STREQUAL "all" OR _exclude_platforms STREQUAL ${HIP_PLATFORM}) - elseif(NOT _exclude_runtime AND _exclude_compiler STREQUAL ${HIP_COMPILER}) - elseif(NOT _exclude_compiler AND _exclude_runtime STREQUAL ${HIP_RUNTIME}) - elseif(_exclude_runtime STREQUAL ${HIP_RUNTIME} AND _exclude_compiler STREQUAL ${HIP_COMPILER}) + if("all" IN_LIST _exclude_platforms OR ${HIP_PLATFORM} IN_LIST _exclude_platforms) + elseif(NOT _exclude_runtime AND ${HIP_COMPILER} IN_LIST _exclude_compiler) + elseif(NOT _exclude_compiler AND ${HIP_RUNTIME} IN_LIST _exclude_runtime) + elseif(${HIP_RUNTIME} IN_LIST _exclude_runtime AND ${HIP_COMPILER} IN_LIST _exclude_compiler) elseif(_exclude_test_from_build STREQUAL TRUE) else() make_named_test(${_label}/${_target} ${_label}/${_testname}.tst ${_arguments}) diff --git a/projects/hip/tests/src/Negative/memory/hipMemory.cpp b/projects/hip/tests/src/Negative/memory/hipMemory.cpp index b062d05cc1..a71ee948f5 100644 --- a/projects/hip/tests/src/Negative/memory/hipMemory.cpp +++ b/projects/hip/tests/src/Negative/memory/hipMemory.cpp @@ -18,7 +18,7 @@ THE SOFTWARE. */ /* HIT_START - * BUILD: %t %s ../../test_common.cpp + * BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM vdi * TEST: %t * HIT_END */ diff --git a/projects/hip/tests/src/Negative/stream/hipStreamCreateWithFlags.cpp b/projects/hip/tests/src/Negative/stream/hipStreamCreateWithFlags.cpp index 8a1dc07b62..6f0662b82d 100644 --- a/projects/hip/tests/src/Negative/stream/hipStreamCreateWithFlags.cpp +++ b/projects/hip/tests/src/Negative/stream/hipStreamCreateWithFlags.cpp @@ -18,7 +18,7 @@ THE SOFTWARE. 
*/ /* HIT_START - * BUILD: %t %s ../../test_common.cpp + * BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM vdi * TEST: %t * HIT_END */ diff --git a/projects/hip/tests/src/dynamicLoading/complex_loading_behavior.cpp b/projects/hip/tests/src/dynamicLoading/complex_loading_behavior.cpp index 70066dc46d..954fa7da35 100644 --- a/projects/hip/tests/src/dynamicLoading/complex_loading_behavior.cpp +++ b/projects/hip/tests/src/dynamicLoading/complex_loading_behavior.cpp @@ -22,7 +22,7 @@ THE SOFTWARE. /* HIT_START * BUILD_CMD: libfoo_amd %hc %S/%s -o libfoo.so -Xcompiler -fPIC -lpthread -shared -DTEST_SHARED_LIBRARY EXCLUDE_HIP_PLATFORM nvcc - * BUILD_CMD: libfoo_nvidia %hc %S/%s -o libfoo.so -Xcompiler -fPIC -lpthread -shared -DTEST_SHARED_LIBRARY EXCLUDE_HIP_PLATFORM hcc + * BUILD_CMD: libfoo_nvidia %hc %S/%s -o libfoo.so -Xcompiler -fPIC -lpthread -shared -DTEST_SHARED_LIBRARY EXCLUDE_HIP_PLATFORM hcc vdi * BUILD_CMD: %t %hc %S/%s -o %T/%t -ldl * TEST: %t * HIT_END diff --git a/projects/hip/tests/src/gcc/LaunchKernel.c b/projects/hip/tests/src/gcc/LaunchKernel.c index fc59fa9d30..d2fc854510 100644 --- a/projects/hip/tests/src/gcc/LaunchKernel.c +++ b/projects/hip/tests/src/gcc/LaunchKernel.c @@ -19,10 +19,10 @@ /* HIT_START - * BUILD_CMD: gpu.o %hc -I%hip-path/include -g -c %S/gpu.cpp -o %T/gpu.o EXCLUDE_HIP_PLATFORM nvcc - * BUILD_CMD: launchkernel.o %cc -D__HIP_PLATFORM_HCC__ -g -I%hip-path/include -c %S/LaunchKernel.c -o %T/launchkernel.o EXCLUDE_HIP_PLATFORM nvcc - * BUILD_CMD: LaunchKernel %hc %T/launchkernel.o %T/gpu.o -g -Wl,--rpath=%hip-path/lib %hip-path/lib/libhip_hcc.so -o %T/%t DEPENDS gpu.o launchkernel.o EXCLUDE_HIP_PLATFORM nvcc - * TEST: %t EXCLUDE_HIP_PLATFORM nvcc + * BUILD_CMD: gpu.o %hc -I%hip-path/include -g -c %S/gpu.cpp -o %T/gpu.o EXCLUDE_HIP_PLATFORM nvcc vdi + * BUILD_CMD: launchkernel.o %hc -D__HIP_PLATFORM_HCC__ -g -I%hip-path/include -c %S/LaunchKernel.c -o %T/launchkernel.o EXCLUDE_HIP_PLATFORM nvcc vdi + * BUILD_CMD: LaunchKernel 
%hc %T/launchkernel.o %T/gpu.o -g -Wl,--rpath=%hip-path/lib %hip-path/lib/libhip_hcc.so -o %T/%t DEPENDS gpu.o launchkernel.o EXCLUDE_HIP_PLATFORM nvcc vdi + * TEST: %t EXCLUDE_HIP_PLATFORM nvcc vdi * HIT_END */ @@ -36,7 +36,7 @@ bool LaunchKernelArg() dim3 blocks = {1,1,1}; dim3 threads = {1,1,1}; - HIPCHECK(hipLaunchKernel(kernel, blocks, threads, NULL, 0, 0)); + HIPCHECK(hipLaunchKernel((const void *)kernel, blocks, threads, NULL, 0, 0)); return true; } @@ -52,7 +52,7 @@ bool LaunchKernelArg1() HIPCHECK(hipMalloc((void**)&A_d, sizeof(int))); void* Args[]={&A_d}; - HIPCHECK(hipLaunchKernel(kernel1, blocks, threads, Args, 0, 0)); + HIPCHECK(hipLaunchKernel((const void *)kernel1, blocks, threads, Args, 0, 0)); // Get the result back to host memory HIPCHECK(hipMemcpy(&A, A_d, sizeof(int), hipMemcpyDeviceToHost)); @@ -84,7 +84,7 @@ bool LaunchKernelArg2() HIPCHECK(hipMemcpy(B_d, &B, sizeof(int), hipMemcpyHostToDevice)); void* Args[]={&A_d, &B_d}; - HIPCHECK(hipLaunchKernel(kernel2, blocks, threads, Args,0,0)); + HIPCHECK(hipLaunchKernel((const void *)kernel2, blocks, threads, Args,0,0)); // Get the result back to host memory HIPCHECK(hipMemcpy(&A, A_d, sizeof(int), hipMemcpyDeviceToHost)); @@ -123,7 +123,7 @@ bool LaunchKernelArg3() HIPCHECK(hipMemcpy(B_d, &B, sizeof(int), hipMemcpyHostToDevice)); void* Args[]={&A_d, &B_d, &C_d}; - HIPCHECK(hipLaunchKernel(kernel3, blocks, threads, Args,0,0)); + HIPCHECK(hipLaunchKernel((const void *)kernel3, blocks, threads, Args,0,0)); // Get the result back to host memory HIPCHECK(hipMemcpy(&C, C_d, sizeof(int), hipMemcpyDeviceToHost)); @@ -154,7 +154,7 @@ bool LaunchKernelArg4() struct things t = {2,20,200}; void* Args[]={&A_d, &c, &s, &i, &t}; - HIPCHECK(hipLaunchKernel(kernel4, blocks, threads, Args, 0, 0)); + HIPCHECK(hipLaunchKernel((const void *)kernel4, blocks, threads, Args, 0, 0)); // Get the result back to host memory HIPCHECK(hipMemcpy(&A, A_d, sizeof(int), hipMemcpyDeviceToHost)); diff --git 
a/projects/hip/tests/src/gcc/hipMalloc.c b/projects/hip/tests/src/gcc/hipMalloc.c index 19e90d5222..ebf163de28 100644 --- a/projects/hip/tests/src/gcc/hipMalloc.c +++ b/projects/hip/tests/src/gcc/hipMalloc.c @@ -18,10 +18,10 @@ * */ /* HIT_START - * BUILD_CMD: hipMalloc %cc -D__HIP_PLATFORM_NVCC__ -I%hip-path/include -I/usr/local/cuda/include %S/%s -o %T/hipMalloc_nv -L/usr/local/cuda/lib64 -lcudart EXCLUDE_HIP_PLATFORM hcc - * BUILD_CMD: hipMalloc %cc -D__HIP_PLATFORM_HCC__ -I%hip-path/include %S/%s -Wl,--rpath=%hip-path/lib %hip-path/lib/libhip_hcc.so -o %T/hipMalloc_hcc EXCLUDE_HIP_PLATFORM nvcc - * TEST: hipMalloc_nv EXCLUDE_HIP_PLATFORM hcc - * TEST: hipMalloc_hcc EXCLUDE_HIP_PLATFORM nvcc + * BUILD_CMD: hipMalloc %cc -D__HIP_PLATFORM_NVCC__ -I%hip-path/include -I/usr/local/cuda/include %S/%s -o %T/hipMalloc_nv -L/usr/local/cuda/lib64 -lcudart EXCLUDE_HIP_PLATFORM hcc vdi + * BUILD_CMD: hipMalloc %cc -D__HIP_PLATFORM_HCC__ -I%hip-path/include %S/%s -Wl,--rpath=%hip-path/lib %hip-path/lib/libhip_hcc.so -o %T/hipMalloc_hcc EXCLUDE_HIP_PLATFORM nvcc vdi + * TEST: hipMalloc_nv EXCLUDE_HIP_PLATFORM hcc vdi + * TEST: hipMalloc_hcc EXCLUDE_HIP_PLATFORM nvcc vdi * HIT_END */ diff --git a/projects/hip/tests/src/hiprtc/hiprtcGetLoweredName.cpp b/projects/hip/tests/src/hiprtc/hiprtcGetLoweredName.cpp index e7b88d26d2..a63e13af64 100644 --- a/projects/hip/tests/src/hiprtc/hiprtcGetLoweredName.cpp +++ b/projects/hip/tests/src/hiprtc/hiprtcGetLoweredName.cpp @@ -20,7 +20,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ /* HIT_START - * BUILD: %t %s ../test_common.cpp LINK_OPTIONS hiprtc EXCLUDE_HIP_PLATFORM nvcc + * BUILD: %t %s ../test_common.cpp LINK_OPTIONS hiprtc EXCLUDE_HIP_PLATFORM nvcc vdi * TEST: %t * HIT_END */ diff --git a/projects/hip/tests/src/hiprtc/saxpy.cpp b/projects/hip/tests/src/hiprtc/saxpy.cpp index 156a44afe4..a08c1c2399 100755 --- a/projects/hip/tests/src/hiprtc/saxpy.cpp +++ b/projects/hip/tests/src/hiprtc/saxpy.cpp @@ -20,7 +20,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /* HIT_START - * BUILD: %t %s ../test_common.cpp LINK_OPTIONS hiprtc EXCLUDE_HIP_PLATFORM nvcc + * BUILD: %t %s ../test_common.cpp LINK_OPTIONS hiprtc EXCLUDE_HIP_PLATFORM nvcc vdi * TEST: %t * HIT_END */ @@ -143,7 +143,7 @@ int main() hipMemcpyDtoH(hOut.get(), dOut, bufferSize); for (size_t i = 0; i < n; ++i) { - if (a * hX[i] + hY[i] != hOut[i]) { failed("Validation failed."); } + if (fabs(a * hX[i] + hY[i] - hOut[i]) > fabs(hOut[i])* 1e-6) { failed("Validation failed."); } } hipFree(dX); diff --git a/projects/hip/tests/src/hostcall/hipHostcallFuncCall.cpp b/projects/hip/tests/src/hostcall/hipHostcallFuncCall.cpp index 4b73352bb7..15ac3e1f4e 100644 --- a/projects/hip/tests/src/hostcall/hipHostcallFuncCall.cpp +++ b/projects/hip/tests/src/hostcall/hipHostcallFuncCall.cpp @@ -21,7 +21,7 @@ THE SOFTWARE. */ /* HIT_START - * BUILD: %t %s EXCLUDE_HIP_PLATFORM all + * BUILD: %t %s EXCLUDE_HIP_PLATFORM nvcc EXCLUDE_HIP_RUNTIME HCC EXCLUDE_HIP_COMPILER hcc * HIT_END */ diff --git a/projects/hip/tests/src/hostcall/hipHostcallPrintThings.cpp b/projects/hip/tests/src/hostcall/hipHostcallPrintThings.cpp index 742798c0b9..3d1a0c4e07 100644 --- a/projects/hip/tests/src/hostcall/hipHostcallPrintThings.cpp +++ b/projects/hip/tests/src/hostcall/hipHostcallPrintThings.cpp @@ -21,7 +21,7 @@ THE SOFTWARE. 
*/ /* HIT_START - * BUILD: %t %s EXCLUDE_HIP_PLATFORM all + * BUILD: %t %s EXCLUDE_HIP_PLATFORM nvcc EXCLUDE_HIP_RUNTIME HCC EXCLUDE_HIP_COMPILER hcc * HIT_END */ diff --git a/projects/hip/tests/src/kernel/hipExtLaunchKernelGGL.cpp b/projects/hip/tests/src/kernel/hipExtLaunchKernelGGL.cpp index 4164a87f1e..39c660322b 100644 --- a/projects/hip/tests/src/kernel/hipExtLaunchKernelGGL.cpp +++ b/projects/hip/tests/src/kernel/hipExtLaunchKernelGGL.cpp @@ -30,7 +30,7 @@ THE SOFTWARE. void test(size_t N) { size_t Nbytes = N * sizeof(int); - +#if defined(__HIP_PLATFORM_HCC__) && GENERIC_GRID_LAUNCH == 1 && defined(__HCC__) int *A_d, *B_d, *C_d; int *A_h, *B_h, *C_h; @@ -51,6 +51,7 @@ void test(size_t N) { HIPCHECK(hipDeviceSynchronize()); HipTest::checkVectorADD(A_h, B_h, C_h, N); +#endif } int main(int argc, char* argv[]) { diff --git a/projects/hip/tests/src/p2p/hipPeerToPeer_simple.cpp b/projects/hip/tests/src/p2p/hipPeerToPeer_simple.cpp index 205e3fcc04..90e7112356 100644 --- a/projects/hip/tests/src/p2p/hipPeerToPeer_simple.cpp +++ b/projects/hip/tests/src/p2p/hipPeerToPeer_simple.cpp @@ -24,9 +24,9 @@ THE SOFTWARE. /* HIT_START * BUILD: %t %s ../test_common.cpp - * TEST: %t EXCLUDE_HIP_PLATFORM hcc - * TEST: %t --memcpyWithPeer EXCLUDE_HIP_PLATFORM hcc - * TEST: %t --mirrorPeers EXCLUDE_HIP_PLATFORM hcc + * TEST: %t EXCLUDE_HIP_PLATFORM hcc vdi + * TEST: %t --memcpyWithPeer EXCLUDE_HIP_PLATFORM hcc vdi + * TEST: %t --mirrorPeers EXCLUDE_HIP_PLATFORM hcc vdi * HIT_END */ diff --git a/projects/hip/tests/src/printf/hipPrintfAltForms.cpp b/projects/hip/tests/src/printf/hipPrintfAltForms.cpp new file mode 100644 index 0000000000..062b688ed8 --- /dev/null +++ b/projects/hip/tests/src/printf/hipPrintfAltForms.cpp @@ -0,0 +1,76 @@ +/* +Copyright (c) 2020 - present Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +/* HIT_START + * BUILD: %t %s EXCLUDE_HIP_PLATFORM nvcc EXCLUDE_HIP_RUNTIME HCC EXCLUDE_HIP_COMPILER hcc + * TEST: %t EXCLUDE_HIP_PLATFORM nvcc EXCLUDE_HIP_RUNTIME HCC EXCLUDE_HIP_COMPILER hcc + * HIT_END + */ + +#include "test_common.h" +#include "printf_common.h" + +__global__ void test_kernel() { + printf("%#o\n", 042); + printf("%#x\n", 0x42); + printf("%#X\n", 0x42); + printf("%#08x\n", 0x42); + printf("%#f\n", -123.456); + printf("%#F\n", 123.456); + printf("%#e\n", 123.456); + printf("%#E\n", -123.456); + printf("%#g\n", -123.456); + printf("%#G\n", 123.456); + printf("%#a\n", 123.456); + printf("%#A\n", -123.456); + printf("%#.8x\n", 0x42); + printf("%#16.8x\n", 0x42); + printf("%-#16.8x\n", 0x42); +} + +int main(int argc, char **argv) { + std::string reference(R"here(042 +0x42 +0X42 +0x000042 +-123.456000 +123.456000 +1.234560e+02 +-1.234560E+02 +-123.456 +123.456 +0x1.edd2f1a9fbe77p+6 +-0X1.EDD2F1A9FBE77P+6 +0x00000042 + 0x00000042 +0x00000042 +)here"); + + CaptureStream captured(stdout); + hipLaunchKernelGGL(test_kernel, dim3(1), dim3(1), 0, 0); + hipStreamSynchronize(0); + auto CapturedData = captured.getCapturedData(); + std::string device_output = gulp(CapturedData); + + HIPASSERT(device_output == reference); + passed(); +} diff --git a/projects/hip/tests/src/printf/hipPrintfBasic.cpp b/projects/hip/tests/src/printf/hipPrintfBasic.cpp new file mode 100644 index 0000000000..db2ee269db --- /dev/null +++ b/projects/hip/tests/src/printf/hipPrintfBasic.cpp @@ -0,0 +1,275 @@ +/* +Copyright (c) 2020 - present Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/* HIT_START + * BUILD: %t %s EXCLUDE_HIP_PLATFORM nvcc EXCLUDE_HIP_RUNTIME HCC EXCLUDE_HIP_COMPILER hcc + * TEST: %t EXCLUDE_HIP_PLATFORM nvcc EXCLUDE_HIP_RUNTIME HCC EXCLUDE_HIP_COMPILER hcc + * HIT_END + */ + +#include "test_common.h" +#include "printf_common.h" +#include <vector> + +// Global string constants don't work inside device functions, so we +// use a macro to repeat the declaration in host and device contexts. 
+DECLARE_DATA(); + +__global__ void kernel_uniform0(int *retval) { + uint tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; + retval[tid] = printf("Hello World\n"); +} + +static void test_uniform0(int *retval, uint num_blocks, + uint threads_per_block) { + CaptureStream captured(stdout); + + uint num_threads = num_blocks * threads_per_block; + for (uint i = 0; i != num_threads; ++i) { + retval[i] = 0x23232323; + } + + hipLaunchKernelGGL(kernel_uniform0, dim3(num_blocks), dim3(threads_per_block), + 0, 0, retval); + hipStreamSynchronize(0); + auto CapturedData = captured.getCapturedData(); + + for (uint ii = 0; ii != num_threads; ++ii) { + HIPASSERT(retval[ii] == strlen("Hello World\n")); + } + + std::map<std::string, int> linecount; + for (std::string line; std::getline(CapturedData, line);) { + linecount[line]++; + } + + HIPASSERT(linecount.size() == 1); + HIPASSERT(linecount["Hello World"] == num_threads); +} + +__global__ void kernel_uniform1(int *retval) { + uint tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; + retval[tid] = printf("Six times Eight is %d\n", 42); +} + +static void test_uniform1(int *retval, uint num_blocks, + uint threads_per_block) { + CaptureStream captured(stdout); + + uint num_threads = num_blocks * threads_per_block; + for (uint i = 0; i != num_threads; ++i) { + retval[i] = 0x23232323; + } + + hipLaunchKernelGGL(kernel_uniform1, dim3(num_blocks), dim3(threads_per_block), + 0, 0, retval); + hipStreamSynchronize(0); + auto CapturedData = captured.getCapturedData(); + + for (uint ii = 0; ii != num_threads; ++ii) { + HIPASSERT(retval[ii] == strlen("Six times Eight is 42") + 1); + } + + std::map<std::string, int> linecount; + for (std::string line; std::getline(CapturedData, line);) { + linecount[line]++; + } + + HIPASSERT(linecount.size() == 1); + HIPASSERT(linecount["Six times Eight is 42"] == num_threads); +} + +__global__ void kernel_divergent0(int *retval) { + uint tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; + retval[tid] = printf("Thread ID: %d\n", tid); 
+} + +static void test_divergent0(int *retval, uint num_blocks, + uint threads_per_block) { + CaptureStream captured(stdout); + + uint num_threads = num_blocks * threads_per_block; + for (uint i = 0; i != num_threads; ++i) { + retval[i] = 0x23232323; + } + + hipLaunchKernelGGL(kernel_divergent0, dim3(num_blocks), + dim3(threads_per_block), 0, 0, retval); + hipStreamSynchronize(0); + auto CapturedData = captured.getCapturedData(); + + for (uint ii = 0; ii < 10 && ii < num_threads; ++ii) { + HIPASSERT(retval[ii] == 13); + } + + for (uint ii = 10; ii < num_threads; ++ii) { + HIPASSERT(retval[ii] == 14); + } + + std::vector<unsigned long> threadIds; + for (std::string line; std::getline(CapturedData, line);) { + auto pos = line.find(':'); + HIPASSERT(line.substr(0, pos) == "Thread ID"); + threadIds.push_back(std::stoul(line.substr(pos + 2))); + } + + std::sort(threadIds.begin(), threadIds.end()); + HIPASSERT(threadIds.size() == num_threads); + HIPASSERT(threadIds.back() == num_threads - 1); +} + +__global__ void kernel_divergent1(int *retval) { + uint tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; + if (tid % 2) { + retval[tid] = printf("Hello World\n"); + } else { + retval[tid] = -1; + } +} + +static void test_divergent1(int *retval, uint num_blocks, + uint threads_per_block) { + CaptureStream captured(stdout); + + uint num_threads = num_blocks * threads_per_block; + for (uint i = 0; i != num_threads; ++i) { + retval[i] = 0x23232323; + } + + hipLaunchKernelGGL(kernel_divergent1, dim3(num_blocks), + dim3(threads_per_block), 0, 0, retval); + hipStreamSynchronize(0); + auto CapturedData = captured.getCapturedData(); + + for (uint ii = 0; ii != num_threads; ++ii) { + if (ii % 2) { + HIPASSERT(retval[ii] == strlen("Hello World\n")); + } else { + HIPASSERT(retval[ii] == -1); + } + } + + std::map<std::string, int> linecount; + for (std::string line; std::getline(CapturedData, line);) { + linecount[line]++; + } + + HIPASSERT(linecount.size() == 1); + HIPASSERT(linecount["Hello World"] == num_threads / 2); +} + 
+__global__ void kernel_series(int *retval) { + DECLARE_DATA(); + + const uint tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; + int result = 0; + + result += printf("%s\n", msg_long1); + result += printf("%s\n", msg_short); + result += printf("%s\n", msg_long2); + + retval[tid] = result; +} + +static void test_series(int *retval, uint num_blocks, uint threads_per_block) { + CaptureStream captured(stdout); + + uint num_threads = num_blocks * threads_per_block; + for (uint i = 0; i != num_threads; ++i) { + retval[i] = 0x23232323; + } + + hipLaunchKernelGGL(kernel_series, dim3(num_blocks), dim3(threads_per_block), + 0, 0, retval); + hipStreamSynchronize(0); + auto CapturedData = captured.getCapturedData(); + + for (uint ii = 0; ii != num_threads; ++ii) { + HIPASSERT(retval[ii] == + strlen(msg_long1) + strlen(msg_short) + strlen(msg_long2) + 3); + } + + std::map<std::string, int> linecount; + for (std::string line; std::getline(CapturedData, line);) { + linecount[line]++; + } + + HIPASSERT(linecount.size() == 3); + HIPASSERT(linecount[msg_long1] == num_threads); + HIPASSERT(linecount[msg_long2] == num_threads); + HIPASSERT(linecount[msg_short] == num_threads); +} + +__global__ void kernel_divergent_loop() { + DECLARE_DATA(); + + const uint tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; + int result = 0; + + for (int i = 0; i <= tid; ++i) { + printf("%d\n", i); + } +} + +static void test_divergent_loop(uint num_blocks, uint threads_per_block) { + CaptureStream captured(stdout); + + uint num_threads = num_blocks * threads_per_block; + + hipLaunchKernelGGL(kernel_divergent_loop, dim3(num_blocks), dim3(threads_per_block), + 0, 0); + hipStreamSynchronize(0); + auto CapturedData = captured.getCapturedData(); + + std::map<int, int> count; + while (true) { + int i; + CapturedData >> i; + if (CapturedData.fail()) + break; + count[i]++; + } + + HIPASSERT(count.size() == num_threads); + for (int i = 0; i != num_threads; ++i) { + HIPASSERT(count[i] == num_threads - i); + } +} + +int main() { + 
uint num_blocks = 1; + uint threads_per_block = 64; + uint num_threads = num_blocks * threads_per_block; + + void *retval_void; + HIPCHECK(hipHostMalloc(&retval_void, sizeof(int) * num_threads)); + auto retval = reinterpret_cast<int *>(retval_void); + + test_uniform0(retval, num_blocks, threads_per_block); + test_uniform1(retval, num_blocks, threads_per_block); + test_divergent0(retval, num_blocks, threads_per_block); + test_divergent1(retval, num_blocks, threads_per_block); + test_series(retval, num_blocks, threads_per_block); + test_divergent_loop(num_blocks, threads_per_block); + + passed(); +} diff --git a/projects/hip/tests/src/printf/hipPrintfFlags.cpp b/projects/hip/tests/src/printf/hipPrintfFlags.cpp new file mode 100644 index 0000000000..e9feb2e5e8 --- /dev/null +++ b/projects/hip/tests/src/printf/hipPrintfFlags.cpp @@ -0,0 +1,68 @@ +/* +Copyright (c) 2020 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +/* HIT_START + * BUILD: %t %s EXCLUDE_HIP_PLATFORM nvcc EXCLUDE_HIP_RUNTIME HCC EXCLUDE_HIP_COMPILER hcc + * TEST: %t EXCLUDE_HIP_PLATFORM nvcc EXCLUDE_HIP_RUNTIME HCC EXCLUDE_HIP_COMPILER hcc + * HIT_END + */ + +#include "test_common.h" +#include "printf_common.h" + +__global__ void test_kernel() { + printf("%08d\n", 42); + printf("%08i\n", -42); + printf("%08u\n", 42); + printf("%08g\n", 123.456); + printf("%0+8d\n", 42); + printf("%+d\n", -42); + printf("%+08d\n", 42); + printf("%-8s\n", "xyzzy"); + printf("% i\n", -42); + printf("%-16.8d\n", 42); + printf("%16.8d\n", 42); +} + +int main(int argc, char **argv) { + std::string reference(R"here(00000042 +-0000042 +00000042 +0123.456 ++0000042 +-42 ++0000042 +xyzzy +-42 +00000042 + 00000042 +)here"); + + CaptureStream captured(stdout); + hipLaunchKernelGGL(test_kernel, dim3(1), dim3(1), 0, 0); + hipStreamSynchronize(0); + auto CapturedData = captured.getCapturedData(); + std::string device_output = gulp(CapturedData); + + HIPASSERT(device_output == reference); + passed(); +} diff --git a/projects/hip/tests/src/printf/hipPrintfManyDevices.cpp b/projects/hip/tests/src/printf/hipPrintfManyDevices.cpp new file mode 100644 index 0000000000..44c8ed5741 --- /dev/null +++ b/projects/hip/tests/src/printf/hipPrintfManyDevices.cpp @@ -0,0 +1,77 @@ +/* +Copyright (c) 2020 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/* HIT_START + * BUILD: %t %s EXCLUDE_HIP_PLATFORM nvcc EXCLUDE_HIP_RUNTIME HCC EXCLUDE_HIP_COMPILER hcc + * TEST: %t EXCLUDE_HIP_PLATFORM nvcc EXCLUDE_HIP_RUNTIME HCC EXCLUDE_HIP_COMPILER hcc + * HIT_END + */ + +#include "test_common.h" +#include "printf_common.h" + +DECLARE_DATA(); + +__global__ void print_things() { + DECLARE_DATA(); + + uint tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; + const char *msg[] = {msg_short, msg_long1, msg_long2}; + + printf("%s\n", msg[tid % 3]); + if (tid % 3 == 0) + printf("%s\n", msg_short); + printf("%s\n", msg[(tid + 1) % 3]); + printf("%s\n", msg[(tid + 2) % 3]); +} + +int main() { + uint num_blocks = 14; + uint threads_per_block = 250; + uint threads_per_device = num_blocks * threads_per_block; + + int num_devices = 0; + hipGetDeviceCount(&num_devices); + + CaptureStream captured(stdout); + for (int i = 0; i != num_devices; ++i) { + hipSetDevice(i); + hipLaunchKernelGGL(print_things, dim3(num_blocks), dim3(threads_per_block), + 0, 0); + hipDeviceSynchronize(); + } + auto CapturedData = captured.getCapturedData(); + + std::map<std::string, int> linecount; + for (std::string line; std::getline(CapturedData, line);) { + linecount[line]++; + } + + uint num_threads = threads_per_device * num_devices; + HIPASSERT(linecount.size() == 3); + HIPASSERT(linecount[msg_long1] == num_threads); + HIPASSERT(linecount[msg_long2] == num_threads); + HIPASSERT(linecount[msg_short] == + num_threads + ((threads_per_device + 2) / 3) * num_devices); + + passed(); +} diff --git 
a/projects/hip/tests/src/printf/hipPrintfManyWaves.cpp b/projects/hip/tests/src/printf/hipPrintfManyWaves.cpp new file mode 100644 index 0000000000..dae6dde307 --- /dev/null +++ b/projects/hip/tests/src/printf/hipPrintfManyWaves.cpp @@ -0,0 +1,301 @@ +/* +Copyright (c) 2020 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/* HIT_START + * BUILD: %t %s EXCLUDE_HIP_PLATFORM nvcc EXCLUDE_HIP_RUNTIME HCC EXCLUDE_HIP_COMPILER hcc + * TEST: %t EXCLUDE_HIP_PLATFORM nvcc EXCLUDE_HIP_RUNTIME HCC EXCLUDE_HIP_COMPILER hcc + * HIT_END + */ + +#include "test_common.h" +#include "printf_common.h" +#include <vector> + +// Global string constants don't work inside device functions, so we +// use a macro to repeat the declaration in host and device contexts. 
+DECLARE_DATA(); + +__global__ void kernel_mixed0(int *retval) { + DECLARE_DATA(); + + uint tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; + ulong result = 0; + + // Three strings passed as divergent values to the same hostcall. + const char *msg; + switch (tid % 3) { + case 0: + msg = msg_short; + break; + case 1: + msg = msg_long1; + break; + case 2: + msg = msg_long2; + break; + } + + retval[tid] = printf("%s\n", msg); +} + +static void test_mixed0(int *retval, uint num_blocks, uint threads_per_block) { + CaptureStream captured(stdout); + + uint num_threads = num_blocks * threads_per_block; + for (uint i = 0; i != num_threads; ++i) { + retval[i] = 0x23232323; + } + + hipLaunchKernelGGL(kernel_mixed0, dim3(num_blocks), dim3(threads_per_block), + 0, 0, retval); + hipStreamSynchronize(0); + auto CapturedData = captured.getCapturedData(); + + for (uint ii = 0; ii != num_threads; ++ii) { + switch (ii % 3) { + case 0: + HIPASSERT(retval[ii] == strlen(msg_short) + 1); + break; + case 1: + HIPASSERT(retval[ii] == strlen(msg_long1) + 1); + break; + case 2: + HIPASSERT(retval[ii] == strlen(msg_long2) + 1); + break; + } + } + + std::map<std::string, int> linecount; + for (std::string line; std::getline(CapturedData, line);) { + linecount[line]++; + } + + HIPASSERT(linecount.size() == 3); + HIPASSERT(linecount[msg_short] == (num_threads + 2) / 3); + HIPASSERT(linecount[msg_long1] == (num_threads + 1) / 3); + HIPASSERT(linecount[msg_long2] == (num_threads + 0) / 3); +} + +__global__ void kernel_mixed1(int *retval) { + DECLARE_DATA(); + + const uint tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; + + // Three strings passed to divergent hostcalls. 
+ switch (tid % 3) { + case 0: + retval[tid] = printf("%s\n", msg_short); + break; + case 1: + retval[tid] = printf("%s\n", msg_long1); + break; + case 2: + retval[tid] = printf("%s\n", msg_long2); + break; + } +} + +static void test_mixed1(int *retval, uint num_blocks, uint threads_per_block) { + CaptureStream captured(stdout); + + uint num_threads = num_blocks * threads_per_block; + for (uint i = 0; i != num_threads; ++i) { + retval[i] = 0x23232323; + } + + hipLaunchKernelGGL(kernel_mixed1, dim3(num_blocks), dim3(threads_per_block), + 0, 0, retval); + hipStreamSynchronize(0); + auto CapturedData = captured.getCapturedData(); + + for (uint ii = 0; ii != num_threads; ++ii) { + switch (ii % 3) { + case 0: + HIPASSERT(retval[ii] == strlen(msg_short) + 1); + break; + case 1: + HIPASSERT(retval[ii] == strlen(msg_long1) + 1); + break; + case 2: + HIPASSERT(retval[ii] == strlen(msg_long2) + 1); + break; + } + } + + std::map<std::string, int> linecount; + for (std::string line; std::getline(CapturedData, line);) { + linecount[line]++; + } + + HIPASSERT(linecount.size() == 3); + HIPASSERT(linecount[msg_short] == (num_threads + 2) / 3); + HIPASSERT(linecount[msg_long1] == (num_threads + 1) / 3); + HIPASSERT(linecount[msg_long2] == (num_threads + 0) / 3); +} + +__global__ void kernel_mixed2(int *retval) { + DECLARE_DATA(); + + const uint tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; + + // Three different strings. All workitems print all three, but + // in different orders. 
+ const char *msg[] = {msg_short, msg_long1, msg_long2}; + retval[tid] = + printf("%s%s%s\n", msg[tid % 3], msg[(tid + 1) % 3], msg[(tid + 2) % 3]); +} + +static void test_mixed2(int *retval, uint num_blocks, uint threads_per_block) { + CaptureStream captured(stdout); + + uint num_threads = num_blocks * threads_per_block; + for (uint i = 0; i != num_threads; ++i) { + retval[i] = 0x23232323; + } + + hipLaunchKernelGGL(kernel_mixed2, dim3(num_blocks), dim3(threads_per_block), + 0, 0, retval); + hipStreamSynchronize(0); + auto CapturedData = captured.getCapturedData(); + + for (uint ii = 0; ii != num_threads; ++ii) { + HIPASSERT(retval[ii] == + strlen(msg_short) + strlen(msg_long1) + strlen(msg_long2) + 1); + } + + std::map<std::string, int> linecount; + for (std::string line; std::getline(CapturedData, line);) { + linecount[line]++; + } + + std::string str1 = + std::string(msg_short) + std::string(msg_long1) + std::string(msg_long2); + std::string str2 = + std::string(msg_long1) + std::string(msg_long2) + std::string(msg_short); + std::string str3 = + std::string(msg_long2) + std::string(msg_short) + std::string(msg_long1); + + HIPASSERT(linecount.size() == 3); + HIPASSERT(linecount[str1] == (num_threads + 2) / 3); + HIPASSERT(linecount[str2] == (num_threads + 1) / 3); + HIPASSERT(linecount[str3] == (num_threads + 0) / 3); +} + +__global__ void kernel_mixed3(int *retval) { + DECLARE_DATA(); + + const uint tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; + int result = 0; + + result += printf("%s\n", msg_long1); + if (tid % 3 == 0) { + result += printf("%s\n", msg_short); + } + result += printf("%s\n", msg_long2); + + retval[tid] = result; +} + +static void test_mixed3(int *retval, uint num_blocks, uint threads_per_block) { + CaptureStream captured(stdout); + + uint num_threads = num_blocks * threads_per_block; + for (uint i = 0; i != num_threads; ++i) { + retval[i] = 0x23232323; + } + + hipLaunchKernelGGL(kernel_mixed3, dim3(num_blocks), dim3(threads_per_block), + 0, 0, retval); 
+ hipStreamSynchronize(0); + auto CapturedData = captured.getCapturedData(); + + for (uint ii = 0; ii != num_threads; ++ii) { + if (ii % 3 == 0) { + HIPASSERT(retval[ii] == + strlen(msg_long1) + strlen(msg_short) + strlen(msg_long2) + 3); + } else { + HIPASSERT(retval[ii] == strlen(msg_long1) + strlen(msg_long2) + 2); + } + } + + std::map linecount; + for (std::string line; std::getline(CapturedData, line);) { + linecount[line]++; + } + + HIPASSERT(linecount.size() == 3); + HIPASSERT(linecount[msg_long1] == num_threads); + HIPASSERT(linecount[msg_long2] == num_threads); + HIPASSERT(linecount[msg_short] == (num_threads + 2) / 3); +} + +__global__ void kernel_numbers() { + uint tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; + for (uint i = 0; i != 7; ++i) { + uint base = tid * 21 + i * 3; + printf("%d %d %d\n", base, base + 1, base + 2); + } +} + +static void test_numbers(uint num_blocks, uint threads_per_block) { + CaptureStream captured(stdout); + uint num_threads = num_blocks * threads_per_block; + + hipLaunchKernelGGL(kernel_numbers, dim3(num_blocks), dim3(threads_per_block), + 0, 0); + hipStreamSynchronize(0); + auto CapturedData = captured.getCapturedData(); + + std::vector points; + while (true) { + uint i; + CapturedData >> i; + if (CapturedData.fail()) + break; + points.push_back(i); + } + + std::sort(points.begin(), points.end()); + points.erase(std::unique(points.begin(), points.end()), points.end()); + HIPASSERT(points.size() == 21 * num_threads); + HIPASSERT(points.back() == 21 * num_threads - 1); + + passed(); +} + +int main(int argc, char **argv) { + uint num_blocks = 150; + uint threads_per_block = 250; + uint num_threads = num_blocks * threads_per_block; + + void *retval_void; + HIPCHECK(hipHostMalloc(&retval_void, 4 * num_threads)); + auto retval = reinterpret_cast(retval_void); + + test_mixed0(retval, num_blocks, threads_per_block); + test_mixed1(retval, num_blocks, threads_per_block); + test_mixed2(retval, num_blocks, threads_per_block); + 
test_mixed3(retval, num_blocks, threads_per_block); + test_numbers(num_blocks, threads_per_block); + + passed(); +} diff --git a/projects/hip/tests/src/printf/hipPrintfSpecifiers.cpp b/projects/hip/tests/src/printf/hipPrintfSpecifiers.cpp new file mode 100644 index 0000000000..009c76a968 --- /dev/null +++ b/projects/hip/tests/src/printf/hipPrintfSpecifiers.cpp @@ -0,0 +1,90 @@ +/* +Copyright (c) 2020 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +/* HIT_START + * BUILD: %t %s EXCLUDE_HIP_PLATFORM nvcc EXCLUDE_HIP_RUNTIME HCC EXCLUDE_HIP_COMPILER hcc + * TEST: %t EXCLUDE_HIP_PLATFORM nvcc EXCLUDE_HIP_RUNTIME HCC EXCLUDE_HIP_COMPILER hcc + * HIT_END + */ + +#include "test_common.h" +#include "printf_common.h" + +__global__ void test_kernel() { + const char *N = nullptr; + const char *s = "hello world"; + + printf("xyzzy\n"); + printf("%%\n"); + printf("hello %% world\n"); + printf("%%s\n"); + // Two special tests to make sure that the compiler pass correctly + // skips over a '%%' without affecting the logic for locating + // string arguments. + printf("%%s%p\n", (void *)0xf01dab1eca55e77e); + printf("%%c%s\n", "xyzzy"); + printf("%c%c%c\n", 's', 'e', 'p'); + printf("%d\n", -42); + printf("%u\n", 42); + printf("%f\n", 123.456); + printf("%F\n", -123.456); + printf("%e\n", -123.456); + printf("%E\n", 123.456); + printf("%g\n", 123.456); + printf("%G\n", -123.456); + printf("%c\n", 'x'); + printf("%s\n", N); + printf("%p\n", N); + printf("%.*f %*.*s %p\n", 8, 3.14159, 8, 5, s, (void *)0xf01dab1eca55e77e); +} + +int main(int argc, char **argv) { + std::string reference(R"here(xyzzy +% +hello % world +%s +%s0xf01dab1eca55e77e +%cxyzzy +sep +-42 +42 +123.456000 +-123.456000 +-1.234560e+02 +1.234560E+02 +123.456 +-123.456 +x + +(nil) +3.14159000 hello 0xf01dab1eca55e77e +)here"); + + CaptureStream captured(stdout); + hipLaunchKernelGGL(test_kernel, dim3(1), dim3(1), 0, 0); + hipStreamSynchronize(0); + auto CapturedData = captured.getCapturedData(); + std::string device_output = gulp(CapturedData); + + HIPASSERT(device_output == reference); + passed(); +} diff --git a/projects/hip/tests/src/printf/hipPrintfStar.cpp b/projects/hip/tests/src/printf/hipPrintfStar.cpp new file mode 100644 index 0000000000..e4d48e692f --- /dev/null +++ b/projects/hip/tests/src/printf/hipPrintfStar.cpp @@ -0,0 +1,54 @@ +/* +Copyright (c) 2020 - present Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +/* HIT_START + * BUILD: %t %s EXCLUDE_HIP_PLATFORM nvcc EXCLUDE_HIP_RUNTIME HCC EXCLUDE_HIP_COMPILER hcc + * TEST: %t EXCLUDE_HIP_PLATFORM nvcc EXCLUDE_HIP_RUNTIME HCC EXCLUDE_HIP_COMPILER hcc + * HIT_END + */ + +#include "test_common.h" +#include "printf_common.h" + +__global__ void test_kernel() { + printf("%*d\n", 16, 42); + printf("%.*d\n", 8, 42); + printf("%*.*d\n", -16, 8, 42); + printf("%*.*f %s * %.*s\n", 16, 8, 123.456, "hello", 5, "worldxyz"); +} + +int main(int argc, char **argv) { + std::string reference(R"here( 42 +00000042 +00000042 + 123.45600000 hello * world +)here"); + + CaptureStream captured(stdout); + hipLaunchKernelGGL(test_kernel, dim3(1), dim3(1), 0, 0); + hipStreamSynchronize(0); + auto CapturedData = captured.getCapturedData(); + std::string device_output = gulp(CapturedData); + + HIPASSERT(device_output == reference); + passed(); +} diff --git a/projects/hip/tests/src/printf/hipPrintfWidthPrecision.cpp b/projects/hip/tests/src/printf/hipPrintfWidthPrecision.cpp new file mode 100644 index 0000000000..5bf6b65724 --- /dev/null +++ b/projects/hip/tests/src/printf/hipPrintfWidthPrecision.cpp @@ -0,0 +1,74 @@ +/* +Copyright (c) 2020 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/* HIT_START + * BUILD: %t %s EXCLUDE_HIP_PLATFORM nvcc EXCLUDE_HIP_RUNTIME HCC EXCLUDE_HIP_COMPILER hcc + * TEST: %t EXCLUDE_HIP_PLATFORM nvcc EXCLUDE_HIP_RUNTIME HCC EXCLUDE_HIP_COMPILER hcc + * HIT_END + */ + +#include "test_common.h" +#include "printf_common.h" + +__global__ void test_kernel() { + printf("%16d\n", 42); + printf("%.8d\n", 42); + printf("%16.5d\n", -42); + printf("%.8x\n", 0x42); + printf("%.8o\n", 042); + printf("%16.8e\n", 12345.67891); + printf("%16.8f\n", -12345.67891); + printf("%16.8g\n", 12345.67891); + printf("%8.4e\n", -12345.67891); + printf("%8.4f\n", 12345.67891); + printf("%8.4g\n", 12345.67891); + printf("%4.2f\n", 12345.67891); + printf("%.1f\n", 12345.67891); + printf("%.5s\n", "helloxyz"); +} + +int main(int argc, char **argv) { + std::string reference(R"here( 42 +00000042 + -00042 +00000042 +00000042 + 1.23456789e+04 + -12345.67891000 + 12345.679 +-1.2346e+04 +12345.6789 +1.235e+04 +12345.68 +12345.7 +hello +)here"); + + CaptureStream captured(stdout); + hipLaunchKernelGGL(test_kernel, dim3(1), dim3(1), 0, 0); + hipStreamSynchronize(0); + auto CapturedData = captured.getCapturedData(); + std::string device_output = gulp(CapturedData); + + HIPASSERT(device_output == reference); + passed(); +} diff --git a/projects/hip/tests/src/printf/printf_common.h b/projects/hip/tests/src/printf/printf_common.h new file mode 100644 index 0000000000..a2df88db9f --- /dev/null +++ b/projects/hip/tests/src/printf/printf_common.h @@ -0,0 +1,94 @@ +#ifndef COMMON_H +#define 
COMMON_H + +#include +#include +#include +#include +#include +#include +#include +#include + +struct CaptureStream { + int saved_fd; + int orig_fd; + int temp_fd; + + char tempname[13] = "mytestXXXXXX"; + + CaptureStream(FILE *original) { + orig_fd = fileno(original); + saved_fd = dup(orig_fd); + + temp_fd = mkstemp(tempname); + if (errno) { + error(0, errno, "Error"); + assert(false); + } + + fflush(nullptr); + dup2(temp_fd, orig_fd); + if (errno) { + error(0, errno, "Error"); + assert(false); + } + close(temp_fd); + if (errno) { + error(0, errno, "Error"); + assert(false); + } + } + + void restoreStream() { + if (saved_fd == -1) + return; + fflush(nullptr); + dup2(saved_fd, orig_fd); + if (errno) { + error(0, errno, "Error"); + assert(false); + } + close(saved_fd); + if (errno) { + error(0, errno, "Error"); + assert(false); + } + saved_fd = -1; + } + + std::ifstream getCapturedData() { + restoreStream(); + std::ifstream temp(tempname); + return temp; + } + + ~CaptureStream() { + restoreStream(); + remove(tempname); + if (errno) { + error(0, errno, "Error"); + assert(false); + } + } +}; + +static std::string gulp(std::ifstream &input) { + std::string retval; + input.seekg(0, std::ios_base::end); + retval.resize(input.tellg()); + input.seekg(0, std::ios_base::beg); + input.read(&retval[0], retval.size()); + input.close(); + return retval; +} + +#define DECLARE_DATA() \ + const char *msg_short = "Carpe diem."; \ + const char *msg_long1 = "Lorem ipsum dolor sit amet, consectetur nullam. " \ + "In mollis imperdiet nibh nec ullamcorper."; \ + const char *msg_long2 = "Curabitur nec metus sit amet augue vehicula " \ + "ultrices ut id leo. 
Lorem ipsum dolor sit amet, " \ + "consectetur adipiscing elit amet."; + +#endif diff --git a/projects/hip/tests/src/runtimeApi/event/hipEventIpc.cpp b/projects/hip/tests/src/runtimeApi/event/hipEventIpc.cpp index b62e0a16aa..dd6c23e334 100644 --- a/projects/hip/tests/src/runtimeApi/event/hipEventIpc.cpp +++ b/projects/hip/tests/src/runtimeApi/event/hipEventIpc.cpp @@ -24,7 +24,7 @@ THE SOFTWARE. // forces synchronization : set /* HIT_START - * BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc + * BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc vdi * TEST: %t --iterations 10 * HIT_END */ diff --git a/projects/hip/tests/src/runtimeApi/memory/hipMemcpyNegetiveTests.cpp b/projects/hip/tests/src/runtimeApi/memory/hipMemcpyNegetiveTests.cpp index 692d14cec7..febc664f7d 100644 --- a/projects/hip/tests/src/runtimeApi/memory/hipMemcpyNegetiveTests.cpp +++ b/projects/hip/tests/src/runtimeApi/memory/hipMemcpyNegetiveTests.cpp @@ -18,7 +18,7 @@ * */ /* HIT_START - * BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc + * BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc vdi * TEST: %t * HIT_END */ diff --git a/projects/hip/tests/src/runtimeApi/memory/hipMemcpyPeerAsync.cpp b/projects/hip/tests/src/runtimeApi/memory/hipMemcpyPeerAsync.cpp index d08ff2a7e8..23ac329d64 100644 --- a/projects/hip/tests/src/runtimeApi/memory/hipMemcpyPeerAsync.cpp +++ b/projects/hip/tests/src/runtimeApi/memory/hipMemcpyPeerAsync.cpp @@ -60,16 +60,16 @@ int main() { HIPCHECK(hipDeviceSynchronize()); HipTest::checkVectorADD(A_h, B_h, C_h, N); - HIPCHECK(hipStreamCreate(&s)); HIPCHECK(hipSetDevice(1)); + HIPCHECK(hipStreamCreate(&s)); HIPCHECK(hipMemcpyPeerAsync(X_d, 1, A_d, 0, Nbytes, s)); HIPCHECK(hipMemcpyPeerAsync(Y_d, 1, B_d, 0, Nbytes, s)); hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, static_cast(X_d), static_cast(Y_d), Z_d, N); HIPCHECK(hipMemcpy(C_h, Z_d, Nbytes, hipMemcpyDeviceToHost)); - 
HIPCHECK(hipDeviceSynchronize()); HIPCHECK(hipStreamSynchronize(s)); + HIPCHECK(hipDeviceSynchronize()); HipTest::checkVectorADD(A_h, B_h, C_h, N); HIPCHECK(hipStreamDestroy(s)); diff --git a/projects/hip/tests/src/runtimeApi/memory/p2p_copy_coherency.cpp b/projects/hip/tests/src/runtimeApi/memory/p2p_copy_coherency.cpp index 3db855c400..adface243d 100644 --- a/projects/hip/tests/src/runtimeApi/memory/p2p_copy_coherency.cpp +++ b/projects/hip/tests/src/runtimeApi/memory/p2p_copy_coherency.cpp @@ -26,17 +26,19 @@ THE SOFTWARE. * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS --std=c++11 * TEST: %t EXCLUDE_HIP_PLATFORM all * HIT_END + */ #include "hip/hip_runtime.h" #include "test_common.h" -#ifdef __HIP_PLATFORM_HCC__ -#include -#endif - #define USE_HCC_MEMTRACKER 0 /* Debug flag to show the memtracker periodically */ +#if defined(__HIP_PLATFORM_HCC__) && !defined(__HIP_VDI__) +#include +#else +#define USE_HCC_MEMTRACKER 0 +#endif int elementSizes[] = {1, 16, 1024, 524288, 16 * 1000 * 1000}; int nSizes = sizeof(elementSizes) / sizeof(int); @@ -201,7 +203,8 @@ int main(int argc, char* argv[]) { }; for (int index = 0; index < nSizes; index++) { - testMultiGpu(dev0, dev1, elementSizes[index], false /*GPU Synchronization*/); + //ToDo: Enable when verified on all platforms + //testMultiGpu(dev0, dev1, elementSizes[index], false /*GPU Synchronization*/); testMultiGpu(dev0, dev1, elementSizes[index], true /*Host Synchronization*/); } diff --git a/projects/hip/tests/src/runtimeApi/module/hipExtModuleLaunchKernel.cpp b/projects/hip/tests/src/runtimeApi/module/hipExtModuleLaunchKernel.cpp index f4c72ca1c5..a26c9be4a0 100755 --- a/projects/hip/tests/src/runtimeApi/module/hipExtModuleLaunchKernel.cpp +++ b/projects/hip/tests/src/runtimeApi/module/hipExtModuleLaunchKernel.cpp @@ -19,7 +19,7 @@ THE SOFTWARE. 
/* HIT_START * BUILD_CMD: matmul.code %hc --genco %S/matmul.cpp -o matmul.code EXCLUDE_HIP_PLATFORM nvcc - * BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc + * BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc vdi * TEST: %t * HIT_END */ diff --git a/projects/hip/tests/src/runtimeApi/module/hipLaunchCoopMultiKernel.cpp b/projects/hip/tests/src/runtimeApi/module/hipLaunchCoopMultiKernel.cpp index c565426f2d..102387cbe7 100644 --- a/projects/hip/tests/src/runtimeApi/module/hipLaunchCoopMultiKernel.cpp +++ b/projects/hip/tests/src/runtimeApi/module/hipLaunchCoopMultiKernel.cpp @@ -20,7 +20,7 @@ THE SOFTWARE. // Simple test for hipLaunchCooperativeKernelMultiDevice API. /* HIT_START - * BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM all + * BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc * TEST: %t * HIT_END */ @@ -178,8 +178,6 @@ int main() { hipLaunchCooperativeKernelMultiDevice(launchParamsList, nGpu, 0); - HIPCHECK(hipMemcpy(init, dC, sizeof(long), hipMemcpyDeviceToHost)); - if (*dC != (((long)(BufferSizeInDwords) * (BufferSizeInDwords - 1)) / 2)) { std::cout << "Data validation failed for grid size = " << dimGrid.x << " and block size = " << dimBlock.x << "\n"; std::cout << "Test failed! \n"; diff --git a/projects/hip/tests/src/runtimeApi/module/hipLaunchCooperativeKernel.cpp b/projects/hip/tests/src/runtimeApi/module/hipLaunchCooperativeKernel.cpp index 896738892d..e0fcd4108b 100644 --- a/projects/hip/tests/src/runtimeApi/module/hipLaunchCooperativeKernel.cpp +++ b/projects/hip/tests/src/runtimeApi/module/hipLaunchCooperativeKernel.cpp @@ -22,7 +22,7 @@ THE SOFTWARE. // Simple test for hipLaunchCooperativeKernel API. 
/* HIT_START - * BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM all + * BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc * TEST: %t * HIT_END */ diff --git a/projects/hip/tests/src/runtimeApi/module/hipModuleLoadDataMultThreaded.cpp b/projects/hip/tests/src/runtimeApi/module/hipModuleLoadDataMultThreaded.cpp index 6bbbbbef34..11bd6e7d50 100644 --- a/projects/hip/tests/src/runtimeApi/module/hipModuleLoadDataMultThreaded.cpp +++ b/projects/hip/tests/src/runtimeApi/module/hipModuleLoadDataMultThreaded.cpp @@ -18,7 +18,7 @@ THE SOFTWARE. */ /* HIT_START - * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS -std=c++11 + * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS -std=c++11 EXCLUDE_HIP_PLATFORM vdi * TEST: %t * HIT_END */ diff --git a/projects/hip/tests/src/runtimeApi/module/hipModuleTexture2dDrv.cpp b/projects/hip/tests/src/runtimeApi/module/hipModuleTexture2dDrv.cpp index 9ae5883608..e7c254e9fd 100644 --- a/projects/hip/tests/src/runtimeApi/module/hipModuleTexture2dDrv.cpp +++ b/projects/hip/tests/src/runtimeApi/module/hipModuleTexture2dDrv.cpp @@ -21,7 +21,7 @@ THE SOFTWARE. */ /* HIT_START - * BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc + * BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc vdi * TEST: %t * HIT_END */ @@ -33,6 +33,9 @@ THE SOFTWARE. #define fileName "tex2d_kernel.code" +#if __HIP__ +__hip_pinned_shadow__ +#endif texture tex; bool testResult = false; diff --git a/projects/hip/tests/src/runtimeApi/module/tex2d_kernel.cpp b/projects/hip/tests/src/runtimeApi/module/tex2d_kernel.cpp index b12dd1815d..e744d88776 100644 --- a/projects/hip/tests/src/runtimeApi/module/tex2d_kernel.cpp +++ b/projects/hip/tests/src/runtimeApi/module/tex2d_kernel.cpp @@ -21,11 +21,15 @@ THE SOFTWARE. 
*/ /* HIT_START - * BUILD_CMD: tex2d_kernel.code %hc --genco %S/tex2d_kernel.cpp -o tex2d_kernel.code + * BUILD_CMD: tex2d_kernel.code %hc --genco %S/tex2d_kernel.cpp -o tex2d_kernel.code EXCLUDE_HIP_PLATFORM vdi * HIT_END */ #include "hip/hip_runtime.h" + +#if __HIP__ +__hip_pinned_shadow__ +#endif extern texture tex; extern "C" __global__ void tex2dKernel(float* outputData, int width, int height) { diff --git a/projects/hip/tests/src/runtimeApi/occupancy/hipOccupancyMaxActiveBlocksPerMultiprocessor.cpp b/projects/hip/tests/src/runtimeApi/occupancy/hipOccupancyMaxActiveBlocksPerMultiprocessor.cpp index 33ca8263e1..d8385669ea 100644 --- a/projects/hip/tests/src/runtimeApi/occupancy/hipOccupancyMaxActiveBlocksPerMultiprocessor.cpp +++ b/projects/hip/tests/src/runtimeApi/occupancy/hipOccupancyMaxActiveBlocksPerMultiprocessor.cpp @@ -22,7 +22,7 @@ THE SOFTWARE. // Test the Grid_Launch syntax. /* HIT_START - * BUILD: %t %s ../../test_common.cpp + * BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM vdi * TEST: %t * HIT_END */ diff --git a/projects/hip/tests/src/runtimeApi/stream/hipStreamAddCallbackCatch.cpp b/projects/hip/tests/src/runtimeApi/stream/hipStreamAddCallbackCatch.cpp index 5f267bba28..c22b390ecc 100644 --- a/projects/hip/tests/src/runtimeApi/stream/hipStreamAddCallbackCatch.cpp +++ b/projects/hip/tests/src/runtimeApi/stream/hipStreamAddCallbackCatch.cpp @@ -11,7 +11,7 @@ #include "test_common.h" /* HIT_START - * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS -std=c++11 + * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS -std=c++11 EXCLUDE_HIP_PLATFORM vdi * TEST: %t * HIT_END */ diff --git a/projects/hip/tests/src/surface/hipSurfaceObj2D.cpp b/projects/hip/tests/src/surface/hipSurfaceObj2D.cpp index 4580220d1d..2724604279 100644 --- a/projects/hip/tests/src/surface/hipSurfaceObj2D.cpp +++ b/projects/hip/tests/src/surface/hipSurfaceObj2D.cpp @@ -1,5 +1,5 @@ /* HIT_START - * BUILD: %t %s ../test_common.cpp + * BUILD: %t %s ../test_common.cpp 
EXCLUDE_HIP_PLATFORM vdi * TEST: %t * HIT_END */ diff --git a/projects/hip/tests/src/test_common.h b/projects/hip/tests/src/test_common.h index 7d8c39e74c..8897dc938e 100644 --- a/projects/hip/tests/src/test_common.h +++ b/projects/hip/tests/src/test_common.h @@ -55,11 +55,15 @@ THE SOFTWARE. printf("%sPASSED!%s\n", KGRN, KNRM); \ exit(0); +// The real "assert" would have written to stderr. But it is +// sufficient to just fflush here without getting pedantic. This also +// ensures that we don't lose any earlier writes to stdout. #define failed(...) \ printf("%serror: ", KRED); \ printf(__VA_ARGS__); \ printf("\n"); \ printf("error: TEST FAILED\n%s", KNRM); \ + fflush(NULL); \ abort(); #define warn(...) \ diff --git a/projects/hip/tests/src/texture/hipBindTex2DPitch.cpp b/projects/hip/tests/src/texture/hipBindTex2DPitch.cpp index b01402c91d..8c57520c00 100644 --- a/projects/hip/tests/src/texture/hipBindTex2DPitch.cpp +++ b/projects/hip/tests/src/texture/hipBindTex2DPitch.cpp @@ -18,7 +18,7 @@ THE SOFTWARE. */ /*HIT_START - * BUILD: %t %s ../test_common.cpp + * BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM vdi * TEST: %t * HIT_END */ diff --git a/projects/hip/tests/src/texture/hipBindTexRef1DFetch.cpp b/projects/hip/tests/src/texture/hipBindTexRef1DFetch.cpp index 52a0d99ac1..2e962fb05d 100644 --- a/projects/hip/tests/src/texture/hipBindTexRef1DFetch.cpp +++ b/projects/hip/tests/src/texture/hipBindTexRef1DFetch.cpp @@ -22,7 +22,7 @@ THE SOFTWARE. /* HIT_START - * BUILD: %t %s ../test_common.cpp + * BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM vdi * TEST: %t * HIT_END */ diff --git a/projects/hip/tests/src/texture/hipNormalizedFloatValueTex.cpp b/projects/hip/tests/src/texture/hipNormalizedFloatValueTex.cpp index 609f6916f8..b4aa3e9c05 100644 --- a/projects/hip/tests/src/texture/hipNormalizedFloatValueTex.cpp +++ b/projects/hip/tests/src/texture/hipNormalizedFloatValueTex.cpp @@ -21,7 +21,7 @@ THE SOFTWARE. 
*/ /* HIT_START - * BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc + * BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc hcc vdi * TEST: %t * HIT_END */ @@ -30,78 +30,92 @@ THE SOFTWARE. #define SIZE 10 static float getNormalizedValue(const float value, - const enum hipArray_Format texFormat) { - switch (texFormat) { - case HIP_AD_FORMAT_SIGNED_INT8: - return (value / SCHAR_MAX); - case HIP_AD_FORMAT_UNSIGNED_INT8: - return (value / UCHAR_MAX); - case HIP_AD_FORMAT_SIGNED_INT16: - return (value / SHRT_MAX); - case HIP_AD_FORMAT_UNSIGNED_INT16: - return (value / USHRT_MAX); - default: - return value; - } + const hipChannelFormatDesc& desc) { + if ((desc.x == 8) && (desc.f == hipChannelFormatKindSigned)) + return (value / SCHAR_MAX); + if ((desc.x == 8) && (desc.f == hipChannelFormatKindUnsigned)) + return (value / UCHAR_MAX); + if ((desc.x == 16) && (desc.f == hipChannelFormatKindSigned)) + return (value / SHRT_MAX); + if ((desc.x == 16) && (desc.f == hipChannelFormatKindUnsigned)) + return (value / USHRT_MAX); + return value; } #if __HIP__ __hip_pinned_shadow__ #endif -texture textureNormalizedVal_1D; +texture texc; +#if __HIP__ +__hip_pinned_shadow__ +#endif +texture texuc; + +#if __HIP__ +__hip_pinned_shadow__ +#endif +texture texs; + +#if __HIP__ +__hip_pinned_shadow__ +#endif +texture texus; + + +template __global__ void normalizedValTextureTest(unsigned int numElements, float* pDst) { unsigned int elementID = hipThreadIdx_x; if(elementID >= numElements) - return; - float coord =(float) elementID/(numElements-1); - pDst[elementID] = tex1D(textureNormalizedVal_1D, coord); + return; + float coord =(float) elementID/numElements; + if(std::is_same::value) + pDst[elementID] = tex1D(texc, coord); + else if(std::is_same::value) + pDst[elementID] = tex1D(texuc, coord); + else if(std::is_same::value) + pDst[elementID] = tex1D(texs, coord); + else if(std::is_same::value) + pDst[elementID] = tex1D(texus, coord); } template -bool textureTest(enum 
hipArray_Format texFormat) +bool textureTest(texture *tex) { - T hData[] = {65, 66, 67, 68, 69, 70, 71, 72,73,74}; - T *dData = NULL; - HIPCHECK(hipMalloc((void **) &dData, sizeof(T)*SIZE)); - HIPCHECK(hipMemcpyHtoD((hipDeviceptr_t)dData, hData, sizeof(T)*SIZE)); - textureReference* texRef = &textureNormalizedVal_1D; - HIPCHECK(hipTexRefSetAddressMode(texRef, 0, hipAddressModeClamp)); - HIPCHECK(hipTexRefSetAddressMode(texRef, 1, hipAddressModeClamp)); - HIPCHECK(hipTexRefSetFilterMode(texRef, hipFilterModePoint)); - HIPCHECK(hipTexRefSetFlags(texRef, HIP_TRSF_NORMALIZED_COORDINATES)); - HIPCHECK(hipTexRefSetFormat(texRef, texFormat, 1)); - - HIP_ARRAY_DESCRIPTOR desc; - desc.Width = SIZE; - desc.Height = 1; - desc.Format = texFormat; - desc.NumChannels = 1; - HIPCHECK(hipTexRefSetAddress2D(texRef, &desc, (hipDeviceptr_t)dData, sizeof(T)*SIZE)); - - bool testResult = true; + hipChannelFormatDesc desc = hipCreateChannelDesc(); + hipArray_t dData; + HIPCHECK(hipMallocArray(&dData, &desc, SIZE, 1, hipArrayDefault)); + + T hData[] = {65, 66, 67, 68, 69, 70, 71, 72, 73, 74}; + HIPCHECK(hipMemcpy2DToArray(dData, 0, 0, hData, sizeof(T)*SIZE, sizeof(T)*SIZE, 1, hipMemcpyHostToDevice)); + + tex->normalized = true; + tex->channelDesc = desc; + HIPCHECK(hipBindTextureToArray(tex, dData, &desc)); + float *dOutputData = NULL; HIPCHECK(hipMalloc((void **) &dOutputData, sizeof(float)*SIZE)); - - hipLaunchKernelGGL(HIP_KERNEL_NAME(normalizedValTextureTest), dim3(1,1,1), dim3(SIZE,1,1), 0, 0, SIZE, dOutputData); + + hipLaunchKernelGGL(normalizedValTextureTest, dim3(1,1,1), dim3(SIZE,1,1), 0, 0, SIZE, dOutputData); float *hOutputData = new float[SIZE]; - HIPCHECK(hipMemcpyDtoH(hOutputData, (hipDeviceptr_t)dOutputData, (sizeof(float)*SIZE))); - + HIPCHECK(hipMemcpy(hOutputData, dOutputData, (sizeof(float)*SIZE), hipMemcpyDeviceToHost)); + + bool testResult = true; for(int i = 0; i < SIZE; i++) { - float expected = getNormalizedValue(float(hData[i]), texFormat); + float expected = 
getNormalizedValue(float(hData[i]), desc); if(expected != hOutputData[i]) { - printf("mismatch at index:%d for texType:%d output:%f\n",i,texFormat,hOutputData[i]); + printf("mismatch at index:%d output:%f expected:%f\n",i,hOutputData[i],expected); testResult = false; - break; + break; } } - hipFree(dData); - hipFree(dOutputData); - hipUnbindTexture(textureNormalizedVal_1D); + + HIPCHECK(hipFreeArray(dData)); + HIPCHECK(hipFree(dOutputData)); delete [] hOutputData; return testResult; } @@ -118,12 +132,11 @@ int main(int argc, char** argv) std::cout << "Arch - AMD GPU :: " << props.gcnArch << std::endl; #endif - status &= textureTest (HIP_AD_FORMAT_SIGNED_INT8); - status &= textureTest (HIP_AD_FORMAT_UNSIGNED_INT8); - status &= textureTest (HIP_AD_FORMAT_SIGNED_INT16); - status &= textureTest(HIP_AD_FORMAT_UNSIGNED_INT16); - status &= textureTest (HIP_AD_FORMAT_FLOAT); - + status &= textureTest (&texc); + status &= textureTest (&texuc); + status &= textureTest (&texs); + status &= textureTest(&texus); + if(status){ passed(); } diff --git a/projects/hip/tests/src/texture/hipTex1DFetchCheckModes.cpp b/projects/hip/tests/src/texture/hipTex1DFetchCheckModes.cpp index 9b7a36c6be..381d07280c 100644 --- a/projects/hip/tests/src/texture/hipTex1DFetchCheckModes.cpp +++ b/projects/hip/tests/src/texture/hipTex1DFetchCheckModes.cpp @@ -17,8 +17,8 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ -/*HIT_START - * BUILD: %t %s ../test_common.cpp +/* HIT_START + * BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM vdi * TEST: %t * HIT_END */ diff --git a/projects/hip/tests/src/texture/hipTextureRef2D.cpp b/projects/hip/tests/src/texture/hipTextureRef2D.cpp index b476ae8062..5573cf6884 100644 --- a/projects/hip/tests/src/texture/hipTextureRef2D.cpp +++ b/projects/hip/tests/src/texture/hipTextureRef2D.cpp @@ -1,5 +1,5 @@ /* HIT_START - * BUILD: %t %s ../test_common.cpp + * BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM vdi * TEST: %t * HIT_END */ diff --git a/projects/hip/tests/src/texture/simpleTexture2DLayered.cpp b/projects/hip/tests/src/texture/simpleTexture2DLayered.cpp index e5014dae6b..f4d3aac1e5 100644 --- a/projects/hip/tests/src/texture/simpleTexture2DLayered.cpp +++ b/projects/hip/tests/src/texture/simpleTexture2DLayered.cpp @@ -21,7 +21,7 @@ THE SOFTWARE. */ /* HIT_START - * BUILD: %t %s ../test_common.cpp + * BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM vdi * TEST: %t * HIT_END */ diff --git a/projects/hip/tests/src/texture/simpleTexture3D.cpp b/projects/hip/tests/src/texture/simpleTexture3D.cpp index 96b69811de..a494a1a6c0 100644 --- a/projects/hip/tests/src/texture/simpleTexture3D.cpp +++ b/projects/hip/tests/src/texture/simpleTexture3D.cpp @@ -21,12 +21,15 @@ THE SOFTWARE. 
*/ /* HIT_START - * BUILD: %t %s ../test_common.cpp NVCC_OPTIONS -std=c++11 + * BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc vdi * TEST: %t * HIT_END */ #include "test_common.h" +//typedef char T; +const char *sampleName = "simpleTexture3D"; + // Texture reference for 3D texture #if __HIP__ __hip_pinned_shadow__ @@ -44,26 +47,29 @@ __hip_pinned_shadow__ texture texc; template -__global__ void simpleKernel3DArray(T* outputData, +__global__ void simpleKernel3DArray(T* outputData, int width, int height,int depth) { for (int i = 0; i < depth; i++) { - for (int j = 0; j < height; j++) { - for (int k = 0; k < width; k++) { - if(std::is_same::value) - outputData[i*width*height + j*width + k] = tex3D(texf, k, j, i); - else if(std::is_same::value) - outputData[i*width*height + j*width + k] = tex3D(texi, k, j, i); - else if(std::is_same::value) - outputData[i*width*height + j*width + k] = tex3D(texc, k, j, i); - } - } + for (int j = 0; j < height; j++) { + for (int k = 0; k < width; k++) { + if(std::is_same::value) + outputData[i*width*height + j*width + k] = tex3D(texf, k, j, i); + else if(std::is_same::value) + outputData[i*width*height + j*width + k] = tex3D(texi, k, j, i); + else if(std::is_same::value) + outputData[i*width*height + j*width + k] = tex3D(texc, k, j, i); + } + } } } +//////////////////////////////////////////////////////////////////////////////// +//! 
Run a simple test for tex3D +//////////////////////////////////////////////////////////////////////////////// template -void runTest(int width,int height,int depth,texture *tex, hipChannelFormatKind formatKind) +void runTest(int width,int height,int depth,texture *tex) { unsigned int size = width * height * depth * sizeof(T); T* hData = (T*) malloc(size); @@ -78,7 +84,7 @@ void runTest(int width,int height,int depth,texture(); hipArray *arr; HIPCHECK(hipMalloc3DArray(&arr, &channelDesc, make_hipExtent(width, height, depth), hipArrayDefault)); @@ -88,11 +94,7 @@ void runTest(int width,int height,int depth,texture(i,i,i,&texf, hipChannelFormatKindFloat); - runTest(i+1,i,i,&texi, hipChannelFormatKindSigned); - runTest(i,i+1,i,&texc, hipChannelFormatKindSigned); + runTest(i,i,i,&texf); + runTest(i+1,i,i,&texi); + runTest(i,i+1,i,&texc); } passed(); } + diff --git a/projects/hip/vdi/CMakeLists.txt b/projects/hip/vdi/CMakeLists.txt new file mode 100644 index 0000000000..8c1ca1f2de --- /dev/null +++ b/projects/hip/vdi/CMakeLists.txt @@ -0,0 +1,179 @@ +#project("hip") +cmake_minimum_required(VERSION 3.5.1) + +set(CMAKE_SHARED_LINKER_FLAGS "-Wl,--no-keep-memory -Wl,-Bsymbolic -Wl,--unresolved-symbols=report-all -Wl,--version-script=${CMAKE_CURRENT_LIST_DIR}/hip_hcc.map.in") + +if(CMAKE_CXX_FLAGS MATCHES "fsanitize=address") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -shared-libasan") +endif() + +set (CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib) +set (CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib) + +set(LIB_INSTALL_DIR ${CMAKE_INSTALL_PREFIX}/lib) +set(CONFIG_PACKAGE_INSTALL_DIR ${LIB_INSTALL_DIR}/cmake/hip) + +add_definitions(-D__HIP_VDI__ -D__HIP_PLATFORM_HCC__ -DLINUX -D__x86_64__ -D__AMD64__ -DUNIX_OS -DqLittleEndian -DOPENCL_MAJOR=2 -DOPENCL_MINOR=0 -DCL_TARGET_OPENCL_VERSION=220 -DWITH_AQL -DWITH_ONLINE_COMPILER -DATI_OS_LINUX -DATI_ARCH_X86 -DLITTLEENDIAN_CPU -DATI_BITS_64 -DATI_COMP_GCC -DWITH_HSA_DEVICE -DWITH_TARGET_AMDGCN -DOPENCL_EXPORTS 
-DCL_USE_DEPRECATED_OPENCL_1_0_APIS -DCL_USE_DEPRECATED_OPENCL_1_1_APIS -DCL_USE_DEPRECATED_OPENCL_1_2_APIS -DCL_USE_DEPRECATED_OPENCL_2_0_APIS -DVEGA10_ONLY=false -DWITH_LIGHTNING_COMPILER -DUSE_PROF_API) + +if(CMAKE_BUILD_TYPE MATCHES "^Debug$") + add_definitions(-DDEBUG) +endif() + +if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") + add_compile_options("-Wno-ignored-attributes") +endif() + +set(USE_PROF_API "1") + +if(NOT DEFINED LIBVDI_STATIC_DIR) + find_path(LIBVDI_STATIC_DIR + NAMES libamdvdi_static.a + PATHS /opt/rocm/vdi + PATH_SUFFIXES lib + ) +endif() + +if(NOT DEFINED VDI_DIR) + find_path(VDI_DIR + NAMES top.hpp + PATH_SUFFIXES include + PATHS /opt/rocm/vdi + ) +endif() +message("Found Static vdi lib:${LIBVDI_STATIC_DIR} and vdi includes: ${VDI_DIR}") +set(PROF_API_HEADER_PATH ${VDI_DIR}/platform) +############################# +# Profiling API support +############################# +# Generate profiling API macros/structures header +set(PROF_API_STR "${CMAKE_CURRENT_SOURCE_DIR}/../include/hip/hcc_detail/hip_prof_str.h") +set(PROF_API_HDR "${CMAKE_CURRENT_SOURCE_DIR}/../include/hip/hcc_detail/hip_runtime_api.h") +set(PROF_API_SRC "${CMAKE_CURRENT_SOURCE_DIR}") +set(PROF_API_GEN "${CMAKE_CURRENT_SOURCE_DIR}/hip_prof_gen.py") +set(PROF_API_LOG "${PROJECT_BINARY_DIR}/hip_prof_gen.log.txt") +set(PROF_API_CMD "${PROF_API_GEN} -v -t --priv ${OPT_PROF_API} ${PROF_API_HDR} ${PROF_API_SRC} ${PROF_API_STR} >${PROF_API_LOG}") +MESSAGE(STATUS "Generating profiling promitives: ${PROF_API_STR}") +execute_process(COMMAND sh -c "rm -f ${PROF_API_STR}; ${PROF_API_CMD}") +#MESSAGE(COMMAND sh -c "rm -f ${PROF_API_STR}; ${PROF_API_CMD}") +set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS ${PROF_API_GEN} ${PROF_API_HDR} ${PROF_API_STR}) + +# Enable profiling API +if(USE_PROF_API EQUAL 1) + find_path(PROF_API_HEADER_DIR prof_protocol.h + HINTS + ${PROF_API_HEADER_PATH} + PATHS + /opt/rocm/roctracer + PATH_SUFFIXES + include/ext + ) + if(NOT PROF_API_HEADER_DIR) + 
MESSAGE(WARNING "Profiling API header not found. Disabling roctracer integration. Use -DPROF_API_HEADER_PATH=") + else() + add_definitions(-DUSE_PROF_API=1) + include_directories(${PROF_API_HEADER_DIR}) + MESSAGE(STATUS "Profiling API: ${PROF_API_HEADER_DIR}") + endif() +endif() + + +if(NOT DEFINED VDI_DIR OR NOT DEFINED LIBOCL_STATIC_DIR OR NOT DEFINED LIBVDI_STATIC_DIR ) #[[ NOTE(review): this check currently has no effect; its only statement is commented out ]] + # message(FATAL_ERROR "define VDI_DIR, LIBOCL_STATIC_DIR\n") + +endif() +list ( APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules" ) +set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake" "${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules") #[[ keeps the existing search path and adds this directory's cmake/ and cmake/modules/ ]] + +include_directories(${ROCR_INCLUDES}) +if (DEFINED LLVM_INCLUDES AND NOT "${LLVM_INCLUDES}" STREQUAL "") #[[ quoted so a defined-but-empty LLVM_INCLUDES still forms a valid comparison ]] + message(STATUS "LLVM includes found ${LLVM_INCLUDES}") + include_directories(${LLVM_INCLUDES}) +endif() # if (DEFINED LLVM_INCLUDES AND NOT "${LLVM_INCLUDES}" STREQUAL "") + +include_directories(${CMAKE_SOURCE_DIR}) +include_directories(${CMAKE_SOURCE_DIR}/include) +include_directories(${CMAKE_SOURCE_DIR}/elfio) +include_directories(${CMAKE_SOURCE_DIR}/amdocl) +include_directories(${CMAKE_SOURCE_DIR}/include/hip/hcc_detail/elfio) +include_directories(${VDI_DIR}) +include_directories(${VDI_DIR}/include) +include_directories(${VDI_DIR}/compiler/lib) +include_directories(${VDI_DIR}/compiler/lib/include) +include_directories(${VDI_DIR}/elf/utils/common) +include_directories(${VDI_DIR}/elf/utils/libelf) +add_definitions(-DUSE_COMGR_LIBRARY -DCOMGR_DYN_DLL) + find_package(amd_comgr REQUIRED CONFIG + PATHS + /opt/rocm/ + PATH_SUFFIXES + cmake/amd_comgr + lib/cmake/amd_comgr + ) + MESSAGE(STATUS "Code Object Manager found at ${amd_comgr_DIR}.") + +include_directories("$") + +add_definitions(-DBSD_LIBELF) + +add_library(hip64 OBJECT + hip_context.cpp + hip_device.cpp + hip_device_runtime.cpp + hip_error.cpp + hip_event.cpp + hip_memory.cpp + hip_module.cpp + hip_peer.cpp + hip_platform.cpp + hip_profile.cpp + hip_stream.cpp + 
hip_surface.cpp + hip_texture.cpp + hip_activity.cpp + hip_intercept.cpp + hip_rtc.cpp + cl_gl.cpp + cl_lqdflash_amd.cpp + fixme.cpp + ) +set_target_properties(hip64 PROPERTIES POSITION_INDEPENDENT_CODE ON) +set_target_properties( + hip64 PROPERTIES + CXX_STANDARD 14 + CXX_STANDARD_REQUIRED ON + CXX_EXTENSIONS OFF +) + +set(THREADS_PREFER_PTHREAD_FLAG ON) +find_package(Threads REQUIRED) +include(${LIBVDI_STATIC_DIR}/amdvdi_staticTargets.cmake) + +add_library(amdhip64 SHARED + $ + ) + +add_library(amdhip64_static STATIC + $ + ) + +add_library(host INTERFACE) +target_link_libraries(host INTERFACE amdhip64) +add_library(device INTERFACE) +target_link_libraries(device INTERFACE host) + +target_link_libraries(amdhip64_static PRIVATE amdvdi_static pthread dl) +target_link_libraries(amdhip64 PRIVATE amdvdi_static pthread dl) + + +INSTALL(PROGRAMS $ DESTINATION lib COMPONENT MAIN) +INSTALL(PROGRAMS $ DESTINATION lib COMPONENT MAIN) +INSTALL(CODE "execute_process( COMMAND ${CMAKE_COMMAND} -E create_symlink libamdhip64.so lib/libhip_hcc.so )" DESTINATION lib COMPONENT MAIN) + +INSTALL(CODE "execute_process( COMMAND ${CMAKE_COMMAND} -E create_symlink libamdhip64.so lib/libhiprtc.so )" DESTINATION lib COMPONENT MAIN) +INSTALL(FILES ${CMAKE_BINARY_DIR}/lib/libhip_hcc.so DESTINATION lib COMPONENT MAIN) + +INSTALL(FILES ${CMAKE_BINARY_DIR}/lib/libhiprtc.so DESTINATION lib COMPONENT MAIN) + +INSTALL(TARGETS amdhip64_static amdhip64 host device EXPORT hip-targets DESTINATION ${LIB_INSTALL_DIR}) +INSTALL(EXPORT hip-targets DESTINATION ${CONFIG_PACKAGE_INSTALL_DIR} NAMESPACE hip::) + diff --git a/projects/hip/vdi/cl_gl.cpp b/projects/hip/vdi/cl_gl.cpp new file mode 100644 index 0000000000..b0403eb488 --- /dev/null +++ b/projects/hip/vdi/cl_gl.cpp @@ -0,0 +1,2432 @@ +/* Copyright (c) 2010-present Advanced Micro Devices, Inc. 
+ + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. */ + +#include "top.hpp" + +#ifdef _WIN32 +#include +#include +#include +// This is necessary since there are common GL/D3D10 functions +#include "cl_d3d9_amd.hpp" +#include "cl_d3d10_amd.hpp" +#include "cl_d3d11_amd.hpp" +#endif //_WIN32 + +#include +#include + +#include +#include +#include + +#include "cl_common.hpp" +#include "cl_gl_amd.hpp" + +#include "device/device.hpp" + +/* The pixel internal format for DOPP texture defined in gl_enum.h */ +#define GL_BGR8_ATI 0x8083 +#define GL_BGRA8_ATI 0x8088 + +#include +#include + + +/*! \addtogroup API + * @{ + * + * \addtogroup CL_GL_Interops + * + * This section discusses OpenCL functions that allow applications to + * use OpenGL buffer/texture/render-buffer objects as OpenCL memory + * objects. This allows efficient sharing of data between these OpenCL + * and OpenGL. 
The OpenCL API can be used to execute kernels that read + * and/or write memory objects that are also an OpenGL buffer object + * or a texture. An OpenCL image object can be created from an OpenGL + * texture or renderbuffer object. An OpenCL buffer object can be + * created from an OpenGL buffer object. An OpenCL memory object can + * be created from an OpenGL texture/buffer/render-buffer object or + * the default system provided framebuffer if and only if the OpenCL + * clContext has been created from a GL clContext. OpenGL contexts are + * created using platform specific APIs (EGL, CGL, WGL, GLX are some + * of the platform specific APIs that allow applications to create GL + * contexts). The appropriate platform API (such as EGL, CGL, WGL, + * GLX) will be extended to allow a CL clContext to be created from a + * GL clContext. Creating an OpenCL memory object from the default + * system provided framebuffer will also require an appropriate + * extension to the platform API. Refer to the appropriate platform + * API documentation to understand how to create a CL clContext from a + * GL clContext and how to create a CL memory object from the default + * system provided framebuffer. + * + * @{ + * + * \addtogroup clCreateFromGLBuffer + * + * @{ + */ + +/*! \brief Creates an OpenCL buffer object from an OpenGL buffer object. + * + * \param clContext is a valid OpenCL clContext created from an OpenGL clContext. + * + * \param clFlags is a bit-field that is used to specify usage information. Only + * CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY and CL_MEM_READ_WRITE can be used. + * + * \param glBufferName is a GL buffer object name. The GL buffer + * object must have a data store created though it does not need to + * be initialized. The size of the data store will be used to + * determine the size of the CL buffer object. + * + * \param pCpuMem is a pointer to the buffer data that may already be + * allocated by the application. 
The size of the buffer that pCpuMem points + * to must be >= \a size bytes. Passing in a pointer to an already allocated + * buffer on the host and using it as a buffer object allows applications to + * share data efficiently with kernels and the host. + * + * \param errcode_ret will return an appropriate error code. If errcode_ret + * is NULL, no error code is returned. + * + * \return valid non-zero OpenCL buffer object and errcode_ret is set + * to CL_SUCCESS if the buffer object is created successfully. It + * returns a NULL value with one of the following error values + * returned in \a errcode_ret: + * - CL_INVALID_CONTEXT if \a clContext is not a valid clContext. + * - CL_INVALID_VALUE if values specified in \a clFlags are not valid. + * - CL_INVALID_GL_OBJECT if glBufferName is not a GL buffer object or is a + * GL buffer object but does not have a data store created. + * - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required + * by the runtime. + * + * \version 1.0r29 + */ +RUNTIME_ENTRY_RET(cl_mem, clCreateFromGLBuffer, + (cl_context context, cl_mem_flags flags, GLuint bufobj, cl_int* errcode_ret)) { + cl_mem clMemObj = NULL; /* stays NULL; this is the value returned when context or flags validation fails */ + + if (!is_valid(context)) { + *not_null(errcode_ret) = CL_INVALID_CONTEXT; + LogWarning("invalid parameter \"context\""); + return clMemObj; + } + + if (!(((flags & CL_MEM_READ_ONLY) == CL_MEM_READ_ONLY) || /* rejected only when none of the three access bits is present in flags */ + ((flags & CL_MEM_WRITE_ONLY) == CL_MEM_WRITE_ONLY) || + ((flags & CL_MEM_READ_WRITE) == CL_MEM_READ_WRITE))) { + *not_null(errcode_ret) = CL_INVALID_VALUE; + LogWarning("invalid parameter \"flags\""); + return clMemObj; + } + + return (amd::clCreateFromGLBufferAMD(*as_amd(context), flags, bufobj, errcode_ret)); /* checks passed: creation is delegated, and errcode_ret is handed on to the AMD helper */ +} +RUNTIME_EXIT + +/*!
\brief Creates the following: + * - an OpenCL 2D image object from an OpenGL 2D texture object + * or a single face of an OpenGL cubemap texture object, + * - an OpenCL 2D image array object from an OpenGL 2D texture array object, + * - an OpenCL 1D image object from an OpenGL 1D texture object, + * - an OpenCL 1D image buffer object from an OpenGL texture buffer object, + * - an OpenCL 1D image array object from an OpenGL 1D texture array object, + * - an OpenCL 3D image object from an OpenGL 3D texture object. + * + * \param clContext is a valid OpenCL clContext created from an OpenGL clContext. + * + * \param clFlags is a bit-field that is used to specify usage information. + * Only CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY and CL_MEM_READ_WRITE values + * can be used. + * + * \param texture_target must be GL_TEXTURE_1D, GL_TEXTURE_1D_ARRAY, + * GL_TEXTURE_BUFFER, GL_TEXTURE_2D_ARRAY, GL_TEXTURE_3D, + * GL_TEXTURE_2D, GL_TEXTURE_CUBE_MAP_POSITIVE_X, + * GL_TEXTURE_CUBE_MAP_POSITIVE_Y, GL_TEXTURE_CUBE_MAP_POSITIVE_Z, + * GL_TEXTURE_CUBE_MAP_NEGATIVE_X, GL_TEXTURE_CUBE_MAP_NEGATIVE_Y, + * GL_TEXTURE_CUBE_MAP_NEGATIVE_Z or GL_TEXTURE_RECTANGLE_ARB. + * + * \param miplevel is the mipmap level to be used. If \a texture_target + * is GL_TEXTURE_BUFFER, \a miplevel must be 0. + * + * \param texture is a GL 1D, 2D, 3D, 1D array, 2D array, cubemap, + * rectangle or buffer texture object. + * The texture object must be a complete texture as per + * OpenGL rules on texture completeness. The texture format and dimensions + * defined by OpenGL for the specified miplevel of the texture will be + * used to create the OpenCL image memory object. Only GL texture formats + * that map to appropriate image channel order and data type can be used + * to create the OpenCL image memory object. + * + * \param errcode_ret will return an appropriate error code. If \a + * errcode_ret is NULL, no error code is returned. 
+ * + * \return A valid non-zero OpenCL image object and \a errcode_ret is set to + * CL_SUCCESS if the image object is created successfully. It returns a NULL value + * with one of the following error values returned in \a errcode_ret: + * - CL_INVALID_CONTEXT if \a clContext is not a valid clContext or was not + * created from a GL clContext. + * - CL_INVALID_VALUE if values specified in \a clFlags are not valid. + * - CL_INVALID_MIP_LEVEL if \a miplevel is not a valid mip-level for \a texture. + * - CL_INVALID_GL_OBJECT if \a texture is not an appropriate GL 2D texture, + * cubemap or texture rectangle. + * - CL_INVALID_IMAGE_FORMAT_DESCRIPTOR if the OpenGL texture format does not + * map to an appropriate OpenCL image format. + * - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required + * by the runtime. + * + * \version 1.2r07 + */ +RUNTIME_ENTRY_RET(cl_mem, clCreateFromGLTexture, + (cl_context context, cl_mem_flags flags, GLenum texture_target, GLint miplevel, + GLuint texture, cl_int* errcode_ret)) { + cl_mem clMemObj = NULL; /* stays NULL; returned when context or flags validation fails */ + + if (!is_valid(context)) { + *not_null(errcode_ret) = CL_INVALID_CONTEXT; + LogWarning("invalid parameter \"context\""); + return clMemObj; + } + + if (!(((flags & CL_MEM_READ_ONLY) == CL_MEM_READ_ONLY) || /* rejected only when none of the three access bits is present in flags */ + ((flags & CL_MEM_WRITE_ONLY) == CL_MEM_WRITE_ONLY) || + ((flags & CL_MEM_READ_WRITE) == CL_MEM_READ_WRITE))) { + *not_null(errcode_ret) = CL_INVALID_VALUE; + LogWarning("invalid parameter \"flags\""); + return clMemObj; + } + + const std::vector& devices = as_amd(context)->devices(); + bool supportPass = false; + bool sizePass = false; /* NOTE(review): sizePass is never read in this function */ + for (const auto& it : devices) { /* a single device reporting imageSupport_ is enough */ + if (it->info().imageSupport_) { + supportPass = true; + } + } + if (!supportPass) { /* no image-capable device in the context: fail with CL_INVALID_OPERATION */ + *not_null(errcode_ret) = CL_INVALID_OPERATION; + LogWarning("there are no devices in context to support images"); + return static_cast(0); + } + + return amd::clCreateFromGLTextureAMD(*as_amd(context), flags, texture_target, miplevel, texture, /* checks passed: creation is delegated to the AMD helper */
+ errcode_ret); +} +RUNTIME_EXIT + +/*! @} + * \addtogroup clCreateFromGLTexture2D + * @{ + */ + +/*! \brief Create an OpenCL 2D image object from an OpenGL 2D texture object. + * + * \param clContext is a valid OpenCL clContext created from an OpenGL clContext. + * + * \param clFlags is a bit-field that is used to specify usage information. + * Only CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY and CL_MEM_READ_WRITE values + * can be used. + * + * \param target must be GL_TEXTURE_2D, GL_TEXTURE_CUBE_MAP_POSITIVE_X, + * GL_TEXTURE_CUBE_MAP_POSITIVE_Y, GL_TEXTURE_CUBE_MAP_POSITIVE_Z, + * GL_TEXTURE_CUBE_MAP_NEGATIVE_X, GL_TEXTURE_CUBE_MAP_NEGATIVE_Y, + * GL_TEXTURE_CUBE_MAP_NEGATIVE_Z or GL_TEXTURE_RECTANGLE_ARB. + * + * \param miplevel is the mipmap level to be used. + * + * \param texture is a GL 2D texture, cubemap or texture rectangle + * object name. The texture object must be a complete texture as per + * OpenGL rules on texture completeness. The \a texture format and + * dimensions specified using appropriate glTexImage2D call for \a + * miplevel will be used to create the 2D image object. Only GL + * texture formats that map to appropriate image channel order and + * data type can be used to create the 2D image object. + * + * \param errcode_ret will return an appropriate error code. If \a + * errcode_ret is NULL, no error code is returned. + * + * \return A valid non-zero OpenCL image object and \a errcode_ret is set to + * CL_SUCCESS if the image object is created successfully. It returns a NULL value + * with one of the following error values returned in \a errcode_ret: + * - CL_INVALID_CONTEXT if \a clContext is not a valid clContext or was not + * created from a GL clContext. + * - CL_INVALID_VALUE if values specified in \a clFlags are not valid. + * - CL_INVALID_MIP_LEVEL if \a miplevel is not a valid mip-level for \a texture. + * - CL_INVALID_GL_OBJECT if \a texture is not an appropriate GL 2D texture, + * cubemap or texture rectangle. 
+ * - CL_INVALID_IMAGE_FORMAT_DESCRIPTOR if the OpenGL texture format does not + * map to an appropriate OpenCL image format. + * - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required + * by the runtime. + * + * \version 1.0r29 + */ +RUNTIME_ENTRY_RET(cl_mem, clCreateFromGLTexture2D, + (cl_context context, cl_mem_flags flags, GLenum target, GLint miplevel, + GLuint texture, cl_int* errcode_ret)) { + cl_mem clMemObj = NULL; /* stays NULL; returned when context or flags validation fails */ + + if (!is_valid(context)) { + *not_null(errcode_ret) = CL_INVALID_CONTEXT; + LogWarning("invalid parameter \"context\""); + return clMemObj; + } + + if (!(((flags & CL_MEM_READ_ONLY) == CL_MEM_READ_ONLY) || /* rejected only when none of the three access bits is present in flags */ + ((flags & CL_MEM_WRITE_ONLY) == CL_MEM_WRITE_ONLY) || + ((flags & CL_MEM_READ_WRITE) == CL_MEM_READ_WRITE))) { + *not_null(errcode_ret) = CL_INVALID_VALUE; + LogWarning("invalid parameter \"flags\""); + return clMemObj; + } + + const std::vector& devices = as_amd(context)->devices(); + bool supportPass = false; + bool sizePass = false; /* NOTE(review): sizePass is never read in this function */ + for (const auto& it : devices) { /* a single device reporting imageSupport_ is enough */ + if (it->info().imageSupport_) { + supportPass = true; + } + } + if (!supportPass) { /* no image-capable device in the context: fail with CL_INVALID_OPERATION */ + *not_null(errcode_ret) = CL_INVALID_OPERATION; + LogWarning("there are no devices in context to support images"); + return static_cast(0); + } + + return amd::clCreateFromGLTextureAMD(*as_amd(context), flags, target, miplevel, texture, /* same AMD helper that clCreateFromGLTexture calls; target is passed through as given */ + errcode_ret); +} +RUNTIME_EXIT + +/*! @} + * \addtogroup clCreateFromGLTexture3D + * @{ + */ + +/*! \brief Create an OpenCL 3D image object from an OpenGL 3D texture object. + * + * \param clContext is a valid OpenCL clContext created from an OpenGL clContext. + * + * \param clFlags is a bit-field that is used to specify usage information. + * Only CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY and CL_MEM_READ_WRITE values + * can be used. + * + * \param target must be GL_TEXTURE_3D. + * + * \param miplevel is the mipmap level to be used. + * + * \param texture is a GL 3D texture object [name]. 
+ * The texture object must be a complete texture as per OpenGL rules on texture + * completeness. The \a texture format and dimensions specified using appropriate + * glTexImage3D call for \a miplevel will be used to create the 3D image object. + * Only GL texture formats that map to appropriate image channel order and + * data type can be used to create the 3D image object. + * + * \param errcode_ret will return an appropriate error code. If \a errcode_ret + * is NULL, no error code is returned. + * + * \return A valid non-zero OpenCL image object and \a errcode_ret is set to + * CL_SUCCESS if the image object is created successfully. It returns a NULL value + * with one of the following error values returned in \a errcode_ret: + * - CL_INVALID_CONTEXT if \a clContext is not a valid clContext or was not + * created from a GL clContext. + * - CL_INVALID_VALUE if values specified in \a clFlags are not valid. + * - CL_INVALID_MIP_LEVEL if \a miplevel is not a valid mip-level for \a texture. + * - CL_INVALID_GL_OBJECT if \a texture is not a GL 3D texture. + * - CL_INVALID_IMAGE_FORMAT_DESCRIPTOR if the OpenGL texture format does not + * map to an appropriate OpenCL image format. + * - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required + * by the runtime. 
+ * + * \version 1.0r29 + */ +RUNTIME_ENTRY_RET(cl_mem, clCreateFromGLTexture3D, + (cl_context context, cl_mem_flags flags, GLenum target, GLint miplevel, + GLuint texture, cl_int* errcode_ret)) { + cl_mem clMemObj = NULL; /* stays NULL; returned when context or flags validation fails */ + + if (!is_valid(context)) { + *not_null(errcode_ret) = CL_INVALID_CONTEXT; + LogWarning("invalid parameter \"context\""); + return clMemObj; + } + + if (!(((flags & CL_MEM_READ_ONLY) == CL_MEM_READ_ONLY) || /* rejected only when none of the three access bits is present in flags */ + ((flags & CL_MEM_WRITE_ONLY) == CL_MEM_WRITE_ONLY) || + ((flags & CL_MEM_READ_WRITE) == CL_MEM_READ_WRITE))) { + *not_null(errcode_ret) = CL_INVALID_VALUE; + LogWarning("invalid parameter \"flags\""); + return clMemObj; + } + + const std::vector& devices = as_amd(context)->devices(); + bool supportPass = false; + bool sizePass = false; /* NOTE(review): sizePass is never read in this function */ + for (const auto& it : devices) { /* a single device reporting imageSupport_ is enough */ + if (it->info().imageSupport_) { + supportPass = true; + } + } + if (!supportPass) { /* no image-capable device in the context: fail with CL_INVALID_OPERATION */ + *not_null(errcode_ret) = CL_INVALID_OPERATION; + LogWarning("there are no devices in context to support images"); + return static_cast(0); + } + + return amd::clCreateFromGLTextureAMD(*as_amd(context), flags, target, miplevel, texture, /* same AMD helper that clCreateFromGLTexture calls; target is passed through as given */ + errcode_ret); +} +RUNTIME_EXIT + +/*! @} + * \addtogroup clCreateFromGLRenderbuffer + * @{ + */ + +/*! \brief Create an OpenCL 2D image object from an OpenGL renderbuffer object. + * + * \param clContext is a valid OpenCL clContext created from an OpenGL clContext. + * + * \param clFlags is a bit-field that is used to specify usage information. + * Only CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY and CL_MEM_READ_WRITE values + * can be used. + * + * \param renderbuffer is a GL renderbuffer object name. The renderbuffer + * storage must be specified before the image object can be created. Only + * GL renderbuffer formats that map to appropriate image channel order and + * data type can be used to create the 2D image object. + * + * \param errcode_ret will return an appropriate error code. If \a errcode_ret + * is NULL, no error code is returned. 
+ * + * \return A valid non-zero OpenCL image object and \a errcode_ret is set + * to CL_SUCCESS if the image object is created successfully. It returns a + * NULL value with one of the following error values returned in \a errcode_ret: + * - CL_INVALID_CONTEXT if \a clContext is not a valid clContext or was not + * created from a GL clContext. + * - CL_INVALID_VALUE if values specified in \a clFlags are not valid. + * - CL_INVALID_GL_OBJECT if \a renderbuffer is not an GL renderbuffer object. + * - CL_INVALID_IMAGE_FORMAT_DESCRIPTOR if the OpenGL renderbuffer format + * does not map to an appropriate OpenCL image format. + * - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required + * by the runtime. + * + * \version 1.0r29 + */ +RUNTIME_ENTRY_RET(cl_mem, clCreateFromGLRenderbuffer, (cl_context context, cl_mem_flags flags, + GLuint renderbuffer, cl_int* errcode_ret)) { + cl_mem clMemObj = NULL; /* stays NULL; this is the value returned when context or flags validation fails */ + + if (!is_valid(context)) { + *not_null(errcode_ret) = CL_INVALID_CONTEXT; + LogWarning("invalid parameter \"context\""); + return clMemObj; + } + + if (!(((flags & CL_MEM_READ_ONLY) == CL_MEM_READ_ONLY) || /* rejected only when none of the three access bits is present in flags */ + ((flags & CL_MEM_WRITE_ONLY) == CL_MEM_WRITE_ONLY) || + ((flags & CL_MEM_READ_WRITE) == CL_MEM_READ_WRITE))) { + *not_null(errcode_ret) = CL_INVALID_VALUE; + LogWarning("invalid parameter \"flags\""); + return clMemObj; + } + + return (amd::clCreateFromGLRenderbufferAMD(*as_amd(context), flags, renderbuffer, errcode_ret)); /* checks passed: creation is delegated, and errcode_ret is handed on to the AMD helper */ +} +RUNTIME_EXIT + +/*! @} + * \addtogroup clGetGLObjectInfo + * @{ + */ + +/*! \brief Query GL object type from a CL memory object. + * + * \param memobj [is a valid cl_mem object created from a GL object]. + * + * \param gl_object_type returns the type of GL object attached to memobj + * and can be CL_GL_OBJECT_BUFFER, CL_GL_OBJECT_TEXTURE2D, + * CL_GL_OBJECT_TEXTURE_RECTANGLE, CL_GL_OBJECT_TEXTURE3D, or + * CL_GL_OBJECT_RENDERBUFFER. If \a gl_object_type is NULL, it is ignored. 
+ * + * \param gl_object_name returns the GL object name used to create memobj. + * If \a gl_object_name is NULL, it is ignored. + * + * \return One of the following values is returned: + * - CL_SUCCESS if the call was executed successfully. + * - CL_INVALID_MEM_OBJECT if \a memobj is not a valid OpenCL memory object. + * - CL_INVALID_GL_OBJECT if there is no GL object associated with \a memobj. + * + * \version 1.0r29 + */ +RUNTIME_ENTRY(cl_int, clGetGLObjectInfo, + (cl_mem memobj, cl_gl_object_type* gl_object_type, GLuint* gl_object_name)) { + if (!is_valid(memobj)) { + LogWarning("\"memobj\" is not a valid cl_mem object"); + return CL_INVALID_MEM_OBJECT; + } + + amd::InteropObject* interop = as_amd(memobj)->getInteropObj(); + if (NULL == interop) { /* memobj carries no interop object at all */ + LogWarning("CL object \"memobj\" is not created from GL object"); + return CL_INVALID_GL_OBJECT; + } + + amd::GLObject* glObject = interop->asGLObject(); + if (NULL == glObject) { /* an interop object exists but asGLObject() yielded NULL */ + LogWarning("CL object \"memobj\" is not created from GL object"); + return CL_INVALID_GL_OBJECT; + } + + cl_int result; + + cl_gl_object_type clGLType = glObject->getCLGLObjectType(); + result = amd::clGetInfo(clGLType, sizeof(cl_gl_object_type), gl_object_type, NULL); + + GLuint glName = glObject->getGLName(); + result |= amd::clGetInfo(glName, sizeof(GLuint), gl_object_name, NULL); /* NOTE(review): the two status codes are OR-ed together, so if both queries fail with different codes the result is their bitwise union */ + + return result; +} +RUNTIME_EXIT + +/*! @} + * \addtogroup clGetGLTextureInfo + * @{ + */ + +/*! \brief Query additional information about the GL texture object associated + * with \a memobj. + * + * \param memobj [is a valid cl_mem object created from a GL object]. + * + * \param param_name specifies what additional information about the GL + * texture object associated with \a memobj to query: + * - CL_GL_TEXTURE_TARGET (GLenum) to query the \a target argument specified + * in clCreateFromGLTexture2D or clCreateFromGLTexture3D calls. 
+ * - CL_GL_MIPMAP_LEVEL (GLint) to query the \a miplevel argument specified + * in clCreateFromGLTexture2D or clCreateFromGLTexture3D calls. + * + * \param param_value is a pointer to memory where the appropriate result + * being queried is returned. If \a param_value is NULL, it is ignored. + * + * \param param_value_size is used to specify the size in bytes of memory + * pointed to by \a param_value. This size must be >= size of return type as + * described for \a param_name argument (GLenum or GLint). + * \a param_value_size_ret returns the actual size in bytes of data copied to + * \a param_value. If \a param_value_size_ret is NULL, it is ignored. + * + * \return One of the following values is returned: + * - CL_SUCCESS if the function is executed successfully. + * - CL_INVALID_MEM_OBJECT if \a memobj is not a valid OpenCL memory object. + * - CL_INVALID_GL_OBJECT if there is no GL texture object (2D or 3D texture) + * associated with \a memobj. + * - CL_INVALID_VALUE if \a param_name is not valid, or if size in bytes + * specified by \a param_value_size is < size of return type required by + * \a param_name and \a param_value is not NULL, or if \a param_value and + * \a param_value_size_ret are NULL. 
+ * + * \version 1.0r29 + */ +RUNTIME_ENTRY(cl_int, clGetGLTextureInfo, + (cl_mem memobj, cl_gl_texture_info param_name, size_t param_value_size, + void* param_value, size_t* param_value_size_ret)) { + if (!is_valid(memobj)) { + LogWarning("\"memobj\" is not a valid cl_mem object"); + return CL_INVALID_MEM_OBJECT; + } + amd::InteropObject* interop = as_amd(memobj)->getInteropObj(); + if (NULL == interop) { + LogWarning("CL object \"memobj\" is not created from GL object"); + return CL_INVALID_GL_OBJECT; + } + amd::GLObject* glObject = interop->asGLObject(); + if ((NULL == glObject) || (NULL != glObject->asBufferGL())) { /* GL objects that are buffers are rejected as well: this query is for textures */ + LogWarning("CL object \"memobj\" is not created from GL texture"); + return CL_INVALID_GL_OBJECT; + } + + switch (param_name) { + case CL_GL_TEXTURE_TARGET: { + GLenum glTarget = glObject->getGLTarget(); + if (glTarget == GL_TEXTURE_CUBE_MAP) { /* for a cubemap, report the value of getCubemapFace() instead of the generic target */ + glTarget = glObject->getCubemapFace(); + } + return amd::clGetInfo(glTarget, param_value_size, param_value, param_value_size_ret); + } + case CL_GL_MIPMAP_LEVEL: { + GLint mipLevel = glObject->getGLMipLevel(); + return amd::clGetInfo(mipLevel, param_value_size, param_value, param_value_size_ret); + } + case CL_GL_NUM_SAMPLES: { /* a third query handled here, returned as GLsizei */ + GLsizei numSamples = glObject->getNumSamples(); + return amd::clGetInfo(numSamples, param_value_size, param_value, param_value_size_ret); + } + default: + LogWarning("Unknown param_name in clGetGLTextureInfoAMD"); + break; /* falls out of the switch to the CL_INVALID_VALUE return */ + } + + return CL_INVALID_VALUE; +} +RUNTIME_EXIT + +/*! @} + * \addtogroup clEnqueueAcquireExtObjects + * @{ + */ + +/*! \brief Acquire OpenCL memory objects that have been created from external + * objects (OpenGL, D3D). + * + * \param command_queue is a valid command-queue. + * + * \param num_objects is the number of memory objects to be acquired + * in \a mem_objects. + * + * \param mem_objects is a pointer to a list of CL memory objects that refer + * to a GL object (buffer/texture/renderbuffer objects or the framebuffer). 
+ * + * \param event_wait_list specify [is a pointer to] events that need to + * complete before this particular command can be executed. + * If \a event_wait_list is NULL, then this particular command does not wait + * on any event to complete. If \a event_wait_list is NULL, + * \a num_events_in_wait_list must be 0. If \a event_wait_list is not NULL, + * the list of events pointed to by \a event_wait_list must be valid and + * \a num_events_in_wait_list must be greater than 0. The events specified in + * \a event_wait_list act as synchronization points. + * + * \param num_events_in_wait_list specify the number of events in + * \a event_wait_list. It must be 0 if \a event_wait_list is NULL. It must be + * greater than 0 if \a event_wait_list is not NULL. + * + * \param event returns an event object that identifies this particular + * command and can be used to query or queue a wait for this particular + * command to complete. \a event can be NULL in which case it will not be + * possible for the application to query the status of this command or queue a + * wait for this command to complete. + * + * \return One of the following values is returned: + * - CL_SUCCESS if the function is executed successfully. + * - CL_SUCCESS if \a num_objects is 0 and \a mem_objects is NULL; the + * function does nothing. + * - CL_INVALID_VALUE if \a num_objects is zero and \a mem_objects is not a + * NULL value or if \a num_objects > 0 and \a mem_objects is NULL. + * - CL_INVALID_MEM_OBJECT if memory objects in \a mem_objects are not valid + * OpenCL memory objects. + * - CL_INVALID_COMMAND_QUEUE if \a command_queue is not a valid command-queue. + * - CL_INVALID_CONTEXT if clContext associated with \a command_queue was not + * created from an OpenGL clContext. + * - CL_INVALID_GL_OBJECT if memory objects in \a mem_objects have not been + * created from a GL object(s). 
+ * - CL_INVALID_EVENT_WAIT_LIST if \a event_wait_list is NULL and + * \a num_events_in_wait_list > 0, or \a event_wait_list is not NULL and + * \a num_events_in_wait_list is 0, or if event objects in \a event_wait_list + * are not valid events. + * - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources + * required by the OpenCL implementation on the host. + * + * \version 1.0r29 + */ +RUNTIME_ENTRY(cl_int, clEnqueueAcquireGLObjects, + (cl_command_queue command_queue, cl_uint num_objects, const cl_mem* mem_objects, + cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event)) { + return amd::clEnqueueAcquireExtObjectsAMD(command_queue, num_objects, mem_objects, /* every argument is forwarded unchanged; no checks are made in this wrapper */ + num_events_in_wait_list, event_wait_list, event, + CL_COMMAND_ACQUIRE_GL_OBJECTS); /* the command type is the only value this wrapper adds */ +} +RUNTIME_EXIT + +/*! @} + * \addtogroup clEnqueueReleaseGLObjects + * @{ + */ + +/*! \brief Release OpenCL memory objects that have been created from OpenGL + * objects. + * + * \param command_queue is a valid command-queue [which is associated with the + * OpenCL clContext releasing the OpenGL objects]. + * + * \param num_objects is the number of memory objects to be released + * in \a mem_objects. + * + * \param mem_objects is a pointer to a list of CL memory objects that refer + * to a GL object (buffer/texture/renderbuffer objects or the framebuffer). + * + * \param event_wait_list specify [is a pointer to] events that need to + * complete before this particular command can be executed. + * If \a event_wait_list is NULL, then this particular command does not wait + * on any event to complete. If \a event_wait_list is NULL, + * \a num_events_in_wait_list must be 0. If \a event_wait_list is not NULL, + * the list of events pointed to by \a event_wait_list must be valid and + * \a num_events_in_wait_list must be greater than 0. The events specified in + * \a event_wait_list act as synchronization points. 
+ * + * \param num_events_in_wait_list specify the number of events in + * \a event_wait_list. It must be 0 if \a event_wait_list is NULL. It must be + * greater than 0 if \a event_wait_list is not NULL. + * + * \param event returns an event object that identifies this particular + * command and can be used to query or queue a wait for this particular + * command to complete. \a event can be NULL in which case it will not be + * possible for the application to query the status of this command or queue a + * wait for this command to complete. + * + * \return One of the following values is returned: + * - CL_SUCCESS if the function is executed successfully. + * - CL_SUCCESS if \a num_objects is 0 and \a mem_objects is NULL; the + * function does nothing. + * - CL_INVALID_VALUE if \a num_objects is zero and \a mem_objects is not a + * NULL value or if \a num_objects > 0 and \a mem_objects is NULL. + * - CL_INVALID_MEM_OBJECT if memory objects in \a mem_objects are not valid + * OpenCL memory objects. + * - CL_INVALID_COMMAND_QUEUE if \a command_queue is not a valid command-queue. + * - CL_INVALID_CONTEXT if clContext associated with \a command_queue was not + * created from an OpenGL clContext. + * - CL_INVALID_GL_OBJECT if memory objects in \a mem_objects have not been + * created from a GL object(s). + * - CL_INVALID_EVENT_WAIT_LIST if \a event_wait_list is NULL and + * \a num_events_in_wait_list > 0, or \a event_wait_list is not NULL and + * \a num_events_in_wait_list is 0, or if event objects in \a event_wait_list + * are not valid events. + * - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources + * required by the OpenCL implementation on the host. 
+ * + * \version 1.0r29 + */ +RUNTIME_ENTRY(cl_int, clEnqueueReleaseGLObjects, + (cl_command_queue command_queue, cl_uint num_objects, const cl_mem* mem_objects, + cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event)) { + return amd::clEnqueueReleaseExtObjectsAMD(command_queue, num_objects, mem_objects, + num_events_in_wait_list, event_wait_list, event, + CL_COMMAND_RELEASE_GL_OBJECTS); +} +RUNTIME_EXIT + +/*! @} +* \addtogroup clCreateEventFromGLsyncKHR +* @{ +*/ + +/*! \brief Creates an event object linked to an OpenGL sync object. +* Completion of such an event object is equivalent to waiting for completion +* of the fence command associated with the linked GL sync object. +* +* \param context is valid OpenCL context created from an OpenGL context +* or share group, using the cl_khr_gl_sharing extension. +* +* \param sync is the 'name' of a sync object in the GL share group associated +* with context. +* +* \param errcode_ret Returns an appropriate error code as described below. +* If errcode_ret is NULL, no error code is returned. +* +* \return a valid OpenCL event object and errcode_ret is set to CL_SUCCESS +* if the event object is created successfully.Otherwise, it returns a NULL +* value with one of the following error values returned in errcode_ret: +* - CL_INVALID_CONTEXT if context is not a valid context or was not created +* from a GL context. +* - CL_INVALID_GL_OBJECT if sync is not the name of a sync object in the +* GL share group associated with context. 
+* +* \version 1.1 +*/ + +RUNTIME_ENTRY_RET(cl_event, clCreateEventFromGLsyncKHR, + (cl_context context, cl_GLsync clGLsync, cl_int* errcode_ret)) { + // create event of fence sync type + amd::ClGlEvent* clglEvent = new amd::ClGlEvent(*as_amd(context)); + clglEvent->context().glenv()->glFlush_(); + // initially set the status of fence as queued + clglEvent->setStatus(CL_SUBMITTED); + // store GLsync id of the fence in event in order to associate them together + clglEvent->setData(clGLsync); + amd::Event* evt = dynamic_cast(clglEvent); + evt->retain(); + return as_cl(evt); +} +RUNTIME_EXIT + +/*! @} + * \addtogroup clGetGLContextInfoKHR + * @{ + */ + +/*! \brief This f-n is defined in CL extension cl_khr_gl_sharing and serves + * the purpose of quering current device and all devices that support + * CL-GL interoperability. + * + * \param properties points to an , which is a array of + * ordered pairs terminated with zero. If an + * attribute is not specified in , then its default value + * (listed in table 4.attr) is used (it is said to be specified + * implicitly). If is NULL or empty (points to a list + * whose first value is zero), all attributes take on their default + * values. + * + * \param param_name may accept one of the following enumerated values: + * - CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR 0x2006 + * - CL_DEVICES_FOR_GL_CONTEXT_KHR 0x2007. + * + * \param param_value_size is used to specify the size in bytes of memory + * pointed to by \a param_value. This size must be >= size of return type as + * described for \a param_name argumnet (GLenum or GLint). + * \a param_value_size_ret returns the actual size in bytes of data copied to + * \a param_value. If \a param_value_size_ret is NULL, it is ignored + * + * \param param_value is a pointer to memory where the appropriate result + * being queried is returned. If \a param_value is NULL, it is ignored. 
+ * + * \param param_value_size is used to specify the size in bytes of memory + * pointed to by \a param_value. This size must be >= size of return type as + * described for \a param_name argumnet (GLenum or GLint). + * \a param_value_size_ret returns the actual size in bytes of data copied to + * \a param_value. If \a param_value_size_ret is NULL, it is ignored + * + * \return one of the following values is returned: + * - CL_SUCCESS if the function is executed successfully. + * - CL_SUCCESS if \a num_objects is 0 and \a mem_objects is NULL; the + * function does nothing. + * - CL_INVALID_VALUE if \a num_objects is zero and \a mem_objects is not a + * NULL value or if \a num_objects > 0 and \a mem_objects is NULL. + * - CL_INVALID_MEM_OBJECT if memory objects in \a mem_objects are not valid + * OpenCL memory objects. + * - CL_INVALID_COMMAND_QUEUE if \a command_queue is not a valid command-queue. + * - CL_INVALID_CONTEXT if clContext associated with \a command_queue was not + * created from an OpenGL clContext. + * - CL_INVALID_GL_OBJECT if memory objects in \a mem_objects have not been + * created from a GL object(s). + * - CL_INVALID_EVENT_WAIT_LIST if \a event_wait_list is NULL and + * \a num_events_in_wait_list > 0, or \a event_wait_list is not NULL and + * \a num_events_in_wait_list is 0, or if event objects in \a event_wait_list + * are not valid events. + * - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources + * required by the OpenCL implementation on the host. 
+ * - CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR if + * + * \version 1.0r47 + */ +RUNTIME_ENTRY(cl_int, clGetGLContextInfoKHR, + (const cl_context_properties* properties, cl_gl_context_info param_name, + size_t param_value_size, void* param_value, size_t* param_value_size_ret)) { + cl_int errcode=0; + cl_device_id* gpu_devices; + cl_uint num_gpu_devices = 0; + amd::Context::Info info; + static const bool VALIDATE_ONLY = true; + + errcode = amd::Context::checkProperties(properties, &info); + if (CL_SUCCESS != errcode) { + return errcode; + } + + if (!(info.flags_ & amd::Context::GLDeviceKhr)) { + // No GL context is specified + return CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR; + } + + // Get devices + //errcode = clGetDeviceIDs(NULL, CL_DEVICE_TYPE_GPU, 0, NULL, &num_gpu_devices); + if (errcode != CL_SUCCESS && errcode != CL_DEVICE_NOT_FOUND) { + return CL_INVALID_VALUE; + } + + if (!num_gpu_devices) { + return CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR; + } + + switch (param_name) { + case CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR: + // Return the CL device currently associated with the specified OpenGL context. + if (num_gpu_devices) { + gpu_devices = (cl_device_id*)alloca(num_gpu_devices * sizeof(cl_device_id)); + + //errcode = clGetDeviceIDs(NULL, CL_DEVICE_TYPE_GPU, num_gpu_devices, gpu_devices, NULL); + if (errcode != CL_SUCCESS) { + return errcode; + } + + for (cl_uint i = 0; i < num_gpu_devices; ++i) { + cl_device_id device = gpu_devices[i]; + if (is_valid(device) && + as_amd(device)->bindExternalDevice(info.flags_, info.hDev_, info.hCtx_, + VALIDATE_ONLY)) { + return amd::clGetInfo(device, param_value_size, param_value, param_value_size_ret); + } + } + + *not_null(param_value_size_ret) = 0; + } + break; + + case CL_DEVICES_FOR_GL_CONTEXT_KHR: { + // List of all CL devices that can be associated with the specified OpenGL context. 
+ cl_uint total_devices = num_gpu_devices; + size_t size = total_devices * sizeof(cl_device_id); + + cl_device_id* devices = (cl_device_id*)alloca(size); + + //errcode = clGetDeviceIDs(NULL, CL_DEVICE_TYPE_GPU, total_devices, devices, NULL); + if (errcode != CL_SUCCESS) { + return errcode; + } + + std::vector compatible_devices; + + for (cl_uint i = 0; i < total_devices; ++i) { + cl_device_id device = devices[i]; + if (is_valid(device) && + as_amd(device)->bindExternalDevice(info.flags_, info.hDev_, info.hCtx_, + VALIDATE_ONLY)) { + compatible_devices.push_back(as_amd(device)); + } + } + + size_t deviceCount = compatible_devices.size(); + size_t deviceCountSize = deviceCount * sizeof(cl_device_id); + + if (param_value != NULL && param_value_size < deviceCountSize) { + return CL_INVALID_VALUE; + } + + *not_null(param_value_size_ret) = deviceCountSize; + + if (param_value != NULL) { + cl_device_id* deviceList = (cl_device_id*)param_value; + for (const auto& it : compatible_devices) { + *deviceList++ = as_cl(it); + } + } + + return CL_SUCCESS; + } break; + + default: + LogWarning("\"param_name\" is not valid"); + return CL_INVALID_VALUE; + } + return CL_SUCCESS; +} +RUNTIME_EXIT + +// +// +// namespace amd +// +// +namespace amd { + +typedef struct { + GLenum glBinding; + GLenum glTarget; +} TargetBindings_t; + +/*! @} + * \addtogroup CL-GL interop helper functions + * @{ + */ + +//! 
Function clearGLErrors() to clear all GL error bits, if any +void clearGLErrors(const Context& amdContext) { + GLenum glErr, glLastErr = GL_NO_ERROR; + while (1) { + glErr = amdContext.glenv()->glGetError_(); + if (glErr == GL_NO_ERROR || glErr == glLastErr) { + break; + } + glLastErr = glErr; + LogWarning("GL error"); + } +} + +GLenum checkForGLError(const Context& amdContext) { + GLenum glRetErr = GL_NO_ERROR; + GLenum glErr; + while (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_())) { + glRetErr = glErr; // Just return the last GL error + LogWarning("Check GL error"); + } + return glRetErr; +} + +//! Function getCLFormatFromGL returns "true" if GL format +//! is compatible with CL format, "false" otherwise. +bool getCLFormatFromGL(const Context& amdContext, GLint gliInternalFormat, + cl_image_format* pclImageFormat, int* piBytesPerPixel, cl_mem_flags flags) { + bool bRetVal = false; + + /* + Available values for "image_channel_order" + ========================================== + CL_R + CL_A + CL_INTENSITY + CL_LUMINANCE + CL_RG + CL_RA + CL_RGB + CL_RGBA + CL_ARGB + CL_BGRA + + Available values for "image_channel_data_type" + ============================================== + CL_SNORM_INT8 + CL_SNORM_INT16 + CL_UNORM_INT8 + CL_UNORM_INT16 + CL_UNORM_SHORT_565 + CL_UNORM_SHORT_555 + CL_UNORM_INT_101010 + CL_SIGNED_INT8 + CL_SIGNED_INT16 + CL_SIGNED_INT32 + CL_UNSIGNED_INT8 + CL_UNSIGNED_INT16 + CL_UNSIGNED_INT32 + CL_HALF_FLOAT + CL_FLOAT + */ + + switch (gliInternalFormat) { + case GL_RGB10_EXT: + pclImageFormat->image_channel_order = CL_RGBA; + pclImageFormat->image_channel_data_type = CL_UNORM_INT_101010; + *piBytesPerPixel = 4; + bRetVal = true; + break; + + case GL_RGB10_A2: + pclImageFormat->image_channel_order = CL_RGB; + pclImageFormat->image_channel_data_type = CL_UNORM_INT_101010; + *piBytesPerPixel = 4; + bRetVal = true; + break; + + case GL_BGR8_ATI: + case GL_BGRA8_ATI: + pclImageFormat->image_channel_order = CL_BGRA; + 
pclImageFormat->image_channel_data_type = CL_UNORM_INT8; // CL_UNSIGNED_INT8; + *piBytesPerPixel = 4; + bRetVal = true; + break; + + case GL_ALPHA8: + pclImageFormat->image_channel_order = CL_A; + pclImageFormat->image_channel_data_type = CL_UNORM_INT8; // CL_UNSIGNED_INT8; + *piBytesPerPixel = 1; + bRetVal = true; + break; + + case GL_R8: + case GL_R8UI: + pclImageFormat->image_channel_order = CL_R; + pclImageFormat->image_channel_data_type = + (gliInternalFormat == GL_R8) ? CL_UNORM_INT8 : CL_UNSIGNED_INT8; + *piBytesPerPixel = 1; + bRetVal = true; + break; + + case GL_R8I: + pclImageFormat->image_channel_order = CL_R; + pclImageFormat->image_channel_data_type = CL_SIGNED_INT8; + *piBytesPerPixel = 1; + bRetVal = true; + break; + + case GL_RG8: + case GL_RG8UI: + pclImageFormat->image_channel_order = CL_RG; + pclImageFormat->image_channel_data_type = + (gliInternalFormat == GL_RG8) ? CL_UNORM_INT8 : CL_UNSIGNED_INT8; + *piBytesPerPixel = 2; + bRetVal = true; + break; + + case GL_RG8I: + pclImageFormat->image_channel_order = CL_RG; + pclImageFormat->image_channel_data_type = CL_SIGNED_INT8; + *piBytesPerPixel = 2; + bRetVal = true; + break; + + case GL_RGB8: + case GL_RGB8UI: + pclImageFormat->image_channel_order = CL_RGB; + pclImageFormat->image_channel_data_type = + (gliInternalFormat == GL_RGB8) ? CL_UNORM_INT8 : CL_UNSIGNED_INT8; + *piBytesPerPixel = 3; + bRetVal = true; + break; + + case GL_RGB8I: + pclImageFormat->image_channel_order = CL_RGB; + pclImageFormat->image_channel_data_type = CL_SIGNED_INT8; + *piBytesPerPixel = 3; + bRetVal = true; + break; + + case GL_RGBA: + case GL_RGBA8: + case GL_RGBA8UI: + pclImageFormat->image_channel_order = CL_RGBA; + pclImageFormat->image_channel_data_type = + (gliInternalFormat == GL_RGBA8UI) ? 
CL_UNSIGNED_INT8 : CL_UNORM_INT8; + *piBytesPerPixel = 4; + bRetVal = true; + break; + + case GL_RGBA8I: + pclImageFormat->image_channel_order = CL_RGBA; + pclImageFormat->image_channel_data_type = CL_SIGNED_INT8; + *piBytesPerPixel = 4; + bRetVal = true; + break; + + case GL_R16: + case GL_R16UI: + pclImageFormat->image_channel_order = CL_R; + pclImageFormat->image_channel_data_type = + (gliInternalFormat == GL_R16) ? CL_UNORM_INT16 : CL_UNSIGNED_INT16; + bRetVal = true; + *piBytesPerPixel = 2; + break; + + case GL_R16I: + pclImageFormat->image_channel_order = CL_R; + pclImageFormat->image_channel_data_type = CL_SIGNED_INT16; + *piBytesPerPixel = 2; + bRetVal = true; + break; + + case GL_R16F: + pclImageFormat->image_channel_order = CL_R; + pclImageFormat->image_channel_data_type = CL_HALF_FLOAT; + *piBytesPerPixel = 2; + bRetVal = true; + break; + + case GL_RG16: + case GL_RG16UI: + pclImageFormat->image_channel_order = CL_RG; + pclImageFormat->image_channel_data_type = + (gliInternalFormat == GL_RG16) ? CL_UNORM_INT16 : CL_UNSIGNED_INT16; + *piBytesPerPixel = 4; + bRetVal = true; + break; + + case GL_RG16I: + pclImageFormat->image_channel_order = CL_RG; + pclImageFormat->image_channel_data_type = CL_SIGNED_INT16; + *piBytesPerPixel = 4; + bRetVal = true; + break; + + case GL_RG16F: + pclImageFormat->image_channel_order = CL_RG; + pclImageFormat->image_channel_data_type = CL_HALF_FLOAT; + *piBytesPerPixel = 4; + bRetVal = true; + break; + + case GL_RGB16: + case GL_RGB16UI: + pclImageFormat->image_channel_order = CL_RGB; + pclImageFormat->image_channel_data_type = + (gliInternalFormat == GL_RGB16) ? 
CL_UNORM_INT16 : CL_UNSIGNED_INT16; + *piBytesPerPixel = 6; + bRetVal = true; + break; + + case GL_RGB16I: + pclImageFormat->image_channel_order = CL_RGB; + pclImageFormat->image_channel_data_type = CL_SIGNED_INT16; + *piBytesPerPixel = 6; + bRetVal = true; + break; + + case GL_RGB16F: + pclImageFormat->image_channel_order = CL_RGB; + pclImageFormat->image_channel_data_type = CL_HALF_FLOAT; + *piBytesPerPixel = 6; + bRetVal = true; + break; + + case GL_RGBA16: + case GL_RGBA16UI: + pclImageFormat->image_channel_order = CL_RGBA; + pclImageFormat->image_channel_data_type = + (gliInternalFormat == GL_RGBA16) ? CL_UNORM_INT16 : CL_UNSIGNED_INT16; + *piBytesPerPixel = 8; + bRetVal = true; + break; + + case GL_RGBA16I: + pclImageFormat->image_channel_order = CL_RGBA; + pclImageFormat->image_channel_data_type = CL_SIGNED_INT16; + *piBytesPerPixel = 8; + bRetVal = true; + break; + + case GL_RGBA16F: + pclImageFormat->image_channel_order = CL_RGBA; + pclImageFormat->image_channel_data_type = CL_HALF_FLOAT; + *piBytesPerPixel = 8; + bRetVal = true; + break; + + case GL_R32I: + pclImageFormat->image_channel_order = CL_R; + pclImageFormat->image_channel_data_type = CL_SIGNED_INT32; + *piBytesPerPixel = 4; + bRetVal = true; + break; + + case GL_R32UI: + pclImageFormat->image_channel_order = CL_R; + pclImageFormat->image_channel_data_type = CL_UNSIGNED_INT32; + *piBytesPerPixel = 4; + bRetVal = true; + break; + + case GL_R32F: + pclImageFormat->image_channel_order = CL_R; + pclImageFormat->image_channel_data_type = CL_FLOAT; + *piBytesPerPixel = 4; + bRetVal = true; + break; + + case GL_RG32I: + pclImageFormat->image_channel_order = CL_RG; + pclImageFormat->image_channel_data_type = CL_SIGNED_INT32; + *piBytesPerPixel = 8; + bRetVal = true; + break; + + case GL_RG32UI: + pclImageFormat->image_channel_order = CL_RG; + pclImageFormat->image_channel_data_type = CL_UNSIGNED_INT32; + *piBytesPerPixel = 8; + bRetVal = true; + break; + + case GL_RG32F: + 
pclImageFormat->image_channel_order = CL_RG; + pclImageFormat->image_channel_data_type = CL_FLOAT; + *piBytesPerPixel = 8; + bRetVal = true; + break; + + case GL_RGB32I: + pclImageFormat->image_channel_order = CL_RGB; + pclImageFormat->image_channel_data_type = CL_SIGNED_INT32; + *piBytesPerPixel = 12; + bRetVal = true; + break; + + case GL_RGB32UI: + pclImageFormat->image_channel_order = CL_RGB; + pclImageFormat->image_channel_data_type = CL_UNSIGNED_INT32; + *piBytesPerPixel = 12; + bRetVal = true; + break; + + case GL_RGB32F: + pclImageFormat->image_channel_order = CL_RGB; + pclImageFormat->image_channel_data_type = CL_FLOAT; + *piBytesPerPixel = 12; + bRetVal = true; + break; + + case GL_RGBA32I: + pclImageFormat->image_channel_order = CL_RGBA; + pclImageFormat->image_channel_data_type = CL_SIGNED_INT32; + *piBytesPerPixel = 16; + bRetVal = true; + break; + + case GL_RGBA32UI: + pclImageFormat->image_channel_order = CL_RGBA; + pclImageFormat->image_channel_data_type = CL_UNSIGNED_INT32; + *piBytesPerPixel = 16; + bRetVal = true; + break; + + case GL_RGBA32F: + pclImageFormat->image_channel_order = CL_RGBA; + pclImageFormat->image_channel_data_type = CL_FLOAT; + *piBytesPerPixel = 16; + bRetVal = true; + break; + case GL_DEPTH_COMPONENT32F: + pclImageFormat->image_channel_order = CL_DEPTH; + pclImageFormat->image_channel_data_type = CL_FLOAT; + *piBytesPerPixel = 4; + bRetVal = true; + break; + case GL_DEPTH_COMPONENT16: + pclImageFormat->image_channel_order = CL_DEPTH; + pclImageFormat->image_channel_data_type = CL_UNORM_INT16; + *piBytesPerPixel = 2; + bRetVal = true; + break; + case GL_DEPTH24_STENCIL8: + pclImageFormat->image_channel_order = CL_DEPTH_STENCIL; + pclImageFormat->image_channel_data_type = CL_UNORM_INT24; + *piBytesPerPixel = 4; + bRetVal = true; + break; + case GL_DEPTH32F_STENCIL8: + pclImageFormat->image_channel_order = CL_DEPTH_STENCIL; + pclImageFormat->image_channel_data_type = CL_FLOAT; + *piBytesPerPixel = 5; + bRetVal = true; + break; + 
default: + LogWarning("unsupported GL internal format"); + break; + } + amd::Image::Format imageFormat(*pclImageFormat); + if (bRetVal && !imageFormat.isSupported(amdContext, 0, flags)) { + bRetVal = false; + } + return bRetVal; +} + +void BufferGL::initDeviceMemory() { + deviceMemories_ = + reinterpret_cast(reinterpret_cast(this) + sizeof(BufferGL)); + memset(deviceMemories_, 0, context_().devices().size() * sizeof(DeviceMemory)); +} + +static GLenum clChannelDataTypeToGlType(cl_channel_type channel_type) { + // Pick + // GL_BYTE, GL_UNSIGNED_BYTE, GL_SHORT, GL_UNSIGNED_SHORT, GL_INT, + // GL_UNSIGNED_INT, GL_FLOAT, GL_2_BYTES, GL_3_BYTES, GL_4_BYTES + // or GL_DOUBLE + switch (channel_type) { + case CL_SNORM_INT8: + return GL_BYTE; + case CL_SNORM_INT16: + return GL_SHORT; + case CL_UNORM_INT8: + return GL_UNSIGNED_BYTE; + case CL_UNORM_INT16: + return GL_UNSIGNED_SHORT; + case CL_SIGNED_INT8: + return GL_BYTE; + case CL_SIGNED_INT16: + return GL_SHORT; + case CL_SIGNED_INT32: + return GL_INT; + case CL_UNSIGNED_INT8: + return GL_UNSIGNED_BYTE; + case CL_UNSIGNED_INT16: + return GL_UNSIGNED_SHORT; + case CL_UNSIGNED_INT32: + return GL_UNSIGNED_INT; + case CL_FLOAT: + return GL_FLOAT; + case CL_UNORM_INT_101010: + return GL_UNSIGNED_INT_10_10_10_2; + case CL_HALF_FLOAT: + case CL_UNORM_SHORT_565: + case CL_UNORM_SHORT_555: + default: + guarantee(false && "Unexpected CL type."); + return 0; + } +} + +static GLenum glInternalFormatToGlFormat(GLenum internalFormat) { + switch (internalFormat) { + // Base internal formats + case GL_RGBA: + case GL_BGRA: + return internalFormat; + // Sized internal formats + case GL_RGBA8: + case GL_RGBA16: + case GL_RGBA16F: + case GL_RGBA32F: + return GL_RGBA; + case GL_RGBA8I: + case GL_RGBA8UI: + case GL_RGBA16I: + case GL_RGBA16UI: + case GL_RGBA32I: + case GL_RGBA32UI: + return GL_RGBA_INTEGER; + + default: + guarantee(false && "Unexpected GL internal format."); + return 0; + } +} + +void ImageGL::initDeviceMemory() { + 
deviceMemories_ = + reinterpret_cast(reinterpret_cast(this) + sizeof(ImageGL)); + memset(deviceMemories_, 0, context_().devices().size() * sizeof(DeviceMemory)); +} + +//******************************************************************* +// +// Internal implementation of CL API functions +// +//******************************************************************* + +// +// clCreateFromGLBufferAMD +// +cl_mem clCreateFromGLBufferAMD(Context& amdContext, cl_mem_flags flags, GLuint bufobj, + cl_int* errcode_ret) { + BufferGL* pBufferGL = NULL; + GLenum glErr; + GLenum glTarget = GL_ARRAY_BUFFER; + GLint gliSize = 0; + GLint gliMapped = 0; + + // Verify context init'ed for interop + if (!amdContext.glenv() || !amdContext.glenv()->isAssociated()) { + *not_null(errcode_ret) = CL_INVALID_CONTEXT; + LogWarning("\"amdContext\" is not created from GL context or share list"); + return (cl_mem)0; + } + + // Add this scope to bound the scoped lock + { + GLFunctions::SetIntEnv ie(amdContext.glenv()); + if (!ie.isValid()) { + *not_null(errcode_ret) = CL_INVALID_CONTEXT; + LogWarning("\"amdContext\" is not created from GL context or share list"); + return as_cl(0); + } + + // Verify GL buffer object + clearGLErrors(amdContext); + if ((GL_FALSE == amdContext.glenv()->glIsBuffer_(bufobj)) || + (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_()))) { + *not_null(errcode_ret) = CL_INVALID_GL_OBJECT; + LogWarning("\"bufobj\" is not a GL buffer object"); + return (cl_mem)0; + } + + // It seems that CL spec is not concerned with GL_BUFFER_USAGE, so skip it + + // Check if size is available - data store is created + + amdContext.glenv()->glBindBuffer_(glTarget, bufobj); + clearGLErrors(amdContext); + amdContext.glenv()->glGetBufferParameteriv_(glTarget, GL_BUFFER_SIZE, &gliSize); + if (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_())) { + *not_null(errcode_ret) = CL_INVALID_GL_OBJECT; + LogWarning("cannot get the GL buffer size"); + return (cl_mem)0; + } + if (gliSize == 
0) { + //@todo - check why sometime the size is zero + *not_null(errcode_ret) = CL_INVALID_GL_OBJECT; + LogWarning("the GL buffer's data store is not created"); + return (cl_mem)0; + } + + // Mapping will be done at acquire time (sync point) + + } // Release scoped lock + + // Now create BufferGL object + pBufferGL = new (amdContext) BufferGL(amdContext, flags, gliSize, 0, bufobj); + + if (!pBufferGL) { + *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; + LogWarning("cannot create object of class BufferGL"); + return (cl_mem)0; + } + + if (!pBufferGL->create()) { + *not_null(errcode_ret) = CL_MEM_OBJECT_ALLOCATION_FAILURE; + pBufferGL->release(); + return (cl_mem)0; + } + + *not_null(errcode_ret) = CL_SUCCESS; + + // Create interop object + if (pBufferGL->getInteropObj() == NULL) { + *not_null(errcode_ret) = CL_INVALID_GL_OBJECT; + LogWarning("cannot create object of class BufferGL"); + return (cl_mem)0; + } + + // Fixme: If more than one device is present in the context, we choose the first device. + // We should come up with a more elegant solution to handle this. 
+ assert(amdContext.devices().size() == 1); + + const auto it = amdContext.devices().cbegin(); + const amd::Device& dev = *(*it); + + device::Memory* mem = pBufferGL->getDeviceMemory(dev); + if (NULL == mem) { + LogPrintfError("Can't allocate memory size - 0x%08X bytes!", pBufferGL->getSize()); + *not_null(errcode_ret) = CL_INVALID_GL_OBJECT; + return (cl_mem)0; + } + mem->processGLResource(device::Memory::GLDecompressResource); + + return as_cl(pBufferGL); +} + +cl_mem clCreateFromGLTextureAMD(Context& amdContext, cl_mem_flags clFlags, GLenum target, + GLint miplevel, GLuint texture, int* errcode_ret) { + ImageGL* pImageGL = NULL; + GLenum glErr; + GLenum glTarget = 0; + GLenum glInternalFormat; + cl_image_format clImageFormat; + uint dim = 1; + cl_mem_object_type clType; + cl_gl_object_type clGLType; + GLsizei numSamples = 1; + + // Verify context init'ed for interop + if (!amdContext.glenv() || !amdContext.glenv()->isAssociated()) { + *not_null(errcode_ret) = CL_INVALID_CONTEXT; + LogWarning("\"amdContext\" is not created from GL context or share list"); + return static_cast(0); + } + + GLint gliTexWidth = 1; + GLint gliTexHeight = 1; + GLint gliTexDepth = 1; + + // Add this scope to bound the scoped lock + { + GLFunctions::SetIntEnv ie(amdContext.glenv()); + if (!ie.isValid()) { + *not_null(errcode_ret) = CL_INVALID_CONTEXT; + LogWarning("\"amdContext\" is not created from GL context or share list"); + return as_cl(0); + } + + // Verify GL texture object + clearGLErrors(amdContext); + if ((GL_FALSE == amdContext.glenv()->glIsTexture_(texture)) || + (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_()))) { + *not_null(errcode_ret) = CL_INVALID_GL_OBJECT; + LogWarning("\"texture\" is not a GL texture object"); + return static_cast(0); + } + + bool image = true; + + // Check target value validity + switch (target) { + case GL_TEXTURE_BUFFER: + glTarget = GL_TEXTURE_BUFFER; + dim = 1; + clType = CL_MEM_OBJECT_IMAGE1D_BUFFER; + clGLType = 
CL_GL_OBJECT_TEXTURE_BUFFER; + image = false; + break; + + case GL_TEXTURE_1D: + glTarget = GL_TEXTURE_1D; + dim = 1; + clType = CL_MEM_OBJECT_IMAGE1D; + clGLType = CL_GL_OBJECT_TEXTURE1D; + break; + + case GL_TEXTURE_CUBE_MAP_POSITIVE_X: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: + glTarget = GL_TEXTURE_CUBE_MAP; + dim = 2; + clType = CL_MEM_OBJECT_IMAGE2D; + clGLType = CL_GL_OBJECT_TEXTURE2D; + break; + + case GL_TEXTURE_1D_ARRAY: + glTarget = GL_TEXTURE_1D_ARRAY; + dim = 2; + clType = CL_MEM_OBJECT_IMAGE1D_ARRAY; + clGLType = CL_GL_OBJECT_TEXTURE1D_ARRAY; + break; + + case GL_TEXTURE_2D: + glTarget = GL_TEXTURE_2D; + dim = 2; + clType = CL_MEM_OBJECT_IMAGE2D; + clGLType = CL_GL_OBJECT_TEXTURE2D; + break; + + case GL_TEXTURE_2D_MULTISAMPLE: + glTarget = GL_TEXTURE_2D_MULTISAMPLE; + dim = 2; + clType = CL_MEM_OBJECT_IMAGE2D; + clGLType = CL_GL_OBJECT_TEXTURE2D; + break; + + case GL_TEXTURE_RECTANGLE_ARB: + glTarget = GL_TEXTURE_RECTANGLE_ARB; + dim = 2; + clType = CL_MEM_OBJECT_IMAGE2D; + clGLType = CL_GL_OBJECT_TEXTURE2D; + break; + + case GL_TEXTURE_2D_ARRAY: + glTarget = GL_TEXTURE_2D_ARRAY; + dim = 3; + clType = CL_MEM_OBJECT_IMAGE2D_ARRAY; + clGLType = CL_GL_OBJECT_TEXTURE2D_ARRAY; + break; + + case GL_TEXTURE_3D: + glTarget = GL_TEXTURE_3D; + dim = 3; + clType = CL_MEM_OBJECT_IMAGE3D; + clGLType = CL_GL_OBJECT_TEXTURE3D; + break; + + default: + // wrong value + *not_null(errcode_ret) = CL_INVALID_VALUE; + LogWarning("invalid \"target\" value"); + return static_cast(0); + break; + } + + amdContext.glenv()->glBindTexture_(glTarget, texture); + + // Check if size is available - data store is created + if (image) { + // Check mipmap level for "texture" name + GLint gliTexBaseLevel; + GLint gliTexMaxLevel; + + clearGLErrors(amdContext); + amdContext.glenv()->glGetTexParameteriv_(glTarget, GL_TEXTURE_BASE_LEVEL, 
&gliTexBaseLevel); + if (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_())) { + *not_null(errcode_ret) = CL_INVALID_MIP_LEVEL; + LogWarning("Cannot get base mipmap level of a GL \"texture\" object"); + return static_cast(0); + } + clearGLErrors(amdContext); + amdContext.glenv()->glGetTexParameteriv_(glTarget, GL_TEXTURE_MAX_LEVEL, &gliTexMaxLevel); + if (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_())) { + *not_null(errcode_ret) = CL_INVALID_MIP_LEVEL; + LogWarning("Cannot get max mipmap level of a GL \"texture\" object"); + return static_cast(0); + } + if ((gliTexBaseLevel > miplevel) || (miplevel > gliTexMaxLevel)) { + *not_null(errcode_ret) = CL_INVALID_MIP_LEVEL; + LogWarning("\"miplevel\" is not a valid mipmap level of the GL \"texture\" object"); + return static_cast(0); + } + + // Get GL texture format and check if it's compatible with CL format + clearGLErrors(amdContext); + amdContext.glenv()->glGetTexLevelParameteriv_(target, miplevel, GL_TEXTURE_INTERNAL_FORMAT, + (GLint*)&glInternalFormat); + if (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_())) { + *not_null(errcode_ret) = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; + LogWarning("Cannot get internal format of \"miplevel\" of GL \"texture\" object"); + return static_cast(0); + } + + amdContext.glenv()->glGetTexLevelParameteriv_(target, miplevel, GL_TEXTURE_SAMPLES, + (GLint*)&numSamples); + if (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_())) { + *not_null(errcode_ret) = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; + LogWarning("Cannot get numbers of samples of GL \"texture\" object"); + return static_cast(0); + } + if (numSamples > 1) { + *not_null(errcode_ret) = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; + LogWarning("MSAA \"texture\" object is not suppoerted for the device"); + return static_cast(0); + } + + // Now get CL format from GL format and bytes per pixel + int iBytesPerPixel = 0; + if (!getCLFormatFromGL(amdContext, glInternalFormat, &clImageFormat, &iBytesPerPixel, + 
clFlags)) { + *not_null(errcode_ret) = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; + LogWarning("\"texture\" format does not map to an appropriate CL image format"); + return static_cast(0); + } + + switch (dim) { + case 3: + clearGLErrors(amdContext); + amdContext.glenv()->glGetTexLevelParameteriv_(target, miplevel, GL_TEXTURE_DEPTH, + &gliTexDepth); + if (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_())) { + *not_null(errcode_ret) = CL_INVALID_GL_OBJECT; + LogWarning("Cannot get the depth of \"miplevel\" of GL \"texure\""); + return static_cast(0); + } + // Fall trough to process other dimensions... + case 2: + clearGLErrors(amdContext); + amdContext.glenv()->glGetTexLevelParameteriv_(target, miplevel, GL_TEXTURE_HEIGHT, + &gliTexHeight); + if (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_())) { + *not_null(errcode_ret) = CL_INVALID_GL_OBJECT; + LogWarning("Cannot get the height of \"miplevel\" of GL \"texure\""); + return static_cast(0); + } + // Fall trough to process other dimensions... 
+ case 1: + clearGLErrors(amdContext); + amdContext.glenv()->glGetTexLevelParameteriv_(target, miplevel, GL_TEXTURE_WIDTH, + &gliTexWidth); + if (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_())) { + *not_null(errcode_ret) = CL_INVALID_GL_OBJECT; + LogWarning("Cannot get the width of \"miplevel\" of GL \"texure\""); + return static_cast(0); + } + break; + default: + *not_null(errcode_ret) = CL_INVALID_VALUE; + LogWarning("invalid \"target\" value"); + return static_cast(0); + } + } else { + GLint size; + + // In case target is GL_TEXTURE_BUFFER + GLint backingBuffer; + clearGLErrors(amdContext); + amdContext.glenv()->glGetTexLevelParameteriv_( + glTarget, 0, GL_TEXTURE_BUFFER_DATA_STORE_BINDING, &backingBuffer); + if (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_())) { + *not_null(errcode_ret) = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; + LogWarning("Cannot get backing buffer for GL \"texture buffer\" object"); + return static_cast(0); + } + amdContext.glenv()->glBindBuffer_(glTarget, backingBuffer); + + // Get GL texture format and check if it's compatible with CL format + clearGLErrors(amdContext); + amdContext.glenv()->glGetIntegerv_(GL_TEXTURE_BUFFER_FORMAT_EXT, + reinterpret_cast(&glInternalFormat)); + if (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_())) { + *not_null(errcode_ret) = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; + LogWarning("Cannot get internal format of \"miplevel\" of GL \"texture\" object"); + return static_cast(0); + } + + // Now get CL format from GL format and bytes per pixel + int iBytesPerPixel = 0; + if (!getCLFormatFromGL(amdContext, glInternalFormat, &clImageFormat, &iBytesPerPixel, + clFlags)) { + *not_null(errcode_ret) = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; + LogWarning("\"texture\" format does not map to an appropriate CL image format"); + return static_cast(0); + } + + clearGLErrors(amdContext); + amdContext.glenv()->glGetBufferParameteriv_(glTarget, GL_BUFFER_SIZE, &size); + if (GL_NO_ERROR != (glErr = 
amdContext.glenv()->glGetError_())) { + *not_null(errcode_ret) = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; + LogWarning("Cannot get internal format of \"miplevel\" of GL \"texture\" object"); + return static_cast(0); + } + + gliTexWidth = size / iBytesPerPixel; + } + size_t imageSize = (clType == CL_MEM_OBJECT_IMAGE1D_ARRAY) ? static_cast(gliTexHeight) + : static_cast(gliTexDepth); + + if (!amd::Image::validateDimensions( + amdContext.devices(), clType, static_cast(gliTexWidth), + static_cast(gliTexHeight), static_cast(gliTexDepth), imageSize)) { + *not_null(errcode_ret) = CL_INVALID_GL_OBJECT; + LogWarning("The GL \"texture\" data store is not created or out of supported dimensions"); + return static_cast(0); + } + + // PBO and mapping will be done at "acquire" time (sync point) + + } // Release scoped lock + + target = (glTarget == GL_TEXTURE_CUBE_MAP) ? target : 0; + + pImageGL = new (amdContext) + ImageGL(amdContext, clType, clFlags, clImageFormat, static_cast(gliTexWidth), + static_cast(gliTexHeight), static_cast(gliTexDepth), glTarget, + texture, miplevel, glInternalFormat, clGLType, numSamples, target); + + if (!pImageGL) { + *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; + LogWarning("Cannot create class ImageGL - out of memory?"); + return static_cast(0); + } + + if (!pImageGL->create()) { + *not_null(errcode_ret) = CL_MEM_OBJECT_ALLOCATION_FAILURE; + pImageGL->release(); + return static_cast(0); + } + + *not_null(errcode_ret) = CL_SUCCESS; + return as_cl(pImageGL); +} + +// +// clCreateFromGLRenderbufferDAMD +// +cl_mem clCreateFromGLRenderbufferAMD(Context& amdContext, cl_mem_flags clFlags, GLuint renderbuffer, + int* errcode_ret) { + ImageGL* pImageGL = NULL; + GLenum glErr; + + GLenum glTarget = GL_RENDERBUFFER; + GLenum glInternalFormat; + cl_image_format clImageFormat; + + // Verify context init'ed for interop + if (!amdContext.glenv() || !amdContext.glenv()->isAssociated()) { + *not_null(errcode_ret) = CL_INVALID_CONTEXT; + LogWarning("\"amdContext\" is 
not created from GL context or share list"); + return (cl_mem)0; + } + + GLint gliRbWidth; + GLint gliRbHeight; + + // Add this scope to bound the scoped lock + { + GLFunctions::SetIntEnv ie(amdContext.glenv()); + if (!ie.isValid()) { + *not_null(errcode_ret) = CL_INVALID_CONTEXT; + LogWarning("\"amdContext\" is not created from GL context or share list"); + return as_cl(0); + } + + // Verify GL renderbuffer object + clearGLErrors(amdContext); + if ((GL_FALSE == amdContext.glenv()->glIsRenderbufferEXT_(renderbuffer)) || + (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_()))) { + *not_null(errcode_ret) = CL_INVALID_GL_OBJECT; + LogWarning("\"renderbuffer\" is not a GL texture object"); + return (cl_mem)0; + } + + amdContext.glenv()->glBindRenderbuffer_(glTarget, renderbuffer); + + // Get GL RB format and check if it's compatible with CL format + clearGLErrors(amdContext); + amdContext.glenv()->glGetRenderbufferParameterivEXT_(glTarget, GL_RENDERBUFFER_INTERNAL_FORMAT, + (GLint*)&glInternalFormat); + if (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_())) { + *not_null(errcode_ret) = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; + LogWarning("Cannot get internal format of GL \"renderbuffer\" object"); + return (cl_mem)0; + } + + // Now get CL format from GL format and bytes per pixel + int iBytesPerPixel = 0; + if (!getCLFormatFromGL(amdContext, glInternalFormat, &clImageFormat, &iBytesPerPixel, + clFlags)) { + *not_null(errcode_ret) = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; + LogWarning("\"renderbuffer\" format does not map to an appropriate CL image format"); + return (cl_mem)0; + } + + // Check if size is available - data store is created + clearGLErrors(amdContext); + amdContext.glenv()->glGetRenderbufferParameterivEXT_(glTarget, GL_RENDERBUFFER_WIDTH, + &gliRbWidth); + if (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_())) { + *not_null(errcode_ret) = CL_INVALID_GL_OBJECT; + LogWarning("Cannot get the width of GL \"renderbuffer\""); + return 
(cl_mem)0; + } + if (gliRbWidth == 0) { + *not_null(errcode_ret) = CL_INVALID_GL_OBJECT; + LogWarning("The GL \"renderbuffer\" data store is not created"); + return (cl_mem)0; + } + clearGLErrors(amdContext); + amdContext.glenv()->glGetRenderbufferParameterivEXT_(glTarget, GL_RENDERBUFFER_HEIGHT, + &gliRbHeight); + if (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_())) { + *not_null(errcode_ret) = CL_INVALID_GL_OBJECT; + LogWarning("Cannot get the height of GL \"renderbuffer\""); + return (cl_mem)0; + } + if (gliRbHeight == 0) { + *not_null(errcode_ret) = CL_INVALID_GL_OBJECT; + LogWarning("The GL \"renderbuffer\" data store is not created"); + return (cl_mem)0; + } + + // PBO and mapping will be done at "acquire" time (sync point) + + } // Release scoped lock + + pImageGL = + new (amdContext) ImageGL(amdContext, CL_MEM_OBJECT_IMAGE2D, clFlags, clImageFormat, + (size_t)gliRbWidth, (size_t)gliRbHeight, 1, glTarget, renderbuffer, + 0, glInternalFormat, CL_GL_OBJECT_RENDERBUFFER, 0); + + if (!pImageGL) { + *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; + LogWarning("Cannot create class ImageGL from renderbuffer - out of memory?"); + return (cl_mem)0; + } + + if (!pImageGL->create()) { + *not_null(errcode_ret) = CL_MEM_OBJECT_ALLOCATION_FAILURE; + pImageGL->release(); + return (cl_mem)0; + } + + *not_null(errcode_ret) = CL_SUCCESS; + return as_cl(pImageGL); +} + +// +// clEnqueueAcquireExtObjectsAMD +// + +static cl_int clSetInteropObjects(cl_uint num_objects, const cl_mem* mem_objects, + std::vector& interopObjects) { + if ((num_objects == 0 && mem_objects != NULL) || (num_objects != 0 && mem_objects == NULL)) { + return CL_INVALID_VALUE; + } + + while (num_objects-- > 0) { + cl_mem obj = *mem_objects++; + if (!is_valid(obj)) { + return CL_INVALID_MEM_OBJECT; + } + + amd::Memory* mem = as_amd(obj); + if (mem->getInteropObj() == NULL) { + return CL_INVALID_GL_OBJECT; + } + + interopObjects.push_back(mem); + } + return CL_SUCCESS; +} + +cl_int 
clEnqueueAcquireExtObjectsAMD(cl_command_queue command_queue, cl_uint num_objects, + const cl_mem* mem_objects, cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, cl_event* event, + cl_command_type cmd_type) { + if (!is_valid(command_queue)) { + return CL_INVALID_COMMAND_QUEUE; + } + + amd::HostQueue* queue = as_amd(command_queue)->asHostQueue(); + if (NULL == queue) { + return CL_INVALID_COMMAND_QUEUE; + } + amd::HostQueue& hostQueue = *queue; + + if (cmd_type == CL_COMMAND_ACQUIRE_GL_OBJECTS) { + // Verify context init'ed for interop + if (!hostQueue.context().glenv() || !hostQueue.context().glenv()->isAssociated()) { + LogWarning("\"amdContext\" is not created from GL context or share list"); + return CL_INVALID_CONTEXT; + } + } + + std::vector memObjects; + cl_int err = clSetInteropObjects(num_objects, mem_objects, memObjects); + if (err != CL_SUCCESS) { + return err; + } + + amd::Command::EventWaitList eventWaitList; + err = amd::clSetEventWaitList(eventWaitList, hostQueue, num_events_in_wait_list, + event_wait_list); + if (err != CL_SUCCESS) { + return err; + } + +#ifdef _WIN32 + if ((hostQueue.context().info().flags_ & amd::Context::InteropUserSync) == 0) { + //! Make sure D3D10 queues are flushed and all commands are finished + //! before CL side would access interop objects + if (cmd_type == CL_COMMAND_ACQUIRE_D3D10_OBJECTS_KHR) { + SyncD3D10Objects(memObjects); + } + //! Make sure D3D11 queues are flushed and all commands are finished + //! before CL side would access interop objects + if (cmd_type == CL_COMMAND_ACQUIRE_D3D11_OBJECTS_KHR) { + SyncD3D11Objects(memObjects); + } + //! Make sure D3D9 queues are flushed and all commands are finished + //! before CL side would access interop objects + if (cmd_type == CL_COMMAND_ACQUIRE_DX9_MEDIA_SURFACES_KHR) { + SyncD3D9Objects(memObjects); + } + } +#endif //_WIN32 + + //! 
Now create command and enqueue + amd::AcquireExtObjectsCommand* command = new amd::AcquireExtObjectsCommand( + hostQueue, eventWaitList, num_objects, memObjects, cmd_type); + if (command == NULL) { + return CL_OUT_OF_HOST_MEMORY; + } + + // Make sure we have memory for the command execution + if (!command->validateMemory()) { + delete command; + return CL_MEM_OBJECT_ALLOCATION_FAILURE; + } + + command->enqueue(); + + *not_null(event) = as_cl(&command->event()); + if (event == NULL) { + command->release(); + } + return CL_SUCCESS; +} + + +// +// clEnqueueReleaseExtObjectsAMD +// +cl_int clEnqueueReleaseExtObjectsAMD(cl_command_queue command_queue, cl_uint num_objects, + const cl_mem* mem_objects, cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, cl_event* event, + cl_command_type cmd_type) { + if (!is_valid(command_queue)) { + return CL_INVALID_COMMAND_QUEUE; + } + + amd::HostQueue* queue = as_amd(command_queue)->asHostQueue(); + if (NULL == queue) { + return CL_INVALID_COMMAND_QUEUE; + } + amd::HostQueue& hostQueue = *queue; + + std::vector memObjects; + cl_int err = clSetInteropObjects(num_objects, mem_objects, memObjects); + if (err != CL_SUCCESS) { + return err; + } + + amd::Command::EventWaitList eventWaitList; + err = amd::clSetEventWaitList(eventWaitList, hostQueue, num_events_in_wait_list, + event_wait_list); + if (err != CL_SUCCESS) { + return err; + } + + //! Now create command and enqueue + amd::ReleaseExtObjectsCommand* command = new amd::ReleaseExtObjectsCommand( + hostQueue, eventWaitList, num_objects, memObjects, cmd_type); + if (command == NULL) { + return CL_OUT_OF_HOST_MEMORY; + } + + // Make sure we have memory for the command execution + if (!command->validateMemory()) { + delete command; + return CL_MEM_OBJECT_ALLOCATION_FAILURE; + } + + command->enqueue(); + +#ifdef _WIN32 + if ((hostQueue.context().info().flags_ & amd::Context::InteropUserSync) == 0) { + //! 
Make sure CL command queue is flushed and all commands are finished + //! before D3D10 side would access interop resources + if (cmd_type == CL_COMMAND_RELEASE_DX9_MEDIA_SURFACES_KHR || + cmd_type == CL_COMMAND_RELEASE_D3D10_OBJECTS_KHR || + cmd_type == CL_COMMAND_RELEASE_D3D11_OBJECTS_KHR) { + command->awaitCompletion(); + } + } +#endif //_WIN32 + + *not_null(event) = as_cl(&command->event()); + + if (event == NULL) { + command->release(); + } + + return CL_SUCCESS; +} + +// Placed here as opposed to command.cpp, as glext.h and cl_gl_amd.hpp will have +// to be included because of the GL calls +// +// Blocks until the GL sync object stored in this event's command data is signaled. +// Returns false when there is no sync object, when the internal GL environment cannot be +// initialized or made current, or when glClientWaitSync reports anything other than +// GL_ALREADY_SIGNALED / GL_CONDITION_SATISFIED. On success the event status is set to +// CL_COMPLETE and true is returned. +bool ClGlEvent::waitForFence() { + GLenum ret; + // get fence id associated with fence event + GLsync gs = reinterpret_cast<GLsync>(command().data()); + if (!gs) return false; + +// Try to use DC and GLRC of current thread, if it doesn't exist +// create a new GL context on this thread, which is shared with the original context + +#ifdef _WIN32 + HDC tempDC_ = wglGetCurrentDC(); + HGLRC tempGLRC_ = wglGetCurrentContext(); + // Set DC and GLRC + if (tempDC_ && tempGLRC_) { + ret = context().glenv()->glClientWaitSync_(gs, GL_SYNC_FLUSH_COMMANDS_BIT, + static_cast<GLuint64>(-1)); + if (!(ret == GL_ALREADY_SIGNALED || ret == GL_CONDITION_SATISFIED)) return false; + } else { + tempDC_ = context().glenv()->getDC(); + tempGLRC_ = context().glenv()->getIntGLRC(); + if (!context().glenv()->init(reinterpret_cast<intptr_t>(tempDC_), + reinterpret_cast<intptr_t>(tempGLRC_))) + return false; + + // Make the newly created GL context current to this thread; without it the wait below + // would run with no usable GL context + if (!context().glenv()->setIntEnv()) return false; + // If fence has not yet executed, wait till it finishes + ret = context().glenv()->glClientWaitSync_(gs, GL_SYNC_FLUSH_COMMANDS_BIT, + static_cast<GLuint64>(-1)); + // Since we're done making GL calls, restore whatever context was previously current to this + // thread. This happens before the result is inspected so that a failed wait does not leave + // the internal context current on the calling thread. + context().glenv()->restoreEnv(); + if (!(ret == GL_ALREADY_SIGNALED || ret == GL_CONDITION_SATISFIED)) return false; + } +#else // Lnx + Display* tempDpy_ = 
context().glenv()->glXGetCurrentDisplay_(); + GLXDrawable tempDrawable_ = context().glenv()->glXGetCurrentDrawable_(); + GLXContext tempCtx_ = context().glenv()->glXGetCurrentContext_(); + // Set internal Display and GLXContext + if (tempDpy_ && tempCtx_) { + ret = context().glenv()->glClientWaitSync_(gs, GL_SYNC_FLUSH_COMMANDS_BIT, + static_cast<GLuint64>(-1)); + if (!(ret == GL_ALREADY_SIGNALED || ret == GL_CONDITION_SATISFIED)) return false; + } else { + if (!context().glenv()->init(reinterpret_cast<intptr_t>(context().glenv()->getIntDpy()), + reinterpret_cast<intptr_t>(context().glenv()->getIntCtx()))) + return false; + + // Make the newly created GL context current to this thread; without it the wait below + // would run with no usable GL context + if (!context().glenv()->setIntEnv()) return false; + // If fence has not yet executed, wait till it finishes + ret = context().glenv()->glClientWaitSync_(gs, GL_SYNC_FLUSH_COMMANDS_BIT, + static_cast<GLuint64>(-1)); + // Since we're done making GL calls, restore whatever context was previously current to this + // thread. This happens before the result is inspected so that a failed wait does not leave + // the internal context current on the calling thread. + context().glenv()->restoreEnv(); + if (!(ret == GL_ALREADY_SIGNALED || ret == GL_CONDITION_SATISFIED)) return false; + } +#endif + // If we reach this point, fence should have completed + setStatus(CL_COMPLETE); + return true; +} + +// +// GLFunctions implementation +// + +#ifdef _WIN32 +#define CONVERT_CHAR_GLUBYTE +#else //!_WIN32 +#define CONVERT_CHAR_GLUBYTE (GLubyte*) +#endif //!_WIN32 + +#define GLPREFIX(rtype, fcn, dclargs) \ + if (!(fcn##_ = (PFN_##fcn)GETPROCADDRESS(libHandle_, #fcn))) { \ + if (!(fcn##_ = (PFN_##fcn)GetProcAddress_(reinterpret_cast(#fcn)))) ++missed_; \ + } + +GLFunctions::SetIntEnv::SetIntEnv(GLFunctions* env) : env_(env) { + env_->getLock().lock(); + + // Set environment (DC and GLRC) + isValid_ = env_->setIntEnv(); +} + +GLFunctions::SetIntEnv::~SetIntEnv() { + // Restore environment (CL DC and CL GLRC) + env_->restoreEnv(); + + env_->getLock().unlock(); +} + +GLFunctions::GLFunctions(HMODULE h, bool isEGL) + : libHandle_(h), + missed_(0), + eglDisplay_(EGL_NO_DISPLAY), + 
eglOriginalContext_(EGL_NO_CONTEXT), + eglInternalContext_(EGL_NO_CONTEXT), + eglTempContext_(EGL_NO_CONTEXT), + isEGL_(isEGL), +#ifdef _WIN32 + hOrigGLRC_(0), + hDC_(0), + hIntGLRC_(0) +#else //!_WIN32 + Dpy_(0), + Drawable_(0), + origCtx_(0), + intDpy_(0), + intDrawable_(0), + intCtx_(0), + XOpenDisplay_(NULL), + XCloseDisplay_(NULL), + glXGetCurrentDrawable_(NULL), + glXGetCurrentDisplay_(NULL), + glXGetCurrentContext_(NULL), + glXChooseVisual_(NULL), + glXCreateContext_(NULL), + glXDestroyContext_(NULL), + glXMakeCurrent_(NULL) +#endif //!_WIN32 +{ +#define VERIFY_POINTER(p) \ + if (NULL == p) { \ + missed_++; \ + } + + if (isEGL_) { + GetProcAddress_ = (PFN_xxxGetProcAddress)GETPROCADDRESS(h, "eglGetProcAddress"); + } else { + GetProcAddress_ = (PFN_xxxGetProcAddress)GETPROCADDRESS(h, API_GETPROCADDR); + } +#ifndef _WIN32 + // Initialize pointers to X11/GLX functions + // We can not link with these functions on compile time since we need to support + // console mode. In console mode X server and X server components may be absent. 
+ // Hence linking with X11 or libGL will fail module image loading in console mode.-tzachi cohen + + if (!isEGL_) { + glXGetCurrentDrawable_ = (PFNglXGetCurrentDrawable)GETPROCADDRESS(h, "glXGetCurrentDrawable"); + VERIFY_POINTER(glXGetCurrentDrawable_) + glXGetCurrentDisplay_ = (PFNglXGetCurrentDisplay)GETPROCADDRESS(h, "glXGetCurrentDisplay"); + VERIFY_POINTER(glXGetCurrentDisplay_) + glXGetCurrentContext_ = (PFNglXGetCurrentContext)GETPROCADDRESS(h, "glXGetCurrentContext"); + VERIFY_POINTER(glXGetCurrentContext_) + glXChooseVisual_ = (PFNglXChooseVisual)GETPROCADDRESS(h, "glXChooseVisual"); + VERIFY_POINTER(glXChooseVisual_) + glXCreateContext_ = (PFNglXCreateContext)GETPROCADDRESS(h, "glXCreateContext"); + VERIFY_POINTER(glXCreateContext_) + glXDestroyContext_ = (PFNglXDestroyContext)GETPROCADDRESS(h, "glXDestroyContext"); + VERIFY_POINTER(glXDestroyContext_) + glXMakeCurrent_ = (PFNglXMakeCurrent)GETPROCADDRESS(h, "glXMakeCurrent"); + VERIFY_POINTER(glXMakeCurrent_) + + HMODULE hXModule = (HMODULE)Os::loadLibrary("libX11.so.6"); + if (NULL != hXModule) { + XOpenDisplay_ = (PFNXOpenDisplay)GETPROCADDRESS(hXModule, "XOpenDisplay"); + VERIFY_POINTER(XOpenDisplay_) + XCloseDisplay_ = (PFNXCloseDisplay)GETPROCADDRESS(hXModule, "XCloseDisplay"); + VERIFY_POINTER(XCloseDisplay_) + } else { + missed_ += 2; + } + } +// Initialize pointers to GL functions +#include "gl_functions.hpp" +#else + if (!isEGL_) { + wglCreateContext_ = (PFN_wglCreateContext)GETPROCADDRESS(h, "wglCreateContext"); + VERIFY_POINTER(wglCreateContext_) + wglGetCurrentContext_ = (PFN_wglGetCurrentContext)GETPROCADDRESS(h, "wglGetCurrentContext"); + VERIFY_POINTER(wglGetCurrentContext_) + wglGetCurrentDC_ = (PFN_wglGetCurrentDC)GETPROCADDRESS(h, "wglGetCurrentDC"); + VERIFY_POINTER(wglGetCurrentDC_) + wglDeleteContext_ = (PFN_wglDeleteContext)GETPROCADDRESS(h, "wglDeleteContext"); + VERIFY_POINTER(wglDeleteContext_) + wglMakeCurrent_ = (PFN_wglMakeCurrent)GETPROCADDRESS(h, "wglMakeCurrent"); + 
VERIFY_POINTER(wglMakeCurrent_) + wglShareLists_ = (PFN_wglShareLists)GETPROCADDRESS(h, "wglShareLists"); + VERIFY_POINTER(wglShareLists_) + } +#endif +} + +GLFunctions::~GLFunctions() { +#ifdef _WIN32 + if (hIntGLRC_) { + if (!wglDeleteContext_(hIntGLRC_)) { + DWORD dwErr = GetLastError(); + LogWarning("Cannot delete GLRC"); + } + } +#else //!_WIN32 + if (intDpy_) { + if (intCtx_) { + glXDestroyContext_(intDpy_, intCtx_); + intCtx_ = NULL; + } + XCloseDisplay_(intDpy_); + intDpy_ = NULL; + } +#endif //!_WIN32 +} + +// Records the application's GL handles and creates the internal GL context that shares +// objects with it. +// hdc - EGLDisplay (EGL), HDC (Windows) or Display* (Linux); on Windows/Linux a zero +// value means "use the one current on this thread". +// hglrc - the application's EGLContext / HGLRC / GLXContext. +// For EGL the two handles are only stored. Otherwise returns false if any required entry +// point is missing (missed_ != 0) or if creating the internal context fails. +bool GLFunctions::init(intptr_t hdc, intptr_t hglrc) { + if (isEGL_) { + eglDisplay_ = (EGLDisplay)hdc; + eglOriginalContext_ = (EGLContext)hglrc; + return true; + } + +#ifdef _WIN32 + DWORD err; + + if (missed_) { + return false; + } + + if (!hdc) { + hDC_ = wglGetCurrentDC_(); + } else { + hDC_ = (HDC)hdc; + } + hOrigGLRC_ = (HGLRC)hglrc; + if (!(hIntGLRC_ = wglCreateContext_(hDC_))) { + err = GetLastError(); + return false; + } + if (!wglShareLists_(hOrigGLRC_, hIntGLRC_)) { + err = GetLastError(); + return false; + } + + bool makeCurrentNull = false; + + if (wglGetCurrentContext_() == NULL) { + wglMakeCurrent_(hDC_, hIntGLRC_); + + makeCurrentNull = true; + } + +// Initialize pointers to GL functions +#include "gl_functions.hpp" + + if (makeCurrentNull) { + wglMakeCurrent_(NULL, NULL); + } + + if (missed_ == 0) { + return true; + } +#else //!_WIN32 + if (!missed_) { + if (!hdc) { + Dpy_ = glXGetCurrentDisplay_(); + } else { + Dpy_ = (Display*)hdc; + } + if (!Dpy_) { + // No display was passed in and none is current: DisplayString() below would + // dereference a NULL Display + return false; + } + Drawable_ = glXGetCurrentDrawable_(); + origCtx_ = (GLXContext)hglrc; + + int attribList[] = {GLX_RGBA, None}; + if (!(intDpy_ = XOpenDisplay_(DisplayString(Dpy_)))) { +#if defined(ATI_ARCH_X86) + asm("int $3"); +#endif + // DefaultRootWindow()/DefaultScreen() dereference the display, so fail here + // instead of crashing on a NULL connection + return false; + } + intDrawable_ = DefaultRootWindow(intDpy_); + + XVisualInfo* vis; + int defaultScreen = DefaultScreen(intDpy_); + if (!(vis = glXChooseVisual_(intDpy_, defaultScreen, attribList))) { + return false; + } + if (!(intCtx_ = glXCreateContext_(intDpy_, vis, origCtx_, true))) { + 
return false; + } + return true; + } +#endif //!_WIN32 + return false; +} + +bool GLFunctions::setIntEnv() { + if (isEGL_) { + return true; + } +#ifdef _WIN32 + // Save current DC and GLRC + tempDC_ = wglGetCurrentDC_(); + tempGLRC_ = wglGetCurrentContext_(); + // Set internal DC and GLRC + if (tempDC_ != getDC() || tempGLRC_ != getIntGLRC()) { + if (!wglMakeCurrent_(getDC(), getIntGLRC())) { + DWORD err = GetLastError(); + LogWarning("cannot set internal GL environment"); + return false; + } + } +#else //!_WIN32 + tempDpy_ = glXGetCurrentDisplay_(); + tempDrawable_ = glXGetCurrentDrawable_(); + tempCtx_ = glXGetCurrentContext_(); + // Set internal Display and GLXContext + if (tempDpy_ != getDpy() || tempCtx_ != getIntCtx()) { + if (!glXMakeCurrent_(getIntDpy(), getIntDrawable(), getIntCtx())) { + LogWarning("cannot set internal GL environment"); + return false; + } + } +#endif //!_WIN32 + + return true; +} + +bool GLFunctions::restoreEnv() { + if (isEGL_) { + // eglMakeCurrent( ); + return true; + } +#ifdef _WIN32 + // Restore original DC and GLRC + if (!wglMakeCurrent_(tempDC_, tempGLRC_)) { + DWORD err = GetLastError(); + LogWarning("cannot restore original GL environment"); + return false; + } +#else //!_WIN32 + // Restore Display and GLXContext + if (tempDpy_) { + if (!glXMakeCurrent_(tempDpy_, tempDrawable_, tempCtx_)) { + LogWarning("cannot restore original GL environment"); + return false; + } + } else { + // Just release internal context + if (!glXMakeCurrent_(getIntDpy(), None, NULL)) { + LogWarning("cannot reelase internal GL environment"); + return false; + } + } +#endif //!_WIN32 + + return true; +} + +} // namespace amd diff --git a/projects/hip/vdi/cl_gl_amd.hpp b/projects/hip/vdi/cl_gl_amd.hpp new file mode 100644 index 0000000000..36831fa747 --- /dev/null +++ b/projects/hip/vdi/cl_gl_amd.hpp @@ -0,0 +1,379 @@ +/* Copyright (c) 2010-present Advanced Micro Devices, Inc. 
+ + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. */ + +#ifndef CL_GL_AMD_HPP_ +#define CL_GL_AMD_HPP_ + +#ifdef _WIN32 +#include +#else //!_WIN32 +#include +#endif //!_WIN32 + +#include +#include +#include "CL/cl_gl.h" +#ifndef _WIN32 +#include +#endif //!_WIN32 + +#include +#include +#include + +#include "platform/context.hpp" +#include "platform/command.hpp" + +namespace amd +{ + +//! Class GLObject keeps all the info about the GL object +//! from which the CL object is created +class GLObject : public InteropObject +{ +protected: + cl_gl_object_type clGLType_; //!< CL GL object type + GLenum glTarget_; + GLuint gluiName_; + GLint gliMipLevel_; + GLenum glInternalFormat_; + GLint gliWidth_; + GLint gliHeight_; + GLint gliDepth_; + GLenum glCubemapFace_; + GLsizei glNumSamples_; + +public: +//! 
GLObject constructor initializes member variables + GLObject( + GLenum glTarget, + GLuint gluiName, + GLint gliMipLevel, + GLenum glInternalFormat, + GLint gliWidth, + GLint gliHeight, + GLint gliDepth, + cl_gl_object_type clGLType, + GLenum glCubemapFace, + GLsizei glNumSamples + ): // Initialization of member variables + clGLType_(clGLType), + glTarget_(glTarget), + gluiName_(gluiName), + gliMipLevel_(gliMipLevel), + glInternalFormat_(glInternalFormat), + gliWidth_(gliWidth), + gliHeight_(gliHeight), + gliDepth_(gliDepth), + glCubemapFace_(glCubemapFace), + glNumSamples_(glNumSamples) + { + } + + virtual ~GLObject() {} + virtual GLObject* asGLObject() {return this;} + +//! GLObject query functions to get GL info from member variables + GLenum getGLTarget() const {return glTarget_;} + GLuint getGLName() const {return gluiName_;} + GLint getGLMipLevel() const {return gliMipLevel_;} + GLenum getGLInternalFormat() const {return glInternalFormat_;} + GLint getGLSize() const {return gliWidth_;} + GLint getGLWidth() const {return gliWidth_;} + GLint getGLHeight() const {return gliHeight_;} + GLint getGLDepth() const {return gliDepth_;} + cl_gl_object_type getCLGLObjectType() const { return clGLType_; } + GLenum getCubemapFace() const {return glCubemapFace_;} + GLsizei getNumSamples() const { return glNumSamples_;} +}; + + +//! Class BufferGL is drived from classes Buffer and GLObject +//! where the former keeps all data for CL object and +//! the latter keeps all data for GL object +class BufferGL : public Buffer, public GLObject +{ +protected: + //! Initializes the device memory array which is nested + // after'BufferGL' object in memory layout. + virtual void initDeviceMemory(); +public: +//! BufferGL constructor just calls constructors of base classes +//! 
to pass down the parameters + BufferGL( + Context& amdContext, + cl_mem_flags clFlags, + size_t uiSizeInBytes, + GLenum glTarget, + GLuint gluiName) + : // Call base classes constructors + Buffer( + amdContext, + clFlags, + uiSizeInBytes + ), + GLObject( + glTarget, + gluiName, + 0, // Mipmap level default + GL_ARRAY_BUFFER, // Just init to some value + (GLint) uiSizeInBytes, + 1, + 1, + CL_GL_OBJECT_BUFFER, + 0, + 0 + ) + { + setInteropObj(this); + } + virtual ~BufferGL() {} + + virtual BufferGL* asBufferGL() { return this; } +}; + + +//! Class ImageGL is derived from classes Image and GLObject +//! where the former keeps all data for CL object and +//! the latter keeps all data for GL object +class ImageGL : public Image, public GLObject +{ +public: + //! ImageGL constructor just calls constructors of base classes + //! to pass down the parameters + ImageGL( + Context& amdContext, + cl_mem_object_type clType, + cl_mem_flags clFlags, + const Format& format, + size_t width, + size_t height, + size_t depth, + GLenum glTarget, + GLuint gluiName, + GLint gliMipLevel, + GLenum glInternalFormat, + cl_gl_object_type clGLType, + GLsizei numSamples, + GLenum glCubemapFace = 0) + : Image(amdContext, clType, clFlags, format, width, height, depth, + Format(format).getElementSize() * width, + Format(format).getElementSize() * width * depth) + , GLObject(glTarget, gluiName, gliMipLevel, glInternalFormat, + static_cast(width), static_cast(height), + static_cast(depth), clGLType, glCubemapFace,numSamples) + { + setInteropObj(this); + } + + virtual ~ImageGL() {} + +protected: + //! Initializes the device memory array which is nested + // after'BufferGL' object in memory layout. 
+ virtual void initDeviceMemory(); +}; + +#ifdef _WIN32 +#define APICALL WINAPI +#define GETPROCADDRESS GetProcAddress +#define API_GETPROCADDR "wglGetProcAddress" +#define FCN_STR_TYPE LPCSTR + typedef PROC (WINAPI* PFN_xxxGetProcAddress) (LPCSTR fcnName); + typedef HGLRC (APICALL* PFN_wglCreateContext) (HDC hdc); + typedef HGLRC (APICALL* PFN_wglGetCurrentContext) (void); + typedef HDC (APICALL* PFN_wglGetCurrentDC) (void); + typedef BOOL (APICALL* PFN_wglDeleteContext) (HGLRC hglrc); + typedef BOOL (APICALL* PFN_wglMakeCurrent) (HDC hdc, HGLRC hglrc); + typedef BOOL (APICALL* PFN_wglShareLists) (HGLRC hglrc1, HGLRC hglrc2); +#else //!_WIN32 +#define APICALL // __stdcall //??? todo odintsov +#define API_GETPROCADDR "glXGetProcAddress" +#define GETPROCADDRESS dlsym +#define FCN_STR_TYPE const GLubyte* +#define WINAPI +#define PROC void* + typedef void* (*PFN_xxxGetProcAddress) (const GLubyte* procName); + // X11 typedef + typedef Display* (*PFNXOpenDisplay)(_Xconst char* display_name ); + typedef int (*PFNXCloseDisplay)(Display* display ); + + //glx typedefs + typedef GLXDrawable (*PFNglXGetCurrentDrawable)(); + typedef Display* (*PFNglXGetCurrentDisplay)(); + typedef GLXContext (*PFNglXGetCurrentContext)( void ); + typedef XVisualInfo* (*PFNglXChooseVisual)(Display *dpy, int screen, int *attribList); + typedef GLXContext(*PFNglXCreateContext)(Display* dpy,XVisualInfo* vis,GLXContext shareList,Bool direct); + typedef void(*PFNglXDestroyContext)(Display* dpy, GLXContext ctx); + typedef Bool(*PFNglXMakeCurrent)( Display* dpy, GLXDrawable drawable, GLXContext ctx); + typedef void* HMODULE; +#endif //!_WIN32 + +#define GLPREFIX(rtype, fcn, dclargs) \ + typedef rtype (APICALL* PFN_##fcn) dclargs; + +// Declare prototypes for GL functions +#include "gl_functions.hpp" + +class GLFunctions +{ +public: + //! Locks any access to the virtual GPUs + class SetIntEnv : public amd::StackObject { + public: + //! Default constructor + SetIntEnv(GLFunctions* env); + + //! 
Destructor + ~SetIntEnv(); + + //! Checks if the environment setup was successful + bool isValid() const { return isValid_; } + + private: + GLFunctions* env_; //!< GL environment + bool isValid_; //!< If TRUE, then it's a valid setup + }; + +private: + HMODULE libHandle_; + int missed_; // Indicates how many GL functions not init'ed, if any + + amd::Monitor lock_; + + EGLDisplay eglDisplay_; + EGLContext eglOriginalContext_; + EGLContext eglInternalContext_; + EGLContext eglTempContext_; + bool isEGL_; + +#ifdef _WIN32 + HGLRC hOrigGLRC_; + HDC hDC_; + HGLRC hIntGLRC_; // handle for internal GLRC to access shared context + HDC tempDC_; + HGLRC tempGLRC_; + + PFN_wglCreateContext wglCreateContext_; + PFN_wglGetCurrentContext wglGetCurrentContext_; + PFN_wglGetCurrentDC wglGetCurrentDC_; + PFN_wglDeleteContext wglDeleteContext_; + PFN_wglMakeCurrent wglMakeCurrent_; + PFN_wglShareLists wglShareLists_; +#else +public: + Display* Dpy_; + GLXDrawable Drawable_; + GLXContext origCtx_; + Display* intDpy_; + Window intDrawable_; + GLXContext intCtx_; + Display* tempDpy_; + GLXDrawable tempDrawable_; + GLXContext tempCtx_; + + //pointers to X11 functions + PFNXOpenDisplay XOpenDisplay_; + PFNXCloseDisplay XCloseDisplay_; + + //pointers to GLX functions + PFNglXGetCurrentDrawable glXGetCurrentDrawable_; + PFNglXGetCurrentDisplay glXGetCurrentDisplay_; + PFNglXGetCurrentContext glXGetCurrentContext_; + PFNglXChooseVisual glXChooseVisual_; + PFNglXCreateContext glXCreateContext_; + PFNglXDestroyContext glXDestroyContext_; + PFNglXMakeCurrent glXMakeCurrent_; +#endif +public: + + GLFunctions(HMODULE h, bool isEGL); + ~GLFunctions(); + + // Query CL-GL context association + bool isAssociated() const + { + if (isEGL_ && eglDisplay_ && eglOriginalContext_) return true; +#ifdef _WIN32 + if(hDC_ && hOrigGLRC_) return true; +#else //!_WIN32 + if(Dpy_ && origCtx_) return true; +#endif //!_WIN32 + return false; + } + bool isEGL() const + { + return isEGL_; + } + // Accessor methods 
+#ifdef _WIN32 + HGLRC getOrigGLRC() const {return hOrigGLRC_;} + HDC getDC() const {return hDC_;} + HGLRC getIntGLRC() const {return hIntGLRC_;} +#else //!_WIN32 + Display* getDpy() const {return Dpy_;} + GLXDrawable getDrawable() const {return Drawable_;} + GLXContext getOrigCtx() const {return origCtx_;} + + Display* getIntDpy() const {return intDpy_;} + GLXDrawable getIntDrawable() const {return intDrawable_;} + GLXContext getIntCtx() const {return intCtx_;} + + EGLDisplay getEglDpy() const { return eglDisplay_; } + EGLContext getEglOrigCtx() const { return eglOriginalContext_; } +#endif //!_WIN32 + + // Initialize GL dynamic library and function pointers + bool init(intptr_t hdc, intptr_t hglrc); + + // Return true if successful, false - if error occurred + bool setIntEnv(); + bool restoreEnv(); + + amd::Monitor& getLock() { return lock_; } + + PFN_xxxGetProcAddress GetProcAddress_; + +#define GLPREFIX(rtype, fcn, dclargs) \ + PFN_##fcn fcn##_; +// Declare pointers to GL functions +#include "gl_functions.hpp" +}; + +//! Functions for executing the GL related stuff +cl_mem clCreateFromGLBufferAMD(Context& amdContext, cl_mem_flags flags, + GLuint bufobj, cl_int* errcode_ret); +cl_mem clCreateFromGLTextureAMD(Context& amdContext, cl_mem_flags flags, + GLenum target, GLint miplevel, GLuint texture, int* errcode_ret); +cl_mem clCreateFromGLRenderbufferAMD(Context& amdContext, cl_mem_flags flags, + GLuint renderbuffer, int* errcode_ret); + +bool +getCLFormatFromGL( + const Context& amdContext, + GLint gliInternalFormat, + cl_image_format* pclImageFormat, + int* piBytesPerPixel, + cl_mem_flags flags +); + +} //namespace amd + +#endif //CL_GL_AMD_HPP_ diff --git a/projects/hip/vdi/cl_lqdflash_amd.cpp b/projects/hip/vdi/cl_lqdflash_amd.cpp new file mode 100644 index 0000000000..b7bea3db2b --- /dev/null +++ b/projects/hip/vdi/cl_lqdflash_amd.cpp @@ -0,0 +1,310 @@ +/* Copyright (c) 2015-present Advanced Micro Devices, Inc. 
+ + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. 
*/ + +#include "cl_common.hpp" +#include + +#include "platform/object.hpp" + +#include "cl_lqdflash_amd.h" + +#if (!defined(BUILD_HSA_TARGET) && defined(WITH_HSA_DEVICE) && \ + defined(WITH_AMDGPU_PRO)) || defined(_WIN32) || defined(WITH_PAL_DEVICE) +#define WITH_LIQUID_FLASH 1 +#endif // _WIN32 + +#if defined(WITH_LIQUID_FLASH) +#include "lf.h" +#include +#include +#endif // WITH_LIQUID_FLASH + +namespace amd { + +LiquidFlashFile::~LiquidFlashFile() { close(); } + +bool LiquidFlashFile::open() { +#if defined WITH_LIQUID_FLASH + lf_status err; + lf_file_flags flags = 0; + + switch (flags_) { + case CL_FILE_READ_ONLY_AMD: + flags = LF_READ; + break; + case CL_FILE_WRITE_ONLY_AMD: + flags = LF_WRITE; + break; + case CL_FILE_READ_WRITE_AMD: + flags = LF_READ | LF_WRITE; + break; + } +#ifdef ATI_OS_LINUX + assert(sizeof(wchar_t) != sizeof(lf_char)); + std::string name_char; + std::wstring_convert, wchar_t> cv; + name_char = cv.to_bytes(name_); + handle_ = lfOpenFile(name_char.c_str(), flags, &err); +#else + handle_ = lfOpenFile(name_.c_str(), flags, &err); +#endif + + if (err != lf_success) { + return false; + } + + if (lfGetFileBlockSize((lf_file)handle_, &blockSize_) != lf_success) { + return false; + } + + if (lfGetFileSize((lf_file)handle_, &fileSize_) != lf_success) { + return false; + } + return true; +#else + return false; +#endif // WITH_LIQUID_FLASH +} + +void LiquidFlashFile::close() { +#if defined WITH_LIQUID_FLASH + if (handle_ != NULL) { + lfReleaseFile((lf_file)handle_); + handle_ = NULL; + } +#endif // WITH_LIQUID_FLASH +} + +bool LiquidFlashFile::transferBlock(bool writeBuffer, void* srcDst, uint64_t bufferSize, + uint64_t fileOffset, uint64_t bufferOffset, + uint64_t size) const { +#if defined WITH_LIQUID_FLASH + lf_status status; + + lf_region_descriptor region = {fileOffset / blockSize(), bufferOffset / blockSize(), + size / blockSize()}; + if (writeBuffer) { + status = lfReadFile(srcDst, bufferSize, (lf_file)handle_, 1, ®ion, NULL); + } else { + 
status = lfWriteFile(srcDst, bufferSize, (lf_file)handle_, 1, ®ion, NULL); + } + if (lf_success == status) { + return true; + } else { + return false; + } +#else + return false; +#endif // WITH_LIQUID_FLASH +} + +} // namespace amd + +/*! \addtogroup API + * @{ + * + * \addtogroup AMD_Extensions + * @{ + * + */ + +RUNTIME_ENTRY_RET(cl_file_amd, clCreateSsgFileObjectAMD, + (cl_context context, cl_file_flags_amd flags, const wchar_t* file_name, + cl_int* errcode_ret)) { +#if defined WITH_LIQUID_FLASH && defined ATI_OS_LINUX + if (!is_valid(context)) { + *not_null(errcode_ret) = CL_INVALID_CONTEXT; + LogWarning("invalid parameter \"context\""); + return (cl_file_amd)0; + } + + const std::vector& devices = as_amd(context)->devices(); + bool supportPass = false; + for (auto& dev : devices) { + if (lf_success == lfCheckExtensionSupportForDevice(dev->info().pcieDeviceId_, + dev->info().pcieRevisionId_)) { + supportPass = true; + break; + } + } + if (!supportPass) { + *not_null(errcode_ret) = CL_INVALID_DEVICE; + LogWarning("SSG isn't supported"); + return (cl_file_amd)0; + } +#endif + amd::LiquidFlashFile* file = new amd::LiquidFlashFile(file_name, flags); + + if (file == NULL) { + *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; + return (cl_file_amd)0; + } + + if (!file->open()) { + *not_null(errcode_ret) = CL_INVALID_VALUE; + delete file; + return (cl_file_amd)0; + } + + *not_null(errcode_ret) = CL_SUCCESS; + return as_cl(file); +} +RUNTIME_EXIT + +RUNTIME_ENTRY(cl_int, clGetSsgFileObjectInfoAMD, + (cl_file_amd file, cl_file_info_amd param_name, size_t param_value_size, + void* param_value, size_t* param_value_size_ret)) { + if (!is_valid(file)) { + return CL_INVALID_FILE_OBJECT_AMD; + } + + switch (param_name) { + case CL_FILE_BLOCK_SIZE_AMD: { + cl_uint blockSize = as_amd(file)->blockSize(); + return amd::clGetInfo(blockSize, param_value_size, param_value, param_value_size_ret); + } + case CL_FILE_SIZE_AMD: { + cl_ulong fileSize = as_amd(file)->fileSize(); + return 
amd::clGetInfo(fileSize, param_value_size, param_value, param_value_size_ret); + } + default: + break; + } + + return CL_INVALID_VALUE; +} +RUNTIME_EXIT + +RUNTIME_ENTRY(cl_int, clRetainSsgFileObjectAMD, (cl_file_amd file)) { + if (!is_valid(file)) { + return CL_INVALID_FILE_OBJECT_AMD; + } + as_amd(file)->retain(); + return CL_SUCCESS; +} +RUNTIME_EXIT + +RUNTIME_ENTRY(cl_int, clReleaseSsgFileObjectAMD, (cl_file_amd file)) { + if (!is_valid(file)) { + return CL_INVALID_FILE_OBJECT_AMD; + } + as_amd(file)->release(); + return CL_SUCCESS; +} +RUNTIME_EXIT + +static cl_int EnqueueTransferBufferFromSsgFileAMD( + cl_bool isWrite, cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_write, + size_t buffer_offset, size_t cb, cl_file_amd file, size_t file_offset, + cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event) { + if (!is_valid(command_queue)) { + return CL_INVALID_COMMAND_QUEUE; + } + + if (!is_valid(buffer)) { + return CL_INVALID_MEM_OBJECT; + } + amd::Buffer* pBuffer = as_amd(buffer)->asBuffer(); + if (pBuffer == NULL) { + return CL_INVALID_MEM_OBJECT; + } + + if (pBuffer->getMemFlags() & (CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS)) { + return CL_INVALID_OPERATION; + } + + amd::HostQueue* queue = as_amd(command_queue)->asHostQueue(); + if (NULL == queue) { + return CL_INVALID_COMMAND_QUEUE; + } + amd::HostQueue& hostQueue = *queue; + + if (hostQueue.context() != pBuffer->getContext()) { + return CL_INVALID_CONTEXT; + } + + if (!is_valid(file)) { + return CL_INVALID_FILE_OBJECT_AMD; + } + + amd::LiquidFlashFile* amdFile = as_amd(file); + amd::Coord3D bufferOffset(buffer_offset, 0, 0); + amd::Coord3D bufferSize(cb, 1, 1); + + if ((!pBuffer->validateRegion(bufferOffset, bufferSize)) || + // LF library supports aligned sizes only + ((buffer_offset % amdFile->blockSize()) != 0) || ((cb % amdFile->blockSize()) != 0) || + ((file_offset % amdFile->blockSize()) != 0)) { + return CL_INVALID_VALUE; + } + + 
amd::Command::EventWaitList eventWaitList; + cl_int err = amd::clSetEventWaitList(eventWaitList, hostQueue, num_events_in_wait_list, + event_wait_list); + if (err != CL_SUCCESS) { + return err; + } + + amd::TransferBufferFileCommand* command; + command = new amd::TransferBufferFileCommand( + isWrite ? CL_COMMAND_READ_SSG_FILE_AMD : CL_COMMAND_WRITE_SSG_FILE_AMD, hostQueue, + eventWaitList, *pBuffer, bufferOffset, bufferSize, amdFile, file_offset); + + if (command == NULL) { + return CL_OUT_OF_HOST_MEMORY; + } + + // Make sure we have memory for the command execution + if (!command->validateMemory()) { + delete command; + return CL_MEM_OBJECT_ALLOCATION_FAILURE; + } + + command->enqueue(); + if (blocking_write) { + command->awaitCompletion(); + } + + *not_null(event) = as_cl(&command->event()); + if (event == NULL) { + command->release(); + } + return CL_SUCCESS; +} + +RUNTIME_ENTRY(cl_int, clEnqueueReadSsgFileAMD, + (cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_write, + size_t buffer_offset, size_t cb, cl_file_amd file, size_t file_offset, + cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event)) { + return EnqueueTransferBufferFromSsgFileAMD(CL_TRUE, command_queue, buffer, blocking_write, + buffer_offset, cb, file, file_offset, + num_events_in_wait_list, event_wait_list, event); +} +RUNTIME_EXIT + +RUNTIME_ENTRY(cl_int, clEnqueueWriteSsgFileAMD, + (cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_write, + size_t buffer_offset, size_t cb, cl_file_amd file, size_t file_offset, + cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event)) { + return EnqueueTransferBufferFromSsgFileAMD(CL_FALSE, command_queue, buffer, blocking_write, + buffer_offset, cb, file, file_offset, + num_events_in_wait_list, event_wait_list, event); +} +RUNTIME_EXIT diff --git a/projects/hip/vdi/cl_lqdflash_amd.h b/projects/hip/vdi/cl_lqdflash_amd.h new file mode 100644 index 0000000000..5a3e725b4c --- 
/dev/null +++ b/projects/hip/vdi/cl_lqdflash_amd.h @@ -0,0 +1,58 @@ +/* Copyright (c) 2015-present Advanced Micro Devices, Inc. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. 
*/ + /* C declarations of the AMD SSG (LiquidFlash) file-object entry points. */ +#ifndef __CL_LQDFLASH_AMD_H +#define __CL_LQDFLASH_AMD_H + +#include "CL/cl_ext.h" + +#ifdef __cplusplus +extern "C" { +#endif /*__cplusplus*/ + /* Creates a file object for file_name with the given access flags; the status code is written through errcode_ret. */ +extern CL_API_ENTRY cl_file_amd CL_API_CALL +clCreateSsgFileObjectAMD(cl_context context, cl_file_flags_amd flags, const wchar_t* file_name, + cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_2; + /* Queries a property of a file object (the implementation answers CL_FILE_BLOCK_SIZE_AMD and CL_FILE_SIZE_AMD). */ +extern CL_API_ENTRY cl_int CL_API_CALL clGetSsgFileObjectInfoAMD( + cl_file_amd file, cl_file_info_amd param_name, size_t param_value_size, void* param_value, + size_t* param_value_size_ret) CL_EXT_SUFFIX__VERSION_1_2; + /* Increments the reference count of a file object. */ +extern CL_API_ENTRY cl_int CL_API_CALL clRetainSsgFileObjectAMD(cl_file_amd file) + CL_EXT_SUFFIX__VERSION_1_2; + /* Decrements the reference count of a file object. */ +extern CL_API_ENTRY cl_int CL_API_CALL clReleaseSsgFileObjectAMD(cl_file_amd file) + CL_EXT_SUFFIX__VERSION_1_2; + /* Enqueues a CL_COMMAND_READ_SSG_FILE_AMD transfer of cb bytes between buffer (at buffer_offset) and file (at file_offset); the implementation requires both offsets and cb to be multiples of the file block size. */ +extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueReadSsgFileAMD( + cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_write, size_t buffer_offset, + size_t cb, cl_file_amd file, size_t file_offset, cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, cl_event* event) CL_EXT_SUFFIX__VERSION_1_2; + /* Enqueues a CL_COMMAND_WRITE_SSG_FILE_AMD transfer; same parameters and alignment rules as clEnqueueReadSsgFileAMD. */ +extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueWriteSsgFileAMD( + cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_write, size_t buffer_offset, + size_t cb, cl_file_amd file, size_t file_offset, cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, cl_event* event) CL_EXT_SUFFIX__VERSION_1_2; + +#ifdef __cplusplus +} /*extern "C"*/ +#endif /*__cplusplus*/ + +#endif diff --git a/projects/hip/vdi/fixme.cpp b/projects/hip/vdi/fixme.cpp new file mode 100644 index 0000000000..90f034f63e --- /dev/null +++ b/projects/hip/vdi/fixme.cpp @@ -0,0 +1,32 @@ +/* Copyright (c) 2015-present Advanced Micro Devices, Inc. 
+ + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. */ + +#include "vdi_common.hpp" +#include + /* NOTE(review): the header name of the #include above appears to have been lost in extraction - restore it from the original file. The two definitions below provide a zero-initialised vendor dispatch table and the platform object that is initialised from it. */ +cl_icd_dispatch amd::ICDDispatchedObject::icdVendorDispatch_[] = {0}; +amd::PlatformIDS amd::PlatformID::Platform = {amd::ICDDispatchedObject::icdVendorDispatch_}; + /* Placeholder entry point: returns CL_SUCCESS without reading its arguments and without writing devices or num_devices. NOTE(review): a caller that reads those outputs would see unset values - confirm this stub is never reached with output pointers. */ +RUNTIME_ENTRY(cl_int, clGetDeviceIDs, + (cl_platform_id platform, cl_device_type device_type, cl_uint num_entries, + cl_device_id* devices, cl_uint* num_devices)) { + return CL_SUCCESS; +} +RUNTIME_EXIT diff --git a/projects/hip/vdi/hip_activity.cpp b/projects/hip/vdi/hip_activity.cpp new file mode 100644 index 0000000000..d3ce84bfd9 --- /dev/null +++ b/projects/hip/vdi/hip_activity.cpp @@ -0,0 +1,35 @@ +/* Copyright (c) 2015-present Advanced Micro Devices, Inc. 
+ + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. */ + +#include "platform/activity.hpp" + /* Forwards the two callbacks and the user argument to activity_prof::CallbacksTable::init. NOTE(review): the target types of both reinterpret_cast expressions appear to have been stripped in extraction - restore them from the original file. */ +extern "C" void hipInitActivityCallback(void* id_callback, void* op_callback, void* arg) { + activity_prof::CallbacksTable::init(reinterpret_cast(id_callback), + reinterpret_cast(op_callback), + arg); +} + /* Enables or disables activity reporting for operation op; returns whatever CallbacksTable::SetEnabled returns. */ +extern "C" bool hipEnableActivityCallback(unsigned op, bool enable) { + return activity_prof::CallbacksTable::SetEnabled(op, enable); +} + /* Returns the string produced by getOclCommandKindString for op. NOTE(review): the static_cast target type was lost in extraction as well. */ +extern "C" const char* hipGetCmdName(unsigned op) { + return getOclCommandKindString(static_cast(op)); +} diff --git a/projects/hip/vdi/hip_context.cpp b/projects/hip/vdi/hip_context.cpp new file mode 100644 index 0000000000..440c3f4b47 --- /dev/null +++ b/projects/hip/vdi/hip_context.cpp @@ -0,0 +1,373 @@ +/* Copyright (c) 2015-present Advanced Micro Devices, Inc. 
+ + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. 
*/ + +#include +#include "hip_internal.hpp" +#include "platform/runtime.hpp" +#include "utils/flags.hpp" +#include "utils/versions.hpp" + +std::vector g_devices; + +namespace hip { + +thread_local Device* g_device = nullptr; +thread_local std::stack g_ctxtStack; +thread_local hipError_t g_lastError = hipSuccess; +std::once_flag g_ihipInitialized; +Device* host_device = nullptr; + +void init() { + if (!amd::Runtime::initialized()) { + amd::IS_HIP = true; + GPU_NUM_MEM_DEPENDENCY = 0; + amd::Runtime::init(); + } + + const std::vector& devices = amd::Device::getDevices(CL_DEVICE_TYPE_GPU, false); + + for (unsigned int i=0; i device(1, devices[i]); + amd::Context* context = new amd::Context(device, amd::Context::Info()); + if (!context) return; + + // Enable active wait on the device by default + devices[i]->SetActiveWait(true); + + if (context && CL_SUCCESS != context->create(nullptr)) { + context->release(); + } else { + g_devices.push_back(new Device(context, i)); + } + } + + amd::Context* hContext = new amd::Context(devices, amd::Context::Info()); + if (!hContext) return; + + if (CL_SUCCESS != hContext->create(nullptr)) { + hContext->release(); + } + host_device = new Device(hContext, -1); + + PlatformState::instance().init(); +} + +Device* getCurrentDevice() { + return g_device; +} + +void setCurrentDevice(unsigned int index) { + assert(index(stream); + if ((s->flags & hipStreamNonBlocking) == 0) { + getNullStream()->finish(); + } + return s->asHostQueue(); + } +} + +amd::HostQueue* getNullStream(amd::Context& ctx) { + for (auto& it : g_devices) { + if (it->asContext() == &ctx) { + return it->defaultStream(); + } + } + return nullptr; +} + +amd::HostQueue* getNullStream() { + Device* device = getCurrentDevice(); + return device ? 
device->defaultStream() : nullptr; +} + +}; + +using namespace hip; + +hipError_t hipInit(unsigned int flags) { + HIP_INIT_API(hipInit, flags); + + HIP_RETURN(hipSuccess); +} + +hipError_t hipCtxCreate(hipCtx_t *ctx, unsigned int flags, hipDevice_t device) { + HIP_INIT_API(hipCtxCreate, ctx, flags, device); + + if (static_cast(device) >= g_devices.size()) { + HIP_RETURN(hipErrorInvalidValue); + } + + *ctx = reinterpret_cast(g_devices[device]); + + // Increment ref count for device primary context + g_devices[device]->retain(); + g_ctxtStack.push(g_devices[device]); + + HIP_RETURN(hipSuccess); +} + +hipError_t hipCtxSetCurrent(hipCtx_t ctx) { + HIP_INIT_API(hipCtxSetCurrent, ctx); + + if (ctx == nullptr) { + if(!g_ctxtStack.empty()) { + g_ctxtStack.pop(); + } + } else { + hip::g_device = reinterpret_cast(ctx); + if(!g_ctxtStack.empty()) { + g_ctxtStack.pop(); + } + g_ctxtStack.push(hip::getCurrentDevice()); + } + + HIP_RETURN(hipSuccess); +} + +hipError_t hipCtxGetCurrent(hipCtx_t* ctx) { + HIP_INIT_API(hipCtxGetCurrent, ctx); + + *ctx = reinterpret_cast(hip::getCurrentDevice()); + + HIP_RETURN(hipSuccess); +} + +hipError_t hipCtxGetSharedMemConfig(hipSharedMemConfig* pConfig) { + HIP_INIT_API(hipCtxGetSharedMemConfig, pConfig); + + *pConfig = hipSharedMemBankSizeFourByte; + + HIP_RETURN(hipSuccess); +} + +hipError_t hipRuntimeGetVersion(int *runtimeVersion) { + HIP_INIT_API(hipRuntimeGetVersion, runtimeVersion); + + if (!runtimeVersion) { + HIP_RETURN(hipErrorInvalidValue); + } + + *runtimeVersion = AMD_PLATFORM_BUILD_NUMBER; + + HIP_RETURN(hipSuccess); +} + +hipError_t hipCtxDestroy(hipCtx_t ctx) { + HIP_INIT_API(hipCtxDestroy, ctx); + + hip::Device* dev = reinterpret_cast(ctx); + if (dev == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + // Release last tracked command + hip::getNullStream()->setLastQueuedCommand(nullptr); + + // Need to remove the ctx of calling thread if its the top one + if (!g_ctxtStack.empty() && g_ctxtStack.top() == dev) { + 
g_ctxtStack.pop(); + } + + // Remove context from global context list + for (unsigned int i = 0; i < g_devices.size(); i++) { + if (g_devices[i] == dev) { + // Decrement ref count for device primary context + dev->release(); + } + } + + HIP_RETURN(hipSuccess); +} + +hipError_t hipCtxPopCurrent(hipCtx_t* ctx) { + HIP_INIT_API(hipCtxPopCurrent, ctx); + + hip::Device** dev = reinterpret_cast(ctx); + if (dev == nullptr) { + HIP_RETURN(hipErrorInvalidContext); + } + + if (!g_ctxtStack.empty()) { + *dev = g_ctxtStack.top(); + g_ctxtStack.pop(); + } else { + HIP_RETURN(hipErrorInvalidContext); + } + + HIP_RETURN(hipSuccess); +} + +hipError_t hipCtxPushCurrent(hipCtx_t ctx) { + HIP_INIT_API(hipCtxPushCurrent, ctx); + + hip::Device* dev = reinterpret_cast(ctx); + if (dev == nullptr) { + HIP_RETURN(hipErrorInvalidContext); + } + + hip::g_device = dev; + g_ctxtStack.push(hip::getCurrentDevice()); + + HIP_RETURN(hipSuccess); +} + +hipError_t hipDriverGetVersion(int* driverVersion) { + HIP_INIT_API(hipDriverGetVersion, driverVersion); + + auto* deviceHandle = g_devices[0]->devices()[0]; + const auto& info = deviceHandle->info(); + + if (driverVersion) { + *driverVersion = AMD_PLATFORM_BUILD_NUMBER * 100 + + AMD_PLATFORM_REVISION_NUMBER; + } else { + HIP_RETURN(hipErrorInvalidValue); + } + + HIP_RETURN(hipSuccess); +} + +hipError_t hipCtxGetDevice(hipDevice_t* device) { + HIP_INIT_API(hipCtxGetDevice, device); + + if (device != nullptr) { + *device = hip::getCurrentDevice()->deviceId(); + HIP_RETURN(hipSuccess); + } else { + HIP_RETURN(hipErrorInvalidValue); + } + + HIP_RETURN(hipErrorInvalidContext); +} + +hipError_t hipCtxGetApiVersion(hipCtx_t ctx, int* apiVersion) { + HIP_INIT_API(hipCtxGetApiVersion, apiVersion); + + assert(0 && "Unimplemented"); + + HIP_RETURN(hipErrorNotSupported); +} + +hipError_t hipCtxGetCacheConfig(hipFuncCache_t* cacheConfig) { + HIP_INIT_API(hipCtxGetCacheConfig, cacheConfig); + + assert(0 && "Unimplemented"); + + HIP_RETURN(hipErrorNotSupported); 
+} + +hipError_t hipCtxSetCacheConfig(hipFuncCache_t cacheConfig) { + HIP_INIT_API(hipCtxSetCacheConfig, cacheConfig); + + assert(0 && "Unimplemented"); + + HIP_RETURN(hipErrorNotSupported); +} + +hipError_t hipCtxSetSharedMemConfig(hipSharedMemConfig config) { + HIP_INIT_API(hipCtxSetSharedMemConfig, config); + + assert(0 && "Unimplemented"); + + HIP_RETURN(hipErrorNotSupported); +} + +hipError_t hipCtxSynchronize(void) { + HIP_INIT_API(hipCtxSynchronize, 1); + + assert(0 && "Unimplemented"); + + HIP_RETURN(hipErrorNotSupported); +} + +hipError_t hipCtxGetFlags(unsigned int* flags) { + HIP_INIT_API(hipCtxGetFlags, flags); + + assert(0 && "Unimplemented"); + + HIP_RETURN(hipErrorNotSupported); +} + +hipError_t hipDevicePrimaryCtxGetState(hipDevice_t dev, unsigned int* flags, int* active) { + HIP_INIT_API(hipDevicePrimaryCtxGetState, dev, flags, active); + + if (static_cast(dev) >= g_devices.size()) { + HIP_RETURN(hipErrorInvalidDevice); + } + + if (flags != nullptr) { + *flags = 0; + } + + if (active != nullptr) { + *active = (g_devices[dev] == hip::getCurrentDevice())? 
1 : 0; + } + + HIP_RETURN(hipSuccess); +} + +hipError_t hipDevicePrimaryCtxRelease(hipDevice_t dev) { + HIP_INIT_API(hipDevicePrimaryCtxRelease, dev); + + if (static_cast(dev) >= g_devices.size()) { + HIP_RETURN(hipErrorInvalidDevice); + } + + HIP_RETURN(hipSuccess); +} + +hipError_t hipDevicePrimaryCtxRetain(hipCtx_t* pctx, hipDevice_t dev) { + HIP_INIT_API(hipDevicePrimaryCtxRetain, pctx, dev); + + if (static_cast(dev) >= g_devices.size()) { + HIP_RETURN(hipErrorInvalidDevice); + } + if (pctx == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + *pctx = reinterpret_cast(g_devices[dev]); + + HIP_RETURN(hipSuccess); +} + +hipError_t hipDevicePrimaryCtxReset(hipDevice_t dev) { + HIP_INIT_API(hipDevicePrimaryCtxReset, dev); + + HIP_RETURN(hipSuccess); +} + +hipError_t hipDevicePrimaryCtxSetFlags(hipDevice_t dev, unsigned int flags) { + HIP_INIT_API(hipDevicePrimaryCtxSetFlags, dev, flags); + + if (static_cast(dev) >= g_devices.size()) { + HIP_RETURN(hipErrorInvalidDevice); + } else { + HIP_RETURN(hipErrorContextAlreadyInUse); + } +} diff --git a/projects/hip/vdi/hip_conversions.hpp b/projects/hip/vdi/hip_conversions.hpp new file mode 100644 index 0000000000..2a78617ad9 --- /dev/null +++ b/projects/hip/vdi/hip_conversions.hpp @@ -0,0 +1,903 @@ +/* +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +#include +#include + +namespace hip +{ +inline +cl_channel_type getCLChannelType(const hipArray_Format hipFormat, + const hipTextureReadMode hipReadMode) { + if (hipReadMode == hipReadModeElementType) { + switch (hipFormat) { + case HIP_AD_FORMAT_UNSIGNED_INT8: + return CL_UNSIGNED_INT8; + case HIP_AD_FORMAT_SIGNED_INT8: + return CL_SIGNED_INT8; + case HIP_AD_FORMAT_UNSIGNED_INT16: + return CL_UNSIGNED_INT16; + case HIP_AD_FORMAT_SIGNED_INT16: + return CL_SIGNED_INT16; + case HIP_AD_FORMAT_UNSIGNED_INT32: + return CL_UNSIGNED_INT32; + case HIP_AD_FORMAT_SIGNED_INT32: + return CL_SIGNED_INT32; + case HIP_AD_FORMAT_HALF: + return CL_HALF_FLOAT; + case HIP_AD_FORMAT_FLOAT: + return CL_FLOAT; + } + } else if (hipReadMode == hipReadModeNormalizedFloat) { + switch (hipFormat) { + case HIP_AD_FORMAT_UNSIGNED_INT8: + return CL_UNORM_INT8; + case HIP_AD_FORMAT_SIGNED_INT8: + return CL_SNORM_INT8; + case HIP_AD_FORMAT_UNSIGNED_INT16: + return CL_UNORM_INT16; + case HIP_AD_FORMAT_SIGNED_INT16: + return CL_SNORM_INT16; + case HIP_AD_FORMAT_UNSIGNED_INT32: + return CL_UNSIGNED_INT32; + case HIP_AD_FORMAT_SIGNED_INT32: + return CL_SIGNED_INT32; + case HIP_AD_FORMAT_HALF: + return CL_HALF_FLOAT; + case HIP_AD_FORMAT_FLOAT: + return CL_FLOAT; + } + } + + ShouldNotReachHere(); + + return {}; +} + +inline +cl_channel_order getCLChannelOrder(const unsigned int hipNumChannels, + const int sRGB) { + switch (hipNumChannels) { + case 1: + return CL_R; + case 2: + return CL_RG; + case 4: + return (sRGB == 
1) ? CL_sRGBA : CL_RGBA; + default: + break; + } + + ShouldNotReachHere(); + + return {}; +} + +inline +cl_mem_object_type getCLMemObjectType(const unsigned int hipWidth, + const unsigned int hipHeight, + const unsigned int hipDepth, + const unsigned int flags) { + if (flags == hipArrayDefault) { + if ((hipWidth != 0) && (hipHeight == 0) && (hipDepth == 0)) { + return CL_MEM_OBJECT_IMAGE1D; + } else if ((hipWidth != 0) && (hipHeight != 0) && (hipDepth == 0)) { + return CL_MEM_OBJECT_IMAGE2D; + } else if ((hipWidth != 0) && (hipHeight != 0) && (hipDepth != 0)) { + return CL_MEM_OBJECT_IMAGE3D; + } + } else if (flags == hipArrayLayered) { + if ((hipWidth != 0) && (hipHeight == 0) && (hipDepth != 0)) { + return CL_MEM_OBJECT_IMAGE1D_ARRAY; + } else if ((hipWidth != 0) && (hipHeight != 0) && (hipDepth != 0)) { + return CL_MEM_OBJECT_IMAGE2D_ARRAY; + } + } + + ShouldNotReachHere(); + + return {}; +} + +inline +cl_addressing_mode getCLAddressingMode(const hipTextureAddressMode hipAddressMode) { + switch (hipAddressMode) { + case hipAddressModeWrap: + return CL_ADDRESS_REPEAT; + case hipAddressModeClamp: + return CL_ADDRESS_CLAMP; + case hipAddressModeMirror: + return CL_ADDRESS_MIRRORED_REPEAT; + case hipAddressModeBorder: + return CL_ADDRESS_CLAMP_TO_EDGE; + } + + ShouldNotReachHere(); + + return {}; +} + +inline +cl_filter_mode getCLFilterMode(const hipTextureFilterMode hipFilterMode) { + switch (hipFilterMode) { + case hipFilterModePoint: + return CL_FILTER_NEAREST; + case hipFilterModeLinear: + return CL_FILTER_LINEAR; + } + + ShouldNotReachHere(); + + return {}; +} + +inline +cl_mem_object_type getCLMemObjectType(const hipResourceType hipResType) { + switch (hipResType) { + case hipResourceTypeLinear: + return CL_MEM_OBJECT_IMAGE1D_BUFFER; + case hipResourceTypePitch2D: + return CL_MEM_OBJECT_IMAGE2D; + default: + break; + } + + ShouldNotReachHere(); + + return {}; +} + +inline +size_t getElementSize(const hipArray_const_t array) { + switch (array->Format) { + case 
HIP_AD_FORMAT_UNSIGNED_INT8: + case HIP_AD_FORMAT_SIGNED_INT8: + return 1 * array->NumChannels; + case HIP_AD_FORMAT_UNSIGNED_INT16: + case HIP_AD_FORMAT_SIGNED_INT16: + case HIP_AD_FORMAT_HALF: + return 2 * array->NumChannels; + case HIP_AD_FORMAT_UNSIGNED_INT32: + case HIP_AD_FORMAT_SIGNED_INT32: + case HIP_AD_FORMAT_FLOAT: + return 4 * array->NumChannels; + } + + ShouldNotReachHere(); + + return {}; +} + +inline +hipChannelFormatDesc getChannelFormatDesc(int numChannels, + hipArray_Format arrayFormat) { + switch (arrayFormat) { + case HIP_AD_FORMAT_UNSIGNED_INT8: + switch (numChannels) { + case 1: + return {8, 0, 0, 0, hipChannelFormatKindUnsigned}; + case 2: + return {8, 8, 0, 0, hipChannelFormatKindUnsigned}; + case 4: + return {8, 8, 8, 8, hipChannelFormatKindUnsigned}; + } + case HIP_AD_FORMAT_SIGNED_INT8: + switch (numChannels) { + case 1: + return {8, 0, 0, 0, hipChannelFormatKindSigned}; + case 2: + return {8, 8, 0, 0, hipChannelFormatKindSigned}; + case 4: + return {8, 8, 8, 8, hipChannelFormatKindSigned}; + } + case HIP_AD_FORMAT_UNSIGNED_INT16: + switch (numChannels) { + case 1: + return {16, 0, 0, 0, hipChannelFormatKindUnsigned}; + case 2: + return {16, 16, 0, 0, hipChannelFormatKindUnsigned}; + case 4: + return {16, 16, 16, 16, hipChannelFormatKindUnsigned}; + } + case HIP_AD_FORMAT_SIGNED_INT16: + switch (numChannels) { + case 1: + return {16, 0, 0, 0, hipChannelFormatKindSigned}; + case 2: + return {16, 16, 0, 0, hipChannelFormatKindSigned}; + case 4: + return {16, 16, 16, 16, hipChannelFormatKindSigned}; + } + case HIP_AD_FORMAT_UNSIGNED_INT32: + switch (numChannels) { + case 1: + return {32, 0, 0, 0, hipChannelFormatKindUnsigned}; + case 2: + return {32, 32, 0, 0, hipChannelFormatKindUnsigned}; + case 4: + return {32, 32, 32, 32, hipChannelFormatKindUnsigned}; + } + case HIP_AD_FORMAT_SIGNED_INT32: + switch (numChannels) { + case 1: + return {32, 0, 0, 0, hipChannelFormatKindSigned}; + case 2: + return {32, 32, 0, 0, hipChannelFormatKindSigned}; 
+ case 4: + return {32, 32, 32, 32, hipChannelFormatKindSigned}; + } + case HIP_AD_FORMAT_HALF: + switch (numChannels) { + case 1: + return {16, 0, 0, 0, hipChannelFormatKindFloat}; + case 2: + return {16, 16, 0, 0, hipChannelFormatKindFloat}; + case 4: + return {16, 16, 16, 16, hipChannelFormatKindFloat}; + } + case HIP_AD_FORMAT_FLOAT: + switch (numChannels) { + case 1: + return {32, 0, 0, 0, hipChannelFormatKindFloat}; + case 2: + return {32, 32, 0, 0, hipChannelFormatKindFloat}; + case 4: + return {32, 32, 32, 32, hipChannelFormatKindFloat}; + } + } + + ShouldNotReachHere(); + + return {}; +} + +inline +unsigned int getNumChannels(const hipChannelFormatDesc& desc) { + return ((desc.x != 0) + (desc.y != 0) + (desc.z != 0) + (desc.w != 0)); +} + +inline +hipArray_Format getArrayFormat(const hipChannelFormatDesc& desc) { + switch (desc.f) { + case hipChannelFormatKindUnsigned: + switch (desc.x) { + case 8: + return HIP_AD_FORMAT_UNSIGNED_INT8; + case 16: + return HIP_AD_FORMAT_UNSIGNED_INT16; + case 32: + return HIP_AD_FORMAT_UNSIGNED_INT32; + } + case hipChannelFormatKindSigned: + switch (desc.x) { + case 8: + return HIP_AD_FORMAT_SIGNED_INT8; + case 16: + return HIP_AD_FORMAT_SIGNED_INT16; + case 32: + return HIP_AD_FORMAT_SIGNED_INT32; + } + case hipChannelFormatKindFloat: + switch (desc.x) { + case 16: + return HIP_AD_FORMAT_HALF; + case 32: + return HIP_AD_FORMAT_FLOAT; + } + default: + break; + } + + ShouldNotReachHere(); + + return {}; +} + +inline +int getNumChannels(const hipResourceViewFormat hipFormat) { + switch (hipFormat) { + case hipResViewFormatUnsignedChar1: + case hipResViewFormatSignedChar1: + case hipResViewFormatUnsignedShort1: + case hipResViewFormatSignedShort1: + case hipResViewFormatUnsignedInt1: + case hipResViewFormatSignedInt1: + case hipResViewFormatHalf1: + case hipResViewFormatFloat1: + return 1; + case hipResViewFormatUnsignedChar2: + case hipResViewFormatSignedChar2: + case hipResViewFormatUnsignedShort2: + case 
hipResViewFormatSignedShort2: + case hipResViewFormatUnsignedInt2: + case hipResViewFormatSignedInt2: + case hipResViewFormatHalf2: + case hipResViewFormatFloat2: + return 2; + case hipResViewFormatUnsignedChar4: + case hipResViewFormatSignedChar4: + case hipResViewFormatUnsignedShort4: + case hipResViewFormatSignedShort4: + case hipResViewFormatUnsignedInt4: + case hipResViewFormatSignedInt4: + case hipResViewFormatHalf4: + case hipResViewFormatFloat4: + return 4; + default: + break; + } + + ShouldNotReachHere(); + + return {}; +} + +inline +hipArray_Format getArrayFormat(const hipResourceViewFormat hipFormat) { + switch (hipFormat) { + case hipResViewFormatUnsignedChar1: + case hipResViewFormatUnsignedChar2: + case hipResViewFormatUnsignedChar4: + return HIP_AD_FORMAT_UNSIGNED_INT8; + case hipResViewFormatSignedChar1: + case hipResViewFormatSignedChar2: + case hipResViewFormatSignedChar4: + return HIP_AD_FORMAT_SIGNED_INT8; + case hipResViewFormatUnsignedShort1: + case hipResViewFormatUnsignedShort2: + case hipResViewFormatUnsignedShort4: + return HIP_AD_FORMAT_UNSIGNED_INT16; + case hipResViewFormatSignedShort1: + case hipResViewFormatSignedShort2: + case hipResViewFormatSignedShort4: + return HIP_AD_FORMAT_SIGNED_INT16; + case hipResViewFormatUnsignedInt1: + case hipResViewFormatUnsignedInt2: + case hipResViewFormatUnsignedInt4: + return HIP_AD_FORMAT_UNSIGNED_INT32; + case hipResViewFormatSignedInt1: + case hipResViewFormatSignedInt2: + case hipResViewFormatSignedInt4: + return HIP_AD_FORMAT_SIGNED_INT32; + case hipResViewFormatHalf1: + case hipResViewFormatHalf2: + case hipResViewFormatHalf4: + return HIP_AD_FORMAT_HALF; + case hipResViewFormatFloat1: + case hipResViewFormatFloat2: + case hipResViewFormatFloat4: + return HIP_AD_FORMAT_FLOAT; + default: + break; + } + + ShouldNotReachHere(); + + return {}; +} + +inline +hipResourceViewFormat getResourceViewFormat(const hipChannelFormatDesc& desc) { + switch (desc.f) { + case hipChannelFormatKindUnsigned: + 
switch (getNumChannels(desc)) { + case 1: + switch (desc.x) { + case 8: + return hipResViewFormatUnsignedChar1; + case 16: + return hipResViewFormatUnsignedShort1; + case 32: + return hipResViewFormatUnsignedInt1; + } + case 2: + switch (desc.x) { + case 8: + return hipResViewFormatUnsignedChar2; + case 16: + return hipResViewFormatUnsignedShort2; + case 32: + return hipResViewFormatUnsignedInt2; + } + case 4: + switch (desc.x) { + case 8: + return hipResViewFormatUnsignedChar4; + case 16: + return hipResViewFormatUnsignedShort4; + case 32: + return hipResViewFormatUnsignedInt4; + } + } + case hipChannelFormatKindSigned: + switch (getNumChannels(desc)) { + case 1: + switch (desc.x) { + case 8: + return hipResViewFormatSignedChar1; + case 16: + return hipResViewFormatSignedShort1; + case 32: + return hipResViewFormatSignedInt1; + } + case 2: + switch (desc.x) { + case 8: + return hipResViewFormatSignedChar2; + case 16: + return hipResViewFormatSignedShort2; + case 32: + return hipResViewFormatSignedInt2; + } + case 4: + switch (desc.x) { + case 8: + return hipResViewFormatSignedChar4; + case 16: + return hipResViewFormatSignedShort4; + case 32: + return hipResViewFormatSignedInt4; + } + } + case hipChannelFormatKindFloat: + switch (getNumChannels(desc)) { + case 1: + switch (desc.x) { + case 16: + return hipResViewFormatHalf1; + case 32: + return hipResViewFormatFloat1; + } + case 2: + switch (desc.x) { + case 16: + return hipResViewFormatHalf2; + case 32: + return hipResViewFormatFloat2; + } + case 4: + switch (desc.x) { + case 16: + return hipResViewFormatHalf4; + case 32: + return hipResViewFormatFloat4; + } + } + default: + break; + } + + ShouldNotReachHere(); + + return {}; +} + +inline +hipTextureDesc getTextureDesc(const textureReference* texRef) { + hipTextureDesc texDesc = {}; + std::memcpy(texDesc.addressMode, texRef->addressMode, sizeof(texDesc.addressMode)); + texDesc.filterMode = texRef->filterMode; + texDesc.readMode = texRef->readMode; + texDesc.sRGB 
= texRef->sRGB; + texDesc.normalizedCoords = texRef->normalized; + texDesc.maxAnisotropy = texRef->maxAnisotropy; + texDesc.mipmapFilterMode = texRef->mipmapFilterMode; + texDesc.mipmapLevelBias = texRef->mipmapLevelBias; + texDesc.minMipmapLevelClamp = texRef->minMipmapLevelClamp; + texDesc.maxMipmapLevelClamp = texRef->maxMipmapLevelClamp; + + return texDesc; +} + +inline +hipResourceViewDesc getResourceViewDesc(hipArray_const_t array, + const hipResourceViewFormat format) { + hipResourceViewDesc resViewDesc = {}; + resViewDesc.format = format; + resViewDesc.width = array->width; + resViewDesc.height = array->height; + resViewDesc.depth = array->depth; + resViewDesc.firstMipmapLevel = 0; + resViewDesc.lastMipmapLevel = 0; + resViewDesc.firstLayer = 0; + resViewDesc.lastLayer = 0; /* TODO add hipArray::numLayers */ + + return resViewDesc; +} + +inline +hipResourceViewDesc getResourceViewDesc(hipMipmappedArray_const_t array, + const hipResourceViewFormat format) { + hipResourceViewDesc resViewDesc = {}; + resViewDesc.format = format; + resViewDesc.width = array->width; + resViewDesc.height = array->height; + resViewDesc.depth = array->depth; + resViewDesc.firstMipmapLevel = 0; + resViewDesc.lastMipmapLevel = 0; /* TODO add hipMipmappedArray::numMipLevels */ + resViewDesc.firstLayer = 0; + resViewDesc.lastLayer = 0; /* TODO add hipArray::numLayers */ + + return resViewDesc; +} + +inline +std::pair getMemoryType(const hipMemcpyKind kind) { + switch (kind) { + case hipMemcpyHostToHost: + return {hipMemoryTypeHost, hipMemoryTypeHost}; + case hipMemcpyHostToDevice: + return {hipMemoryTypeHost, hipMemoryTypeDevice}; + case hipMemcpyDeviceToHost: + return {hipMemoryTypeDevice, hipMemoryTypeHost}; + case hipMemcpyDeviceToDevice: + return {hipMemoryTypeDevice, hipMemoryTypeDevice}; + case hipMemcpyDefault: + return {hipMemoryTypeUnified, hipMemoryTypeUnified}; + } + + ShouldNotReachHere(); + + return {}; +} + +inline +HIP_MEMCPY3D getDrvMemcpy3DDesc(const hip_Memcpy2D& 
desc2D) { + HIP_MEMCPY3D desc3D = {}; + + desc3D.srcXInBytes = desc2D.srcXInBytes; + desc3D.srcY = desc2D.srcY; + desc3D.srcZ = 0; + desc3D.srcLOD = 0; + desc3D.srcMemoryType = desc2D.srcMemoryType; + desc3D.srcHost = desc2D.srcHost; + desc3D.srcDevice = desc2D.srcDevice; + desc3D.srcArray = desc2D.srcArray; + desc3D.srcPitch = desc2D.srcPitch; + desc3D.srcHeight = 0; + + desc3D.dstXInBytes = desc2D.dstXInBytes; + desc3D.dstY = desc2D.dstY; + desc3D.dstZ = 0; + desc3D.dstLOD = 0; + desc3D.dstMemoryType = desc2D.dstMemoryType; + desc3D.dstHost = desc2D.dstHost; + desc3D.dstDevice = desc2D.dstDevice; + desc3D.dstArray = desc2D.dstArray; + desc3D.dstPitch = desc2D.dstPitch; + desc3D.dstHeight = 0; + + desc3D.WidthInBytes = desc2D.WidthInBytes; + desc3D.Height = desc2D.Height; + desc3D.Depth = 0; + + return desc3D; +} + +inline +HIP_MEMCPY3D getDrvMemcpy3DDesc(const hipMemcpy3DParms& desc) { + HIP_MEMCPY3D descDrv = {}; + + descDrv.WidthInBytes = desc.extent.width; + descDrv.Height = desc.extent.height; + descDrv.Depth = desc.extent.depth; + + descDrv.srcXInBytes = desc.srcPos.x; + descDrv.srcY = desc.srcPos.y; + descDrv.srcZ = desc.srcPos.z; + descDrv.srcLOD = 0; + + descDrv.dstXInBytes = desc.dstPos.x; + descDrv.dstY = desc.dstPos.y; + descDrv.dstZ = desc.dstPos.z; + descDrv.dstLOD = 0; + + if (desc.srcArray != nullptr) { + descDrv.srcMemoryType = hipMemoryTypeArray; + descDrv.srcArray = desc.srcArray; + // When reffering to array memory, hipPos::x is in elements. + descDrv.srcXInBytes *= getElementSize(desc.srcArray); + } + + if (desc.srcPtr.ptr != nullptr) { + descDrv.srcMemoryType = std::get<0>(hip::getMemoryType(desc.kind)); + descDrv.srcHost = desc.srcPtr.ptr; + descDrv.srcDevice = desc.srcPtr.ptr; + descDrv.srcPitch = desc.srcPtr.pitch; + descDrv.srcHeight = desc.srcPtr.ysize; + } + + if (desc.dstArray != nullptr) { + descDrv.dstMemoryType = hipMemoryTypeArray; + descDrv.dstArray = desc.dstArray; + // When reffering to array memory, hipPos::x is in elements. 
+ descDrv.dstXInBytes *= getElementSize(desc.dstArray); + } + + if (desc.dstPtr.ptr != nullptr) { + descDrv.dstMemoryType = std::get<1>(getMemoryType(desc.kind)); + descDrv.dstHost = desc.dstPtr.ptr; + descDrv.dstDevice = desc.dstPtr.ptr; + descDrv.dstPitch = desc.dstPtr.pitch; + descDrv.dstHeight = desc.dstPtr.ysize; + } + + // If a HIP array is participating in the copy, the extent is defined in terms of that array's elements. + if ((desc.srcArray != nullptr) && (desc.dstArray == nullptr)) { + descDrv.WidthInBytes *= getElementSize(desc.srcArray); + } else if ((desc.srcArray == nullptr) && (desc.dstArray != nullptr)) { + descDrv.WidthInBytes *= getElementSize(desc.dstArray); + } else if ((desc.srcArray != nullptr) && (desc.dstArray != nullptr)) { + descDrv.WidthInBytes *= getElementSize(desc.dstArray); + } + + return descDrv; +} + +inline +hipResourceType getResourceType(const HIPresourcetype resType) { + // These two enums should be isomorphic. + return static_cast(resType); +} + +inline +HIPresourcetype getResourceType(const hipResourceType resType) { + // These two enums should be isomorphic. 
+ return static_cast(resType); +} + +inline +hipResourceDesc getResourceDesc(const HIP_RESOURCE_DESC& resDesc) { + hipResourceDesc desc; + + desc.resType = getResourceType(resDesc.resType); + switch (desc.resType) { + case hipResourceTypeArray: + desc.res.array.array = resDesc.res.array.hArray; + break; + case hipResourceTypeMipmappedArray: + desc.res.mipmap.mipmap = resDesc.res.mipmap.hMipmappedArray; + break; + case hipResourceTypeLinear: + desc.res.linear.devPtr = resDesc.res.linear.devPtr; + desc.res.linear.desc = getChannelFormatDesc(resDesc.res.linear.numChannels, resDesc.res.linear.format); + desc.res.linear.sizeInBytes = resDesc.res.linear.sizeInBytes; + break; + case hipResourceTypePitch2D: + desc.res.pitch2D.devPtr = resDesc.res.pitch2D.devPtr; + desc.res.pitch2D.desc = getChannelFormatDesc(resDesc.res.pitch2D.numChannels, resDesc.res.pitch2D.format); + desc.res.pitch2D.width = resDesc.res.pitch2D.width; + desc.res.pitch2D.height = resDesc.res.pitch2D.height; + desc.res.pitch2D.pitchInBytes = resDesc.res.pitch2D.pitchInBytes; + break; + default: + break; + } + + return desc; +} + +inline +HIP_RESOURCE_DESC getResourceDesc(const hipResourceDesc& resDesc) { + HIP_RESOURCE_DESC desc; + + desc.resType = getResourceType(resDesc.resType); + switch (desc.resType) { + case HIP_RESOURCE_TYPE_ARRAY: + desc.res.array.hArray = resDesc.res.array.array; + break; + case HIP_RESOURCE_TYPE_MIPMAPPED_ARRAY: + desc.res.mipmap.hMipmappedArray = resDesc.res.mipmap.mipmap; + break; + case HIP_RESOURCE_TYPE_LINEAR: + desc.res.linear.devPtr = resDesc.res.linear.devPtr; + desc.res.linear.numChannels = getNumChannels(resDesc.res.linear.desc); + desc.res.linear.format = getArrayFormat(resDesc.res.linear.desc); + desc.res.linear.sizeInBytes = resDesc.res.linear.sizeInBytes; + break; + case HIP_RESOURCE_TYPE_PITCH2D: + desc.res.pitch2D.devPtr = resDesc.res.pitch2D.devPtr; + desc.res.pitch2D.numChannels = getNumChannels(resDesc.res.pitch2D.desc); + desc.res.pitch2D.format = 
getArrayFormat(resDesc.res.pitch2D.desc); + desc.res.pitch2D.width = resDesc.res.pitch2D.width; + desc.res.pitch2D.height = resDesc.res.pitch2D.height; + desc.res.pitch2D.pitchInBytes = resDesc.res.pitch2D.pitchInBytes; + break; + default: + break; + } + + return desc; +} + +inline +hipTextureAddressMode getAddressMode(const HIPaddress_mode mode) { + // These two enums should be isomorphic. + return static_cast(mode); +} + +inline +HIPaddress_mode getAddressMode(const hipTextureAddressMode mode) { + // These two enums should be isomorphic. + return static_cast(mode); +} + +inline +hipTextureFilterMode getFilterMode(const HIPfilter_mode mode) { + // These two enums should be isomorphic. + return static_cast(mode); +} + +inline +HIPfilter_mode getFilterMode(const hipTextureFilterMode mode) { + // These two enums should be isomorphic. + return static_cast(mode); +} + +inline +hipTextureReadMode getReadMode(const unsigned int flags) { + if (flags & HIP_TRSF_READ_AS_INTEGER) { + return hipReadModeElementType; + } else { + return hipReadModeNormalizedFloat; + } +} + +inline +unsigned int getReadMode(const hipTextureReadMode mode) { + if (mode == hipReadModeElementType) { + return HIP_TRSF_READ_AS_INTEGER; + } else { + return 0; + } +} + +inline +int getsRGB(const unsigned int flags) { + if (flags & HIP_TRSF_SRGB) { + return 1; + } else { + return 0; + } +} + +inline +unsigned int getsRGB(const int sRGB) { + if (sRGB == 1) { + return HIP_TRSF_SRGB; + } else { + return 0; + } +} + +inline +int getNormalizedCoords(const unsigned int flags) { + if (flags & HIP_TRSF_NORMALIZED_COORDINATES) { + return 1; + } else { + return 0; + } +} + +inline +unsigned int getNormalizedCoords(const int normalizedCoords) { + if (normalizedCoords == 1) { + return HIP_TRSF_NORMALIZED_COORDINATES; + } else { + return 0; + } +} + +inline +hipTextureDesc getTextureDesc(const HIP_TEXTURE_DESC& texDesc) { + hipTextureDesc desc; + + desc.addressMode[0] = getAddressMode(texDesc.addressMode[0]); + 
desc.addressMode[1] = getAddressMode(texDesc.addressMode[1]); + desc.addressMode[2] = getAddressMode(texDesc.addressMode[2]); + desc.filterMode = getFilterMode(texDesc.filterMode); + desc.readMode = getReadMode(texDesc.flags); + desc.sRGB = getsRGB(texDesc.flags); + std::memcpy(desc.borderColor, texDesc.borderColor, sizeof(desc.borderColor)); + desc.normalizedCoords = getNormalizedCoords(texDesc.flags); + desc.maxAnisotropy = texDesc.maxAnisotropy; + desc.mipmapFilterMode = getFilterMode(texDesc.mipmapFilterMode); + desc.mipmapLevelBias = texDesc.mipmapLevelBias; + desc.minMipmapLevelClamp = texDesc.minMipmapLevelClamp; + desc.maxMipmapLevelClamp = texDesc.maxMipmapLevelClamp; + + return desc; +} + +inline +HIP_TEXTURE_DESC getTextureDesc(const hipTextureDesc& texDesc) { + HIP_TEXTURE_DESC desc; + + desc.addressMode[0] = getAddressMode(texDesc.addressMode[0]); + desc.addressMode[1] = getAddressMode(texDesc.addressMode[1]); + desc.addressMode[2] = getAddressMode(texDesc.addressMode[2]); + desc.filterMode = getFilterMode(texDesc.filterMode); + desc.flags = 0; + desc.flags |= getReadMode(texDesc.readMode); + desc.flags |= getsRGB(texDesc.sRGB); + desc.flags |= getNormalizedCoords(texDesc.normalizedCoords); + desc.maxAnisotropy = texDesc.maxAnisotropy; + desc.mipmapFilterMode = getFilterMode(texDesc.mipmapFilterMode); + desc.mipmapLevelBias = texDesc.mipmapLevelBias; + desc.minMipmapLevelClamp = texDesc.minMipmapLevelClamp; + desc.maxMipmapLevelClamp = texDesc.maxMipmapLevelClamp; + std::memcpy(desc.borderColor, texDesc.borderColor, sizeof(desc.borderColor)); + + return desc; +} + +inline +hipResourceViewFormat getResourceViewFormat(const HIPresourceViewFormat format) { + // These two enums should be isomorphic. + return static_cast(format); +} + +inline +HIPresourceViewFormat getResourceViewFormat(const hipResourceViewFormat format) { + // These two enums should be isomorphic. 
+  return static_cast<HIPresourceViewFormat>(format);
+}
+
+inline
+hipResourceViewDesc getResourceViewDesc(const HIP_RESOURCE_VIEW_DESC& resViewDesc) {
+  hipResourceViewDesc desc;
+
+  desc.format = getResourceViewFormat(resViewDesc.format);
+  desc.width = resViewDesc.width;
+  desc.height = resViewDesc.height;
+  desc.depth = resViewDesc.depth;
+  desc.firstMipmapLevel = resViewDesc.firstMipmapLevel;
+  desc.lastMipmapLevel = resViewDesc.lastMipmapLevel;
+  desc.firstLayer = resViewDesc.firstLayer;
+  desc.lastLayer = resViewDesc.lastLayer;
+
+  return desc;
+}
+
+inline
+HIP_RESOURCE_VIEW_DESC getResourceViewDesc(const hipResourceViewDesc& resViewDesc) {
+  HIP_RESOURCE_VIEW_DESC desc;
+
+  desc.format = getResourceViewFormat(resViewDesc.format);
+  desc.width = resViewDesc.width;
+  desc.height = resViewDesc.height;
+  desc.depth = resViewDesc.depth;
+  desc.firstMipmapLevel = resViewDesc.firstMipmapLevel;
+  desc.lastMipmapLevel = resViewDesc.lastMipmapLevel;
+  desc.firstLayer = resViewDesc.firstLayer;
+  desc.lastLayer = resViewDesc.lastLayer;
+
+  return desc;
+}
+
+// Returns the size in bytes of one element described by `desc`.
+// `desc.x` holds the bit width of a single channel (8, 16 or 32), so it is
+// divided by 8 to get bytes and then multiplied by the number of channels.
+inline
+size_t getElementSize(const hipChannelFormatDesc &desc) {
+  return (desc.x / 8) * getNumChannels(desc);
+}
+};
diff --git a/projects/hip/vdi/hip_device.cpp b/projects/hip/vdi/hip_device.cpp
new file mode 100644
index 0000000000..80e247f37c
--- /dev/null
+++ b/projects/hip/vdi/hip_device.cpp
@@ -0,0 +1,256 @@
+/* Copyright (c) 2018-present Advanced Micro Devices, Inc.
+ + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. */ + +#include + +#include "hip_internal.hpp" + +namespace hip { + +amd::HostQueue* Device::defaultStream() { + if (defaultStream_ == nullptr) { + const cl_command_queue_properties properties = CL_QUEUE_PROFILING_ENABLE; + defaultStream_ = new amd::HostQueue(*asContext(), *devices()[0], properties, + amd::CommandQueue::RealTimeDisabled, + amd::CommandQueue::Priority::Normal); + if ((defaultStream_ == nullptr) || + !defaultStream_->create()) { + return nullptr; + } + } + return defaultStream_; +} + +}; + +hipError_t hipDeviceGet(hipDevice_t *device, int deviceId) { + HIP_INIT_API(hipDeviceGet, device, deviceId); + + if (device != nullptr) { + *device = deviceId; + } else { + HIP_RETURN(hipErrorInvalidValue); + } + + HIP_RETURN(hipSuccess); +}; + +hipError_t hipFuncSetCacheConfig (const void* func, hipFuncCache_t cacheConfig) { + + HIP_INIT_API(hipFuncSetCacheConfig, cacheConfig); + + // No way to set cache config yet. 
+ + HIP_RETURN(hipSuccess); +} + +hipError_t hipDeviceTotalMem (size_t *bytes, hipDevice_t device) { + + HIP_INIT_API(hipDeviceTotalMem, bytes, device); + + if (device < 0 || static_cast(device) >= g_devices.size()) { + HIP_RETURN(hipErrorInvalidDevice); + } + + if (bytes == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + auto* deviceHandle = g_devices[device]->devices()[0]; + const auto& info = deviceHandle->info(); + + *bytes = info.globalMemSize_; + + HIP_RETURN(hipSuccess); +} + +hipError_t hipDeviceComputeCapability(int *major, int *minor, hipDevice_t device) { + + HIP_INIT_API(hipDeviceComputeCapability, major, minor, device); + + if (device < 0 || static_cast(device) >= g_devices.size()) { + HIP_RETURN(hipErrorInvalidDevice); + } + + if (major == nullptr || minor == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + auto* deviceHandle = g_devices[device]->devices()[0]; + const auto& info = deviceHandle->info(); + *major = info.gfxipVersion_ / 100; + *minor = info.gfxipVersion_ % 100; + + HIP_RETURN(hipSuccess); +} + +hipError_t hipDeviceGetCount(int* count) { + HIP_INIT_API(NONE, count); + + HIP_RETURN(ihipDeviceGetCount(count)); +} + +hipError_t ihipDeviceGetCount(int* count) { + if (count == nullptr) { + return hipErrorInvalidValue; + } + + // Get all available devices + *count = g_devices.size(); + + if (*count < 1) { + return hipErrorNoDevice; + } + + return hipSuccess; +} + +hipError_t hipDeviceGetName(char *name, int len, hipDevice_t device) { + + HIP_INIT_API(hipDeviceGetName, (void*)name, len, device); + + if (device < 0 || static_cast(device) >= g_devices.size()) { + HIP_RETURN(hipErrorInvalidDevice); + } + + if (name == nullptr || len <= 0) { + HIP_RETURN(hipErrorInvalidValue); + } + + auto* deviceHandle = g_devices[device]->devices()[0]; + const auto& info = deviceHandle->info(); + const auto nameLen = ::strlen(info.boardName_); + + // Make sure that the size of `dest` is big enough to hold `src` including + // trailing zero byte + if 
(nameLen > (cl_uint)(len - 1)) { + HIP_RETURN(hipErrorInvalidValue); + } + + ::strncpy(name, info.boardName_, (nameLen + 1)); + + HIP_RETURN(hipSuccess); +} + +hipError_t hipGetDeviceProperties ( hipDeviceProp_t* props, hipDevice_t device ) { + HIP_INIT_API(hipGetDeviceProperties, props, device); + + if (props == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + if (unsigned(device) >= g_devices.size()) { + HIP_RETURN(hipErrorInvalidDevice); + } + auto* deviceHandle = g_devices[device]->devices()[0]; + + hipDeviceProp_t deviceProps = {0}; + + const auto& info = deviceHandle->info(); + ::strncpy(deviceProps.name, info.boardName_, 128); + deviceProps.totalGlobalMem = info.globalMemSize_; + deviceProps.sharedMemPerBlock = info.localMemSizePerCU_; + deviceProps.regsPerBlock = info.availableSGPRs_; + deviceProps.warpSize = info.wavefrontWidth_; + deviceProps.maxThreadsPerBlock = info.maxWorkGroupSize_; + deviceProps.maxThreadsDim[0] = info.maxWorkItemSizes_[0]; + deviceProps.maxThreadsDim[1] = info.maxWorkItemSizes_[1]; + deviceProps.maxThreadsDim[2] = info.maxWorkItemSizes_[2]; + deviceProps.maxGridSize[0] = INT32_MAX; + deviceProps.maxGridSize[1] = INT32_MAX; + deviceProps.maxGridSize[2] = INT32_MAX; + deviceProps.clockRate = info.maxEngineClockFrequency_ * 1000; + deviceProps.memoryClockRate = info.maxMemoryClockFrequency_ * 1000; + deviceProps.memoryBusWidth = info.globalMemChannels_ * 32; + deviceProps.totalConstMem = info.maxConstantBufferSize_; + deviceProps.major = info.gfxipVersion_ / 100; + deviceProps.minor = info.gfxipVersion_ % 100; + deviceProps.multiProcessorCount = info.maxComputeUnits_; + deviceProps.l2CacheSize = info.l2CacheSize_; + deviceProps.maxThreadsPerMultiProcessor = info.maxThreadsPerCU_; + deviceProps.computeMode = 0; + deviceProps.clockInstructionRate = info.timeStampFrequency_; + deviceProps.arch.hasGlobalInt32Atomics = 1; + deviceProps.arch.hasGlobalFloatAtomicExch = 1; + deviceProps.arch.hasSharedInt32Atomics = 1; + 
deviceProps.arch.hasSharedFloatAtomicExch = 1; + deviceProps.arch.hasFloatAtomicAdd = 0; + deviceProps.arch.hasGlobalInt64Atomics = 1; + deviceProps.arch.hasSharedInt64Atomics = 1; + deviceProps.arch.hasDoubles = 1; + deviceProps.arch.hasWarpVote = 0; + deviceProps.arch.hasWarpBallot = 0; + deviceProps.arch.hasWarpShuffle = 0; + deviceProps.arch.hasFunnelShift = 0; + deviceProps.arch.hasThreadFenceSystem = 1; + deviceProps.arch.hasSyncThreadsExt = 0; + deviceProps.arch.hasSurfaceFuncs = 0; + deviceProps.arch.has3dGrid = 1; + deviceProps.arch.hasDynamicParallelism = 0; + deviceProps.concurrentKernels = 1; + deviceProps.pciDomainID = info.deviceTopology_.pcie.function; + deviceProps.pciBusID = info.deviceTopology_.pcie.bus; + deviceProps.pciDeviceID = info.deviceTopology_.pcie.device; + deviceProps.maxSharedMemoryPerMultiProcessor = info.localMemSizePerCU_; + //deviceProps.isMultiGpuBoard = info.; + deviceProps.canMapHostMemory = 1; + deviceProps.gcnArch = info.gfxipVersion_; + deviceProps.cooperativeLaunch = info.cooperativeGroups_; + deviceProps.cooperativeMultiDeviceLaunch = info.cooperativeMultiDeviceGroups_; + + deviceProps.cooperativeMultiDeviceUnmatchedFunc = info.cooperativeMultiDeviceGroups_; + deviceProps.cooperativeMultiDeviceUnmatchedGridDim = info.cooperativeMultiDeviceGroups_; + deviceProps.cooperativeMultiDeviceUnmatchedBlockDim = info.cooperativeMultiDeviceGroups_; + deviceProps.cooperativeMultiDeviceUnmatchedSharedMem = info.cooperativeMultiDeviceGroups_; + + deviceProps.maxTexture1D = info.imageMaxBufferSize_; + deviceProps.maxTexture2D[0] = info.image2DMaxWidth_; + deviceProps.maxTexture2D[1] = info.image2DMaxHeight_; + deviceProps.maxTexture3D[0] = info.image3DMaxWidth_; + deviceProps.maxTexture3D[1] = info.image3DMaxHeight_; + deviceProps.maxTexture3D[2] = info.image3DMaxDepth_; + deviceProps.hdpMemFlushCntl = nullptr; + deviceProps.hdpRegFlushCntl = nullptr; + + deviceProps.memPitch = info.maxMemAllocSize_; + deviceProps.textureAlignment = 
info.imageBaseAddressAlignment_; + deviceProps.texturePitchAlignment = info.imagePitchAlignment_; + deviceProps.kernelExecTimeoutEnabled = 0; + deviceProps.ECCEnabled = info.errorCorrectionSupport_? 1:0; + + *props = deviceProps; + HIP_RETURN(hipSuccess); +} + +hipError_t hipHccGetAccelerator(int deviceId, hc::accelerator* acc) { + HIP_INIT_API(NONE, deviceId, acc); + + assert(0 && "Unimplemented"); + + HIP_RETURN(hipErrorNotSupported); +} + +hipError_t hipHccGetAcceleratorView(hipStream_t stream, hc::accelerator_view** av) { + HIP_INIT_API(NONE, stream, av); + + assert(0 && "Unimplemented"); + + HIP_RETURN(hipErrorNotSupported); +} diff --git a/projects/hip/vdi/hip_device_runtime.cpp b/projects/hip/vdi/hip_device_runtime.cpp new file mode 100644 index 0000000000..febf64d116 --- /dev/null +++ b/projects/hip/vdi/hip_device_runtime.cpp @@ -0,0 +1,569 @@ +/* Copyright (c) 2018-present Advanced Micro Devices, Inc. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. 
*/ + +#include + +#include "hip_internal.hpp" + +hipError_t hipChooseDevice(int* device, const hipDeviceProp_t* properties) { + + HIP_INIT_API(hipChooseDevice, device, properties); + + if (device == nullptr || properties == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + *device = 0; + cl_uint maxMatchedCount = 0; + int count = 0; + ihipDeviceGetCount(&count); + + for (cl_int i = 0; i< count; ++i) { + hipDeviceProp_t currentProp = {0}; + cl_uint validPropCount = 0; + cl_uint matchedCount = 0; + hipError_t err = hipGetDeviceProperties(¤tProp, i); + if (properties->major != 0) { + validPropCount++; + if(currentProp.major >= properties->major) { + matchedCount++; + } + } + if (properties->minor != 0) { + validPropCount++; + if(currentProp.minor >= properties->minor) { + matchedCount++; + } + } + if(properties->totalGlobalMem != 0) { + validPropCount++; + if(currentProp.totalGlobalMem >= properties->totalGlobalMem) { + matchedCount++; + } + } + if(properties->sharedMemPerBlock != 0) { + validPropCount++; + if(currentProp.sharedMemPerBlock >= properties->sharedMemPerBlock) { + matchedCount++; + } + } + if(properties->maxThreadsPerBlock != 0) { + validPropCount++; + if(currentProp.maxThreadsPerBlock >= properties->maxThreadsPerBlock ) { + matchedCount++; + } + } + if(properties->totalConstMem != 0) { + validPropCount++; + if(currentProp.totalConstMem >= properties->totalConstMem ) { + matchedCount++; + } + } + if(properties->multiProcessorCount != 0) { + validPropCount++; + if(currentProp.multiProcessorCount >= + properties->multiProcessorCount ) { + matchedCount++; + } + } + if(properties->maxThreadsPerMultiProcessor != 0) { + validPropCount++; + if(currentProp.maxThreadsPerMultiProcessor >= + properties->maxThreadsPerMultiProcessor ) { + matchedCount++; + } + } + if(properties->memoryClockRate != 0) { + validPropCount++; + if(currentProp.memoryClockRate >= properties->memoryClockRate ) { + matchedCount++; + } + } + if(properties->memoryBusWidth != 0) { + 
validPropCount++; + if(currentProp.memoryBusWidth >= properties->memoryBusWidth ) { + matchedCount++; + } + } + if(properties->l2CacheSize != 0) { + validPropCount++; + if(currentProp.l2CacheSize >= properties->l2CacheSize ) { + matchedCount++; + } + } + if(properties->regsPerBlock != 0) { + validPropCount++; + if(currentProp.regsPerBlock >= properties->regsPerBlock ) { + matchedCount++; + } + } + if(properties->maxSharedMemoryPerMultiProcessor != 0) { + validPropCount++; + if(currentProp.maxSharedMemoryPerMultiProcessor >= + properties->maxSharedMemoryPerMultiProcessor ) { + matchedCount++; + } + } + if(properties->warpSize != 0) { + validPropCount++; + if(currentProp.warpSize >= properties->warpSize ) { + matchedCount++; + } + } + if(validPropCount == matchedCount) { + *device = matchedCount > maxMatchedCount ? i : *device; + maxMatchedCount = std::max(matchedCount, maxMatchedCount); + } + } + + HIP_RETURN(hipSuccess); +} + +hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device) { + + HIP_INIT_API(hipDeviceGetAttribute, pi, attr, device); + + if (pi == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + int count = 0; + ihipDeviceGetCount(&count); + if (device < 0 || device >= count) { + HIP_RETURN(hipErrorInvalidDevice); + } + + //FIXME: should we cache the props, or just select from deviceHandle->info_? 
+ hipDeviceProp_t prop = {0}; + hipError_t err = hipGetDeviceProperties(&prop, device); + if (err != hipSuccess) { + HIP_RETURN(err); + } + + switch (attr) { + case hipDeviceAttributeMaxThreadsPerBlock: + *pi = prop.maxThreadsPerBlock; + break; + case hipDeviceAttributeMaxBlockDimX: + *pi = prop.maxThreadsDim[0]; + break; + case hipDeviceAttributeMaxBlockDimY: + *pi = prop.maxThreadsDim[1]; + break; + case hipDeviceAttributeMaxBlockDimZ: + *pi = prop.maxThreadsDim[2]; + break; + case hipDeviceAttributeMaxGridDimX: + *pi = prop.maxGridSize[0]; + break; + case hipDeviceAttributeMaxGridDimY: + *pi = prop.maxGridSize[1]; + break; + case hipDeviceAttributeMaxGridDimZ: + *pi = prop.maxGridSize[2]; + break; + case hipDeviceAttributeMaxSharedMemoryPerBlock: + *pi = prop.sharedMemPerBlock; + break; + case hipDeviceAttributeTotalConstantMemory: + *pi = prop.totalConstMem; + break; + case hipDeviceAttributeWarpSize: + *pi = prop.warpSize; + break; + case hipDeviceAttributeMaxRegistersPerBlock: + *pi = prop.regsPerBlock; + break; + case hipDeviceAttributeClockRate: + *pi = prop.clockRate; + break; + case hipDeviceAttributeMemoryClockRate: + *pi = prop.memoryClockRate; + break; + case hipDeviceAttributeMemoryBusWidth: + *pi = prop.memoryBusWidth; + break; + case hipDeviceAttributeMultiprocessorCount: + *pi = prop.multiProcessorCount; + break; + case hipDeviceAttributeComputeMode: + *pi = prop.computeMode; + break; + case hipDeviceAttributeL2CacheSize: + *pi = prop.l2CacheSize; + break; + case hipDeviceAttributeMaxThreadsPerMultiProcessor: + *pi = prop.maxThreadsPerMultiProcessor; + break; + case hipDeviceAttributeComputeCapabilityMajor: + *pi = prop.major; + break; + case hipDeviceAttributeComputeCapabilityMinor: + *pi = prop.minor; + break; + case hipDeviceAttributePciBusId: + *pi = prop.pciBusID; + break; + case hipDeviceAttributeConcurrentKernels: + *pi = prop.concurrentKernels; + break; + case hipDeviceAttributePciDeviceId: + *pi = prop.pciDeviceID; + break; + case 
hipDeviceAttributeMaxSharedMemoryPerMultiprocessor: + *pi = prop.maxSharedMemoryPerMultiProcessor; + break; + case hipDeviceAttributeIsMultiGpuBoard: + *pi = prop.isMultiGpuBoard; + break; + case hipDeviceAttributeCooperativeLaunch: + *pi = prop.cooperativeLaunch; + break; + case hipDeviceAttributeCooperativeMultiDeviceLaunch: + *pi = prop.cooperativeMultiDeviceLaunch; + break; + case hipDeviceAttributeMaxTexture1DWidth: + *pi = prop.maxTexture1D; + break; + case hipDeviceAttributeMaxTexture2DWidth: + *pi = prop.maxTexture2D[0]; + break; + case hipDeviceAttributeMaxTexture2DHeight: + *pi = prop.maxTexture2D[1]; + break; + case hipDeviceAttributeMaxTexture3DWidth: + *pi = prop.maxTexture3D[0]; + break; + case hipDeviceAttributeMaxTexture3DHeight: + *pi = prop.maxTexture3D[1]; + break; + case hipDeviceAttributeMaxTexture3DDepth: + *pi = prop.maxTexture3D[2]; + break; + case hipDeviceAttributeHdpMemFlushCntl: + *reinterpret_cast(pi) = prop.hdpMemFlushCntl; + break; + case hipDeviceAttributeHdpRegFlushCntl: + *reinterpret_cast(pi) = prop.hdpRegFlushCntl; + break; + case hipDeviceAttributeMaxPitch: + *pi = prop.memPitch; + break; + case hipDeviceAttributeTextureAlignment: + *pi = prop.textureAlignment; + break; + case hipDeviceAttributeTexturePitchAlignment: + *pi = prop.texturePitchAlignment; + break; + case hipDeviceAttributeKernelExecTimeout: + *pi = prop.kernelExecTimeoutEnabled; + break; + case hipDeviceAttributeCanMapHostMemory: + *pi = prop.canMapHostMemory; + break; + case hipDeviceAttributeEccEnabled: + *pi = prop.ECCEnabled; + break; + case hipDeviceAttributeCooperativeMultiDeviceUnmatchedFunc: + *pi = prop.cooperativeMultiDeviceUnmatchedFunc; + break; + case hipDeviceAttributeCooperativeMultiDeviceUnmatchedGridDim: + *pi = prop.cooperativeMultiDeviceUnmatchedGridDim; + break; + case hipDeviceAttributeCooperativeMultiDeviceUnmatchedBlockDim: + *pi = prop.cooperativeMultiDeviceUnmatchedBlockDim; + break; + case 
hipDeviceAttributeCooperativeMultiDeviceUnmatchedSharedMem: + *pi = prop.cooperativeMultiDeviceUnmatchedSharedMem; + break; + default: + HIP_RETURN(hipErrorInvalidValue); + } + + HIP_RETURN(hipSuccess); +} + +// Finds the device whose PCI bus number equals the bus field of pciBusIdstr +// (hex "domain:bus:device") and stores its index in *device. Only the bus field +// is compared. Returns hipErrorInvalidValue for null arguments, for a string that +// does not yield all three fields, or when no device reports that bus number. +hipError_t hipDeviceGetByPCIBusId(int* device, const char*pciBusIdstr) { + + HIP_INIT_API(hipDeviceGetByPCIBusId, device, pciBusIdstr); + + if (device == nullptr || pciBusIdstr == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + int pciBusID = -1; + int pciDeviceID = -1; + int pciDomainID = -1; + // Set once a matching device is found; without it a failed lookup would + // report success while leaving *device unwritten. + bool found = false; + + if (sscanf (pciBusIdstr, "%04x:%02x:%02x", &pciDomainID, &pciBusID, &pciDeviceID) == 0x3) { + int count = 0; + ihipDeviceGetCount(&count); + for (cl_int i = 0; i < count; i++) { + int pi = 0; + hipDevice_t dev; + hipDeviceGet(&dev, i); + hipDeviceGetAttribute(&pi, hipDeviceAttributePciBusId, dev); + + if (pciBusID == pi) { + *device = i; + found = true; + break; + } + } + } + + if (!found) { + HIP_RETURN(hipErrorInvalidValue); + } + + HIP_RETURN(hipSuccess); +} + +// Writes a value-initialized hipFuncCache_t to *cacheConfig. +// Returns hipErrorInvalidValue when cacheConfig is null. +hipError_t hipDeviceGetCacheConfig ( hipFuncCache_t * cacheConfig ) { + HIP_INIT_API(hipDeviceGetCacheConfig, cacheConfig); + + if(cacheConfig == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + *cacheConfig = hipFuncCache_t(); + + HIP_RETURN(hipSuccess); +} + +// Reports a device limit in *pValue. Only hipLimitMallocHeapSize is handled and it +// is answered with the current device's totalGlobalMem; any other limit yields +// hipErrorUnsupportedLimit. Returns hipErrorInvalidValue when pValue is null. +hipError_t hipDeviceGetLimit ( size_t* pValue, hipLimit_t limit ) { + + HIP_INIT_API(hipDeviceGetLimit, pValue, limit); + + if(pValue == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + if(limit == hipLimitMallocHeapSize) { + hipDeviceProp_t prop = {0}; + // Propagate a failed property query instead of reporting an indeterminate size. + hipError_t err = hipGetDeviceProperties(&prop, ihipGetDevice()); + if (err != hipSuccess) { + HIP_RETURN(err); + } + + *pValue = prop.totalGlobalMem; + HIP_RETURN(hipSuccess); + } else { + HIP_RETURN(hipErrorUnsupportedLimit); + } +} + +/** +hipError_t hipDeviceGetP2PAttribute ( int* value, hipDeviceP2PAttr attr, int srcDevice, int dstDevice ) { + assert(0); + HIP_RETURN(hipSuccess); +} +**/ + +// Writes the PCI address of `device` into pciBusId (at most len bytes) using the +// "%04x:%02x:%02x.0" domain:bus:device layout. +hipError_t hipDeviceGetPCIBusId ( char* pciBusId, int len, int device ) { + + HIP_INIT_API(hipDeviceGetPCIBusId, (void*)pciBusId, len, device); + + int count = 0; + ihipDeviceGetCount(&count); + // Valid indices are 0 .. count-1, so count itself must be rejected as well. + if (device < 0 || device >= count) { +
HIP_RETURN(hipErrorInvalidDevice); + } + + if (pciBusId == nullptr || len < 0) { + HIP_RETURN(hipErrorInvalidValue); + } + + hipDeviceProp_t prop = {0}; + // Do not format fields of a property struct that the query failed to fill in. + hipError_t err = hipGetDeviceProperties(&prop, device); + if (err != hipSuccess) { + HIP_RETURN(err); + } + + snprintf (pciBusId, len, "%04x:%02x:%02x.0", + prop.pciDomainID, + prop.pciBusID, + prop.pciDeviceID); + + HIP_RETURN(hipSuccess); +} + +// Reports the shared memory bank size; always hipSharedMemBankSizeFourByte. +// Returns hipErrorInvalidValue when pConfig is null. +hipError_t hipDeviceGetSharedMemConfig ( hipSharedMemConfig * pConfig ) { + HIP_INIT_API(hipDeviceGetSharedMemConfig, pConfig); + + if (pConfig == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + *pConfig = hipSharedMemBankSizeFourByte; + + HIP_RETURN(hipSuccess); +} + +// Placeholder: performs no reset work yet and reports success. +hipError_t hipDeviceReset ( void ) { + HIP_INIT_API(hipDeviceReset); + + /* FIXME */ + + HIP_RETURN(hipSuccess); +} + +// Accepts any cache configuration and ignores it; always reports success. +hipError_t hipDeviceSetCacheConfig ( hipFuncCache_t cacheConfig ) { + HIP_INIT_API(hipDeviceSetCacheConfig, cacheConfig); + + // No way to set cache config yet. + + HIP_RETURN(hipSuccess); +} + +// Not supported: always returns hipErrorNotSupported. +// NOTE(review): unlike the sibling entry points there is no HIP_INIT_API call here - confirm that is intended. +hipError_t hipDeviceSetLimit ( hipLimit_t limit, size_t value ) { + HIP_RETURN(hipErrorNotSupported); +} + +// Accepts any shared memory configuration and ignores it; always reports success. +hipError_t hipDeviceSetSharedMemConfig ( hipSharedMemConfig config ) { + HIP_INIT_API(hipDeviceSetSharedMemConfig, config); + + // No way to set shared memory config yet.
+ + HIP_RETURN(hipSuccess); +} + +// Calls hip::syncStreams(), then waits for the null stream's queue to finish. +// Returns hipErrorOutOfMemory when hip::getNullStream() yields no queue. +hipError_t hipDeviceSynchronize ( void ) { + HIP_INIT_API(hipDeviceSynchronize); + + hip::syncStreams(); + + amd::HostQueue* queue = hip::getNullStream(); + + if (!queue) { + HIP_RETURN(hipErrorOutOfMemory); + } + + queue->finish(); + HIP_RETURN(hipSuccess); +} + +// Returns deviceId() of hip::getCurrentDevice(). +int ihipGetDevice() { + return hip::getCurrentDevice()->deviceId(); +} + +// Stores the current device id in *deviceId. Returns hipErrorInvalidValue for a +// null pointer and hipErrorNoDevice when the id is -1. +hipError_t hipGetDevice ( int* deviceId ) { + HIP_INIT_API(hipGetDevice, deviceId); + + if (deviceId != nullptr) { + int dev = ihipGetDevice(); + if (dev == -1) { + HIP_RETURN(hipErrorNoDevice); + } + *deviceId = dev; + HIP_RETURN(hipSuccess); + } else { + HIP_RETURN(hipErrorInvalidValue); + } +} + +// Public wrapper around ihipDeviceGetCount. +hipError_t hipGetDeviceCount ( int* count ) { + HIP_INIT_API(hipGetDeviceCount, count); + + HIP_RETURN(ihipDeviceGetCount(count)); +} + +// Not supported: always returns hipErrorNotSupported. +// NOTE(review): no HIP_INIT_API call here, unlike the sibling entry points - confirm that is intended. +hipError_t hipGetDeviceFlags ( unsigned int* flags ) { + HIP_RETURN(hipErrorNotSupported); +} + +// Unimplemented: asserts in builds with assertions enabled, otherwise returns hipErrorNotSupported. +hipError_t hipIpcGetEventHandle ( hipIpcEventHandle_t* handle, hipEvent_t event ) { + HIP_INIT_API(NONE, handle, event); + + assert(0 && "Unimplemented"); + + HIP_RETURN(hipErrorNotSupported); +} + +// Unimplemented: asserts in builds with assertions enabled, otherwise returns hipErrorNotSupported. +hipError_t hipIpcOpenEventHandle ( hipEvent_t* event, hipIpcEventHandle_t handle ) { + HIP_INIT_API(NONE, event, handle); + + assert(0 && "Unimplemented"); + + HIP_RETURN(hipErrorNotSupported); +} + +// Makes `device` the current device. Returns hipErrorInvalidDevice when the index +// is not below g_devices.size(). +hipError_t hipSetDevice ( int device ) { + HIP_INIT_API(hipSetDevice, device); + + // The unsigned cast makes a negative index compare as a huge value, so one + // comparison rejects both negative and too-large indices. + if (static_cast<unsigned int>(device) < g_devices.size()) { + hip::setCurrentDevice(device); + + HIP_RETURN(hipSuccess); + } + HIP_RETURN(hipErrorInvalidDevice); +} + +// Validates `flags` against the supported set and applies the scheduling bits to +// the first amd::Device of the current device through SetActiveWait(). +hipError_t hipSetDeviceFlags ( unsigned int flags ) { + HIP_INIT_API(hipSetDeviceFlags, flags); + + constexpr uint32_t supportedFlags = + hipDeviceScheduleMask | hipDeviceMapHost | hipDeviceLmemResizeToMax; + + if (flags & ~supportedFlags) { + HIP_RETURN(hipErrorInvalidValue); + } + + amd::Device* device = hip::getCurrentDevice()->devices()[0]; + switch (flags & hipDeviceScheduleMask) { + case hipDeviceScheduleAuto: + // Current behavior is different
from the spec, due to MT usage in runtime + if (hip::host_device->devices().size() >= std::thread::hardware_concurrency()) { + device->SetActiveWait(false); + break; + } + // Fall through for active wait... + case hipDeviceScheduleSpin: + case hipDeviceScheduleYield: + // Both options fall into yield, because of MT usage in runtime + device->SetActiveWait(true); + break; + case hipDeviceScheduleBlockingSync: + device->SetActiveWait(false); + break; + default: + break; + } + + HIP_RETURN(hipSuccess); +} + +// Unimplemented: asserts in builds with assertions enabled, otherwise returns hipErrorNotSupported. +hipError_t hipSetValidDevices ( int* device_arr, int len ) { + HIP_INIT_API(NONE, device_arr, len); + + assert(0 && "Unimplemented"); + + HIP_RETURN(hipErrorNotSupported); +} + +// Queries the link type and hop count between two devices through +// amd::Device::findLinkTypeAndHopCount. Returns hipErrorInvalidDevice for an index +// outside [0, g_devices.size()), hipErrorInvalidValue for a null output pointer, +// and hipErrorInvalidHandle when the lookup itself fails. +hipError_t hipExtGetLinkTypeAndHopCount(int device1, int device2, uint32_t* linktype, uint32_t* hopcount) { + HIP_INIT_API(hipExtGetLinkTypeAndHopCount, device1, device2, linktype, hopcount); + + amd::Device* amd_dev_obj1 = nullptr; + amd::Device* amd_dev_obj2 = nullptr; + // Narrowed to int so it can be compared with the signed device indices below. + const int numDevices = static_cast<int>(g_devices.size()); + + if ((device1 < 0) || (device1 >= numDevices) || (device2 < 0) || (device2 >= numDevices)) { + HIP_RETURN(hipErrorInvalidDevice); + } + + if ((linktype == nullptr) || (hopcount == nullptr)) { + HIP_RETURN(hipErrorInvalidValue); + } + + amd_dev_obj1 = g_devices[device1]->devices()[0]; + amd_dev_obj2 = g_devices[device2]->devices()[0]; + + if (!amd_dev_obj1->findLinkTypeAndHopCount(amd_dev_obj2, linktype, hopcount)) { + HIP_RETURN(hipErrorInvalidHandle); + } + + HIP_RETURN(hipSuccess); +} + diff --git a/projects/hip/vdi/hip_error.cpp b/projects/hip/vdi/hip_error.cpp new file mode 100644 index 0000000000..5802629154 --- /dev/null +++ b/projects/hip/vdi/hip_error.cpp @@ -0,0 +1,172 @@ +/* Copyright (c) 2015-present Advanced Micro Devices, Inc.
+ + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. 
*/ + +#include + +#include "hip_internal.hpp" + +// Returns the error stored in hip::g_lastError and resets the stored value to hipSuccess. +// NOTE(review): returns directly instead of through HIP_RETURN, presumably so the +// reset is not overwritten - confirm against the macro definition. +hipError_t hipGetLastError() +{ + HIP_INIT_API(hipGetLastError); + hipError_t err = hip::g_lastError; + hip::g_lastError = hipSuccess; + return err; +} + +// Returns the error stored in hip::g_lastError without resetting it here. +hipError_t hipPeekAtLastError() +{ + HIP_INIT_API(hipPeekAtLastError); + hipError_t err = hip::g_lastError; + HIP_RETURN(err); +} + +// Maps a hipError_t to the spelling of its enumerator; values without a case +// of their own are reported as "hipErrorUnknown". +const char *hipGetErrorName(hipError_t hip_error) +{ + switch (hip_error) { + case hipSuccess: + return "hipSuccess"; + case hipErrorInvalidValue: + return "hipErrorInvalidValue"; + case hipErrorOutOfMemory: + return "hipErrorOutOfMemory"; + case hipErrorNotInitialized: + return "hipErrorNotInitialized"; + case hipErrorDeinitialized: + return "hipErrorDeinitialized"; + case hipErrorProfilerDisabled: + return "hipErrorProfilerDisabled"; + case hipErrorProfilerNotInitialized: + return "hipErrorProfilerNotInitialized"; + case hipErrorProfilerAlreadyStarted: + return "hipErrorProfilerAlreadyStarted"; + case hipErrorProfilerAlreadyStopped: + return "hipErrorProfilerAlreadyStopped"; + case hipErrorInvalidConfiguration: + return "hipErrorInvalidConfiguration"; + case hipErrorInvalidSymbol: + return "hipErrorInvalidSymbol"; + case hipErrorInvalidDevicePointer: + return "hipErrorInvalidDevicePointer"; + case hipErrorInvalidMemcpyDirection: + return "hipErrorInvalidMemcpyDirection"; + case hipErrorInsufficientDriver: + return "hipErrorInsufficientDriver"; + case hipErrorMissingConfiguration: + return "hipErrorMissingConfiguration"; + case hipErrorPriorLaunchFailure: + return "hipErrorPriorLaunchFailure"; + case hipErrorInvalidDeviceFunction: + return "hipErrorInvalidDeviceFunction"; + case hipErrorNoDevice: + return "hipErrorNoDevice"; + case hipErrorInvalidDevice: + return "hipErrorInvalidDevice"; + case hipErrorInvalidImage: + return "hipErrorInvalidImage"; + case hipErrorInvalidContext: + return "hipErrorInvalidContext"; + case hipErrorContextAlreadyCurrent: + return "hipErrorContextAlreadyCurrent"; + case hipErrorMapFailed: + return
"hipErrorMapFailed"; + case hipErrorUnmapFailed: + return "hipErrorUnmapFailed"; + case hipErrorArrayIsMapped: + return "hipErrorArrayIsMapped"; + case hipErrorAlreadyMapped: + return "hipErrorAlreadyMapped"; + case hipErrorNoBinaryForGpu: + return "hipErrorNoBinaryForGpu"; + case hipErrorAlreadyAcquired: + return "hipErrorAlreadyAcquired"; + case hipErrorNotMapped: + return "hipErrorNotMapped"; + case hipErrorNotMappedAsArray: + return "hipErrorNotMappedAsArray"; + case hipErrorNotMappedAsPointer: + return "hipErrorNotMappedAsPointer"; + case hipErrorECCNotCorrectable: + return "hipErrorECCNotCorrectable"; + case hipErrorUnsupportedLimit: + return "hipErrorUnsupportedLimit"; + case hipErrorContextAlreadyInUse: + return "hipErrorContextAlreadyInUse"; + case hipErrorPeerAccessUnsupported: + return "hipErrorPeerAccessUnsupported"; + case hipErrorInvalidKernelFile: + return "hipErrorInvalidKernelFile"; + case hipErrorInvalidGraphicsContext: + return "hipErrorInvalidGraphicsContext"; + case hipErrorInvalidSource: + return "hipErrorInvalidSource"; + case hipErrorFileNotFound: + return "hipErrorFileNotFound"; + case hipErrorSharedObjectSymbolNotFound: + return "hipErrorSharedObjectSymbolNotFound"; + case hipErrorSharedObjectInitFailed: + return "hipErrorSharedObjectInitFailed"; + case hipErrorOperatingSystem: + return "hipErrorOperatingSystem"; + case hipErrorInvalidHandle: + return "hipErrorInvalidHandle"; + case hipErrorNotFound: + return "hipErrorNotFound"; + case hipErrorNotReady: + return "hipErrorNotReady"; + case hipErrorIllegalAddress: + return "hipErrorIllegalAddress"; + case hipErrorLaunchOutOfResources: + return "hipErrorLaunchOutOfResources"; + case hipErrorLaunchTimeOut: + return "hipErrorLaunchTimeOut"; + case hipErrorPeerAccessAlreadyEnabled: + return "hipErrorPeerAccessAlreadyEnabled"; + case hipErrorPeerAccessNotEnabled: + return "hipErrorPeerAccessNotEnabled"; + case hipErrorSetOnActiveProcess: + return "hipErrorSetOnActiveProcess"; + case 
hipErrorAssert: + return "hipErrorAssert"; + case hipErrorHostMemoryAlreadyRegistered: + return "hipErrorHostMemoryAlreadyRegistered"; + case hipErrorHostMemoryNotRegistered: + return "hipErrorHostMemoryNotRegistered"; + case hipErrorLaunchFailure: + return "hipErrorLaunchFailure"; + case hipErrorNotSupported: + return "hipErrorNotSupported"; + case hipErrorUnknown: + return "hipErrorUnknown"; + case hipErrorRuntimeMemory: + return "hipErrorRuntimeMemory"; + case hipErrorRuntimeOther: + return "hipErrorRuntimeOther"; + case hipErrorTbd: + return "hipErrorTbd"; + default: + // Any value without a case of its own is reported as unknown. + return "hipErrorUnknown"; + }; +} + +// Returns the same text as hipGetErrorName: the enumerator spelling, not a +// separate human-readable description. +const char *hipGetErrorString(hipError_t hip_error) +{ + return hipGetErrorName(hip_error); +} + diff --git a/projects/hip/vdi/hip_event.cpp b/projects/hip/vdi/hip_event.cpp new file mode 100644 index 0000000000..677becd67e --- /dev/null +++ b/projects/hip/vdi/hip_event.cpp @@ -0,0 +1,254 @@ +/* Copyright (c) 2015-present Advanced Micro Devices, Inc. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. */ + +#include + +#include "hip_event.hpp" + +namespace hip { + +// Returns true when the recorded event has status CL_COMPLETE; if it is not yet +// complete, notifyCmdQueue() is called before the status is read again. +// Callers must have checked that event_ is non-null. +bool Event::ready() { + if (event_->status() != CL_COMPLETE) { + event_->notifyCmdQueue(); + } + + return (event_->status() == CL_COMPLETE); +} + +// Non-blocking status check: hipErrorInvalidHandle when nothing has been recorded, +// otherwise hipSuccess or hipErrorNotReady. +hipError_t Event::query() { + amd::ScopedLock lock(lock_); + + if (event_ == nullptr) { + return hipErrorInvalidHandle; + } + + return ready() ? hipSuccess : hipErrorNotReady; +} + +// Blocks until the recorded event completes; hipErrorInvalidHandle when nothing +// has been recorded. +hipError_t Event::synchronize() { + amd::ScopedLock lock(lock_); + + if (event_ == nullptr) { + return hipErrorInvalidHandle; + } + + event_->awaitCompletion(); + + return hipSuccess; +} + +// Stores in `ms` the difference between eStop's profiling end_ and this event's +// profiling start_, divided by 1000000 (presumably nanoseconds to milliseconds - +// confirm the timestamp unit). Returns hipErrorInvalidHandle when either event is +// unrecorded or has hipEventDisableTiming set, and hipErrorNotReady when either +// event has not completed. +// NOTE(review): two threads calling a.elapsedTime(b) and b.elapsedTime(a) take the +// two locks in opposite order - confirm this cannot deadlock in practice. +hipError_t Event::elapsedTime(Event& eStop, float& ms) { + amd::ScopedLock startLock(lock_); + + // Same event on both sides: take the lock once and report zero elapsed time. + if (this == &eStop) { + if (event_ == nullptr) { + return hipErrorInvalidHandle; + } + + if (flags & hipEventDisableTiming) { + return hipErrorInvalidHandle; + } + + if (!ready()) { + return hipErrorNotReady; + } + + ms = 0.f; + return hipSuccess; + } + amd::ScopedLock stopLock(eStop.lock_); + + if (event_ == nullptr || + eStop.event_ == nullptr) { + return hipErrorInvalidHandle; + } + + if ((flags | eStop.flags) & hipEventDisableTiming) { + return hipErrorInvalidHandle; + } + + if (!ready() || !eStop.ready()) { + return hipErrorNotReady; + } + + // Go through a signed 64-bit value so a stop that precedes the start yields a + // negative duration instead of a huge positive one. + ms = static_cast<float>(static_cast<int64_t>(eStop.event_->profilingInfo().end_ - + event_->profilingInfo().start_))/1000000.f; + + return hipSuccess; +} + +// Makes hostQueue wait for the recorded event by enqueuing a marker that depends +// on it. Nothing is enqueued when no event has been recorded or when the event +// already belongs to hostQueue. +hipError_t Event::streamWait(amd::HostQueue* hostQueue, uint flags) { + // Take the lock before the first read of event_, as query() and synchronize() + // do, so a concurrent addMarker() cannot swap the event mid-check. + amd::ScopedLock lock(lock_); + + if ((event_ == nullptr) || (event_->command().queue() == hostQueue)) { + return hipSuccess; + } + + if (!event_->notifyCmdQueue()) { + return hipErrorLaunchOutOfResources; + } + amd::Command::EventWaitList eventWaitList; +
eventWaitList.push_back(event_); + + amd::Command* command = new amd::Marker(*hostQueue, false, eventWaitList); + if (command == NULL) { + return hipErrorOutOfMemory; + } + command->enqueue(); + command->release(); + + return hipSuccess; +} + +// Points this Event at the event of `command`, releasing the previously held +// event if there was one. Re-recording the same command is a no-op. +void Event::addMarker(amd::HostQueue* queue, amd::Command* command) { + amd::ScopedLock lock(lock_); + + if (event_ == &command->event()) return; + + if (event_ != nullptr) { + event_->release(); + } + + event_ = &command->event(); +} + +} + +// Allocates a hip::Event and returns it through *event as an opaque hipEvent_t. +// Returns hipErrorInvalidValue for a null output pointer, for flags outside the +// supported set, or when both release flags are set together. +hipError_t ihipEventCreateWithFlags(hipEvent_t* event, unsigned flags) { + if (event == nullptr) { + return hipErrorInvalidValue; + } + + unsigned supportedFlags = hipEventDefault | hipEventBlockingSync | hipEventDisableTiming | + hipEventReleaseToDevice | hipEventReleaseToSystem; + const unsigned releaseFlags = (hipEventReleaseToDevice | hipEventReleaseToSystem); + + const bool illegalFlags = + (flags & ~supportedFlags) || // can't set any unsupported flags. + (flags & releaseFlags) == releaseFlags; // can't set both release flags + + if (!illegalFlags) { + hip::Event* e = new hip::Event(flags); + + if (e == nullptr) { + return hipErrorOutOfMemory; + } + + // The public handle is the hip::Event pointer itself. + *event = reinterpret_cast<hipEvent_t>(e); + } else { + return hipErrorInvalidValue; + } + return hipSuccess; +} + +// Non-blocking completion check on an event handle; hipErrorInvalidHandle for null. +hipError_t ihipEventQuery(hipEvent_t event) { + if (event == nullptr) { + return hipErrorInvalidHandle; + } + + hip::Event* e = reinterpret_cast<hip::Event*>(event); + + return e->query(); +} + +// Public wrapper around ihipEventCreateWithFlags. +hipError_t hipEventCreateWithFlags(hipEvent_t* event, unsigned flags) { + HIP_INIT_API(hipEventCreateWithFlags, event, flags); + + HIP_RETURN(ihipEventCreateWithFlags(event, flags)); +} + +// Creates an event with no flags set. +hipError_t hipEventCreate(hipEvent_t* event) { + HIP_INIT_API(hipEventCreate, event); + + HIP_RETURN(ihipEventCreateWithFlags(event, 0)); +} + +// Destroys an event created by hipEventCreate*; hipErrorInvalidHandle for null. +hipError_t hipEventDestroy(hipEvent_t event) { + HIP_INIT_API(hipEventDestroy, event); + + if (event == nullptr) { + HIP_RETURN(hipErrorInvalidHandle); + } + + // Delete through the concrete type so ~Event() runs and releases the held event. + delete reinterpret_cast<hip::Event*>(event); + + HIP_RETURN(hipSuccess); +} +
+hipError_t hipEventElapsedTime(float *ms, hipEvent_t start, hipEvent_t stop) { + HIP_INIT_API(hipEventElapsedTime, ms, start, stop); + + if (start == nullptr || stop == nullptr) { + HIP_RETURN(hipErrorInvalidHandle); + } + + if (ms == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + hip::Event* eStart = reinterpret_cast(start); + hip::Event* eStop = reinterpret_cast(stop); + + HIP_RETURN(eStart->elapsedTime(*eStop, *ms)); +} + +hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream) { + HIP_INIT_API(hipEventRecord, event, stream); + + if (event == nullptr) { + HIP_RETURN(hipErrorInvalidHandle); + } + + hip::Event* e = reinterpret_cast(event); + + hip::Stream* s = reinterpret_cast(stream); + amd::HostQueue* queue = hip::getQueue(stream); + + amd::Command* command = (s != nullptr && (s->flags & hipStreamNonBlocking)) ? + queue->getLastQueuedCommand(true) : nullptr; + + if (command == nullptr) { + command = new amd::Marker(*queue, false); + command->enqueue(); + } + + e->addMarker(queue, command); + + HIP_RETURN(hipSuccess); +} + +hipError_t hipEventSynchronize(hipEvent_t event) { + HIP_INIT_API(hipEventSynchronize, event); + + if (event == nullptr) { + HIP_RETURN(hipErrorInvalidHandle); + } + + hip::Event* e = reinterpret_cast(event); + + HIP_RETURN(e->synchronize()); +} + +hipError_t hipEventQuery(hipEvent_t event) { + HIP_INIT_API(hipEventQuery, event); + + HIP_RETURN(ihipEventQuery(event)); +} diff --git a/projects/hip/vdi/hip_event.hpp b/projects/hip/vdi/hip_event.hpp new file mode 100644 index 0000000000..2360c972bb --- /dev/null +++ b/projects/hip/vdi/hip_event.hpp @@ -0,0 +1,68 @@ +/* Copyright (c) 2015-present Advanced Micro Devices, Inc. 
+ + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. 
*/ + +#ifndef HIP_EVENT_H +#define HIP_EVENT_H + +#include "hip_internal.hpp" +#include "thread/monitor.hpp" + +namespace hip { + +// Marker command with profiling switched on and its start/end timestamps zeroed. +class TimerMarker: public amd::Marker { +public: + TimerMarker(amd::HostQueue& queue) : amd::Marker(queue, false) { + profilingInfo_.enabled_ = true; + profilingInfo_.callback_ = nullptr; + profilingInfo_.start_ = profilingInfo_.end_ = 0; + } +}; + +// Runtime object behind a hipEvent_t handle. It holds the amd::Event of the most +// recently recorded command (null until addMarker is called) and guards it with lock_. +class Event { +public: + Event(unsigned int flags) : flags(flags), lock_("hipEvent_t"), stream_(nullptr), event_(nullptr) { + // event_ stays null until addMarker attaches a recorded command's event. + } + + // Releases the held event, if one was ever recorded. + ~Event() { + if (event_ != nullptr) { + event_->release(); + } + } + // Creation flags passed to the constructor (hipEvent* bits). + unsigned int flags; + + hipError_t query(); + hipError_t synchronize(); + hipError_t elapsedTime(Event& stop, float& ms); + hipError_t streamWait(amd::HostQueue* queue, uint flags); + + void addMarker(amd::HostQueue* queue, amd::Command* command); + +private: + amd::Monitor lock_; + // Not written by any member shown here; initialized to null so it never holds an indeterminate value. + amd::HostQueue* stream_; + amd::Event* event_; + + bool ready(); +}; + +}; + +#endif // HIP_EVENT_H diff --git a/projects/hip/vdi/hip_formatting.hpp b/projects/hip/vdi/hip_formatting.hpp new file mode 100644 index 0000000000..8c26249e03 --- /dev/null +++ b/projects/hip/vdi/hip_formatting.hpp @@ -0,0 +1,843 @@ +/* Copyright (c) 2015-present Advanced Micro Devices, Inc. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software.
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. */ +#include +#include + +// Streams the enumerator name of a hipTextureFilterMode; unrecognized values +// print as "hipFilterModePoint". +inline std::ostream& operator<<(std::ostream& os, const hipTextureFilterMode& s) { + switch (s) { + case hipFilterModePoint: + os << "hipFilterModePoint"; + break; + case hipFilterModeLinear: + os << "hipFilterModeLinear"; + break; + default: + os << "hipFilterModePoint"; + }; + return os; +} + +// Streams the enumerator name of a hipTextureReadMode; unrecognized values +// print as "hipReadModeElementType". +inline std::ostream& operator<<(std::ostream& os, const hipTextureReadMode& s) { + switch (s) { + case hipReadModeElementType: + os << "hipReadModeElementType"; + break; + case hipReadModeNormalizedFloat: + os << "hipReadModeNormalizedFloat"; + break; + default: + os << "hipReadModeElementType"; + }; + return os; +} + +// Streams the enumerator name of a hipTextureAddressMode; unrecognized values +// print as "hipAddressModeWrap". +inline std::ostream& operator<<(std::ostream& os, const hipTextureAddressMode& s) { + switch (s) { + case hipAddressModeWrap: + os << "hipAddressModeWrap"; + break; + case hipAddressModeClamp: + os << "hipAddressModeClamp"; + break; + case hipAddressModeMirror: + os << "hipAddressModeMirror"; + break; + case hipAddressModeBorder: + os << "hipAddressModeBorder"; + break; + default: + os << "hipAddressModeWrap"; + }; + return os; +} + + +// Streams the enumerator name of a hipMemcpyKind; unrecognized values print as +// "hipMemcpyDefault". +inline std::ostream& operator<<(std::ostream& os, const hipMemcpyKind& s) { + switch (s) { + case hipMemcpyHostToHost: + os << "hipMemcpyHostToHost"; + break; + case hipMemcpyHostToDevice: + os << "hipMemcpyHostToDevice"; + break; + case hipMemcpyDeviceToHost: + os << "hipMemcpyDeviceToHost"; + break; + case hipMemcpyDeviceToDevice: + os << "hipMemcpyDeviceToDevice"; + break; + case hipMemcpyDefault: + os << "hipMemcpyDefault"; +
break; + default: + os << "hipMemcpyDefault"; + }; + return os; +} + +// Streams the enumerator name of a hipChannelFormatKind; unrecognized values +// print as "hipChannelFormatKindNone". +inline std::ostream& operator<<(std::ostream& os, const hipChannelFormatKind& s) { + switch (s) { + case hipChannelFormatKindSigned: + os << "hipChannelFormatKindSigned"; + break; + case hipChannelFormatKindUnsigned: + // Print this enumerator's own name, not a hipMemcpyKind name. + os << "hipChannelFormatKindUnsigned"; + break; + case hipChannelFormatKindFloat: + os << "hipChannelFormatKindFloat"; + break; + case hipChannelFormatKindNone: + os << "hipChannelFormatKindNone"; + break; + default: + os << "hipChannelFormatKindNone"; + }; + return os; +} + +// Streams the enumerator name of a hipArray_Format; unrecognized values print +// as "HIP_AD_FORMAT_FLOAT". +inline std::ostream& operator<<(std::ostream& os, const hipArray_Format& s) { + switch (s) { + case HIP_AD_FORMAT_UNSIGNED_INT8: + os << "HIP_AD_FORMAT_UNSIGNED_INT8"; + break; + case HIP_AD_FORMAT_UNSIGNED_INT16: + os << "HIP_AD_FORMAT_UNSIGNED_INT16"; + break; + case HIP_AD_FORMAT_UNSIGNED_INT32: + os << "HIP_AD_FORMAT_UNSIGNED_INT32"; + break; + case HIP_AD_FORMAT_SIGNED_INT8: + os << "HIP_AD_FORMAT_SIGNED_INT8"; + break; + case HIP_AD_FORMAT_SIGNED_INT16: + os << "HIP_AD_FORMAT_SIGNED_INT16"; + break; + case HIP_AD_FORMAT_SIGNED_INT32: + os << "HIP_AD_FORMAT_SIGNED_INT32"; + break; + case HIP_AD_FORMAT_HALF: + os << "HIP_AD_FORMAT_HALF"; + break; + case HIP_AD_FORMAT_FLOAT: + os << "HIP_AD_FORMAT_FLOAT"; + break; + default: + os << "HIP_AD_FORMAT_FLOAT"; + }; + return os; +} + +// Streams the enumerator name of a hipResourceViewFormat; unrecognized values +// print as "hipResViewFormatNone". +inline std::ostream& operator<<(std::ostream& os, const hipResourceViewFormat& s) { + switch (s) { + case hipResViewFormatNone: + os << "hipResViewFormatNone"; + break; + case hipResViewFormatUnsignedChar1: + os << "hipResViewFormatUnsignedChar1"; + break; + case hipResViewFormatUnsignedChar2: + os << "hipResViewFormatUnsignedChar2"; + break; + case hipResViewFormatUnsignedChar4: + os << "hipResViewFormatUnsignedChar4"; + break; + case hipResViewFormatSignedChar1: + os << "hipResViewFormatSignedChar1"; + break; + case hipResViewFormatSignedChar2: + os << "hipResViewFormatSignedChar2"; + break; + case hipResViewFormatSignedChar4: + os <<
"hipResViewFormatSignedChar4"; + break; + case hipResViewFormatUnsignedShort1: + os << "hipResViewFormatUnsignedShort1"; + break; + case hipResViewFormatUnsignedShort2: + os << "hipResViewFormatUnsignedShort2"; + break; + case hipResViewFormatUnsignedShort4: + os << "hipResViewFormatUnsignedShort4"; + break; + case hipResViewFormatSignedShort1: + os << "hipResViewFormatSignedShort1"; + break; + case hipResViewFormatSignedShort2: + os << "hipResViewFormatSignedShort2"; + break; + case hipResViewFormatSignedShort4: + os << "hipResViewFormatSignedShort4"; + break; + case hipResViewFormatUnsignedInt1: + os << "hipResViewFormatUnsignedInt1"; + break; + case hipResViewFormatUnsignedInt2: + os << "hipResViewFormatUnsignedInt2"; + break; + case hipResViewFormatUnsignedInt4: + os << "hipResViewFormatUnsignedInt4"; + break; + case hipResViewFormatSignedInt1: + os << "hipResViewFormatSignedInt1"; + break; + case hipResViewFormatSignedInt2: + os << "hipResViewFormatSignedInt2"; + break; + case hipResViewFormatSignedInt4: + os << "hipResViewFormatSignedInt4"; + break; + case hipResViewFormatHalf1: + os << "hipResViewFormatHalf1"; + break; + case hipResViewFormatHalf2: + os << "hipResViewFormatHalf2"; + break; + case hipResViewFormatHalf4: + os << "hipResViewFormatHalf4"; + break; + case hipResViewFormatFloat1: + os << "hipResViewFormatFloat1"; + break; + case hipResViewFormatFloat2: + os << "hipResViewFormatFloat2"; + break; + case hipResViewFormatFloat4: + os << "hipResViewFormatFloat4"; + break; + case hipResViewFormatUnsignedBlockCompressed1: + os << "hipResViewFormatUnsignedBlockCompressed1"; + break; + case hipResViewFormatUnsignedBlockCompressed2: + os << "hipResViewFormatUnsignedBlockCompressed2"; + break; + case hipResViewFormatUnsignedBlockCompressed3: + os << "hipResViewFormatUnsignedBlockCompressed3"; + break; + case hipResViewFormatUnsignedBlockCompressed4: + os << "hipResViewFormatUnsignedBlockCompressed4"; + break; + case hipResViewFormatSignedBlockCompressed4: + 
os << "hipResViewFormatSignedBlockCompressed4"; + break; + case hipResViewFormatUnsignedBlockCompressed5: + os << "hipResViewFormatUnsignedBlockCompressed5"; + break; + case hipResViewFormatSignedBlockCompressed5: + os << "hipResViewFormatSignedBlockCompressed5"; + break; + case hipResViewFormatUnsignedBlockCompressed6H: + os << "hipResViewFormatUnsignedBlockCompressed6H"; + break; + case hipResViewFormatSignedBlockCompressed6H: + os << "hipResViewFormatSignedBlockCompressed6H"; + break; + case hipResViewFormatUnsignedBlockCompressed7: + os << "hipResViewFormatUnsignedBlockCompressed7"; + break; + default: + os << "hipResViewFormatNone"; + }; + return os; +} + +// Streams the enumerator name of a hipFunction_attribute; unrecognized values +// print as "HIP_FUNC_ATTRIBUTE_MAX". +inline std::ostream& operator<<(std::ostream& os, const hipFunction_attribute& s) { + switch (s) { + case HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK: + os << "HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK"; + break; + case HIP_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES: + os << "HIP_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES"; + break; + case HIP_FUNC_ATTRIBUTE_CONST_SIZE_BYTES: + os << "HIP_FUNC_ATTRIBUTE_CONST_SIZE_BYTES"; + break; + case HIP_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES: + os << "HIP_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES"; + break; + case HIP_FUNC_ATTRIBUTE_NUM_REGS: + os << "HIP_FUNC_ATTRIBUTE_NUM_REGS"; + break; + case HIP_FUNC_ATTRIBUTE_PTX_VERSION: + os << "HIP_FUNC_ATTRIBUTE_PTX_VERSION"; + break; + case HIP_FUNC_ATTRIBUTE_BINARY_VERSION: + os << "HIP_FUNC_ATTRIBUTE_BINARY_VERSION"; + break; + case HIP_FUNC_ATTRIBUTE_CACHE_MODE_CA: + os << "HIP_FUNC_ATTRIBUTE_CACHE_MODE_CA"; + break; + case HIP_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES: + os << "HIP_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES"; + break; + case HIP_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT: + os << "HIP_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT"; + break; + case HIP_FUNC_ATTRIBUTE_MAX: + os << "HIP_FUNC_ATTRIBUTE_MAX"; + break; + default: + os << "HIP_FUNC_ATTRIBUTE_MAX"; + }; + return os; +} + +// Streams the enumerator name of a hiprtcResult. +inline std::ostream& operator<<(std::ostream& os,
const hiprtcResult& s) { /* Writes the enumerator name of s to os; a value with no matching case is printed as "HIPRTC_ERROR_INTERNAL_ERROR". */ + switch (s) { + case HIPRTC_SUCCESS: + os << "HIPRTC_SUCCESS"; + break; + case HIPRTC_ERROR_OUT_OF_MEMORY: + os << "HIPRTC_ERROR_OUT_OF_MEMORY"; + break; + case HIPRTC_ERROR_PROGRAM_CREATION_FAILURE: + os << "HIPRTC_ERROR_PROGRAM_CREATION_FAILURE"; + break; + case HIPRTC_ERROR_INVALID_INPUT: + os << "HIPRTC_ERROR_INVALID_INPUT"; + break; + case HIPRTC_ERROR_INVALID_PROGRAM: + os << "HIPRTC_ERROR_INVALID_PROGRAM"; + break; + case HIPRTC_ERROR_INVALID_OPTION: + os << "HIPRTC_ERROR_INVALID_OPTION"; + break; + case HIPRTC_ERROR_COMPILATION: + os << "HIPRTC_ERROR_COMPILATION"; + break; + case HIPRTC_ERROR_BUILTIN_OPERATION_FAILURE: + os << "HIPRTC_ERROR_BUILTIN_OPERATION_FAILURE"; + break; + case HIPRTC_ERROR_NO_NAME_EXPRESSIONS_AFTER_COMPILATION: + os << "HIPRTC_ERROR_NO_NAME_EXPRESSIONS_AFTER_COMPILATION"; + break; + case HIPRTC_ERROR_NO_LOWERED_NAMES_BEFORE_COMPILATION: + os << "HIPRTC_ERROR_NO_LOWERED_NAMES_BEFORE_COMPILATION"; /* text must match the enumerator spelling exactly */ + break; + case HIPRTC_ERROR_NAME_EXPRESSION_NOT_VALID: + os << "HIPRTC_ERROR_NAME_EXPRESSION_NOT_VALID"; + break; + case HIPRTC_ERROR_INTERNAL_ERROR: + os << "HIPRTC_ERROR_INTERNAL_ERROR"; + break; + default: + os << "HIPRTC_ERROR_INTERNAL_ERROR"; + }; + return os; +} + +inline std::ostream& operator<<(std::ostream& os, const hipJitOption& s) { + switch (s) { + case hipJitOptionMaxRegisters: + os << "hipJitOptionMaxRegisters"; + break; + case hipJitOptionThreadsPerBlock: + os << "hipJitOptionThreadsPerBlock"; + break; + case hipJitOptionWallTime: + os << "hipJitOptionWallTime"; + break; + case hipJitOptionInfoLogBuffer: + os << "hipJitOptionInfoLogBuffer"; + break; + case hipJitOptionInfoLogBufferSizeBytes: + os << "hipJitOptionInfoLogBufferSizeBytes"; + break; + case hipJitOptionErrorLogBuffer: + os << "hipJitOptionErrorLogBuffer"; + break; + case hipJitOptionErrorLogBufferSizeBytes: + os << "hipJitOptionErrorLogBufferSizeBytes"; + break; + case hipJitOptionOptimizationLevel: + os << "hipJitOptionOptimizationLevel"; 
+ break; + case hipJitOptionTargetFromContext: + os << "hipJitOptionTargetFromContext"; + break; + case hipJitOptionTarget: + os << "hipJitOptionTarget"; + break; + case hipJitOptionFallbackStrategy: + os << "hipJitOptionFallbackStrategy"; + break; + case hipJitOptionGenerateDebugInfo: + os << "hipJitOptionGenerateDebugInfo"; + break; + case hipJitOptionCacheMode: + os << "hipJitOptionCacheMode"; + break; + case hipJitOptionSm3xOpt: + os << "hipJitOptionSm3xOpt"; + break; + case hipJitOptionFastCompile: + os << "hipJitOptionFastCompile"; + break; + case hipJitOptionNumOptions: + os << "hipJitOptionNumOptions"; + break; + default: + os << "hipJitOptionMaxRegisters"; + }; + return os; +} + +inline std::ostream& operator<<(std::ostream& os, const hipFuncCache_t& s) { + switch (s) { + case hipFuncCachePreferNone: + os << "hipFuncCachePreferNone"; + break; + case hipFuncCachePreferShared: + os << "hipFuncCachePreferShared"; + break; + case hipFuncCachePreferL1: + os << "hipFuncCachePreferL1"; + break; + case hipFuncCachePreferEqual: + os << "hipFuncCachePreferEqual"; + break; + default: + os << "hipFuncCachePreferNone"; + }; + return os; +} + +inline std::ostream& operator<<(std::ostream& os, const hipSharedMemConfig& s) { + switch (s) { + case hipSharedMemBankSizeDefault: + os << "hipSharedMemBankSizeDefault"; + break; + case hipSharedMemBankSizeFourByte: + os << "hipSharedMemBankSizeFourByte"; + break; + case hipSharedMemBankSizeEightByte: + os << "hipSharedMemBankSizeEightByte"; + break; + default: + os << "hipSharedMemBankSizeDefault"; + }; + return os; +} + +inline std::ostream& operator<<(std::ostream& os, const hipDataType& s) { + switch (s) { + case HIP_R_16F: + os << "HIP_R_16F"; + break; + case HIP_R_32F: + os << "HIP_R_32F"; + break; + case HIP_R_64F: + os << "HIP_R_64F"; + break; + case HIP_C_16F: + os << "HIP_C_16F"; + break; + case HIP_C_32F: + os << "HIP_C_32F"; + break; + case HIP_C_64F: + os << "HIP_C_64F"; + break; + default: + os << "HIP_R_16F"; + }; 
+ return os; +} + +inline std::ostream& operator<<(std::ostream& os, const hipLibraryPropertyType& s) { + switch (s) { + case HIP_LIBRARY_MAJOR_VERSION: + os << "HIP_LIBRARY_MAJOR_VERSION"; + break; + case HIP_LIBRARY_MINOR_VERSION: + os << "HIP_LIBRARY_MINOR_VERSION"; + break; + case HIP_LIBRARY_PATCH_LEVEL: + os << "HIP_LIBRARY_PATCH_LEVEL"; + break; + default: + os << "HIP_LIBRARY_MAJOR_VERSION"; + }; + return os; +} + +inline std::ostream& operator<<(std::ostream& os, const hip_api_id_t& s) { + os << hip_api_name(s); + return os; +} + +inline std::ostream& operator<<(std::ostream& os, const hip_api_id_t* s) { + if (s) { + os << *s; + } else { + os << "nullptr"; + } + return os; +} + +inline std::ostream& operator<<(std::ostream& os, const hipTextureDesc& s) { + os << '{' + << '{' + << s.addressMode[0] + << ',' + << s.addressMode[1] + << ',' + << s.addressMode[2] + << '}' + << ',' + << s.filterMode + << ',' + << s.readMode + << ',' + << s.sRGB + << ',' + << '{' + << s.borderColor[0] + << ',' + << s.borderColor[1] + << ',' + << s.borderColor[2] + << ',' + << s.borderColor[3] + << '}' + << ',' + << s.normalizedCoords + << ',' + << s.mipmapFilterMode + << ',' + << s.mipmapLevelBias + << ',' + << s.minMipmapLevelClamp + << ',' + << s.maxMipmapLevelClamp + << '}'; + return os; +} + +inline std::ostream& operator<<(std::ostream& os, const hipTextureDesc* s) { + if (s) { + os << *s; + } else { + os << "nullptr"; + } + return os; +} + + +inline std::ostream& operator<<(std::ostream& os, const dim3& s) { + os << '{' + << s.x + << ',' + << s.y + << ',' + << s.z + << '}'; + return os; +} + +inline std::ostream& operator<<(std::ostream& os, const dim3* s) { + if (s) { + os << *s; + } else { + os << "nullptr"; + } + return os; +} + +inline std::ostream& operator<<(std::ostream& os, const hipChannelFormatDesc& s) { + os << '{' + << s.x + << ',' + << s.y + << ',' + << s.z + << ',' + << s.w + << ',' + << s.f + << '}'; + return os; +} + +inline std::ostream& 
operator<<(std::ostream& os, const hipChannelFormatDesc* s) { + if (s) { + os << *s; + } else { + os << "nullptr"; + } + return os; +} + +inline std::ostream& operator<<(std::ostream& os, const hipMipmappedArray& s) { + os << '{' + << s.data + << ',' + << s.desc + << ',' + << s.width + << ',' + << s.height + << ',' + << s.depth + << '}'; + return os; +} + +inline std::ostream& operator<<(std::ostream& os, const hipMipmappedArray* s) { + if (s) { + os << *s; + } else { + os << "nullptr"; + } + return os; +} + + +inline std::ostream& operator<<(std::ostream& os, const hipResourceDesc& s) { + os << '{' + << s.resType + << ',' + << '{'; + + switch (s.resType) { + case hipResourceTypeLinear: + os << s.res.linear.devPtr + << ',' + << s.res.linear.desc + << ',' + << s.res.linear.sizeInBytes; + break; + case hipResourceTypePitch2D: + os << s.res.pitch2D.devPtr + << ',' + << s.res.pitch2D.desc + << ',' + << s.res.pitch2D.width + << ',' + << s.res.pitch2D.height + << ',' + << s.res.pitch2D.pitchInBytes; + break; + case hipResourceTypeArray: + os << s.res.array.array; + break; + case hipResourceTypeMipmappedArray: + os <(fun), arg) ? + hipSuccess : hipErrorInvalidValue; +} + +hipError_t hipRemoveApiCallback(uint32_t id) { + return callbacks_table.set_callback(id, NULL, NULL) ? hipSuccess : hipErrorInvalidValue; +} + +hipError_t hipRegisterActivityCallback(uint32_t id, void* fun, void* arg) { + return callbacks_table.set_activity(id, reinterpret_cast(fun), arg) ? + hipSuccess : hipErrorInvalidValue; +} + +hipError_t hipRemoveActivityCallback(uint32_t id) { + return callbacks_table.set_activity(id, NULL, NULL) ? 
hipSuccess : hipErrorInvalidValue; +} + +hipError_t hipEnableTracing(bool enabled) { + callbacks_table.set_enabled(enabled); + return hipSuccess; +} + +const char* hipApiName(uint32_t id) { + return hip_api_name(id); +} diff --git a/projects/hip/vdi/hip_internal.hpp b/projects/hip/vdi/hip_internal.hpp new file mode 100755 index 0000000000..9b4bd17042 --- /dev/null +++ b/projects/hip/vdi/hip_internal.hpp @@ -0,0 +1,297 @@ +/* Copyright (c) 2015-present Advanced Micro Devices, Inc. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. */ + +#ifndef HIP_SRC_HIP_INTERNAL_H +#define HIP_SRC_HIP_INTERNAL_H + +#include "vdi_common.hpp" +#include "hip_prof_api.h" +#include "trace_helper.h" +#include "utils/debug.hpp" +#include "hip_formatting.hpp" +#include +#include +#include +#include +#include +#ifdef _WIN32 +#include +#else +#include +#endif + +/*! 
IHIP IPC MEMORY Structure */ +#define IHIP_IPC_MEM_HANDLE_SIZE 32 +#define IHIP_IPC_MEM_RESERVED_SIZE LP64_SWITCH(28,24) + +typedef struct ihipIpcMemHandle_st { + char ipc_handle[IHIP_IPC_MEM_HANDLE_SIZE]; ///< ipc memory handle on ROCr + size_t psize; + char reserved[IHIP_IPC_MEM_RESERVED_SIZE]; +} ihipIpcMemHandle_t; + +#ifdef _WIN32 + inline int getpid() { return _getpid(); } +#endif + +#define HIP_INIT() \ + std::call_once(hip::g_ihipInitialized, hip::init); \ + if (hip::g_device == nullptr && g_devices.size() > 0) { \ + hip::g_device = g_devices[0]; \ + } + +// This macro should be called at the beginning of every HIP API. +#define HIP_INIT_API(cid, ...) \ + ClPrint(amd::LOG_INFO, amd::LOG_API, "%-5d: [%zx] %s ( %s )", getpid(), std::this_thread::get_id(), __func__, ToString( __VA_ARGS__ ).c_str()); \ + amd::Thread* thread = amd::Thread::current(); \ + if (!VDI_CHECK_THREAD(thread)) { \ + HIP_RETURN(hipErrorOutOfMemory); \ + } \ + HIP_INIT() \ + HIP_CB_SPAWNER_OBJECT(cid); + +#define HIP_RETURN(ret) \ + hip::g_lastError = ret; \ + ClPrint(amd::LOG_INFO, amd::LOG_API, "%-5d: [%zx] %s: Returned %s", getpid(), std::this_thread::get_id(), __func__, hipGetErrorName(hip::g_lastError)); \ + return hip::g_lastError; + +namespace hc { +class accelerator; +class accelerator_view; +}; + +namespace hip { + + /// HIP Device class + class Device { + amd::Monitor lock_{"Device lock"}; + /// VDI context + amd::Context* context_; + /// VDI host queue for default streams + amd::HostQueue* defaultStream_ = nullptr; + /// Device's ID + /// Store it here so we don't have to loop through the device list every time + int deviceId_; + //Maintain list of user enabled peers + std::list userEnabledPeers; + public: + Device(amd::Context* ctx, int devId): context_(ctx), deviceId_(devId) { assert(ctx != nullptr); } + ~Device() {} + + amd::Context* asContext() const { return context_; } + int deviceId() const { return deviceId_; } + void retain() const { context_->retain(); } + void 
release() const { context_->release(); } + const std::vector& devices() const { return context_->devices(); } + hipError_t EnablePeerAccess(int peerDeviceId){ + amd::ScopedLock lock(lock_); + bool found = (std::find(userEnabledPeers.begin(), userEnabledPeers.end(), peerDeviceId) != userEnabledPeers.end()); + if (found) { + return hipErrorPeerAccessAlreadyEnabled; + } + userEnabledPeers.push_back(peerDeviceId); + return hipSuccess; + } + hipError_t DisablePeerAccess(int peerDeviceId) { + amd::ScopedLock lock(lock_); + bool found = (std::find(userEnabledPeers.begin(), userEnabledPeers.end(), peerDeviceId) != userEnabledPeers.end()); + if (found) { + userEnabledPeers.remove(peerDeviceId); + return hipSuccess; + } else { + return hipErrorPeerAccessNotEnabled; + } + } + amd::HostQueue* defaultStream(); + }; + + extern std::once_flag g_ihipInitialized; + /// Current thread's device + extern thread_local Device* g_device; + extern thread_local hipError_t g_lastError; + /// Device representing the host - for pinned memory + extern Device* host_device; + + extern void init(); + + extern Device* getCurrentDevice(); + extern void setCurrentDevice(unsigned int index); + + /// Get VDI queue associated with hipStream + /// Note: This follows the CUDA spec to sync with default streams + /// and Blocking streams + extern amd::HostQueue* getQueue(hipStream_t s); + /// Get default stream associated with the VDI context + extern amd::HostQueue* getNullStream(amd::Context&); + /// Get default stream of the thread + extern amd::HostQueue* getNullStream(); + /// Sync Blocking streams on the current device + extern void syncStreams(); + /// Sync blocking streams on the given device + extern void syncStreams(int devId); + + + struct Function { + amd::Kernel* function_; + amd::Monitor lock_; + + Function(amd::Kernel* f) : function_(f), lock_("function lock") {} + hipFunction_t asHipFunction() { return reinterpret_cast(this); } + + static Function* asFunction(hipFunction_t f) { return 
reinterpret_cast(f); } + }; + + struct Stream { + amd::HostQueue* queue; + amd::Monitor lock; + Device* device; + amd::CommandQueue::Priority priority; + unsigned int flags; + + Stream(Device* dev, amd::CommandQueue::Priority p, unsigned int f); + void create(); + amd::HostQueue* asHostQueue(); + void destroy(); + void finish(); + }; + +}; + +struct ihipExec_t { + dim3 gridDim_; + dim3 blockDim_; + size_t sharedMem_; + hipStream_t hStream_; + std::vector arguments_; +}; + +class PlatformState { + amd::Monitor lock_{"Guards global function map"}; + + std::unordered_map>> modules_; + bool initialized_{false}; + + void digestFatBinary(const void* data, std::vector>& programs); +public: + void init(); + std::vector>* addFatBinary(const void*data) + { + if (initialized_) { + digestFatBinary(data, modules_[data]); + } + return &modules_[data]; + } + void removeFatBinary(std::vector>* module) + { /* Removes the modules_ entry whose mapped vector lives at the address `module` (the pointer addFatBinary returned); does nothing when no entry matches. */ + for (auto& mod : modules_) { + if (&mod.second == module) { + modules_.erase(mod.first); /* erase by the entry's key; the address of the map node is not a key */ + return; + } + } + } + + struct RegisteredVar { + public: + RegisteredVar(): size_(0), devicePtr_(nullptr), amd_mem_obj_(nullptr) {} + ~RegisteredVar() {} + + hipDeviceptr_t getdeviceptr() const { return devicePtr_; }; + size_t getvarsize() const { return size_; }; + + size_t size_; // Size of the variable + hipDeviceptr_t devicePtr_; //Device Memory Address of the variable. 
+ amd::Memory* amd_mem_obj_; + }; + + struct DeviceFunction { + std::string deviceName; + std::vector< std::pair< hipModule_t, bool > >* modules; + std::vector functions; + }; + struct DeviceVar { + void* shadowVptr; + std::string hostVar; + size_t size; + std::vector< std::pair< hipModule_t, bool > >* modules; + std::vector rvars; + bool dyn_undef; + }; +private: + class Module { + public: + Module(hipModule_t hip_module_) : hip_module(hip_module_) {} + std::unordered_map functions_; + private: + hipModule_t hip_module; + }; + std::unordered_map module_map_; + + std::unordered_map functions_; + std::unordered_multimap vars_; + // Map from the host shadow symbol to its device name. + std::unordered_map symbols_; + + static PlatformState* platform_; + + PlatformState() {} + ~PlatformState() {} +public: + static PlatformState& instance() { + return *platform_; + } + + bool unregisterFunc(hipModule_t hmod); + std::vector< std::pair >* unregisterVar(hipModule_t hmod); + + + bool findSymbol(const void *hostVar, std::string &devName); + PlatformState::DeviceVar* findVar(std::string hostVar, int deviceId, hipModule_t hmod); + void registerVarSym(const void *hostVar, const char *symbolName); + void registerVar(const char* symbolName, const DeviceVar& var); + void registerFunction(const void* hostFunction, const DeviceFunction& func); + + bool registerModFuncs(std::vector& func_names, hipModule_t* module); + bool findModFunc(hipFunction_t* hfunc, hipModule_t hmod, const char* name); + bool createFunc(hipFunction_t* hfunc, hipModule_t hmod, const char* name); + hipFunction_t getFunc(const void* hostFunction, int deviceId); + bool getFuncAttr(const void* hostFunction, hipFuncAttributes* func_attr); + bool getGlobalVar(const char* hostVar, int deviceId, hipModule_t hmod, + hipDeviceptr_t* dev_ptr, size_t* size_ptr); + bool getTexRef(const char* hostVar, hipModule_t hmod, textureReference** texRef); + + bool getShadowVarInfo(std::string var_name, hipModule_t hmod, + void** 
var_addr, size_t* var_size); + void setupArgument(const void *arg, size_t size, size_t offset); + void configureCall(dim3 gridDim, dim3 blockDim, size_t sharedMem, hipStream_t stream); + + void popExec(ihipExec_t& exec); + +}; + +extern std::vector g_devices; +extern hipError_t ihipDeviceGetCount(int* count); +extern int ihipGetDevice(); +extern hipError_t ihipMalloc(void** ptr, size_t sizeBytes, unsigned int flags); +extern amd::Memory* getMemoryObject(const void* ptr, size_t& offset); +extern bool CL_CALLBACK getSvarInfo(cl_program program, std::string var_name, void** var_addr, + size_t* var_size); + +#endif // HIP_SRC_HIP_INTERNAL_H diff --git a/projects/hip/vdi/hip_memory.cpp b/projects/hip/vdi/hip_memory.cpp new file mode 100644 index 0000000000..eb56b69bd2 --- /dev/null +++ b/projects/hip/vdi/hip_memory.cpp @@ -0,0 +1,2188 @@ +/* Copyright (c) 2015-present Advanced Micro Devices, Inc. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. 
*/ + +#include +#include "hip_internal.hpp" +#include "hip_conversions.hpp" +#include "platform/context.hpp" +#include "platform/command.hpp" +#include "platform/memory.hpp" + +amd::Memory* getMemoryObject(const void* ptr, size_t& offset) { + amd::Memory *memObj = amd::MemObjMap::FindMemObj(ptr); + if (memObj != nullptr) { + if (memObj->getSvmPtr() != nullptr) { + // SVM pointer + offset = reinterpret_cast(ptr) - reinterpret_cast(memObj->getSvmPtr()); + } else if (memObj->getHostMem() != nullptr) { + // Prepinned memory + offset = reinterpret_cast(ptr) - reinterpret_cast(memObj->getHostMem()); + } else { + ShouldNotReachHere(); + } + } + return memObj; +} + +hipError_t ihipFree(void *ptr) +{ + if (ptr == nullptr) { + return hipSuccess; + } + if (amd::SvmBuffer::malloced(ptr)) { + for (auto& dev : g_devices) { + amd::HostQueue* queue = hip::getNullStream(*dev->asContext()); + if (queue != nullptr) { + queue->finish(); + } + hip::syncStreams(dev->deviceId()); + } + amd::SvmBuffer::free(*hip::getCurrentDevice()->asContext(), ptr); + return hipSuccess; + } + return hipErrorInvalidValue; +} + +hipError_t ihipMalloc(void** ptr, size_t sizeBytes, unsigned int flags) +{ /* Allocates sizeBytes via amd::SvmBuffer::malloc and stores the address in *ptr. Returns hipErrorInvalidValue for a null ptr, hipSuccess with *ptr = nullptr for a zero-size request, and hipErrorOutOfMemory when no context is available, the size exceeds the device limit, or the allocation fails. */ + if (ptr == nullptr) { /* must be checked before any write through ptr, including the zero-size path */ + return hipErrorInvalidValue; + } + else if (sizeBytes == 0) { + *ptr = nullptr; + return hipSuccess; + } + + amd::Context* amdContext = ((flags & CL_MEM_SVM_FINE_GRAIN_BUFFER) != 0)? 
+ hip::host_device->asContext() : hip::getCurrentDevice()->asContext(); + + if (amdContext == nullptr) { + return hipErrorOutOfMemory; + } + + if (amdContext->devices()[0]->info().maxMemAllocSize_ < sizeBytes) { + return hipErrorOutOfMemory; + } + + *ptr = amd::SvmBuffer::malloc(*amdContext, flags, sizeBytes, amdContext->devices()[0]->info().memBaseAddrAlign_); + if (*ptr == nullptr) { + return hipErrorOutOfMemory; + } + ClPrint(amd::LOG_INFO, amd::LOG_API, "%-5d: [%zx] ihipMalloc ptr=0x%zx", getpid(),std::this_thread::get_id(), *ptr); + return hipSuccess; +} + +hipError_t ihipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind, + amd::HostQueue& queue, bool isAsync = false) { + if (sizeBytes == 0) { + // Skip if nothing needs writing. + return hipSuccess; + } + + amd::Command* command = nullptr; + amd::Command::EventWaitList waitList; + + size_t sOffset = 0; + amd::Memory *srcMemory = getMemoryObject(src, sOffset); + size_t dOffset = 0; + amd::Memory *dstMemory = getMemoryObject(dst, dOffset); + amd::Device* queueDevice = &queue.device(); + + if (((srcMemory == nullptr) && (dstMemory == nullptr)) || + (kind == hipMemcpyHostToHost)) { + queue.finish(); + memcpy(dst, src, sizeBytes); + return hipSuccess; + } else if ((srcMemory == nullptr) && (dstMemory != nullptr)) { + amd::HostQueue* pQueue = &queue; + if (queueDevice != dstMemory->getContext().devices()[0]) { + pQueue = hip::getNullStream(dstMemory->getContext()); + waitList.push_back(queue.getLastQueuedCommand(true)); + } + command = new amd::WriteMemoryCommand(*pQueue, CL_COMMAND_WRITE_BUFFER, waitList, + *dstMemory->asBuffer(), dOffset, sizeBytes, src); + isAsync = false; + } else if ((srcMemory != nullptr) && (dstMemory == nullptr)) { + amd::HostQueue* pQueue = &queue; + if (queueDevice != srcMemory->getContext().devices()[0]) { + pQueue = hip::getNullStream(srcMemory->getContext()); + waitList.push_back(queue.getLastQueuedCommand(true)); + } + command = new amd::ReadMemoryCommand(*pQueue, 
CL_COMMAND_READ_BUFFER, waitList, + *srcMemory->asBuffer(), sOffset, sizeBytes, dst); + isAsync = false; + } else if ((srcMemory != nullptr) && (dstMemory != nullptr)) { + if (queueDevice != srcMemory->getContext().devices()[0]) { + amd::Coord3D srcOffset(sOffset, 0, 0); + amd::Coord3D dstOffset(dOffset, 0, 0); + amd::Coord3D copySize(sizeBytes, 1, 1); + command = new amd::CopyMemoryP2PCommand(queue, CL_COMMAND_COPY_BUFFER, waitList, + *srcMemory->asBuffer(),*dstMemory->asBuffer(), srcOffset, dstOffset, copySize); + command->enqueue(); + if (!isAsync) { + command->awaitCompletion(); + } + command->release(); + return hipSuccess; + } + if (queueDevice != dstMemory->getContext().devices()[0]) { + amd::Coord3D srcOffset(sOffset, 0, 0); + amd::Coord3D dstOffset(dOffset, 0, 0); + amd::Coord3D copySize(sizeBytes, 1, 1); + command = new amd::CopyMemoryP2PCommand(queue, CL_COMMAND_COPY_BUFFER, waitList, + *srcMemory->asBuffer(),*dstMemory->asBuffer(), srcOffset, dstOffset, copySize); + command->enqueue(); + if (!isAsync) { + command->awaitCompletion(); + } + command->release(); + return hipSuccess; + } + command = new amd::CopyMemoryCommand(queue, CL_COMMAND_COPY_BUFFER, waitList, + *srcMemory->asBuffer(),*dstMemory->asBuffer(), sOffset, dOffset, sizeBytes); + } + + if (command == nullptr) { + return hipErrorOutOfMemory; + } + + command->enqueue(); + if (!isAsync) { + command->awaitCompletion(); + } + command->release(); + + if (waitList.size() > 0) { + waitList[0]->release(); + } + + return hipSuccess; +} + +hipError_t hipExtMallocWithFlags(void** ptr, size_t sizeBytes, unsigned int flags) { + HIP_INIT_API(hipExtMallocWithFlags, ptr, sizeBytes, flags); + + if (flags != hipDeviceMallocDefault && + flags != hipDeviceMallocFinegrained) { + HIP_RETURN(hipErrorInvalidValue); + } + + HIP_RETURN(ihipMalloc(ptr, sizeBytes, (flags & hipDeviceMallocFinegrained)? 
CL_MEM_SVM_ATOMICS: 0)); +} + +hipError_t hipMalloc(void** ptr, size_t sizeBytes) { + HIP_INIT_API(hipMalloc, ptr, sizeBytes); + + HIP_RETURN(ihipMalloc(ptr, sizeBytes, 0)); +} + +hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags) { + HIP_INIT_API(hipHostMalloc, ptr, sizeBytes, flags); + + if (ptr == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + *ptr = nullptr; + + const unsigned int coherentFlags = hipHostMallocCoherent | hipHostMallocNonCoherent; + + // can't have both Coherent and NonCoherent flags set at the same time + if ((flags & coherentFlags) == coherentFlags) { + HIP_RETURN(hipErrorInvalidValue); + } + + unsigned int ihipFlags = CL_MEM_SVM_FINE_GRAIN_BUFFER | (flags << 16); + if (flags == 0 || + flags & (hipHostMallocCoherent | hipHostMallocMapped) || + (!(flags & hipHostMallocNonCoherent) && HIP_HOST_COHERENT)) { + ihipFlags |= CL_MEM_SVM_ATOMICS; + } + + HIP_RETURN(ihipMalloc(ptr, sizeBytes, ihipFlags)); +} + +hipError_t hipMallocManaged(void** devPtr, size_t size, + unsigned int flags) { + HIP_INIT_API(hipMallocManaged, devPtr, size, flags); + + if (flags != hipMemAttachGlobal) { + HIP_RETURN(hipErrorInvalidValue); + } + + HIP_RETURN(ihipMalloc(devPtr, size, CL_MEM_SVM_FINE_GRAIN_BUFFER)); +} + +hipError_t hipFree(void* ptr) { + HIP_INIT_API(hipFree, ptr); + + HIP_RETURN(ihipFree(ptr)); +} + +hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind) { + HIP_INIT_API(hipMemcpy, dst, src, sizeBytes, kind); + + hip::syncStreams(); + amd::HostQueue* queue = hip::getNullStream(); + HIP_RETURN(ihipMemcpy(dst, src, sizeBytes, kind, *queue)); +} + +hipError_t hipMemcpyWithStream(void* dst, const void* src, size_t sizeBytes, + hipMemcpyKind kind, hipStream_t stream) { + HIP_INIT_API(hipMemcpyWithStream, dst, src, sizeBytes, kind, stream); + + amd::HostQueue* queue = hip::getQueue(stream); + + HIP_RETURN(ihipMemcpy(dst, src, sizeBytes, kind, *queue, false)); +} + +hipError_t hipMemPtrGetInfo(void *ptr, 
size_t *size) { /* Stores in *size the byte size of the memory object that contains ptr; returns hipErrorInvalidValue when ptr is not tracked or size is null. */ + HIP_INIT_API(hipMemPtrGetInfo, ptr, size); + + size_t offset = 0; + amd::Memory* svmMem = getMemoryObject(ptr, offset); + + if (svmMem == nullptr || size == nullptr) { /* size is dereferenced below */ + HIP_RETURN(hipErrorInvalidValue); + } + + *size = svmMem->getSize(); + + HIP_RETURN(hipSuccess); +} + +hipError_t hipHostFree(void* ptr) { + HIP_INIT_API(hipHostFree, ptr); + + HIP_RETURN(ihipFree(ptr)); +} + +hipError_t ihipArrayDestroy(hipArray* array) { + if (array == nullptr) { + return hipErrorInvalidValue; + } + + cl_mem memObj = reinterpret_cast(array->data); + if (is_valid(memObj) == false) { + return hipErrorInvalidValue; + } + for (auto& dev : g_devices) { + amd::HostQueue* queue = hip::getNullStream(*dev->asContext()); + if (queue != nullptr) { + queue->finish(); + } + hip::syncStreams(dev->deviceId()); + } + as_amd(memObj)->release(); + + delete array; + + return hipSuccess; +} + +hipError_t hipFreeArray(hipArray* array) { + HIP_INIT_API(hipFreeArray, array); + + HIP_RETURN(ihipArrayDestroy(array)); +} + +hipError_t hipMemGetAddressRange(hipDeviceptr_t* pbase, size_t* psize, hipDeviceptr_t dptr) { /* Looks up the memory object containing dptr and reports its SVM base pointer and size; returns hipErrorInvalidDevicePointer when dptr is not tracked. */ + HIP_INIT_API(hipMemGetAddressRange, pbase, psize, dptr); + + // Since we are using SVM buffer DevicePtr and HostPtr is the same + void* ptr = dptr; + size_t offset = 0; + amd::Memory* svmMem = getMemoryObject(ptr, offset); + + if (svmMem == nullptr) { + HIP_RETURN(hipErrorInvalidDevicePointer); + } + + if (pbase != nullptr) *pbase = svmMem->getSvmPtr(); /* a null output pointer is skipped instead of dereferenced */ + if (psize != nullptr) *psize = svmMem->getSize(); + + HIP_RETURN(hipSuccess); +} + +hipError_t hipMemGetInfo(size_t* free, size_t* total) { + HIP_INIT_API(hipMemGetInfo, free, total); + + size_t freeMemory[2]; + amd::Device* device = hip::getCurrentDevice()->devices()[0]; + if(device == nullptr) { + HIP_RETURN(hipErrorInvalidDevice); + } + + if(!device->globalFreeMemory(freeMemory)) { + HIP_RETURN(hipErrorInvalidValue); + } + + *free = freeMemory[0] * Ki; + *total = device->info().globalMemSize_; + + HIP_RETURN(hipSuccess); +} + +hipError_t ihipMallocPitch(void** ptr, size_t* pitch, 
size_t width, size_t height, size_t depth, + cl_mem_object_type imageType, const cl_image_format* image_format) { + + amd::Device* device = hip::getCurrentDevice()->devices()[0]; + + if (ptr == nullptr) { + return hipErrorInvalidValue; + } + + if ((width == 0) || (height == 0)) { + *ptr = nullptr; + return hipSuccess; + } + + const amd::Image::Format imageFormat(*image_format); + + *pitch = amd::alignUp(width * imageFormat.getElementSize(), device->info().imagePitchAlignment_); + + size_t sizeBytes = *pitch * height * depth; + + if (device->info().maxMemAllocSize_ < sizeBytes) { + return hipErrorOutOfMemory; + } + + *ptr = amd::SvmBuffer::malloc(*hip::getCurrentDevice()->asContext(), 0, sizeBytes, + device->info().memBaseAddrAlign_); + + if (*ptr == nullptr) { + return hipErrorOutOfMemory; + } + + return hipSuccess; +} + + +hipError_t hipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t height) { + HIP_INIT_API(hipMallocPitch, ptr, pitch, width, height); + + const cl_image_format image_format = { CL_R, CL_UNSIGNED_INT8 }; + HIP_RETURN(ihipMallocPitch(ptr, pitch, width, height, 1, CL_MEM_OBJECT_IMAGE2D, &image_format)); +} + +hipError_t hipMalloc3D(hipPitchedPtr* pitchedDevPtr, hipExtent extent) { + HIP_INIT_API(hipMalloc3D, pitchedDevPtr, extent); + + size_t pitch = 0; + + if (pitchedDevPtr == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + const cl_image_format image_format = { CL_R, CL_UNSIGNED_INT8 }; + hipError_t status = hipSuccess; + status = ihipMallocPitch(&pitchedDevPtr->ptr, &pitch, extent.width, extent.height, extent.depth, + CL_MEM_OBJECT_IMAGE3D, &image_format); + + if (status == hipSuccess) { + pitchedDevPtr->pitch = pitch; + pitchedDevPtr->xsize = extent.width; + pitchedDevPtr->ysize = extent.height; + } + + HIP_RETURN(status); +} + +amd::Image* ihipImageCreate(const cl_channel_order channelOrder, + const cl_channel_type channelType, + const cl_mem_object_type imageType, + const size_t imageWidth, + const size_t imageHeight, + const 
size_t imageDepth, + const size_t imageArraySize, + const size_t imageRowPitch, + const size_t imageSlicePitch, + const uint32_t numMipLevels, + amd::Memory* buffer) { + const amd::Image::Format imageFormat({channelOrder, channelType}); + if (!imageFormat.isValid()) { + return nullptr; + } + + amd::Context& context = *hip::getCurrentDevice()->asContext(); + if (!imageFormat.isSupported(context, imageType)) { + return nullptr; + } + + const std::vector& devices = context.devices(); + if (!devices[0]->info().imageSupport_) { + return nullptr; + } + + if (!amd::Image::validateDimensions(devices, + imageType, + imageWidth, + imageHeight, + imageDepth, + imageArraySize)) { + return nullptr; + } + + // TODO validate the image descriptor. + + amd::Image* image = nullptr; + if (buffer != nullptr) { + switch (imageType) { + case CL_MEM_OBJECT_IMAGE1D_BUFFER: + case CL_MEM_OBJECT_IMAGE2D: + image = new (context) amd::Image(*buffer->asBuffer(), + imageType, + CL_MEM_READ_WRITE, + imageFormat, + imageWidth, + (imageHeight == 0) ? 1 : imageHeight, + (imageDepth == 0) ? 1 : imageDepth, + imageRowPitch, + imageSlicePitch); + break; + default: + ShouldNotReachHere(); + } + } else { + switch (imageType) { + case CL_MEM_OBJECT_IMAGE1D: + case CL_MEM_OBJECT_IMAGE2D: + case CL_MEM_OBJECT_IMAGE3D: + image = new (context) amd::Image(context, + imageType, + CL_MEM_READ_WRITE, + imageFormat, + imageWidth, + (imageHeight == 0) ? 1 : imageHeight, + (imageDepth == 0) ? 
1 : imageDepth, + imageWidth * imageFormat.getElementSize(), /* row pitch */ + imageWidth * imageHeight * imageFormat.getElementSize(), /* slice pitch */ + numMipLevels); + break; + case CL_MEM_OBJECT_IMAGE1D_ARRAY: + image = new (context) amd::Image(context, + imageType, + CL_MEM_READ_WRITE, + imageFormat, + imageWidth, + imageArraySize, + 1, /* image depth */ + imageWidth * imageFormat.getElementSize(), + imageWidth * imageHeight * imageFormat.getElementSize(), + numMipLevels); + break; + case CL_MEM_OBJECT_IMAGE2D_ARRAY: + image = new (context) amd::Image(context, + imageType, + CL_MEM_READ_WRITE, + imageFormat, + imageWidth, + imageHeight, + imageArraySize, + imageWidth * imageFormat.getElementSize(), + imageWidth * imageHeight * imageFormat.getElementSize(), + numMipLevels); + break; + default: + ShouldNotReachHere(); + } + } + + if (image == nullptr) { + return nullptr; + } + + if (!image->create(nullptr)) { + delete image; + return nullptr; + } + + return image; +} + +hipError_t ihipArrayCreate(hipArray** array, + const HIP_ARRAY3D_DESCRIPTOR* pAllocateArray, + unsigned int numMipmapLevels) { + // NumChannels specifies the number of packed components per HIP array element; it may be 1, 2, or 4; + if ((pAllocateArray->NumChannels != 1) && + (pAllocateArray->NumChannels != 2) && + (pAllocateArray->NumChannels != 4)) { + return hipErrorInvalidValue; + } + + if ((pAllocateArray->Flags & hipArraySurfaceLoadStore) || + (pAllocateArray->Flags & hipArrayCubemap) || + (pAllocateArray->Flags & hipArrayTextureGather)) { + return hipErrorNotSupported; + } + + const cl_channel_order channelOrder = hip::getCLChannelOrder(pAllocateArray->NumChannels, 0); + const cl_channel_type channelType = hip::getCLChannelType(pAllocateArray->Format, hipReadModeElementType); + const cl_mem_object_type imageType = hip::getCLMemObjectType(pAllocateArray->Width, + pAllocateArray->Height, + pAllocateArray->Depth, + pAllocateArray->Flags); + + amd::Image* image = 
ihipImageCreate(channelOrder, + channelType, + imageType, + pAllocateArray->Width, + pAllocateArray->Height, + pAllocateArray->Depth, + // The number of layers is determined by the depth extent. + pAllocateArray->Depth, /* array size */ + 0, /* row pitch */ + 0, /* slice pitch */ + numMipmapLevels, + nullptr /* buffer */); + + if (image == nullptr) { + return hipErrorInvalidValue; + } + + cl_mem memObj = as_cl(image); + *array = new hipArray{reinterpret_cast(memObj)}; + + // It is UB to call hipGet*() on an array created via hipArrayCreate()/hipArray3DCreate(). + // This is due to hip not differentiating between runtime and driver types. + // TODO change the hipArray struct in driver_types.h. + (*array)->desc = hip::getChannelFormatDesc(pAllocateArray->NumChannels, pAllocateArray->Format); + (*array)->width = pAllocateArray->Width; + (*array)->height = pAllocateArray->Height; + (*array)->depth = pAllocateArray->Depth; + (*array)->Format = pAllocateArray->Format; + (*array)->NumChannels = pAllocateArray->NumChannels; + + return hipSuccess; +} + +hipError_t hipArrayCreate(hipArray** array, + const HIP_ARRAY_DESCRIPTOR* pAllocateArray) { + HIP_INIT_API(hipArrayCreate, array, pAllocateArray); + + HIP_ARRAY3D_DESCRIPTOR desc = {pAllocateArray->Width, + pAllocateArray->Height, + 0, /* Depth */ + pAllocateArray->Format, + pAllocateArray->NumChannels, + hipArrayDefault /* Flags */}; + + HIP_RETURN(ihipArrayCreate(array, &desc, 0)); +} + + +hipError_t hipMallocArray(hipArray** array, + const hipChannelFormatDesc* desc, + size_t width, + size_t height, + unsigned int flags) { + HIP_INIT_API(hipMallocArray, array, desc, width, height, flags); + + HIP_ARRAY3D_DESCRIPTOR allocateArray = {width, + height, + 0, /* Depth */ + hip::getArrayFormat(*desc), + hip::getNumChannels(*desc), + flags}; + + HIP_RETURN(ihipArrayCreate(array, &allocateArray, 0 /* numMipLevels */)); +} + +hipError_t hipArray3DCreate(hipArray** array, + const HIP_ARRAY3D_DESCRIPTOR* pAllocateArray) { + 
HIP_INIT_API(hipArray3DCreate, array, pAllocateArray); + + HIP_RETURN(ihipArrayCreate(array, pAllocateArray, 0 /* numMipLevels */)); +} + +hipError_t hipMalloc3DArray(hipArray_t* array, + const hipChannelFormatDesc* desc, + hipExtent extent, + unsigned int flags) { + HIP_INIT_API(hipMalloc3DArray, array, desc, extent, flags); + + HIP_ARRAY3D_DESCRIPTOR allocateArray = {extent.width, + extent.height, + extent.depth, + hip::getArrayFormat(*desc), + hip::getNumChannels(*desc), + flags}; + + HIP_RETURN(ihipArrayCreate(array, &allocateArray, 0)); +} + +hipError_t hipHostGetFlags(unsigned int* flagsPtr, void* hostPtr) { + HIP_INIT_API(hipHostGetFlags, flagsPtr, hostPtr); + + if (flagsPtr == nullptr || + hostPtr == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + size_t offset = 0; + amd::Memory* svmMem = getMemoryObject(hostPtr, offset); + + if (svmMem == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + *flagsPtr = svmMem->getMemFlags() >> 16; + + HIP_RETURN(hipSuccess); +} + +hipError_t hipHostRegister(void* hostPtr, size_t sizeBytes, unsigned int flags) { + HIP_INIT_API(hipHostRegister, hostPtr, sizeBytes, flags); + if(hostPtr != nullptr) { + amd::Memory* mem = new (*hip::host_device->asContext()) amd::Buffer(*hip::host_device->asContext(), CL_MEM_USE_HOST_PTR | CL_MEM_SVM_ATOMICS, sizeBytes); + + constexpr bool sysMemAlloc = false; + constexpr bool skipAlloc = false; + constexpr bool forceAlloc = true; + if (!mem->create(hostPtr, sysMemAlloc, skipAlloc, forceAlloc)) { + mem->release(); + HIP_RETURN(hipErrorOutOfMemory); + } + + for (const auto& device: hip::getCurrentDevice()->devices()) { + // Since the amd::Memory object is shared between all devices + // it's fine to have multiple addresses mapped to it + const device::Memory* devMem = mem->getDeviceMemory(*device); + amd::MemObjMap::AddMemObj(reinterpret_cast(devMem->virtualAddress()), mem); + } + + amd::MemObjMap::AddMemObj(hostPtr, mem); + HIP_RETURN(hipSuccess); + } else { + 
HIP_RETURN(ihipMalloc(&hostPtr, sizeBytes, flags)); + } +} + +hipError_t hipHostUnregister(void* hostPtr) { + HIP_INIT_API(hipHostUnregister, hostPtr); + + for (auto& dev : g_devices) { + amd::HostQueue* queue = hip::getNullStream(*dev->asContext()); + if (queue != nullptr) { + queue->finish(); + } + hip::syncStreams(dev->deviceId()); + } + + if (amd::SvmBuffer::malloced(hostPtr)) { + amd::SvmBuffer::free(*hip::host_device->asContext(), hostPtr); + HIP_RETURN(hipSuccess); + } else { + size_t offset = 0; + amd::Memory* mem = getMemoryObject(hostPtr, offset); + + if(mem) { + for (const auto& device: hip::getCurrentDevice()->devices()) { + const device::Memory* devMem = mem->getDeviceMemory(*device); + amd::MemObjMap::RemoveMemObj(reinterpret_cast(devMem->virtualAddress())); + } + amd::MemObjMap::RemoveMemObj(hostPtr); + mem->release(); + HIP_RETURN(hipSuccess); + } + } + + HIP_RETURN(hipErrorInvalidValue); +} + +// Deprecated function: +hipError_t hipHostAlloc(void** ptr, size_t sizeBytes, unsigned int flags) { + HIP_RETURN(ihipMalloc(ptr, sizeBytes, flags)); +}; + + +hipError_t hipMemcpyToSymbol(const void* symbol, const void* src, size_t count, + size_t offset, hipMemcpyKind kind) { + HIP_INIT_API(hipMemcpyToSymbol, symbol, src, count, offset, kind); + + size_t sym_size = 0; + hipDeviceptr_t device_ptr = nullptr; + + std::string symbolName; + if (!PlatformState::instance().findSymbol(symbol, symbolName)) { + HIP_RETURN(hipErrorInvalidSymbol); + } + /* Get address and size for the global symbol */ + if (!PlatformState::instance().getGlobalVar(symbolName.c_str(), ihipGetDevice(), nullptr, + &device_ptr, &sym_size)) { + HIP_RETURN(hipErrorInvalidSymbol); + } + + /* Size Check to make sure offset is correct */ + if ((offset + count) != sym_size) { + return HIP_RETURN(hipErrorInvalidDevicePointer); + } + + device_ptr = reinterpret_cast
(device_ptr) + offset; + + /* Copy memory from source to destination address */ + HIP_RETURN(hipMemcpy(device_ptr, src, count, kind)); +} + +hipError_t hipMemcpyFromSymbol(void* dst, const void* symbol, size_t count, + size_t offset, hipMemcpyKind kind) { + HIP_INIT_API(hipMemcpyFromSymbol, symbol, dst, count, offset, kind); + + size_t sym_size = 0; + hipDeviceptr_t device_ptr = nullptr; + + std::string symbolName; + if (!PlatformState::instance().findSymbol(symbol, symbolName)) { + HIP_RETURN(hipErrorInvalidSymbol); + } + /* Get address and size for the global symbol */ + if (!PlatformState::instance().getGlobalVar(symbolName.c_str(), ihipGetDevice(), nullptr, + &device_ptr, &sym_size)) { + HIP_RETURN(hipErrorInvalidSymbol); + } + + /* Size Check to make sure offset is correct */ + if ((offset + count) != sym_size) { + return HIP_RETURN(hipErrorInvalidDevicePointer); + } + + device_ptr = reinterpret_cast
(device_ptr) + offset; + + /* Copy memory from source to destination address */ + HIP_RETURN(hipMemcpy(dst, device_ptr, count, kind)); +} + +hipError_t hipMemcpyToSymbolAsync(const void* symbol, const void* src, size_t count, + size_t offset, hipMemcpyKind kind, hipStream_t stream) { + HIP_INIT_API(hipMemcpyToSymbolAsync, symbol, src, count, offset, kind, stream); + + size_t sym_size = 0; + hipDeviceptr_t device_ptr = nullptr; + + std::string symbolName; + if (!PlatformState::instance().findSymbol(symbol, symbolName)) { + HIP_RETURN(hipErrorInvalidSymbol); + } + /* Get address and size for the global symbol */ + if (!PlatformState::instance().getGlobalVar(symbolName.c_str(), ihipGetDevice(), nullptr, + &device_ptr, &sym_size)) { + HIP_RETURN(hipErrorInvalidSymbol); + } + + /* Size Check to make sure offset is correct */ + if ((offset + count) != sym_size) { + return HIP_RETURN(hipErrorInvalidDevicePointer); + } + + device_ptr = reinterpret_cast
(device_ptr) + offset; + + /* Copy memory from source to destination address */ + HIP_RETURN(hipMemcpyAsync(device_ptr, src, count, kind, stream)); +} + +hipError_t hipMemcpyFromSymbolAsync(void* dst, const void* symbol, size_t count, + size_t offset, hipMemcpyKind kind, hipStream_t stream) { + HIP_INIT_API(hipMemcpyFromSymbolAsync, symbol, dst, count, offset, kind, stream); + + size_t sym_size = 0; + hipDeviceptr_t device_ptr = nullptr; + + std::string symbolName; + if (!PlatformState::instance().findSymbol(symbol, symbolName)) { + HIP_RETURN(hipErrorInvalidSymbol); + } + /* Get address and size for the global symbol */ + if (!PlatformState::instance().getGlobalVar(symbolName.c_str(), ihipGetDevice(), nullptr, + &device_ptr, &sym_size)) { + HIP_RETURN(hipErrorInvalidSymbol); + } + + /* Size Check to make sure offset is correct */ + if ((offset + count) != sym_size) { + return HIP_RETURN(hipErrorInvalidDevicePointer); + } + + device_ptr = reinterpret_cast
(device_ptr) + offset; + + /* Copy memory from source to destination address */ + HIP_RETURN(hipMemcpyAsync(dst, device_ptr, count, kind, stream)); +} + +hipError_t hipMemcpyHtoD(hipDeviceptr_t dstDevice, + void* srcHost, + size_t ByteCount) { + HIP_INIT_API(hipMemcpyHtoD, dstDevice, srcHost, ByteCount); + + HIP_RETURN(ihipMemcpy(dstDevice, srcHost, ByteCount, hipMemcpyHostToDevice, *hip::getQueue(nullptr))); +} + +hipError_t hipMemcpyDtoH(void* dstHost, + hipDeviceptr_t srcDevice, + size_t ByteCount) { + HIP_INIT_API(hipMemcpyDtoH, dstHost, srcDevice, ByteCount); + + HIP_RETURN(ihipMemcpy(dstHost, srcDevice, ByteCount, hipMemcpyDeviceToHost, *hip::getQueue(nullptr))); +} + +hipError_t hipMemcpyDtoD(hipDeviceptr_t dstDevice, + hipDeviceptr_t srcDevice, + size_t ByteCount) { + HIP_INIT_API(hipMemcpyDtoD, dstDevice, srcDevice, ByteCount); + + HIP_RETURN(ihipMemcpy(dstDevice, srcDevice, ByteCount, hipMemcpyDeviceToDevice, *hip::getQueue(nullptr))); +} + +hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes, + hipMemcpyKind kind, hipStream_t stream) { + HIP_INIT_API(hipMemcpyAsync, dst, src, sizeBytes, kind, stream); + + amd::HostQueue* queue = hip::getQueue(stream); + + HIP_RETURN(ihipMemcpy(dst, src, sizeBytes, kind, *queue, true)); +} + +hipError_t hipMemcpyHtoDAsync(hipDeviceptr_t dstDevice, + void* srcHost, + size_t ByteCount, + hipStream_t stream) { + HIP_INIT_API(hipMemcpyHtoDAsync, dstDevice, srcHost, ByteCount, stream); + + HIP_RETURN(ihipMemcpy(dstDevice, srcHost, ByteCount, hipMemcpyHostToDevice, *hip::getQueue(stream), true)); +} + +hipError_t hipMemcpyDtoDAsync(hipDeviceptr_t dstDevice, + hipDeviceptr_t srcDevice, + size_t ByteCount, + hipStream_t stream) { + HIP_INIT_API(hipMemcpyDtoDAsync, dstDevice, srcDevice, ByteCount, stream); + + HIP_RETURN(ihipMemcpy(dstDevice, srcDevice, ByteCount, hipMemcpyDeviceToDevice, *hip::getQueue(stream), true)); +} + +hipError_t hipMemcpyDtoHAsync(void* dstHost, + hipDeviceptr_t srcDevice, + size_t 
ByteCount, + hipStream_t stream) { + HIP_INIT_API(hipMemcpyDtoHAsync, dstHost, srcDevice, ByteCount, stream); + + HIP_RETURN(ihipMemcpy(dstHost, srcDevice, ByteCount, hipMemcpyDeviceToHost, *hip::getQueue(stream), true)); +} + +hipError_t ihipMemcpyAtoD(hipArray* srcArray, + void* dstDevice, + amd::Coord3D srcOrigin, + amd::Coord3D dstOrigin, + amd::Coord3D copyRegion, + size_t dstRowPitch, + size_t dstSlicePitch, + hipStream_t stream, + bool isAsync = false) { + cl_mem srcMemObj = reinterpret_cast(srcArray->data); + if (is_valid(srcMemObj) == false) { + return hipErrorInvalidValue; + } + + amd::Image* srcImage = as_amd(srcMemObj)->asImage(); + size_t dstOffset = 0; + amd::Memory* dstMemory = getMemoryObject(dstDevice, dstOffset); + + amd::BufferRect srcRect; + if (!srcRect.create(static_cast(srcOrigin), static_cast(copyRegion), srcImage->getRowPitch(), srcImage->getSlicePitch())) { + return hipErrorInvalidValue; + } + + amd::BufferRect dstRect; + if (!dstRect.create(static_cast(dstOrigin), static_cast(copyRegion), dstRowPitch, dstSlicePitch)) { + return hipErrorInvalidValue; + } + dstRect.start_ += dstOffset; + dstRect.end_ += dstOffset; + + const size_t copySizeInBytes = copyRegion[0] * copyRegion[1] * copyRegion[2] * srcImage->getImageFormat().getElementSize(); + if (!srcImage->validateRegion(srcOrigin, copyRegion) || + !dstMemory->validateRegion(dstOrigin, {copySizeInBytes, 0, 0})) { + return hipErrorInvalidValue; + } + + amd::CopyMemoryCommand* command = new amd::CopyMemoryCommand(*hip::getQueue(stream), + CL_COMMAND_COPY_IMAGE_TO_BUFFER, + amd::Command::EventWaitList{}, + *srcImage, + *dstMemory, + srcOrigin, + dstOrigin, + copyRegion, + srcRect, + dstRect); + + if (command == nullptr) { + return hipErrorOutOfMemory; + } + + command->enqueue(); + if (!isAsync) { + command->awaitCompletion(); + } + command->release(); + + return hipSuccess; +} + +hipError_t ihipMemcpyDtoA(void* srcDevice, + hipArray* dstArray, + amd::Coord3D srcOrigin, + amd::Coord3D 
dstOrigin, + amd::Coord3D copyRegion, + size_t srcRowPitch, + size_t srcSlicePitch, + hipStream_t stream, + bool isAsync = false) { + cl_mem dstMemObj = reinterpret_cast(dstArray->data); + if (is_valid(dstMemObj) == false) { + return hipErrorInvalidValue; + } + + size_t srcOffset = 0; + amd::Memory* srcMemory = getMemoryObject(srcDevice, srcOffset); + amd::Image* dstImage = as_amd(dstMemObj)->asImage(); + + amd::BufferRect srcRect; + if (!srcRect.create(static_cast(srcOrigin), static_cast(copyRegion), srcRowPitch, srcSlicePitch)) { + return hipErrorInvalidValue; + } + srcRect.start_ += srcOffset; + srcRect.end_ += srcOffset; + + amd::BufferRect dstRect; + if (!dstRect.create(static_cast(dstOrigin), static_cast(copyRegion), dstImage->getRowPitch(), dstImage->getSlicePitch())) { + return hipErrorInvalidValue; + } + + const size_t copySizeInBytes = copyRegion[0] * copyRegion[1] * copyRegion[2] * dstImage->getImageFormat().getElementSize(); + if (!srcMemory->validateRegion(srcOrigin, {copySizeInBytes, 0, 0}) || + !dstImage->validateRegion(dstOrigin, copyRegion)) { + return hipErrorInvalidValue; + } + + amd::CopyMemoryCommand* command = new amd::CopyMemoryCommand(*hip::getQueue(stream), + CL_COMMAND_COPY_BUFFER_TO_IMAGE, + amd::Command::EventWaitList{}, + *srcMemory, + *dstImage, + srcOrigin, + dstOrigin, + copyRegion, + srcRect, + dstRect); + + if (command == nullptr) { + return hipErrorOutOfMemory; + } + + command->enqueue(); + if (!isAsync) { + command->awaitCompletion(); + } + command->release(); + + return hipSuccess; +} + +hipError_t ihipMemcpyDtoD(void* srcDevice, + void* dstDevice, + amd::Coord3D srcOrigin, + amd::Coord3D dstOrigin, + amd::Coord3D copyRegion, + size_t srcRowPitch, + size_t srcSlicePitch, + size_t dstRowPitch, + size_t dstSlicePitch, + hipStream_t stream, + bool isAsync = false) { + size_t srcOffset = 0; + amd::Memory *srcMemory = getMemoryObject(srcDevice, srcOffset); + size_t dstOffset = 0; + amd::Memory *dstMemory = getMemoryObject(dstDevice, 
dstOffset); + + amd::BufferRect srcRect; + if (!srcRect.create(static_cast(srcOrigin), static_cast(copyRegion), srcRowPitch, srcSlicePitch)) { + return hipErrorInvalidValue; + } + srcRect.start_ += srcOffset; + srcRect.end_ += srcOffset; + + amd::Coord3D srcStart(srcRect.start_, 0, 0); + amd::Coord3D srcSize(srcRect.end_ - srcRect.start_, 1, 1); + if (!srcMemory->validateRegion(srcStart, srcSize)) { + return hipErrorInvalidValue; + } + + amd::BufferRect dstRect; + if (!dstRect.create(static_cast(dstOrigin), static_cast(copyRegion), dstRowPitch, dstSlicePitch)) { + return hipErrorInvalidValue; + } + dstRect.start_ += dstOffset; + dstRect.end_ += dstOffset; + + amd::Coord3D dstStart(dstRect.start_, 0, 0); + amd::Coord3D dstSize(dstRect.end_ - dstRect.start_, 1, 1); + if (!dstMemory->validateRegion(dstStart, dstSize)) { + return hipErrorInvalidValue; + } + + amd::CopyMemoryCommand* command = new amd::CopyMemoryCommand(*hip::getQueue(stream), + CL_COMMAND_COPY_BUFFER_RECT, + amd::Command::EventWaitList{}, + *srcMemory, + *dstMemory, + srcStart, + dstStart, + copyRegion, + srcRect, + dstRect); + + if (command == nullptr) { + return hipErrorOutOfMemory; + } + + command->enqueue(); + if (!isAsync) { + command->awaitCompletion(); + } + command->release(); + + return hipSuccess; +} + +hipError_t ihipMemcpyDtoH(void* srcDevice, + void* dstHost, + amd::Coord3D srcOrigin, + amd::Coord3D dstOrigin, + amd::Coord3D copyRegion, + size_t srcRowPitch, + size_t srcSlicePitch, + size_t dstRowPitch, + size_t dstSlicePitch, + hipStream_t stream, + bool isAsync = false) { + size_t srcOffset = 0; + amd::Memory *srcMemory = getMemoryObject(srcDevice, srcOffset); + + amd::BufferRect srcRect; + if (!srcRect.create(static_cast(srcOrigin), static_cast(copyRegion), srcRowPitch, srcSlicePitch)) { + return hipErrorInvalidValue; + } + srcRect.start_ += srcOffset; + srcRect.end_ += srcOffset; + + amd::Coord3D srcStart(srcRect.start_, 0, 0); + amd::Coord3D srcSize(srcRect.end_ - srcRect.start_, 1, 
1); + if (!srcMemory->validateRegion(srcStart, srcSize)) { + return hipErrorInvalidValue; + } + + amd::BufferRect dstRect; + if (!dstRect.create(static_cast(dstOrigin), static_cast(copyRegion), dstRowPitch, dstSlicePitch)) { + return hipErrorInvalidValue; + } + + amd::ReadMemoryCommand* command = new amd::ReadMemoryCommand(*hip::getQueue(stream), + CL_COMMAND_READ_BUFFER_RECT, + amd::Command::EventWaitList{}, + *srcMemory, + srcStart, + copyRegion, + dstHost, + srcRect, + dstRect); + + if (command == nullptr) { + return hipErrorOutOfMemory; + } + + command->enqueue(); + if (!isAsync) { + command->awaitCompletion(); + } + command->release(); + + return hipSuccess; +} + +hipError_t ihipMemcpyHtoD(const void* srcHost, + void* dstDevice, + amd::Coord3D srcOrigin, + amd::Coord3D dstOrigin, + amd::Coord3D copyRegion, + size_t srcRowPitch, + size_t srcSlicePitch, + size_t dstRowPitch, + size_t dstSlicePitch, + hipStream_t stream, + bool isAsync = false) { + size_t dstOffset = 0; + amd::Memory *dstMemory = getMemoryObject(dstDevice, dstOffset); + + amd::BufferRect srcRect; + if (!srcRect.create(static_cast(srcOrigin), static_cast(copyRegion), srcRowPitch, srcSlicePitch)) { + return hipErrorInvalidValue; + } + + amd::BufferRect dstRect; + if (!dstRect.create(static_cast(dstOrigin), static_cast(copyRegion), dstRowPitch, dstSlicePitch)) { + return hipErrorInvalidValue; + } + dstRect.start_ += dstOffset; + dstRect.end_ += dstOffset; + + amd::Coord3D dstStart(dstRect.start_, 0, 0); + amd::Coord3D dstSize(dstRect.end_ - dstRect.start_, 1, 1); + if (!dstMemory->validateRegion(dstStart, dstSize)) { + return hipErrorInvalidValue; + } + + amd::WriteMemoryCommand* command = new amd::WriteMemoryCommand(*hip::getQueue(stream), + CL_COMMAND_WRITE_BUFFER_RECT, + amd::Command::EventWaitList{}, + *dstMemory, + dstStart, + copyRegion, + srcHost, + dstRect, + srcRect); + + if (command == nullptr) { + return hipErrorOutOfMemory; + } + + command->enqueue(); + if (!isAsync) { + 
command->awaitCompletion(); + } + command->release(); + + return hipSuccess; +} + +hipError_t ihipMemcpyHtoH(const void* srcHost, + void* dstHost, + amd::Coord3D srcOrigin, + amd::Coord3D dstOrigin, + amd::Coord3D copyRegion, + size_t srcRowPitch, + size_t srcSlicePitch, + size_t dstRowPitch, + size_t dstSlicePitch) { + amd::BufferRect srcRect; + if (!srcRect.create(static_cast(srcOrigin), static_cast(copyRegion), srcRowPitch, srcSlicePitch)) { + return hipErrorInvalidValue; + } + + amd::BufferRect dstRect; + if (!dstRect.create(static_cast(dstOrigin), static_cast(copyRegion), dstRowPitch, dstSlicePitch)) { + return hipErrorInvalidValue; + } + + for (size_t slice = 0; slice < copyRegion[2]; slice++) { + for (size_t row = 0; row < copyRegion[1]; row++) { + const void* srcRow = static_cast(srcHost) + srcRect.start_ + row * srcRect.rowPitch_ + slice * srcRect.slicePitch_; + void* dstRow = static_cast(dstHost) + dstRect.start_ + row * dstRect.rowPitch_ + slice * dstRect.slicePitch_; + std::memcpy(dstRow, srcRow, copyRegion[0]); + } + } + + return hipSuccess; +} + +hipError_t ihipMemcpyAtoA(hipArray* srcArray, + hipArray* dstArray, + amd::Coord3D srcOrigin, + amd::Coord3D dstOrigin, + amd::Coord3D copyRegion, + hipStream_t stream, + bool isAsync = false) { + cl_mem srcMemObj = reinterpret_cast(srcArray->data); + cl_mem dstMemObj = reinterpret_cast(dstArray->data); + if (!is_valid(srcMemObj) || !is_valid(dstMemObj)) { + return hipErrorInvalidValue; + } + + amd::Image* srcImage = as_amd(srcMemObj)->asImage(); + amd::Image* dstImage = as_amd(dstMemObj)->asImage(); + + // HIP assumes the width is in bytes, but OCL assumes it's in pixels. + // Note that src and dst should have the same element size. 
+ assert(srcImage->getImageFormat().getElementSize() == dstImage->getImageFormat().getElementSize()); + const size_t elementSize = srcImage->getImageFormat().getElementSize(); + static_cast(srcOrigin)[0] /= elementSize; + static_cast(dstOrigin)[0] /= elementSize; + static_cast(copyRegion)[0] /= elementSize; + + if (!srcImage->validateRegion(srcOrigin, copyRegion) || + !dstImage->validateRegion(dstOrigin, copyRegion)) { + return hipErrorInvalidValue; + } + + amd::CopyMemoryCommand* command = new amd::CopyMemoryCommand(*hip::getQueue(stream), + CL_COMMAND_COPY_IMAGE, + amd::Command::EventWaitList{}, + *srcImage, + *dstImage, + srcOrigin, + dstOrigin, + copyRegion); + + if (command == nullptr) { + return hipErrorOutOfMemory; + } + + command->enqueue(); + if (!isAsync) { + command->awaitCompletion(); + } + command->release(); + + return hipSuccess; +} + +hipError_t ihipMemcpyHtoA(const void* srcHost, + hipArray* dstArray, + amd::Coord3D srcOrigin, + amd::Coord3D dstOrigin, + amd::Coord3D copyRegion, + size_t srcRowPitch, + size_t srcSlicePitch, + hipStream_t stream, + bool isAsync = false) { + if (srcHost == nullptr) { + return hipErrorInvalidValue; + } + + cl_mem dstMemObj = reinterpret_cast(dstArray->data); + if (is_valid(dstMemObj) == false) { + return hipErrorInvalidValue; + } + + amd::BufferRect srcRect; + if (!srcRect.create(static_cast(srcOrigin), static_cast(copyRegion), srcRowPitch, srcSlicePitch)) { + return hipErrorInvalidValue; + } + + amd::Image* dstImage = as_amd(dstMemObj)->asImage(); + // HIP assumes the width is in bytes, but OCL assumes it's in pixels. 
+ const size_t elementSize = dstImage->getImageFormat().getElementSize(); + static_cast(dstOrigin)[0] /= elementSize; + static_cast(copyRegion)[0] /= elementSize; + + if (!dstImage->validateRegion(dstOrigin, copyRegion)) { + return hipErrorInvalidValue; + } + + amd::WriteMemoryCommand* command = new amd::WriteMemoryCommand(*hip::getQueue(stream), + CL_COMMAND_WRITE_IMAGE, + amd::Command::EventWaitList{}, + *dstImage, + dstOrigin, + copyRegion, + static_cast(srcHost) + srcRect.start_, + srcRowPitch, + srcSlicePitch); + + if (command == nullptr) { + return hipErrorOutOfMemory; + } + + command->enqueue(); + if (!isAsync) { + command->awaitCompletion(); + } + command->release(); + + return hipSuccess; +} + +hipError_t ihipMemcpyAtoH(hipArray* srcArray, + void* dstHost, + amd::Coord3D srcOrigin, + amd::Coord3D dstOrigin, + amd::Coord3D copyRegion, + size_t dstRowPitch, + size_t dstSlicePitch, + hipStream_t stream, + bool isAsync = false) { + cl_mem srcMemObj = reinterpret_cast(srcArray->data); + if (!is_valid(srcMemObj)) { + return hipErrorInvalidValue; + } + + if (dstHost == nullptr) { + return hipErrorInvalidValue; + } + + amd::BufferRect dstRect; + if (!dstRect.create(static_cast(dstOrigin), static_cast(copyRegion), dstRowPitch, dstSlicePitch)) { + return hipErrorInvalidValue; + } + + + amd::Image* srcImage = as_amd(srcMemObj)->asImage(); + // HIP assumes the width is in bytes, but OCL assumes it's in pixels. 
+ const size_t elementSize = srcImage->getImageFormat().getElementSize(); + static_cast(srcOrigin)[0] /= elementSize; + static_cast(copyRegion)[0] /= elementSize; + + if (!srcImage->validateRegion(srcOrigin, copyRegion) || + !srcImage->isRowSliceValid(dstRowPitch, dstSlicePitch, copyRegion[0], copyRegion[1])) { + return hipErrorInvalidValue; + } + + amd::ReadMemoryCommand* command = new amd::ReadMemoryCommand(*hip::getQueue(stream), + CL_COMMAND_READ_IMAGE, + amd::Command::EventWaitList{}, + *srcImage, + srcOrigin, + copyRegion, + static_cast(dstHost) + dstRect.start_, + dstRowPitch, + dstSlicePitch); + + if (command == nullptr) { + return hipErrorOutOfMemory; + } + + command->enqueue(); + if (!isAsync) { + command->awaitCompletion(); + } + command->release(); + + return hipSuccess; +} + +hipError_t ihipMemcpyParam3D(const HIP_MEMCPY3D* pCopy, + hipStream_t stream, + bool isAsync = false) { + // If {src/dst}MemoryType is hipMemoryTypeUnified, {src/dst}Device and {src/dst}Pitch specify the (unified virtual address space) + // base address of the source data and the bytes per row to apply. {src/dst}Array is ignored. + hipMemoryType srcMemoryType = pCopy->srcMemoryType; + if (srcMemoryType == hipMemoryTypeUnified) { + srcMemoryType = amd::MemObjMap::FindMemObj(pCopy->srcDevice) ? hipMemoryTypeDevice : hipMemoryTypeHost; + } + hipMemoryType dstMemoryType = pCopy->dstMemoryType; + if (dstMemoryType == hipMemoryTypeUnified) { + dstMemoryType = amd::MemObjMap::FindMemObj(pCopy->dstDevice) ? hipMemoryTypeDevice : hipMemoryTypeHost; + } + + amd::Coord3D srcOrigin = {pCopy->srcXInBytes, pCopy->srcY, pCopy->srcZ}; + amd::Coord3D dstOrigin = {pCopy->dstXInBytes, pCopy->dstY, pCopy->dstZ}; + amd::Coord3D copyRegion = {pCopy->WidthInBytes, (pCopy->Height != 0) ? pCopy->Height : 1, (pCopy->Depth != 0) ? pCopy->Depth : 1}; + + if ((srcMemoryType == hipMemoryTypeHost) && (dstMemoryType == hipMemoryTypeHost)) { + // Host to Host. 
+ return ihipMemcpyHtoH(pCopy->srcHost, pCopy->dstHost, srcOrigin, dstOrigin, copyRegion, pCopy->srcPitch, pCopy->srcPitch * pCopy->srcHeight, pCopy->dstPitch, pCopy->dstPitch * pCopy->dstHeight); + } else if ((srcMemoryType == hipMemoryTypeHost) && (dstMemoryType == hipMemoryTypeDevice)) { + // Host to Device. + return ihipMemcpyHtoD(pCopy->srcHost, pCopy->dstDevice, srcOrigin, dstOrigin, copyRegion, pCopy->srcPitch, pCopy->srcPitch * pCopy->srcHeight, pCopy->dstPitch, pCopy->dstPitch * pCopy->dstHeight, stream, isAsync); + } else if ((srcMemoryType == hipMemoryTypeDevice) && (dstMemoryType == hipMemoryTypeHost)) { + // Device to Host. + return ihipMemcpyDtoH(pCopy->srcDevice, pCopy->dstHost, srcOrigin, dstOrigin, copyRegion, pCopy->srcPitch, pCopy->srcPitch * pCopy->srcHeight, pCopy->dstPitch, pCopy->dstPitch * pCopy->dstHeight, stream, isAsync); + } else if ((srcMemoryType == hipMemoryTypeDevice) && (dstMemoryType == hipMemoryTypeDevice)) { + // Device to Device. + return ihipMemcpyDtoD(pCopy->srcDevice, pCopy->dstDevice, srcOrigin, dstOrigin, copyRegion, pCopy->srcPitch, pCopy->srcPitch * pCopy->srcHeight, pCopy->dstPitch, pCopy->dstPitch * pCopy->dstHeight, stream, isAsync); + } else if ((srcMemoryType == hipMemoryTypeHost) && (dstMemoryType == hipMemoryTypeArray)) { + // Host to Image. + return ihipMemcpyHtoA(pCopy->srcHost, pCopy->dstArray, srcOrigin, dstOrigin, copyRegion, pCopy->srcPitch, pCopy->srcPitch * pCopy->srcHeight, stream, isAsync); + } else if ((srcMemoryType == hipMemoryTypeArray) && (dstMemoryType == hipMemoryTypeHost)) { + // Image to Host. + return ihipMemcpyAtoH(pCopy->srcArray, pCopy->dstHost, srcOrigin, dstOrigin, copyRegion, pCopy->dstPitch, pCopy->dstPitch * pCopy->dstHeight, stream, isAsync); + } else if ((srcMemoryType == hipMemoryTypeDevice) && (dstMemoryType == hipMemoryTypeArray)) { + // Device to Image. 
+ return ihipMemcpyDtoA(pCopy->srcDevice, pCopy->dstArray, srcOrigin, dstOrigin, copyRegion, pCopy->srcPitch, pCopy->srcPitch * pCopy->srcHeight, stream, isAsync); + } else if ((srcMemoryType == hipMemoryTypeArray) && (dstMemoryType == hipMemoryTypeDevice)) { + // Image to Device. + return ihipMemcpyAtoD(pCopy->srcArray, pCopy->dstDevice, srcOrigin, dstOrigin, copyRegion, pCopy->dstPitch, pCopy->dstPitch * pCopy->dstHeight, stream, isAsync); + } else if ((srcMemoryType == hipMemoryTypeArray) && (dstMemoryType == hipMemoryTypeArray)) { + // Image to Image. + return ihipMemcpyAtoA(pCopy->srcArray, pCopy->dstArray, srcOrigin, dstOrigin, copyRegion, stream, isAsync); + } else { + ShouldNotReachHere(); + } + + return hipSuccess; +} + +hipError_t ihipMemcpyParam2D(const hip_Memcpy2D* pCopy, + hipStream_t stream, + bool isAsync = false) { + HIP_MEMCPY3D desc = hip::getDrvMemcpy3DDesc(*pCopy); + + return ihipMemcpyParam3D(&desc, stream, isAsync); +} + +hipError_t ihipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width, + size_t height, hipMemcpyKind kind, hipStream_t stream, bool isAsync = false) { + hip_Memcpy2D desc = {}; + + desc.srcXInBytes = 0; + desc.srcY = 0; + desc.srcMemoryType = std::get<0>(hip::getMemoryType(kind)); + desc.srcHost = src; + desc.srcDevice = const_cast(src); + desc.srcArray = nullptr; // Ignored. + desc.srcPitch = spitch; + + desc.dstXInBytes = 0; + desc.dstY = 0; + desc.dstMemoryType = std::get<1>(hip::getMemoryType(kind)); + desc.dstHost = dst; + desc.dstDevice = dst; + desc.dstArray = nullptr; // Ignored. 
+ desc.dstPitch = dpitch; + + desc.WidthInBytes = width; + desc.Height = height; + + return ihipMemcpyParam2D(&desc, stream, isAsync); +} + +hipError_t hipMemcpyParam2D(const hip_Memcpy2D* pCopy) { + HIP_INIT_API(hipMemcpyParam2D, pCopy); + + HIP_RETURN(ihipMemcpyParam2D(pCopy, nullptr)); +} + +hipError_t hipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width, + size_t height, hipMemcpyKind kind) { + HIP_INIT_API(hipMemcpy2D, dst, dpitch, src, spitch, width, height, kind); + + HIP_RETURN(ihipMemcpy2D(dst, dpitch, src, spitch, width, height, kind, nullptr)); +} + +hipError_t hipMemcpy2DAsync(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width, + size_t height, hipMemcpyKind kind, hipStream_t stream) { + HIP_INIT_API(hipMemcpy2DAsync, dst, dpitch, src, spitch, width, height, kind, stream); + + HIP_RETURN(ihipMemcpy2D(dst, dpitch, src, spitch, width, height, kind, stream, true)); +} + +hipError_t ihipMemcpy2DToArray(hipArray_t dst, size_t wOffset, size_t hOffset, const void* src, size_t spitch, size_t width, size_t height, hipMemcpyKind kind, hipStream_t stream, bool isAsync = false) { + hip_Memcpy2D desc = {}; + + desc.srcXInBytes = 0; + desc.srcY = 0; + desc.srcMemoryType = std::get<0>(hip::getMemoryType(kind)); + desc.srcHost = const_cast(src); + desc.srcDevice = const_cast(src); + desc.srcArray = nullptr; + desc.srcPitch = spitch; + + desc.dstXInBytes = wOffset; + desc.dstY = hOffset; + desc.dstMemoryType = hipMemoryTypeArray; + desc.dstHost = nullptr; + desc.dstDevice = nullptr; + desc.dstArray = dst; + desc.dstPitch = 0; // Ignored. 
+ + desc.WidthInBytes = width; + desc.Height = height; + + return ihipMemcpyParam2D(&desc, stream, isAsync); +} + +hipError_t hipMemcpy2DToArray(hipArray* dst, size_t wOffset, size_t hOffset, const void* src, size_t spitch, size_t width, size_t height, hipMemcpyKind kind) { + HIP_INIT_API(hipMemcpy2DToArray, dst, wOffset, hOffset, src, spitch, width, height, kind); + + HIP_RETURN(ihipMemcpy2DToArray(dst, wOffset, hOffset, src, spitch, width, height, kind, nullptr)); +} + +hipError_t hipMemcpyToArray(hipArray* dst, size_t wOffset, size_t hOffset, const void* src, size_t count, hipMemcpyKind kind) { + HIP_INIT_API(hipMemcpyToArray, dst, wOffset, hOffset, src, count, kind); + + if (dst == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + const size_t arrayHeight = (dst->height != 0) ? dst->height : 1; + const size_t witdthInBytes = count / arrayHeight; + + const size_t height = (count / dst->width) / hip::getElementSize(dst); + + HIP_RETURN(ihipMemcpy2DToArray(dst, wOffset, hOffset, src, 0 /* spitch */, witdthInBytes, height, kind, nullptr)); +} + +hipError_t ihipMemcpy2DFromArray(void* dst, size_t dpitch, hipArray_const_t src, size_t wOffsetSrc, size_t hOffsetSrc, size_t width, size_t height, hipMemcpyKind kind, hipStream_t stream, bool isAsync = false) { + hip_Memcpy2D desc = {}; + + desc.srcXInBytes = wOffsetSrc; + desc.srcY = hOffsetSrc; + desc.srcMemoryType = hipMemoryTypeArray; + desc.srcHost = nullptr; + desc.srcDevice = nullptr; + desc.srcArray = const_cast(src); + desc.srcPitch = 0; // Ignored. 
+ + desc.dstXInBytes = 0; + desc.dstY = 0; + desc.dstMemoryType = std::get<1>(hip::getMemoryType(kind)); + desc.dstHost = dst; + desc.dstDevice = dst; + desc.dstArray = nullptr; + desc.dstPitch = dpitch; + + desc.WidthInBytes = width; + desc.Height = height; + + return ihipMemcpyParam2D(&desc, stream, isAsync); +} + +hipError_t hipMemcpyFromArray(void* dst, hipArray_const_t src, size_t wOffsetSrc, size_t hOffset, size_t count, hipMemcpyKind kind) { + HIP_INIT_API(hipMemcpyFromArray, dst, src, wOffsetSrc, hOffset, count, kind); + + if (src == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + const size_t arrayHeight = (src->height != 0) ? src->height : 1; + const size_t witdthInBytes = count / arrayHeight; + + const size_t height = (count / src->width) / hip::getElementSize(src); + + HIP_RETURN(ihipMemcpy2DFromArray(dst, 0 /* dpitch */, src, wOffsetSrc, hOffset, witdthInBytes, height, kind, nullptr)); +} + +hipError_t hipMemcpyHtoA(hipArray* dstArray, + size_t dstOffset, + const void* srcHost, + size_t ByteCount) { + HIP_INIT_API(hipMemcpyHtoA, dstArray, dstOffset, srcHost, ByteCount); + + HIP_RETURN(ihipMemcpyHtoA(srcHost, dstArray, {0, 0, 0}, {dstOffset, 0, 0}, {ByteCount, 1, 1}, 0, 0, nullptr)); +} + +hipError_t hipMemcpyAtoH(void* dstHost, + hipArray* srcArray, + size_t srcOffset, + size_t ByteCount) { + HIP_INIT_API(hipMemcpyAtoH, dstHost, srcArray, srcOffset, ByteCount); + + HIP_RETURN(ihipMemcpyAtoH(srcArray, dstHost, {srcOffset, 0, 0}, {0, 0, 0}, {ByteCount, 1, 1}, 0, 0, nullptr)); +} + +hipError_t ihipMemcpy3D(const hipMemcpy3DParms* p, + hipStream_t stream, + bool isAsync = false) { + // The struct passed to hipMemcpy3D() must specify one of srcArray or srcPtr and one of dstArray or dstPtr. + // Passing more than one non-zero source or destination will cause hipMemcpy3D() to return an error. 
+ if (((p->srcArray != nullptr) && (p->srcPtr.ptr != nullptr)) || + ((p->dstArray != nullptr) && (p->dstPtr.ptr != nullptr))) { + return hipErrorInvalidValue; + } + + // If the source and destination are both arrays, hipMemcpy3D() will return an error if they do not have the same element size. + if (((p->srcArray != nullptr) && (p->dstArray != nullptr)) && + (hip::getElementSize(p->dstArray) != hip::getElementSize(p->dstArray))) { + return hipErrorInvalidValue; + } + + const HIP_MEMCPY3D desc = hip::getDrvMemcpy3DDesc(*p); + + return ihipMemcpyParam3D(&desc, stream, isAsync); +} + +hipError_t hipMemcpy3D(const hipMemcpy3DParms* p) { + HIP_INIT_API(hipMemcpy3D, p); + + HIP_RETURN(ihipMemcpy3D(p, nullptr)); +} + +hipError_t hipMemcpy3DAsync(const hipMemcpy3DParms* p, hipStream_t stream) { + HIP_INIT_API(hipMemcpy3DAsync, p, stream); + + HIP_RETURN(ihipMemcpy3D(p, stream, true)); +} + +hipError_t hipDrvMemcpy3D(const HIP_MEMCPY3D* pCopy) { + HIP_INIT_API(hipDrvMemcpy3D, pCopy); + + HIP_RETURN(ihipMemcpyParam3D(pCopy, nullptr)); +} + +hipError_t hipDrvMemcpy3DAsync(const HIP_MEMCPY3D* pCopy, hipStream_t stream) { + HIP_INIT_API(hipDrvMemcpy3DAsync, pCopy, stream); + + HIP_RETURN(ihipMemcpyParam3D(pCopy, stream, true)); +} + +hipError_t ihipMemset(void* dst, int value, size_t valueSize, size_t sizeBytes, + hipStream_t stream, bool isAsync = false) { + if (sizeBytes == 0) { + // Skip if nothing needs filling. 
+ return hipSuccess; + } + + if (dst == nullptr) { + return hipErrorInvalidValue; + } + + size_t offset = 0; + amd::HostQueue* queue = hip::getQueue(stream); + amd::Memory* memory = getMemoryObject(dst, offset); + + if (memory != nullptr) { + // Device memory + amd::Command::EventWaitList waitList; + amd::Coord3D fillOffset(offset, 0, 0); + amd::Coord3D fillSize(sizeBytes, 1, 1); + amd::FillMemoryCommand* command = + new amd::FillMemoryCommand(*queue, CL_COMMAND_FILL_BUFFER, waitList, *memory->asBuffer(), + &value, valueSize, fillOffset, fillSize); + + if (command == nullptr) { + return hipErrorOutOfMemory; + } + + command->enqueue(); + if (!isAsync) { + command->awaitCompletion(); + } + command->release(); + } else { + // Host alloced memory + if (valueSize <= sizeof(int8_t)) { + memset(dst, value, sizeBytes); + } else { + // memset() replicates a single byte only, so wider patterns (D16/D32) are copied element by element, + // taking the same leading valueSize bytes of 'value' that the device fill command above is given. + char* out = static_cast<char*>(dst); + for (size_t pos = 0; pos + valueSize <= sizeBytes; pos += valueSize) { + memcpy(out + pos, &value, valueSize); + } + } + } + + return hipSuccess; +} + +// Byte fill of sizeBytes bytes, submitted with a null stream. +hipError_t hipMemset(void* dst, int value, size_t sizeBytes) { + HIP_INIT_API(hipMemset, dst, value, sizeBytes); + + HIP_RETURN(ihipMemset(dst, value, sizeof(int8_t), sizeBytes, nullptr)); +} + +// Byte fill of sizeBytes bytes on the given stream, submitted with isAsync = true. +hipError_t hipMemsetAsync(void* dst, int value, size_t sizeBytes, hipStream_t stream) { + HIP_INIT_API(hipMemsetAsync, dst, value, sizeBytes, stream); + + HIP_RETURN(ihipMemset(dst, value, sizeof(int8_t), sizeBytes, stream, true)); +} + +// Fills count 8-bit elements with value. +hipError_t hipMemsetD8(hipDeviceptr_t dst, unsigned char value, size_t count) { + HIP_INIT_API(hipMemsetD8, dst, value, count); + + HIP_RETURN(ihipMemset(dst, value, sizeof(int8_t), count * sizeof(int8_t), nullptr)); +} + +// Fills count 8-bit elements with value on the given stream. +hipError_t hipMemsetD8Async(hipDeviceptr_t dst, unsigned char value, size_t count, + hipStream_t stream) { + HIP_INIT_API(hipMemsetD8Async, dst, value, count, stream); + + HIP_RETURN(ihipMemset(dst, value, sizeof(int8_t), count * sizeof(int8_t), stream, true)); +} + +// Fills count 16-bit elements with value. +hipError_t hipMemsetD16(hipDeviceptr_t dst, unsigned short value, size_t count) { + HIP_INIT_API(hipMemsetD16, dst, value, count); + + HIP_RETURN(ihipMemset(dst, value, sizeof(int16_t), count * sizeof(int16_t), nullptr)); +} + +hipError_t 
hipMemsetD16Async(hipDeviceptr_t dst, unsigned short value, size_t count, + hipStream_t stream) { + HIP_INIT_API(hipMemsetD16Async, dst, value, count, stream); + + HIP_RETURN(ihipMemset(dst, value, sizeof(int16_t), count * sizeof(int16_t), stream, true)); // 16-bit pattern, isAsync = true +} + +hipError_t hipMemsetD32(hipDeviceptr_t dst, int value, size_t count) { + HIP_INIT_API(hipMemsetD32, dst, value, count); + + HIP_RETURN(ihipMemset(dst, value, sizeof(int32_t), count * sizeof(int32_t), nullptr)); // 32-bit pattern, null stream +} + +hipError_t hipMemsetD32Async(hipDeviceptr_t dst, int value, size_t count, + hipStream_t stream) { + HIP_INIT_API(hipMemsetD32Async, dst, value, count, stream); + + HIP_RETURN(ihipMemset(dst, value, sizeof(int32_t), count * sizeof(int32_t), stream, true)); // 32-bit pattern, isAsync = true +} + +hipError_t ihipMemset3D(hipPitchedPtr pitchedDevPtr, + int value, + hipExtent extent, + hipStream_t stream, + bool isAsync = false) { + if (pitchedDevPtr.pitch == extent.width) { // rows are contiguous: one linear byte fill covers the whole extent + return ihipMemset(pitchedDevPtr.ptr, value, sizeof(int8_t), extent.width * extent.height * extent.depth, stream, isAsync); + } + + // Workaround for the case where pitch > row width, until the fill kernel is updated to support pitch. + // Fall back to filling one row at a time. 
+ + amd::HostQueue* queue = hip::getQueue(stream); + + size_t offset = 0; + amd::Memory* memory = getMemoryObject(pitchedDevPtr.ptr, offset); + + amd::Coord3D origin(offset); + amd::Coord3D region(pitchedDevPtr.xsize, pitchedDevPtr.ysize, extent.depth); + amd::BufferRect rect; + if (!rect.create(static_cast(origin), static_cast(region), pitchedDevPtr.pitch, 0)) { + return hipErrorInvalidValue; + } + + if (memory != nullptr) { + // Device memory: enqueue one fill command per row, then wait for (when synchronous) and release them all. + std::vector commands; + + for (size_t slice = 0; slice < extent.depth; slice++) { + for (size_t row = 0; row < extent.height; row++) { + const size_t rowOffset = rect.offset(0, row, slice); + amd::FillMemoryCommand* command = new amd::FillMemoryCommand(*queue, + CL_COMMAND_FILL_BUFFER, + amd::Command::EventWaitList{}, + *memory->asBuffer(), + &value, + sizeof(int8_t), + amd::Coord3D{rowOffset, 0, 0}, + amd::Coord3D{extent.width, 1, 1}); + + command->enqueue(); + commands.push_back(command); + } + } + + for (auto &command: commands) { + if (!isAsync) { + command->awaitCompletion(); + } + command->release(); + } + } else { + // Host memory: fill each row at its own pitched offset (the row offset used to be computed but ignored, + // so only the first row was ever written). + char* base = static_cast<char*>(pitchedDevPtr.ptr); + for (size_t slice = 0; slice < extent.depth; slice++) { + for (size_t row = 0; row < extent.height; row++) { + const size_t rowOffset = rect.offset(0, row, slice); + std::memset(base + rowOffset, value, extent.width); + } + } + } + + return hipSuccess; +} + +// 2D fill expressed as a depth-1 3D fill, submitted with a null stream. +hipError_t hipMemset2D(void* dst, size_t pitch, int value, size_t width, size_t height) { + HIP_INIT_API(hipMemset2D, dst, pitch, value, width, height); + + HIP_RETURN(ihipMemset3D({dst, pitch, width, height}, value, {width, height, 1}, nullptr)); +} + +// 2D fill expressed as a depth-1 3D fill on the given stream, submitted with isAsync = true. +hipError_t hipMemset2DAsync(void* dst, size_t pitch, int value, + size_t width, size_t height, hipStream_t stream) { + HIP_INIT_API(hipMemset2DAsync, dst, pitch, value, width, height, stream); + + HIP_RETURN(ihipMemset3D({dst, pitch, width, height}, value, {width, height, 1}, stream, true)); +} + +hipError_t hipMemset3D(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent) { + HIP_INIT_API(hipMemset3D, 
pitchedDevPtr, value, extent); + + HIP_RETURN(ihipMemset3D(pitchedDevPtr, value, extent, nullptr)); +} + +hipError_t hipMemset3DAsync(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent, hipStream_t stream) { + HIP_INIT_API(hipMemset3DAsync, pitchedDevPtr, value, extent, stream); + + HIP_RETURN(ihipMemset3D(pitchedDevPtr, value, extent, stream, false)); +} + +hipError_t hipMemAllocPitch(hipDeviceptr_t* dptr, size_t* pitch, size_t widthInBytes, + size_t height, unsigned int elementSizeBytes) { + HIP_INIT_API(hipMemAllocPitch, dptr, pitch, widthInBytes, height, elementSizeBytes); + + HIP_RETURN(hipMallocPitch(dptr, pitch, widthInBytes, height)); +} + +hipError_t hipMemAllocHost(void** ptr, size_t size) { + HIP_INIT_API(hipMemAllocHost, ptr, size); + + HIP_RETURN(hipHostMalloc(ptr, size, 0)); +} + +hipError_t hipIpcGetMemHandle(hipIpcMemHandle_t* handle, void* dev_ptr) { + HIP_INIT_API(hipIpcGetMemHandle, handle, dev_ptr); + + size_t offset = 0; + amd::Memory* amd_mem_obj = nullptr; + device::Memory* dev_mem_obj = nullptr; + ihipIpcMemHandle_t* ihandle = nullptr; + + if ((handle == nullptr) || (dev_ptr == nullptr)) { + HIP_RETURN(hipErrorInvalidValue); + } + + /* Get AMD::Memory object corresponding to this pointer */ + amd_mem_obj = getMemoryObject(dev_ptr, offset); + if (amd_mem_obj == nullptr) { + HIP_RETURN(hipErrorInvalidDevicePointer); + } + + /* Get Device::Memory object pointer */ + dev_mem_obj = amd_mem_obj->getDeviceMemory(*hip::getCurrentDevice()->devices()[0],false); + if (dev_mem_obj == nullptr) { + HIP_RETURN(hipErrorInvalidDevicePointer); + } + + /* Create an handle for IPC. 
Store the memory size inside the handle */ + ihandle = reinterpret_cast(handle); + dev_mem_obj->IpcCreate(offset, &(ihandle->psize), &(ihandle->ipc_handle)); + + HIP_RETURN(hipSuccess); +} + +// Attaches to the allocation described by an IPC handle, stores its address in *dev_ptr and records it in the MemObjMap. +hipError_t hipIpcOpenMemHandle(void** dev_ptr, hipIpcMemHandle_t handle, unsigned int flags) { + HIP_INIT_API(hipIpcOpenMemHandle, dev_ptr, &handle, flags); + + amd::Memory* amd_mem_obj = nullptr; + amd::Device* device = nullptr; + ihipIpcMemHandle_t* ihandle = nullptr; + + if (dev_ptr == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + /* Call the IPC Attach from Device class */ + device = hip::getCurrentDevice()->devices()[0]; + ihandle = reinterpret_cast(&handle); + + amd_mem_obj = device->IpcAttach(&(ihandle->ipc_handle), ihandle->psize, flags, dev_ptr); + if (amd_mem_obj == nullptr) { + HIP_RETURN(hipErrorInvalidDevicePointer); + } + + /* Add the memory to the MemObjMap */ + amd::MemObjMap::AddMemObj(*dev_ptr, amd_mem_obj); + + HIP_RETURN(hipSuccess); +} + +// Detaches an allocation previously opened with hipIpcOpenMemHandle and drops its MemObjMap entry. +hipError_t hipIpcCloseMemHandle(void* dev_ptr) { + HIP_INIT_API(hipIpcCloseMemHandle, dev_ptr); + + size_t offset = 0; + amd::Device* device = nullptr; + amd::Memory* amd_mem_obj = nullptr; + + /* Reject a null pointer before paying for a full synchronization */ + if (dev_ptr == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + hip::syncStreams(); + hip::getNullStream()->finish(); + + /* Get the amd::Memory object */ + amd_mem_obj = getMemoryObject(dev_ptr, offset); + if (amd_mem_obj == nullptr) { + HIP_RETURN(hipErrorInvalidDevicePointer); + } + + /* Call IPC Detach from Device class */ + device = hip::getCurrentDevice()->devices()[0]; + if (device == nullptr) { + HIP_RETURN(hipErrorNoDevice); + } + + /* Remove the memory from MemObjMap. hipIpcOpenMemHandle registered the entry under the device pointer, + so it has to be removed by that same key, not by the amd::Memory object address. */ + amd::MemObjMap::RemoveMemObj(dev_ptr); + + /* detach the memory */ + device->IpcDetach(*amd_mem_obj); + + HIP_RETURN(hipSuccess); +} + +hipError_t hipHostGetDevicePointer(void** devicePointer, void* hostPointer, unsigned flags) { + HIP_INIT_API(hipHostGetDevicePointer, devicePointer, hostPointer, flags); + + size_t offset = 0; + + 
amd::Memory* memObj = getMemoryObject(hostPointer, offset); + if (!memObj) { + HIP_RETURN(hipErrorInvalidValue); + } +*devicePointer = reinterpret_cast(memObj->getDeviceMemory(*hip::getCurrentDevice()->devices()[0])->virtualAddress() + offset); + + HIP_RETURN(hipSuccess); +} + +hipError_t hipPointerGetAttributes(hipPointerAttribute_t* attributes, const void* ptr) { + HIP_INIT_API(hipPointerGetAttributes, attributes, ptr); + + size_t offset = 0; + amd::Memory* memObj = getMemoryObject(ptr, offset); + int device = 0; + + if (memObj != nullptr) { + attributes->memoryType = (CL_MEM_SVM_FINE_GRAIN_BUFFER & memObj->getMemFlags())? hipMemoryTypeHost : hipMemoryTypeDevice; + attributes->hostPointer = memObj->getSvmPtr(); + attributes->devicePointer = memObj->getSvmPtr(); + attributes->isManaged = 0; + attributes->allocationFlags = memObj->getMemFlags() >> 16; + + amd::Context* memObjCtx = &memObj->getContext(); + if (hip::host_device->asContext() == memObjCtx) { + attributes->device = ihipGetDevice(); + HIP_RETURN(hipSuccess); + } + for (auto& ctx : g_devices) { + if (ctx->asContext() == memObjCtx) { + attributes->device = device; + HIP_RETURN(hipSuccess); + } + ++device; + } + HIP_RETURN(hipErrorInvalidDevice); + } + + HIP_RETURN(hipErrorInvalidValue); +} + +hipError_t hipArrayDestroy(hipArray* array) { + HIP_INIT_API(hipArrayDestroy, array); + + HIP_RETURN(ihipArrayDestroy(array)); +} + +hipError_t hipArray3DGetDescriptor(HIP_ARRAY3D_DESCRIPTOR* pArrayDescriptor, + hipArray* array) { + HIP_INIT_API(hipArray3DGetDescriptor, pArrayDescriptor, array); + + assert(false && "Unimplemented"); + + HIP_RETURN(hipSuccess); +} + +hipError_t hipArrayGetDescriptor(HIP_ARRAY_DESCRIPTOR* pArrayDescriptor, + hipArray* array) { + HIP_INIT_API(hipArrayGetDescriptor, pArrayDescriptor, array); + + assert(false && "Unimplemented"); + + HIP_RETURN(hipSuccess); +} + +hipError_t hipMemcpyParam2DAsync(const hip_Memcpy2D* pCopy, + hipStream_t stream) { + HIP_INIT_API(hipMemcpyParam2D, pCopy); + + 
HIP_RETURN(ihipMemcpyParam2D(pCopy, stream, true)); +} + +// Builds an array-to-array hip_Memcpy2D descriptor from the given offsets and extent and submits it through ihipMemcpyParam2D. +hipError_t ihipMemcpy2DArrayToArray(hipArray_t dst, size_t wOffsetDst, size_t hOffsetDst, hipArray_const_t src, size_t wOffsetSrc, size_t hOffsetSrc, size_t width, size_t height, hipMemcpyKind kind, hipStream_t stream, bool isAsync = false) { + hip_Memcpy2D copy = {}; + + // Extent of the region to transfer. + copy.WidthInBytes = width; + copy.Height = height; + + // Destination endpoint: an array, so the host/device pointers stay null. + copy.dstMemoryType = hipMemoryTypeArray; + copy.dstArray = dst; + copy.dstXInBytes = wOffsetDst; + copy.dstY = hOffsetDst; + copy.dstHost = nullptr; + copy.dstDevice = nullptr; + copy.dstPitch = 0; // Ignored. + + // Source endpoint: also an array. + copy.srcMemoryType = hipMemoryTypeArray; + copy.srcArray = const_cast(src); + copy.srcXInBytes = wOffsetSrc; + copy.srcY = hOffsetSrc; + copy.srcHost = nullptr; + copy.srcDevice = nullptr; + copy.srcPitch = 0; // Ignored. + + return ihipMemcpyParam2D(&copy, stream, isAsync); +} + +// Array-to-array 2D copy submitted with a null stream. +hipError_t hipMemcpy2DArrayToArray(hipArray_t dst, size_t wOffsetDst, size_t hOffsetDst, hipArray_const_t src, size_t wOffsetSrc, size_t hOffsetSrc, size_t width, size_t height, hipMemcpyKind kind) { + HIP_INIT_API(hipMemcpy2DArrayToArray, dst, wOffsetDst, hOffsetDst, src, wOffsetSrc, hOffsetSrc, width, height, kind); + + HIP_RETURN(ihipMemcpy2DArrayToArray(dst, wOffsetDst, hOffsetDst, src, wOffsetSrc, hOffsetSrc, width, height, kind, nullptr)); +} + +// Same transfer as hipMemcpy2DArrayToArray, traced under the hipMemcpyArrayToArray name. +hipError_t hipMemcpyArrayToArray(hipArray_t dst, size_t wOffsetDst, size_t hOffsetDst, hipArray_const_t src, size_t wOffsetSrc, size_t hOffsetSrc, size_t width, size_t height, hipMemcpyKind kind) { + HIP_INIT_API(hipMemcpyArrayToArray, dst, wOffsetDst, hOffsetDst, src, wOffsetSrc, hOffsetSrc, width, height, kind); + + HIP_RETURN(ihipMemcpy2DArrayToArray(dst, wOffsetDst, hOffsetDst, src, wOffsetSrc, hOffsetSrc, width, height, kind, nullptr)); +} + +hipError_t hipMemcpy2DFromArray(void* dst, size_t dpitch, hipArray_const_t src, size_t wOffsetSrc, size_t hOffset, size_t width, size_t height, hipMemcpyKind kind) { + HIP_INIT_API(hipMemcpy2DFromArray, dst, 
dpitch, src, wOffsetSrc, hOffset, width, height, kind); + + HIP_RETURN(ihipMemcpy2DFromArray(dst, dpitch, src, wOffsetSrc, hOffset, width, height, kind, nullptr)); +} + +// Asynchronous 2D copy out of a HIP array on the given stream. +hipError_t hipMemcpy2DFromArrayAsync(void* dst, size_t dpitch, hipArray_const_t src, size_t wOffsetSrc, size_t hOffsetSrc, size_t width, size_t height, hipMemcpyKind kind, hipStream_t stream) { + HIP_INIT_API(hipMemcpy2DFromArrayAsync, dst, dpitch, src, wOffsetSrc, hOffsetSrc, width, height, kind, stream); + + HIP_RETURN(ihipMemcpy2DFromArray(dst, dpitch, src, wOffsetSrc, hOffsetSrc, width, height, kind, stream, true)); +} + +// Asynchronous copy of count bytes out of a HIP array, deriving the 2D extent from the array's own dimensions. +// Returns hipErrorInvalidValue when src is null or describes a zero-width / zero-element-size array. +hipError_t hipMemcpyFromArrayAsync(void* dst, hipArray_const_t src, size_t wOffsetSrc, size_t hOffsetSrc, size_t count, hipMemcpyKind kind, hipStream_t stream) { + HIP_INIT_API(hipMemcpyFromArrayAsync, dst, src, wOffsetSrc, hOffsetSrc, count, kind, stream); + + if (src == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + // A zero width or element size would make the height computation below divide by zero. + const size_t elementSize = hip::getElementSize(src); + if ((src->width == 0) || (elementSize == 0)) { + HIP_RETURN(hipErrorInvalidValue); + } + + const size_t arrayHeight = (src->height != 0) ? src->height : 1; + const size_t widthInBytes = count / arrayHeight; + + const size_t height = (count / src->width) / elementSize; + + HIP_RETURN(ihipMemcpy2DFromArray(dst, 0 /* dpitch */, src, wOffsetSrc, hOffsetSrc, widthInBytes, height, kind, stream, true)); +} + +// Asynchronous 2D copy into a HIP array on the given stream; the stream is now part of the traced arguments. +hipError_t hipMemcpy2DToArrayAsync(hipArray* dst, size_t wOffset, size_t hOffset, const void* src, size_t spitch, size_t width, size_t height, hipMemcpyKind kind, hipStream_t stream) { + HIP_INIT_API(hipMemcpy2DToArrayAsync, dst, wOffset, hOffset, src, spitch, width, height, kind, stream); + + HIP_RETURN(ihipMemcpy2DToArray(dst, wOffset, hOffset, src, spitch, width, height, kind, stream, true)); +} + +// Asynchronous copy of count bytes into a HIP array; the stream is now part of the traced arguments. +hipError_t hipMemcpyToArrayAsync(hipArray_t dst, size_t wOffset, size_t hOffset, const void* src, size_t count, hipMemcpyKind kind, hipStream_t stream) { + HIP_INIT_API(hipMemcpyToArrayAsync, dst, wOffset, hOffset, src, count, kind, stream); + + if (dst == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + const size_t arrayHeight = (dst->height != 
0) ? dst->height : 1; + const size_t widthInBytes = count / arrayHeight; + + // A zero width or element size would make the height computation below divide by zero. + const size_t elementSize = hip::getElementSize(dst); + if ((dst->width == 0) || (elementSize == 0)) { + HIP_RETURN(hipErrorInvalidValue); + } + + const size_t height = (count / dst->width) / elementSize; + + HIP_RETURN(ihipMemcpy2DToArray(dst, wOffset, hOffset, src, 0 /* spitch */, widthInBytes, height, kind, stream, true)); +} + +// Array-to-array copy of ByteCount bytes between the given byte offsets (null stream). +hipError_t hipMemcpyAtoA(hipArray* dstArray, + size_t dstOffset, + hipArray* srcArray, + size_t srcOffset, + size_t ByteCount) { + HIP_INIT_API(hipMemcpyAtoA, dstArray, dstOffset, srcArray, srcOffset, ByteCount); + + HIP_RETURN(ihipMemcpyAtoA(srcArray, dstArray, {srcOffset, 0, 0}, {dstOffset, 0, 0}, {ByteCount, 1, 1}, nullptr)); +} + +// Array-to-device copy of ByteCount bytes starting at byte offset srcOffset in the array (null stream). +hipError_t hipMemcpyAtoD(hipDeviceptr_t dstDevice, + hipArray* srcArray, + size_t srcOffset, + size_t ByteCount) { + HIP_INIT_API(hipMemcpyAtoD, dstDevice, srcArray, srcOffset, ByteCount); + + HIP_RETURN(ihipMemcpyAtoD(srcArray, dstDevice, {srcOffset, 0, 0}, {0, 0, 0}, {ByteCount, 1, 1}, 0, 0, nullptr)); +} + +// Array-to-host copy of ByteCount bytes on the given stream, submitted with isAsync = true. +hipError_t hipMemcpyAtoHAsync(void* dstHost, + hipArray* srcArray, + size_t srcOffset, + size_t ByteCount, + hipStream_t stream) { + HIP_INIT_API(hipMemcpyAtoHAsync, dstHost, srcArray, srcOffset, ByteCount, stream); + + HIP_RETURN(ihipMemcpyAtoH(srcArray, dstHost, {srcOffset, 0, 0}, {0, 0, 0}, {ByteCount, 1, 1}, 0, 0, stream, true)); +} + +// Device-to-array copy of ByteCount bytes starting at byte offset dstOffset in the array (null stream). +hipError_t hipMemcpyDtoA(hipArray* dstArray, + size_t dstOffset, + hipDeviceptr_t srcDevice, + size_t ByteCount) { + HIP_INIT_API(hipMemcpyDtoA, dstArray, dstOffset, srcDevice, ByteCount); + + HIP_RETURN(ihipMemcpyDtoA(srcDevice, dstArray, {0, 0, 0}, {dstOffset, 0, 0}, {ByteCount, 1, 1}, 0, 0, nullptr)); +} + +// Host-to-array copy of ByteCount bytes on the given stream, submitted with isAsync = true. +hipError_t hipMemcpyHtoAAsync(hipArray* dstArray, + size_t dstOffset, + const void* srcHost, + size_t ByteCount, + hipStream_t stream) { + HIP_INIT_API(hipMemcpyHtoAAsync, dstArray, dstOffset, srcHost, ByteCount, stream); + + HIP_RETURN(ihipMemcpyHtoA(srcHost, dstArray, {0, 0, 0}, {dstOffset, 0, 0}, {ByteCount, 1, 1}, 0, 0, stream, true)); +} + +hipError_t hipMipmappedArrayCreate(hipMipmappedArray_t* pHandle, + 
HIP_ARRAY3D_DESCRIPTOR* pMipmappedArrayDesc, + unsigned int numMipmapLevels) { + HIP_INIT_API(hipMipmappedArrayCreate, pHandle, pMipmappedArrayDesc, numMipmapLevels); + + HIP_RETURN(hipErrorNotSupported); // stub: always reports not supported +} + +hipError_t hipMallocMipmappedArray(hipMipmappedArray_t *mipmappedArray, + const hipChannelFormatDesc* desc, + hipExtent extent, + unsigned int numLevels, + unsigned int flags) { + HIP_INIT_API(hipMallocMipmappedArray, mipmappedArray, desc, extent, numLevels, flags); + + HIP_RETURN(hipErrorNotSupported); // stub: always reports not supported +} + +hipError_t hipMipmappedArrayDestroy(hipMipmappedArray_t hMipmappedArray) { + HIP_INIT_API(hipMipmappedArrayDestroy, hMipmappedArray); + + HIP_RETURN(hipErrorNotSupported); // stub: always reports not supported +} + +hipError_t hipFreeMipmappedArray(hipMipmappedArray_t mipmappedArray) { + HIP_INIT_API(hipFreeMipmappedArray, mipmappedArray); + + HIP_RETURN(hipErrorNotSupported); // stub: always reports not supported +} + +hipError_t hipMipmappedArrayGetLevel(hipArray_t* pLevelArray, + hipMipmappedArray_t hMipMappedArray, + unsigned int level) { + HIP_INIT_API(hipMipmappedArrayGetLevel, pLevelArray, hMipMappedArray, level); + + HIP_RETURN(hipErrorNotSupported); // stub: always reports not supported +} + +hipError_t hipGetMipmappedArrayLevel(hipArray_t *levelArray, + hipMipmappedArray_const_t mipmappedArray, + unsigned int level) { + HIP_INIT_API(hipGetMipmappedArrayLevel, levelArray, mipmappedArray, level); + + HIP_RETURN(hipErrorNotSupported); // stub: always reports not supported +} + +hipError_t hipMallocHost(void** ptr, + size_t size) { + HIP_INIT_API(hipMallocHost, ptr, size); + + if (ptr == nullptr) { // nowhere to store the result + HIP_RETURN(hipErrorInvalidValue); + } + + HIP_RETURN(ihipMalloc(ptr, size, CL_MEM_SVM_FINE_GRAIN_BUFFER)); // allocation is requested with the fine-grain SVM flag +} + +hipError_t hipFreeHost(void *ptr) { + HIP_INIT_API(hipFreeHost, ptr); + + HIP_RETURN(ihipFree(ptr)); // released through the common ihipFree path +} diff --git a/projects/hip/vdi/hip_module.cpp b/projects/hip/vdi/hip_module.cpp new file mode 100755 index 0000000000..3d40d8c967 --- /dev/null +++ b/projects/hip/vdi/hip_module.cpp @@ -0,0 +1,665 @@ +/* Copyright (c) 2015-present Advanced Micro Devices, Inc. 
+ + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. 
*/ + +#include +#include +#include + +#include "hip_internal.hpp" +#include "platform/program.hpp" +#include "hip_event.hpp" +#include "hip_platform.hpp" + +hipError_t ihipModuleLoadData(hipModule_t *module, const void *image); + +// Returns the name of the kernel behind a hipFunction_t handle. +const std::string& FunctionName(const hipFunction_t f) +{ + return hip::Function::asFunction(f)->function_->name(); +} + +// Estimates the size in bytes of an in-memory ELF64 image from its section header table: +// the result is the end of the section with the largest file offset (SHT_NOBITS sections contribute no bytes), +// or the end of the section header table when no section lies beyond it. +// The headers are read without bounds checks, so 'emi' must point at a complete ELF64 image. +static uint64_t ElfSize(const void *emi) +{ + const Elf64_Ehdr *ehdr = (const Elf64_Ehdr*)emi; + const Elf64_Shdr *shdr = (const Elf64_Shdr*)((char*)emi + ehdr->e_shoff); + + uint64_t max_offset = ehdr->e_shoff; + uint64_t total_size = max_offset + ehdr->e_shentsize * ehdr->e_shnum; + + for (uint16_t i=0; i < ehdr->e_shnum; ++i){ + uint64_t cur_offset = static_cast(shdr[i].sh_offset); + if (max_offset < cur_offset) { + max_offset = cur_offset; + total_size = max_offset; + if(SHT_NOBITS != shdr[i].sh_type) { + total_size += static_cast(shdr[i].sh_size); + } + } + } + return total_size; +} + +// Loads a code object file from disk and registers it as a module. +// Returns hipErrorInvalidValue for null arguments, hipErrorFileNotFound when the file cannot be opened and +// hipErrorInvalidKernelFile when it is empty; otherwise the result of ihipModuleLoadData. +hipError_t hipModuleLoad(hipModule_t* module, const char* fname) +{ + HIP_INIT_API(hipModuleLoad, module, fname); + + if (!module || !fname) { + HIP_RETURN(hipErrorInvalidValue); + } + + std::ifstream file(fname, std::ios::binary); + + if (!file.is_open()) { + HIP_RETURN(hipErrorFileNotFound); + } + + std::vector tmp{std::istreambuf_iterator{file}, std::istreambuf_iterator{}}; + + // An empty file has no ELF header for the loader to parse. + if (tmp.empty()) { + HIP_RETURN(hipErrorInvalidKernelFile); + } + + HIP_RETURN(ihipModuleLoadData(module, tmp.data())); +} + +// Unregisters the variables recorded for a module and frees the list handed back by the platform state. +bool ihipModuleUnregisterGlobal(hipModule_t hmod) { + std::vector< std::pair >* modules = + PlatformState::instance().unregisterVar(hmod); + // delete on a null pointer is a no-op, so no explicit check is needed. + delete modules; + return true; +} + +hipError_t hipModuleUnload(hipModule_t hmod) +{ + HIP_INIT_API(hipModuleUnload, hmod); + + if (hmod == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + amd::Program* program = as_amd(reinterpret_cast(hmod)); + + if(!PlatformState::instance().unregisterFunc(hmod)) { + HIP_RETURN(hipErrorInvalidSymbol); + } + + if(!ihipModuleUnregisterGlobal(hmod)) { + HIP_RETURN(hipErrorInvalidSymbol); + } + + 
program->release(); + + HIP_RETURN(hipSuccess); +} + +hipError_t hipModuleLoadData(hipModule_t *module, const void *image) +{ + HIP_INIT_API(hipModuleLoadData, module, image); + + HIP_RETURN(ihipModuleLoadData(module, image)); +} + +hipError_t hipModuleLoadDataEx(hipModule_t *module, const void *image, + unsigned int numOptions, hipJitOption* options, + void** optionsValues) +{ + /* TODO: Pass options to Program */ + HIP_INIT_API(hipModuleLoadData, module, image); + + HIP_RETURN(ihipModuleLoadData(module, image)); +} + +extern hipError_t __hipExtractCodeObjectFromFatBinary(const void* data, + const std::vector& devices, + std::vector>& code_objs); + +inline bool ihipModuleRegisterUndefined(amd::Program* program, hipModule_t* module) { + + std::vector undef_vars; + device::Program* dev_program + = program->getDeviceProgram(*hip::getCurrentDevice()->devices()[0]); + + if (!dev_program->getUndefinedVarFromCodeObj(&undef_vars)) { + return false; + } + + for (auto it = undef_vars.begin(); it != undef_vars.end(); ++it) { + auto modules = new std::vector >(g_devices.size()); + for (size_t dev = 0; dev < g_devices.size(); ++dev) { + modules->at(dev) = std::make_pair(*module, true); + } + + texture* tex_hptr + = new texture(); + memset(tex_hptr, 0x00, sizeof(texture)); + + PlatformState::DeviceVar dvar{ reinterpret_cast(tex_hptr), it->c_str(), sizeof(*tex_hptr), modules, + std::vector{ g_devices.size()}, true }; + PlatformState::instance().registerVar(it->c_str(), dvar); + } + + return true; +} + +inline bool ihipModuleRegisterFunc(amd::Program* program, hipModule_t* module) { + + std::vector func_names; + device::Program* dev_program + = program->getDeviceProgram(*hip::getCurrentDevice()->devices()[0]); + + // Get all the global func names from COMGR + if (!dev_program->getGlobalFuncFromCodeObj(&func_names)) { + return false; + } + + return PlatformState::instance().registerModFuncs(func_names, module); +} + + +inline bool ihipModuleRegisterGlobal(amd::Program* program, 
hipModule_t* module) { + + size_t var_size = 0; + hipDeviceptr_t device_ptr = nullptr; + std::vector var_names; + + device::Program* dev_program + = program->getDeviceProgram(*hip::getCurrentDevice()->devices()[0]); + + if (!dev_program->getGlobalVarFromCodeObj(&var_names)) { + return false; + } + + for (auto it = var_names.begin(); it != var_names.end(); ++it) { + auto modules = new std::vector >(g_devices.size()); + for (size_t dev = 0; dev < g_devices.size(); ++dev) { + modules->at(dev) = std::make_pair(*module, true); + } + + PlatformState::DeviceVar dvar{nullptr, it->c_str(), 0, modules, + std::vector{ g_devices.size()}, false }; + PlatformState::instance().registerVar(it->c_str(), dvar); + } + + return true; +} + +hipError_t ihipModuleLoadData(hipModule_t *module, const void *image) +{ + std::vector> code_objs; + hipError_t code_obj_err = __hipExtractCodeObjectFromFatBinary(image, {hip::getCurrentDevice()->devices()[0]->info().name_}, code_objs); + if (code_obj_err == hipSuccess) { + image = code_objs[0].first; + } else if(code_obj_err == hipErrorNoBinaryForGpu) { + return code_obj_err; + } + + amd::Program* program = new amd::Program(*hip::getCurrentDevice()->asContext()); + if (program == NULL) { + return hipErrorOutOfMemory; + } + + program->setVarInfoCallBack(&getSvarInfo); + + if (CL_SUCCESS != program->addDeviceProgram(*hip::getCurrentDevice()->devices()[0], image, ElfSize(image))) { + return hipErrorInvalidKernelFile; + } + + *module = reinterpret_cast(as_cl(program)); + + if (!ihipModuleRegisterGlobal(program, module)) { + return hipErrorSharedObjectSymbolNotFound; + } + + if (!ihipModuleRegisterUndefined(program, module)) { + return hipErrorSharedObjectSymbolNotFound; + } + + if(CL_SUCCESS != program->build(hip::getCurrentDevice()->devices(), nullptr, nullptr, nullptr)) { + return hipErrorSharedObjectInitFailed; + } + + if (!ihipModuleRegisterFunc(program, module)) { + return hipErrorSharedObjectSymbolNotFound; + } + + return hipSuccess; +} + 
+hipError_t hipModuleGetFunction(hipFunction_t *hfunc, hipModule_t hmod, const char *name) +{ + HIP_INIT_API(hipModuleGetFunction, hfunc, hmod, name); + + if (!PlatformState::instance().findModFunc(hfunc, hmod, name)) { // lookup is delegated to the platform state + HIP_RETURN(hipErrorNotFound); + } + HIP_RETURN(hipSuccess); +} + +hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes, hipModule_t hmod, const char* name) +{ + HIP_INIT_API(hipModuleGetGlobal, dptr, bytes, hmod, name); + + /* Get address and size for the global symbol */ + if (!PlatformState::instance().getGlobalVar(name, ihipGetDevice(), hmod, + dptr, bytes)) { + HIP_RETURN(hipErrorNotFound); + } + + HIP_RETURN(hipSuccess); +} + +hipError_t hipFuncGetAttribute(int* value, hipFunction_attribute attrib, hipFunction_t hfunc) { + HIP_INIT_API(hipFuncGetAttribute, value, attrib, hfunc); + + if ((value == nullptr) || (hfunc == nullptr)) { + HIP_RETURN(hipErrorInvalidValue); + } + + hip::Function* function = hip::Function::asFunction(hfunc); + if (function == nullptr) { + HIP_RETURN(hipErrorInvalidHandle); + } + + amd::Kernel* kernel = function->function_; + if (kernel == nullptr) { + HIP_RETURN(hipErrorInvalidDeviceFunction); + } + + const device::Kernel::WorkGroupInfo* wrkGrpInfo // work-group info of the kernel on the current device's first amd::Device + = kernel->getDeviceKernel(*(hip::getCurrentDevice()->devices()[0]))->workGroupInfo(); + if (wrkGrpInfo == nullptr) { + HIP_RETURN(hipErrorMissingConfiguration); + } + + switch(attrib) { // each supported attribute is written to *value; unknown ones return hipErrorInvalidValue + case HIP_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES: + *value = static_cast(wrkGrpInfo->localMemSize_ + - wrkGrpInfo->privateMemSize_); + break; + case HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK: + *value = static_cast(wrkGrpInfo->wavefrontPerSIMD_ + * wrkGrpInfo->wavefrontSize_); + break; + case HIP_FUNC_ATTRIBUTE_CONST_SIZE_BYTES: + *value = 0; + break; + case HIP_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES: + *value = static_cast(wrkGrpInfo->localMemSize_); + break; + case HIP_FUNC_ATTRIBUTE_NUM_REGS: + *value = static_cast(wrkGrpInfo->availableGPRs_); + break; + case 
HIP_FUNC_ATTRIBUTE_PTX_VERSION: + *value = 30; // Defaults to 3.0 as HCC + break; + case HIP_FUNC_ATTRIBUTE_BINARY_VERSION: + *value = static_cast(kernel->signature().version()); + break; + case HIP_FUNC_ATTRIBUTE_CACHE_MODE_CA: + *value = 0; + break; + case HIP_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES: + *value = static_cast(wrkGrpInfo->availableLDSSize_); + break; + case HIP_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT: + *value = 0; + break; + default: + HIP_RETURN(hipErrorInvalidValue); + } + + HIP_RETURN(hipSuccess); +} + +// Fills 'attr' for a host-side function pointer through the platform state; +// returns hipErrorInvalidDeviceFunction when the lookup fails. +hipError_t hipFuncGetAttributes(hipFuncAttributes* attr, const void* func) +{ + HIP_INIT_API(hipFuncGetAttributes, attr, func); + + if (!PlatformState::instance().getFuncAttr(func, attr)) { + HIP_RETURN(hipErrorInvalidDeviceFunction); + } + + HIP_RETURN(hipSuccess); +} + +// Common launch path behind the hip*LaunchKernel entry points. +// gridDimX/Y/Z are used as the global work size and blockDimX/Y/Z as the local work size. +// Kernel arguments come either from kernelParams or from the buffer described by 'extra'. +// startEvent/stopEvent, when non-null, get a marker for the enqueued command. +hipError_t ihipModuleLaunchKernel(hipFunction_t f, + uint32_t gridDimX, uint32_t gridDimY, uint32_t gridDimZ, + uint32_t blockDimX, uint32_t blockDimY, uint32_t blockDimZ, + uint32_t sharedMemBytes, hipStream_t hStream, + void **kernelParams, void **extra, + hipEvent_t startEvent, hipEvent_t stopEvent, uint32_t flags = 0, + uint32_t params = 0, uint32_t gridId = 0, uint32_t numGrids = 0, + uint64_t prevGridSum = 0, uint64_t allGridSum = 0, uint32_t firstDevice = 0) { + HIP_INIT_API(NONE, f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, + sharedMemBytes, hStream, kernelParams, extra, startEvent, stopEvent, flags, params); + + hip::Function* function = hip::Function::asFunction(f); + amd::Kernel* kernel = function->function_; + + amd::ScopedLock lock(function->lock_); + + hip::Event* eStart = reinterpret_cast(startEvent); + hip::Event* eStop = reinterpret_cast(stopEvent); + amd::HostQueue* queue = hip::getQueue(hStream); + // The queue is dereferenced on the next line, so it has to be validated first. + if (queue == nullptr) { + return hipErrorOutOfMemory; + } + const amd::Device& device = queue->vdev()->device(); + + // Make sure dispatch doesn't exceed max workgroup size limit + // (the product is formed in size_t so that large dimensions cannot wrap around in 32 bits and slip past the check) + if (static_cast<size_t>(blockDimX) * blockDimY * blockDimZ > device.info().maxWorkGroupSize_) { + return 
hipErrorInvalidConfiguration; + } + + if (params & amd::NDRangeKernelCommand::CooperativeGroups) { + if (!device.info().cooperativeGroups_) { + return hipErrorLaunchFailure; + } + int num_blocks = 0; + int num_grids = 0; + int block_size = blockDimX * blockDimY * blockDimZ; + // A zero-sized block cannot be launched and would make the division below divide by zero. + if (block_size == 0) { + return hipErrorInvalidConfiguration; + } + hip_impl::ihipOccupancyMaxActiveBlocksPerMultiprocessor( + &num_blocks, &num_grids, device, f, block_size, sharedMemBytes, true); + // The grid dimensions hold the global work size here, so dividing by the block size gives the number of blocks. + if (((gridDimX * gridDimY * gridDimZ) / block_size) > unsigned(num_grids)) { + return hipErrorCooperativeLaunchTooLarge; + } + } + if (params & amd::NDRangeKernelCommand::CooperativeMultiDeviceGroups) { + if (!device.info().cooperativeMultiDeviceGroups_) { + return hipErrorLaunchFailure; + } + } + if (!queue) { + return hipErrorOutOfMemory; + } + + size_t globalWorkOffset[3] = {0}; + size_t globalWorkSize[3] = { gridDimX, gridDimY, gridDimZ }; + size_t localWorkSize[3] = { blockDimX, blockDimY, blockDimZ }; + amd::NDRangeContainer ndrange(3, globalWorkOffset, globalWorkSize, localWorkSize); + amd::Command::EventWaitList waitList; + + address kernargs = nullptr; + + // 'extra' is a struct that contains the following info: { + // HIP_LAUNCH_PARAM_BUFFER_POINTER, kernargs, + // HIP_LAUNCH_PARAM_BUFFER_SIZE, &kernargs_size, + // HIP_LAUNCH_PARAM_END } + if (extra != nullptr) { + if (extra[0] != HIP_LAUNCH_PARAM_BUFFER_POINTER || + extra[2] != HIP_LAUNCH_PARAM_BUFFER_SIZE || extra[4] != HIP_LAUNCH_PARAM_END) { + return hipErrorNotInitialized; + } + kernargs = reinterpret_cast
(extra[1]); + } + + const amd::KernelSignature& signature = kernel->signature(); + for (size_t i = 0; i < signature.numParameters(); ++i) { + const amd::KernelParameterDescriptor& desc = signature.at(i); + if (kernelParams == nullptr) { + assert(kernargs != nullptr); + kernel->parameters().set(i, desc.size_, kernargs + desc.offset_, + desc.type_ == T_POINTER/*svmBound*/); + } else { + assert(extra == nullptr); + kernel->parameters().set(i, desc.size_, kernelParams[i], desc.type_ == T_POINTER/*svmBound*/); + } + } + + amd::NDRangeKernelCommand* command = new amd::NDRangeKernelCommand( + *queue, waitList, *kernel, ndrange, sharedMemBytes, + params, gridId, numGrids, prevGridSum, allGridSum, firstDevice); + if (!command) { + return hipErrorOutOfMemory; + } + + // Capture the kernel arguments + if (CL_SUCCESS != command->captureAndValidate()) { + delete command; + return hipErrorOutOfMemory; + } + + command->enqueue(); + + if(startEvent != nullptr) { + eStart->addMarker(queue, command); + command->retain(); + } + if(stopEvent != nullptr) { + eStop->addMarker(queue, command); + command->retain(); + } + command->release(); + + return hipSuccess; +} + +hipError_t hipModuleLaunchKernel(hipFunction_t f, + uint32_t gridDimX, uint32_t gridDimY, uint32_t gridDimZ, + uint32_t blockDimX, uint32_t blockDimY, uint32_t blockDimZ, + uint32_t sharedMemBytes, hipStream_t hStream, + void **kernelParams, void **extra) +{ + HIP_INIT_API(hipModuleLaunchKernel, f, gridDimX, gridDimY, gridDimZ, + blockDimX, blockDimY, blockDimZ, + sharedMemBytes, hStream, + kernelParams, extra); + + HIP_RETURN(ihipModuleLaunchKernel(f, gridDimX * blockDimX, gridDimY * blockDimY, gridDimZ * blockDimZ, + blockDimX, blockDimY, blockDimZ, + sharedMemBytes, hStream, kernelParams, extra, nullptr, nullptr)); +} + +hipError_t hipExtModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX, + uint32_t globalWorkSizeY, uint32_t globalWorkSizeZ, + uint32_t localWorkSizeX, uint32_t localWorkSizeY, + uint32_t 
localWorkSizeZ, size_t sharedMemBytes, + hipStream_t hStream, void** kernelParams, void** extra, + hipEvent_t startEvent, hipEvent_t stopEvent, uint32_t flags) +{ + HIP_INIT_API(NONE, f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ, + localWorkSizeX, localWorkSizeY, localWorkSizeZ, + sharedMemBytes, hStream, + kernelParams, extra, startEvent, stopEvent, flags); + + HIP_RETURN(ihipModuleLaunchKernel(f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ, localWorkSizeX, localWorkSizeY, + localWorkSizeZ, sharedMemBytes, hStream, kernelParams, extra, startEvent, stopEvent, flags)); +} + + + +hipError_t hipHccModuleLaunchKernel(hipFunction_t f, uint32_t gridDimX, + uint32_t gridDimY, uint32_t gridDimZ, + uint32_t blockDimX, uint32_t blockDimY, + uint32_t blockDimZ, size_t sharedMemBytes, + hipStream_t hStream, void** kernelParams, void** extra, + hipEvent_t startEvent, + hipEvent_t stopEvent) +{ + HIP_INIT_API(NONE, f, gridDimX, gridDimY, gridDimZ, + blockDimX, blockDimY, blockDimZ, + sharedMemBytes, hStream, + kernelParams, extra, startEvent, stopEvent); + + HIP_RETURN(ihipModuleLaunchKernel(f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, + sharedMemBytes, hStream, kernelParams, extra, startEvent, stopEvent)); +} + +hipError_t hipModuleLaunchKernelExt(hipFunction_t f, uint32_t gridDimX, + uint32_t gridDimY, uint32_t gridDimZ, + uint32_t blockDimX, uint32_t blockDimY, + uint32_t blockDimZ, size_t sharedMemBytes, + hipStream_t hStream, void** kernelParams, void** extra, + hipEvent_t startEvent, + hipEvent_t stopEvent) +{ + HIP_INIT_API(NONE, f, gridDimX, gridDimY, gridDimZ, + blockDimX, blockDimY, blockDimZ, + sharedMemBytes, hStream, + kernelParams, extra, startEvent, stopEvent); + + HIP_RETURN(ihipModuleLaunchKernel(f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, + sharedMemBytes, hStream, kernelParams, extra, startEvent, stopEvent)); +} + +hipError_t hipLaunchCooperativeKernel(const void* f, + dim3 gridDim, dim3 blockDim, + void 
**kernelParams, uint32_t sharedMemBytes, hipStream_t hStream) +{ + HIP_INIT_API(hipLaunchCooperativeKernel, f, gridDim, blockDim, + sharedMemBytes, hStream); + + int deviceId = ihipGetDevice(); + hipFunction_t func = PlatformState::instance().getFunc(f, deviceId); + if (func == nullptr) { + HIP_RETURN(hipErrorInvalidDeviceFunction); + } + + HIP_RETURN(ihipModuleLaunchKernel(func, gridDim.x * blockDim.x, gridDim.y * blockDim.y, gridDim.z * blockDim.z, + blockDim.x, blockDim.y, blockDim.z, + sharedMemBytes, hStream, kernelParams, nullptr, nullptr, nullptr, 0, + amd::NDRangeKernelCommand::CooperativeGroups)); +} + +hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsList, + int numDevices, unsigned int flags, uint32_t extFlags) +{ + int numActiveGPUs = 0; + ihipDeviceGetCount(&numActiveGPUs); + + if ((numDevices > numActiveGPUs) || (launchParamsList == nullptr)) { + return hipErrorInvalidValue; + } + + hipError_t result = hipErrorUnknown; + uint64_t allGridSize = 0; + std::vector mgpu_list(numDevices); + + for (int i = 0; i < numDevices; ++i) { + const hipLaunchParams& launch = launchParamsList[i]; + allGridSize += launch.gridDim.x * launch.gridDim.y * launch.gridDim.z; + + // Make sure block dimensions are valid + if (0 == launch.blockDim.x * launch.blockDim.y * launch.blockDim.z) { + return hipErrorInvalidConfiguration; + } + if (launch.stream != nullptr) { + // Validate devices to make sure it dosn't have duplicates + amd::HostQueue* queue = reinterpret_cast(launch.stream)->asHostQueue(); + auto device = &queue->vdev()->device(); + for (int j = 0; j < numDevices; ++j) { + if (mgpu_list[j] == device) { + return hipErrorInvalidDevice; + } + } + mgpu_list[i] = device; + } else { + return hipErrorInvalidResourceHandle; + } + } + uint64_t prevGridSize = 0; + uint32_t firstDevice = 0; + + // Sync the execution streams on all devices + if ((flags & hipCooperativeLaunchMultiDeviceNoPreSync) == 0) { + for (int i = 0; i < numDevices; ++i) { + 
amd::HostQueue* queue = + reinterpret_cast(launchParamsList[i].stream)->asHostQueue(); + queue->finish(); + } + } + + for (int i = 0; i < numDevices; ++i) { + const hipLaunchParams& launch = launchParamsList[i]; + amd::HostQueue* queue = reinterpret_cast(launch.stream)->asHostQueue(); + hipFunction_t func = nullptr; + // The order of devices in the launch may not match the order in the global array + for (size_t dev = 0; dev < g_devices.size(); ++dev) { + // Find the matching device and request the kernel function + if (&queue->vdev()->device() == g_devices[dev]->devices()[0]) { + func = PlatformState::instance().getFunc(launch.func, dev); + // Save VDI index of the first device in the launch + if (i == 0) { + firstDevice = queue->vdev()->device().index(); + } + break; + } + } + if (func == nullptr) { + result = hipErrorInvalidDeviceFunction; + HIP_RETURN(result); + } + + result = ihipModuleLaunchKernel(func, + launch.gridDim.x * launch.blockDim.x, + launch.gridDim.y * launch.blockDim.y, + launch.gridDim.z * launch.blockDim.z, + launch.blockDim.x, launch.blockDim.y, launch.blockDim.z, + launch.sharedMem, launch.stream, launch.args, nullptr, nullptr, nullptr, + flags, extFlags, i, numDevices, prevGridSize, allGridSize, firstDevice); + if (result != hipSuccess) { + break; + } + prevGridSize += launch.gridDim.x * launch.gridDim.y * launch.gridDim.z; + } + + // Sync the execution streams on all devices + if ((flags & hipCooperativeLaunchMultiDeviceNoPostSync) == 0) { + for (int i = 0; i < numDevices; ++i) { + amd::HostQueue* queue = + reinterpret_cast(launchParamsList[i].stream)->asHostQueue(); + queue->finish(); + } + } + + return result; +} + +hipError_t hipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsList, + int numDevices, unsigned int flags) +{ + HIP_INIT_API(hipLaunchCooperativeKernelMultiDevice, launchParamsList, numDevices, flags); + + return ihipLaunchCooperativeKernelMultiDevice(launchParamsList, numDevices, flags, + 
(amd::NDRangeKernelCommand::CooperativeGroups | + amd::NDRangeKernelCommand::CooperativeMultiDeviceGroups)); +} + +hipError_t hipExtLaunchMultiKernelMultiDevice(hipLaunchParams* launchParamsList, + int numDevices, unsigned int flags) { + HIP_INIT_API(hipExtLaunchMultiKernelMultiDevice, launchParamsList, numDevices, flags); + + return ihipLaunchCooperativeKernelMultiDevice(launchParamsList, numDevices, flags, 0); +} + +hipError_t hipModuleGetTexRef(textureReference** texRef, hipModule_t hmod, const char* name) { + HIP_INIT_API(hipModuleGetTexRef, texRef, hmod, name); + + /* input args check */ + if ((texRef == nullptr) || (name == nullptr)) { + HIP_RETURN(hipErrorInvalidValue); + } + + /* Get address and size for the global symbol */ + if (!PlatformState::instance().getTexRef(name, hmod, texRef)) { + HIP_RETURN(hipErrorNotFound); + } + + // Texture references created by HIP driver API + // have the default read mode set to normalized float. + (*texRef)->readMode = hipReadModeNormalizedFloat; + + HIP_RETURN(hipSuccess); +} + diff --git a/projects/hip/vdi/hip_peer.cpp b/projects/hip/vdi/hip_peer.cpp new file mode 100644 index 0000000000..225361d525 --- /dev/null +++ b/projects/hip/vdi/hip_peer.cpp @@ -0,0 +1,127 @@ +/* Copyright (c) 2015-present Advanced Micro Devices, Inc. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. 
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ THE SOFTWARE. */
+
+// NOTE(review): the header name of the following #include is missing in this
+// copy of the patch; restore it from the upstream file before applying.
+#include
+
+#include "hip_internal.hpp"
+
+// Context-handle overload of hipDeviceCanAccessPeer. Not implemented: it trips
+// assert(0) when assertions are enabled and otherwise reports
+// hipErrorNotSupported. canAccessPeer is never written.
+hipError_t hipDeviceCanAccessPeer(int* canAccessPeer, hipCtx_t thisCtx, hipCtx_t peerCtx) {
+  HIP_INIT_API(NONE, canAccessPeer, thisCtx, peerCtx);
+
+  assert(0 && "Unimplemented");
+
+  HIP_RETURN(hipErrorNotSupported);
+}
+
+// Context-handle overload of hipMemcpyPeer. Not implemented: asserts, then
+// reports hipErrorNotSupported without copying anything.
+hipError_t hipMemcpyPeer(void* dst, hipCtx_t dstCtx, const void* src, hipCtx_t srcCtx,
+                         size_t sizeBytes) {
+  HIP_INIT_API(NONE, dst, dstCtx, src, srcCtx, sizeBytes);
+
+  assert(0 && "Unimplemented");
+
+  HIP_RETURN(hipErrorNotSupported);
+}
+
+// Context-handle overload of hipMemcpyPeerAsync (dstDevice/srcDevice are
+// hipCtx_t here despite their names). Not implemented: asserts, then reports
+// hipErrorNotSupported without enqueuing anything.
+hipError_t hipMemcpyPeerAsync(void* dst, hipCtx_t dstDevice, const void* src, hipCtx_t srcDevice,
+                              size_t sizeBytes, hipStream_t stream) {
+  HIP_INIT_API(NONE, dst, dstDevice, src, srcDevice, sizeBytes, stream);
+
+  assert(0 && "Unimplemented");
+
+  HIP_RETURN(hipErrorNotSupported);
+}
+
+// Shared helper: reports whether device `deviceId` lists device `peerDeviceId`
+// among its p2pDevices_.
+//   canAccessPeer  [out] set to 1 when the peer is found in p2pDevices_, else 0.
+//   deviceId / peerDeviceId  indices into g_devices.
+// Returns hipErrorInvalidValue for a null output pointer, hipErrorInvalidDevice
+// when either index is >= g_devices.size(), hipSuccess otherwise.
+// NOTE(review): the "peer is self" test runs before the range test, so two equal
+// but out-of-range ids yield hipSuccess with *canAccessPeer == 0 -- confirm
+// that this is intended.
+// NOTE(review): this helper uses HIP_RETURN without a HIP_INIT_API of its own and
+// is called from API entry points that also use HIP_RETURN -- confirm the macro
+// is safe to nest this way.
+hipError_t canAccessPeer(int* canAccessPeer, int deviceId, int peerDeviceId){
+  amd::Device* device = nullptr;
+  amd::Device* peer_device = nullptr;
+  if (canAccessPeer == nullptr) {
+    HIP_RETURN(hipErrorInvalidValue);
+  }
+  /* Peer cannot be self */
+  if (deviceId == peerDeviceId) {
+    *canAccessPeer = 0;
+    HIP_RETURN(hipSuccess);
+  }
+  /* Cannot exceed the max number of devices */
+  // NOTE(review): the static_cast target types were lost in this copy of the patch.
+  if (static_cast(deviceId) >= g_devices.size()
+       || static_cast(peerDeviceId) >= g_devices.size()) {
+    HIP_RETURN(hipErrorInvalidDevice);
+  }
+  // Only the first device of each g_devices entry is consulted.
+  device = g_devices[deviceId]->devices()[0];
+  peer_device = g_devices[peerDeviceId]->devices()[0];
+  // Membership test: the peer is accessible iff it appears in p2pDevices_.
+  *canAccessPeer = static_cast(std::find(device->p2pDevices_.begin(),
+                                          device->p2pDevices_.end(), as_cl(peer_device))
+                                          != device->p2pDevices_.end());
+  HIP_RETURN(hipSuccess);
+}
+
+// Device-index overload of hipDeviceCanAccessPeer: forwards to canAccessPeer().
+hipError_t hipDeviceCanAccessPeer(int* canAccess, int deviceId, int peerDeviceId) {
+  HIP_INIT_API(hipDeviceCanAccessPeer, canAccess, deviceId, peerDeviceId);
+  HIP_RETURN(canAccessPeer(canAccess, deviceId, peerDeviceId));
+}
+
+// Disables access from the current device to `peerDeviceId`.
+// Returns hipErrorInvalidDevice when canAccessPeer() fails or reports no access;
+// otherwise returns whatever DisablePeerAccess() on the current device returns.
+hipError_t hipDeviceDisablePeerAccess(int peerDeviceId) {
+  HIP_INIT_API(hipDeviceDisablePeerAccess, peerDeviceId);
+  int deviceId = hip::getCurrentDevice()->deviceId();
+  int canAccess = 0;
+  if ((hipSuccess != canAccessPeer(&canAccess, deviceId, peerDeviceId)) || (canAccess == 0)) {
+    HIP_RETURN(hipErrorInvalidDevice);
+  }
+  HIP_RETURN(hip::getCurrentDevice()->DisablePeerAccess(peerDeviceId));
+}
+
+// Enables access from the current device to `peerDeviceId`.
+// `flags` must be 0, otherwise hipErrorInvalidValue is returned.
+// Returns hipErrorInvalidDevice when canAccessPeer() fails or reports no access;
+// otherwise returns whatever EnablePeerAccess() on the current device returns.
+hipError_t hipDeviceEnablePeerAccess(int peerDeviceId, unsigned int flags) {
+  HIP_INIT_API(hipDeviceEnablePeerAccess, peerDeviceId, flags);
+  int deviceId = hip::getCurrentDevice()->deviceId();
+  int canAccess = 0;
+  if (flags != 0) {
+    HIP_RETURN(hipErrorInvalidValue);
+  }
+  if ((hipSuccess != canAccessPeer(&canAccess, deviceId, peerDeviceId)) || (canAccess == 0)) {
+    HIP_RETURN(hipErrorInvalidDevice);
+  }
+  HIP_RETURN(hip::getCurrentDevice()->EnablePeerAccess(peerDeviceId));
+}
+
+// Device-index overload of hipMemcpyPeer. dstDevice and srcDevice are only
+// passed to HIP_INIT_API; the copy itself is delegated to
+// hipMemcpy(dst, src, sizeBytes, hipMemcpyDeviceToDevice).
+hipError_t hipMemcpyPeer(void* dst, int dstDevice, const void* src, int srcDevice,
+                         size_t sizeBytes) {
+  HIP_INIT_API(hipMemcpyPeer, dst, dstDevice, src, srcDevice, sizeBytes);
+
+  HIP_RETURN(hipMemcpy(dst, src, sizeBytes, hipMemcpyDeviceToDevice));
+}
+
+// Device-index overload of hipMemcpyPeerAsync. dstDevice and srcDevice are only
+// passed to HIP_INIT_API; the copy is delegated to
+// hipMemcpyAsync(dst, src, sizeBytes, hipMemcpyDeviceToDevice, stream).
+hipError_t hipMemcpyPeerAsync(void* dst, int dstDevice, const void* src, int srcDevice,
+                              size_t sizeBytes, hipStream_t stream) {
+  HIP_INIT_API(hipMemcpyPeerAsync, dst, dstDevice, src, srcDevice, sizeBytes, stream);
+
+  HIP_RETURN(hipMemcpyAsync(dst, src, sizeBytes, hipMemcpyDeviceToDevice, stream));
+}
+
+// Does nothing beyond HIP_INIT_API; peerCtx and flags are not inspected and
+// hipSuccess is always returned.
+hipError_t hipCtxEnablePeerAccess(hipCtx_t peerCtx, unsigned int flags) {
+  HIP_INIT_API(hipCtxEnablePeerAccess, peerCtx, flags);
+
+  HIP_RETURN(hipSuccess);
+}
+
+// Does nothing beyond HIP_INIT_API; peerCtx is not inspected and hipSuccess is
+// always returned.
+hipError_t hipCtxDisablePeerAccess(hipCtx_t peerCtx) {
+  HIP_INIT_API(hipCtxDisablePeerAccess, peerCtx);
+
+  HIP_RETURN(hipSuccess);
+}
diff --git a/projects/hip/vdi/hip_platform.cpp b/projects/hip/vdi/hip_platform.cpp
new file mode 100755
index 0000000000..81ba2a2125
--- /dev/null
+++ b/projects/hip/vdi/hip_platform.cpp
@@ -0,0 +1,1229 @@
+/* Copyright (c) 2015-present Advanced Micro Devices, Inc.
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in
+ all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ THE SOFTWARE.
*/
+
+// NOTE(review): the header names of the two bare #include lines below are
+// missing in this copy of the patch; restore them from the upstream file.
+#include
+
+#include "hip_internal.hpp"
+#include "platform/program.hpp"
+#include "platform/runtime.hpp"
+
+#include
+#include "elfio.hpp"
+
+// Magic value expected in __CudaFatBinaryWrapper::magic (see __hipRegisterFatBinary).
+constexpr unsigned __hipFatMAGIC2 = 0x48495046; // "HIPF"
+
+// Per-thread stack of pending launch configurations: configureCall() pushes,
+// popExec() pops, setupArgument() writes into the top entry.
+// NOTE(review): the std::stack element type was lost in this copy of the patch.
+thread_local std::stack execStack_;
+// The PlatformState singleton, allocated during static initialization.
+PlatformState* PlatformState::platform_ = new PlatformState();
+
+// Wrapper handed to __hipRegisterFatBinary(). That function checks `magic`
+// against __hipFatMAGIC2 and `version` against 1, then passes `binary` on to
+// PlatformState::addFatBinary(). `dummy1` is not read by the code visible here.
+struct __CudaFatBinaryWrapper {
+  unsigned int magic;
+  unsigned int version;
+  void* binary;
+  void* dummy1;
+};
+
+// Leading magic string of a clang offload bundle, and the triple prefixes that
+// __hipExtractCodeObjectFromFatBinary() accepts for HIP-Clang and HCC bundles.
+#define CLANG_OFFLOAD_BUNDLER_MAGIC_STR "__CLANG_OFFLOAD_BUNDLE__"
+#define HIP_AMDGCN_AMDHSA_TRIPLE "hip-amdgcn-amd-amdhsa"
+#define HCC_AMDGCN_AMDHSA_TRIPLE "hcc-amdgcn-amd-amdhsa-"
+
+// One bundle entry. `offset` is added to the header address to locate the code
+// object, `size` is its length, and `triple` is the first byte of a
+// `tripleSize`-byte triple string; the next descriptor starts right after it.
+struct __ClangOffloadBundleDesc {
+  uint64_t offset;
+  uint64_t size;
+  uint64_t tripleSize;
+  const char triple[1];
+};
+
+// Bundle header: the magic string (without terminating NUL), the number of
+// descriptors, and the first descriptor.
+struct __ClangOffloadBundleHeader {
+  const char magic[sizeof(CLANG_OFFLOAD_BUNDLER_MAGIC_STR) - 1];
+  uint64_t numBundles;
+  __ClangOffloadBundleDesc desc[1];
+};
+
+// Forward declarations of functions defined outside this part of the file.
+hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes,
+                              hipModule_t hmod, const char* name);
+
+hipError_t ihipCreateGlobalVarObj(const char* name, hipModule_t hmod, amd::Memory** amd_mem_obj,
+                                  hipDeviceptr_t* dptr, size_t* bytes);
+
+// Returns true when `codeobj_target_id` equals `device_name` with everything
+// from the first '+' onwards removed (e.g. "gfx900+xnack" is compared as
+// "gfx900"). The comparison is an exact, case-sensitive string equality.
+static bool isCompatibleCodeObject(const std::string& codeobj_target_id,
+                                   const char* device_name) {
+  // Workaround for device name mismatch.
+  // Device name may contain feature strings delimited by '+', e.g.
+  // gfx900+xnack. Currently HIP-Clang does not include feature strings
+  // in code object target id in fat binary. Therefore drop the feature
+  // strings from device name before comparing it with code object target id.
+  std::string short_name(device_name);
+  auto feature_loc = short_name.find('+');
+  if (feature_loc != std::string::npos) {
+    short_name.erase(feature_loc);
+  }
+  return codeobj_target_id == short_name;
+}
+
+// Extracts code objects from the fat binary in `data` for the device names given
+// in `devices`; code_objs[i] receives the (image, size) pair for devices[i].
+// Returns hipErrorInvalidKernelFile when `data` does not start with the clang
+// offload bundle magic, hipSuccess when the number of matches equals
+// devices.size(), and hipErrorNoBinaryForGpu otherwise.
+hipError_t __hipExtractCodeObjectFromFatBinary(const void* data,
+                                               const std::vector& devices,
+                                               std::vector>& code_objs)
+{
+  // NOTE(review): template arguments (vector element types, cast targets) were
+  // lost in this copy of the patch throughout this file.
+  // Reject anything that does not start with the offload-bundle magic string.
+  std::string magic((const char*)data, sizeof(CLANG_OFFLOAD_BUNDLER_MAGIC_STR) - 1);
+  if (magic.compare(CLANG_OFFLOAD_BUNDLER_MAGIC_STR)) {
+    return hipErrorInvalidKernelFile;
+  }
+
+  code_objs.resize(devices.size());
+  const auto obheader = reinterpret_cast(data);
+  const auto* desc = &obheader->desc[0];
+  unsigned num_code_objs = 0;
+  // Descriptors are variable-length: the next one begins tripleSize bytes after
+  // the start of the current triple. The advance is part of the for-increment,
+  // so it also runs on `continue`.
+  for (uint64_t i = 0; i < obheader->numBundles; ++i,
+       desc = reinterpret_cast(
+           reinterpret_cast(&desc->triple[0]) + desc->tripleSize)) {
+
+    std::size_t offset = 0;
+    // The prefix is compared over sizeof(...) - 1 characters, but `offset` is
+    // sizeof(...), i.e. one character more than the prefix is skipped.
+    // NOTE(review): presumably the extra character is a '-' separator in front
+    // of the target id -- confirm against the bundler's triple format.
+    if (!std::strncmp(desc->triple, HIP_AMDGCN_AMDHSA_TRIPLE,
+        sizeof(HIP_AMDGCN_AMDHSA_TRIPLE) - 1)) {
+      offset = sizeof(HIP_AMDGCN_AMDHSA_TRIPLE); // For code objects created by Clang
+    } else if (!std::strncmp(desc->triple, HCC_AMDGCN_AMDHSA_TRIPLE,
+               sizeof(HCC_AMDGCN_AMDHSA_TRIPLE) - 1)) {
+      offset = sizeof(HCC_AMDGCN_AMDHSA_TRIPLE); // For code objects created by HCC
+    } else {
+      // Bundle for some other target: ignore it.
+      continue;
+    }
+    // Remainder of the triple is the target id compared against device names.
+    std::string target(desc->triple + offset, desc->tripleSize - offset);
+
+    // The code object lives at header address + desc->offset.
+    const void *image = reinterpret_cast(
+        reinterpret_cast(obheader) + desc->offset);
+    size_t size = desc->size;
+
+    for (size_t dev = 0; dev < devices.size(); ++dev) {
+      const char* name = devices[dev];
+
+      if (!isCompatibleCodeObject(target, name)) {
+        continue;
+      }
+      code_objs[dev] = std::make_pair(image, size);
+      // NOTE(review): counts (bundle, device) matches, not distinct devices; a
+      // device matched by two bundles makes the equality test below fail.
+      num_code_objs++;
+    }
+  }
+  if (num_code_objs == devices.size())
+    return hipSuccess;
+  else
+    return hipErrorNoBinaryForGpu;
+}
+
+// Entry point called with a pointer to a __CudaFatBinaryWrapper.
+// Returns nullptr unless magic == __hipFatMAGIC2 and version == 1; otherwise
+// returns the result of PlatformState::addFatBinary() for the wrapped binary.
+extern "C" std::vector>* __hipRegisterFatBinary(const void* data)
+{
+  const __CudaFatBinaryWrapper* fbwrapper = reinterpret_cast(data);
+  if (fbwrapper->magic != __hipFatMAGIC2 || fbwrapper->version != 1) {
+    return nullptr;
+  }
+
+  return PlatformState::instance().addFatBinary(fbwrapper->binary);
+}
+
+// Fills `programs` with one (program handle, false) pair per entry of g_devices,
+// created from the code objects found in the fat binary `data`.
+// Does nothing when `programs` is already non-empty or when code-object
+// extraction fails. The `false` second member is the "built" flag that
+// getFunc()/getGlobalVar() later test before calling program->build().
+void PlatformState::digestFatBinary(const void* data, std::vector>& programs)
+{
+  if (programs.size() > 0) {
+    return;
+  }
+
+  std::vector> code_objs;
+  std::vector devices;
+  // Collect the name of the first device of every g_devices entry.
+  for (size_t dev = 0; dev < g_devices.size(); ++dev) {
+    devices.push_back(g_devices[dev]->devices()[0]->info().name_);
+  }
+
+  if (hipSuccess != __hipExtractCodeObjectFromFatBinary((char*)data, devices, code_objs)) {
+    return;
+  }
+
+  programs.resize(g_devices.size());
+
+  for (size_t dev = 0; dev < g_devices.size(); ++dev) {
+    amd::Context* ctx = g_devices[dev]->asContext();
+    amd::Program* program = new amd::Program(*ctx);
+    if (program == nullptr) {
+      return;
+    }
+    // NOTE(review): when addDeviceProgram() does not return CL_SUCCESS the
+    // program is neither stored nor released here and programs[dev] keeps its
+    // value-initialized entry -- looks like a leak; confirm ownership rules.
+    if (CL_SUCCESS == program->addDeviceProgram(*ctx->devices()[0], code_objs[dev].first, code_objs[dev].second)) {
+      programs.at(dev) = std::make_pair(reinterpret_cast(as_cl(program)) , false);
+    }
+  }
+}
+
+// One-time initialization under lock_. Once g_devices is non-empty it digests
+// every registered fat binary and sizes the per-device vectors of all
+// registered functions and variables to g_devices.size().
+// Returns early (leaving initialized_ unset) while g_devices is still empty.
+void PlatformState::init()
+{
+  amd::ScopedLock lock(lock_);
+
+  if(initialized_ || g_devices.empty()) {
+    return;
+  }
+  initialized_ = true;
+
+  for (auto& it : modules_) {
+    digestFatBinary(it.first, it.second);
+  }
+  for (auto& it : functions_) {
+    it.second.functions.resize(g_devices.size());
+  }
+  for (auto& it : vars_) {
+    it.second.rvars.resize(g_devices.size());
+  }
+}
+
+// Destroys the bookkeeping created by registerModFuncs() for `hmod`: every
+// non-null per-device hip::Function, each function's `modules` vector, and the
+// Module record itself. Always returns true, even when `hmod` is unknown.
+// NOTE(review): the module_map_ entry is not erased after `delete mod_ptr`, so
+// the map keeps a dangling pointer for `hmod` -- confirm whether an erase is
+// missing.
+bool PlatformState::unregisterFunc(hipModule_t hmod) {
+  amd::ScopedLock lock(lock_);
+  auto mod_it = module_map_.find(hmod);
+  if (mod_it != module_map_.cend()) {
+    PlatformState::Module* mod_ptr = mod_it->second;
+    if(mod_ptr != nullptr) {
+      for (auto func_it = mod_ptr->functions_.begin(); func_it != mod_ptr->functions_.end(); ++func_it) {
+        PlatformState::DeviceFunction &devFunc = func_it->second;
+        for (size_t dev = 0; dev < g_devices.size(); ++dev) {
+          if (devFunc.functions[dev] != 0) {
+            hip::Function* f = reinterpret_cast(devFunc.functions[dev]);
+            delete f;
+          }
+        }
+        delete devFunc.modules;
+      }
+      delete mod_ptr;
+    }
+  }
+  return true;
+}
+
+// Removes from vars_ every variable whose module for device index 0 is `hmod`.
+// For entries flagged dyn_undef the object behind shadowVptr is deleted as a
+// texture first.
+// Returns the `modules` pointer of the last removed entry, or nullptr when
+// nothing matched. The pointed-to vector is not freed here.
+std::vector< std::pair >* PlatformState::unregisterVar(hipModule_t hmod) {
+  amd::ScopedLock lock(lock_);
+  std::vector< std::pair >* rmodules = nullptr;
+  auto it = vars_.begin();
+  while (it != vars_.end()) {
+    DeviceVar& dvar = it->second;
+    if ((*dvar.modules)[0].first == hmod) {
+      rmodules = dvar.modules;
+      if (dvar.dyn_undef) {
+        texture* tex_hptr
+          = reinterpret_cast *>(dvar.shadowVptr);
+        delete tex_hptr;
+      }
+      // Post-increment keeps `it` valid across the erase.
+      vars_.erase(it++);
+    } else {
+      ++it;
+    }
+  }
+  return rmodules;
+}
+
+// Looks up the DeviceVar registered under `hostVar`.
+//   hmod != nullptr: returns the first entry whose module for `deviceId` is hmod.
+//   hmod == nullptr: with fewer than two entries, returns that entry (or nullptr
+//                    if none); with two or more, returns the single entry whose
+//                    dyn_undef flag is false, or nullptr unless exactly one exists.
+// Does not take lock_ itself; the callers visible in this file (getTexRef,
+// getGlobalVar) hold it, getShadowVarInfo does not.
+PlatformState::DeviceVar* PlatformState::findVar(std::string hostVar, int deviceId, hipModule_t hmod) {
+  DeviceVar* dvar = nullptr;
+  if (hmod != nullptr) {
+    // If module is provided, then get the var only from that module
+    auto var_range = vars_.equal_range(hostVar);
+    for (auto it = var_range.first; it != var_range.second; ++it) {
+      if ((*it->second.modules)[deviceId].first == hmod) {
+        dvar = &(it->second);
+        break;
+      }
+    }
+  } else {
+    // If var count is < 2, return the var (or nullptr when there is none)
+    if (vars_.count(hostVar) < 2) {
+      auto it = vars_.find(hostVar);
+      dvar = ((it == vars_.end()) ? nullptr : &(it->second));
+    } else {
+      // If var count is >= 2, return the original (non-shadow) var;
+      // if the number of original vars is not exactly 1, return nullptr
+      size_t orig_global_count = 0;
+      auto var_range = vars_.equal_range(hostVar);
+      for (auto it = var_range.first; it != var_range.second; ++it) {
+        // when dyn_undef is set, it is a shadow var
+        if (it->second.dyn_undef == false) {
+          ++orig_global_count;
+          dvar = &(it->second);
+        }
+      }
+      dvar = ((orig_global_count == 1) ? dvar : nullptr);
+    }
+  }
+
+  return dvar;
+}
+
+// Copies the symbol name registered for `hostVar` into `symbolName`.
+// Returns false (leaving symbolName untouched) when `hostVar` is unknown.
+// NOTE(review): reads symbols_ without taking lock_, unlike registerVarSym() --
+// confirm that callers serialize access.
+bool PlatformState::findSymbol(const void *hostVar, std::string &symbolName) {
+  auto it = symbols_.find(hostVar);
+  if (it != symbols_.end()) {
+    symbolName = it->second;
+    return true;
+  }
+  return false;
+}
+
+// Records `symbolName` for the host address `hostVar` in symbols_, under lock_.
+void PlatformState::registerVarSym(const void *hostVar, const char *symbolName) {
+  amd::ScopedLock lock(lock_);
+  symbols_.insert(std::make_pair(hostVar, std::string(symbolName)));
+}
+
+// Stores a copy of `rvar` in vars_ under the name `hostvar`, under lock_.
+void PlatformState::registerVar(const char* hostvar,
+                                const DeviceVar& rvar) {
+  amd::ScopedLock lock(lock_);
+  vars_.insert(std::make_pair(std::string(hostvar), rvar));
+}
+
+// Stores a copy of `func` in functions_ keyed by the host function address,
+// under lock_.
+void PlatformState::registerFunction(const void* hostFunction,
+                                     const DeviceFunction& func) {
+  amd::ScopedLock lock(lock_);
+  functions_.insert(std::make_pair(hostFunction, func));
+}
+
+// Fills four fields of `func_attr` (localSizeBytes, sharedSizeBytes,
+// maxThreadsPerBlock, numRegs) from the work-group info of kernel `func_name`
+// in `program`, using the device program of the current device's first device.
+// Returns false when the kernel name is not found; other fields of func_attr
+// are left untouched.
+// NOTE(review): maxThreadsPerBlock is taken from wavefrontSize_ and
+// sharedSizeBytes from size_ -- confirm these are the intended WorkGroupInfo
+// members.
+// NOTE(review): dev_program is dereferenced without a null check -- confirm that
+// getDeviceProgram() cannot return null here.
+bool ihipGetFuncAttributes(const char* func_name, amd::Program* program, hipFuncAttributes* func_attr) {
+  device::Program* dev_program
+    = program->getDeviceProgram(*hip::getCurrentDevice()->devices()[0]);
+
+  const auto it = dev_program->kernels().find(std::string(func_name));
+  if (it == dev_program->kernels().cend()) {
+    return false;
+  }
+
+  const device::Kernel::WorkGroupInfo* wginfo = it->second->workGroupInfo();
+  func_attr->localSizeBytes = wginfo->localMemSize_;
+  func_attr->sharedSizeBytes = wginfo->size_;
+  func_attr->maxThreadsPerBlock = wginfo->wavefrontSize_;
+  func_attr->numRegs = wginfo->usedVGPRs_;
+
+  return true;
+}
+
+// Reports the host shadow address and size registered for `var_name`, resolved
+// with findVar() for the current device and `hmod`.
+// Returns false, leaving the outputs untouched, when no variable is found.
+bool PlatformState::getShadowVarInfo(std::string var_name, hipModule_t hmod,
+                                     void** var_addr, size_t* var_size) {
+  DeviceVar* dvar = findVar(var_name, ihipGetDevice(), hmod);
+  if (dvar != nullptr) {
+    *var_addr = dvar->shadowVptr;
+    *var_size = dvar->size;
+    return true;
+  } else {
+    return false;
+  }
+}
+
+// Callback installed with amd::Program::setVarInfoCallBack() (see getFunc() and
+// getGlobalVar()); reinterprets `program` as the module handle and forwards to
+// PlatformState::getShadowVarInfo().
+bool CL_CALLBACK getSvarInfo(cl_program program, std::string var_name, void** var_addr,
+                             size_t* var_size) {
+  return PlatformState::instance().getShadowVarInfo(var_name, reinterpret_cast(program),
+                                                    var_addr, var_size);
+}
+ +bool PlatformState::registerModFuncs(std::vector& func_names, hipModule_t* module) { + amd::ScopedLock lock(lock_); + PlatformState::Module* mod_ptr = new PlatformState::Module(*module); + + for (auto it = func_names.begin(); it != func_names.end(); ++it) { + auto modules = new std::vector >(g_devices.size()); + for (size_t dev = 0; dev < g_devices.size(); ++dev) { + modules->at(dev) = std::make_pair(*module, true); + } + + PlatformState::DeviceFunction dfunc{*it, modules, + std::vector(g_devices.size(), 0)}; + mod_ptr->functions_.insert(std::make_pair(*it, dfunc)); + } + + module_map_.insert(std::make_pair(*module, mod_ptr)); + return true; +} + +bool PlatformState::findModFunc(hipFunction_t* hfunc, hipModule_t hmod, const char* name) { + amd::ScopedLock lock(lock_); + + auto mod_it = module_map_.find(hmod); + if (mod_it != module_map_.cend()) { + auto func_it = mod_it->second->functions_.find(name); + if (func_it != mod_it->second->functions_.cend()) { + PlatformState::DeviceFunction& devFunc = func_it->second; + if (devFunc.functions[ihipGetDevice()] == 0) { + if(!createFunc(&devFunc.functions[ihipGetDevice()], hmod, name)) { + return false; + } + } + *hfunc = devFunc.functions[ihipGetDevice()]; + return true; + } + } + return false; +} + +bool PlatformState::createFunc(hipFunction_t* hfunc, hipModule_t hmod, const char* name) { + amd::Program* program = as_amd(reinterpret_cast(hmod)); + + const amd::Symbol* symbol = program->findSymbol(name); + if (!symbol) { + return false; + } + + amd::Kernel* kernel = new amd::Kernel(*program, *symbol, name); + if (!kernel) { + return false; + } + + hip::Function* f = new hip::Function(kernel); + *hfunc = f->asHipFunction(); + + return true; +} + + +hipFunction_t PlatformState::getFunc(const void* hostFunction, int deviceId) { + amd::ScopedLock lock(lock_); + const auto it = functions_.find(hostFunction); + if (it != functions_.cend()) { + PlatformState::DeviceFunction& devFunc = it->second; + if 
(devFunc.functions[deviceId] == 0) { + hipModule_t module = (*devFunc.modules)[deviceId].first; + if (!(*devFunc.modules)[deviceId].second) { + amd::Program* program = as_amd(reinterpret_cast(module)); + program->setVarInfoCallBack(&getSvarInfo); + if (CL_SUCCESS != program->build(g_devices[deviceId]->devices(), nullptr, nullptr, nullptr)) { + return nullptr; + } + (*devFunc.modules)[deviceId].second = true; + } + hipFunction_t function = nullptr; + if (createFunc(&function, module, devFunc.deviceName.c_str()) && + function != nullptr) { + devFunc.functions[deviceId] = function; + } + else { + // tprintf(DB_FB, "__hipRegisterFunction cannot find kernel %s for" + // " device %d\n", deviceName, deviceId); + } + } + return devFunc.functions[deviceId]; + } + return nullptr; +} + +bool PlatformState::getFuncAttr(const void* hostFunction, + hipFuncAttributes* func_attr) { + if (func_attr == nullptr) { + return false; + } + + const auto it = functions_.find(hostFunction); + if (it == functions_.cend()) { + return false; + } + + PlatformState::DeviceFunction& devFunc = it->second; + int deviceId = ihipGetDevice(); + + /* If module has not been initialized yet, build the kernel now*/ + if (!(*devFunc.modules)[deviceId].second) { + if (nullptr == PlatformState::instance().getFunc(hostFunction, deviceId)) { + return false; + } + } + + amd::Program* program = as_amd(reinterpret_cast((*devFunc.modules)[deviceId].first)); + if (!ihipGetFuncAttributes(devFunc.deviceName.c_str(), program, func_attr)) { + return false; + } + return true; +} + +bool PlatformState::getTexRef(const char* hostVar, hipModule_t hmod, textureReference** texRef) { + amd::ScopedLock lock(lock_); + DeviceVar* dvar = findVar(std::string(hostVar), ihipGetDevice(), hmod); + if (dvar == nullptr) { + return false; + } + + if (!dvar->dyn_undef) { + return false; + } + + *texRef = new (dvar->shadowVptr) texture{}; + + return true; +} + +bool PlatformState::getGlobalVar(const char* hostVar, int deviceId, hipModule_t 
hmod, + hipDeviceptr_t* dev_ptr, size_t* size_ptr) { + amd::ScopedLock lock(lock_); + DeviceVar* dvar = findVar(std::string(hostVar), deviceId, hmod); + if (dvar != nullptr) { + if (dvar->rvars[deviceId].getdeviceptr() == nullptr) { + size_t sym_size = 0; + hipDeviceptr_t device_ptr = nullptr; + amd::Memory* amd_mem_obj = nullptr; + + if (!(*dvar->modules)[deviceId].second) { + amd::Program* program = as_amd(reinterpret_cast((*dvar->modules)[deviceId].first)); + program->setVarInfoCallBack(&getSvarInfo); + if (CL_SUCCESS != program->build(g_devices[deviceId]->devices(), nullptr, nullptr, nullptr)) { + return false; + } + (*dvar->modules)[deviceId].second = true; + } + if((hipSuccess == ihipCreateGlobalVarObj(dvar->hostVar.c_str(), (*dvar->modules)[deviceId].first, + &amd_mem_obj, &device_ptr, &sym_size)) + && (device_ptr != nullptr)) { + dvar->rvars[deviceId].size_ = sym_size; + dvar->rvars[deviceId].devicePtr_ = device_ptr; + dvar->rvars[deviceId].amd_mem_obj_ = amd_mem_obj; + amd::MemObjMap::AddMemObj(device_ptr, amd_mem_obj); + } else { + LogError("[HIP] __hipRegisterVar cannot find kernel for device \n"); + } + } + *size_ptr = dvar->rvars[deviceId].getvarsize(); + *dev_ptr = dvar->rvars[deviceId].getdeviceptr(); + return true; + } else { + return false; + } +} + +void PlatformState::setupArgument(const void *arg, size_t size, size_t offset) { + auto& arguments = execStack_.top().arguments_; + + if (arguments.size() < offset + size) { + arguments.resize(offset + size); + } + + ::memcpy(&arguments[offset], arg, size); +} + +void PlatformState::configureCall(dim3 gridDim, dim3 blockDim, size_t sharedMem, + hipStream_t stream) { + execStack_.push(ihipExec_t{gridDim, blockDim, sharedMem, stream}); +} + +void PlatformState::popExec(ihipExec_t& exec) { + exec = std::move(execStack_.top()); + execStack_.pop(); +} + +extern "C" void __hipRegisterFunction( + std::vector >* modules, + const void* hostFunction, + char* deviceFunction, + const char* deviceName, + unsigned 
int threadLimit, + uint3* tid, + uint3* bid, + dim3* blockDim, + dim3* gridDim, + int* wSize) +{ + PlatformState::DeviceFunction func{ std::string{deviceName}, modules, std::vector{g_devices.size()}}; + PlatformState::instance().registerFunction(hostFunction, func); +// for (size_t i = 0; i < g_devices.size(); ++i) { +// PlatformState::instance().getFunc(hostFunction, i); +// } +} + +// Registers a device-side global variable. +// For each global variable in device code, there is a corresponding shadow +// global variable in host code. The shadow host variable is used to keep +// track of the value of the device side global variable between kernel +// executions. +extern "C" void __hipRegisterVar( + std::vector >* modules, // The device modules containing code object + void* var, // The shadow variable in host code + char* hostVar, // Variable name in host code + char* deviceVar, // Variable name in device code + int ext, // Whether this variable is external + size_t size, // Size of the variable + int constant, // Whether this variable is constant + int global) // Unknown, always 0 +{ + PlatformState::DeviceVar dvar{var, std::string{ hostVar }, size, modules, + std::vector{g_devices.size()}, false }; + + PlatformState::instance().registerVar(hostVar, dvar); + PlatformState::instance().registerVarSym(var, deviceVar); +} + +extern "C" void __hipUnregisterFatBinary(std::vector< std::pair >* modules) +{ + HIP_INIT(); + + std::for_each(modules->begin(), modules->end(), [](std::pair module){ + if (module.first != nullptr) { + as_amd(reinterpret_cast(module.first))->release(); + } + }); + if (modules->size() > 0) { + PlatformState::instance().unregisterVar((*modules)[0].first); + } + PlatformState::instance().removeFatBinary(modules); +} + +extern "C" hipError_t hipConfigureCall( + dim3 gridDim, + dim3 blockDim, + size_t sharedMem, + hipStream_t stream) +{ + HIP_INIT_API(NONE, gridDim, blockDim, sharedMem, stream); + + PlatformState::instance().configureCall(gridDim, 
blockDim, sharedMem, stream); + + HIP_RETURN(hipSuccess); +} + +extern "C" hipError_t __hipPushCallConfiguration( + dim3 gridDim, + dim3 blockDim, + size_t sharedMem, + hipStream_t stream) +{ + HIP_INIT_API(NONE, gridDim, blockDim, sharedMem, stream); + + PlatformState::instance().configureCall(gridDim, blockDim, sharedMem, stream); + + HIP_RETURN(hipSuccess); +} + +extern "C" hipError_t __hipPopCallConfiguration(dim3 *gridDim, + dim3 *blockDim, + size_t *sharedMem, + hipStream_t *stream) { + HIP_INIT_API(NONE, gridDim, blockDim, sharedMem, stream); + + ihipExec_t exec; + PlatformState::instance().popExec(exec); + *gridDim = exec.gridDim_; + *blockDim = exec.blockDim_; + *sharedMem = exec.sharedMem_; + *stream = exec.hStream_; + + HIP_RETURN(hipSuccess); +} + +extern "C" hipError_t hipSetupArgument( + const void *arg, + size_t size, + size_t offset) +{ + HIP_INIT_API(NONE, arg, size, offset); + + PlatformState::instance().setupArgument(arg, size, offset); + + HIP_RETURN(hipSuccess); +} + +extern "C" hipError_t hipLaunchByPtr(const void *hostFunction) +{ + HIP_INIT_API(NONE, hostFunction); + + ihipExec_t exec; + PlatformState::instance().popExec(exec); + + hip::Stream* stream = reinterpret_cast(exec.hStream_); + int deviceId = (stream != nullptr)? 
stream->device->deviceId() : ihipGetDevice(); + if (deviceId == -1) { + HIP_RETURN(hipErrorNoDevice); + } + hipFunction_t func = PlatformState::instance().getFunc(hostFunction, deviceId); + if (func == nullptr) { + HIP_RETURN(hipErrorInvalidDeviceFunction); + } + + size_t size = exec.arguments_.size(); + void *extra[] = { + HIP_LAUNCH_PARAM_BUFFER_POINTER, &exec.arguments_[0], + HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, + HIP_LAUNCH_PARAM_END + }; + + HIP_RETURN(hipModuleLaunchKernel(func, + exec.gridDim_.x, exec.gridDim_.y, exec.gridDim_.z, + exec.blockDim_.x, exec.blockDim_.y, exec.blockDim_.z, + exec.sharedMem_, exec.hStream_, nullptr, extra)); +} + +hipError_t hipGetSymbolAddress(void** devPtr, const void* symbol) { + HIP_INIT_API(hipGetSymbolAddress, devPtr, symbol); + + std::string symbolName; + if (!PlatformState::instance().findSymbol(symbol, symbolName)) { + HIP_RETURN(hipErrorInvalidSymbol); + } + size_t size = 0; + if(!PlatformState::instance().getGlobalVar(symbolName.c_str(), ihipGetDevice(), nullptr, + devPtr, &size)) { + HIP_RETURN(hipErrorInvalidSymbol); + } + HIP_RETURN(hipSuccess); +} + +hipError_t hipGetSymbolSize(size_t* sizePtr, const void* symbol) { + HIP_INIT_API(hipGetSymbolSize, sizePtr, symbol); + + std::string symbolName; + if (!PlatformState::instance().findSymbol(symbol, symbolName)) { + HIP_RETURN(hipErrorInvalidSymbol); + } + hipDeviceptr_t devPtr = nullptr; + if (!PlatformState::instance().getGlobalVar(symbolName.c_str(), ihipGetDevice(), nullptr, + &devPtr, sizePtr)) { + HIP_RETURN(hipErrorInvalidSymbol); + } + HIP_RETURN(hipSuccess); +} + +hipError_t ihipCreateGlobalVarObj(const char* name, hipModule_t hmod, amd::Memory** amd_mem_obj, + hipDeviceptr_t* dptr, size_t* bytes) +{ + HIP_INIT(); + + amd::Program* program = nullptr; + device::Program* dev_program = nullptr; + + /* Get Device Program pointer*/ + program = as_amd(reinterpret_cast(hmod)); + dev_program = program->getDeviceProgram(*hip::getCurrentDevice()->devices()[0]); + + if 
(dev_program == nullptr) { + HIP_RETURN(hipErrorInvalidDeviceFunction); + } + /* Find the global Symbols */ + if (!dev_program->createGlobalVarObj(amd_mem_obj, dptr, bytes, name)) { + HIP_RETURN(hipErrorInvalidSymbol); + } + + HIP_RETURN(hipSuccess); +} + + +namespace hip_impl { +hipError_t ihipOccupancyMaxActiveBlocksPerMultiprocessor( + int* numBlocks, int* numGrids, + const amd::Device& device, hipFunction_t func, int blockSize, + size_t dynamicSMemSize, bool bCalcPotentialBlkSz) +{ + hip::Function* function = hip::Function::asFunction(func); + const amd::Kernel& kernel = *function->function_; + + const device::Kernel::WorkGroupInfo* wrkGrpInfo = kernel.getDeviceKernel(device)->workGroupInfo(); + if (blockSize == 0) { + if (bCalcPotentialBlkSz == false){ + return hipErrorInvalidValue; + } + else { + blockSize = device.info().maxWorkGroupSize_; // maxwavefrontperblock + } + } + + // Make sure the requested block size is smaller than max supported + if (blockSize > int(device.info().maxWorkGroupSize_)) { + numBlocks = 0; + numGrids = 0; + return hipSuccess; + } + + // Find threads accupancy per CU => simd_per_cu * GPR usage + constexpr size_t MaxWavesPerSimd = 8; // Limited by SPI 32 per CU, hence 8 per SIMD + size_t VgprWaves = MaxWavesPerSimd; + if (wrkGrpInfo->usedVGPRs_ > 0) { + VgprWaves = wrkGrpInfo->availableVGPRs_ / amd::alignUp(wrkGrpInfo->usedVGPRs_, 4); + } + + size_t GprWaves = VgprWaves; + if (wrkGrpInfo->usedSGPRs_ > 0) { + const size_t maxSGPRs = (device.info().gfxipVersion_ < 800) ? 
512 : 800; + size_t SgprWaves = maxSGPRs / amd::alignUp(wrkGrpInfo->usedSGPRs_, 16); + GprWaves = std::min(VgprWaves, SgprWaves); + } + + size_t alu_accupancy = device.info().simdPerCU_ * std::min(MaxWavesPerSimd, GprWaves); + alu_accupancy *= wrkGrpInfo->wavefrontSize_; + // Calculate blocks occupancy per CU + *numBlocks = alu_accupancy / amd::alignUp(blockSize, wrkGrpInfo->wavefrontSize_); + + size_t total_used_lds = wrkGrpInfo->usedLDSSize_ + dynamicSMemSize; + if (total_used_lds != 0) { + // Calculate LDS occupancy per CU. lds_per_cu / (static_lsd + dynamic_lds) + int lds_occupancy = static_cast(device.info().localMemSize_ / total_used_lds); + *numBlocks = std::min(*numBlocks, lds_occupancy); + } + + if (bCalcPotentialBlkSz) { + *numGrids = *numBlocks * device.info().numRTCUs_; + } + + return hipSuccess; +} +} + +extern "C" { +hipError_t hipOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize, + const void* f, size_t dynSharedMemPerBlk, + int blockSizeLimit) +{ + HIP_INIT_API(hipOccupancyMaxPotentialBlockSize, f, dynSharedMemPerBlk, blockSizeLimit); + if ((gridSize == nullptr) || (blockSize == nullptr)) { + return HIP_RETURN(hipErrorInvalidValue); + } + hipFunction_t func = PlatformState::instance().getFunc(f, ihipGetDevice()); + if (func == nullptr) { + return HIP_RETURN(hipErrorInvalidValue); + } + const amd::Device& device = *hip::getCurrentDevice()->devices()[0]; + int num_grids = 0; + int num_blocks = 0; + hipError_t ret = hip_impl::ihipOccupancyMaxActiveBlocksPerMultiprocessor( + &num_blocks, &num_grids, device, func, 0, dynSharedMemPerBlk,true); + if (ret == hipSuccess) { + *blockSize = num_blocks; + *gridSize = num_grids; + } + HIP_RETURN(ret); +} + +hipError_t hipModuleOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize, + hipFunction_t f, size_t dynSharedMemPerBlk, + int blockSizeLimit) +{ + HIP_INIT_API(hipModuleOccupancyMaxPotentialBlockSize, f, dynSharedMemPerBlk, blockSizeLimit); + if ((gridSize == nullptr) || (blockSize == 
nullptr)) { + return HIP_RETURN(hipErrorInvalidValue); + } + const amd::Device& device = *hip::getCurrentDevice()->devices()[0]; + int num_grids = 0; + int num_blocks = 0; + hipError_t ret = hip_impl::ihipOccupancyMaxActiveBlocksPerMultiprocessor( + &num_blocks, &num_grids, device, f, 0, dynSharedMemPerBlk,true); + if (ret == hipSuccess) { + *blockSize = num_blocks; + *gridSize = num_grids; + } + HIP_RETURN(ret); +} + +hipError_t hipModuleOccupancyMaxPotentialBlockSizeWithFlags(int* gridSize, int* blockSize, + hipFunction_t f, size_t dynSharedMemPerBlk, + int blockSizeLimit, unsigned int flags) +{ + HIP_INIT_API(hipModuleOccupancyMaxPotentialBlockSizeWithFlags, f, dynSharedMemPerBlk, blockSizeLimit, flags); + if ((gridSize == nullptr) || (blockSize == nullptr)) { + return HIP_RETURN(hipErrorInvalidValue); + } + const amd::Device& device = *hip::getCurrentDevice()->devices()[0]; + int num_grids = 0; + int num_blocks = 0; + hipError_t ret = hip_impl::ihipOccupancyMaxActiveBlocksPerMultiprocessor( + &num_blocks, &num_grids, device, f, 0, dynSharedMemPerBlk,true); + if (ret == hipSuccess) { + *blockSize = num_blocks; + *gridSize = num_grids; + } + HIP_RETURN(ret); +} + +hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks, + hipFunction_t f, int blockSize, size_t dynSharedMemPerBlk) +{ + HIP_INIT_API(hipModuleOccupancyMaxActiveBlocksPerMultiprocessor, f, blockSize, dynSharedMemPerBlk); + if (numBlocks == nullptr) { + return HIP_RETURN(hipErrorInvalidValue); + } + const amd::Device& device = *hip::getCurrentDevice()->devices()[0]; + + int num_blocks = 0; + hipError_t ret = hip_impl::ihipOccupancyMaxActiveBlocksPerMultiprocessor( + &num_blocks, nullptr, device, f, blockSize, dynSharedMemPerBlk, false); + *numBlocks = num_blocks; + HIP_RETURN(ret); +} + +hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int* numBlocks, + hipFunction_t f, int blockSize, + size_t dynSharedMemPerBlk, unsigned int flags) +{ + 
HIP_INIT_API(hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags, f, blockSize, dynSharedMemPerBlk, flags); + if (numBlocks == nullptr) { + return HIP_RETURN(hipErrorInvalidValue); + } + const amd::Device& device = *hip::getCurrentDevice()->devices()[0]; + + int num_blocks = 0; + hipError_t ret = hip_impl::ihipOccupancyMaxActiveBlocksPerMultiprocessor( + &num_blocks, nullptr, device, f, blockSize, dynSharedMemPerBlk, false); + *numBlocks = num_blocks; + HIP_RETURN(ret); +} + +hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks, + const void* f, int blockSize, size_t dynamicSMemSize) +{ + HIP_INIT_API(hipOccupancyMaxActiveBlocksPerMultiprocessor, f, blockSize, dynamicSMemSize); + if (numBlocks == nullptr) { + return HIP_RETURN(hipErrorInvalidValue); + } + + hipFunction_t func = PlatformState::instance().getFunc(f, ihipGetDevice()); + if (func == nullptr) { + return HIP_RETURN(hipErrorInvalidValue); + } + + const amd::Device& device = *hip::getCurrentDevice()->devices()[0]; + + int num_blocks = 0; + hipError_t ret = hip_impl::ihipOccupancyMaxActiveBlocksPerMultiprocessor( + &num_blocks, nullptr, device, func, blockSize, dynamicSMemSize, false); + *numBlocks = num_blocks; + HIP_RETURN(ret); +} + +hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int* numBlocks, + const void* f, + int blockSize, size_t dynamicSMemSize, unsigned int flags) +{ + HIP_INIT_API(hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags, f, blockSize, dynamicSMemSize, flags); + if (numBlocks == nullptr) { + return HIP_RETURN(hipErrorInvalidValue); + } + + hipFunction_t func = PlatformState::instance().getFunc(f, ihipGetDevice()); + if (func == nullptr) { + return HIP_RETURN(hipErrorInvalidValue); + } + + const amd::Device& device = *hip::getCurrentDevice()->devices()[0]; + + int num_blocks = 0; + hipError_t ret = hip_impl::ihipOccupancyMaxActiveBlocksPerMultiprocessor( + &num_blocks, nullptr, device, func, blockSize, dynamicSMemSize, false); + *numBlocks 
= num_blocks; + HIP_RETURN(ret); +} +} + +#if defined(ATI_OS_LINUX) + +namespace hip_impl { + +struct dl_phdr_info { + ELFIO::Elf64_Addr dlpi_addr; + const char *dlpi_name; + const ELFIO::Elf64_Phdr *dlpi_phdr; + ELFIO::Elf64_Half dlpi_phnum; +}; + +extern "C" int dl_iterate_phdr( + int (*callback) (struct dl_phdr_info *info, size_t size, void *data), void *data +); + +struct Symbol { + std::string name; + ELFIO::Elf64_Addr value = 0; + ELFIO::Elf_Xword size = 0; + ELFIO::Elf_Half sect_idx = 0; + uint8_t bind = 0; + uint8_t type = 0; + uint8_t other = 0; +}; + +inline Symbol read_symbol(const ELFIO::symbol_section_accessor& section, unsigned int idx) { + assert(idx < section.get_symbols_num()); + + Symbol r; + section.get_symbol(idx, r.name, r.value, r.size, r.bind, r.type, r.sect_idx, r.other); + + return r; +} + +template +inline ELFIO::section* find_section_if(ELFIO::elfio& reader, P p) { + const auto it = find_if(reader.sections.begin(), reader.sections.end(), std::move(p)); + + return it != reader.sections.end() ? 
*it : nullptr; +} + +std::vector> function_names_for(const ELFIO::elfio& reader, + ELFIO::section* symtab) { + std::vector> r; + ELFIO::symbol_section_accessor symbols{reader, symtab}; + + for (auto i = 0u; i != symbols.get_symbols_num(); ++i) { + auto tmp = read_symbol(symbols, i); + + if (tmp.type == STT_FUNC && tmp.sect_idx != SHN_UNDEF && !tmp.name.empty()) { + r.emplace_back(tmp.value, tmp.name); + } + } + + return r; +} + +const std::vector>& function_names_for_process() { + static constexpr const char self[] = "/proc/self/exe"; + + static std::vector> r; + static std::once_flag f; + + std::call_once(f, []() { + ELFIO::elfio reader; + + if (reader.load(self)) { + const auto it = find_section_if( + reader, [](const ELFIO::section* x) { return x->get_type() == SHT_SYMTAB; }); + + if (it) r = function_names_for(reader, it); + } + }); + + return r; +} + + +const std::unordered_map& function_names() +{ + static std::unordered_map r{ + function_names_for_process().cbegin(), + function_names_for_process().cend()}; + static std::once_flag f; + + std::call_once(f, []() { + dl_iterate_phdr([](dl_phdr_info* info, size_t, void*) { + ELFIO::elfio reader; + + if (reader.load(info->dlpi_name)) { + const auto it = find_section_if( + reader, [](const ELFIO::section* x) { return x->get_type() == SHT_SYMTAB; }); + + if (it) { + auto n = function_names_for(reader, it); + + for (auto&& f : n) f.first += info->dlpi_addr; + + r.insert(make_move_iterator(n.begin()), make_move_iterator(n.end())); + } + } + return 0; + }, + nullptr); + }); + + return r; +} + +std::vector bundles_for_process() { + static constexpr const char self[] = "/proc/self/exe"; + static constexpr const char kernel_section[] = ".kernel"; + std::vector r; + + ELFIO::elfio reader; + + if (reader.load(self)) { + auto it = find_section_if( + reader, [](const ELFIO::section* x) { return x->get_name() == kernel_section; }); + + if (it) r.insert(r.end(), it->get_data(), it->get_data() + it->get_size()); + } + + return 
r; +} + +const std::vector& modules() { + static std::vector r; + static std::once_flag f; + + std::call_once(f, []() { + static std::vector> bundles{bundles_for_process()}; + + dl_iterate_phdr( + [](dl_phdr_info* info, std::size_t, void*) { + ELFIO::elfio tmp; + if (tmp.load(info->dlpi_name)) { + const auto it = find_section_if( + tmp, [](const ELFIO::section* x) { return x->get_name() == ".kernel"; }); + + if (it) bundles.emplace_back(it->get_data(), it->get_data() + it->get_size()); + } + return 0; + }, + nullptr); + + for (auto&& bundle : bundles) { + if (bundle.empty()) { + continue; + } + std::string magic(&bundle[0], sizeof(CLANG_OFFLOAD_BUNDLER_MAGIC_STR) - 1); + if (magic.compare(CLANG_OFFLOAD_BUNDLER_MAGIC_STR)) + continue; + + const auto obheader = reinterpret_cast(&bundle[0]); + const auto* desc = &obheader->desc[0]; + for (uint64_t i = 0; i < obheader->numBundles; ++i, + desc = reinterpret_cast( + reinterpret_cast(&desc->triple[0]) + desc->tripleSize)) { + + std::string triple(desc->triple, sizeof(HCC_AMDGCN_AMDHSA_TRIPLE) - 1); + if (triple.compare(HCC_AMDGCN_AMDHSA_TRIPLE)) + continue; + + std::string target(desc->triple + sizeof(HCC_AMDGCN_AMDHSA_TRIPLE), + desc->tripleSize - sizeof(HCC_AMDGCN_AMDHSA_TRIPLE)); + + if (isCompatibleCodeObject(target, hip::getCurrentDevice()->devices()[0]->info().name_)) { + hipModule_t module; + if (hipSuccess == hipModuleLoadData(&module, reinterpret_cast( + reinterpret_cast(obheader) + desc->offset))) + r.push_back(module); + break; + } + } + } + }); + + return r; +} + +const std::unordered_map& functions() +{ + static std::unordered_map r; + static std::once_flag f; + + std::call_once(f, []() { + for (auto&& function : function_names()) { + for (auto&& module : modules()) { + hipFunction_t f; + if (hipSuccess == hipModuleGetFunction(&f, module, function.second.c_str())) { + r[function.first] = f; + } + } + } + }); + + return r; +} + + +void hipLaunchKernelGGLImpl( + uintptr_t function_address, + const dim3& 
numBlocks, + const dim3& dimBlocks, + uint32_t sharedMemBytes, + hipStream_t stream, + void** kernarg) +{ + HIP_INIT(); + + const auto it = functions().find(function_address); + if (it == functions().cend()) + assert(0); + + hipModuleLaunchKernel(it->second, + numBlocks.x, numBlocks.y, numBlocks.z, + dimBlocks.x, dimBlocks.y, dimBlocks.z, + sharedMemBytes, stream, nullptr, kernarg); +} + +void hipLaunchCooperativeKernelGGLImpl( + uintptr_t function_address, + const dim3& numBlocks, + const dim3& dimBlocks, + uint32_t sharedMemBytes, + hipStream_t stream, + void** kernarg) +{ + HIP_INIT(); + + hipLaunchCooperativeKernel(reinterpret_cast(function_address), + numBlocks, dimBlocks, kernarg, sharedMemBytes, stream); +} + +} + +#endif // defined(ATI_OS_LINUX) + +extern "C" hipError_t hipLaunchKernel(const void *hostFunction, + dim3 gridDim, + dim3 blockDim, + void** args, + size_t sharedMemBytes, + hipStream_t stream) +{ + HIP_INIT_API(NONE, hostFunction, gridDim, blockDim, args, sharedMemBytes, + stream); + + hip::Stream* s = reinterpret_cast(stream); + int deviceId = (s != nullptr)? 
s->device->deviceId() : ihipGetDevice(); + if (deviceId == -1) { + HIP_RETURN(hipErrorNoDevice); + } + hipFunction_t func = PlatformState::instance().getFunc(hostFunction, deviceId); + if (func == nullptr) { +#ifdef ATI_OS_LINUX + const auto it = hip_impl::functions().find(reinterpret_cast(hostFunction)); + if (it == hip_impl::functions().cend()) { + HIP_RETURN(hipErrorInvalidDeviceFunction); + } + func = it->second; +#else + HIP_RETURN(hipErrorInvalidDeviceFunction); +#endif + } + + HIP_RETURN(hipModuleLaunchKernel(func, gridDim.x, gridDim.y, gridDim.z, + blockDim.x, blockDim.y, blockDim.z, + sharedMemBytes, stream, args, nullptr)); +} + +// conversion routines between float and half precision +static inline std::uint32_t f32_as_u32(float f) { union { float f; std::uint32_t u; } v; v.f = f; return v.u; } +static inline float u32_as_f32(std::uint32_t u) { union { float f; std::uint32_t u; } v; v.u = u; return v.f; } +static inline int clamp_int(int i, int l, int h) { return std::min(std::max(i, l), h); } + +// half float, the f16 is in the low 16 bits of the input argument +static inline float __convert_half_to_float(std::uint32_t a) noexcept { + std::uint32_t u = ((a << 13) + 0x70000000U) & 0x8fffe000U; + std::uint32_t v = f32_as_u32(u32_as_f32(u) * u32_as_f32(0x77800000U)/*0x1.0p+112f*/) + 0x38000000U; + u = (a & 0x7fff) != 0 ? v : u; + return u32_as_f32(u) * u32_as_f32(0x07800000U)/*0x1.0p-112f*/; +} + +// float half with nearest even rounding +// The lower 16 bits of the result is the bit pattern for the f16 +static inline std::uint32_t __convert_float_to_half(float a) noexcept { + std::uint32_t u = f32_as_u32(a); + int e = static_cast((u >> 23) & 0xff) - 127 + 15; + std::uint32_t m = ((u >> 11) & 0xffe) | ((u & 0xfff) != 0); + std::uint32_t i = 0x7c00 | (m != 0 ? 
0x0200 : 0); + std::uint32_t n = ((std::uint32_t)e << 12) | m; + std::uint32_t s = (u >> 16) & 0x8000; + int b = clamp_int(1-e, 0, 13); + std::uint32_t d = (0x1000 | m) >> b; + d |= (d << b) != (0x1000 | m); + std::uint32_t v = e < 1 ? d : n; + v = (v >> 2) + (((v & 0x7) == 3) | ((v & 0x7) > 5)); + v = e > 30 ? 0x7c00 : v; + v = e == 143 ? i : v; + return s | v; +} + +extern "C" float __gnu_h2f_ieee(unsigned short h){ + return __convert_half_to_float((std::uint32_t) h); +} + +extern "C" unsigned short __gnu_f2h_ieee(float f){ + return (unsigned short)__convert_float_to_half(f); +} diff --git a/projects/hip/vdi/hip_platform.hpp b/projects/hip/vdi/hip_platform.hpp new file mode 100644 index 0000000000..8e5eaa191f --- /dev/null +++ b/projects/hip/vdi/hip_platform.hpp @@ -0,0 +1,29 @@ +/* Copyright (c) 2015-present Advanced Micro Devices, Inc. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. 
*/ +#pragma once + +#include "device/device.hpp" + +namespace hip_impl { +hipError_t ihipOccupancyMaxActiveBlocksPerMultiprocessor( + int* numBlocks, int* numGrids, + const amd::Device& device, hipFunction_t func, int blockSize, + size_t dynamicSMemSize, bool bCalcPotentialBlkSz); +} \ No newline at end of file diff --git a/projects/hip/vdi/hip_prof_api.h b/projects/hip/vdi/hip_prof_api.h new file mode 100644 index 0000000000..ff81fb7cf5 --- /dev/null +++ b/projects/hip/vdi/hip_prof_api.h @@ -0,0 +1,250 @@ +/* Copyright (c) 2019-present Advanced Micro Devices, Inc. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. 
*/ + +#ifndef HIP_SRC_HIP_PROF_API_H +#define HIP_SRC_HIP_PROF_API_H + +#include +#include +#include + +#if USE_PROF_API +#include "hip/hcc_detail/hip_prof_str.h" +#include "platform/prof_protocol.h" + +// HIP API callbacks spawner object macro +#define HIP_CB_SPAWNER_OBJECT(CB_ID) \ + api_callbacks_spawner_t __api_tracer; \ + { \ + hip_api_data_t* api_data = __api_tracer.get_api_data_ptr(); \ + if (api_data != NULL) { \ + hip_api_data_t& api_data_ref = *api_data; \ + INIT_CB_ARGS_DATA(CB_ID, api_data_ref); \ + __api_tracer.call(); \ + } \ + } + +static const uint32_t HIP_DOMAIN_ID = ACTIVITY_DOMAIN_HIP_API; +typedef activity_record_t hip_api_record_t; +typedef activity_rtapi_callback_t hip_api_callback_t; +typedef activity_sync_callback_t hip_act_callback_t; + +class api_callbacks_table_t { + public: + typedef std::mutex mutex_t; + + typedef hip_api_record_t record_t; + typedef hip_api_callback_t fun_t; + typedef hip_act_callback_t act_t; + + // HIP API callbacks table + struct hip_cb_table_entry_t { + volatile std::atomic sync; + volatile std::atomic sem; + act_t act; + void* a_arg; + fun_t fun; + void* arg; + }; + + struct hip_cb_table_t { + hip_cb_table_entry_t arr[HIP_API_ID_NUMBER]; + }; + + api_callbacks_table_t() { + memset(&callbacks_table_, 0, sizeof(callbacks_table_)); + } + + bool set_activity(uint32_t id, act_t fun, void* arg) { + std::lock_guard lock(mutex_); + bool ret = true; + + if (id < HIP_API_ID_NUMBER) { + cb_sync(id); + callbacks_table_.arr[id].act = fun; + callbacks_table_.arr[id].a_arg = arg; + enabled_ = true; + cb_release(id); + } else { + ret = false; + } + + return ret; + } + + bool set_callback(uint32_t id, fun_t fun, void* arg) { + std::lock_guard lock(mutex_); + bool ret = true; + + if (id < HIP_API_ID_NUMBER) { + cb_sync(id); + callbacks_table_.arr[id].fun = fun; + callbacks_table_.arr[id].arg = arg; + enabled_ = true; + cb_release(id); + } else { + ret = false; + } + + return ret; + } + + void set_enabled(const bool& enabled) { + 
enabled_ = enabled; + } + + inline hip_cb_table_entry_t& entry(const uint32_t& id) { + return callbacks_table_.arr[id]; + } + + inline void sem_sync(const uint32_t& id) { + sem_increment(id); + if (entry(id).sync.load() == true) sync_wait(id); + } + + inline void sem_release(const uint32_t& id) { + sem_decrement(id); + } + + inline bool is_enabled() const { + return enabled_; + } + + private: + inline void cb_sync(const uint32_t& id) { + entry(id).sync.store(true); + while (entry(id).sem.load() != 0) {} + } + + inline void cb_release(const uint32_t& id) { + entry(id).sync.store(false); + } + + inline void sem_increment(const uint32_t& id) { + const uint32_t prev = entry(id).sem.fetch_add(1); + if (prev == UINT32_MAX) { + std::cerr << "sem overflow id = " << id << std::endl << std::flush; + abort(); + } + } + + inline void sem_decrement(const uint32_t& id) { + const uint32_t prev = entry(id).sem.fetch_sub(1); + if (prev == 0) { + std::cerr << "sem corrupted id = " << id << std::endl << std::flush; + abort(); + } + } + + void sync_wait(const uint32_t& id) { + sem_decrement(id); + while (entry(id).sync.load() == true) {} + sem_increment(id); + } + + mutex_t mutex_; + hip_cb_table_t callbacks_table_; + bool enabled_; +}; + +extern api_callbacks_table_t callbacks_table; + +template +class api_callbacks_spawner_t { + public: + api_callbacks_spawner_t() : + api_data_(NULL) + { + if (!is_enabled()) return; + + if (cid_ >= HIP_API_ID_NUMBER) { + fprintf(stderr, "HIP %s bad id %d\n", __FUNCTION__, cid_); + abort(); + } + callbacks_table.sem_sync(cid_); + + hip_act_callback_t act = entry(cid_).act; + if (act != NULL) api_data_ = (hip_api_data_t*) act(cid_, NULL, NULL, NULL); + } + + void call() { + hip_api_callback_t fun = entry(cid_).fun; + void* arg = entry(cid_).arg; + if (fun != NULL) { + fun(HIP_DOMAIN_ID, cid_, api_data_, arg); + api_data_->phase = ACTIVITY_API_PHASE_EXIT; + } + } + + ~api_callbacks_spawner_t() { + if (!is_enabled()) return; + + if (api_data_ != NULL) { 
+ hip_api_callback_t fun = entry(cid_).fun; + void* arg = entry(cid_).arg; + hip_act_callback_t act = entry(cid_).act; + void* a_arg = entry(cid_).a_arg; + if (fun != NULL) fun(HIP_DOMAIN_ID, cid_, api_data_, arg); + if (act != NULL) act(cid_, NULL, NULL, a_arg); + } + + callbacks_table.sem_release(cid_); + } + + hip_api_data_t* get_api_data_ptr() { + return api_data_; + } + + bool is_enabled() const { + return callbacks_table.is_enabled(); + } + + private: + inline api_callbacks_table_t::hip_cb_table_entry_t& entry(const uint32_t& id) { + return callbacks_table.entry(id); + } + + hip_api_data_t* api_data_; +}; + +template <> +class api_callbacks_spawner_t { + public: + api_callbacks_spawner_t() {} + void call() {} + hip_api_data_t* get_api_data_ptr() { return NULL; } + bool is_enabled() const { return false; } +}; + +#else + +#define HIP_CB_SPAWNER_OBJECT(x) do {} while(0) + +class api_callbacks_table_t { + public: + typedef void* act_t; + typedef void* fun_t; + bool set_activity(uint32_t id, act_t fun, void* arg) { return false; } + bool set_callback(uint32_t id, fun_t fun, void* arg) { return false; } +}; + +#endif + +#endif // HIP_SRC_HIP_PROF_API_H diff --git a/projects/hip/vdi/hip_prof_gen.py b/projects/hip/vdi/hip_prof_gen.py new file mode 100755 index 0000000000..04f92e0a00 --- /dev/null +++ b/projects/hip/vdi/hip_prof_gen.py @@ -0,0 +1,612 @@ +#!/usr/bin/python + +# Copyright (c) 2019-present Advanced Micro Devices, Inc. All rights reserved. 
+# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. 
+ +import os, sys, re + +PROF_HEADER = "hip_prof_str.h" +OUTPUT = PROF_HEADER +REC_MAX_LEN = 1024 + +# Recursive sources processing +recursive_mode = 0 +# HIP_INIT_API macro patching +hip_patch_mode = 0 +# API matching types check +types_check_mode = 0 +# Private API check +private_check_mode = 0 + +# Messages and errors controll +verbose = 0 +errexit = 0 +inp_file = 'none' +line_num = -1 + +# Verbose message +def message(msg): + if verbose: sys.stdout.write(msg + '\n') + +# Fatal error termination +def error(msg): + if line_num != -1: + msg += ", file '" + inp_file + "', line (" + str(line_num) + ")" + if errexit: + msg = " Error: " + msg + else: + msg = " Warning: " + msg + + sys.stdout.write(msg + '\n') + sys.stderr.write(sys.argv[0] + msg +'\n') + +def fatal(msg): + error(msg) + sys.exit(1) + +############################################################# +# Normalizing API name +def filtr_api_name(name): + name = re.sub(r'\s*$', r'', name); + return name + +def filtr_api_decl(record): + record = re.sub("\s__dparm\([^\)]*\)", '', record); + record = re.sub("\(void\*\)", '', record); + return record + +# Normalizing API arguments +def filtr_api_args(args_str): + args_str = re.sub(r'^\s*', r'', args_str); + args_str = re.sub(r'\s*$', r'', args_str); + args_str = re.sub(r'\s*,\s*', r',', args_str); + args_str = re.sub(r'\s+', r' ', args_str); + #args_str = re.sub(r'void \*', r'void* ', args_str); + args_str = re.sub(r'\s*(\*+)\s*', r'\1 ', args_str); + args_str = re.sub(r'(enum|struct) ', '', args_str); + return args_str + +# Normalizing types +def norm_api_types(type_str): + type_str = re.sub(r'uint32_t', r'unsigned int', type_str) + type_str = re.sub(r'^unsigned$', r'unsigned int', type_str) + return type_str + +# Creating a list of arguments [(type, name), ...] 
def list_api_args(args_str):
  # args_str is a C argument list such as "const void* arg, size_t size".
  # Default values ("= x") are dropped, a lone 'void' is skipped and every
  # remaining argument is split at its last whitespace into (type, name).
  # An argument that cannot be split is a fatal error.
  args_str = filtr_api_args(args_str)
  args_list = []
  if args_str != '':
    for arg_pair in args_str.split(','):
      if arg_pair == 'void': continue
      arg_pair = re.sub(r'\s*=\s*\S+$', '', arg_pair)
      m = re.match(r"^(.*)\s(\S+)$", arg_pair)
      if m:
        arg_type = norm_api_types(m.group(1))
        arg_name = m.group(2)
        args_list.append((arg_type, arg_name))
      else:
        fatal("bad args: args_str: '" + args_str + "' arg_pair: '" + arg_pair + "'")
  return args_list

# Creating arguments string "type0, type1, ..."
# (every type, including the last one, is followed by ', ')
def filtr_api_types(args_str):
  return ''.join(arg_type + ', ' for arg_type, _ in list_api_args(args_str))

# Creating options list [opt0, opt1, ...]
def filtr_api_opts(args_str):
  return [arg_name for _, arg_name in list_api_args(args_str)]
#############################################################
# Parsing API header
# hipError_t hipSetupArgument(const void* arg, size_t size, size_t offset);
# inp_file_p - input API header file
# out - output map [api name] => arguments string; the first declaration
#       found for a name wins, already present names are not overwritten
def parse_api(inp_file_p, out):
  global inp_file
  global line_num
  inp_file = inp_file_p

  # Raw strings keep '\s', '\(' and '\*' intact for the regex engine
  beg_pattern = re.compile(r"^(hipError_t|const char\s*\*)\s+([^\(]+)\(")
  api_pattern = re.compile(r"^(hipError_t|const char\s*\*)\s+([^\(]+)\(([^\)]*)\)")
  end_pattern = re.compile("Texture")
  hidden_pattern = re.compile(r'__attribute__\(\(visibility\("hidden"\)\)\)')
  nms_open_pattern = re.compile(r'namespace hip_impl {')
  nms_close_pattern = re.compile(r'}')

  found = 0
  hidden = 0
  nms_level = 0
  record = ""
  line_num = -1

  # 'with' closes the header even when fatal() exits in the middle of the loop
  with open(inp_file, 'r') as inp:
    for line in inp:
      # Accumulating record; only the line terminator is removed so that a
      # final line without a trailing newline keeps its last character
      record += re.sub(r'^\s+', r' ', line.rstrip('\n'))
      line_num += 1

      if len(record) > REC_MAX_LEN:
        fatal("bad record \"" + record + "\"")

      # Looking for API declaration begin on the current line
      m = beg_pattern.match(line)
      if m:
        name = m.group(2)
        if hidden != 0:
          message("api: " + name + " - hidden")
        elif nms_level != 0:
          message("api: " + name + " - hip_impl")
        else:
          message("api: " + name)
          found = 1

      if found != 0:
        record = re.sub(r"\s__dparm\([^\)]*\)", '', record)
        m = api_pattern.match(record)
        if m:
          found = 0
          # Parsing stops at the first matched record mentioning "Texture"
          if end_pattern.search(record): break
          api_name = filtr_api_name(m.group(2))
          api_args = m.group(3)
          if not api_name in out:
            out[api_name] = api_args
        else:
          # Declaration is not complete yet, keep accumulating the record
          continue

      # A hidden-visibility attribute line marks the declaration on the next line
      hidden = 0
      if hidden_pattern.match(line): hidden = 1

      # Tracking 'namespace hip_impl {' nesting
      if nms_open_pattern.match(line): nms_level += 1
      if (nms_level > 0) and nms_close_pattern.match(line): nms_level -= 1
      if nms_level < 0:
        fatal("nms level < 0")

      record = ""

  line_num = -1
#############################################################
# Parsing API implementation
# hipError_t hipSetupArgument(const void* arg, size_t size, size_t offset) {
#    HIP_INIT_API(hipSetupArgument, arg, size, offset);
# inp_file_p - input implementation source file
# api_map - input public API map [api name] => arguments string
# out - output map [api name] => [opt0, opt1, ...]
# Returns the (possibly patched) file content, or '' when 'out' is empty.
def parse_content(inp_file_p, api_map, out):
  global hip_patch_mode
  global inp_file
  global line_num
  inp_file = inp_file_p

  # API method begin pattern
  beg_pattern = re.compile(r"^(hipError_t|const char\s*\*)\s+[^\(]+\(")
  # API declaration pattern
  decl_pattern = re.compile(r"^(hipError_t|const char\s*\*)\s+([^\(]+)\(([^\)]*)\)\s*;")
  # API definition pattern
  api_pattern = re.compile(r"^(hipError_t|const char\s*\*)\s+([^\(]+)\(([^\)]*)\)\s*{")
  # API init macro pattern
  init_pattern = re.compile(r"(^\s*HIP_INIT_API\s*)\((([^,]+)(,.*|)|)(\);|,)\s*$")

  # API name
  api_name = ""
  # Valid public API found flag
  api_valid = 0

  # Input file patched content
  content = ''
  # Current record, accumulating several API definition related lines
  record = ''
  # Current input file line number
  line_num = -1
  # API beginning found flag: 0 - none, 1 - begin matched, 2 - definition matched
  found = 0

  # Reading input file; 'with' closes it even when fatal() exits
  with open(inp_file, 'r') as inp:
    for line in inp:
      # Accumulating record; only the line terminator is removed so that a
      # final line without a trailing newline keeps its last character
      record += re.sub(r'^\s+', r' ', line.rstrip('\n'))
      line_num += 1

      if len(record) > REC_MAX_LEN:
        fatal("bad record \"" + record + "\"")

      # Looking for API begin
      if found == 0:
        if beg_pattern.match(record):
          found = 1
          record = filtr_api_decl(record)

      # Matching API declaration
      if found == 1:
        if decl_pattern.match(record):
          found = 0

      # Matching API definition
      if found == 1:
        m = api_pattern.match(record)
        # Checking if complete API matched
        if m:
          found = 2
          api_name = filtr_api_name(m.group(2))
          # Checking if API name is in the API map
          if (private_check_mode == 0) or (api_name in api_map):
            if not api_name in api_map: api_map[api_name] = ''
            # Getting API arguments
            api_args = m.group(3)
            # Getting etalon arguments from the API map
            eta_args = api_map[api_name]
            if eta_args == '':
              eta_args = api_args
              api_map[api_name] = eta_args
            # Normalizing API arguments
            api_types = filtr_api_types(api_args)
            # Normalizing etalon arguments
            eta_types = filtr_api_types(eta_args)
            if (api_types == eta_types) or ((types_check_mode == 0) and (not api_name in out)):
              # API is already found and is not mismatched
              if (api_name in out):
                fatal("API redefined \"" + api_name + "\", record \"" + record + "\"")
              # Set valid public API found flag
              api_valid = 1
              # Set output API map with API arguments list
              out[api_name] = filtr_api_opts(api_args)
            # Register mismatched API methods
            else:
              # Warning about mismatched API, possible non public overloaded version
              api_diff = '\t\t' + inp_file + " line(" + str(line_num) + ")\n\t\tapi: " + api_types + "\n\t\teta: " + eta_types
              message("\t" + api_name + ' args mismatch:\n' + api_diff + '\n')

      # NOTE(review): the source this was reconstructed from had lost its
      # indentation; the patching step is placed at loop level (it runs for
      # every line, also outside recognized API bodies, so that INIT macros
      # of non-API functions get the NONE name) - confirm against the
      # original file.
      if hip_patch_mode != 0:
        # Looking for INIT macro
        m = init_pattern.match(line)
        if m:
          if api_valid == 0: api_name = 'NONE'

          if api_name == m.group(3):
            # The macro already carries the API name: sources are patched.
            # Fine if nothing was patched so far, inconsistent otherwise.
            if hip_patch_mode == 1: hip_patch_mode = 0
            else: fatal("patching failed")
          else:
            # Inserting the API name as the first macro argument
            hip_patch_mode = 2
            init_args = m.group(2)
            if init_args != '': init_args = ', ' + init_args
            line = m.group(1) + '(' + api_name + init_args + m.group(5) + '\n'

      # API found action
      if found == 2:
        # Looking for INIT macro
        m = init_pattern.match(line)
        if m:
          found = 0
          if api_valid == 1:
            api_valid = 0
            message("\t" + api_name)
          else:
            # Registering dummy API for non public API if the name in INIT is not NONE
            init_name = m.group(3)
            # Ignore if it is initialized as NONE
            if init_name != 'NONE':
              # Check if init name matching API name
              if init_name != api_name:
                fatal("init name mismatch: '" + init_name + "' <> '" + api_name + "'")
              # If init name is not in public API map then it is private API
              # else it was not identified and will be checked on finish
              if not init_name in api_map:
                if init_name in out:
                  fatal("API reinit \"" + api_name + "\", record \"" + record + "\"")
                out[init_name] = []
        elif re.search('}', line):
          found = 0
          # Expect INIT macro for valid public API
          # Removing and registering non-conformant APIs with missing HIP_INIT macro
          if api_valid == 1:
            api_valid = 0
            if api_name in out:
              del out[api_name]
              del api_map[api_name]
              # Registering non-conformant APIs under a '.'-prefixed key
              out['.' + api_name] = 1
            else:
              fatal("API is not in out \"" + api_name + "\", record \"" + record + "\"")

      # The record keeps accumulating only while an API begin is pending
      if found != 1: record = ""
      content += line

  line_num = -1

  if len(out) != 0:
    return content
  else:
    return ''

# src path walk
# api_map - public API map, passed through to parse_content()
# src_path - ':' separated list of source directories
# src_patt - regex selecting the source file names to parse
# out - output options map, filled by parse_content()
# In patching mode every parsed file with non-empty content is rewritten.
# Without recursive mode only the top level of each directory is visited.
def parse_src(api_map, src_path, src_patt, out):
  pattern = re.compile(src_patt)
  src_path = re.sub(r'\s', '', src_path)
  for src_dir in src_path.split(':'):
    message("Parsing " + src_dir + " for '" + src_patt + "'")
    for root, dirs, files in os.walk(src_dir):
      for fnm in files:
        if pattern.search(fnm):
          src_file = os.path.join(root, fnm)
          message(src_file)
          content = parse_content(src_file, api_map, out)
          if (hip_patch_mode != 0) and (content != ''):
            with open(src_file, 'w') as f:
              f.write(content)
      if recursive_mode == 0: break
#############################################################
# Generating profiling primitives header
# api_map - public API map [api name] => [(type, name), ...]
# opts_map - opts map [api name] => [opt0, opt1, ...]
+def generate_prof_header(f, api_map, opts_map): + # Private API list + priv_lst = [] + + f.write('// automatically generated sources\n') + f.write('#ifndef _HIP_PROF_STR_H\n'); + f.write('#define _HIP_PROF_STR_H\n'); + f.write('#define HIP_PROF_VER 1\n') + + # Generating dummy macro for non-public API + f.write('\n// Dummy API primitives\n') + f.write('#define INIT_NONE_CB_ARGS_DATA(cb_data) {};\n') + for name in opts_map: + if not name in api_map: + opts_lst = opts_map[name] + if len(opts_lst) != 0: + fatal("bad dummy API \"" + name + "\", args: " + str(opts_lst)) + f.write('#define INIT_'+ name + '_CB_ARGS_DATA(cb_data) {};\n') + priv_lst.append(name) + + for name in priv_lst: + message("Private: " + name) + + # Generating the callbacks ID enumaration + f.write('\n// HIP API callbacks ID enumaration\n') + f.write('enum hip_api_id_t {\n') + cb_id = 0 + for name in api_map.keys(): + f.write(' HIP_API_ID_' + name + ' = ' + str(cb_id) + ',\n') + cb_id += 1 + f.write(' HIP_API_ID_NUMBER = ' + str(cb_id) + ',\n') + f.write('\n') + f.write(' HIP_API_ID_NONE = HIP_API_ID_NUMBER,\n') + for name in priv_lst: + f.write(' HIP_API_ID_' + name + ' = HIP_API_ID_NUMBER,\n') + f.write('};\n') + + # Generating the callbacks ID enumaration + f.write('\n// Return HIP API string\n') + f.write('inline const char* hip_api_name(const uint32_t id) {\n') + f.write(' switch(id) {\n') + for name in api_map.keys(): + f.write(' case HIP_API_ID_' + name + ': return "' + name + '";\n') + f.write(' };\n') + f.write(' return "unknown";\n') + f.write('};\n') + + # Generating the callbacks data structure + f.write('\n// HIP API callbacks data structure\n') + f.write( + 'typedef struct hip_api_data_s {\n' + + ' uint64_t correlation_id;\n' + + ' uint32_t phase;\n' + + ' union {\n' + ) + for name, args in api_map.items(): + if len(args) != 0: + f.write(' struct {\n') + for arg_tuple in args: + if arg_tuple[0] == "hipLimit_t": + f.write(' enum ' + arg_tuple[0] + ' ' + arg_tuple[1] + ';\n') + else: + 
f.write(' ' + arg_tuple[0] + ' ' + arg_tuple[1] + ';\n') + f.write(' } ' + name + ';\n') + f.write( + ' } args;\n' + + '} hip_api_data_t;\n' + ) + + # Generating the callbacks args data filling macros + f.write('\n// HIP API callbacks args data filling macros\n') + for name, args in api_map.items(): + f.write('// ' + name + str(args) + '\n') + f.write('#define INIT_' + name + '_CB_ARGS_DATA(cb_data) { \\\n') + if name in opts_map: + opts_list = opts_map[name] + if len(args) != len(opts_list): + fatal("\"" + name + "\" API args and opts mismatch, args: " + str(args) + ", opts: " + str(opts_list)) + # API args iterating: + # type is args[][0] + # name is args[][1] + for ind in range(0, len(args)): + arg_tuple = args[ind] + arg_type = arg_tuple[0] + fld_name = arg_tuple[1] + arg_name = opts_list[ind] + f.write(' cb_data.args.' + name + '.' + fld_name + ' = (' + arg_type + ')' + arg_name + '; \\\n') + f.write('};\n') + f.write('#define INIT_CB_ARGS_DATA(cb_id, cb_data) INIT_##cb_id##_CB_ARGS_DATA(cb_data)\n') + + # Generating the method for the API string, name and parameters + f.write('\n') + f.write('#if HIP_PROF_HIP_API_STRING\n') + f.write('#include \n'); + f.write('#include \n'); + f.write('// HIP API string method, method name and parameters\n') + f.write('const char* hipApiString(hip_api_id_t id, const hip_api_data_t* data) {\n') + f.write(' std::ostringstream oss;\n') + f.write(' switch (id) {\n') + for name, args in api_map.items(): + f.write(' case HIP_API_ID_' + name + ':\n') + f.write(' oss << "' + name + '("') + for ind in range(0, len(args)): + arg_tuple = args[ind] + arg_name = arg_tuple[1] + if ind != 0: f.write(' << ","') + f.write('\n << " ' + arg_name + '=" << data->args.' + name + '.' 
+ arg_name) + f.write('\n << ")";\n') + f.write(' break;\n') + f.write(' default: oss << "unknown";\n') + f.write(' };\n') + f.write(' return strdup(oss.str().c_str());\n') + f.write('};\n') + f.write('#endif // HIP_PROF_HIP_API_STRING\n') + + f.write('#endif // _HIP_PROF_STR_H\n'); + +############################################################# +# main +while len(sys.argv) > 1: + if not re.match(r'-', sys.argv[1]): break + + if (sys.argv[1] == '-v'): + verbose = 1 + sys.argv.pop(1) + + if (sys.argv[1] == '-r'): + recursive_mode = 1 + sys.argv.pop(1) + + if (sys.argv[1] == '-t'): + types_check_mode = 1 + sys.argv.pop(1) + + if (sys.argv[1] == '--priv'): + private_check_mode = 1 + sys.argv.pop(1) + + if (sys.argv[1] == '-e'): + errexit = 1 + sys.argv.pop(1) + + if (sys.argv[1] == '-p'): + hip_patch_mode = 1 + sys.argv.pop(1) + +# Usage +if (len(sys.argv) < 3): + fatal ("Usage: " + sys.argv[0] + " [-v] []\n" + + " -v - verbose messages\n" + + " -r - process source directory recursively\n" + + " -t - API types matching check\n" + + " --priv - private API check\n" + + " -e - on error exit mode\n" + + " -p - HIP_INIT_API macro patching mode\n" + + "\n" + + " Example:\n" + + " $ " + sys.argv[0] + " -v -p -t --priv ./api/hip/include/hip/hcc_detail/hip_runtime_api.h ./api/hip ./api/hip/include/hip/hcc_detail/hip_prof_str.h"); + +# API header file given as an argument +src_pat = "\.cpp$" +api_hfile = sys.argv[1] +if not os.path.isfile(api_hfile): + fatal("input file '" + api_hfile + "' not found") + +# Srcs directory given as an argument +src_dir = sys.argv[2] +if not os.path.isdir(src_dir): + fatal("src directory " + src_dir + "' not found") + +if len(sys.argv) > 3: OUTPUT = sys.argv[3] + +# API declaration map +api_map = { + 'hipSetupArgument': '', + 'hipMalloc3DArray': '', + 'hipFuncGetAttribute': '', + 'hipMemset3DAsync': '', + 'hipKernelNameRef': '', + 'hipStreamGetPriority': '', + 'hipLaunchByPtr': '', + 'hipFreeHost': '', + 'hipGetErrorName': '', + 
'hipMemcpy3DAsync': '', + 'hipMemcpyParam2DAsync': '', + 'hipArray3DCreate': '', + 'hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags': '', + 'hipOccupancyMaxPotentialBlockSize': '', + 'hipMallocManaged': '', + 'hipOccupancyMaxActiveBlocksPerMultiprocessor': '', + 'hipGetErrorString': '', + 'hipMallocHost': '', + 'hipModuleLoadDataEx': '', + 'hipGetDeviceProperties': '', + 'hipConfigureCall': '', + 'hipHccModuleLaunchKernel': '', + 'hipExtModuleLaunchKernel': '', +} +# API options map +opts_map = {} + +# Parsing API header +parse_api(api_hfile, api_map) + +# Parsing sources +parse_src(api_map, src_dir, src_pat, opts_map) + +# Checking for non-conformant APIs with missing HIP_INIT macro +for name in list(opts_map.keys()): + m = re.match(r'\.(\S*)', name) + if m: + message("Init missing: " + m.group(1)) + del opts_map[name] + +# Converting api map to map of lists +# Checking for not found APIs +not_found = 0 +if len(opts_map) != 0: + for name in api_map.keys(): + args_str = api_map[name]; + api_map[name] = list_api_args(args_str) + if not name in opts_map: + error("implementation not found: " + name) + not_found += 1 +if not_found != 0: + error(str(not_found) + " API calls missing in interception layer") + +# Generating output header file +with open(OUTPUT, 'w') as f: + generate_prof_header(f, api_map, opts_map) + +# Successfull exit +sys.exit(0) diff --git a/projects/hip/vdi/hip_profile.cpp b/projects/hip/vdi/hip_profile.cpp new file mode 100644 index 0000000000..3422f428ea --- /dev/null +++ b/projects/hip/vdi/hip_profile.cpp @@ -0,0 +1,40 @@ +/* Copyright (c) 2015-present Advanced Micro Devices, Inc. 
+ + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. */ + +#include + +#include "hip_internal.hpp" + +hipError_t hipProfilerStart() { + HIP_INIT_API(hipProfilerStart); + + assert(0 && "Unimplemented"); + + HIP_RETURN(hipErrorNotSupported); +} + + +hipError_t hipProfilerStop() { + HIP_INIT_API(hipProfilerStop); + + assert(0 && "Unimplemented"); + + HIP_RETURN(hipErrorNotSupported); +} diff --git a/projects/hip/vdi/hip_rtc.cpp b/projects/hip/vdi/hip_rtc.cpp new file mode 100644 index 0000000000..9897b98b7f --- /dev/null +++ b/projects/hip/vdi/hip_rtc.cpp @@ -0,0 +1,393 @@ +/* Copyright (c) 2015-present Advanced Micro Devices, Inc. 
+ + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. 
*/ + +#include +#include "hiprtc_internal.hpp" +#include +#include "platform/program.hpp" + +namespace hiprtc { +thread_local hiprtcResult g_lastRtcError = HIPRTC_SUCCESS; +} + +class ProgramState { + amd::Monitor lock_; +private: + static ProgramState* programState_; + + ProgramState() : lock_("Guards program state") {} + ~ProgramState() {} +public: + std::unordered_map, std::vector>> progHeaders_; + + std::map> nameExpresssion_; + + static ProgramState& instance(); + void createProgramHeaders(amd::Program* program, int numHeaders, + const char** headers, const char** headerNames); + void getProgramHeaders(amd::Program* program, int* numHeaders, char** headers, char ** headerNames); + uint32_t addNameExpression(const char* name_expression); + char* getLoweredName(const char* name_expression); +}; + +ProgramState* ProgramState::programState_ = nullptr; + +ProgramState& ProgramState::instance() { + if (programState_ == nullptr) { + programState_ = new ProgramState; + } + return *programState_; +} + +void ProgramState::createProgramHeaders(amd::Program* program, int numHeaders, + const char** headers, const char** headerNames) { + amd::ScopedLock lock(lock_); + std::vector vHeaderNames; + std::vector vHeaders; + for (auto i = 0; i != numHeaders; ++i) { + vHeaders.emplace_back(headers[i]); + vHeaderNames.emplace_back(headerNames[i]); + progHeaders_[program] = std::make_pair(std::move(vHeaders), std::move(vHeaderNames)); + } +} + +void ProgramState::getProgramHeaders(amd::Program* program, int* numHeaders, + char** headers, char ** headerNames) { + amd::ScopedLock lock(lock_); + + const auto it = progHeaders_.find(program); + if (it != progHeaders_.cend()) { + *numHeaders = it->second.first.size(); + *headers = reinterpret_cast(it->second.first.data()); + *headerNames = reinterpret_cast(it->second.second.data()); + } +} + +uint32_t ProgramState::addNameExpression(const char* name_expression) { + amd::ScopedLock lock(lock_); + + // Strip clean of any '(' or ')' or '&' + 
std::string strippedName(name_expression); + if (strippedName.back() == ')') { + strippedName.pop_back(); + strippedName.erase(0, strippedName.find('(')); + } + if (strippedName.front() == '&') { + strippedName.erase(0, 1); + } + auto it = nameExpresssion_.find(name_expression); + if (it == nameExpresssion_.end()) { + nameExpresssion_.insert(std::pair> + (name_expression, std::make_pair(strippedName,""))); + } + return nameExpresssion_.size(); +} + +char* demangle(const char* loweredName) { + if (!loweredName) { + return nullptr; + } +#if __linux__ + int status = 0; + char* demangledName = DEMANGLE(loweredName, nullptr, nullptr, &status); + if (status != 0) { + return nullptr; + } +#elif defined(_WIN32) + char* demangledName = (char*)malloc(UNDECORATED_SIZE); + + if (!UnDecorateSymbolName(loweredName, demangledName, + UNDECORATED_SIZE/ sizeof(*demangledName), UNDNAME_COMPLETE)) + { + free(demangledName); + return nullptr; + } +#else +#error "Only Linux and Windows are supported" +#endif // __linux__ + return demangledName; +} + +static std::string handleMangledName(std::string name) { + std::string loweredName; + char* demangled = demangle(name.c_str()); + loweredName.assign(demangled == nullptr ? std::string() : demangled); + free(demangled); + + if (loweredName.empty()) { + return name; + } + + if (loweredName.find(".kd") != std::string::npos) { + return {}; + } + + if (loweredName.find("void ") == 0) { + loweredName.erase(0, strlen("void ")); + } + + auto dx{loweredName.find_first_of("(<")}; + + if (dx == std::string::npos) { + return loweredName; + } + + if (loweredName[dx] == '<') { + uint32_t count = 1; + do { + ++dx; + count += (loweredName[dx] == '<') ? 1 : ((loweredName[dx] == '>') ? 
-1 : 0); + } while (count); + + loweredName.erase(++dx); + } else { + loweredName.erase(dx); + } + + return loweredName; +} + +const char* hiprtcGetErrorString(hiprtcResult x) { + switch (x) { + case HIPRTC_SUCCESS: + return "HIPRTC_SUCCESS"; + case HIPRTC_ERROR_OUT_OF_MEMORY: + return "HIPRTC_ERROR_OUT_OF_MEMORY"; + case HIPRTC_ERROR_PROGRAM_CREATION_FAILURE: + return "HIPRTC_ERROR_PROGRAM_CREATION_FAILURE"; + case HIPRTC_ERROR_INVALID_INPUT: + return "HIPRTC_ERROR_INVALID_INPUT"; + case HIPRTC_ERROR_INVALID_PROGRAM: + return "HIPRTC_ERROR_INVALID_PROGRAM"; + case HIPRTC_ERROR_INVALID_OPTION: + return "HIPRTC_ERROR_INVALID_OPTION"; + case HIPRTC_ERROR_COMPILATION: + return "HIPRTC_ERROR_COMPILATION"; + case HIPRTC_ERROR_BUILTIN_OPERATION_FAILURE: + return "HIPRTC_ERROR_BUILTIN_OPERATION_FAILURE"; + case HIPRTC_ERROR_NO_NAME_EXPRESSIONS_AFTER_COMPILATION: + return "HIPRTC_ERROR_NO_NAME_EXPRESSIONS_AFTER_COMPILATION"; + case HIPRTC_ERROR_NO_LOWERED_NAMES_BEFORE_COMPILATION: + return "HIPRTC_ERROR_NO_LOWERED_NAMES_BEFORE_COMPILATION"; + case HIPRTC_ERROR_NAME_EXPRESSION_NOT_VALID: + return "HIPRTC_ERROR_NAME_EXPRESSION_NOT_VALID"; + case HIPRTC_ERROR_INTERNAL_ERROR: + return "HIPRTC_ERROR_INTERNAL_ERROR"; + default: + return nullptr; + }; + + ShouldNotReachHere(); + + return nullptr; +} + +hiprtcResult hiprtcCreateProgram(hiprtcProgram* prog, const char* src, const char* name, + int numHeaders, const char** headers, const char** headerNames) { + HIPRTC_INIT_API(prog, src, name, numHeaders, headers, headerNames); + + if (prog == nullptr) { + HIPRTC_RETURN(HIPRTC_ERROR_INVALID_PROGRAM); + } + if (numHeaders < 0) { + HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT); + } + if (numHeaders && (headers == nullptr || headerNames == nullptr)) { + HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT); + } + + amd::Program* program = new amd::Program(*hip::getCurrentDevice()->asContext(), src, amd::Program::HIP); + if (program == NULL) { + HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT); + } + + if 
(CL_SUCCESS != program->addDeviceProgram(*hip::getCurrentDevice()->devices()[0])) { + program->release(); + HIPRTC_RETURN(HIPRTC_ERROR_PROGRAM_CREATION_FAILURE); + } + + ProgramState::instance().createProgramHeaders(program, numHeaders, headers, headerNames); + + *prog = reinterpret_cast(as_cl(program)); + + HIPRTC_RETURN(HIPRTC_SUCCESS); +} + +hiprtcResult hiprtcCompileProgram(hiprtcProgram prog, int numOptions, const char** options) { + + // FIXME[skudchad] Add headers to amd::Program::build and device::Program::build, + // pass the saved from ProgramState to amd::Program::build + HIPRTC_INIT_API(prog, numOptions, options); + + amd::Program* program = as_amd(reinterpret_cast(prog)); + + std::ostringstream ostrstr; + std::vector oarr(&options[0], &options[numOptions]); + std::copy(oarr.begin(), oarr.end(), std::ostream_iterator(ostrstr, " ")); + + ostrstr.str().append(" -DHIP_VERSION_MAJOR=9"); + ostrstr.str().append(" -DHIP_VERSION_MINOR=0"); + + std::vector devices{hip::getCurrentDevice()->devices()[0]}; + if (CL_SUCCESS != program->build(devices, ostrstr.str().c_str(), nullptr, nullptr)) { + HIPRTC_RETURN(HIPRTC_ERROR_COMPILATION); + } + + HIPRTC_RETURN(HIPRTC_SUCCESS); +} + +hiprtcResult hiprtcAddNameExpression(hiprtcProgram prog, const char* name_expression) { + HIPRTC_INIT_API(prog, name_expression); + + if (name_expression == nullptr) { + HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT); + } + amd::Program* program = as_amd(reinterpret_cast(prog)); + + uint32_t id = ProgramState::instance().addNameExpression(name_expression); + + const auto var{"__hiprtc_" + std::to_string(id)}; + const auto code{"\nextern \"C\" constexpr auto " + var + " = " + name_expression + ';'}; + + program->appendToSource(code.c_str()); + + HIPRTC_RETURN(HIPRTC_SUCCESS); +} + +hiprtcResult hiprtcGetLoweredName(hiprtcProgram prog, const char* name_expression, + const char** loweredName) { + HIPRTC_INIT_API(prog, name_expression, loweredName); + + if (name_expression == nullptr || loweredName 
== nullptr) { + HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT); + } + + amd::Program* program = as_amd(reinterpret_cast(prog)); + + device::Program* dev_program + = program->getDeviceProgram(*hip::getCurrentDevice()->devices()[0]); + + auto it = ProgramState::instance().nameExpresssion_.find(name_expression); + if (it == ProgramState::instance().nameExpresssion_.end()) { + return HIPRTC_ERROR_NAME_EXPRESSION_NOT_VALID; + } + + std::string strippedName = it->second.first; + std::vector mangledNames; + + if (!dev_program->getLoweredNames(&mangledNames)) { + HIPRTC_RETURN(HIPRTC_ERROR_COMPILATION); + } + + for (auto &name : mangledNames) { + std::string demangledName = handleMangledName(name); + if (demangledName == strippedName) { + it->second.second.assign(name); + } + } + + *loweredName = it->second.second.c_str(); + + HIPRTC_RETURN(HIPRTC_SUCCESS); +} + +hiprtcResult hiprtcDestroyProgram(hiprtcProgram* prog) { + HIPRTC_INIT_API(prog); + + if (prog == NULL) { + HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT); + } + + // Release program. 
hiprtcProgram is a double pointer so free *prog + amd::Program* program = as_amd(reinterpret_cast(*prog)); + + program->release(); + + HIPRTC_RETURN(HIPRTC_SUCCESS); +} + +hiprtcResult hiprtcGetCode(hiprtcProgram prog, char* binaryMem) { + HIPRTC_INIT_API(prog, binaryMem); + + + amd::Program* program = as_amd(reinterpret_cast(prog)); + const device::Program::binary_t& binary = + program->getDeviceProgram(*hip::getCurrentDevice()->devices()[0])->binary(); + + ::memcpy(binaryMem, binary.first, binary.second); + + HIPRTC_RETURN(HIPRTC_SUCCESS); +} + +hiprtcResult hiprtcGetCodeSize(hiprtcProgram prog, size_t* binarySizeRet) { + + HIPRTC_INIT_API(prog, binarySizeRet); + + amd::Program* program = as_amd(reinterpret_cast(prog)); + + *binarySizeRet = + program->getDeviceProgram(*hip::getCurrentDevice()->devices()[0])->binary().second; + + HIPRTC_RETURN(HIPRTC_SUCCESS); +} + +hiprtcResult hiprtcGetProgramLog(hiprtcProgram prog, char* dst) { + + HIPRTC_INIT_API(prog, dst); + amd::Program* program = as_amd(reinterpret_cast(prog)); + const device::Program* devProgram = + program->getDeviceProgram(*hip::getCurrentDevice()->devices()[0]); + + auto log = program->programLog() + devProgram->buildLog().c_str(); + + log.copy(dst, log.size()); + dst[log.size()] = '\0'; + + HIPRTC_RETURN(HIPRTC_SUCCESS); +} + +hiprtcResult hiprtcGetProgramLogSize(hiprtcProgram prog, size_t* logSizeRet) { + + HIPRTC_INIT_API(prog, logSizeRet); + + amd::Program* program = as_amd(reinterpret_cast(prog)); + const device::Program* devProgram = + program->getDeviceProgram(*hip::getCurrentDevice()->devices()[0]); + + auto log = program->programLog() + devProgram->buildLog().c_str(); + + *logSizeRet = log.size() + 1; + + HIPRTC_RETURN(HIPRTC_SUCCESS); +} + +hiprtcResult hiprtcVersion(int* major, int* minor) { + HIPRTC_INIT_API(major, minor); + + if (major == nullptr || minor == nullptr) { + HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT); + } + + *major = 9; + *minor = 0; + + HIPRTC_RETURN(HIPRTC_SUCCESS); +} diff 
--git a/projects/hip/vdi/hip_stream.cpp b/projects/hip/vdi/hip_stream.cpp new file mode 100644 index 0000000000..eac42c0203 --- /dev/null +++ b/projects/hip/vdi/hip_stream.cpp @@ -0,0 +1,274 @@ +/* Copyright (c) 2015-present Advanced Micro Devices, Inc. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. 
*/ + +#include +#include "hip_internal.hpp" +#include "hip_event.hpp" +#include "thread/monitor.hpp" + +static amd::Monitor streamSetLock("Guards global stream set"); +static std::unordered_set streamSet; + +// Internal structure for stream callback handler +class StreamCallback { + public: + StreamCallback(hipStream_t stream, hipStreamCallback_t callback, void* userData, + amd::Command* command) + : stream_(stream), callBack_(callback), + userData_(userData), command_(command) { + }; + hipStream_t stream_; + hipStreamCallback_t callBack_; + void* userData_; + amd::Command* command_; +}; + +namespace hip { + +void syncStreams(int devId) { + amd::ScopedLock lock(streamSetLock); + + for (const auto& it : streamSet) { + if (it->device->deviceId() == devId) { + it->finish(); + } + } +} + +void syncStreams() { + syncStreams(getCurrentDevice()->deviceId()); +} + +Stream::Stream(hip::Device* dev, amd::CommandQueue::Priority p, unsigned int f) : + queue(nullptr), lock("Stream Callback lock"), device(dev), priority(p), flags(f) {} + +void Stream::create() { + cl_command_queue_properties properties = CL_QUEUE_PROFILING_ENABLE; + queue = new amd::HostQueue(*device->asContext(), *device->devices()[0], properties, + amd::CommandQueue::RealTimeDisabled, priority); + assert(queue != nullptr); + queue->create(); +} + +amd::HostQueue* Stream::asHostQueue() { + if (queue == nullptr) { + create(); + } + return queue; +} + +void Stream::destroy() { + if (queue != nullptr) { + queue->release(); + queue = nullptr; + } +} + +void Stream::finish() { + if (queue != nullptr) { + queue->finish(); + } +} + +}; + +void CL_CALLBACK ihipStreamCallback(cl_event event, cl_int command_exec_status, void* user_data) { + hipError_t status = hipSuccess; + StreamCallback* cbo = reinterpret_cast(user_data); + { + amd::ScopedLock lock(reinterpret_cast(cbo->stream_)->lock); + cbo->callBack_(cbo->stream_, status, cbo->userData_); + } + cbo->command_->release(); + delete cbo; +} + +static hipError_t 
ihipStreamCreate(hipStream_t *stream, unsigned int flags, amd::CommandQueue::Priority priority) { + hip::Stream* hStream = new hip::Stream(hip::getCurrentDevice(), priority, flags); + + if (hStream == nullptr) { + return hipErrorOutOfMemory; + } + + if (!(flags & hipStreamNonBlocking)) { + hip::syncStreams(); + + { + amd::ScopedLock lock(streamSetLock); + streamSet.insert(hStream); + } + } + + *stream = reinterpret_cast(hStream); + + ClPrint(amd::LOG_INFO, amd::LOG_API, "ihipStreamCreate: %zx", hStream); + + return hipSuccess; +} + +hipError_t hipStreamCreateWithFlags(hipStream_t *stream, unsigned int flags) { + HIP_INIT_API(hipStreamCreateWithFlags, stream, flags); + + HIP_RETURN(ihipStreamCreate(stream, flags, amd::CommandQueue::Priority::Normal)); +} + +hipError_t hipStreamCreate(hipStream_t *stream) { + HIP_INIT_API(hipStreamCreate, stream); + + HIP_RETURN(ihipStreamCreate(stream, hipStreamDefault, amd::CommandQueue::Priority::Normal)); +} + +hipError_t hipStreamCreateWithPriority(hipStream_t* stream, unsigned int flags, int priority) { + HIP_INIT_API(hipStreamCreateWithPriority, stream, flags, priority); + + if (priority > static_cast(amd::CommandQueue::Priority::High)) { + priority = static_cast(amd::CommandQueue::Priority::High); + } else if (priority < static_cast(amd::CommandQueue::Priority::Normal)) { + priority = static_cast(amd::CommandQueue::Priority::Normal); + } + + return HIP_RETURN(ihipStreamCreate(stream, flags, static_cast(priority))); +} + +hipError_t hipDeviceGetStreamPriorityRange(int* leastPriority, int* greatestPriority) { + HIP_INIT_API(hipDeviceGetStreamPriorityRange, leastPriority, greatestPriority); + + if (leastPriority != nullptr) { + *leastPriority = static_cast(amd::CommandQueue::Priority::Normal); + } + if (greatestPriority != nullptr) { + // Only report one kind of priority for now. 
+ *greatestPriority = static_cast(amd::CommandQueue::Priority::Normal); + } + return HIP_RETURN(hipSuccess); +} + +hipError_t hipStreamGetFlags(hipStream_t stream, unsigned int *flags) { + HIP_INIT_API(hipStreamGetFlags, stream, flags); + + hip::Stream* hStream = reinterpret_cast(stream); + + if(flags != nullptr && hStream != nullptr) { + *flags = hStream->flags; + } else { + HIP_RETURN(hipErrorInvalidValue); + } + + HIP_RETURN(hipSuccess); +} + +hipError_t hipStreamSynchronize(hipStream_t stream) { + HIP_INIT_API(hipStreamSynchronize, stream); + + amd::HostQueue* hostQueue = hip::getQueue(stream); + hostQueue->finish(); + + HIP_RETURN(hipSuccess); +} + +hipError_t hipStreamDestroy(hipStream_t stream) { + HIP_INIT_API(hipStreamDestroy, stream); + + if (stream == nullptr) { + HIP_RETURN(hipErrorInvalidHandle); + } + + amd::ScopedLock lock(streamSetLock); + + hip::Stream* hStream = reinterpret_cast(stream); + + hStream->destroy(); + streamSet.erase(hStream); + + delete hStream; + + HIP_RETURN(hipSuccess); +} + +hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int flags) { + HIP_INIT_API(hipStreamWaitEvent, stream, event, flags); + + amd::HostQueue* queue; + + if (stream == nullptr) { + queue = hip::getNullStream(); + } else { + queue = reinterpret_cast(stream)->asHostQueue(); + } + + if (event == nullptr) { + HIP_RETURN(hipErrorInvalidHandle); + } + + hip::Event* e = reinterpret_cast(event); + + return HIP_RETURN(e->streamWait(queue, flags)); +} + +hipError_t hipStreamQuery(hipStream_t stream) { + HIP_INIT_API(hipStreamQuery, stream); + + amd::HostQueue* hostQueue; + if (stream == nullptr) { + hostQueue = hip::getNullStream(); + } else { + hostQueue = reinterpret_cast(stream)->asHostQueue(); + } + + amd::Command* command = hostQueue->getLastQueuedCommand(true); + if (command == nullptr) { + HIP_RETURN(hipSuccess); + } + + amd::Event& event = command->event(); + if (command->type() != 0) { + event.notifyCmdQueue(); + } + hipError_t status = 
(command->status() == CL_COMPLETE) ? hipSuccess : hipErrorNotReady; + command->release(); + HIP_RETURN(status); +} + +hipError_t hipStreamAddCallback(hipStream_t stream, hipStreamCallback_t callback, void* userData, + unsigned int flags) { + HIP_INIT_API(hipStreamAddCallback, stream, callback, userData, flags); + + amd::HostQueue* hostQueue = reinterpret_cast + (stream)->asHostQueue(); + amd::Command* command = hostQueue->getLastQueuedCommand(true); + if (command == nullptr) { + amd::Command::EventWaitList eventWaitList; + command = new amd::Marker(*hostQueue, false, eventWaitList); + command->enqueue(); + } + amd::Event& event = command->event(); + StreamCallback* cbo = new StreamCallback(stream, callback, userData, command); + + if(!event.setCallback(CL_COMPLETE, ihipStreamCallback, reinterpret_cast(cbo))) { + command->release(); + return hipErrorInvalidHandle; + } + + event.notifyCmdQueue(); + + HIP_RETURN(hipSuccess); +} + + diff --git a/projects/hip/vdi/hip_surface.cpp b/projects/hip/vdi/hip_surface.cpp new file mode 100644 index 0000000000..5adaf418cf --- /dev/null +++ b/projects/hip/vdi/hip_surface.cpp @@ -0,0 +1,37 @@ +/* Copyright (c) 2015-present Advanced Micro Devices, Inc. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. */ + +#include + +#include "hip_internal.hpp" +#include + +hipError_t hipCreateSurfaceObject(hipSurfaceObject_t* pSurfObject, + const hipResourceDesc* pResDesc) { + HIP_INIT_API(hipCreateSurfaceObject, pSurfObject, pResDesc); + + HIP_RETURN(hipErrorNotSupported); +} + +hipError_t hipDestroySurfaceObject(hipSurfaceObject_t surfaceObject) { + HIP_INIT_API(hipDestroySurfaceObject, surfaceObject); + + HIP_RETURN(hipErrorNotSupported); +} diff --git a/projects/hip/vdi/hip_texture.cpp b/projects/hip/vdi/hip_texture.cpp new file mode 100644 index 0000000000..b837729721 --- /dev/null +++ b/projects/hip/vdi/hip_texture.cpp @@ -0,0 +1,1207 @@ +/* Copyright (c) 2015-present Advanced Micro Devices, Inc. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. */ + +#include +#include +#include "hip_internal.hpp" +#include "hip_conversions.hpp" +#include "platform/sampler.hpp" + +struct __hip_texture { + uint32_t imageSRD[HIP_IMAGE_OBJECT_SIZE_DWORD]; + uint32_t samplerSRD[HIP_SAMPLER_OBJECT_SIZE_DWORD]; + amd::Image* image; + amd::Sampler* sampler; + hipResourceDesc resDesc; + hipTextureDesc texDesc; + hipResourceViewDesc resViewDesc; + + __hip_texture(amd::Image* image_, + amd::Sampler* sampler_, + const hipResourceDesc& resDesc_, + const hipTextureDesc& texDesc_, + const hipResourceViewDesc& resViewDesc_) : + image(image_), + sampler(sampler_), + resDesc(resDesc_), + texDesc(texDesc_), + resViewDesc(resViewDesc_) { + amd::Context& context = *hip::getCurrentDevice()->asContext(); + amd::Device& device = *context.devices()[0]; + + device::Memory* imageMem = image->getDeviceMemory(device); + std::memcpy(imageSRD, imageMem->cpuSrd(), sizeof(imageSRD)); + + device::Sampler* samplerMem = sampler->getDeviceSampler(device); + std::memcpy(samplerSRD, samplerMem->hwState(), sizeof(samplerSRD)); + } +}; + +amd::Image* ihipImageCreate(const cl_channel_order channelOrder, + const cl_channel_type channelType, + const cl_mem_object_type imageType, + const size_t imageWidth, + const size_t imageHeight, + const size_t imageDepth, + const size_t imageArraySize, + const size_t imageRowPitch, + const size_t imageSlicePitch, + const uint32_t numMipLevels, + amd::Memory* buffer); + +hipError_t ihipCreateTextureObject(hipTextureObject_t* pTexObject, + const hipResourceDesc* pResDesc, + const hipTextureDesc* pTexDesc, + const hipResourceViewDesc* pResViewDesc) { + amd::Device* device = hip::getCurrentDevice()->devices()[0]; + const device::Info& info = device->info(); + + // 
pResViewDesc can only be specified if the type of resource is a HIP array or a HIP mipmapped array. + if ((pResViewDesc != nullptr) && + ((pResDesc->resType != hipResourceTypeArray) && (pResDesc->resType != hipResourceTypeMipmappedArray))) { + return hipErrorInvalidValue; + } + + // If hipResourceDesc::resType is set to hipResourceTypeArray, + // hipResourceDesc::res::array::array must be set to a valid HIP array handle. + if ((pResDesc->resType == hipResourceTypeArray) && + (pResDesc->res.array.array == nullptr)) { + return hipErrorInvalidValue; + } + + // If hipResourceDesc::resType is set to hipResourceTypeMipmappedArray, + // hipResourceDesc::res::mipmap::mipmap must be set to a valid HIP mipmapped array handle + // and hipTextureDesc::normalizedCoords must be set to true. + if ((pResDesc->resType == hipResourceTypeMipmappedArray) && + ((pResDesc->res.mipmap.mipmap == nullptr) || (pTexDesc->normalizedCoords == 0))) { + return hipErrorInvalidValue; + } + + // If hipResourceDesc::resType is set to hipResourceTypeLinear, + // hipResourceDesc::res::linear::devPtr must be set to a valid device pointer, that is aligned to hipDeviceProp::textureAlignment. + // The total number of elements in the linear address range cannot exceed hipDeviceProp::maxTexture1DLinear. + if ((pResDesc->resType == hipResourceTypeLinear) && + ((pResDesc->res.linear.devPtr == nullptr) || + (!amd::isMultipleOf(pResDesc->res.linear.devPtr, info.imageBaseAddressAlignment_)) || + ((pResDesc->res.linear.sizeInBytes / hip::getElementSize(pResDesc->res.linear.desc)) >= info.imageMaxBufferSize_))) { + return hipErrorInvalidValue; + } + + // If hipResourceDesc::resType is set to hipResourceTypePitch2D, + // hipResourceDesc::res::pitch2D::devPtr must be set to a valid device pointer, that is aligned to hipDeviceProp::textureAlignment. 
+ // hipResourceDesc::res::pitch2D::width and hipResourceDesc::res::pitch2D::height specify the width and height of the array in elements, + // and cannot exceed hipDeviceProp::maxTexture2DLinear[0] and hipDeviceProp::maxTexture2DLinear[1] respectively. + // hipResourceDesc::res::pitch2D::pitchInBytes specifies the pitch between two rows in bytes and has to be aligned to hipDeviceProp::texturePitchAlignment. + // Pitch cannot exceed hipDeviceProp::maxTexture2DLinear[2]. + if ((pResDesc->resType == hipResourceTypePitch2D) && + ((pResDesc->res.pitch2D.devPtr == nullptr) || + (!amd::isMultipleOf(pResDesc->res.pitch2D.devPtr, info.imageBaseAddressAlignment_)) || + (pResDesc->res.pitch2D.width >= info.image2DMaxWidth_) || + (pResDesc->res.pitch2D.height >= info.image2DMaxHeight_) || + (!amd::isMultipleOf(pResDesc->res.pitch2D.pitchInBytes, info.imagePitchAlignment_)))) { + // TODO check pitch limits. + return hipErrorInvalidValue; + } + + // Mipmaps are currently not supported. + if (pResDesc->resType == hipResourceTypeMipmappedArray) { + return hipErrorNotSupported; + } + // We don't program the border_color_ptr field in the HW sampler SRD. + if (pTexDesc->addressMode[0] == hipAddressModeBorder) { + return hipErrorNotSupported; + } + // We don't program the max_ansio_ratio field in the the HW sampler SRD. + if (pTexDesc->maxAnisotropy != 0) { + return hipErrorNotSupported; + } + // We don't program the lod_bias field in the HW sampler SRD. + if (pTexDesc->mipmapLevelBias != 0) { + return hipErrorNotSupported; + } + // We don't program the min_lod field in the HW sampler SRD. + if (pTexDesc->minMipmapLevelClamp != 0) { + return hipErrorNotSupported; + } + // We don't program the max_lod field in the HW sampler SRD. + if (pTexDesc->maxMipmapLevelClamp != 0) { + return hipErrorNotSupported; + } + + // TODO VDI assumes all dimensions have the same addressing mode. 
+ cl_addressing_mode addressMode = CL_ADDRESS_NONE; + // If hipTextureDesc::normalizedCoords is set to zero, + // hipAddressModeWrap and hipAddressModeMirror won't be supported + // and will be switched to hipAddressModeClamp. + if ((pTexDesc->normalizedCoords == 0) && + ((pTexDesc->addressMode[0] == hipAddressModeWrap) || (pTexDesc->addressMode[0] == hipAddressModeMirror))) { + addressMode = hip::getCLAddressingMode(hipAddressModeClamp); + } + // hipTextureDesc::addressMode is ignored if hipResourceDesc::resType is hipResourceTypeLinear + else if (pResDesc->resType != hipResourceTypeLinear) { + addressMode = hip::getCLAddressingMode(pTexDesc->addressMode[0]); + } + +#ifndef CL_FILTER_NONE +#define CL_FILTER_NONE 0x1142 +#endif + cl_filter_mode filterMode = CL_FILTER_NONE; +#undef CL_FILTER_NONE + // hipTextureDesc::filterMode is ignored if hipResourceDesc::resType is hipResourceTypeLinear. + if (pResDesc->resType != hipResourceTypeLinear) { + filterMode = hip::getCLFilterMode(pTexDesc->filterMode); + } + +#ifndef CL_FILTER_NONE +#define CL_FILTER_NONE 0x1142 +#endif + cl_filter_mode mipFilterMode = CL_FILTER_NONE; +#undef CL_FILTER_NONE + if (pResDesc->resType == hipResourceTypeMipmappedArray) { + mipFilterMode = hip::getCLFilterMode(pTexDesc->mipmapFilterMode); + } + + amd::Sampler* sampler = new amd::Sampler(*hip::getCurrentDevice()->asContext(), + pTexDesc->normalizedCoords, + addressMode, + filterMode, + mipFilterMode, + pTexDesc->minMipmapLevelClamp, + pTexDesc->maxMipmapLevelClamp); + + if (sampler == nullptr) { + return hipErrorOutOfMemory; + } + + if (!sampler->create()) { + delete sampler; + return hipErrorOutOfMemory; + } + + amd::Image* image = nullptr; + switch (pResDesc->resType) { + case hipResourceTypeArray: { + cl_mem memObj = reinterpret_cast(pResDesc->res.array.array->data); + if (!is_valid(memObj)) { + return hipErrorInvalidValue; + } + image = as_amd(memObj)->asImage(); + + hipTextureReadMode readMode = pTexDesc->readMode; + // 32-bit integer 
format will not be promoted, regardless of whether or not + // this hipTextureDesc::readMode is set hipReadModeNormalizedFloat is specified. + if ((pResDesc->res.array.array->Format == HIP_AD_FORMAT_SIGNED_INT32) || + (pResDesc->res.array.array->Format == HIP_AD_FORMAT_UNSIGNED_INT32)) { + readMode = hipReadModeElementType; + } + + // We need to create an image view if the user requested to use normalized pixel values, + // due to already having the image created with a different format. + if ((pResViewDesc != nullptr) || + (readMode == hipReadModeNormalizedFloat) || + (pTexDesc->sRGB == 1)) { + // TODO VDI currently right now can only change the format of the image. + const cl_channel_order channelOrder = (pResViewDesc != nullptr) ? hip::getCLChannelOrder(hip::getNumChannels(pResViewDesc->format), pTexDesc->sRGB) : + hip::getCLChannelOrder(pResDesc->res.array.array->NumChannels, pTexDesc->sRGB); + const cl_channel_type channelType = (pResViewDesc != nullptr) ? hip::getCLChannelType(hip::getArrayFormat(pResViewDesc->format), readMode) : + hip::getCLChannelType(pResDesc->res.array.array->Format, readMode); + const amd::Image::Format imageFormat(cl_image_format{channelOrder, channelType}); + if (!imageFormat.isValid()) { + return hipErrorInvalidValue; + } + + image = image->createView(*hip::getCurrentDevice()->asContext(), imageFormat, nullptr); + if (image == nullptr) { + return hipErrorInvalidValue; + } + } + break; + } + case hipResourceTypeMipmappedArray: { + ShouldNotReachHere(); + break; + } + case hipResourceTypeLinear: { + const cl_channel_order channelOrder = hip::getCLChannelOrder(hip::getNumChannels(pResDesc->res.linear.desc), pTexDesc->sRGB); + const cl_channel_type channelType = hip::getCLChannelType(hip::getArrayFormat(pResDesc->res.linear.desc), pTexDesc->readMode); + const amd::Image::Format imageFormat({channelOrder, channelType}); + const cl_mem_object_type imageType = hip::getCLMemObjectType(pResDesc->resType); + size_t offset = 0; + image = 
ihipImageCreate(channelOrder, + channelType, + imageType, + (pResDesc->res.linear.sizeInBytes / imageFormat.getElementSize()), /* imageWidth */ + 0, /* imageHeight */ + 0, /* imageDepth */ + 0, /* imageArraySize */ + 0, /* imageRowPitch */ + 0, /* imageSlicePitch */ + 0, /* numMipLevels */ + getMemoryObject(pResDesc->res.linear.devPtr, offset)); + // TODO take care of non-zero offset. + assert(offset == 0); + if (image == nullptr) { + return hipErrorInvalidValue; + } + break; + } + case hipResourceTypePitch2D: { + const cl_channel_order channelOrder = hip::getCLChannelOrder(hip::getNumChannels(pResDesc->res.pitch2D.desc), pTexDesc->sRGB); + const cl_channel_type channelType = hip::getCLChannelType(hip::getArrayFormat(pResDesc->res.pitch2D.desc), pTexDesc->readMode); + const cl_mem_object_type imageType = hip::getCLMemObjectType(pResDesc->resType); + size_t offset = 0; + image = ihipImageCreate(channelOrder, + channelType, + imageType, + pResDesc->res.pitch2D.width, /* imageWidth */ + pResDesc->res.pitch2D.height, /* imageHeight */ + 0, /* imageDepth */ + 0, /* imageArraySize */ + pResDesc->res.pitch2D.pitchInBytes, /* imageRowPitch */ + 0, /* imageSlicePitch */ + 0, /* numMipLevels */ + getMemoryObject(pResDesc->res.pitch2D.devPtr, offset)); + // TODO take care of non-zero offset. + assert(offset == 0); + if (image == nullptr) { + return hipErrorInvalidValue; + } + break; + } + } + + void *texObjectBuffer = nullptr; + ihipMalloc(&texObjectBuffer, sizeof(__hip_texture), CL_MEM_SVM_FINE_GRAIN_BUFFER); + if (texObjectBuffer == nullptr) { + return hipErrorOutOfMemory; + } + *pTexObject = new (texObjectBuffer) __hip_texture{image, sampler, *pResDesc, *pTexDesc, (pResViewDesc != nullptr) ? 
*pResViewDesc : hipResourceViewDesc{}}; + + return hipSuccess; +} + +hipError_t hipCreateTextureObject(hipTextureObject_t* pTexObject, + const hipResourceDesc* pResDesc, + const hipTextureDesc* pTexDesc, + const hipResourceViewDesc* pResViewDesc) { + HIP_INIT_API(hipCreateTextureObject, pTexObject, pResDesc, pTexDesc, pResViewDesc); + + HIP_RETURN(ihipCreateTextureObject(pTexObject, pResDesc, pTexDesc, pResViewDesc)); +} + + +hipError_t ihipDestroyTextureObject(hipTextureObject_t texObject) { + if (texObject == nullptr) { + return hipErrorInvalidValue; + } + + const hipResourceType type = texObject->resDesc.resType; + const bool isImageFromBuffer = (type == hipResourceTypeLinear) || (type == hipResourceTypePitch2D); + const bool isImageView = ((type == hipResourceTypeArray) || (type == hipResourceTypeMipmappedArray)) && + !texObject->image->isParent(); + if (isImageFromBuffer || isImageView) { + texObject->image->release(); + } + + // TODO Should call ihipFree() to not polute the api trace. 
+ return hipFree(texObject); +} + +hipError_t hipDestroyTextureObject(hipTextureObject_t texObject) { + HIP_INIT_API(hipDestroyTextureObject, texObject); + + HIP_RETURN(ihipDestroyTextureObject(texObject)); +} + + +hipError_t hipGetTextureObjectResourceDesc(hipResourceDesc* pResDesc, + hipTextureObject_t texObject) { + HIP_INIT_API(hipGetTextureObjectResourceDesc, pResDesc, texObject); + + if ((pResDesc == nullptr) || + (texObject == nullptr)) { + HIP_RETURN(hipErrorInvalidValue); + } + + *pResDesc = texObject->resDesc; + + HIP_RETURN(hipSuccess); +} + +hipError_t hipGetTextureObjectResourceViewDesc(hipResourceViewDesc* pResViewDesc, + hipTextureObject_t texObject) { + HIP_INIT_API(hipGetTextureObjectResourceViewDesc, pResViewDesc, texObject); + + if ((pResViewDesc == nullptr) || + (texObject == nullptr)) { + HIP_RETURN(hipErrorInvalidValue); + } + + *pResViewDesc = texObject->resViewDesc; + + HIP_RETURN(hipSuccess); +} + +hipError_t hipGetTextureObjectTextureDesc(hipTextureDesc* pTexDesc, + hipTextureObject_t texObject) { + HIP_INIT_API(hipGetTextureObjectTextureDesc, pTexDesc, texObject); + + if ((pTexDesc == nullptr) || + (texObject == nullptr)) { + HIP_RETURN(hipErrorInvalidValue); + } + + *pTexDesc = texObject->texDesc; + + HIP_RETURN(hipSuccess); +} + +inline bool ihipGetTextureAlignmentOffset(size_t* offset, + const void* devPtr) { + amd::Device* device = hip::getCurrentDevice()->devices()[0]; + const device::Info& info = device->info(); + + const char* alignedDevPtr = amd::alignUp(static_cast(devPtr), info.imageBaseAddressAlignment_); + const size_t alignedOffset = alignedDevPtr - static_cast(devPtr); + + // If the device memory pointer was returned from hipMalloc(), + // the offset is guaranteed to be 0 and NULL may be passed as the offset parameter. 
+  if ((alignedOffset != 0) &&
+      (offset == nullptr)) {
+    return false;
+  }
+
+  if (offset != nullptr) {
+    *offset = alignedOffset;
+  }
+
+  return true;
+}
+
+hipError_t ihipBindTexture(size_t* offset,
+                           const textureReference* texref,
+                           const void* devPtr,
+                           const hipChannelFormatDesc* desc,
+                           size_t size) {
+  if ((texref == nullptr) ||
+      (devPtr == nullptr) ||
+      (desc == nullptr)) {
+    return hipErrorInvalidValue;
+  }
+
+  // Any previous address or HIP array state associated with the texture reference is superseded by this function.
+  // Any memory previously bound to hTexRef is unbound.
+  // No need to check for errors.
+  ihipDestroyTextureObject(texref->textureObject);
+
+  hipResourceDesc resDesc = {};
+  resDesc.resType = hipResourceTypeLinear;
+  resDesc.res.linear.devPtr = const_cast<void*>(devPtr);
+  resDesc.res.linear.desc = *desc;
+  resDesc.res.linear.sizeInBytes = size;
+
+  if (ihipGetTextureAlignmentOffset(offset, devPtr)) {
+    // Align the user ptr to HW requirements; offset may be null here when devPtr is already aligned, so never dereference it blindly.
+    resDesc.res.linear.devPtr = static_cast<char*>(const_cast<void*>(devPtr)) - ((offset != nullptr) ? *offset : 0);
+  } else {
+    return hipErrorInvalidValue;
+  }
+
+  hipTextureDesc texDesc = hip::getTextureDesc(texref);
+
+  return ihipCreateTextureObject(const_cast<hipTextureObject_t*>(&texref->textureObject), &resDesc, &texDesc, nullptr);
+}
+
+hipError_t ihipBindTexture2D(size_t* offset,
+                             const textureReference* texref,
+                             const void* devPtr,
+                             const hipChannelFormatDesc* desc,
+                             size_t width,
+                             size_t height,
+                             size_t pitch) {
+  if ((texref == nullptr) ||
+      (devPtr == nullptr) ||
+      (desc == nullptr)) {
+    return hipErrorInvalidValue;
+  }
+
+  // Any previous address or HIP array state associated with the texture reference is superseded by this function.
+  // Any memory previously bound to hTexRef is unbound.
+  // No need to check for errors.
+ ihipDestroyTextureObject(texref->textureObject); + + hipResourceDesc resDesc = {}; + resDesc.resType = hipResourceTypePitch2D; + resDesc.res.pitch2D.devPtr = const_cast(devPtr); + resDesc.res.pitch2D.desc = *desc; + resDesc.res.pitch2D.width = width; + resDesc.res.pitch2D.height = height; + resDesc.res.pitch2D.pitchInBytes = pitch; + + if (ihipGetTextureAlignmentOffset(offset, devPtr)) { + // Align the user ptr to HW requirments. + resDesc.res.pitch2D.devPtr = static_cast(const_cast(devPtr)) - *offset; + } else { + return hipErrorInvalidValue; + } + + hipTextureDesc texDesc = hip::getTextureDesc(texref); + + return ihipCreateTextureObject(const_cast(&texref->textureObject), &resDesc, &texDesc, nullptr); +} + +hipError_t hipBindTexture2D(size_t* offset, + const textureReference* texref, + const void* devPtr, + const hipChannelFormatDesc* desc, + size_t width, + size_t height, + size_t pitch) { + HIP_INIT_API(hipBindTexture2D, offset, texref, devPtr, desc, width, height, pitch); + + HIP_RETURN(ihipBindTexture2D(offset, texref, devPtr, desc, width, height, pitch)); +} + +hipError_t ihipBindTextureToArray(const textureReference* texref, + hipArray_const_t array, + const hipChannelFormatDesc* desc) { + if ((texref == nullptr) || + (array == nullptr) || + (desc == nullptr)) { + return hipErrorInvalidValue; + } + + // Any previous address or HIP array state associated with the texture reference is superseded by this function. + // Any memory previously bound to hTexRef is unbound. + // No need to check for errors. 
+ ihipDestroyTextureObject(texref->textureObject); + + hipResourceDesc resDesc = {}; + resDesc.resType = hipResourceTypeArray; + resDesc.res.array.array = const_cast(array); + + hipTextureDesc texDesc = hip::getTextureDesc(texref); + + hipResourceViewFormat format = hip::getResourceViewFormat(*desc); + hipResourceViewDesc resViewDesc = hip::getResourceViewDesc(array, format); + + return ihipCreateTextureObject(const_cast(&texref->textureObject), &resDesc, &texDesc, &resViewDesc); +} + +hipError_t hipBindTextureToArray(const textureReference* texref, + hipArray_const_t array, + const hipChannelFormatDesc* desc) { + HIP_INIT_API(hipBindTextureToArray, texref, array, desc); + + HIP_RETURN(ihipBindTextureToArray(texref, array, desc)); +} + +hipError_t ihipBindTextureToMipmappedArray(const textureReference* texref, + hipMipmappedArray_const_t mipmappedArray, + const hipChannelFormatDesc* desc) { + if ((texref == nullptr) || + (mipmappedArray == nullptr) || + (desc == nullptr)) { + return hipErrorInvalidValue; + } + + // Any previous address or HIP array state associated with the texture reference is superseded by this function. + // Any memory previously bound to hTexRef is unbound. + // No need to check for errors. 
+ ihipDestroyTextureObject(texref->textureObject); + + hipResourceDesc resDesc = {}; + resDesc.resType = hipResourceTypeMipmappedArray; + resDesc.res.mipmap.mipmap = const_cast(mipmappedArray); + + hipTextureDesc texDesc = hip::getTextureDesc(texref); + + hipResourceViewFormat format = hip::getResourceViewFormat(*desc); + hipResourceViewDesc resViewDesc = hip::getResourceViewDesc(mipmappedArray, format); + + return ihipCreateTextureObject(const_cast(&texref->textureObject), &resDesc, &texDesc, &resViewDesc); +} + +hipError_t hipBindTextureToMipmappedArray(const textureReference* texref, + hipMipmappedArray_const_t mipmappedArray, + const hipChannelFormatDesc* desc) { + HIP_INIT_API(hipBindTextureToMipmappedArray, texref, mipmappedArray, desc); + + HIP_RETURN(ihipBindTextureToMipmappedArray(texref, mipmappedArray, desc)); +} + +hipError_t hipUnbindTexture(const textureReference* texref) { + HIP_INIT_API(hipUnbindTexture, texref); + + if (texref == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + const hipTextureObject_t textureObject = texref->textureObject; + const_cast(texref)->textureObject = nullptr; + + HIP_RETURN(ihipDestroyTextureObject(textureObject)); +} + +hipError_t hipBindTexture(size_t* offset, + const textureReference* texref, + const void* devPtr, + const hipChannelFormatDesc* desc, + size_t size) { + HIP_INIT_API(hipBindTexture, offset, texref, devPtr, desc, size); + + HIP_RETURN(ihipBindTexture(offset, texref, devPtr, desc, size)); +} + +hipError_t hipGetChannelDesc(hipChannelFormatDesc* desc, + hipArray_const_t array) { + HIP_INIT_API(hipGetChannelDesc, desc, array); + + if ((desc == nullptr) || + (array == nullptr)) { + HIP_RETURN(hipErrorInvalidValue); + } + + // It is UB to call hipGetChannelDesc() on an array created via hipArrayCreate()/hipArray3DCreate(). + // This is due to hip not differentiating between runtime and driver types. 
+ *desc = array->desc; + + HIP_RETURN(hipSuccess); +} + +hipError_t hipGetTextureAlignmentOffset(size_t* offset, + const textureReference* texref) { + HIP_INIT_API(hipGetTextureAlignmentOffset, offset, texref); + + if ((offset == nullptr) || + (texref == nullptr)) { + HIP_RETURN(hipErrorInvalidValue); + } + + // TODO enforce alignment on devPtr. + *offset = 0; + + HIP_RETURN(hipSuccess); +} + +hipError_t hipGetTextureReference(const textureReference** texref, const void* symbol) { + HIP_INIT_API(hipGetTextureReference, texref, symbol); + + assert(0 && "Unimplemented"); + + HIP_RETURN(hipErrorNotSupported); +} + +hipError_t hipTexRefSetFormat(textureReference* texRef, + hipArray_Format fmt, + int NumPackedComponents) { + HIP_INIT_API(hipTexRefSetFormat, texRef, fmt, NumPackedComponents); + + if (texRef == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + texRef->format = fmt; + texRef->numChannels = NumPackedComponents; + + HIP_RETURN(hipSuccess); +} + +hipError_t hipTexRefSetFlags(textureReference* texRef, + unsigned int Flags) { + HIP_INIT_API(hipTexRefSetFlags, texRef, Flags); + + if (texRef == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + texRef->readMode = hipReadModeNormalizedFloat; + texRef->normalized = 0; + texRef->sRGB = 0; + + if (Flags & HIP_TRSF_READ_AS_INTEGER) { + texRef->readMode = hipReadModeElementType; + } + + if (Flags & HIP_TRSF_NORMALIZED_COORDINATES) { + texRef->normalized = 1; + } + + if (Flags & HIP_TRSF_SRGB) { + texRef->sRGB = 1; + } + + HIP_RETURN(hipSuccess); +} + +hipError_t hipTexRefSetFilterMode(textureReference* texRef, + hipTextureFilterMode fm) { + HIP_INIT_API(hipTexRefSetFilterMode, texRef, fm); + + if (texRef == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + texRef->filterMode = fm; + + HIP_RETURN(hipSuccess); +} + +hipError_t hipTexRefGetAddressMode(hipTextureAddressMode* pam, + const textureReference* texRef, + int dim) { + // TODO overload operator<<(ostream&, textureReference&). 
+ HIP_INIT_API(hipTexRefGetAddressMode, pam, texRef, dim); + + if ((pam == nullptr) || + (texRef == nullptr)) { + HIP_RETURN(hipErrorInvalidValue); + } + + // Currently, the only valid value for dim are 0 and 1. + if ((dim != 0) || (dim != 1)) { + HIP_RETURN(hipErrorInvalidValue); + } + + *pam = texRef->addressMode[dim]; + + HIP_RETURN(hipSuccess); +} + +hipError_t hipTexRefSetAddressMode(textureReference* texRef, + int dim, + hipTextureAddressMode am) { + HIP_INIT_API(hipTexRefSetAddressMode, texRef, dim, am); + + if (texRef == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + if ((dim < 0) || (dim > 2)) { + HIP_RETURN(hipErrorInvalidValue); + } + + texRef->addressMode[dim] = am; + + HIP_RETURN(hipSuccess); +} + +hipError_t hipTexRefGetArray(hipArray_t* pArray, + const textureReference* texRef) { + // TODO overload operator<<(ostream&, textureReference&). + HIP_INIT_API(hipTexRefGetArray, pArray, texRef); + + if ((pArray == nullptr) || + (texRef == nullptr)) { + HIP_RETURN(hipErrorInvalidValue); + } + + hipResourceDesc resDesc = {}; + // TODO use ihipGetTextureObjectResourceDesc() to not pollute the API trace. 
+ hipError_t error = hipGetTextureObjectResourceDesc(&resDesc, texRef->textureObject); + if (error != hipSuccess) { + return HIP_RETURN(error); + } + + switch (resDesc.resType) { + case hipResourceTypeLinear: + case hipResourceTypePitch2D: + case hipResourceTypeMipmappedArray: + HIP_RETURN(hipErrorInvalidValue); + case hipResourceTypeArray: + *pArray = resDesc.res.array.array; + break; + } + + HIP_RETURN(hipSuccess); +} + +hipError_t hipTexRefSetArray(textureReference* texRef, + hipArray_const_t array, + unsigned int flags) { + HIP_INIT_API(hipTexRefSetArray, texRef, array, flags); + + if ((texRef == nullptr) || + (array == nullptr)) { + HIP_RETURN(hipErrorInvalidValue); + } + + if (flags != HIP_TRSA_OVERRIDE_FORMAT) { + HIP_RETURN(hipErrorInvalidValue); + } + + // Any previous address or HIP array state associated with the texture reference is superseded by this function. + // Any memory previously bound to hTexRef is unbound. + // No need to check for errors. + ihipDestroyTextureObject(texRef->textureObject); + + hipResourceDesc resDesc = {}; + resDesc.resType = hipResourceTypeArray; + resDesc.res.array.array = const_cast(array); + + hipTextureDesc texDesc = hip::getTextureDesc(texRef); + + hipResourceViewFormat format = hip::getResourceViewFormat(hip::getChannelFormatDesc(texRef->numChannels, texRef->format)); + hipResourceViewDesc resViewDesc = hip::getResourceViewDesc(array, format); + + HIP_RETURN(ihipCreateTextureObject(&texRef->textureObject, &resDesc, &texDesc, &resViewDesc)); +} + +hipError_t hipTexRefGetAddress(hipDeviceptr_t* dptr, + const textureReference* texRef) { + // TODO overload operator<<(ostream&, textureReference&). + HIP_INIT_API(hipTexRefGetAddress, dptr, texRef); + + if ((dptr == nullptr) || + (texRef == nullptr)) { + HIP_RETURN(hipErrorInvalidValue); + } + + hipResourceDesc resDesc = {}; + // TODO use ihipGetTextureObjectResourceDesc() to not pollute the API trace. 
+ hipError_t error = hipGetTextureObjectResourceDesc(&resDesc, texRef->textureObject); + if (error != hipSuccess) { + return HIP_RETURN(error); + } + + switch (resDesc.resType) { + // Need to verify. + // If the texture reference is not bound to any device memory range, + // return hipErroInvalidValue. + case hipResourceTypeArray: + case hipResourceTypeMipmappedArray: + HIP_RETURN(hipErrorInvalidValue); + case hipResourceTypeLinear: + *dptr = resDesc.res.linear.devPtr; + break; + case hipResourceTypePitch2D: + *dptr = resDesc.res.pitch2D.devPtr; + break; + } + + HIP_RETURN(hipSuccess); +} + +hipError_t hipTexRefSetAddress(size_t* ByteOffset, + textureReference* texRef, + hipDeviceptr_t dptr, + size_t bytes) { + HIP_INIT_API(hipTexRefSetAddress, ByteOffset, texRef, dptr, bytes); + + if (texRef == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + // Any previous address or HIP array state associated with the texture reference is superseded by this function. + // Any memory previously bound to hTexRef is unbound. + // No need to check for errors. + ihipDestroyTextureObject(texRef->textureObject); + + hipResourceDesc resDesc = {}; + resDesc.resType = hipResourceTypeLinear; + resDesc.res.linear.devPtr = dptr; + resDesc.res.linear.desc = hip::getChannelFormatDesc(texRef->numChannels, texRef->format); + resDesc.res.linear.sizeInBytes = bytes; + + if (ihipGetTextureAlignmentOffset(ByteOffset, dptr)) { + // Align the user ptr to HW requirments. 
+ resDesc.res.linear.devPtr = static_cast(dptr) - *ByteOffset; + } else { + return HIP_RETURN(hipErrorInvalidValue); + } + + hipTextureDesc texDesc = hip::getTextureDesc(texRef); + + HIP_RETURN(ihipCreateTextureObject(&texRef->textureObject, &resDesc, &texDesc, nullptr)); +} + +hipError_t hipTexRefSetAddress2D(textureReference* texRef, + const HIP_ARRAY_DESCRIPTOR* desc, + hipDeviceptr_t dptr, + size_t Pitch) { + HIP_INIT_API(hipTexRefSetAddress2D, texRef, desc, dptr, Pitch); + + if ((texRef == nullptr) || + (desc == nullptr)) { + HIP_RETURN(hipErrorInvalidValue); + } + + // Any previous address or HIP array state associated with the texture reference is superseded by this function. + // Any memory previously bound to hTexRef is unbound. + // No need to check for errors. + ihipDestroyTextureObject(texRef->textureObject); + + hipResourceDesc resDesc = {}; + resDesc.resType = hipResourceTypePitch2D; + resDesc.res.linear.devPtr = dptr; + resDesc.res.linear.desc = hip::getChannelFormatDesc(desc->NumChannels, desc->Format); // Need to verify. + resDesc.res.pitch2D.width = desc->Width; + resDesc.res.pitch2D.height = desc->Height; + resDesc.res.pitch2D.pitchInBytes = Pitch; + + hipTextureDesc texDesc = hip::getTextureDesc(texRef); + + HIP_RETURN(ihipCreateTextureObject(&texRef->textureObject, &resDesc, &texDesc, nullptr)); +} + +hipChannelFormatDesc hipCreateChannelDesc(int x, int y, int z, int w, hipChannelFormatKind f) { + return {x, y, z, w, f}; +} + +hipError_t hipTexRefGetBorderColor(float* pBorderColor, + const textureReference* texRef) { + // TODO overload operator<<(ostream&, textureReference&). + HIP_INIT_API(hipTexRefGetBorderColor, pBorderColor, texRef); + + if ((pBorderColor == nullptr) || + (texRef == nullptr)) { + HIP_RETURN(hipErrorInvalidValue); + } + + // TODO add textureReference::borderColor. 
+ assert(false && "textureReference::borderColor is missing in header"); + // std::memcpy(pBorderColor, texRef.borderColor, sizeof(texRef.borderColor)); + + HIP_RETURN(hipSuccess); +} + +hipError_t hipTexRefGetFilterMode(hipTextureFilterMode* pfm, + const textureReference* texRef) { + // TODO overload operator<<(ostream&, textureReference&). + HIP_INIT_API(hipTexRefGetFilterMode, pfm, texRef); + + if ((pfm == nullptr) || + (texRef == nullptr)) { + HIP_RETURN(hipErrorInvalidValue); + } + + *pfm = texRef->filterMode; + + HIP_RETURN(hipSuccess); +} + +hipError_t hipTexRefGetFlags(unsigned int* pFlags, + const textureReference* texRef) { + // TODO overload operator<<(ostream&, textureReference&). + HIP_INIT_API(hipTexRefGetFlags, pFlags, texRef); + + if ((pFlags == nullptr) || + (texRef == nullptr)) { + HIP_RETURN(hipErrorInvalidValue); + } + + *pFlags = 0; + + if (texRef->readMode == hipReadModeElementType) { + *pFlags |= HIP_TRSF_READ_AS_INTEGER; + } + + if (texRef->normalized == 1) { + *pFlags |= HIP_TRSF_NORMALIZED_COORDINATES; + } + + if (texRef->sRGB == 1) { + *pFlags |= HIP_TRSF_SRGB; + } + + HIP_RETURN(hipSuccess); +} + +hipError_t hipTexRefGetFormat(hipArray_Format* pFormat, + int* pNumChannels, + const textureReference* texRef) { + // TODO overload operator<<(ostream&, textureReference&). + HIP_INIT_API(hipTexRefGetFormat, pFormat, pNumChannels, texRef); + + if ((pFormat == nullptr) || + (pNumChannels == nullptr) || + (texRef == nullptr)) { + HIP_RETURN(hipErrorInvalidValue); + } + + *pFormat = texRef->format; + *pNumChannels = texRef->numChannels; + + HIP_RETURN(hipSuccess); +} + +hipError_t hipTexRefGetMaxAnisotropy(int* pmaxAnsio, + const textureReference* texRef) { + // TODO overload operator<<(ostream&, textureReference&). 
+ HIP_INIT_API(hipTexRefGetMaxAnisotropy, pmaxAnsio, texRef); + + if ((pmaxAnsio == nullptr) || + (texRef == nullptr)) { + HIP_RETURN(hipErrorInvalidValue); + } + + *pmaxAnsio = texRef->maxAnisotropy; + + HIP_RETURN(hipErrorInvalidValue); +} + +hipError_t hipTexRefGetMipmapFilterMode(hipTextureFilterMode* pfm, + const textureReference* texRef) { + // TODO overload operator<<(ostream&, textureReference&). + HIP_INIT_API(hipTexRefGetMipmapFilterMode, pfm, texRef); + + if ((pfm == nullptr) || + (texRef == nullptr)) { + HIP_RETURN(hipErrorInvalidValue); + } + + *pfm = texRef->mipmapFilterMode; + + HIP_RETURN(hipErrorInvalidValue); +} + +hipError_t hipTexRefGetMipmapLevelBias(float* pbias, + const textureReference* texRef) { + // TODO overload operator<<(ostream&, textureReference&). + HIP_INIT_API(hipTexRefGetMipmapLevelBias, pbias, texRef); + + if ((pbias == nullptr) || + (texRef == nullptr)) { + HIP_RETURN(hipErrorInvalidValue); + } + + *pbias = texRef->mipmapLevelBias; + + HIP_RETURN(hipErrorInvalidValue); +} + +hipError_t hipTexRefGetMipmapLevelClamp(float* pminMipmapLevelClamp, + float* pmaxMipmapLevelClamp, + const textureReference* texRef) { + // TODO overload operator<<(ostream&, textureReference&). + HIP_INIT_API(hipTexRefGetMipmapLevelClamp, pminMipmapLevelClamp, pmaxMipmapLevelClamp, texRef); + + if ((pminMipmapLevelClamp == nullptr) || + (pmaxMipmapLevelClamp == nullptr) || + (texRef == nullptr)){ + HIP_RETURN(hipErrorInvalidValue); + } + + *pminMipmapLevelClamp = texRef->minMipmapLevelClamp; + *pmaxMipmapLevelClamp = texRef->maxMipmapLevelClamp; + + HIP_RETURN(hipErrorInvalidValue); +} + +hipError_t hipTexRefGetMipmappedArray(hipMipmappedArray_t* pArray, + const textureReference* texRef) { + // TODO overload operator<<(ostream&, textureReference&). 
+ HIP_INIT_API(hipTexRefGetMipmappedArray, pArray, &texRef); + + if ((pArray == nullptr) || + (texRef == nullptr)) { + HIP_RETURN(hipErrorInvalidValue); + } + + hipResourceDesc resDesc = {}; + // TODO use ihipGetTextureObjectResourceDesc() to not pollute the API trace. + hipError_t error = hipGetTextureObjectResourceDesc(&resDesc, texRef->textureObject); + if (error != hipSuccess) { + return HIP_RETURN(error); + } + + switch (resDesc.resType) { + case hipResourceTypeLinear: + case hipResourceTypePitch2D: + case hipResourceTypeArray: + HIP_RETURN(hipErrorInvalidValue); + case hipResourceTypeMipmappedArray: + *pArray = resDesc.res.mipmap.mipmap; + break; + } + + HIP_RETURN(hipSuccess); +} + +hipError_t hipTexRefSetBorderColor(textureReference* texRef, + float* pBorderColor) { + HIP_INIT_API(hipTexRefSetBorderColor, texRef, pBorderColor); + + if ((texRef == nullptr) || + (pBorderColor == nullptr)) { + HIP_RETURN(hipErrorInvalidValue); + } + + // TODO add textureReference::borderColor. + assert(false && "textureReference::borderColor is missing in header"); + // std::memcpy(texRef.borderColor, pBorderColor, sizeof(texRef.borderColor)); + + HIP_RETURN(hipSuccess); +} + +hipError_t hipTexRefSetMaxAnisotropy(textureReference* texRef, + unsigned int maxAniso) { + HIP_INIT_API(hipTexRefSetMaxAnisotropy, texRef, maxAniso); + + if (texRef == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + texRef->maxAnisotropy = maxAniso; + + HIP_RETURN(hipSuccess); +} + +hipError_t hipTexRefSetMipmapFilterMode(textureReference* texRef, + hipTextureFilterMode fm) { + HIP_INIT_API(hipTexRefSetMipmapFilterMode, texRef, fm); + + if (texRef == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + texRef->mipmapFilterMode = fm; + + HIP_RETURN(hipSuccess); +} + +hipError_t hipTexRefSetMipmapLevelBias(textureReference* texRef, + float bias) { + HIP_INIT_API(hipTexRefSetMipmapLevelBias, texRef, bias); + + if (texRef == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + 
texRef->mipmapLevelBias = bias; + + HIP_RETURN(hipSuccess); +} + +hipError_t hipTexRefSetMipmapLevelClamp(textureReference* texRef, + float minMipMapLevelClamp, + float maxMipMapLevelClamp) { + HIP_INIT_API(hipTexRefSetMipmapLevelClamp, minMipMapLevelClamp, maxMipMapLevelClamp); + + if (texRef == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + texRef->minMipmapLevelClamp = minMipMapLevelClamp; + texRef->maxMipmapLevelClamp = maxMipMapLevelClamp; + + HIP_RETURN(hipSuccess); +} + +hipError_t hipTexRefSetMipmappedArray(textureReference* texRef, + hipMipmappedArray* mipmappedArray, + unsigned int Flags) { + HIP_INIT_API(hipTexRefSetMipmappedArray, texRef, mipmappedArray, Flags); + + if ((texRef == nullptr) || + (mipmappedArray == nullptr)) { + HIP_RETURN(hipErrorInvalidValue); + } + + if (Flags != HIP_TRSA_OVERRIDE_FORMAT) { + HIP_RETURN(hipErrorInvalidValue); + } + + // Any previous address or HIP array state associated with the texture reference is superseded by this function. + // Any memory previously bound to hTexRef is unbound. + // No need to check for errors. 
+ ihipDestroyTextureObject(texRef->textureObject); + + hipResourceDesc resDesc = {}; + resDesc.resType = hipResourceTypeMipmappedArray; + resDesc.res.mipmap.mipmap = mipmappedArray; + + hipTextureDesc texDesc = hip::getTextureDesc(texRef); + + hipResourceViewFormat format = hip::getResourceViewFormat(hip::getChannelFormatDesc(texRef->numChannels, texRef->format)); + hipResourceViewDesc resViewDesc = hip::getResourceViewDesc(mipmappedArray, format); + + HIP_RETURN(ihipCreateTextureObject(&texRef->textureObject, &resDesc, &texDesc, &resViewDesc)); +} + +hipError_t hipTexObjectCreate(hipTextureObject_t* pTexObject, + const HIP_RESOURCE_DESC* pResDesc, + const HIP_TEXTURE_DESC* pTexDesc, + const HIP_RESOURCE_VIEW_DESC* pResViewDesc) { + HIP_INIT_API(hipTexObjectCreate, pTexObject, pResDesc, pTexDesc, pResViewDesc); + + if ((pTexObject == nullptr) || + (pResDesc == nullptr) || (pTexDesc == nullptr)) { + HIP_RETURN(hipErrorInvalidValue); + } + + hipResourceDesc resDesc = hip::getResourceDesc(*pResDesc); + hipTextureDesc texDesc = hip::getTextureDesc(*pTexDesc); + + if (pResViewDesc != nullptr) { + hipResourceViewDesc resViewDesc = hip::getResourceViewDesc(*pResViewDesc); + HIP_RETURN(ihipCreateTextureObject(pTexObject, &resDesc, &texDesc, &resViewDesc)); + } else { + HIP_RETURN(ihipCreateTextureObject(pTexObject, &resDesc, &texDesc, nullptr)); + } +} + +hipError_t hipTexObjectDestroy(hipTextureObject_t texObject) { + HIP_INIT_API(hipTexObjectDestroy, texObject); + + HIP_RETURN(ihipDestroyTextureObject(texObject)); +} + +hipError_t hipTexObjectGetResourceDesc(HIP_RESOURCE_DESC* pResDesc, + hipTextureObject_t texObject) { + HIP_INIT_API(hipTexObjectGetResourceDesc, pResDesc, texObject); + + if ((pResDesc == nullptr) || + (texObject == nullptr)) { + HIP_RETURN(hipErrorInvalidValue); + } + + *pResDesc = hip::getResourceDesc(texObject->resDesc); + + HIP_RETURN(hipSuccess); +} + +hipError_t hipTexObjectGetResourceViewDesc(HIP_RESOURCE_VIEW_DESC* pResViewDesc, + 
hipTextureObject_t texObject) { + HIP_INIT_API(hipTexObjectGetResourceViewDesc, pResViewDesc, texObject); + + if ((pResViewDesc == nullptr) || + (texObject == nullptr)) { + HIP_RETURN(hipErrorInvalidValue); + } + + *pResViewDesc = hip::getResourceViewDesc(texObject->resViewDesc); + + HIP_RETURN(hipSuccess); +} + +hipError_t hipTexObjectGetTextureDesc(HIP_TEXTURE_DESC* pTexDesc, + hipTextureObject_t texObject) { + HIP_INIT_API(hipTexObjectGetTextureDesc, pTexDesc, texObject); + + if ((pTexDesc == nullptr) || + (texObject == nullptr)) { + HIP_RETURN(hipErrorInvalidValue); + } + + *pTexDesc = hip::getTextureDesc(texObject->texDesc); + + HIP_RETURN(hipSuccess); +} diff --git a/projects/hip/vdi/hiprtc_internal.hpp b/projects/hip/vdi/hiprtc_internal.hpp new file mode 100644 index 0000000000..4e533c9716 --- /dev/null +++ b/projects/hip/vdi/hiprtc_internal.hpp @@ -0,0 +1,65 @@ +/* Copyright (c) 2015-present Advanced Micro Devices, Inc. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. 
 */

// Internal helpers for the HIP RTC entry points: platform-specific demangling
// setup plus the HIPRTC_INIT_API / HIPRTC_RETURN prologue and epilogue macros.
#ifndef HIPRTC_SRC_HIP_INTERNAL_H
#define HIPRTC_SRC_HIP_INTERNAL_H

#include "hip_internal.hpp"

#if __linux__
// NOTE(review): the header name of this #include (and of the three below) is
// missing from the text as captured here - restore it from the original file.
#include

#if HIPRTC_USE_CXXABI
#include

// Use the demangler exposed through the C++ ABI namespace.
#define DEMANGLE abi::__cxa_demangle

#else
// Without the ABI header, declare the demangler by hand and call it directly.
extern "C" char * __cxa_demangle(const char *mangled_name, char *output_buffer,
                                 size_t *length, int *status);

#define DEMANGLE __cxa_demangle
#endif //HIPRTC_USE_CXXABI

#elif defined(_WIN32)
#include
#include

// Buffer size constant for the Windows branch; its use is not visible here.
#define UNDECORATED_SIZE 4096

#endif // __linux__

// This macro should be called at the beginning of every HIP RTC API.
// It logs the call with its stringified arguments, fetches the current
// amd::Thread, returns HIPRTC_ERROR_INTERNAL_ERROR through HIPRTC_RETURN when
// VDI_CHECK_THREAD rejects that thread, and finally runs HIP_INIT().
// It expands to several statements and declares a local named `thread`.
// NOTE(review): std::this_thread::get_id() yields a std::thread::id that is
// passed to a "%zx" conversion - confirm ClPrint handles that as intended.
#define HIPRTC_INIT_API(...) \
  ClPrint(amd::LOG_INFO, amd::LOG_API, "[%zx] %s ( %s )", std::this_thread::get_id(), __func__, ToString( __VA_ARGS__ ).c_str()); \
  amd::Thread* thread = amd::Thread::current(); \
  if (!VDI_CHECK_THREAD(thread)) { \
    HIPRTC_RETURN(HIPRTC_ERROR_INTERNAL_ERROR); \
  } \
  HIP_INIT();

// Records `ret` in hiprtc::g_lastRtcError, logs its string form and returns it
// from the enclosing function. The expansion is three separate statements, so
// it must not be used as the unbraced body of an if/else.
#define HIPRTC_RETURN(ret) \
  hiprtc::g_lastRtcError = ret; \
  ClPrint(amd::LOG_INFO, amd::LOG_API, "[%zx] %s: Returned %s", std::this_thread::get_id(), __func__, \
          hiprtcGetErrorString(hiprtc::g_lastRtcError)); \
  return hiprtc::g_lastRtcError;


#endif // HIPRTC_SRC_HIP_INTERNAL_H
diff --git a/projects/hip/vdi/trace_helper.h b/projects/hip/vdi/trace_helper.h
new file mode 100644
index 0000000000..432cabb583
--- /dev/null
+++ b/projects/hip/vdi/trace_helper.h
@@ -0,0 +1,254 @@
/* Copyright (c) 2015-present Advanced Micro Devices, Inc.
+ + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. */ + +#pragma once + +#include +#include +#include +#include +//--- +// Helper functions to convert HIP function arguments into strings. +// Handles POD data types as well as enumerations (ie hipMemcpyKind). +// The implementation uses C++11 variadic templates and template specialization. +// The hipMemcpyKind example below is a good example that shows how to implement conversion for a +// new HSA type. 
+ + +// Handy macro to convert an enumeration to a stringified version of same: +#define CASE_STR(x) \ + case x: \ + return #x; + +inline const char* ihipErrorString(hipError_t hip_error) { + switch (hip_error) { + CASE_STR(hipSuccess); + CASE_STR(hipErrorOutOfMemory); + CASE_STR(hipErrorNotInitialized); + CASE_STR(hipErrorDeinitialized); + CASE_STR(hipErrorProfilerDisabled); + CASE_STR(hipErrorProfilerNotInitialized); + CASE_STR(hipErrorProfilerAlreadyStarted); + CASE_STR(hipErrorProfilerAlreadyStopped); + CASE_STR(hipErrorInvalidImage); + CASE_STR(hipErrorInvalidContext); + CASE_STR(hipErrorContextAlreadyCurrent); + CASE_STR(hipErrorMapFailed); + CASE_STR(hipErrorUnmapFailed); + CASE_STR(hipErrorArrayIsMapped); + CASE_STR(hipErrorAlreadyMapped); + CASE_STR(hipErrorNoBinaryForGpu); + CASE_STR(hipErrorAlreadyAcquired); + CASE_STR(hipErrorNotMapped); + CASE_STR(hipErrorNotMappedAsArray); + CASE_STR(hipErrorNotMappedAsPointer); + CASE_STR(hipErrorECCNotCorrectable); + CASE_STR(hipErrorUnsupportedLimit); + CASE_STR(hipErrorContextAlreadyInUse); + CASE_STR(hipErrorPeerAccessUnsupported); + CASE_STR(hipErrorInvalidKernelFile); + CASE_STR(hipErrorInvalidGraphicsContext); + CASE_STR(hipErrorInvalidSource); + CASE_STR(hipErrorFileNotFound); + CASE_STR(hipErrorSharedObjectSymbolNotFound); + CASE_STR(hipErrorSharedObjectInitFailed); + CASE_STR(hipErrorOperatingSystem); + CASE_STR(hipErrorSetOnActiveProcess); + CASE_STR(hipErrorInvalidHandle); + CASE_STR(hipErrorNotFound); + CASE_STR(hipErrorIllegalAddress); + CASE_STR(hipErrorMissingConfiguration); + CASE_STR(hipErrorLaunchFailure); + CASE_STR(hipErrorPriorLaunchFailure); + CASE_STR(hipErrorLaunchTimeOut); + CASE_STR(hipErrorLaunchOutOfResources); + CASE_STR(hipErrorInvalidDeviceFunction); + CASE_STR(hipErrorInvalidConfiguration); + CASE_STR(hipErrorInvalidDevice); + CASE_STR(hipErrorInvalidValue); + CASE_STR(hipErrorInvalidDevicePointer); + CASE_STR(hipErrorInvalidMemcpyDirection); + CASE_STR(hipErrorUnknown); + 
CASE_STR(hipErrorNotReady); + CASE_STR(hipErrorNoDevice); + CASE_STR(hipErrorPeerAccessAlreadyEnabled); + CASE_STR(hipErrorPeerAccessNotEnabled); + CASE_STR(hipErrorRuntimeMemory); + CASE_STR(hipErrorRuntimeOther); + CASE_STR(hipErrorHostMemoryAlreadyRegistered); + CASE_STR(hipErrorHostMemoryNotRegistered); + CASE_STR(hipErrorTbd); + default: + return "hipErrorUnknown"; + }; +}; + +// Building block functions: +template +inline std::string ToHexString(T v) { + std::ostringstream ss; + ss << "0x" << std::hex << v; + return ss.str(); +}; + +template +inline std::string ToString(T* v) { + std::ostringstream ss; + if (v == NULL) { + ss << "char array:"; + } else { + ss << v; + } + return ss.str(); +}; + +template +inline std::string ToString(T** v) { + std::ostringstream ss; + if (v == NULL) { + ss << "char array:"; + } else { + ss << v; + } + return ss.str(); +}; + +//--- +// Template overloads for ToString to handle specific types + +// This is the default which works for most types: +template +inline std::string ToString(T v) { + std::ostringstream ss; + ss << v; + return ss.str(); +}; + +template <> +inline std::string ToString(hipFunction_t v) { + std::ostringstream ss; + ss << "0x" << std::hex << static_cast(v); + return ss.str(); +}; + +// hipEvent_t specialization. TODO - maybe add an event ID for debug? 
+template <> +inline std::string ToString(hipEvent_t v) { + std::ostringstream ss; + ss << "event:" << std::hex << static_cast(v); + return ss.str(); +}; + +// hipIpcEventHandle_t +template <> +inline std::string ToString(hipIpcEventHandle_t v) { + std::ostringstream ss; + ss << "ipc event:" << std::hex << static_cast(&v); + return ss.str(); +}; + +// hipStream_t +template <> +inline std::string ToString(hipStream_t v) { + std::ostringstream ss; + if (v == NULL) { + ss << "stream:"; + } else { + ss << "stream:" << std::hex << static_cast(v); + } + + return ss.str(); +}; + +// hipCtx_t +template <> +inline std::string ToString(hipCtx_t v) { + std::ostringstream ss; + if (v == NULL) { + ss << "context:"; + } else { + ss << "context:" << std::hex << static_cast(v); + } + + return ss.str(); +}; + +// hipPitchedPtr +template <> +inline std::string ToString(hipPitchedPtr v) { + std::ostringstream ss; + ss << "pitchPtr:" << std::hex << static_cast(v.ptr); + return ss.str(); +}; + +// hipMemcpyKind specialization +template <> +inline std::string ToString(hipMemcpyKind v) { + switch (v) { + CASE_STR(hipMemcpyHostToHost); + CASE_STR(hipMemcpyHostToDevice); + CASE_STR(hipMemcpyDeviceToHost); + CASE_STR(hipMemcpyDeviceToDevice); + CASE_STR(hipMemcpyDefault); + default: + return ToHexString(v); + }; +}; + +template <> +inline std::string ToString(hipFuncCache_t v) { + switch (v) { + CASE_STR(hipFuncCachePreferNone); + CASE_STR(hipFuncCachePreferShared); + CASE_STR(hipFuncCachePreferL1); + CASE_STR(hipFuncCachePreferEqual); + default: + return ToHexString(v); + }; +}; + +template <> +inline std::string ToString(hipSharedMemConfig v) { + switch (v) { + CASE_STR(hipSharedMemBankSizeDefault); + CASE_STR(hipSharedMemBankSizeFourByte); + CASE_STR(hipSharedMemBankSizeEightByte); + default: + return ToHexString(v); + }; +}; + +template <> +inline std::string ToString(hipError_t v) { + return ihipErrorString(v); +}; + +// Catch empty arguments case +inline std::string ToString() { 
return (""); } + + +//--- +// C++11 variadic template - peels off first argument, converts to string, and calls itself again to +// peel the next arg. Strings are automatically separated by comma+space. +template +inline std::string ToString(T first, Args... args) { + return ToString(first) + ", " + ToString(args...); +} +