Merge in the rocclr based hip runtime (#2032)
* Merge master-next changes in master (include vdi development in master branch)
[ROCm/hip commit: a0b5dfd625]
This commit is contained in:
+121
-37
@@ -1,5 +1,15 @@
|
||||
cmake_minimum_required(VERSION 3.4.3)
|
||||
project(hip)
|
||||
# sample command for hip-vdi, you'll need to have vdi installed
|
||||
# cmake -DHIP_COMPILER=clang -DHIP_PLATFORM=vdi ..
|
||||
# cmake -DHIP_COMPILER=clang -DHIP_PLATFORM=vdi -DVDI_DIR=/extra/lmoriche/hip-vdi/vdi -DOPENCL_DIR=/extra/lmoriche/clients/lmoriche_opencl_dev2/drivers/opencl/api/opencl -DLIBVDI_STATIC_DIR=/extra/lmoriche/hip-vdi/build/vdi ..
|
||||
|
||||
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
|
||||
|
||||
#############################
|
||||
# Options
|
||||
#############################
|
||||
option(BUILD_HIPIFY_CLANG "Enable building the CUDA->HIP converter" OFF)
|
||||
|
||||
#############################
|
||||
# Setup config generation
|
||||
@@ -100,12 +110,19 @@ add_to_config(_buildInfo HIP_COMPILER)
|
||||
# Determine HIP_RUNTIME
|
||||
# HCC, VDI, or CUDA, chosen from HIP_PLATFORM when the HIP_RUNTIME environment variable is not set
|
||||
if(NOT DEFINED ENV{HIP_RUNTIME})
|
||||
if(HIP_PLATFORM STREQUAL "hcc")
|
||||
set(HIP_RUNTIME "HCC" CACHE STRING "HIP Runtime")
|
||||
else()
|
||||
set(HIP_RUNTIME $ENV{HIP_RUNTIME} CACHE STRING "HIP Runtime")
|
||||
elseif (HIP_PLATFORM STREQUAL "vdi")
|
||||
set(HIP_RUNTIME "VDI" CACHE STRING "HIP Runtime")
|
||||
elseif (HIP_PLATFORM STREQUAL "nvcc")
|
||||
set(HIP_RUNTIME "CUDA" CACHE STRING "HIP Runtime")
|
||||
endif()
|
||||
endif()
|
||||
add_to_config(_buildInfo HIP_RUNTIME)
|
||||
|
||||
if(HIP_PLATFORM STREQUAL "vdi")
|
||||
set(USE_PROF_API "1")
|
||||
endif()
|
||||
|
||||
# If HIP_PLATFORM is hcc, we need HCC_HOME and HSA_PATH to be defined
|
||||
if(HIP_PLATFORM STREQUAL "hcc")
|
||||
@@ -190,12 +207,14 @@ message (STATUS "ROCM Installation path(ROCM_PATH): ${ROCM_PATH}")
|
||||
set(CPACK_SET_DESTDIR ON CACHE BOOL "Installer package will install hip to CMAKE_INSTALL_PREFIX instead of CPACK_PACKAGING_INSTALL_PREFIX")
|
||||
if (NOT CPACK_SET_DESTDIR)
|
||||
set(CPACK_PACKAGING_INSTALL_PREFIX "/opt/rocm/hip" CACHE PATH "Default installation path of hcc installer package")
|
||||
endif (CPACK_SET_DESTDIR)
|
||||
endif (NOT CPACK_SET_DESTDIR)
|
||||
|
||||
#############################
|
||||
# Profiling API support
|
||||
#############################
|
||||
# Generate profiling API macros/structures header
|
||||
if(HIP_PLATFORM STREQUAL "hcc")
|
||||
if(USE_PROF_API EQUAL 1)
|
||||
set(PROF_API_STR "${CMAKE_CURRENT_SOURCE_DIR}/include/hip/hcc_detail/hip_prof_str.h")
|
||||
set(PROF_API_HDR "${CMAKE_CURRENT_SOURCE_DIR}/include/hip/hcc_detail/hip_runtime_api.h")
|
||||
set(PROF_API_SRC "${CMAKE_CURRENT_SOURCE_DIR}/src")
|
||||
@@ -207,7 +226,6 @@ execute_process(COMMAND sh -c "rm -f ${PROF_API_STR}; ${PROF_API_CMD}")
|
||||
set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS ${PROF_API_GEN} ${PROF_API_HDR} ${PROF_API_STR})
|
||||
|
||||
# Enable profiling API
|
||||
if(USE_PROF_API EQUAL 1)
|
||||
find_path(PROF_API_HEADER_DIR prof_protocol.h
|
||||
HINTS
|
||||
${PROF_API_HEADER_PATH}
|
||||
@@ -224,6 +242,7 @@ if(USE_PROF_API EQUAL 1)
|
||||
MESSAGE(STATUS "Profiling API: ${PROF_API_HEADER_DIR}")
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
#############################
|
||||
# Build steps
|
||||
@@ -233,13 +252,48 @@ set(LIB_INSTALL_DIR ${CMAKE_INSTALL_PREFIX}/lib)
|
||||
set(INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_PREFIX}/include)
|
||||
set(CONFIG_PACKAGE_INSTALL_DIR ${LIB_INSTALL_DIR}/cmake/hip)
|
||||
|
||||
# Build clang hipify if enabled
|
||||
if (BUILD_HIPIFY_CLANG)
|
||||
add_subdirectory(hipify-clang)
|
||||
endif()
|
||||
|
||||
# Build LPL and CA (fat binary generation / fat binary decomposition tools) if
|
||||
# platform is hcc; do this before the ugly hijacking of the compiler, since no
|
||||
# HC code is involved.
|
||||
if (HIP_PLATFORM STREQUAL "hcc")
|
||||
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/lpl_ca)
|
||||
endif ()
|
||||
#if (HIP_PLATFORM STREQUAL "hcc")
|
||||
# add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/lpl_ca)
|
||||
#endif ()
|
||||
|
||||
if(HIP_PLATFORM STREQUAL "vdi")
|
||||
# Determine HSA_PATH
|
||||
if(NOT DEFINED HSA_PATH)
|
||||
if(NOT DEFINED ENV{HSA_PATH})
|
||||
set(HSA_PATH "/opt/rocm/hsa" CACHE PATH "Path to which HSA runtime has been installed")
|
||||
else()
|
||||
set(HSA_PATH $ENV{HSA_PATH} CACHE PATH "Path to which HSA runtime has been installed")
|
||||
endif()
|
||||
endif()
|
||||
if(IS_ABSOLUTE ${HSA_PATH} AND EXISTS ${HSA_PATH} AND IS_DIRECTORY ${HSA_PATH})
|
||||
message(STATUS "Looking for HSA runtime in: " ${HSA_PATH})
|
||||
else()
|
||||
message(FATAL_ERROR "Don't know where to find HSA runtime. Please specify absolute path using -DHSA_PATH")
|
||||
endif()
|
||||
|
||||
include_directories(${PROJECT_SOURCE_DIR}/include)
|
||||
add_subdirectory(vdi)
|
||||
file(WRITE "${PROJECT_BINARY_DIR}/.hipInfo" ${_buildInfo})
|
||||
|
||||
|
||||
# set(VDI_CXX_FLAGS "-hc -fno-gpu-rdc --amdgpu-target=gfx803 --amdgpu-target=gfx900 --amdgpu-target=gfx906 --amdgpu-target=gfx908 ")
|
||||
set(HIP_VDI_BUILD_FLAGS "${HIP_VDI_BUILD_FLAGS} -fPIC ${VDI_CXX_FLAGS} -I${HSA_PATH}/include")
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${HIP_VDI_BUILD_FLAGS}")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${HIP_VDI_BUILD_FLAGS}")
|
||||
set(HCC_CXX_FLAGS "-hc -fno-gpu-rdc --amdgpu-target=gfx803 --amdgpu-target=gfx900 --amdgpu-target=gfx906 --amdgpu-target=gfx908 ")
|
||||
set(HIP_HCC_BUILD_FLAGS "${HIP_HCC_BUILD_FLAGS} -fPIC ${HCC_CXX_FLAGS} -I${HSA_PATH}/include")
|
||||
|
||||
endif()
|
||||
|
||||
message(STATUS "\nHSA runtime in: " ${HSA_PATH})
|
||||
# Build hip_hcc if platform is hcc
|
||||
if(HIP_PLATFORM STREQUAL "hcc")
|
||||
include_directories(${PROJECT_SOURCE_DIR}/include)
|
||||
@@ -300,18 +354,18 @@ if(HIP_PLATFORM STREQUAL "hcc")
|
||||
set_property ( TARGET hip_hcc PROPERTY VERSION "${HIP_LIB_VERSION_STRING}" )
|
||||
set_property ( TARGET hip_hcc PROPERTY SOVERSION "${HIP_LIB_VERSION_MAJOR}" )
|
||||
|
||||
if(HIP_COMPILER STREQUAL "hcc")
|
||||
target_link_libraries(hip_hcc PRIVATE hc_am)
|
||||
target_link_libraries(hip_hcc_static PRIVATE hc_am)
|
||||
target_link_libraries(hip_hcc PRIVATE hc_am)
|
||||
target_link_libraries(hip_hcc_static PRIVATE hc_am)
|
||||
|
||||
add_library(hiprtc SHARED src/hiprtc.cpp)
|
||||
target_compile_options(hiprtc PRIVATE -DDISABLE_REDUCED_GPU_BLOB_COPY)
|
||||
set_property ( TARGET hiprtc PROPERTY VERSION "${HIP_LIB_VERSION_STRING}" )
|
||||
set_property ( TARGET hiprtc PROPERTY SOVERSION "${HIP_LIB_VERSION_MAJOR}" )
|
||||
|
||||
target_include_directories(
|
||||
hiprtc SYSTEM
|
||||
PRIVATE ${PROJECT_SOURCE_DIR}/include ${HSA_PATH}/include)
|
||||
|
||||
add_library(hiprtc SHARED src/hiprtc.cpp)
|
||||
target_compile_options(hiprtc PRIVATE -DDISABLE_REDUCED_GPU_BLOB_COPY)
|
||||
set_property ( TARGET hiprtc PROPERTY VERSION "${HIP_LIB_VERSION_STRING}" )
|
||||
set_property ( TARGET hiprtc PROPERTY SOVERSION "${HIP_LIB_VERSION_MAJOR}" )
|
||||
target_include_directories(
|
||||
hiprtc SYSTEM
|
||||
PRIVATE ${PROJECT_SOURCE_DIR}/include ${HSA_PATH}/include)
|
||||
endif()
|
||||
set_target_properties(hip_hcc PROPERTIES CXX_VISIBILITY_PRESET hidden)
|
||||
set_target_properties(hip_hcc PROPERTIES VISIBILITY_INLINES_HIDDEN 1)
|
||||
set_target_properties(hiprtc PROPERTIES CXX_VISIBILITY_PRESET hidden)
|
||||
@@ -349,6 +403,9 @@ if(HIP_PLATFORM STREQUAL "hcc")
|
||||
file(WRITE "${PROJECT_BINARY_DIR}/.hipInfo" ${_buildInfo})
|
||||
endif()
|
||||
|
||||
if(HIP_PLATFORM STREQUAL "hcc" OR HIP_PLATFORM STREQUAL "vdi")
|
||||
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/lpl_ca)
|
||||
endif()
|
||||
# Generate .hipVersion
|
||||
file(WRITE "${PROJECT_BINARY_DIR}/.hipVersion" ${_versionInfo})
|
||||
|
||||
@@ -377,13 +434,11 @@ endif()
|
||||
#############################
|
||||
# Install hip_hcc if platform is hcc
|
||||
if(HIP_PLATFORM STREQUAL "hcc")
|
||||
if(HIP_COMPILER STREQUAL "hcc")
|
||||
install(TARGETS hip_hcc_static hip_hcc hiprtc DESTINATION lib)
|
||||
else()
|
||||
install(TARGETS hip_hcc_static hip_hcc DESTINATION lib)
|
||||
endif()
|
||||
install(TARGETS hip_hcc_static hip_hcc hiprtc DESTINATION lib)
|
||||
endif()
|
||||
|
||||
# Install .hipInfo
|
||||
# Install .hipInfo
|
||||
if(HIP_PLATFORM STREQUAL "hcc" OR HIP_PLATFORM STREQUAL "vdi")
|
||||
install(FILES ${PROJECT_BINARY_DIR}/.hipInfo DESTINATION lib)
|
||||
endif()
|
||||
|
||||
@@ -406,6 +461,9 @@ endif()
|
||||
if(HIP_PLATFORM STREQUAL "hcc")
|
||||
install(TARGETS hip_hcc_static hip_hcc host device EXPORT hip-targets DESTINATION ${LIB_INSTALL_DIR})
|
||||
install(EXPORT hip-targets DESTINATION ${CONFIG_PACKAGE_INSTALL_DIR} NAMESPACE hip::)
|
||||
elseif( HIP_PLATFORM STREQUAL "vdi")
|
||||
# install(TARGETS hip_on_vdi host device EXPORT hip-targets DESTINATION ${LIB_INSTALL_DIR})
|
||||
endif()
|
||||
include(CMakePackageConfigHelpers)
|
||||
|
||||
configure_package_config_file(
|
||||
@@ -427,13 +485,12 @@ if(HIP_PLATFORM STREQUAL "hcc")
|
||||
DESTINATION
|
||||
${CONFIG_PACKAGE_INSTALL_DIR}
|
||||
)
|
||||
endif()
|
||||
|
||||
#############################
|
||||
# Packaging steps
|
||||
#############################
|
||||
# Package: hip_base
|
||||
set(BUILD_DIR ${CMAKE_CURRENT_BINARY_DIR}/packages/hip_base)
|
||||
set(BUILD_DIR ${CMAKE_CURRENT_BINARY_DIR}/packages/hip-base)
|
||||
configure_file(packaging/hip-base.txt ${BUILD_DIR}/CMakeLists.txt @ONLY)
|
||||
configure_file(packaging/hip-base.postinst ${BUILD_DIR}/postinst @ONLY)
|
||||
configure_file(packaging/hip-base.prerm ${BUILD_DIR}/prerm @ONLY)
|
||||
@@ -447,12 +504,19 @@ add_custom_target(pkg_hip_base COMMAND ${CMAKE_COMMAND} .
|
||||
WORKING_DIRECTORY ${BUILD_DIR}
|
||||
DEPENDS lpl ca)
|
||||
|
||||
# Package: hip_hcc
|
||||
set(BUILD_DIR ${CMAKE_CURRENT_BINARY_DIR}/packages/hip_hcc)
|
||||
configure_file(packaging/hip-hcc.txt ${BUILD_DIR}/CMakeLists.txt @ONLY)
|
||||
configure_file(packaging/hip-hcc.postinst ${BUILD_DIR}/postinst @ONLY)
|
||||
configure_file(packaging/hip-hcc.prerm ${BUILD_DIR}/prerm @ONLY)
|
||||
add_custom_target(pkg_hip_hcc COMMAND ${CMAKE_COMMAND} .
|
||||
# Packaging needs to wait for hipify-clang to build if it's enabled...
|
||||
if (BUILD_HIPIFY_CLANG)
|
||||
add_dependencies(pkg_hip_base hipify-clang)
|
||||
endif()
|
||||
|
||||
if(HIP_PLATFORM STREQUAL "hcc")
|
||||
message("HCC Package\n")
|
||||
# Package: hip_hcc
|
||||
set(BUILD_DIR ${CMAKE_CURRENT_BINARY_DIR}/packages/hip_hcc)
|
||||
configure_file(packaging/hip-hcc.txt ${BUILD_DIR}/CMakeLists.txt @ONLY)
|
||||
configure_file(packaging/hip-hcc.postinst ${BUILD_DIR}/postinst @ONLY)
|
||||
configure_file(packaging/hip-hcc.prerm ${BUILD_DIR}/prerm @ONLY)
|
||||
add_custom_target(pkg_hip_hcc COMMAND ${CMAKE_COMMAND} .
|
||||
COMMAND rm -rf *.deb *.rpm *.tar.gz
|
||||
COMMAND make package
|
||||
COMMAND cp *.deb ${PROJECT_BINARY_DIR}
|
||||
@@ -460,12 +524,23 @@ add_custom_target(pkg_hip_hcc COMMAND ${CMAKE_COMMAND} .
|
||||
COMMAND cp *.tar.gz ${PROJECT_BINARY_DIR}
|
||||
WORKING_DIRECTORY ${BUILD_DIR}
|
||||
DEPENDS hip_hcc hip_hcc_static hiprtc)
|
||||
else()
|
||||
set(BUILD_DIR ${CMAKE_CURRENT_BINARY_DIR}/vdi)
|
||||
configure_file(packaging/hip-vdi.txt ${BUILD_DIR}/CMakeLists.txt @ONLY)
|
||||
configure_file(packaging/hip-vdi.postinst ${BUILD_DIR}/postinst @ONLY)
|
||||
configure_file(packaging/hip-vdi.prerm ${BUILD_DIR}/prerm @ONLY)
|
||||
add_custom_target(hip_on_vdi COMMAND ${CMAKE_COMMAND} .
|
||||
COMMAND rm -rf *.deb *.rpm *.tar.gz
|
||||
COMMAND make package
|
||||
COMMAND cp *.deb ${PROJECT_BINARY_DIR}
|
||||
COMMAND cp *.rpm ${PROJECT_BINARY_DIR}
|
||||
COMMAND cp *.tar.gz ${PROJECT_BINARY_DIR}
|
||||
WORKING_DIRECTORY ${BUILD_DIR} )
|
||||
endif()
|
||||
|
||||
# Package: hip_nvcc
|
||||
set(BUILD_DIR ${CMAKE_CURRENT_BINARY_DIR}/packages/hip_nvcc)
|
||||
set(BUILD_DIR ${CMAKE_CURRENT_BINARY_DIR}/packages/hip-nvcc)
|
||||
configure_file(packaging/hip-nvcc.txt ${BUILD_DIR}/CMakeLists.txt @ONLY)
|
||||
configure_file(packaging/hip-nvcc.postinst ${BUILD_DIR}/postinst @ONLY)
|
||||
configure_file(packaging/hip-nvcc.prerm ${BUILD_DIR}/prerm @ONLY)
|
||||
add_custom_target(pkg_hip_nvcc COMMAND ${CMAKE_COMMAND} .
|
||||
COMMAND rm -rf *.deb *.rpm *.tar.gz
|
||||
COMMAND make package
|
||||
@@ -475,7 +550,7 @@ add_custom_target(pkg_hip_nvcc COMMAND ${CMAKE_COMMAND} .
|
||||
WORKING_DIRECTORY ${BUILD_DIR})
|
||||
|
||||
# Package: hip_doc
|
||||
set(BUILD_DIR ${CMAKE_CURRENT_BINARY_DIR}/packages/hip_doc)
|
||||
set(BUILD_DIR ${CMAKE_CURRENT_BINARY_DIR}/packages/hip-doc)
|
||||
configure_file(packaging/hip-doc.txt ${BUILD_DIR}/CMakeLists.txt @ONLY)
|
||||
add_custom_target(pkg_hip_doc COMMAND ${CMAKE_COMMAND} .
|
||||
COMMAND rm -rf *.deb *.rpm *.tar.gz
|
||||
@@ -496,6 +571,7 @@ add_custom_target(pkg_hip_samples COMMAND ${CMAKE_COMMAND} .
|
||||
COMMAND cp *.tar.gz ${PROJECT_BINARY_DIR}
|
||||
WORKING_DIRECTORY ${BUILD_DIR})
|
||||
|
||||
|
||||
# Package: all
|
||||
if(POLICY CMP0037)
|
||||
cmake_policy(PUSH)
|
||||
@@ -505,10 +581,18 @@ file(GENERATE OUTPUT ${PROJECT_BINARY_DIR}/fixnames
|
||||
CONTENT "pwd; for i in *.deb; do mv \"\$i\" \"\${i/.deb/-amd64.deb}\" ; done
|
||||
for i in *.rpm ; do mv \$i \${i/.rpm/.x86_64.rpm} ; done
|
||||
")
|
||||
add_custom_target(package
|
||||
if(HIP_PLATFORM STREQUAL "hcc")
|
||||
add_custom_target(package
|
||||
COMMAND bash ${PROJECT_BINARY_DIR}/fixnames
|
||||
WORKING_DIRECTORY ${PROJECT_BINARY_DIR}
|
||||
DEPENDS pkg_hip_base pkg_hip_hcc pkg_hip_nvcc pkg_hip_doc pkg_hip_samples)
|
||||
elseif(HIP_PLATFORM STREQUAL "vdi")
|
||||
add_custom_target(package
|
||||
COMMAND bash ${PROJECT_BINARY_DIR}/fixnames
|
||||
WORKING_DIRECTORY ${PROJECT_BINARY_DIR}
|
||||
DEPENDS pkg_hip_base hip_on_vdi pkg_hip_nvcc pkg_hip_doc pkg_hip_samples)
|
||||
endif()
|
||||
|
||||
if(POLICY CMP0037)
|
||||
cmake_policy(POP)
|
||||
endif()
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
/*
|
||||
Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (c) 2008-2020 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
@@ -13,11 +12,9 @@ all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,132 @@
|
||||
/*******************************************************************************
|
||||
* Copyright (c) 2008-2019 The Khronos Group Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and/or associated documentation files (the
|
||||
* "Materials"), to deal in the Materials without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Materials, and to
|
||||
* permit persons to whom the Materials are furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Materials.
|
||||
*
|
||||
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
|
||||
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
|
||||
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
|
||||
* https://www.khronos.org/registry/
|
||||
*
|
||||
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
|
||||
******************************************************************************/
|
||||
|
||||
#ifndef __OPENCL_CL_EGL_H
|
||||
#define __OPENCL_CL_EGL_H
|
||||
|
||||
#include <CL/cl.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
/* Command type for events created with clEnqueueAcquireEGLObjectsKHR */
|
||||
#define CL_COMMAND_EGL_FENCE_SYNC_OBJECT_KHR 0x202F
|
||||
#define CL_COMMAND_ACQUIRE_EGL_OBJECTS_KHR 0x202D
|
||||
#define CL_COMMAND_RELEASE_EGL_OBJECTS_KHR 0x202E
|
||||
|
||||
/* Error type for clCreateFromEGLImageKHR */
|
||||
#define CL_INVALID_EGL_OBJECT_KHR -1093
|
||||
#define CL_EGL_RESOURCE_NOT_ACQUIRED_KHR -1092
|
||||
|
||||
/* CLeglImageKHR is an opaque handle to an EGLImage */
|
||||
typedef void* CLeglImageKHR;
|
||||
|
||||
/* CLeglDisplayKHR is an opaque handle to an EGLDisplay */
|
||||
typedef void* CLeglDisplayKHR;
|
||||
|
||||
/* CLeglSyncKHR is an opaque handle to an EGLSync object */
|
||||
typedef void* CLeglSyncKHR;
|
||||
|
||||
/* properties passed to clCreateFromEGLImageKHR */
|
||||
typedef intptr_t cl_egl_image_properties_khr;
|
||||
|
||||
|
||||
#define cl_khr_egl_image 1
|
||||
|
||||
extern CL_API_ENTRY cl_mem CL_API_CALL
|
||||
clCreateFromEGLImageKHR(cl_context context,
|
||||
CLeglDisplayKHR egldisplay,
|
||||
CLeglImageKHR eglimage,
|
||||
cl_mem_flags flags,
|
||||
const cl_egl_image_properties_khr * properties,
|
||||
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromEGLImageKHR_fn)(
|
||||
cl_context context,
|
||||
CLeglDisplayKHR egldisplay,
|
||||
CLeglImageKHR eglimage,
|
||||
cl_mem_flags flags,
|
||||
const cl_egl_image_properties_khr * properties,
|
||||
cl_int * errcode_ret);
|
||||
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clEnqueueAcquireEGLObjectsKHR(cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem * mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event * event_wait_list,
|
||||
cl_event * event) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireEGLObjectsKHR_fn)(
|
||||
cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem * mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event * event_wait_list,
|
||||
cl_event * event);
|
||||
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clEnqueueReleaseEGLObjectsKHR(cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem * mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event * event_wait_list,
|
||||
cl_event * event) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseEGLObjectsKHR_fn)(
|
||||
cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem * mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event * event_wait_list,
|
||||
cl_event * event);
|
||||
|
||||
|
||||
#define cl_khr_egl_event 1
|
||||
|
||||
extern CL_API_ENTRY cl_event CL_API_CALL
|
||||
clCreateEventFromEGLSyncKHR(cl_context context,
|
||||
CLeglSyncKHR sync,
|
||||
CLeglDisplayKHR display,
|
||||
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef CL_API_ENTRY cl_event (CL_API_CALL *clCreateEventFromEGLSyncKHR_fn)(
|
||||
cl_context context,
|
||||
CLeglSyncKHR sync,
|
||||
CLeglDisplayKHR display,
|
||||
cl_int * errcode_ret);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* __OPENCL_CL_EGL_H */
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,171 @@
|
||||
/**********************************************************************************
|
||||
* Copyright (c) 2008-2019 The Khronos Group Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and/or associated documentation files (the
|
||||
* "Materials"), to deal in the Materials without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Materials, and to
|
||||
* permit persons to whom the Materials are furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Materials.
|
||||
*
|
||||
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
|
||||
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
|
||||
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
|
||||
* https://www.khronos.org/registry/
|
||||
*
|
||||
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
|
||||
**********************************************************************************/
|
||||
|
||||
#ifndef __OPENCL_CL_GL_H
|
||||
#define __OPENCL_CL_GL_H
|
||||
|
||||
#include <CL/cl.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef cl_uint cl_gl_object_type;
|
||||
typedef cl_uint cl_gl_texture_info;
|
||||
typedef cl_uint cl_gl_platform_info;
|
||||
typedef struct __GLsync *cl_GLsync;
|
||||
|
||||
/* cl_gl_object_type = 0x2000 - 0x200F enum values are currently taken */
|
||||
#define CL_GL_OBJECT_BUFFER 0x2000
|
||||
#define CL_GL_OBJECT_TEXTURE2D 0x2001
|
||||
#define CL_GL_OBJECT_TEXTURE3D 0x2002
|
||||
#define CL_GL_OBJECT_RENDERBUFFER 0x2003
|
||||
#ifdef CL_VERSION_1_2
|
||||
#define CL_GL_OBJECT_TEXTURE2D_ARRAY 0x200E
|
||||
#define CL_GL_OBJECT_TEXTURE1D 0x200F
|
||||
#define CL_GL_OBJECT_TEXTURE1D_ARRAY 0x2010
|
||||
#define CL_GL_OBJECT_TEXTURE_BUFFER 0x2011
|
||||
#endif
|
||||
|
||||
/* cl_gl_texture_info */
|
||||
#define CL_GL_TEXTURE_TARGET 0x2004
|
||||
#define CL_GL_MIPMAP_LEVEL 0x2005
|
||||
#ifdef CL_VERSION_1_2
|
||||
#define CL_GL_NUM_SAMPLES 0x2012
|
||||
#endif
|
||||
|
||||
|
||||
extern CL_API_ENTRY cl_mem CL_API_CALL
|
||||
clCreateFromGLBuffer(cl_context context,
|
||||
cl_mem_flags flags,
|
||||
cl_GLuint bufobj,
|
||||
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
#ifdef CL_VERSION_1_2
|
||||
|
||||
extern CL_API_ENTRY cl_mem CL_API_CALL
|
||||
clCreateFromGLTexture(cl_context context,
|
||||
cl_mem_flags flags,
|
||||
cl_GLenum target,
|
||||
cl_GLint miplevel,
|
||||
cl_GLuint texture,
|
||||
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
#endif
|
||||
|
||||
extern CL_API_ENTRY cl_mem CL_API_CALL
|
||||
clCreateFromGLRenderbuffer(cl_context context,
|
||||
cl_mem_flags flags,
|
||||
cl_GLuint renderbuffer,
|
||||
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clGetGLObjectInfo(cl_mem memobj,
|
||||
cl_gl_object_type * gl_object_type,
|
||||
cl_GLuint * gl_object_name) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clGetGLTextureInfo(cl_mem memobj,
|
||||
cl_gl_texture_info param_name,
|
||||
size_t param_value_size,
|
||||
void * param_value,
|
||||
size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clEnqueueAcquireGLObjects(cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem * mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event * event_wait_list,
|
||||
cl_event * event) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clEnqueueReleaseGLObjects(cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem * mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event * event_wait_list,
|
||||
cl_event * event) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
|
||||
/* Deprecated OpenCL 1.1 APIs */
|
||||
extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL
|
||||
clCreateFromGLTexture2D(cl_context context,
|
||||
cl_mem_flags flags,
|
||||
cl_GLenum target,
|
||||
cl_GLint miplevel,
|
||||
cl_GLuint texture,
|
||||
cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
|
||||
|
||||
extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL
|
||||
clCreateFromGLTexture3D(cl_context context,
|
||||
cl_mem_flags flags,
|
||||
cl_GLenum target,
|
||||
cl_GLint miplevel,
|
||||
cl_GLuint texture,
|
||||
cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
|
||||
|
||||
/* cl_khr_gl_sharing extension */
|
||||
|
||||
#define cl_khr_gl_sharing 1
|
||||
|
||||
typedef cl_uint cl_gl_context_info;
|
||||
|
||||
/* Additional Error Codes */
|
||||
#define CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR -1000
|
||||
|
||||
/* cl_gl_context_info */
|
||||
#define CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR 0x2006
|
||||
#define CL_DEVICES_FOR_GL_CONTEXT_KHR 0x2007
|
||||
|
||||
/* Additional cl_context_properties */
|
||||
#define CL_GL_CONTEXT_KHR 0x2008
|
||||
#define CL_EGL_DISPLAY_KHR 0x2009
|
||||
#define CL_GLX_DISPLAY_KHR 0x200A
|
||||
#define CL_WGL_HDC_KHR 0x200B
|
||||
#define CL_CGL_SHAREGROUP_KHR 0x200C
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clGetGLContextInfoKHR(const cl_context_properties * properties,
|
||||
cl_gl_context_info param_name,
|
||||
size_t param_value_size,
|
||||
void * param_value,
|
||||
size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetGLContextInfoKHR_fn)(
|
||||
const cl_context_properties * properties,
|
||||
cl_gl_context_info param_name,
|
||||
size_t param_value_size,
|
||||
void * param_value,
|
||||
size_t * param_value_size_ret);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* __OPENCL_CL_GL_H */
|
||||
@@ -0,0 +1,52 @@
|
||||
/**********************************************************************************
|
||||
* Copyright (c) 2008-2019 The Khronos Group Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and/or associated documentation files (the
|
||||
* "Materials"), to deal in the Materials without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Materials, and to
|
||||
* permit persons to whom the Materials are furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Materials.
|
||||
*
|
||||
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
|
||||
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
|
||||
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
|
||||
* https://www.khronos.org/registry/
|
||||
*
|
||||
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
|
||||
**********************************************************************************/
|
||||
|
||||
#ifndef __OPENCL_CL_GL_EXT_H
|
||||
#define __OPENCL_CL_GL_EXT_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include <CL/cl_gl.h>
|
||||
|
||||
/*
|
||||
* cl_khr_gl_event extension
|
||||
*/
|
||||
#define CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR 0x200D
|
||||
|
||||
extern CL_API_ENTRY cl_event CL_API_CALL
|
||||
clCreateEventFromGLsyncKHR(cl_context context,
|
||||
cl_GLsync cl_GLsync,
|
||||
cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_1;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* __OPENCL_CL_GL_EXT_H */
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,86 @@
|
||||
/*******************************************************************************
|
||||
* Copyright (c) 2018 The Khronos Group Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and/or associated documentation files (the
|
||||
* "Materials"), to deal in the Materials without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Materials, and to
|
||||
* permit persons to whom the Materials are furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Materials.
|
||||
*
|
||||
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
|
||||
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
|
||||
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
|
||||
* https://www.khronos.org/registry/
|
||||
*
|
||||
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
|
||||
******************************************************************************/
|
||||
|
||||
#ifndef __CL_VERSION_H
|
||||
#define __CL_VERSION_H
|
||||
|
||||
/* Detect which version to target */
|
||||
#if !defined(CL_TARGET_OPENCL_VERSION)
|
||||
#pragma message("cl_version.h: CL_TARGET_OPENCL_VERSION is not defined. Defaulting to 220 (OpenCL 2.2)")
|
||||
#define CL_TARGET_OPENCL_VERSION 220
|
||||
#endif
|
||||
/* Any value other than the six targets tested below is rejected: a
 * compile-time note is emitted and the macro is redefined to 220. */
#if CL_TARGET_OPENCL_VERSION != 100 && \
|
||||
CL_TARGET_OPENCL_VERSION != 110 && \
|
||||
CL_TARGET_OPENCL_VERSION != 120 && \
|
||||
CL_TARGET_OPENCL_VERSION != 200 && \
|
||||
CL_TARGET_OPENCL_VERSION != 210 && \
|
||||
CL_TARGET_OPENCL_VERSION != 220
|
||||
#pragma message("cl_version: CL_TARGET_OPENCL_VERSION is not a valid value (100, 110, 120, 200, 210, 220). Defaulting to 220 (OpenCL 2.2)")
|
||||
/* Drop the invalid value before redefining so the #define below is not a
 * conflicting redefinition. */
#undef CL_TARGET_OPENCL_VERSION
|
||||
#define CL_TARGET_OPENCL_VERSION 220
|
||||
#endif
|
||||
|
||||
|
||||
/* OpenCL Version */
|
||||
/* One feature macro per OpenCL release, newest first. CL_VERSION_x_y is set
 * to 1 when the target is at least x.y and the macro is not already defined,
 * so a higher target enables every lower version's macro as well. */
#if CL_TARGET_OPENCL_VERSION >= 220 && !defined(CL_VERSION_2_2)
|
||||
#define CL_VERSION_2_2 1
|
||||
#endif
|
||||
#if CL_TARGET_OPENCL_VERSION >= 210 && !defined(CL_VERSION_2_1)
|
||||
#define CL_VERSION_2_1 1
|
||||
#endif
|
||||
#if CL_TARGET_OPENCL_VERSION >= 200 && !defined(CL_VERSION_2_0)
|
||||
#define CL_VERSION_2_0 1
|
||||
#endif
|
||||
#if CL_TARGET_OPENCL_VERSION >= 120 && !defined(CL_VERSION_1_2)
|
||||
#define CL_VERSION_1_2 1
|
||||
#endif
|
||||
#if CL_TARGET_OPENCL_VERSION >= 110 && !defined(CL_VERSION_1_1)
|
||||
#define CL_VERSION_1_1 1
|
||||
#endif
|
||||
#if CL_TARGET_OPENCL_VERSION >= 100 && !defined(CL_VERSION_1_0)
|
||||
#define CL_VERSION_1_0 1
|
||||
#endif
|
||||
|
||||
/* Allow deprecated APIs for older OpenCL versions. */
|
||||
/* CL_USE_DEPRECATED_OPENCL_x_y_APIS is defined (with no value) when the
 * target is at most x.y and the macro is not already defined. Lower targets
 * therefore define more of these macros; a target of 220 defines none here. */
#if CL_TARGET_OPENCL_VERSION <= 210 && !defined(CL_USE_DEPRECATED_OPENCL_2_1_APIS)
|
||||
#define CL_USE_DEPRECATED_OPENCL_2_1_APIS
|
||||
#endif
|
||||
#if CL_TARGET_OPENCL_VERSION <= 200 && !defined(CL_USE_DEPRECATED_OPENCL_2_0_APIS)
|
||||
#define CL_USE_DEPRECATED_OPENCL_2_0_APIS
|
||||
#endif
|
||||
#if CL_TARGET_OPENCL_VERSION <= 120 && !defined(CL_USE_DEPRECATED_OPENCL_1_2_APIS)
|
||||
#define CL_USE_DEPRECATED_OPENCL_1_2_APIS
|
||||
#endif
|
||||
#if CL_TARGET_OPENCL_VERSION <= 110 && !defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS)
|
||||
#define CL_USE_DEPRECATED_OPENCL_1_1_APIS
|
||||
#endif
|
||||
#if CL_TARGET_OPENCL_VERSION <= 100 && !defined(CL_USE_DEPRECATED_OPENCL_1_0_APIS)
|
||||
#define CL_USE_DEPRECATED_OPENCL_1_0_APIS
|
||||
#endif
|
||||
|
||||
#endif /* __CL_VERSION_H */
|
||||
@@ -0,0 +1,47 @@
|
||||
/*******************************************************************************
|
||||
* Copyright (c) 2008-2015 The Khronos Group Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and/or associated documentation files (the
|
||||
* "Materials"), to deal in the Materials without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Materials, and to
|
||||
* permit persons to whom the Materials are furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Materials.
|
||||
*
|
||||
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
|
||||
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
|
||||
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
|
||||
* https://www.khronos.org/registry/
|
||||
*
|
||||
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
|
||||
******************************************************************************/
|
||||
|
||||
/* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */
|
||||
|
||||
#ifndef __OPENCL_H
|
||||
#define __OPENCL_H
|
||||
|
||||
/* Umbrella header body: includes CL/cl.h, CL/cl_gl.h, CL/cl_gl_ext.h and
 * CL/cl_ext.h. When compiled as C++ the includes are wrapped in an
 * extern "C" block. */
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include <CL/cl.h>
|
||||
#include <CL/cl_gl.h>
|
||||
#include <CL/cl_gl_ext.h>
|
||||
#include <CL/cl_ext.h>
|
||||
|
||||
/* Close the extern "C" block opened above. */
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* __OPENCL_H */
|
||||
@@ -0,0 +1,329 @@
|
||||
/* -*- mode: c; tab-width: 8; -*- */
|
||||
/* vi: set sw=4 ts=8: */
|
||||
/* Reference version of egl.h for EGL 1.4.
|
||||
* $Revision: 9356 $ on $Date: 2009-10-21 02:52:25 -0700 (Wed, 21 Oct 2009) $
|
||||
*/
|
||||
|
||||
/*
|
||||
** Copyright (c) 2007-2009 The Khronos Group Inc.
|
||||
**
|
||||
** Permission is hereby granted, free of charge, to any person obtaining a
|
||||
** copy of this software and/or associated documentation files (the
|
||||
** "Materials"), to deal in the Materials without restriction, including
|
||||
** without limitation the rights to use, copy, modify, merge, publish,
|
||||
** distribute, sublicense, and/or sell copies of the Materials, and to
|
||||
** permit persons to whom the Materials are furnished to do so, subject to
|
||||
** the following conditions:
|
||||
**
|
||||
** The above copyright notice and this permission notice shall be included
|
||||
** in all copies or substantial portions of the Materials.
|
||||
**
|
||||
** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
** MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
|
||||
*/
|
||||
|
||||
#ifndef __egl_h_
|
||||
#define __egl_h_
|
||||
|
||||
/* All platform-dependent types and macro boilerplate (such as EGLAPI
|
||||
* and EGLAPIENTRY) should go in eglplatform.h.
|
||||
*/
|
||||
#include <EGL/eglplatform.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* EGL Types */
|
||||
/* EGLint is defined in eglplatform.h */
|
||||
typedef unsigned int EGLBoolean;
|
||||
typedef unsigned int EGLenum;
|
||||
typedef void *EGLConfig;
|
||||
typedef void *EGLContext;
|
||||
typedef void *EGLDisplay;
|
||||
typedef void *EGLSurface;
|
||||
typedef void *EGLClientBuffer;
|
||||
|
||||
/* EGL Versioning */
|
||||
#define EGL_VERSION_1_0 1
|
||||
#define EGL_VERSION_1_1 1
|
||||
#define EGL_VERSION_1_2 1
|
||||
#define EGL_VERSION_1_3 1
|
||||
#define EGL_VERSION_1_4 1
|
||||
|
||||
/* EGL Enumerants. Bitmasks and other exceptional cases aside, most
|
||||
* enums are assigned unique values starting at 0x3000.
|
||||
*/
|
||||
|
||||
/* EGL aliases */
|
||||
#define EGL_FALSE 0
|
||||
#define EGL_TRUE 1
|
||||
|
||||
/* Out-of-band handle values */
|
||||
#define EGL_DEFAULT_DISPLAY ((EGLNativeDisplayType)0)
|
||||
#define EGL_NO_CONTEXT ((EGLContext)0)
|
||||
#define EGL_NO_DISPLAY ((EGLDisplay)0)
|
||||
#define EGL_NO_SURFACE ((EGLSurface)0)
|
||||
|
||||
/* Out-of-band attribute value */
|
||||
#define EGL_DONT_CARE ((EGLint)-1)
|
||||
|
||||
/* Errors / GetError return values */
|
||||
#define EGL_SUCCESS 0x3000
|
||||
#define EGL_NOT_INITIALIZED 0x3001
|
||||
#define EGL_BAD_ACCESS 0x3002
|
||||
#define EGL_BAD_ALLOC 0x3003
|
||||
#define EGL_BAD_ATTRIBUTE 0x3004
|
||||
#define EGL_BAD_CONFIG 0x3005
|
||||
#define EGL_BAD_CONTEXT 0x3006
|
||||
#define EGL_BAD_CURRENT_SURFACE 0x3007
|
||||
#define EGL_BAD_DISPLAY 0x3008
|
||||
#define EGL_BAD_MATCH 0x3009
|
||||
#define EGL_BAD_NATIVE_PIXMAP 0x300A
|
||||
#define EGL_BAD_NATIVE_WINDOW 0x300B
|
||||
#define EGL_BAD_PARAMETER 0x300C
|
||||
#define EGL_BAD_SURFACE 0x300D
|
||||
#define EGL_CONTEXT_LOST 0x300E /* EGL 1.1 - IMG_power_management */
|
||||
|
||||
/* Reserved 0x300F-0x301F for additional errors */
|
||||
|
||||
/* Config attributes */
|
||||
#define EGL_BUFFER_SIZE 0x3020
|
||||
#define EGL_ALPHA_SIZE 0x3021
|
||||
#define EGL_BLUE_SIZE 0x3022
|
||||
#define EGL_GREEN_SIZE 0x3023
|
||||
#define EGL_RED_SIZE 0x3024
|
||||
#define EGL_DEPTH_SIZE 0x3025
|
||||
#define EGL_STENCIL_SIZE 0x3026
|
||||
#define EGL_CONFIG_CAVEAT 0x3027
|
||||
#define EGL_CONFIG_ID 0x3028
|
||||
#define EGL_LEVEL 0x3029
|
||||
#define EGL_MAX_PBUFFER_HEIGHT 0x302A
|
||||
#define EGL_MAX_PBUFFER_PIXELS 0x302B
|
||||
#define EGL_MAX_PBUFFER_WIDTH 0x302C
|
||||
#define EGL_NATIVE_RENDERABLE 0x302D
|
||||
#define EGL_NATIVE_VISUAL_ID 0x302E
|
||||
#define EGL_NATIVE_VISUAL_TYPE 0x302F
|
||||
#define EGL_SAMPLES 0x3031
|
||||
#define EGL_SAMPLE_BUFFERS 0x3032
|
||||
#define EGL_SURFACE_TYPE 0x3033
|
||||
#define EGL_TRANSPARENT_TYPE 0x3034
|
||||
#define EGL_TRANSPARENT_BLUE_VALUE 0x3035
|
||||
#define EGL_TRANSPARENT_GREEN_VALUE 0x3036
|
||||
#define EGL_TRANSPARENT_RED_VALUE 0x3037
|
||||
#define EGL_NONE 0x3038 /* Attrib list terminator */
|
||||
#define EGL_BIND_TO_TEXTURE_RGB 0x3039
|
||||
#define EGL_BIND_TO_TEXTURE_RGBA 0x303A
|
||||
#define EGL_MIN_SWAP_INTERVAL 0x303B
|
||||
#define EGL_MAX_SWAP_INTERVAL 0x303C
|
||||
#define EGL_LUMINANCE_SIZE 0x303D
|
||||
#define EGL_ALPHA_MASK_SIZE 0x303E
|
||||
#define EGL_COLOR_BUFFER_TYPE 0x303F
|
||||
#define EGL_RENDERABLE_TYPE 0x3040
|
||||
#define EGL_MATCH_NATIVE_PIXMAP 0x3041 /* Pseudo-attribute (not queryable) */
|
||||
#define EGL_CONFORMANT 0x3042
|
||||
|
||||
/* Reserved 0x3043-0x304F for additional config attributes (0x3041 and 0x3042 are assigned above) */
|
||||
|
||||
/* Config attribute values */
|
||||
#define EGL_SLOW_CONFIG 0x3050 /* EGL_CONFIG_CAVEAT value */
|
||||
#define EGL_NON_CONFORMANT_CONFIG 0x3051 /* EGL_CONFIG_CAVEAT value */
|
||||
#define EGL_TRANSPARENT_RGB 0x3052 /* EGL_TRANSPARENT_TYPE value */
|
||||
#define EGL_RGB_BUFFER 0x308E /* EGL_COLOR_BUFFER_TYPE value */
|
||||
#define EGL_LUMINANCE_BUFFER 0x308F /* EGL_COLOR_BUFFER_TYPE value */
|
||||
|
||||
/* More config attribute values, for EGL_TEXTURE_FORMAT */
|
||||
#define EGL_NO_TEXTURE 0x305C
|
||||
#define EGL_TEXTURE_RGB 0x305D
|
||||
#define EGL_TEXTURE_RGBA 0x305E
|
||||
#define EGL_TEXTURE_2D 0x305F
|
||||
|
||||
/* Config attribute mask bits */
|
||||
#define EGL_PBUFFER_BIT 0x0001 /* EGL_SURFACE_TYPE mask bits */
|
||||
#define EGL_PIXMAP_BIT 0x0002 /* EGL_SURFACE_TYPE mask bits */
|
||||
#define EGL_WINDOW_BIT 0x0004 /* EGL_SURFACE_TYPE mask bits */
|
||||
#define EGL_VG_COLORSPACE_LINEAR_BIT 0x0020 /* EGL_SURFACE_TYPE mask bits */
|
||||
#define EGL_VG_ALPHA_FORMAT_PRE_BIT 0x0040 /* EGL_SURFACE_TYPE mask bits */
|
||||
#define EGL_MULTISAMPLE_RESOLVE_BOX_BIT 0x0200 /* EGL_SURFACE_TYPE mask bits */
|
||||
#define EGL_SWAP_BEHAVIOR_PRESERVED_BIT 0x0400 /* EGL_SURFACE_TYPE mask bits */
|
||||
|
||||
#define EGL_OPENGL_ES_BIT 0x0001 /* EGL_RENDERABLE_TYPE mask bits */
|
||||
#define EGL_OPENVG_BIT 0x0002 /* EGL_RENDERABLE_TYPE mask bits */
|
||||
#define EGL_OPENGL_ES2_BIT 0x0004 /* EGL_RENDERABLE_TYPE mask bits */
|
||||
#define EGL_OPENGL_BIT 0x0008 /* EGL_RENDERABLE_TYPE mask bits */
|
||||
|
||||
/* QueryString targets */
|
||||
#define EGL_VENDOR 0x3053
|
||||
#define EGL_VERSION 0x3054
|
||||
#define EGL_EXTENSIONS 0x3055
|
||||
#define EGL_CLIENT_APIS 0x308D
|
||||
|
||||
/* QuerySurface / SurfaceAttrib / CreatePbufferSurface targets */
|
||||
#define EGL_HEIGHT 0x3056
|
||||
#define EGL_WIDTH 0x3057
|
||||
#define EGL_LARGEST_PBUFFER 0x3058
|
||||
#define EGL_TEXTURE_FORMAT 0x3080
|
||||
#define EGL_TEXTURE_TARGET 0x3081
|
||||
#define EGL_MIPMAP_TEXTURE 0x3082
|
||||
#define EGL_MIPMAP_LEVEL 0x3083
|
||||
#define EGL_RENDER_BUFFER 0x3086
|
||||
#define EGL_VG_COLORSPACE 0x3087
|
||||
#define EGL_VG_ALPHA_FORMAT 0x3088
|
||||
#define EGL_HORIZONTAL_RESOLUTION 0x3090
|
||||
#define EGL_VERTICAL_RESOLUTION 0x3091
|
||||
#define EGL_PIXEL_ASPECT_RATIO 0x3092
|
||||
#define EGL_SWAP_BEHAVIOR 0x3093
|
||||
#define EGL_MULTISAMPLE_RESOLVE 0x3099
|
||||
|
||||
/* EGL_RENDER_BUFFER values / BindTexImage / ReleaseTexImage buffer targets */
|
||||
#define EGL_BACK_BUFFER 0x3084
|
||||
#define EGL_SINGLE_BUFFER 0x3085
|
||||
|
||||
/* OpenVG color spaces */
|
||||
#define EGL_VG_COLORSPACE_sRGB 0x3089 /* EGL_VG_COLORSPACE value */
|
||||
#define EGL_VG_COLORSPACE_LINEAR 0x308A /* EGL_VG_COLORSPACE value */
|
||||
|
||||
/* OpenVG alpha formats */
|
||||
#define EGL_VG_ALPHA_FORMAT_NONPRE 0x308B /* EGL_VG_ALPHA_FORMAT value */
|
||||
#define EGL_VG_ALPHA_FORMAT_PRE 0x308C /* EGL_VG_ALPHA_FORMAT value */
|
||||
|
||||
/* Constant scale factor by which fractional display resolutions &
|
||||
* aspect ratio are scaled when queried as integer values.
|
||||
*/
|
||||
#define EGL_DISPLAY_SCALING 10000
|
||||
|
||||
/* Unknown display resolution/aspect ratio */
|
||||
#define EGL_UNKNOWN ((EGLint)-1)
|
||||
|
||||
/* Back buffer swap behaviors */
|
||||
#define EGL_BUFFER_PRESERVED 0x3094 /* EGL_SWAP_BEHAVIOR value */
|
||||
#define EGL_BUFFER_DESTROYED 0x3095 /* EGL_SWAP_BEHAVIOR value */
|
||||
|
||||
/* CreatePbufferFromClientBuffer buffer types */
|
||||
#define EGL_OPENVG_IMAGE 0x3096
|
||||
|
||||
/* QueryContext targets */
|
||||
#define EGL_CONTEXT_CLIENT_TYPE 0x3097
|
||||
|
||||
/* CreateContext attributes */
|
||||
#define EGL_CONTEXT_CLIENT_VERSION 0x3098
|
||||
|
||||
/* Multisample resolution behaviors */
|
||||
#define EGL_MULTISAMPLE_RESOLVE_DEFAULT 0x309A /* EGL_MULTISAMPLE_RESOLVE value */
|
||||
#define EGL_MULTISAMPLE_RESOLVE_BOX 0x309B /* EGL_MULTISAMPLE_RESOLVE value */
|
||||
|
||||
/* BindAPI/QueryAPI targets */
|
||||
#define EGL_OPENGL_ES_API 0x30A0
|
||||
#define EGL_OPENVG_API 0x30A1
|
||||
#define EGL_OPENGL_API 0x30A2
|
||||
|
||||
/* GetCurrentSurface targets */
|
||||
#define EGL_DRAW 0x3059
|
||||
#define EGL_READ 0x305A
|
||||
|
||||
/* WaitNative engines */
|
||||
#define EGL_CORE_NATIVE_ENGINE 0x305B
|
||||
|
||||
/* EGL 1.2 tokens renamed for consistency in EGL 1.3 */
|
||||
#define EGL_COLORSPACE EGL_VG_COLORSPACE
|
||||
#define EGL_ALPHA_FORMAT EGL_VG_ALPHA_FORMAT
|
||||
#define EGL_COLORSPACE_sRGB EGL_VG_COLORSPACE_sRGB
|
||||
#define EGL_COLORSPACE_LINEAR EGL_VG_COLORSPACE_LINEAR
|
||||
#define EGL_ALPHA_FORMAT_NONPRE EGL_VG_ALPHA_FORMAT_NONPRE
|
||||
#define EGL_ALPHA_FORMAT_PRE EGL_VG_ALPHA_FORMAT_PRE
|
||||
|
||||
/* EGL extensions must request enum blocks from the Khronos
|
||||
* API Registrar, who maintains the enumerant registry. Submit
|
||||
* a bug in Khronos Bugzilla against task "Registry".
|
||||
*/
|
||||
|
||||
|
||||
|
||||
/* EGL Functions */
|
||||
|
||||
EGLAPI EGLint EGLAPIENTRY eglGetError(void);
|
||||
|
||||
EGLAPI EGLDisplay EGLAPIENTRY eglGetDisplay(EGLNativeDisplayType display_id);
|
||||
EGLAPI EGLBoolean EGLAPIENTRY eglInitialize(EGLDisplay dpy, EGLint *major, EGLint *minor);
|
||||
EGLAPI EGLBoolean EGLAPIENTRY eglTerminate(EGLDisplay dpy);
|
||||
|
||||
EGLAPI const char * EGLAPIENTRY eglQueryString(EGLDisplay dpy, EGLint name);
|
||||
|
||||
EGLAPI EGLBoolean EGLAPIENTRY eglGetConfigs(EGLDisplay dpy, EGLConfig *configs,
|
||||
EGLint config_size, EGLint *num_config);
|
||||
EGLAPI EGLBoolean EGLAPIENTRY eglChooseConfig(EGLDisplay dpy, const EGLint *attrib_list,
|
||||
EGLConfig *configs, EGLint config_size,
|
||||
EGLint *num_config);
|
||||
EGLAPI EGLBoolean EGLAPIENTRY eglGetConfigAttrib(EGLDisplay dpy, EGLConfig config,
|
||||
EGLint attribute, EGLint *value);
|
||||
|
||||
EGLAPI EGLSurface EGLAPIENTRY eglCreateWindowSurface(EGLDisplay dpy, EGLConfig config,
|
||||
EGLNativeWindowType win,
|
||||
const EGLint *attrib_list);
|
||||
EGLAPI EGLSurface EGLAPIENTRY eglCreatePbufferSurface(EGLDisplay dpy, EGLConfig config,
|
||||
const EGLint *attrib_list);
|
||||
EGLAPI EGLSurface EGLAPIENTRY eglCreatePixmapSurface(EGLDisplay dpy, EGLConfig config,
|
||||
EGLNativePixmapType pixmap,
|
||||
const EGLint *attrib_list);
|
||||
EGLAPI EGLBoolean EGLAPIENTRY eglDestroySurface(EGLDisplay dpy, EGLSurface surface);
|
||||
EGLAPI EGLBoolean EGLAPIENTRY eglQuerySurface(EGLDisplay dpy, EGLSurface surface,
|
||||
EGLint attribute, EGLint *value);
|
||||
|
||||
EGLAPI EGLBoolean EGLAPIENTRY eglBindAPI(EGLenum api);
|
||||
EGLAPI EGLenum EGLAPIENTRY eglQueryAPI(void);
|
||||
|
||||
EGLAPI EGLBoolean EGLAPIENTRY eglWaitClient(void);
|
||||
|
||||
EGLAPI EGLBoolean EGLAPIENTRY eglReleaseThread(void);
|
||||
|
||||
EGLAPI EGLSurface EGLAPIENTRY eglCreatePbufferFromClientBuffer(
|
||||
EGLDisplay dpy, EGLenum buftype, EGLClientBuffer buffer,
|
||||
EGLConfig config, const EGLint *attrib_list);
|
||||
|
||||
EGLAPI EGLBoolean EGLAPIENTRY eglSurfaceAttrib(EGLDisplay dpy, EGLSurface surface,
|
||||
EGLint attribute, EGLint value);
|
||||
EGLAPI EGLBoolean EGLAPIENTRY eglBindTexImage(EGLDisplay dpy, EGLSurface surface, EGLint buffer);
|
||||
EGLAPI EGLBoolean EGLAPIENTRY eglReleaseTexImage(EGLDisplay dpy, EGLSurface surface, EGLint buffer);
|
||||
|
||||
|
||||
EGLAPI EGLBoolean EGLAPIENTRY eglSwapInterval(EGLDisplay dpy, EGLint interval);
|
||||
|
||||
|
||||
EGLAPI EGLContext EGLAPIENTRY eglCreateContext(EGLDisplay dpy, EGLConfig config,
|
||||
EGLContext share_context,
|
||||
const EGLint *attrib_list);
|
||||
EGLAPI EGLBoolean EGLAPIENTRY eglDestroyContext(EGLDisplay dpy, EGLContext ctx);
|
||||
EGLAPI EGLBoolean EGLAPIENTRY eglMakeCurrent(EGLDisplay dpy, EGLSurface draw,
|
||||
EGLSurface read, EGLContext ctx);
|
||||
|
||||
EGLAPI EGLContext EGLAPIENTRY eglGetCurrentContext(void);
|
||||
EGLAPI EGLSurface EGLAPIENTRY eglGetCurrentSurface(EGLint readdraw);
|
||||
EGLAPI EGLDisplay EGLAPIENTRY eglGetCurrentDisplay(void);
|
||||
EGLAPI EGLBoolean EGLAPIENTRY eglQueryContext(EGLDisplay dpy, EGLContext ctx,
|
||||
EGLint attribute, EGLint *value);
|
||||
|
||||
EGLAPI EGLBoolean EGLAPIENTRY eglWaitGL(void);
|
||||
EGLAPI EGLBoolean EGLAPIENTRY eglWaitNative(EGLint engine);
|
||||
EGLAPI EGLBoolean EGLAPIENTRY eglSwapBuffers(EGLDisplay dpy, EGLSurface surface);
|
||||
EGLAPI EGLBoolean EGLAPIENTRY eglCopyBuffers(EGLDisplay dpy, EGLSurface surface,
|
||||
EGLNativePixmapType target);
|
||||
|
||||
/* This is a generic function pointer type, whose name indicates it must
|
||||
* be cast to the proper type *and calling convention* before use.
|
||||
*/
|
||||
typedef void (*__eglMustCastToProperFunctionPointerType)(void);
|
||||
|
||||
/* Now, define eglGetProcAddress using the generic function ptr. type */
|
||||
EGLAPI __eglMustCastToProperFunctionPointerType EGLAPIENTRY
|
||||
eglGetProcAddress(const char *procname);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* __egl_h_ */
|
||||
@@ -0,0 +1,645 @@
|
||||
#ifndef __eglext_h_
|
||||
#define __eglext_h_ 1
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
|
||||
** Copyright (c) 2013 The Khronos Group Inc.
|
||||
**
|
||||
** Permission is hereby granted, free of charge, to any person obtaining a
|
||||
** copy of this software and/or associated documentation files (the
|
||||
** "Materials"), to deal in the Materials without restriction, including
|
||||
** without limitation the rights to use, copy, modify, merge, publish,
|
||||
** distribute, sublicense, and/or sell copies of the Materials, and to
|
||||
** permit persons to whom the Materials are furnished to do so, subject to
|
||||
** the following conditions:
|
||||
**
|
||||
** The above copyright notice and this permission notice shall be included
|
||||
** in all copies or substantial portions of the Materials.
|
||||
**
|
||||
** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
** MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
|
||||
*/
|
||||
/*
|
||||
** This header is generated from the Khronos OpenGL / OpenGL ES XML
|
||||
** API Registry. The current version of the Registry, generator scripts
|
||||
** used to make the header, and the header can be found at
|
||||
** http://www.opengl.org/registry/
|
||||
**
|
||||
** Khronos $Revision: 24350 $ on $Date: 2013-12-04 12:46:23 -0800 (Wed, 04 Dec 2013) $
|
||||
*/
|
||||
|
||||
#include <EGL/eglplatform.h>
|
||||
|
||||
#define EGL_EGLEXT_VERSION 20131204
|
||||
|
||||
/* Generated C header for:
|
||||
* API: egl
|
||||
* Versions considered: .*
|
||||
* Versions emitted: _nomatch_^
|
||||
* Default extensions included: egl
|
||||
* Additional extensions included: _nomatch_^
|
||||
* Extensions removed: _nomatch_^
|
||||
*/
|
||||
|
||||
#ifndef EGL_KHR_cl_event
|
||||
#define EGL_KHR_cl_event 1
|
||||
#define EGL_CL_EVENT_HANDLE_KHR 0x309C
|
||||
#define EGL_SYNC_CL_EVENT_KHR 0x30FE
|
||||
#define EGL_SYNC_CL_EVENT_COMPLETE_KHR 0x30FF
|
||||
#endif /* EGL_KHR_cl_event */
|
||||
|
||||
#ifndef EGL_KHR_cl_event2
|
||||
#define EGL_KHR_cl_event2 1
|
||||
typedef void *EGLSyncKHR;
|
||||
typedef intptr_t EGLAttribKHR;
|
||||
typedef EGLSyncKHR (EGLAPIENTRYP PFNEGLCREATESYNC64KHRPROC) (EGLDisplay dpy, EGLenum type, const EGLAttribKHR *attrib_list);
|
||||
#ifdef EGL_EGLEXT_PROTOTYPES
|
||||
EGLAPI EGLSyncKHR EGLAPIENTRY eglCreateSync64KHR (EGLDisplay dpy, EGLenum type, const EGLAttribKHR *attrib_list);
|
||||
#endif
|
||||
#endif /* EGL_KHR_cl_event2 */
|
||||
|
||||
#ifndef EGL_KHR_client_get_all_proc_addresses
|
||||
#define EGL_KHR_client_get_all_proc_addresses 1
|
||||
#endif /* EGL_KHR_client_get_all_proc_addresses */
|
||||
|
||||
#ifndef EGL_KHR_config_attribs
|
||||
#define EGL_KHR_config_attribs 1
|
||||
#define EGL_CONFORMANT_KHR 0x3042
|
||||
#define EGL_VG_COLORSPACE_LINEAR_BIT_KHR 0x0020
|
||||
#define EGL_VG_ALPHA_FORMAT_PRE_BIT_KHR 0x0040
|
||||
#endif /* EGL_KHR_config_attribs */
|
||||
|
||||
#ifndef EGL_KHR_create_context
|
||||
#define EGL_KHR_create_context 1
|
||||
#define EGL_CONTEXT_MAJOR_VERSION_KHR 0x3098
|
||||
#define EGL_CONTEXT_MINOR_VERSION_KHR 0x30FB
|
||||
#define EGL_CONTEXT_FLAGS_KHR 0x30FC
|
||||
#define EGL_CONTEXT_OPENGL_PROFILE_MASK_KHR 0x30FD
|
||||
#define EGL_CONTEXT_OPENGL_RESET_NOTIFICATION_STRATEGY_KHR 0x31BD
|
||||
#define EGL_NO_RESET_NOTIFICATION_KHR 0x31BE
|
||||
#define EGL_LOSE_CONTEXT_ON_RESET_KHR 0x31BF
|
||||
#define EGL_CONTEXT_OPENGL_DEBUG_BIT_KHR 0x00000001
|
||||
#define EGL_CONTEXT_OPENGL_FORWARD_COMPATIBLE_BIT_KHR 0x00000002
|
||||
#define EGL_CONTEXT_OPENGL_ROBUST_ACCESS_BIT_KHR 0x00000004
|
||||
#define EGL_CONTEXT_OPENGL_CORE_PROFILE_BIT_KHR 0x00000001
|
||||
#define EGL_CONTEXT_OPENGL_COMPATIBILITY_PROFILE_BIT_KHR 0x00000002
|
||||
#define EGL_OPENGL_ES3_BIT_KHR 0x00000040
|
||||
#endif /* EGL_KHR_create_context */
|
||||
|
||||
#ifndef EGL_KHR_fence_sync
|
||||
#define EGL_KHR_fence_sync 1
|
||||
#ifdef KHRONOS_SUPPORT_INT64
|
||||
#define EGL_SYNC_PRIOR_COMMANDS_COMPLETE_KHR 0x30F0
|
||||
#define EGL_SYNC_CONDITION_KHR 0x30F8
|
||||
#define EGL_SYNC_FENCE_KHR 0x30F9
|
||||
#endif /* KHRONOS_SUPPORT_INT64 */
|
||||
#endif /* EGL_KHR_fence_sync */
|
||||
|
||||
#ifndef EGL_KHR_get_all_proc_addresses
|
||||
#define EGL_KHR_get_all_proc_addresses 1
|
||||
#endif /* EGL_KHR_get_all_proc_addresses */
|
||||
|
||||
#ifndef EGL_KHR_gl_renderbuffer_image
|
||||
#define EGL_KHR_gl_renderbuffer_image 1
|
||||
#define EGL_GL_RENDERBUFFER_KHR 0x30B9
|
||||
#endif /* EGL_KHR_gl_renderbuffer_image */
|
||||
|
||||
#ifndef EGL_KHR_gl_texture_2D_image
|
||||
#define EGL_KHR_gl_texture_2D_image 1
|
||||
#define EGL_GL_TEXTURE_2D_KHR 0x30B1
|
||||
#define EGL_GL_TEXTURE_LEVEL_KHR 0x30BC
|
||||
#endif /* EGL_KHR_gl_texture_2D_image */
|
||||
|
||||
#ifndef EGL_KHR_gl_texture_3D_image
|
||||
#define EGL_KHR_gl_texture_3D_image 1
|
||||
#define EGL_GL_TEXTURE_3D_KHR 0x30B2
|
||||
#define EGL_GL_TEXTURE_ZOFFSET_KHR 0x30BD
|
||||
#endif /* EGL_KHR_gl_texture_3D_image */
|
||||
|
||||
#ifndef EGL_KHR_gl_texture_cubemap_image
|
||||
#define EGL_KHR_gl_texture_cubemap_image 1
|
||||
#define EGL_GL_TEXTURE_CUBE_MAP_POSITIVE_X_KHR 0x30B3
|
||||
#define EGL_GL_TEXTURE_CUBE_MAP_NEGATIVE_X_KHR 0x30B4
|
||||
#define EGL_GL_TEXTURE_CUBE_MAP_POSITIVE_Y_KHR 0x30B5
|
||||
#define EGL_GL_TEXTURE_CUBE_MAP_NEGATIVE_Y_KHR 0x30B6
|
||||
#define EGL_GL_TEXTURE_CUBE_MAP_POSITIVE_Z_KHR 0x30B7
|
||||
#define EGL_GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_KHR 0x30B8
|
||||
#endif /* EGL_KHR_gl_texture_cubemap_image */
|
||||
|
||||
#ifndef EGL_KHR_image
|
||||
#define EGL_KHR_image 1
|
||||
typedef void *EGLImageKHR;
|
||||
#define EGL_NATIVE_PIXMAP_KHR 0x30B0
|
||||
#define EGL_NO_IMAGE_KHR ((EGLImageKHR)0)
|
||||
typedef EGLImageKHR (EGLAPIENTRYP PFNEGLCREATEIMAGEKHRPROC) (EGLDisplay dpy, EGLContext ctx, EGLenum target, EGLClientBuffer buffer, const EGLint *attrib_list);
|
||||
typedef EGLBoolean (EGLAPIENTRYP PFNEGLDESTROYIMAGEKHRPROC) (EGLDisplay dpy, EGLImageKHR image);
|
||||
#ifdef EGL_EGLEXT_PROTOTYPES
|
||||
EGLAPI EGLImageKHR EGLAPIENTRY eglCreateImageKHR (EGLDisplay dpy, EGLContext ctx, EGLenum target, EGLClientBuffer buffer, const EGLint *attrib_list);
|
||||
EGLAPI EGLBoolean EGLAPIENTRY eglDestroyImageKHR (EGLDisplay dpy, EGLImageKHR image);
|
||||
#endif
|
||||
#endif /* EGL_KHR_image */
|
||||
|
||||
#ifndef EGL_KHR_image_base
|
||||
#define EGL_KHR_image_base 1
|
||||
#define EGL_IMAGE_PRESERVED_KHR 0x30D2
|
||||
#endif /* EGL_KHR_image_base */
|
||||
|
||||
#ifndef EGL_KHR_image_pixmap
|
||||
#define EGL_KHR_image_pixmap 1
|
||||
#endif /* EGL_KHR_image_pixmap */
|
||||
|
||||
#ifndef EGL_KHR_lock_surface
|
||||
#define EGL_KHR_lock_surface 1
|
||||
#define EGL_READ_SURFACE_BIT_KHR 0x0001
|
||||
#define EGL_WRITE_SURFACE_BIT_KHR 0x0002
|
||||
#define EGL_LOCK_SURFACE_BIT_KHR 0x0080
|
||||
#define EGL_OPTIMAL_FORMAT_BIT_KHR 0x0100
|
||||
#define EGL_MATCH_FORMAT_KHR 0x3043
|
||||
#define EGL_FORMAT_RGB_565_EXACT_KHR 0x30C0
|
||||
#define EGL_FORMAT_RGB_565_KHR 0x30C1
|
||||
#define EGL_FORMAT_RGBA_8888_EXACT_KHR 0x30C2
|
||||
#define EGL_FORMAT_RGBA_8888_KHR 0x30C3
|
||||
#define EGL_MAP_PRESERVE_PIXELS_KHR 0x30C4
|
||||
#define EGL_LOCK_USAGE_HINT_KHR 0x30C5
|
||||
#define EGL_BITMAP_POINTER_KHR 0x30C6
|
||||
#define EGL_BITMAP_PITCH_KHR 0x30C7
|
||||
#define EGL_BITMAP_ORIGIN_KHR 0x30C8
|
||||
#define EGL_BITMAP_PIXEL_RED_OFFSET_KHR 0x30C9
|
||||
#define EGL_BITMAP_PIXEL_GREEN_OFFSET_KHR 0x30CA
|
||||
#define EGL_BITMAP_PIXEL_BLUE_OFFSET_KHR 0x30CB
|
||||
#define EGL_BITMAP_PIXEL_ALPHA_OFFSET_KHR 0x30CC
|
||||
#define EGL_BITMAP_PIXEL_LUMINANCE_OFFSET_KHR 0x30CD
|
||||
#define EGL_LOWER_LEFT_KHR 0x30CE
|
||||
#define EGL_UPPER_LEFT_KHR 0x30CF
|
||||
typedef EGLBoolean (EGLAPIENTRYP PFNEGLLOCKSURFACEKHRPROC) (EGLDisplay dpy, EGLSurface surface, const EGLint *attrib_list);
|
||||
typedef EGLBoolean (EGLAPIENTRYP PFNEGLUNLOCKSURFACEKHRPROC) (EGLDisplay dpy, EGLSurface surface);
|
||||
#ifdef EGL_EGLEXT_PROTOTYPES
|
||||
EGLAPI EGLBoolean EGLAPIENTRY eglLockSurfaceKHR (EGLDisplay dpy, EGLSurface surface, const EGLint *attrib_list);
|
||||
EGLAPI EGLBoolean EGLAPIENTRY eglUnlockSurfaceKHR (EGLDisplay dpy, EGLSurface surface);
|
||||
#endif
|
||||
#endif /* EGL_KHR_lock_surface */
|
||||
|
||||
#ifndef EGL_KHR_lock_surface2
|
||||
#define EGL_KHR_lock_surface2 1
|
||||
#define EGL_BITMAP_PIXEL_SIZE_KHR 0x3110
|
||||
#endif /* EGL_KHR_lock_surface2 */
|
||||
|
||||
#ifndef EGL_KHR_lock_surface3
|
||||
#define EGL_KHR_lock_surface3 1
|
||||
typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYSURFACE64KHRPROC) (EGLDisplay dpy, EGLSurface surface, EGLint attribute, EGLAttribKHR *value);
|
||||
#ifdef EGL_EGLEXT_PROTOTYPES
|
||||
EGLAPI EGLBoolean EGLAPIENTRY eglQuerySurface64KHR (EGLDisplay dpy, EGLSurface surface, EGLint attribute, EGLAttribKHR *value);
|
||||
#endif
|
||||
#endif /* EGL_KHR_lock_surface3 */
|
||||
|
||||
#ifndef EGL_KHR_reusable_sync
|
||||
#define EGL_KHR_reusable_sync 1
|
||||
typedef khronos_utime_nanoseconds_t EGLTimeKHR;
|
||||
#ifdef KHRONOS_SUPPORT_INT64
|
||||
#define EGL_SYNC_STATUS_KHR 0x30F1
|
||||
#define EGL_SIGNALED_KHR 0x30F2
|
||||
#define EGL_UNSIGNALED_KHR 0x30F3
|
||||
#define EGL_TIMEOUT_EXPIRED_KHR 0x30F5
|
||||
#define EGL_CONDITION_SATISFIED_KHR 0x30F6
|
||||
#define EGL_SYNC_TYPE_KHR 0x30F7
|
||||
#define EGL_SYNC_REUSABLE_KHR 0x30FA
|
||||
#define EGL_SYNC_FLUSH_COMMANDS_BIT_KHR 0x0001
|
||||
#define EGL_FOREVER_KHR 0xFFFFFFFFFFFFFFFFull
|
||||
#define EGL_NO_SYNC_KHR ((EGLSyncKHR)0)
|
||||
typedef EGLSyncKHR (EGLAPIENTRYP PFNEGLCREATESYNCKHRPROC) (EGLDisplay dpy, EGLenum type, const EGLint *attrib_list);
|
||||
typedef EGLBoolean (EGLAPIENTRYP PFNEGLDESTROYSYNCKHRPROC) (EGLDisplay dpy, EGLSyncKHR sync);
|
||||
typedef EGLint (EGLAPIENTRYP PFNEGLCLIENTWAITSYNCKHRPROC) (EGLDisplay dpy, EGLSyncKHR sync, EGLint flags, EGLTimeKHR timeout);
|
||||
typedef EGLBoolean (EGLAPIENTRYP PFNEGLSIGNALSYNCKHRPROC) (EGLDisplay dpy, EGLSyncKHR sync, EGLenum mode);
|
||||
typedef EGLBoolean (EGLAPIENTRYP PFNEGLGETSYNCATTRIBKHRPROC) (EGLDisplay dpy, EGLSyncKHR sync, EGLint attribute, EGLint *value);
|
||||
#ifdef EGL_EGLEXT_PROTOTYPES
|
||||
EGLAPI EGLSyncKHR EGLAPIENTRY eglCreateSyncKHR (EGLDisplay dpy, EGLenum type, const EGLint *attrib_list);
|
||||
EGLAPI EGLBoolean EGLAPIENTRY eglDestroySyncKHR (EGLDisplay dpy, EGLSyncKHR sync);
|
||||
EGLAPI EGLint EGLAPIENTRY eglClientWaitSyncKHR (EGLDisplay dpy, EGLSyncKHR sync, EGLint flags, EGLTimeKHR timeout);
|
||||
EGLAPI EGLBoolean EGLAPIENTRY eglSignalSyncKHR (EGLDisplay dpy, EGLSyncKHR sync, EGLenum mode);
|
||||
EGLAPI EGLBoolean EGLAPIENTRY eglGetSyncAttribKHR (EGLDisplay dpy, EGLSyncKHR sync, EGLint attribute, EGLint *value);
|
||||
#endif
|
||||
#endif /* KHRONOS_SUPPORT_INT64 */
|
||||
#endif /* EGL_KHR_reusable_sync */
|
||||
|
||||
#ifndef EGL_KHR_stream
|
||||
#define EGL_KHR_stream 1
|
||||
typedef void *EGLStreamKHR;
|
||||
typedef khronos_uint64_t EGLuint64KHR;
|
||||
#ifdef KHRONOS_SUPPORT_INT64
|
||||
#define EGL_NO_STREAM_KHR ((EGLStreamKHR)0)
|
||||
#define EGL_CONSUMER_LATENCY_USEC_KHR 0x3210
|
||||
#define EGL_PRODUCER_FRAME_KHR 0x3212
|
||||
#define EGL_CONSUMER_FRAME_KHR 0x3213
|
||||
#define EGL_STREAM_STATE_KHR 0x3214
|
||||
#define EGL_STREAM_STATE_CREATED_KHR 0x3215
|
||||
#define EGL_STREAM_STATE_CONNECTING_KHR 0x3216
|
||||
#define EGL_STREAM_STATE_EMPTY_KHR 0x3217
|
||||
#define EGL_STREAM_STATE_NEW_FRAME_AVAILABLE_KHR 0x3218
|
||||
#define EGL_STREAM_STATE_OLD_FRAME_AVAILABLE_KHR 0x3219
|
||||
#define EGL_STREAM_STATE_DISCONNECTED_KHR 0x321A
|
||||
#define EGL_BAD_STREAM_KHR 0x321B
|
||||
#define EGL_BAD_STATE_KHR 0x321C
|
||||
typedef EGLStreamKHR (EGLAPIENTRYP PFNEGLCREATESTREAMKHRPROC) (EGLDisplay dpy, const EGLint *attrib_list);
|
||||
typedef EGLBoolean (EGLAPIENTRYP PFNEGLDESTROYSTREAMKHRPROC) (EGLDisplay dpy, EGLStreamKHR stream);
|
||||
typedef EGLBoolean (EGLAPIENTRYP PFNEGLSTREAMATTRIBKHRPROC) (EGLDisplay dpy, EGLStreamKHR stream, EGLenum attribute, EGLint value);
|
||||
typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYSTREAMKHRPROC) (EGLDisplay dpy, EGLStreamKHR stream, EGLenum attribute, EGLint *value);
|
||||
typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYSTREAMU64KHRPROC) (EGLDisplay dpy, EGLStreamKHR stream, EGLenum attribute, EGLuint64KHR *value);
|
||||
#ifdef EGL_EGLEXT_PROTOTYPES
|
||||
EGLAPI EGLStreamKHR EGLAPIENTRY eglCreateStreamKHR (EGLDisplay dpy, const EGLint *attrib_list);
|
||||
EGLAPI EGLBoolean EGLAPIENTRY eglDestroyStreamKHR (EGLDisplay dpy, EGLStreamKHR stream);
|
||||
EGLAPI EGLBoolean EGLAPIENTRY eglStreamAttribKHR (EGLDisplay dpy, EGLStreamKHR stream, EGLenum attribute, EGLint value);
|
||||
EGLAPI EGLBoolean EGLAPIENTRY eglQueryStreamKHR (EGLDisplay dpy, EGLStreamKHR stream, EGLenum attribute, EGLint *value);
|
||||
EGLAPI EGLBoolean EGLAPIENTRY eglQueryStreamu64KHR (EGLDisplay dpy, EGLStreamKHR stream, EGLenum attribute, EGLuint64KHR *value);
|
||||
#endif
|
||||
#endif /* KHRONOS_SUPPORT_INT64 */
|
||||
#endif /* EGL_KHR_stream */
|
||||
|
||||
#ifndef EGL_KHR_stream_consumer_gltexture
|
||||
#define EGL_KHR_stream_consumer_gltexture 1
|
||||
#ifdef EGL_KHR_stream
|
||||
#define EGL_CONSUMER_ACQUIRE_TIMEOUT_USEC_KHR 0x321E
|
||||
typedef EGLBoolean (EGLAPIENTRYP PFNEGLSTREAMCONSUMERGLTEXTUREEXTERNALKHRPROC) (EGLDisplay dpy, EGLStreamKHR stream);
|
||||
typedef EGLBoolean (EGLAPIENTRYP PFNEGLSTREAMCONSUMERACQUIREKHRPROC) (EGLDisplay dpy, EGLStreamKHR stream);
|
||||
typedef EGLBoolean (EGLAPIENTRYP PFNEGLSTREAMCONSUMERRELEASEKHRPROC) (EGLDisplay dpy, EGLStreamKHR stream);
|
||||
#ifdef EGL_EGLEXT_PROTOTYPES
|
||||
EGLAPI EGLBoolean EGLAPIENTRY eglStreamConsumerGLTextureExternalKHR (EGLDisplay dpy, EGLStreamKHR stream);
|
||||
EGLAPI EGLBoolean EGLAPIENTRY eglStreamConsumerAcquireKHR (EGLDisplay dpy, EGLStreamKHR stream);
|
||||
EGLAPI EGLBoolean EGLAPIENTRY eglStreamConsumerReleaseKHR (EGLDisplay dpy, EGLStreamKHR stream);
|
||||
#endif
|
||||
#endif /* EGL_KHR_stream */
|
||||
#endif /* EGL_KHR_stream_consumer_gltexture */
|
||||
|
||||
#ifndef EGL_KHR_stream_cross_process_fd
|
||||
#define EGL_KHR_stream_cross_process_fd 1
|
||||
typedef int EGLNativeFileDescriptorKHR;
|
||||
#ifdef EGL_KHR_stream
|
||||
#define EGL_NO_FILE_DESCRIPTOR_KHR ((EGLNativeFileDescriptorKHR)(-1))
|
||||
typedef EGLNativeFileDescriptorKHR (EGLAPIENTRYP PFNEGLGETSTREAMFILEDESCRIPTORKHRPROC) (EGLDisplay dpy, EGLStreamKHR stream);
|
||||
typedef EGLStreamKHR (EGLAPIENTRYP PFNEGLCREATESTREAMFROMFILEDESCRIPTORKHRPROC) (EGLDisplay dpy, EGLNativeFileDescriptorKHR file_descriptor);
|
||||
#ifdef EGL_EGLEXT_PROTOTYPES
|
||||
EGLAPI EGLNativeFileDescriptorKHR EGLAPIENTRY eglGetStreamFileDescriptorKHR (EGLDisplay dpy, EGLStreamKHR stream);
|
||||
EGLAPI EGLStreamKHR EGLAPIENTRY eglCreateStreamFromFileDescriptorKHR (EGLDisplay dpy, EGLNativeFileDescriptorKHR file_descriptor);
|
||||
#endif
|
||||
#endif /* EGL_KHR_stream */
|
||||
#endif /* EGL_KHR_stream_cross_process_fd */
|
||||
|
||||
#ifndef EGL_KHR_stream_fifo
|
||||
#define EGL_KHR_stream_fifo 1
|
||||
#ifdef EGL_KHR_stream
|
||||
#define EGL_STREAM_FIFO_LENGTH_KHR 0x31FC
|
||||
#define EGL_STREAM_TIME_NOW_KHR 0x31FD
|
||||
#define EGL_STREAM_TIME_CONSUMER_KHR 0x31FE
|
||||
#define EGL_STREAM_TIME_PRODUCER_KHR 0x31FF
|
||||
typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYSTREAMTIMEKHRPROC) (EGLDisplay dpy, EGLStreamKHR stream, EGLenum attribute, EGLTimeKHR *value);
|
||||
#ifdef EGL_EGLEXT_PROTOTYPES
|
||||
EGLAPI EGLBoolean EGLAPIENTRY eglQueryStreamTimeKHR (EGLDisplay dpy, EGLStreamKHR stream, EGLenum attribute, EGLTimeKHR *value);
|
||||
#endif
|
||||
#endif /* EGL_KHR_stream */
|
||||
#endif /* EGL_KHR_stream_fifo */
|
||||
|
||||
#ifndef EGL_KHR_stream_producer_aldatalocator
|
||||
#define EGL_KHR_stream_producer_aldatalocator 1
|
||||
#ifdef EGL_KHR_stream
|
||||
#endif /* EGL_KHR_stream */
|
||||
#endif /* EGL_KHR_stream_producer_aldatalocator */
|
||||
|
||||
#ifndef EGL_KHR_stream_producer_eglsurface
|
||||
#define EGL_KHR_stream_producer_eglsurface 1
|
||||
#ifdef EGL_KHR_stream
|
||||
#define EGL_STREAM_BIT_KHR 0x0800
|
||||
typedef EGLSurface (EGLAPIENTRYP PFNEGLCREATESTREAMPRODUCERSURFACEKHRPROC) (EGLDisplay dpy, EGLConfig config, EGLStreamKHR stream, const EGLint *attrib_list);
|
||||
#ifdef EGL_EGLEXT_PROTOTYPES
|
||||
EGLAPI EGLSurface EGLAPIENTRY eglCreateStreamProducerSurfaceKHR (EGLDisplay dpy, EGLConfig config, EGLStreamKHR stream, const EGLint *attrib_list);
|
||||
#endif
|
||||
#endif /* EGL_KHR_stream */
|
||||
#endif /* EGL_KHR_stream_producer_eglsurface */
|
||||
|
||||
#ifndef EGL_KHR_surfaceless_context
|
||||
#define EGL_KHR_surfaceless_context 1
|
||||
#endif /* EGL_KHR_surfaceless_context */
|
||||
|
||||
#ifndef EGL_KHR_vg_parent_image
|
||||
#define EGL_KHR_vg_parent_image 1
|
||||
#define EGL_VG_PARENT_IMAGE_KHR 0x30BA
|
||||
#endif /* EGL_KHR_vg_parent_image */
|
||||
|
||||
#ifndef EGL_KHR_wait_sync
|
||||
#define EGL_KHR_wait_sync 1
|
||||
typedef EGLint (EGLAPIENTRYP PFNEGLWAITSYNCKHRPROC) (EGLDisplay dpy, EGLSyncKHR sync, EGLint flags);
|
||||
#ifdef EGL_EGLEXT_PROTOTYPES
|
||||
EGLAPI EGLint EGLAPIENTRY eglWaitSyncKHR (EGLDisplay dpy, EGLSyncKHR sync, EGLint flags);
|
||||
#endif
|
||||
#endif /* EGL_KHR_wait_sync */
|
||||
|
||||
#ifndef EGL_ANDROID_blob_cache
|
||||
#define EGL_ANDROID_blob_cache 1
|
||||
typedef khronos_ssize_t EGLsizeiANDROID;
|
||||
typedef void (*EGLSetBlobFuncANDROID) (const void *key, EGLsizeiANDROID keySize, const void *value, EGLsizeiANDROID valueSize);
|
||||
typedef EGLsizeiANDROID (*EGLGetBlobFuncANDROID) (const void *key, EGLsizeiANDROID keySize, void *value, EGLsizeiANDROID valueSize);
|
||||
typedef void (EGLAPIENTRYP PFNEGLSETBLOBCACHEFUNCSANDROIDPROC) (EGLDisplay dpy, EGLSetBlobFuncANDROID set, EGLGetBlobFuncANDROID get);
|
||||
#ifdef EGL_EGLEXT_PROTOTYPES
|
||||
EGLAPI void EGLAPIENTRY eglSetBlobCacheFuncsANDROID (EGLDisplay dpy, EGLSetBlobFuncANDROID set, EGLGetBlobFuncANDROID get);
|
||||
#endif
|
||||
#endif /* EGL_ANDROID_blob_cache */
|
||||
|
||||
#ifndef EGL_ANDROID_framebuffer_target
|
||||
#define EGL_ANDROID_framebuffer_target 1
|
||||
#define EGL_FRAMEBUFFER_TARGET_ANDROID 0x3147
|
||||
#endif /* EGL_ANDROID_framebuffer_target */
|
||||
|
||||
#ifndef EGL_ANDROID_image_native_buffer
|
||||
#define EGL_ANDROID_image_native_buffer 1
|
||||
#define EGL_NATIVE_BUFFER_ANDROID 0x3140
|
||||
#endif /* EGL_ANDROID_image_native_buffer */
|
||||
|
||||
#ifndef EGL_ANDROID_native_fence_sync
|
||||
#define EGL_ANDROID_native_fence_sync 1
|
||||
#define EGL_SYNC_NATIVE_FENCE_ANDROID 0x3144
|
||||
#define EGL_SYNC_NATIVE_FENCE_FD_ANDROID 0x3145
|
||||
#define EGL_SYNC_NATIVE_FENCE_SIGNALED_ANDROID 0x3146
|
||||
/* Sentinel meaning "no native fence fd"; parenthesised so the negative literal expands as one operand. */
#define EGL_NO_NATIVE_FENCE_FD_ANDROID (-1)
|
||||
typedef EGLint (EGLAPIENTRYP PFNEGLDUPNATIVEFENCEFDANDROIDPROC) (EGLDisplay dpy, EGLSyncKHR sync);
|
||||
#ifdef EGL_EGLEXT_PROTOTYPES
|
||||
EGLAPI EGLint EGLAPIENTRY eglDupNativeFenceFDANDROID (EGLDisplay dpy, EGLSyncKHR sync);
|
||||
#endif
|
||||
#endif /* EGL_ANDROID_native_fence_sync */
|
||||
|
||||
#ifndef EGL_ANDROID_recordable
|
||||
#define EGL_ANDROID_recordable 1
|
||||
#define EGL_RECORDABLE_ANDROID 0x3142
|
||||
#endif /* EGL_ANDROID_recordable */
|
||||
|
||||
#ifndef EGL_ANGLE_d3d_share_handle_client_buffer
|
||||
#define EGL_ANGLE_d3d_share_handle_client_buffer 1
|
||||
#define EGL_D3D_TEXTURE_2D_SHARE_HANDLE_ANGLE 0x3200
|
||||
#endif /* EGL_ANGLE_d3d_share_handle_client_buffer */
|
||||
|
||||
#ifndef EGL_ANGLE_query_surface_pointer
|
||||
#define EGL_ANGLE_query_surface_pointer 1
|
||||
typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYSURFACEPOINTERANGLEPROC) (EGLDisplay dpy, EGLSurface surface, EGLint attribute, void **value);
|
||||
#ifdef EGL_EGLEXT_PROTOTYPES
|
||||
EGLAPI EGLBoolean EGLAPIENTRY eglQuerySurfacePointerANGLE (EGLDisplay dpy, EGLSurface surface, EGLint attribute, void **value);
|
||||
#endif
|
||||
#endif /* EGL_ANGLE_query_surface_pointer */
|
||||
|
||||
#ifndef EGL_ANGLE_surface_d3d_texture_2d_share_handle
|
||||
#define EGL_ANGLE_surface_d3d_texture_2d_share_handle 1
|
||||
#endif /* EGL_ANGLE_surface_d3d_texture_2d_share_handle */
|
||||
|
||||
#ifndef EGL_ARM_pixmap_multisample_discard
|
||||
#define EGL_ARM_pixmap_multisample_discard 1
|
||||
#define EGL_DISCARD_SAMPLES_ARM 0x3286
|
||||
#endif /* EGL_ARM_pixmap_multisample_discard */
|
||||
|
||||
#ifndef EGL_EXT_buffer_age
|
||||
#define EGL_EXT_buffer_age 1
|
||||
#define EGL_BUFFER_AGE_EXT 0x313D
|
||||
#endif /* EGL_EXT_buffer_age */
|
||||
|
||||
#ifndef EGL_EXT_client_extensions
|
||||
#define EGL_EXT_client_extensions 1
|
||||
#endif /* EGL_EXT_client_extensions */
|
||||
|
||||
#ifndef EGL_EXT_create_context_robustness
|
||||
#define EGL_EXT_create_context_robustness 1
|
||||
#define EGL_CONTEXT_OPENGL_ROBUST_ACCESS_EXT 0x30BF
|
||||
#define EGL_CONTEXT_OPENGL_RESET_NOTIFICATION_STRATEGY_EXT 0x3138
|
||||
#define EGL_NO_RESET_NOTIFICATION_EXT 0x31BE
|
||||
#define EGL_LOSE_CONTEXT_ON_RESET_EXT 0x31BF
|
||||
#endif /* EGL_EXT_create_context_robustness */
|
||||
|
||||
#ifndef EGL_EXT_image_dma_buf_import
|
||||
#define EGL_EXT_image_dma_buf_import 1
|
||||
#define EGL_LINUX_DMA_BUF_EXT 0x3270
|
||||
#define EGL_LINUX_DRM_FOURCC_EXT 0x3271
|
||||
#define EGL_DMA_BUF_PLANE0_FD_EXT 0x3272
|
||||
#define EGL_DMA_BUF_PLANE0_OFFSET_EXT 0x3273
|
||||
#define EGL_DMA_BUF_PLANE0_PITCH_EXT 0x3274
|
||||
#define EGL_DMA_BUF_PLANE1_FD_EXT 0x3275
|
||||
#define EGL_DMA_BUF_PLANE1_OFFSET_EXT 0x3276
|
||||
#define EGL_DMA_BUF_PLANE1_PITCH_EXT 0x3277
|
||||
#define EGL_DMA_BUF_PLANE2_FD_EXT 0x3278
|
||||
#define EGL_DMA_BUF_PLANE2_OFFSET_EXT 0x3279
|
||||
#define EGL_DMA_BUF_PLANE2_PITCH_EXT 0x327A
|
||||
#define EGL_YUV_COLOR_SPACE_HINT_EXT 0x327B
|
||||
#define EGL_SAMPLE_RANGE_HINT_EXT 0x327C
|
||||
#define EGL_YUV_CHROMA_HORIZONTAL_SITING_HINT_EXT 0x327D
|
||||
#define EGL_YUV_CHROMA_VERTICAL_SITING_HINT_EXT 0x327E
|
||||
#define EGL_ITU_REC601_EXT 0x327F
|
||||
#define EGL_ITU_REC709_EXT 0x3280
|
||||
#define EGL_ITU_REC2020_EXT 0x3281
|
||||
#define EGL_YUV_FULL_RANGE_EXT 0x3282
|
||||
#define EGL_YUV_NARROW_RANGE_EXT 0x3283
|
||||
#define EGL_YUV_CHROMA_SITING_0_EXT 0x3284
|
||||
#define EGL_YUV_CHROMA_SITING_0_5_EXT 0x3285
|
||||
#endif /* EGL_EXT_image_dma_buf_import */
|
||||
|
||||
#ifndef EGL_EXT_multiview_window
|
||||
#define EGL_EXT_multiview_window 1
|
||||
#define EGL_MULTIVIEW_VIEW_COUNT_EXT 0x3134
|
||||
#endif /* EGL_EXT_multiview_window */
|
||||
|
||||
#ifndef EGL_EXT_platform_base
|
||||
#define EGL_EXT_platform_base 1
|
||||
typedef EGLDisplay (EGLAPIENTRYP PFNEGLGETPLATFORMDISPLAYEXTPROC) (EGLenum platform, void *native_display, const EGLint *attrib_list);
|
||||
typedef EGLSurface (EGLAPIENTRYP PFNEGLCREATEPLATFORMWINDOWSURFACEEXTPROC) (EGLDisplay dpy, EGLConfig config, void *native_window, const EGLint *attrib_list);
|
||||
typedef EGLSurface (EGLAPIENTRYP PFNEGLCREATEPLATFORMPIXMAPSURFACEEXTPROC) (EGLDisplay dpy, EGLConfig config, void *native_pixmap, const EGLint *attrib_list);
|
||||
#ifdef EGL_EGLEXT_PROTOTYPES
|
||||
EGLAPI EGLDisplay EGLAPIENTRY eglGetPlatformDisplayEXT (EGLenum platform, void *native_display, const EGLint *attrib_list);
|
||||
EGLAPI EGLSurface EGLAPIENTRY eglCreatePlatformWindowSurfaceEXT (EGLDisplay dpy, EGLConfig config, void *native_window, const EGLint *attrib_list);
|
||||
EGLAPI EGLSurface EGLAPIENTRY eglCreatePlatformPixmapSurfaceEXT (EGLDisplay dpy, EGLConfig config, void *native_pixmap, const EGLint *attrib_list);
|
||||
#endif
|
||||
#endif /* EGL_EXT_platform_base */
|
||||
|
||||
#ifndef EGL_EXT_platform_wayland
|
||||
#define EGL_EXT_platform_wayland 1
|
||||
#define EGL_PLATFORM_WAYLAND_EXT 0x31D8
|
||||
#endif /* EGL_EXT_platform_wayland */
|
||||
|
||||
#ifndef EGL_EXT_platform_x11
|
||||
#define EGL_EXT_platform_x11 1
|
||||
#define EGL_PLATFORM_X11_EXT 0x31D5
|
||||
#define EGL_PLATFORM_X11_SCREEN_EXT 0x31D6
|
||||
#endif /* EGL_EXT_platform_x11 */
|
||||
|
||||
#ifndef EGL_EXT_swap_buffers_with_damage
|
||||
#define EGL_EXT_swap_buffers_with_damage 1
|
||||
typedef EGLBoolean (EGLAPIENTRYP PFNEGLSWAPBUFFERSWITHDAMAGEEXTPROC) (EGLDisplay dpy, EGLSurface surface, EGLint *rects, EGLint n_rects);
|
||||
#ifdef EGL_EGLEXT_PROTOTYPES
|
||||
EGLAPI EGLBoolean EGLAPIENTRY eglSwapBuffersWithDamageEXT (EGLDisplay dpy, EGLSurface surface, EGLint *rects, EGLint n_rects);
|
||||
#endif
|
||||
#endif /* EGL_EXT_swap_buffers_with_damage */
|
||||
|
||||
#ifndef EGL_HI_clientpixmap
|
||||
#define EGL_HI_clientpixmap 1
|
||||
struct EGLClientPixmapHI {
|
||||
void *pData;
|
||||
EGLint iWidth;
|
||||
EGLint iHeight;
|
||||
EGLint iStride;
|
||||
};
|
||||
#define EGL_CLIENT_PIXMAP_POINTER_HI 0x8F74
|
||||
typedef EGLSurface (EGLAPIENTRYP PFNEGLCREATEPIXMAPSURFACEHIPROC) (EGLDisplay dpy, EGLConfig config, struct EGLClientPixmapHI *pixmap);
|
||||
#ifdef EGL_EGLEXT_PROTOTYPES
|
||||
EGLAPI EGLSurface EGLAPIENTRY eglCreatePixmapSurfaceHI (EGLDisplay dpy, EGLConfig config, struct EGLClientPixmapHI *pixmap);
|
||||
#endif
|
||||
#endif /* EGL_HI_clientpixmap */
|
||||
|
||||
#ifndef EGL_HI_colorformats
|
||||
#define EGL_HI_colorformats 1
|
||||
#define EGL_COLOR_FORMAT_HI 0x8F70
|
||||
#define EGL_COLOR_RGB_HI 0x8F71
|
||||
#define EGL_COLOR_RGBA_HI 0x8F72
|
||||
#define EGL_COLOR_ARGB_HI 0x8F73
|
||||
#endif /* EGL_HI_colorformats */
|
||||
|
||||
#ifndef EGL_IMG_context_priority
|
||||
#define EGL_IMG_context_priority 1
|
||||
#define EGL_CONTEXT_PRIORITY_LEVEL_IMG 0x3100
|
||||
#define EGL_CONTEXT_PRIORITY_HIGH_IMG 0x3101
|
||||
#define EGL_CONTEXT_PRIORITY_MEDIUM_IMG 0x3102
|
||||
#define EGL_CONTEXT_PRIORITY_LOW_IMG 0x3103
|
||||
#endif /* EGL_IMG_context_priority */
|
||||
|
||||
#ifndef EGL_MESA_drm_image
|
||||
#define EGL_MESA_drm_image 1
|
||||
#define EGL_DRM_BUFFER_FORMAT_MESA 0x31D0
|
||||
#define EGL_DRM_BUFFER_USE_MESA 0x31D1
|
||||
#define EGL_DRM_BUFFER_FORMAT_ARGB32_MESA 0x31D2
|
||||
#define EGL_DRM_BUFFER_MESA 0x31D3
|
||||
#define EGL_DRM_BUFFER_STRIDE_MESA 0x31D4
|
||||
#define EGL_DRM_BUFFER_USE_SCANOUT_MESA 0x00000001
|
||||
#define EGL_DRM_BUFFER_USE_SHARE_MESA 0x00000002
|
||||
typedef EGLImageKHR (EGLAPIENTRYP PFNEGLCREATEDRMIMAGEMESAPROC) (EGLDisplay dpy, const EGLint *attrib_list);
|
||||
typedef EGLBoolean (EGLAPIENTRYP PFNEGLEXPORTDRMIMAGEMESAPROC) (EGLDisplay dpy, EGLImageKHR image, EGLint *name, EGLint *handle, EGLint *stride);
|
||||
#ifdef EGL_EGLEXT_PROTOTYPES
|
||||
EGLAPI EGLImageKHR EGLAPIENTRY eglCreateDRMImageMESA (EGLDisplay dpy, const EGLint *attrib_list);
|
||||
EGLAPI EGLBoolean EGLAPIENTRY eglExportDRMImageMESA (EGLDisplay dpy, EGLImageKHR image, EGLint *name, EGLint *handle, EGLint *stride);
|
||||
#endif
|
||||
#endif /* EGL_MESA_drm_image */
|
||||
|
||||
#ifndef EGL_MESA_platform_gbm
|
||||
#define EGL_MESA_platform_gbm 1
|
||||
#define EGL_PLATFORM_GBM_MESA 0x31D7
|
||||
#endif /* EGL_MESA_platform_gbm */
|
||||
|
||||
#ifndef EGL_NV_3dvision_surface
|
||||
#define EGL_NV_3dvision_surface 1
|
||||
#define EGL_AUTO_STEREO_NV 0x3136
|
||||
#endif /* EGL_NV_3dvision_surface */
|
||||
|
||||
#ifndef EGL_NV_coverage_sample
|
||||
#define EGL_NV_coverage_sample 1
|
||||
#define EGL_COVERAGE_BUFFERS_NV 0x30E0
|
||||
#define EGL_COVERAGE_SAMPLES_NV 0x30E1
|
||||
#endif /* EGL_NV_coverage_sample */
|
||||
|
||||
#ifndef EGL_NV_coverage_sample_resolve
|
||||
#define EGL_NV_coverage_sample_resolve 1
|
||||
#define EGL_COVERAGE_SAMPLE_RESOLVE_NV 0x3131
|
||||
#define EGL_COVERAGE_SAMPLE_RESOLVE_DEFAULT_NV 0x3132
|
||||
#define EGL_COVERAGE_SAMPLE_RESOLVE_NONE_NV 0x3133
|
||||
#endif /* EGL_NV_coverage_sample_resolve */
|
||||
|
||||
#ifndef EGL_NV_depth_nonlinear
|
||||
#define EGL_NV_depth_nonlinear 1
|
||||
#define EGL_DEPTH_ENCODING_NV 0x30E2
|
||||
#define EGL_DEPTH_ENCODING_NONE_NV 0
|
||||
#define EGL_DEPTH_ENCODING_NONLINEAR_NV 0x30E3
|
||||
#endif /* EGL_NV_depth_nonlinear */
|
||||
|
||||
#ifndef EGL_NV_native_query
|
||||
#define EGL_NV_native_query 1
|
||||
typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYNATIVEDISPLAYNVPROC) (EGLDisplay dpy, EGLNativeDisplayType *display_id);
|
||||
typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYNATIVEWINDOWNVPROC) (EGLDisplay dpy, EGLSurface surf, EGLNativeWindowType *window);
|
||||
typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYNATIVEPIXMAPNVPROC) (EGLDisplay dpy, EGLSurface surf, EGLNativePixmapType *pixmap);
|
||||
#ifdef EGL_EGLEXT_PROTOTYPES
|
||||
EGLAPI EGLBoolean EGLAPIENTRY eglQueryNativeDisplayNV (EGLDisplay dpy, EGLNativeDisplayType *display_id);
|
||||
EGLAPI EGLBoolean EGLAPIENTRY eglQueryNativeWindowNV (EGLDisplay dpy, EGLSurface surf, EGLNativeWindowType *window);
|
||||
EGLAPI EGLBoolean EGLAPIENTRY eglQueryNativePixmapNV (EGLDisplay dpy, EGLSurface surf, EGLNativePixmapType *pixmap);
|
||||
#endif
|
||||
#endif /* EGL_NV_native_query */
|
||||
|
||||
#ifndef EGL_NV_post_convert_rounding
|
||||
#define EGL_NV_post_convert_rounding 1
|
||||
#endif /* EGL_NV_post_convert_rounding */
|
||||
|
||||
#ifndef EGL_NV_post_sub_buffer
|
||||
#define EGL_NV_post_sub_buffer 1
|
||||
#define EGL_POST_SUB_BUFFER_SUPPORTED_NV 0x30BE
|
||||
typedef EGLBoolean (EGLAPIENTRYP PFNEGLPOSTSUBBUFFERNVPROC) (EGLDisplay dpy, EGLSurface surface, EGLint x, EGLint y, EGLint width, EGLint height);
|
||||
#ifdef EGL_EGLEXT_PROTOTYPES
|
||||
EGLAPI EGLBoolean EGLAPIENTRY eglPostSubBufferNV (EGLDisplay dpy, EGLSurface surface, EGLint x, EGLint y, EGLint width, EGLint height);
|
||||
#endif
|
||||
#endif /* EGL_NV_post_sub_buffer */
|
||||
|
||||
#ifndef EGL_NV_stream_sync
|
||||
#define EGL_NV_stream_sync 1
|
||||
#define EGL_SYNC_NEW_FRAME_NV 0x321F
|
||||
typedef EGLSyncKHR (EGLAPIENTRYP PFNEGLCREATESTREAMSYNCNVPROC) (EGLDisplay dpy, EGLStreamKHR stream, EGLenum type, const EGLint *attrib_list);
|
||||
#ifdef EGL_EGLEXT_PROTOTYPES
|
||||
EGLAPI EGLSyncKHR EGLAPIENTRY eglCreateStreamSyncNV (EGLDisplay dpy, EGLStreamKHR stream, EGLenum type, const EGLint *attrib_list);
|
||||
#endif
|
||||
#endif /* EGL_NV_stream_sync */
|
||||
|
||||
#ifndef EGL_NV_sync
|
||||
#define EGL_NV_sync 1
|
||||
typedef void *EGLSyncNV;
|
||||
typedef khronos_utime_nanoseconds_t EGLTimeNV;
|
||||
#ifdef KHRONOS_SUPPORT_INT64
|
||||
#define EGL_SYNC_PRIOR_COMMANDS_COMPLETE_NV 0x30E6
|
||||
#define EGL_SYNC_STATUS_NV 0x30E7
|
||||
#define EGL_SIGNALED_NV 0x30E8
|
||||
#define EGL_UNSIGNALED_NV 0x30E9
|
||||
#define EGL_SYNC_FLUSH_COMMANDS_BIT_NV 0x0001
|
||||
#define EGL_FOREVER_NV 0xFFFFFFFFFFFFFFFFull
|
||||
#define EGL_ALREADY_SIGNALED_NV 0x30EA
|
||||
#define EGL_TIMEOUT_EXPIRED_NV 0x30EB
|
||||
#define EGL_CONDITION_SATISFIED_NV 0x30EC
|
||||
#define EGL_SYNC_TYPE_NV 0x30ED
|
||||
#define EGL_SYNC_CONDITION_NV 0x30EE
|
||||
#define EGL_SYNC_FENCE_NV 0x30EF
|
||||
#define EGL_NO_SYNC_NV ((EGLSyncNV)0)
|
||||
typedef EGLSyncNV (EGLAPIENTRYP PFNEGLCREATEFENCESYNCNVPROC) (EGLDisplay dpy, EGLenum condition, const EGLint *attrib_list);
|
||||
typedef EGLBoolean (EGLAPIENTRYP PFNEGLDESTROYSYNCNVPROC) (EGLSyncNV sync);
|
||||
typedef EGLBoolean (EGLAPIENTRYP PFNEGLFENCENVPROC) (EGLSyncNV sync);
|
||||
typedef EGLint (EGLAPIENTRYP PFNEGLCLIENTWAITSYNCNVPROC) (EGLSyncNV sync, EGLint flags, EGLTimeNV timeout);
|
||||
typedef EGLBoolean (EGLAPIENTRYP PFNEGLSIGNALSYNCNVPROC) (EGLSyncNV sync, EGLenum mode);
|
||||
typedef EGLBoolean (EGLAPIENTRYP PFNEGLGETSYNCATTRIBNVPROC) (EGLSyncNV sync, EGLint attribute, EGLint *value);
|
||||
#ifdef EGL_EGLEXT_PROTOTYPES
|
||||
EGLAPI EGLSyncNV EGLAPIENTRY eglCreateFenceSyncNV (EGLDisplay dpy, EGLenum condition, const EGLint *attrib_list);
|
||||
EGLAPI EGLBoolean EGLAPIENTRY eglDestroySyncNV (EGLSyncNV sync);
|
||||
EGLAPI EGLBoolean EGLAPIENTRY eglFenceNV (EGLSyncNV sync);
|
||||
EGLAPI EGLint EGLAPIENTRY eglClientWaitSyncNV (EGLSyncNV sync, EGLint flags, EGLTimeNV timeout);
|
||||
EGLAPI EGLBoolean EGLAPIENTRY eglSignalSyncNV (EGLSyncNV sync, EGLenum mode);
|
||||
EGLAPI EGLBoolean EGLAPIENTRY eglGetSyncAttribNV (EGLSyncNV sync, EGLint attribute, EGLint *value);
|
||||
#endif
|
||||
#endif /* KHRONOS_SUPPORT_INT64 */
|
||||
#endif /* EGL_NV_sync */
|
||||
|
||||
#ifndef EGL_NV_system_time
|
||||
#define EGL_NV_system_time 1
|
||||
typedef khronos_utime_nanoseconds_t EGLuint64NV;
|
||||
#ifdef KHRONOS_SUPPORT_INT64
|
||||
typedef EGLuint64NV (EGLAPIENTRYP PFNEGLGETSYSTEMTIMEFREQUENCYNVPROC) (void);
|
||||
typedef EGLuint64NV (EGLAPIENTRYP PFNEGLGETSYSTEMTIMENVPROC) (void);
|
||||
#ifdef EGL_EGLEXT_PROTOTYPES
|
||||
EGLAPI EGLuint64NV EGLAPIENTRY eglGetSystemTimeFrequencyNV (void);
|
||||
EGLAPI EGLuint64NV EGLAPIENTRY eglGetSystemTimeNV (void);
|
||||
#endif
|
||||
#endif /* KHRONOS_SUPPORT_INT64 */
|
||||
#endif /* EGL_NV_system_time */
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,125 @@
|
||||
#ifndef __eglplatform_h_
|
||||
#define __eglplatform_h_
|
||||
|
||||
/*
|
||||
** Copyright (c) 2007-2013 The Khronos Group Inc.
|
||||
**
|
||||
** Permission is hereby granted, free of charge, to any person obtaining a
|
||||
** copy of this software and/or associated documentation files (the
|
||||
** "Materials"), to deal in the Materials without restriction, including
|
||||
** without limitation the rights to use, copy, modify, merge, publish,
|
||||
** distribute, sublicense, and/or sell copies of the Materials, and to
|
||||
** permit persons to whom the Materials are furnished to do so, subject to
|
||||
** the following conditions:
|
||||
**
|
||||
** The above copyright notice and this permission notice shall be included
|
||||
** in all copies or substantial portions of the Materials.
|
||||
**
|
||||
** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
** MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
|
||||
*/
|
||||
|
||||
/* Platform-specific types and definitions for egl.h
|
||||
* $Revision: 23432 $ on $Date: 2013-10-09 00:57:24 -0700 (Wed, 09 Oct 2013) $
|
||||
*
|
||||
* Adopters may modify khrplatform.h and this file to suit their platform.
|
||||
* You are encouraged to submit all modifications to the Khronos group so that
|
||||
* they can be included in future versions of this file. Please submit changes
|
||||
* by sending them to the public Khronos Bugzilla (http://khronos.org/bugzilla)
|
||||
* by filing a bug against product "EGL" component "Registry".
|
||||
*/
|
||||
|
||||
#include <KHR/khrplatform.h>
|
||||
|
||||
/* Macros used in EGL function prototype declarations.
|
||||
*
|
||||
* EGL functions should be prototyped as:
|
||||
*
|
||||
* EGLAPI return-type EGLAPIENTRY eglFunction(arguments);
|
||||
* typedef return-type (EGLAPIENTRYP PFNEGLFUNCTIONPROC) (arguments);
|
||||
*
|
||||
* KHRONOS_APICALL and KHRONOS_APIENTRY are defined in KHR/khrplatform.h
|
||||
*/
|
||||
|
||||
#ifndef EGLAPI
|
||||
#define EGLAPI KHRONOS_APICALL
|
||||
#endif
|
||||
|
||||
#ifndef EGLAPIENTRY
|
||||
#define EGLAPIENTRY KHRONOS_APIENTRY
|
||||
#endif
|
||||
#define EGLAPIENTRYP EGLAPIENTRY*
|
||||
|
||||
/* The types NativeDisplayType, NativeWindowType, and NativePixmapType
|
||||
* are aliases of window-system-dependent types, such as X Display * or
|
||||
* Windows Device Context. They must be defined in platform-specific
|
||||
* code below. The EGL-prefixed versions of Native*Type are the same
|
||||
* types, renamed in EGL 1.3 so all types in the API start with "EGL".
|
||||
*
|
||||
* Khronos STRONGLY RECOMMENDS that you use the default definitions
|
||||
* provided below, since these changes affect both binary and source
|
||||
* portability of applications using EGL running on different EGL
|
||||
* implementations.
|
||||
*/
|
||||
|
||||
/* Win32 and WinCE. The parentheses make the Cygwin / SciTech SNAP exclusions apply to
 * both _WIN32 and __VC32__; without them && binds tighter than || and they only
 * qualified __VC32__. */
#if (defined(_WIN32) || defined(__VC32__)) && !defined(__CYGWIN__) && !defined(__SCITECH_SNAP__)
|
||||
#ifndef WIN32_LEAN_AND_MEAN
|
||||
#define WIN32_LEAN_AND_MEAN 1
|
||||
#endif
|
||||
#include <windows.h>
|
||||
|
||||
typedef HDC EGLNativeDisplayType;
|
||||
typedef HBITMAP EGLNativePixmapType;
|
||||
typedef HWND EGLNativeWindowType;
|
||||
|
||||
#elif defined(__WINSCW__) || defined(__SYMBIAN32__) /* Symbian */
|
||||
|
||||
typedef int EGLNativeDisplayType;
|
||||
typedef void *EGLNativeWindowType;
|
||||
typedef void *EGLNativePixmapType;
|
||||
|
||||
#elif defined(__ANDROID__) || defined(ANDROID)
|
||||
|
||||
#include <android/native_window.h>
|
||||
|
||||
struct egl_native_pixmap_t;
|
||||
|
||||
typedef struct ANativeWindow* EGLNativeWindowType;
|
||||
typedef struct egl_native_pixmap_t* EGLNativePixmapType;
|
||||
typedef void* EGLNativeDisplayType;
|
||||
|
||||
#elif defined(__unix__)
|
||||
|
||||
/* X11 (tentative) */
|
||||
#include <X11/Xlib.h>
|
||||
#include <X11/Xutil.h>
|
||||
|
||||
typedef Display *EGLNativeDisplayType;
|
||||
typedef Pixmap EGLNativePixmapType;
|
||||
typedef Window EGLNativeWindowType;
|
||||
|
||||
#else
|
||||
#error "Platform not recognized"
|
||||
#endif
|
||||
|
||||
/* EGL 1.2 types, renamed for consistency in EGL 1.3 */
|
||||
typedef EGLNativeDisplayType NativeDisplayType;
|
||||
typedef EGLNativePixmapType NativePixmapType;
|
||||
typedef EGLNativeWindowType NativeWindowType;
|
||||
|
||||
|
||||
/* Define EGLint. This must be a signed integral type large enough to contain
|
||||
* all legal attribute names and values passed into and out of EGL, whether
|
||||
* their type is boolean, bitmask, enumerant (symbolic constant), integer,
|
||||
* handle, or other. While in general a 32-bit integer will suffice, if
|
||||
* handles are 64 bit types, then EGLint should be defined as a signed 64-bit
|
||||
* integer type.
|
||||
*/
|
||||
typedef khronos_int32_t EGLint;
|
||||
|
||||
#endif /* __eglplatform_h_ */
|
||||
@@ -0,0 +1,282 @@
|
||||
#ifndef __khrplatform_h_
|
||||
#define __khrplatform_h_
|
||||
|
||||
/*
|
||||
** Copyright (c) 2008-2009 The Khronos Group Inc.
|
||||
**
|
||||
** Permission is hereby granted, free of charge, to any person obtaining a
|
||||
** copy of this software and/or associated documentation files (the
|
||||
** "Materials"), to deal in the Materials without restriction, including
|
||||
** without limitation the rights to use, copy, modify, merge, publish,
|
||||
** distribute, sublicense, and/or sell copies of the Materials, and to
|
||||
** permit persons to whom the Materials are furnished to do so, subject to
|
||||
** the following conditions:
|
||||
**
|
||||
** The above copyright notice and this permission notice shall be included
|
||||
** in all copies or substantial portions of the Materials.
|
||||
**
|
||||
** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
** MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
|
||||
*/
|
||||
|
||||
/* Khronos platform-specific types and definitions.
|
||||
*
|
||||
* $Revision: 23298 $ on $Date: 2013-09-30 17:07:13 -0700 (Mon, 30 Sep 2013) $
|
||||
*
|
||||
* Adopters may modify this file to suit their platform. Adopters are
|
||||
* encouraged to submit platform specific modifications to the Khronos
|
||||
* group so that they can be included in future versions of this file.
|
||||
* Please submit changes by sending them to the public Khronos Bugzilla
|
||||
* (http://khronos.org/bugzilla) by filing a bug against product
|
||||
* "Khronos (general)" component "Registry".
|
||||
*
|
||||
* A predefined template which fills in some of the bug fields can be
|
||||
* reached using http://tinyurl.com/khrplatform-h-bugreport, but you
|
||||
* must create a Bugzilla login first.
|
||||
*
|
||||
*
|
||||
* See the Implementer's Guidelines for information about where this file
|
||||
* should be located on your system and for more details of its use:
|
||||
* http://www.khronos.org/registry/implementers_guide.pdf
|
||||
*
|
||||
* This file should be included as
|
||||
* #include <KHR/khrplatform.h>
|
||||
* by Khronos client API header files that use its types and defines.
|
||||
*
|
||||
* The types in khrplatform.h should only be used to define API-specific types.
|
||||
*
|
||||
* Types defined in khrplatform.h:
|
||||
* khronos_int8_t signed 8 bit
|
||||
* khronos_uint8_t unsigned 8 bit
|
||||
* khronos_int16_t signed 16 bit
|
||||
* khronos_uint16_t unsigned 16 bit
|
||||
* khronos_int32_t signed 32 bit
|
||||
* khronos_uint32_t unsigned 32 bit
|
||||
* khronos_int64_t signed 64 bit
|
||||
* khronos_uint64_t unsigned 64 bit
|
||||
* khronos_intptr_t signed same number of bits as a pointer
|
||||
* khronos_uintptr_t unsigned same number of bits as a pointer
|
||||
* khronos_ssize_t signed size
|
||||
* khronos_usize_t unsigned size
|
||||
* khronos_float_t signed 32 bit floating point
|
||||
* khronos_time_ns_t unsigned 64 bit time in nanoseconds
|
||||
* khronos_utime_nanoseconds_t unsigned time interval or absolute time in
|
||||
* nanoseconds
|
||||
* khronos_stime_nanoseconds_t signed time interval in nanoseconds
|
||||
* khronos_boolean_enum_t enumerated boolean type. This should
|
||||
* only be used as a base type when a client API's boolean type is
|
||||
* an enum. Client APIs which use an integer or other type for
|
||||
* booleans cannot use this as the base type for their boolean.
|
||||
*
|
||||
* Tokens defined in khrplatform.h:
|
||||
*
|
||||
* KHRONOS_FALSE, KHRONOS_TRUE Enumerated boolean false/true values.
|
||||
*
|
||||
* KHRONOS_SUPPORT_INT64 is 1 if 64 bit integers are supported; otherwise 0.
|
||||
* KHRONOS_SUPPORT_FLOAT is 1 if floats are supported; otherwise 0.
|
||||
*
|
||||
* Calling convention macros defined in this file:
|
||||
* KHRONOS_APICALL
|
||||
* KHRONOS_APIENTRY
|
||||
* KHRONOS_APIATTRIBUTES
|
||||
*
|
||||
* These may be used in function prototypes as:
|
||||
*
|
||||
* KHRONOS_APICALL void KHRONOS_APIENTRY funcname(
|
||||
* int arg1,
|
||||
* int arg2) KHRONOS_APIATTRIBUTES;
|
||||
*/
|
||||
|
||||
/*-------------------------------------------------------------------------
|
||||
* Definition of KHRONOS_APICALL
|
||||
*-------------------------------------------------------------------------
|
||||
* This precedes the return type of the function in the function prototype.
|
||||
*/
|
||||
#if defined(_WIN32) && !defined(__SCITECH_SNAP__)
|
||||
# define KHRONOS_APICALL __declspec(dllimport)
|
||||
#elif defined (__SYMBIAN32__)
|
||||
# define KHRONOS_APICALL IMPORT_C
|
||||
#else
|
||||
# define KHRONOS_APICALL
|
||||
#endif
|
||||
|
||||
/*-------------------------------------------------------------------------
|
||||
* Definition of KHRONOS_APIENTRY
|
||||
*-------------------------------------------------------------------------
|
||||
* This follows the return type of the function and precedes the function
|
||||
* name in the function prototype.
|
||||
*/
|
||||
#if defined(_WIN32) && !defined(_WIN32_WCE) && !defined(__SCITECH_SNAP__)
|
||||
/* Win32 but not WinCE */
|
||||
# define KHRONOS_APIENTRY __stdcall
|
||||
#else
|
||||
# define KHRONOS_APIENTRY
|
||||
#endif
|
||||
|
||||
/*-------------------------------------------------------------------------
|
||||
* Definition of KHRONOS_APIATTRIBUTES
|
||||
*-------------------------------------------------------------------------
|
||||
* This follows the closing parenthesis of the function prototype arguments.
|
||||
*/
|
||||
#if defined (__ARMCC_2__)
|
||||
#define KHRONOS_APIATTRIBUTES __softfp
|
||||
#else
|
||||
#define KHRONOS_APIATTRIBUTES
|
||||
#endif
|
||||
|
||||
/*-------------------------------------------------------------------------
|
||||
* basic type definitions
|
||||
*-----------------------------------------------------------------------*/
|
||||
#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || defined(__GNUC__) || defined(__SCO__) || defined(__USLC__)
|
||||
|
||||
|
||||
/*
|
||||
* Using <stdint.h>
|
||||
*/
|
||||
#include <stdint.h>
|
||||
typedef int32_t khronos_int32_t;
|
||||
typedef uint32_t khronos_uint32_t;
|
||||
typedef int64_t khronos_int64_t;
|
||||
typedef uint64_t khronos_uint64_t;
|
||||
#define KHRONOS_SUPPORT_INT64 1
|
||||
#define KHRONOS_SUPPORT_FLOAT 1
|
||||
|
||||
#elif defined(__VMS ) || defined(__sgi)
|
||||
|
||||
/*
|
||||
* Using <inttypes.h>
|
||||
*/
|
||||
#include <inttypes.h>
|
||||
typedef int32_t khronos_int32_t;
|
||||
typedef uint32_t khronos_uint32_t;
|
||||
typedef int64_t khronos_int64_t;
|
||||
typedef uint64_t khronos_uint64_t;
|
||||
#define KHRONOS_SUPPORT_INT64 1
|
||||
#define KHRONOS_SUPPORT_FLOAT 1
|
||||
|
||||
#elif defined(_WIN32) && !defined(__SCITECH_SNAP__)
|
||||
|
||||
/*
|
||||
* Win32
|
||||
*/
|
||||
typedef __int32 khronos_int32_t;
|
||||
typedef unsigned __int32 khronos_uint32_t;
|
||||
typedef __int64 khronos_int64_t;
|
||||
typedef unsigned __int64 khronos_uint64_t;
|
||||
#define KHRONOS_SUPPORT_INT64 1
|
||||
#define KHRONOS_SUPPORT_FLOAT 1
|
||||
|
||||
#elif defined(__sun__) || defined(__digital__)
|
||||
|
||||
/*
|
||||
* Sun or Digital
|
||||
*/
|
||||
typedef int khronos_int32_t;
|
||||
typedef unsigned int khronos_uint32_t;
|
||||
#if defined(__arch64__) || defined(_LP64)
|
||||
typedef long int khronos_int64_t;
|
||||
typedef unsigned long int khronos_uint64_t;
|
||||
#else
|
||||
typedef long long int khronos_int64_t;
|
||||
typedef unsigned long long int khronos_uint64_t;
|
||||
#endif /* __arch64__ */
|
||||
#define KHRONOS_SUPPORT_INT64 1
|
||||
#define KHRONOS_SUPPORT_FLOAT 1
|
||||
|
||||
#elif 0
|
||||
|
||||
/*
|
||||
* Hypothetical platform with no float or int64 support
|
||||
*/
|
||||
typedef int khronos_int32_t;
|
||||
typedef unsigned int khronos_uint32_t;
|
||||
#define KHRONOS_SUPPORT_INT64 0
|
||||
#define KHRONOS_SUPPORT_FLOAT 0
|
||||
|
||||
#else
|
||||
|
||||
/*
|
||||
* Generic fallback
|
||||
*/
|
||||
#include <stdint.h>
|
||||
typedef int32_t khronos_int32_t;
|
||||
typedef uint32_t khronos_uint32_t;
|
||||
typedef int64_t khronos_int64_t;
|
||||
typedef uint64_t khronos_uint64_t;
|
||||
#define KHRONOS_SUPPORT_INT64 1
|
||||
#define KHRONOS_SUPPORT_FLOAT 1
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
/*
|
||||
* Types that are (so far) the same on all platforms
|
||||
*/
|
||||
typedef signed char khronos_int8_t;
|
||||
typedef unsigned char khronos_uint8_t;
|
||||
typedef signed short int khronos_int16_t;
|
||||
typedef unsigned short int khronos_uint16_t;
|
||||
|
||||
/*
|
||||
* Types that differ between LLP64 and LP64 architectures - in LLP64,
|
||||
* pointers are 64 bits, but 'long' is still 32 bits. Win64 appears
|
||||
* to be the only LLP64 architecture in current use.
|
||||
*/
|
||||
#ifdef _WIN64
|
||||
typedef signed long long int khronos_intptr_t;
|
||||
typedef unsigned long long int khronos_uintptr_t;
|
||||
typedef signed long long int khronos_ssize_t;
|
||||
typedef unsigned long long int khronos_usize_t;
|
||||
#else
|
||||
typedef signed long int khronos_intptr_t;
|
||||
typedef unsigned long int khronos_uintptr_t;
|
||||
typedef signed long int khronos_ssize_t;
|
||||
typedef unsigned long int khronos_usize_t;
|
||||
#endif
|
||||
|
||||
#if KHRONOS_SUPPORT_FLOAT
|
||||
/*
|
||||
* Float type
|
||||
*/
|
||||
typedef float khronos_float_t;
|
||||
#endif
|
||||
|
||||
#if KHRONOS_SUPPORT_INT64
|
||||
/* Time types
|
||||
*
|
||||
* These types can be used to represent a time interval in nanoseconds or
|
||||
* an absolute Unadjusted System Time. Unadjusted System Time is the number
|
||||
* of nanoseconds since some arbitrary system event (e.g. since the last
|
||||
* time the system booted). The Unadjusted System Time is an unsigned
|
||||
* 64 bit value that wraps back to 0 every 584 years. Time intervals
|
||||
* may be either signed or unsigned.
|
||||
*/
|
||||
typedef khronos_uint64_t khronos_utime_nanoseconds_t;
|
||||
typedef khronos_int64_t khronos_stime_nanoseconds_t;
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Dummy value used to pad enum types to 32 bits.
|
||||
*/
|
||||
#ifndef KHRONOS_MAX_ENUM
|
||||
#define KHRONOS_MAX_ENUM 0x7FFFFFFF
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Enumerated boolean type
|
||||
*
|
||||
* Values other than zero should be considered to be true. Therefore
|
||||
* comparisons should not be made against KHRONOS_TRUE.
|
||||
*/
|
||||
typedef enum {
|
||||
KHRONOS_FALSE = 0,
|
||||
KHRONOS_TRUE = 1,
|
||||
KHRONOS_BOOLEAN_ENUM_FORCE_SIZE = KHRONOS_MAX_ENUM
|
||||
} khronos_boolean_enum_t;
|
||||
|
||||
#endif /* __khrplatform_h_ */
|
||||
@@ -0,0 +1,301 @@
|
||||
/* Copyright (c) 2008-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef CL_COMMON_HPP_
|
||||
#define CL_COMMON_HPP_
|
||||
|
||||
#include "top.hpp"
|
||||
#include "platform/runtime.hpp"
|
||||
#include "platform/command.hpp"
|
||||
#include "platform/memory.hpp"
|
||||
#include "thread/thread.hpp"
|
||||
#include "platform/commandqueue.hpp"
|
||||
|
||||
#include <vector>
|
||||
#include <utility>
|
||||
|
||||
//! \cond ignore
|
||||
namespace amd {

//! Assignment proxy around a pointer that is allowed to be null.
//! Assigning through the proxy stores the value in the pointee when the
//! pointer is non-null and is silently ignored otherwise.
template <typename T>
class NotNullWrapper
{
private:
    T* const target_;  //!< Destination of the store; may be NULL.

protected:
    //! Only derived helpers may create a wrapper.
    explicit NotNullWrapper(T* ptrOrNull)
        : target_(ptrOrNull)
    { }

public:
    //! Store \a value in the pointee, or do nothing for a null pointer.
    void operator = (T value) const
    {
        if (target_ == NULL) {
            return;
        }
        *target_ = value;
    }
};

//! Pointer-like front end for NotNullWrapper: dereferencing it yields the
//! wrapper, so that "*ref = value" becomes a null-checked store.
template <typename T>
class NotNullReference : protected NotNullWrapper<T>
{
public:
    explicit NotNullReference(T* ptrOrNull)
        : NotNullWrapper<T>(ptrOrNull)
    { }

    //! Expose the base-class proxy so the caller can assign through it.
    const NotNullWrapper<T>& operator * () const
    {
        const NotNullWrapper<T>& proxy = *this;
        return proxy;
    }
};

} // namespace amd
|
||||
|
||||
template <typename T>
|
||||
inline amd::NotNullReference<T>
|
||||
not_null(T* ptrOrNull)
|
||||
{
|
||||
return amd::NotNullReference<T>(ptrOrNull);
|
||||
}
|
||||
|
||||
//! Evaluates to true when \a thread is already non-null. Otherwise it
//! allocates a new amd::HostThread, assigns it to \a thread (so the argument
//! must be a modifiable lvalue) and evaluates to true only if that allocation
//! is non-null and the new object is what amd::Thread::current() returns.
#define CL_CHECK_THREAD(thread)                                              \
|
||||
(thread != NULL || ((thread = new amd::HostThread()) != NULL             \
|
||||
&& thread == amd::Thread::current()))
|
||||
|
||||
//! Opens the definition of an API entry point "ret func args" and emits the
//! host-thread check. If CL_CHECK_THREAD fails, CL_OUT_OF_HOST_MEMORY is
//! stored through errcode_ret (null-safe via not_null) and (ret) 0 is returned.
//! The macro refers to errcode_ret by name, so \a args must declare a
//! parameter with exactly that name.
//! The function body's opening brace is intentionally left unclosed here;
//! RUNTIME_EXIT supplies the matching closing brace.
#define RUNTIME_ENTRY_RET(ret, func, args)                                   \
|
||||
CL_API_ENTRY ret CL_API_CALL                                                 \
|
||||
func args                                                                    \
|
||||
{                                                                            \
|
||||
amd::Thread* thread = amd::Thread::current();                            \
|
||||
if (!CL_CHECK_THREAD(thread)) {                                          \
|
||||
*not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY;                      \
|
||||
return (ret) 0;                                                      \
|
||||
}
|
||||
|
||||
#define RUNTIME_ENTRY_RET_NOERRCODE(ret, func, args) \
|
||||
CL_API_ENTRY ret CL_API_CALL \
|
||||
func args \
|
||||
{ \
|
||||
amd::Thread* thread = amd::Thread::current(); \
|
||||
if (!CL_CHECK_THREAD(thread)) { \
|
||||
return (ret) 0; \
|
||||
}
|
||||
|
||||
#define RUNTIME_ENTRY(ret, func, args) \
|
||||
CL_API_ENTRY ret CL_API_CALL \
|
||||
func args \
|
||||
{ \
|
||||
amd::Thread* thread = amd::Thread::current(); \
|
||||
if (!CL_CHECK_THREAD(thread)) { \
|
||||
return CL_OUT_OF_HOST_MEMORY; \
|
||||
}
|
||||
|
||||
#define RUNTIME_ENTRY_VOID(ret, func, args) \
|
||||
CL_API_ENTRY ret CL_API_CALL \
|
||||
func args \
|
||||
{ \
|
||||
amd::Thread* thread = amd::Thread::current(); \
|
||||
if (!CL_CHECK_THREAD(thread)) { \
|
||||
return; \
|
||||
}
|
||||
|
||||
#define RUNTIME_EXIT \
|
||||
/* FIXME_lmoriche: we should check to thread->lastError here! */ \
|
||||
}
|
||||
|
||||
//! Helper function to check "properties" parameter in various functions
|
||||
int checkContextProperties(
|
||||
const cl_context_properties *properties,
|
||||
bool* offlineDevices);
|
||||
|
||||
namespace amd {
|
||||
|
||||
namespace detail {

//! Describes how a queried field is handed back to the caller: get() yields
//! the address to copy from and the number of bytes to copy.
//! The primary template copies the object itself (sizeof(T) bytes).
template <typename T>
struct ParamInfo
{
    static inline std::pair<const void*, size_t> get(const T& param) {
        return std::pair<const void*, size_t>(&param, sizeof(T));
    }
};

//! C strings are copied by content, including the terminating NUL.
template <>
struct ParamInfo<const char*>
{
    static inline std::pair<const void*, size_t> get(const char* param) {
        return std::pair<const void*, size_t>(param, strlen(param) + 1);
    }
};

//! Character arrays are treated as C strings: only the bytes up to and
//! including the first NUL are reported, not the full array extent.
template <int N>
struct ParamInfo<char[N]>
{
    static inline std::pair<const void*, size_t> get(const char* param) {
        return std::pair<const void*, size_t>(param, strlen(param) + 1);
    }
};

} // namespace detail
|
||||
|
||||
template <typename T>
|
||||
static inline cl_int
|
||||
clGetInfo(
|
||||
T& field,
|
||||
size_t param_value_size,
|
||||
void* param_value,
|
||||
size_t* param_value_size_ret)
|
||||
{
|
||||
const void *valuePtr;
|
||||
size_t valueSize;
|
||||
|
||||
std::tie(valuePtr, valueSize)
|
||||
= detail::ParamInfo<typename std::remove_const<T>::type>::get(field);
|
||||
|
||||
*not_null(param_value_size_ret) = valueSize;
|
||||
|
||||
cl_int ret = CL_SUCCESS;
|
||||
if (param_value != NULL && param_value_size < valueSize) {
|
||||
if (!std::is_pointer<T>() || !std::is_same<typename std::remove_const<
|
||||
typename std::remove_pointer<T>::type>::type, char>()) {
|
||||
return CL_INVALID_VALUE;
|
||||
}
|
||||
// For char* and char[] params, we will at least fill up to
|
||||
// param_value_size, then return an error.
|
||||
valueSize = param_value_size;
|
||||
static_cast<char*>(param_value)[--valueSize] = '\0';
|
||||
ret = CL_INVALID_VALUE;
|
||||
}
|
||||
|
||||
if (param_value != NULL) {
|
||||
::memcpy(param_value, valuePtr, valueSize);
|
||||
if (param_value_size > valueSize) {
|
||||
::memset(static_cast<address>(param_value) + valueSize,
|
||||
'\0', param_value_size - valueSize);
|
||||
}
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
//! Validate an API-level event wait list and append its events to
//! \a eventWaitList.
//!
//! \return CL_SUCCESS when every event was accepted;
//!         CL_INVALID_EVENT_WAIT_LIST when the count and the pointer disagree,
//!         when an event handle is not valid, or when an event that belongs
//!         to another queue fails notifyCmdQueue();
//!         CL_INVALID_CONTEXT when an event's context differs from the
//!         context of \a hostQueue.
//!         Events accepted before a failure remain in \a eventWaitList.
static inline cl_int
clSetEventWaitList(
    Command::EventWaitList& eventWaitList,
    const amd::HostQueue& hostQueue,
    cl_uint num_events_in_wait_list,
    const cl_event* event_wait_list)
{
    // A zero count requires a null list and a non-zero count a non-null one.
    const bool countIsZero = (num_events_in_wait_list == 0);
    const bool listIsNull = (event_wait_list == NULL);
    if (countIsZero != listIsNull) {
        return CL_INVALID_EVENT_WAIT_LIST;
    }

    for (cl_uint index = 0; index < num_events_in_wait_list; ++index) {
        cl_event handle = event_wait_list[index];
        Event* amdEvent = as_amd(handle);

        if (!is_valid(handle)) {
            return CL_INVALID_EVENT_WAIT_LIST;
        }
        if (&hostQueue.context() != &amdEvent->context()) {
            return CL_INVALID_CONTEXT;
        }
        // notifyCmdQueue() is consulted only for events from another queue.
        if (amdEvent->command().queue() != &hostQueue) {
            if (!amdEvent->notifyCmdQueue()) {
                return CL_INVALID_EVENT_WAIT_LIST;
            }
        }
        eventWaitList.push_back(amdEvent);
    }

    return CL_SUCCESS;
}
|
||||
|
||||
//! Common function declarations for CL-external graphics API interop
|
||||
cl_int clEnqueueAcquireExtObjectsAMD(cl_command_queue command_queue,
|
||||
cl_uint num_objects, const cl_mem* mem_objects,
|
||||
cl_uint num_events_in_wait_list, const cl_event* event_wait_list,
|
||||
cl_event* event, cl_command_type cmd_type);
|
||||
cl_int clEnqueueReleaseExtObjectsAMD(cl_command_queue command_queue,
|
||||
cl_uint num_objects, const cl_mem* mem_objects,
|
||||
cl_uint num_events_in_wait_list, const cl_event* event_wait_list,
|
||||
cl_event* event, cl_command_type cmd_type);
|
||||
|
||||
// This may need moving somewhere tidier...
|
||||
|
||||
struct PlatformIDS { const struct KHRicdVendorDispatchRec* dispatch_; };
|
||||
class PlatformID {
|
||||
public:
|
||||
static PlatformIDS Platform;
|
||||
};
|
||||
#define AMD_PLATFORM (reinterpret_cast<cl_platform_id>(&amd::PlatformID::Platform))
|
||||
|
||||
} // namespace amd
|
||||
|
||||
extern "C" {
|
||||
|
||||
extern CL_API_ENTRY cl_key_amd CL_API_CALL
|
||||
clCreateKeyAMD(
|
||||
cl_platform_id platform,
|
||||
void (CL_CALLBACK * destructor)( void * ),
|
||||
cl_int * errcode_ret);
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clObjectGetValueForKeyAMD(
|
||||
void * object,
|
||||
cl_key_amd key,
|
||||
void ** ret_val);
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clObjectSetValueForKeyAMD(
|
||||
void * object,
|
||||
cl_key_amd key,
|
||||
void * value);
|
||||
|
||||
#if defined(CL_VERSION_1_1)
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clSetCommandQueueProperty(
|
||||
cl_command_queue command_queue,
|
||||
cl_command_queue_properties properties,
|
||||
cl_bool enable,
|
||||
cl_command_queue_properties *old_properties) CL_API_SUFFIX__VERSION_1_0;
|
||||
#endif // CL_VERSION_1_1
|
||||
|
||||
extern CL_API_ENTRY cl_mem CL_API_CALL
|
||||
clConvertImageAMD(
|
||||
cl_context context,
|
||||
cl_mem image,
|
||||
const cl_image_format * image_format,
|
||||
cl_int * errcode_ret);
|
||||
|
||||
extern CL_API_ENTRY cl_mem CL_API_CALL
|
||||
clCreateBufferFromImageAMD(
|
||||
cl_context context,
|
||||
cl_mem image,
|
||||
cl_int * errcode_ret);
|
||||
|
||||
extern CL_API_ENTRY cl_program CL_API_CALL
|
||||
clCreateProgramWithAssemblyAMD(
|
||||
cl_context context,
|
||||
cl_uint count,
|
||||
const char ** strings,
|
||||
const size_t * lengths,
|
||||
cl_int * errcode_ret);
|
||||
|
||||
} // extern "C"
|
||||
|
||||
//! \endcond
|
||||
|
||||
#endif /*CL_COMMON_HPP_*/
|
||||
@@ -0,0 +1,694 @@
|
||||
/* Copyright (c) 2014-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef __CL_DEBUGGER_AMD_H
|
||||
#define __CL_DEBUGGER_AMD_H
|
||||
|
||||
#ifdef __APPLE__
|
||||
#include <OpenCL/cl_platform.h>
|
||||
#else
|
||||
#include <CL/cl_platform.h>
|
||||
#endif
|
||||
|
||||
/******************************************
|
||||
* Private AMD extension cl_dbg *
|
||||
******************************************/
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif /*__cplusplus*/
|
||||
|
||||
#define CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD -80
|
||||
#define CL_DEBUGGER_REGISTER_FAILURE_AMD -81
|
||||
#define CL_TRAP_HANDLER_NOT_DEFINED_AMD -82
|
||||
#define CL_EVENT_TIMEOUT_AMD -83
|
||||
|
||||
|
||||
typedef uintptr_t cl_dbg_event_amd; //! debug event
|
||||
|
||||
/*! \brief Trap Handler Type
|
||||
*
|
||||
* The trap handler for each support type.
|
||||
*/
|
||||
enum cl_dbg_trap_type_amd {
|
||||
CL_DBG_DEBUG_TRAP = 0, //! HW debug
|
||||
CL_DBG_MAX_TRAP
|
||||
};
|
||||
|
||||
/*! \brief Wave actions used to control the wave execution on the hardware
|
||||
*
|
||||
* The wave action enumerations are used to specify the desired
|
||||
* behavior when calling the wave control function. Overall, there are
|
||||
* five types of operations that can be specified.
|
||||
*/
|
||||
enum cl_dbg_waves_action_amd {
|
||||
CL_DBG_WAVES_DONT_USE_ZERO = 0, //! NOT USED
|
||||
CL_DBG_WAVES_HALT = 1, //! halt wave
|
||||
CL_DBG_WAVES_RESUME = 2, //! resume wave
|
||||
CL_DBG_WAVES_KILL = 3, //! kill wave
|
||||
CL_DBG_WAVES_DEBUG = 4, //! debug wave
|
||||
CL_DBG_WAVES_TRAP = 5, //! trap
|
||||
CL_DBG_WAVES_MAX
|
||||
};
|
||||
|
||||
/*! \brief Host actions when encountering an exception in the kernel.
|
||||
*
|
||||
* The host action enumeration is used to specify the desired host
|
||||
* response in the event that a device kernel exception is encountered.
|
||||
*/
|
||||
enum cl_dbg_host_action_amd {
|
||||
CL_DBG_HOST_IGNORE = 1, //! ignore the kernel exception
|
||||
CL_DBG_HOST_EXIT = 2, //! exit the host application on a kernel exception
|
||||
CL_DBG_HOST_NOTIFY = 4 //! report the kernel exception
|
||||
};
|
||||
|
||||
/*! \brief Mode of the wave action when calling the wave control function
|
||||
*
|
||||
* The wave mode enumerations are used to specify the desired
|
||||
* broadcast level when calling the wave control function.
|
||||
*/
|
||||
enum cl_dbg_wave_mode_amd {
|
||||
CL_DBG_WAVEMODE_SINGLE = 0, //! send command to single wave
|
||||
CL_DBG_WAVEMODE_BROADCAST = 2, //! send command to wave with match VMID
|
||||
CL_DBG_WAVEMODE_BROADCAST_CU = 3, //! send command to wave with match VMID with specific CU
|
||||
CL_DBG_WAVEMODE_MAX
|
||||
};
|
||||
|
||||
/*! \brief Enumeration of address watch mode
|
||||
*
|
||||
* This enumeration indicates the different modes of address watch.
|
||||
*/
|
||||
enum cl_dbg_address_watch_mode_amd {
|
||||
CL_DBG_ADDR_WATCH_MODE_READ = 0, //! Read operations only
|
||||
CL_DBG_ADDR_WATCH_MODE_NONREAD = 1, //! Write or Atomic operations only
|
||||
CL_DBG_ADDR_WATCH_MODE_ATOMIC = 2, //! Atomic Operations only
|
||||
CL_DBG_ADDR_WATCH_MODE_ALL = 3, //! Read, Write or Atomic operations
|
||||
CL_DBG_ADDR_WATCH_MODE_MAX //! Number of address watch modes
|
||||
};
|
||||
|
||||
/*! \brief Dispatch exception policy descriptor
|
||||
*
|
||||
* The dispatch exception policy descriptor is used to define the
|
||||
* expected exception policy in the event an exception is encountered
|
||||
* on the associated dispatch.
|
||||
*/
|
||||
typedef struct _cl_dbg_exception_policy_amd {
|
||||
cl_uint exceptionMask; //! exception mask
|
||||
cl_dbg_waves_action_amd waveAction; //! wave action
|
||||
cl_dbg_host_action_amd hostAction; //! host action
|
||||
cl_dbg_wave_mode_amd waveMode; //! wave mode
|
||||
} cl_dbg_exception_policy_amd;
|
||||
|
||||
/*! \brief Kernel execution mode
|
||||
*
|
||||
* This structure is used to control the kernel execution mode. The
|
||||
* following aspects are included in this structure:
|
||||
* 1. Regular execution or debug mode (0: regular execution (default),
|
||||
* 1: debug mode)
|
||||
* 2. SQ debugger mode on/off
|
||||
* 3. Disable L1 scalar cache (0: enable (default), 1: disable)
|
||||
* 4. Disable L1 vector cache (0: enable (default), 1: disable)
|
||||
* 5. Disable L2 cache (0: enable (default), 1: disable)
|
||||
* 6. Num of CUs reserved for display (0 (default), 7: max)
|
||||
*/
|
||||
typedef struct _cl_dbg_kernel_exec_mode_amd {
|
||||
union {
|
||||
struct {
|
||||
cl_uint monitorMode : 1;
|
||||
cl_uint gpuSingleStepMode : 1;
|
||||
cl_uint disableL1Scalar : 1;
|
||||
cl_uint disableL1Vector : 1;
|
||||
cl_uint disableL2Cache : 1;
|
||||
cl_uint reservedCuNum : 3;
|
||||
cl_uint reserved : 24;
|
||||
};
|
||||
cl_uint ui32All;
|
||||
};
|
||||
} cl_dbg_kernel_exec_mode_amd;
|
||||
|
||||
/*! \brief GPU cache mask
|
||||
*
|
||||
* This structure is used to specify the GPU cache to be flushed/invalidated
|
||||
*/
|
||||
typedef struct _cl_dbg_gpu_cache_mask_amd {
|
||||
union {
|
||||
struct {
|
||||
cl_uint sqICache : 1; //! instruction cache
|
||||
cl_uint sqKCache : 1; //! data cache
|
||||
cl_uint tcL1 : 1; //! tcL1 cache
|
||||
cl_uint tcL2 : 1; //! tcL2 cache
|
||||
cl_uint reserved : 28;
|
||||
};
|
||||
cl_uint ui32All;
|
||||
};
|
||||
} cl_dbg_gpu_cache_mask_amd;
|
||||
|
||||
/*! \brief Dispatch Debug Info
|
||||
*
|
||||
* This structure is used to store the scratch and global memory descriptors
|
||||
*/
|
||||
typedef struct _cl_dispatch_debug_info_amd {
|
||||
cl_uint scratchMemoryDescriptor[4]; //! Scratch memory descriptors
|
||||
cl_uint globalMemoryDescriptor[4]; //! Global memory descriptors
|
||||
} cl_dispatch_debug_info_amd;
|
||||
|
||||
/*! \brief AQL Packet Info
|
||||
*
|
||||
* This structure is used to store AQL packet information for kernel dispatch
|
||||
*/
|
||||
typedef struct _cl_aql_packet_info_amd {
|
||||
cl_uint trapReservedVgprIndex; //! VGPR index reserved for trap
|
||||
//! value is -1 when kernel was not compiled
|
||||
//! in debug mode.
|
||||
cl_uint scratchBufferWaveOffset; //! scratch buffer wave offset
|
||||
//! value is -1 when kernel was not compiled
|
||||
//! in debug mode or scratch buffer is not enabled
|
||||
void* pointerToIsaBuffer; //! Pointer to buffer containing ISA
|
||||
size_t sizeOfIsaBuffer; //! Size of the ISA buffer
|
||||
|
||||
cl_uint numberOfVgprs; //! Number of VGPRs used by the kernel
|
||||
cl_uint numberOfSgprs; //! Number of SGPRs used by the kernel
|
||||
size_t sizeOfStaticGroupMemory; //! Static local memory used by the kernel
|
||||
} cl_aql_packet_info_amd;
|
||||
|
||||
/*! \brief Wave address
|
||||
*
|
||||
* This structure specifies the wave for the SQ control command
|
||||
*/
|
||||
typedef struct _cl_dbg_wave_addr_amd {
|
||||
cl_uint shaderEngine : 2; //! Shader engine
|
||||
cl_uint shaderArray : 1; //! Shader array
|
||||
cl_uint computeUnit : 4; //! Compute unit
|
||||
cl_uint simd : 2; //! SIMD id
|
||||
cl_uint wave : 4; //! Wave id
|
||||
cl_uint vmid : 4; //! VMID
|
||||
cl_uint reserved : 15;
|
||||
|
||||
} cl_dbg_wave_addr_amd;
|
||||
|
||||
/*! \brief Pre-dispatch call back function signature
|
||||
*
|
||||
* This is the signature of the call back function before the kernel
|
||||
* dispatch. The call back function is to indicate the start of the
|
||||
* kernel launch. It is used by the debugger.
|
||||
*/
|
||||
typedef void* (*cl_PreDispatchCallBackFunctionAMD)(cl_device_id device, void* ocl_event_handle,
|
||||
const void* aql_packet, void* acl_binary,
|
||||
void* user_args);
|
||||
|
||||
/*! \brief Post-dispatch call back function signature
|
||||
*
|
||||
* This is the signature of the call back function after the kernel
|
||||
* dispatch. The call back function is to indicate the completion of
|
||||
* the kernel launch. It is used by the debugger.
|
||||
*/
|
||||
typedef void* (*cl_PostDispatchCallBackFunctionAMD)(cl_device_id device, cl_ulong event,
|
||||
void* user_args);
|
||||
|
||||
/*! \brief Set up the dispatch call back function pointers
|
||||
*
|
||||
* \param device specifies the device to be used
|
||||
*
|
||||
* \param preDispatchFunction is the function to be called before dispatching the kernel
|
||||
*
|
||||
* \param postDispatchFunction is the function to be called after kernel execution
|
||||
*
|
||||
* \return One of the following values:
|
||||
* - CL_SUCCESS if the function is executed successfully
|
||||
* - CL_INVALID_DEVICE if the device is not valid
|
||||
* - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager
|
||||
*/
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgSetCallBackFunctionsAMD(
|
||||
cl_device_id /* device */, cl_PreDispatchCallBackFunctionAMD /* preDispatchFunction */,
|
||||
cl_PostDispatchCallBackFunctionAMD /* postDispatchFunction */
|
||||
) CL_API_SUFFIX__VERSION_2_0;
|
||||
|
||||
|
||||
/*! \brief Set up the arguments of the dispatch call back function
|
||||
*
|
||||
* \param device specifies the device to be used
|
||||
*
|
||||
* \param preDispatchArgs is the arguments for the pre-dispatch callback function
|
||||
*
|
||||
* \param postDispatchArgs is the arguments for the post-dispatch callback function
|
||||
*
|
||||
* \return One of the following values:
|
||||
* - CL_SUCCESS if the function is executed successfully
|
||||
* - CL_INVALID_DEVICE if the device is not valid
|
||||
* - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager
|
||||
*/
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgSetCallBackArgumentsAMD(cl_device_id /* device */,
|
||||
void* /* preDispatchArgs */,
|
||||
void* /* postDispatchArgs */
|
||||
) CL_API_SUFFIX__VERSION_2_0;
|
||||
|
||||
|
||||
/*! \brief Invalidate all cache on the device.
|
||||
*
|
||||
* \param device specifies the device to be used
|
||||
*
|
||||
* \param mask is the mask to specify which cache to be flush/invalidate
|
||||
*
|
||||
* \return One of the following values:
|
||||
* - CL_SUCCESS if the function is executed successfully
|
||||
* - CL_INVALID_DEVICE if the device is not valid
|
||||
* - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager
|
||||
*/
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgFlushCacheAMD(cl_device_id /* device */,
|
||||
cl_dbg_gpu_cache_mask_amd /* mask */
|
||||
) CL_API_SUFFIX__VERSION_2_0;
|
||||
|
||||
|
||||
/*! \brief Set up an exception policy in the trap handler object
|
||||
*
|
||||
* \param device specifies the device to be used
|
||||
*
|
||||
* \param policy specifies the exception policy, which includes the exception mask,
|
||||
* wave action, host action, wave mode.
|
||||
*
|
||||
* \return One of the following values:
|
||||
* - CL_SUCCESS if the function is executed successfully
|
||||
* - CL_INVALID_DEVICE if the device is not valid
|
||||
* - CL_INVALID_VALUE if the policy is not specified (NULL)
|
||||
* - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager
|
||||
*/
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgSetExceptionPolicyAMD(
|
||||
cl_device_id /* device */, cl_dbg_exception_policy_amd* /* policy */
|
||||
) CL_API_SUFFIX__VERSION_2_0;
|
||||
|
||||
/*! \brief Get the exception policy in the trap handler object
|
||||
*
|
||||
* \param device specifies the device to be used
|
||||
*
|
||||
* \param policy is a pointer to the memory where the policy is returned
|
||||
*
|
||||
* \return One of the following values:
|
||||
* - CL_SUCCESS if the function is executed successfully
|
||||
* - CL_INVALID_DEVICE if the device is not valid
|
||||
* - CL_INVALID_VALUE if the policy storage is not specified
|
||||
* - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager
|
||||
*/
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgGetExceptionPolicyAMD(
|
||||
cl_device_id /* device */, cl_dbg_exception_policy_amd* /* policy */
|
||||
) CL_API_SUFFIX__VERSION_2_0;
|
||||
|
||||
/*! \brief Set up the kernel execution mode in the trap handler object
|
||||
*
|
||||
* \param device specifies the device to be used
|
||||
*
|
||||
* \param mode specifies the kernel execution mode, which indicate whether single
|
||||
* step mode is used, how many CUs are reserved.
|
||||
*
|
||||
* \return One of the following values:
|
||||
* - CL_SUCCESS if the function is executed successfully
|
||||
* - CL_INVALID_DEVICE if the device is not valid
|
||||
* - CL_INVALID_VALUE if the mode is not specified, ie, has a NULL value
|
||||
* - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager
|
||||
*/
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgSetKernelExecutionModeAMD(
|
||||
cl_device_id /* device */, cl_dbg_kernel_exec_mode_amd* /* mode */
|
||||
) CL_API_SUFFIX__VERSION_2_0;
|
||||
|
||||
|
||||
/*! \brief Get the kernel execution mode in the trap handler object
|
||||
*
|
||||
* \param device specifies the device to be used
|
||||
*
|
||||
* \param mode is a pointer to the memory where the execution mode is returned
|
||||
*
|
||||
* \return One of the following values:
|
||||
* - CL_SUCCESS if the function is executed successfully
|
||||
* - CL_INVALID_DEVICE if the device is not valid
|
||||
* - CL_INVALID_VALUE if the mode storage is not specified, ie, has a NULL value
|
||||
* - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager
|
||||
*/
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgGetKernelExecutionModeAMD(
|
||||
cl_device_id /* device */, cl_dbg_kernel_exec_mode_amd* /* mode */
|
||||
) CL_API_SUFFIX__VERSION_2_0;
|
||||
|
||||
|
||||
/*! \brief Create a debug event
|
||||
*
|
||||
* \param device specifies the device to be used
|
||||
*
|
||||
* \param autoReset is the auto reset flag
|
||||
*
|
||||
* \param pDebugEvent returns the debug event to be used for exception notification
|
||||
*
|
||||
* \param pEventId is the event ID, which is not used at this moment
|
||||
*
|
||||
* \return One of the following values:
|
||||
* - CL_SUCCESS if the function is executed successfully
|
||||
* - CL_INVALID_DEVICE if the device is not valid
|
||||
* - CL_INVALID_VALUE if the pDebugEvent value is NULL
|
||||
* - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager
|
||||
* - CL_OUT_OF_RESOURCES if fails to create the event
|
||||
*/
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgCreateEventAMD(cl_device_id /* device */,
|
||||
bool /* autoReset */,
|
||||
cl_dbg_event_amd* /* pDebugEvent */,
|
||||
cl_uint* /* pEventId */
|
||||
) CL_API_SUFFIX__VERSION_2_0;
|
||||
|
||||
/*! \brief Wait for a debug event to be signaled
|
||||
*
|
||||
* \param device specifies the device to be used
|
||||
*
|
||||
* \param pDebugEvent is the debug event to be waited for
|
||||
*
|
||||
* \param pEventId is the event ID, which is not used at this moment
|
||||
*
|
||||
* \param timeOut is the duration for waiting
|
||||
*
|
||||
* \return One of the following values:
|
||||
* - CL_SUCCESS if the event occurs before the timeout
|
||||
* - CL_INVALID_DEVICE if the device is not valid
|
||||
* - CL_INVALID_VALUE if the pDebugEvent value is NULL
|
||||
* - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager
|
||||
* - CL_EVENT_TIMEOUT_AMD if timeout occurs
|
||||
*/
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgWaitEventAMD(cl_device_id /* device */,
|
||||
cl_dbg_event_amd /* pDebugEvent */,
|
||||
cl_uint /* pEventId */,
|
||||
cl_uint /* timeOut */
|
||||
) CL_API_SUFFIX__VERSION_2_0;
|
||||
|
||||
/*! \brief Destroy a debug event
|
||||
*
|
||||
* \param device specifies the device to be used
|
||||
*
|
||||
* \param pDebugEvent is the debug event to be destroyed
|
||||
*
|
||||
* \param pEventId is the event ID, which is not used at this moment
|
||||
*
|
||||
* \return One of the following values:
|
||||
* - CL_SUCCESS if the function is executed successfully
|
||||
* - CL_INVALID_DEVICE if the device is not valid
|
||||
* - CL_INVALID_VALUE if the pDebugEvent value is NULL
|
||||
* - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager
|
||||
*/
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgDestroyEventAMD(cl_device_id /* device */,
|
||||
cl_dbg_event_amd* /* pDebugEvent */,
|
||||
cl_uint* /* pEventId */
|
||||
) CL_API_SUFFIX__VERSION_2_0;
|
||||
|
||||
|
||||
/*! \brief Register the debugger on a device
|
||||
*
|
||||
* \param context specifies the context for the debugger
|
||||
*
|
||||
* \param device specifies the device to be used
|
||||
*
|
||||
* \param pMessageStorage specifies the memory for trap message passing between KMD and OCL runtime
|
||||
*
|
||||
* \return One of the following values:
|
||||
* - CL_SUCCESS if the function is executed successfully
|
||||
* - CL_INVALID_CONTEXT if the context is not valid
|
||||
* - CL_INVALID_DEVICE if the device is not valid
|
||||
* - CL_INVALID_VALUE if the pMessageStorage value is NULL
|
||||
* - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager
|
||||
* - CL_OUT_OF_RESOURCES if a host queue cannot be created for the debugger
|
||||
*/
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgRegisterDebuggerAMD(
|
||||
cl_context /* context */, cl_device_id /* device */, volatile void* /* pMessageStorage */
|
||||
) CL_API_SUFFIX__VERSION_2_0;
|
||||
|
||||
|
||||
/*! \brief Unregister the debugger on a device
|
||||
*
|
||||
* \param device specifies the device to be used
|
||||
*
|
||||
* \return One of the following values:
|
||||
* - CL_SUCCESS if the function is executed successfully
|
||||
* - CL_INVALID_DEVICE if the device is not valid
|
||||
* - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager
|
||||
*/
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgUnregisterDebuggerAMD(cl_device_id /* device */
|
||||
) CL_API_SUFFIX__VERSION_2_0;
|
||||
|
||||
/*! \brief Setup the pointer of the acl_binary to be used by the debugger
|
||||
*
|
||||
* \param device specifies the device to be used
|
||||
*
|
||||
* \param aclBinary specifies the ACL binary to be used
|
||||
*
|
||||
* \return One of the following values:
|
||||
* - CL_SUCCESS if the function is executed successfully
|
||||
* - CL_INVALID_DEVICE if the device is not valid
|
||||
* - CL_INVALID_VALUE if the aclBinary is not provided
|
||||
* - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager
|
||||
*/
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgSetAclBinaryAMD(cl_device_id /* device */,
|
||||
void* /* aclBinary */
|
||||
) CL_API_SUFFIX__VERSION_2_0;
|
||||
|
||||
|
||||
/*! \brief Control the execution of wavefront on the GPU
|
||||
*
|
||||
* \param device specifies the device to be used
|
||||
*
|
||||
* \param action specifies the wave action - halt, resume, kill, debug
|
||||
*
|
||||
* \param mode specifies the wave mode
|
||||
*
|
||||
* \param trapID specifies the trap ID, which should be 0x7
|
||||
*
|
||||
* \param waveAddress specifies the wave address for the wave control
|
||||
*
|
||||
* \return One of the following values:
|
||||
* - CL_SUCCESS if the function is executed successfully
|
||||
* - CL_INVALID_DEVICE if the device is not valid
|
||||
* - CL_INVALID_VALUE if the waveMsg is not provided, invalid action or mode value
|
||||
* - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager
|
||||
*/
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgWaveControlAMD(cl_device_id /* device */,
|
||||
cl_dbg_waves_action_amd /* action */,
|
||||
cl_dbg_wave_mode_amd /* mode */,
|
||||
cl_uint /* trapId */,
|
||||
cl_dbg_wave_addr_amd /* waveAddress */
|
||||
) CL_API_SUFFIX__VERSION_2_0;
|
||||
|
||||
/*! \brief Set watch points on memory address ranges to generate exception events
|
||||
*
|
||||
* \param device specifies the device to be used
|
||||
*
|
||||
* \param numWatchPoints specifies the number of watch points
|
||||
*
|
||||
* \param watchMode is the array of watch mode for the watch points
|
||||
*
|
||||
* \param watchAddress is the array of watch address for the watch points
|
||||
*
|
||||
* \param watchMask is the array of mask for the watch points
|
||||
*
|
||||
* \param watchEvent is the array of event for the watch points
|
||||
*
|
||||
* \return One of the following values:
|
||||
* - CL_SUCCESS if the function is executed successfully
|
||||
* - CL_INVALID_DEVICE if the device is not valid
|
||||
* - CL_INVALID_VALUE if the number of points <= 0, or any of the other parameters is not specified
|
||||
* - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager
|
||||
*/
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgAddressWatchAMD(
|
||||
cl_device_id /* device */, cl_uint /* numWatchPoints */,
|
||||
cl_dbg_address_watch_mode_amd* /* watchMode */, void** /* watchAddress */,
|
||||
cl_ulong* /* watchMask */, cl_dbg_event_amd* /* watchEvent */
|
||||
) CL_API_SUFFIX__VERSION_2_0;
|
||||
|
||||
/*! \brief Get the packet information for kernel execution
|
||||
*
|
||||
* \param device specifies the device to be used
|
||||
*
|
||||
* \param aqlCodeInfo specifies the kernel code and its size
|
||||
*
|
||||
* \param packetInfo points to the memory for the packet information to be returned
|
||||
*
|
||||
* \return One of the following values:
|
||||
* - CL_SUCCESS if the function is executed successfully
|
||||
* - CL_INVALID_DEVICE if the device is not valid
|
||||
* - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager
|
||||
*/
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgGetAqlPacketInfoAMD(
|
||||
cl_device_id /* device */, const void* /* aqlCodeInfo */,
|
||||
cl_aql_packet_info_amd* /* packetInfo */
|
||||
) CL_API_SUFFIX__VERSION_2_0;
|
||||
|
||||
|
||||
/*! \brief Get the dispatch debug information
|
||||
*
|
||||
* \param device specifies the device to be used
|
||||
*
|
||||
* \param debugInfo points to the memory for the debug information to be returned
|
||||
*
|
||||
* \return One of the following values:
|
||||
* - CL_SUCCESS if the function is executed successfully
|
||||
* - CL_INVALID_DEVICE if the device is not valid
|
||||
* - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager
|
||||
*/
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgGetDispatchDebugInfoAMD(
|
||||
cl_device_id /* device */, cl_dispatch_debug_info_amd* /* debugInfo */
|
||||
) CL_API_SUFFIX__VERSION_2_0;
|
||||
|
||||
|
||||
/*! \brief Map the video memory for the kernel code to allow host access
|
||||
*
|
||||
* \param device specifies the device to be used
|
||||
*
|
||||
* \param aqlCodeAddress is the memory points to the returned host memory address for the kernel
|
||||
* code
|
||||
*
|
||||
* \param aqlCodeSize returns the size of the kernel code
|
||||
*
|
||||
* \return One of the following values:
|
||||
* - CL_SUCCESS if the function is executed successfully
|
||||
* - CL_INVALID_DEVICE if the device is not valid
|
||||
* - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager
|
||||
*/
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgMapKernelCodeAMD(cl_device_id /* device */,
|
||||
void* /* aqlCodeInfo */
|
||||
) CL_API_SUFFIX__VERSION_2_0;
|
||||
|
||||
|
||||
/*! \brief Unmap the video memory for the kernel code
|
||||
*
|
||||
* \param device specifies the device to be used (not needed, just to be consistent)
|
||||
*
|
||||
* \param aqlCodeAddress is the memory points to the mapped memory address for the kernel code
|
||||
*
|
||||
* \return One of the following values:
|
||||
* - CL_SUCCESS if the function is executed successfully
|
||||
* - CL_INVALID_DEVICE if the device is not valid
|
||||
* - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager
|
||||
*/
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgUnmapKernelCodeAMD(cl_device_id /* device */,
|
||||
cl_ulong* /* aqlCodeAddress */
|
||||
) CL_API_SUFFIX__VERSION_2_0;
|
||||
|
||||
|
||||
/*! \brief Map the shader scratch ring's video memory to allow CPU access
|
||||
*
|
||||
* \param device specifies the device to be used
|
||||
*
|
||||
* \param scratchRingAddr is the memory points to the returned host memory address for scratch
|
||||
* ring
|
||||
*
|
||||
* \param scratchRingSize returns the size of the scratch ring
|
||||
*
|
||||
* \return One of the following values:
|
||||
* - CL_SUCCESS if the function is executed successfully
|
||||
* - CL_INVALID_DEVICE if the device is not valid
|
||||
* - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager
|
||||
*/
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgMapScratchRingAMD(cl_device_id /* device */,
|
||||
cl_ulong* /* scratchRingAddr */,
|
||||
cl_uint* /* scratchRingSize */
|
||||
) CL_API_SUFFIX__VERSION_2_0;
|
||||
|
||||
/*! \brief Unmap the shader scratch ring's video memory
|
||||
*
|
||||
* \param device specifies the device to be used (not needed, just to be consistent)
|
||||
*
|
||||
* \param scratchRingAddr is the memory points to the mapped memory address for scratch ring
|
||||
*
|
||||
* \return One of the following values:
|
||||
* - CL_SUCCESS if the function is executed successfully
|
||||
* - CL_INVALID_DEVICE if the device is not valid
|
||||
* - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager
|
||||
*/
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgUnmapScratchRingAMD(cl_device_id /* device */,
|
||||
cl_ulong* /* scratchRingAddr */
|
||||
) CL_API_SUFFIX__VERSION_2_0;
|
||||
|
||||
/*! \brief Get the memory object associated with the kernel parameter
|
||||
*
|
||||
* \param device specifies the device to be used
|
||||
*
|
||||
* \param paramIdx is the index of the kernel argument
|
||||
*
|
||||
* \param paramMem is pointer of the memory associated with the kernel argument to be returned
|
||||
*
|
||||
* \return One of the following values:
|
||||
* - CL_SUCCESS if the function is executed successfully
|
||||
* - CL_INVALID_DEVICE if the device is not valid
|
||||
* - CL_INVALID_VALUE if the paramIdx is less than zero, or the paramMem has NULL value
|
||||
* - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager
|
||||
* - CL_INVALID_KERNEL_ARGS if it fails to get the memory object for the kernel argument
|
||||
*/
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgGetKernelParamMemAMD(cl_device_id /* devicepointer */,
|
||||
cl_uint /* paramIdx */,
|
||||
cl_mem* /* paramMem */
|
||||
) CL_API_SUFFIX__VERSION_2_0;
|
||||
|
||||
/*! \brief Set value of a global memory object
|
||||
*
|
||||
* \param device specifies the device to be used
|
||||
*
|
||||
* \param memObject is the memory object handle to be assigned the value specified in srcMem.
|
||||
*
|
||||
* \param offset is offset of the memory object
|
||||
*
|
||||
* \param srcMem points to the memory which contains the values to be assigned to the memory
|
||||
*
|
||||
* \param size size (in bytes) of the srcMem
|
||||
*
|
||||
* \return One of the following values:
|
||||
* - CL_SUCCESS if the function is executed successfully
|
||||
* - CL_INVALID_DEVICE if the device is not valid
|
||||
* - CL_INVALID_VALUE if memObject or srcMem has NULL value, size <= 0 or offset < 0
|
||||
* - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager
|
||||
*/
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgSetGlobalMemoryAMD(cl_device_id /* device */,
|
||||
cl_mem /* memObject */,
|
||||
cl_uint /* offset */,
|
||||
void* /* srcMem */,
|
||||
cl_uint /* size */
|
||||
) CL_API_SUFFIX__VERSION_2_0;
|
||||
|
||||
|
||||
/*! \brief Install the trap handler of a given type
|
||||
*
|
||||
* \param device specifies the device to be used
|
||||
*
|
||||
* \param trapType is the type of trap handler
|
||||
*
|
||||
* \param trapHandler is the pointer of trap handler (TBA)
|
||||
*
|
||||
* \param trapBuffer is the pointer of trap handler buffer (TMA)
|
||||
*
|
||||
* \param trapHandlerSize size (in bytes) of the trap handler
|
||||
*
|
||||
* \param trapBufferSize size (in bytes) of the trap handler buffer
|
||||
*
|
||||
* \return One of the following values:
|
||||
* - CL_SUCCESS if the function is executed successfully
|
||||
* - CL_INVALID_DEVICE if the device is not valid
|
||||
* - CL_INVALID_VALUE if trapHandler is NULL or trapHandlerSize <= 0
|
||||
* - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager
|
||||
*/
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgInstallTrapAMD(cl_device_id /* device */,
|
||||
cl_dbg_trap_type_amd /* trapType */,
|
||||
cl_mem /* trapHandler */,
|
||||
cl_mem /* trapBuffer */
|
||||
) CL_API_SUFFIX__VERSION_2_0;
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /*extern "C"*/
|
||||
#endif /*__cplusplus*/
|
||||
|
||||
#endif /*__CL_DEBUGGER_AMD_H*/
|
||||
@@ -0,0 +1,293 @@
|
||||
/* Copyright (c) 2008-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include "cl_common.hpp"
|
||||
#include "vdi_common.hpp"
|
||||
#ifdef _WIN32
|
||||
#include <d3d10_1.h>
|
||||
#include "cl_d3d9_amd.hpp"
|
||||
#include "cl_d3d10_amd.hpp"
|
||||
#include "cl_d3d11_amd.hpp"
|
||||
#endif //_WIN32
|
||||
|
||||
#include <icd/loader/icd_dispatch.h>
|
||||
|
||||
#include <mutex>
|
||||
|
||||
// The platform object whose address clIcdGetPlatformIDsKHR returns as the
// cl_platform_id; it is brace-initialized with the vendor dispatch table.
amd::PlatformIDS amd::PlatformID::Platform = {amd::ICDDispatchedObject::icdVendorDispatch_};
|
||||
|
||||
// Dispatch-table entry for platform queries. The incoming platform handle is
// not used: the request is forwarded to clGetPlatformInfo with a NULL platform.
static cl_int CL_API_CALL icdGetPlatformInfo(cl_platform_id platform, cl_platform_info param_name,
                                             size_t param_value_size, void* param_value,
                                             size_t* param_value_size_ret) {
  const cl_int status =
      clGetPlatformInfo(NULL, param_name, param_value_size, param_value, param_value_size_ret);
  return status;
}
|
||||
|
||||
// Dispatch-table entry for device enumeration. The incoming platform handle is
// not used: the request is forwarded to clGetDeviceIDs with a NULL platform.
static cl_int CL_API_CALL icdGetDeviceIDs(cl_platform_id platform, cl_device_type device_type,
                                          cl_uint num_entries, cl_device_id* devices,
                                          cl_uint* num_devices) {
  const cl_int status = clGetDeviceIDs(NULL, device_type, num_entries, devices, num_devices);
  return status;
}
|
||||
|
||||
// Dispatch-table entry for device queries. Every query except
// CL_DEVICE_PLATFORM is forwarded unchanged to clGetDeviceInfo; for
// CL_DEVICE_PLATFORM the address of amd::PlatformID::Platform is reported.
static cl_int CL_API_CALL icdGetDeviceInfo(cl_device_id device, cl_device_info param_name,
                                           size_t param_value_size, void* param_value,
                                           size_t* param_value_size_ret) {
  if (param_name != CL_DEVICE_PLATFORM) {
    return clGetDeviceInfo(device, param_name, param_value_size, param_value,
                           param_value_size_ret);
  }

  // Return the ICD platform instead of the default NULL platform.
  cl_platform_id icdPlatform = reinterpret_cast<cl_platform_id>(&amd::PlatformID::Platform);
  return amd::clGetInfo(icdPlatform, param_value_size, param_value, param_value_size_ret);
}
|
||||
|
||||
// Vendor dispatch table. The entries are positional initializers of
// cl_icd_dispatch, so their order must be kept exactly as listed.
//
// WINDOWS_SWITCH(a, b): presumably expands to `a` on Windows builds and to `b`
// elsewhere -- TODO confirm against the macro definition.
cl_icd_dispatch amd::ICDDispatchedObject::icdVendorDispatch_[] = {{
    // --- Platform / device queries ---
    NULL /* should not get called */,
    icdGetPlatformInfo,
    icdGetDeviceIDs,
    icdGetDeviceInfo,

    // --- Contexts ---
    clCreateContext, clCreateContextFromType, clRetainContext, clReleaseContext,
    clGetContextInfo,

    // --- Command queues ---
    clCreateCommandQueue, clRetainCommandQueue, clReleaseCommandQueue,
    clGetCommandQueueInfo, clSetCommandQueueProperty,

    // --- Memory objects ---
    clCreateBuffer, clCreateImage2D, clCreateImage3D,
    clRetainMemObject, clReleaseMemObject,
    clGetSupportedImageFormats, clGetMemObjectInfo, clGetImageInfo,

    // --- Samplers ---
    clCreateSampler, clRetainSampler, clReleaseSampler, clGetSamplerInfo,

    // --- Programs ---
    clCreateProgramWithSource, clCreateProgramWithBinary,
    clRetainProgram, clReleaseProgram,
    clBuildProgram, clUnloadCompiler,
    clGetProgramInfo, clGetProgramBuildInfo,

    // --- Kernels ---
    clCreateKernel, clCreateKernelsInProgram,
    clRetainKernel, clReleaseKernel,
    clSetKernelArg, clGetKernelInfo, clGetKernelWorkGroupInfo,

    // --- Events ---
    clWaitForEvents, clGetEventInfo, clRetainEvent, clReleaseEvent,
    clGetEventProfilingInfo,

    // --- Flush / finish ---
    clFlush, clFinish,

    // --- Enqueued commands ---
    clEnqueueReadBuffer, clEnqueueWriteBuffer, clEnqueueCopyBuffer,
    clEnqueueReadImage, clEnqueueWriteImage, clEnqueueCopyImage,
    clEnqueueCopyImageToBuffer, clEnqueueCopyBufferToImage,
    clEnqueueMapBuffer, clEnqueueMapImage, clEnqueueUnmapMemObject,
    clEnqueueNDRangeKernel, clEnqueueTask, clEnqueueNativeKernel,
    clEnqueueMarker, clEnqueueWaitForEvents, clEnqueueBarrier,
    clGetExtensionFunctionAddress,

    // --- OpenGL interop ---
    clCreateFromGLBuffer, clCreateFromGLTexture2D, clCreateFromGLTexture3D,
    clCreateFromGLRenderbuffer,
    clGetGLObjectInfo, clGetGLTextureInfo,
    clEnqueueAcquireGLObjects, clEnqueueReleaseGLObjects,
    clGetGLContextInfoKHR,

    // --- Direct3D 10 interop ---
    WINDOWS_SWITCH(clGetDeviceIDsFromD3D10KHR, NULL),
    WINDOWS_SWITCH(clCreateFromD3D10BufferKHR, NULL),
    WINDOWS_SWITCH(clCreateFromD3D10Texture2DKHR, NULL),
    WINDOWS_SWITCH(clCreateFromD3D10Texture3DKHR, NULL),
    WINDOWS_SWITCH(clEnqueueAcquireD3D10ObjectsKHR, NULL),
    WINDOWS_SWITCH(clEnqueueReleaseD3D10ObjectsKHR, NULL),

    // --- Callbacks, sub-buffers, user events, rectangular transfers ---
    clSetEventCallback, clCreateSubBuffer, clSetMemObjectDestructorCallback,
    clCreateUserEvent, clSetUserEventStatus,
    clEnqueueReadBufferRect, clEnqueueWriteBufferRect, clEnqueueCopyBufferRect,

    // Three unpopulated slots.
    // NOTE(review): presumably the device-fission extension entries -- confirm
    // against the cl_icd_dispatch declaration.
    NULL, NULL, NULL,

    clCreateEventFromGLsyncKHR,

    /* OpenCL 1.2 */
    clCreateSubDevices, clRetainDevice, clReleaseDevice,
    clCreateImage,
    clCreateProgramWithBuiltInKernels, clCompileProgram, clLinkProgram,
    clUnloadPlatformCompiler,
    clGetKernelArgInfo,
    clEnqueueFillBuffer, clEnqueueFillImage, clEnqueueMigrateMemObjects,
    clEnqueueMarkerWithWaitList, clEnqueueBarrierWithWaitList,
    clGetExtensionFunctionAddressForPlatform,
    clCreateFromGLTexture,

    // --- Direct3D 11 interop (plus the DX9 media surface creator) ---
    WINDOWS_SWITCH(clGetDeviceIDsFromD3D11KHR, NULL),
    WINDOWS_SWITCH(clCreateFromD3D11BufferKHR, NULL),
    WINDOWS_SWITCH(clCreateFromD3D11Texture2DKHR, NULL),
    WINDOWS_SWITCH(clCreateFromD3D11Texture3DKHR, NULL),
    WINDOWS_SWITCH(clCreateFromDX9MediaSurfaceKHR, NULL),
    WINDOWS_SWITCH(clEnqueueAcquireD3D11ObjectsKHR, NULL),
    WINDOWS_SWITCH(clEnqueueReleaseD3D11ObjectsKHR, NULL),

    // --- DX9 media adapter interop ---
    WINDOWS_SWITCH(clGetDeviceIDsFromDX9MediaAdapterKHR, NULL),
    WINDOWS_SWITCH(clEnqueueAcquireDX9MediaSurfacesKHR, NULL),
    WINDOWS_SWITCH(clEnqueueReleaseDX9MediaSurfacesKHR, NULL),

    // Four unpopulated slots.
    // NOTE(review): presumably the EGL interop entries -- confirm against the
    // cl_icd_dispatch declaration.
    NULL,
    NULL, NULL, NULL,

    // --- Command queues with properties, pipes, SVM ---
    clCreateCommandQueueWithProperties, clCreatePipe, clGetPipeInfo,
    clSVMAlloc, clSVMFree,
    clEnqueueSVMFree, clEnqueueSVMMemcpy, clEnqueueSVMMemFill,
    clEnqueueSVMMap, clEnqueueSVMUnmap,
    clCreateSamplerWithProperties, clSetKernelArgSVMPointer, clSetKernelExecInfo,

    // clGetKernelSubGroupInfo fills two slots: this one and another further down.
    // NOTE(review): presumably this first slot is the KHR-extension variant -- confirm.
    clGetKernelSubGroupInfo,

    clCloneKernel,
    clCreateProgramWithIL,
    clEnqueueSVMMigrateMem,
    clGetDeviceAndHostTimer,
    clGetHostTimer,
    clGetKernelSubGroupInfo,
    clSetDefaultDeviceCommandQueue,

    clSetProgramReleaseCallback,
    clSetProgramSpecializationConstant}};
|
||||
|
||||
#if defined(ATI_OS_WIN)
|
||||
#include <Shlwapi.h>
|
||||
|
||||
#pragma comment(lib, "shlwapi.lib")
|
||||
|
||||
static bool ShouldLoadPlatform() {
|
||||
// Get the OpenCL ICD registry values
|
||||
HKEY platformsKey = NULL;
|
||||
if (RegOpenKeyExA(HKEY_LOCAL_MACHINE, "SOFTWARE\\Khronos\\OpenCL\\Vendors", 0, KEY_READ,
|
||||
&platformsKey) != ERROR_SUCCESS)
|
||||
return true;
|
||||
|
||||
std::vector<std::string> registryValues;
|
||||
DWORD dwIndex = 0;
|
||||
while (true) {
|
||||
char cszLibraryName[1024] = {0};
|
||||
DWORD dwLibraryNameSize = sizeof(cszLibraryName);
|
||||
DWORD dwLibraryNameType = 0;
|
||||
DWORD dwValue = 0;
|
||||
DWORD dwValueSize = sizeof(dwValue);
|
||||
|
||||
if (RegEnumValueA(platformsKey, dwIndex++, cszLibraryName, &dwLibraryNameSize, NULL,
|
||||
&dwLibraryNameType, (LPBYTE)&dwValue, &dwValueSize) != ERROR_SUCCESS)
|
||||
break;
|
||||
// Require that the value be a DWORD and equal zero
|
||||
if (dwLibraryNameType != REG_DWORD || dwValue != 0) {
|
||||
continue;
|
||||
}
|
||||
registryValues.push_back(cszLibraryName);
|
||||
}
|
||||
RegCloseKey(platformsKey);
|
||||
|
||||
HMODULE hm = NULL;
|
||||
if (!GetModuleHandleExA(
|
||||
GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS | GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT,
|
||||
(LPCSTR)&ShouldLoadPlatform, &hm))
|
||||
return true;
|
||||
|
||||
char cszDllPath[1024] = {0};
|
||||
if (!GetModuleFileNameA(hm, cszDllPath, sizeof(cszDllPath))) return true;
|
||||
|
||||
// If we are loaded from the DriverStore, then there should be a registry
|
||||
// value matching our current module absolute path.
|
||||
if (std::find(registryValues.begin(), registryValues.end(), cszDllPath) == registryValues.end())
|
||||
return true;
|
||||
|
||||
LPSTR cszFileName;
|
||||
char buffer[1024] = {0};
|
||||
if (!GetFullPathNameA(cszDllPath, sizeof(buffer), buffer, &cszFileName)) return true;
|
||||
|
||||
// We found an absolute path in the registry that matched this DLL, now
|
||||
// check if there is also an entry with the same filename.
|
||||
if (std::find(registryValues.begin(), registryValues.end(), cszFileName) == registryValues.end())
|
||||
return true;
|
||||
|
||||
// Lastly, check if there is a DLL with the same name in the System folder.
|
||||
char cszSystemPath[1024] = {0};
|
||||
#if defined(ATI_BITS_32)
|
||||
if (!GetSystemWow64DirectoryA(cszSystemPath, sizeof(cszSystemPath)))
|
||||
#endif // defined(ATI_BITS_32)
|
||||
if (!GetSystemDirectoryA(cszSystemPath, sizeof(cszSystemPath))) return true;
|
||||
|
||||
std::string systemDllPath;
|
||||
systemDllPath.append(cszSystemPath).append("\\").append(cszFileName);
|
||||
if (!PathFileExistsA(systemDllPath.c_str())) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// If we get here, then all 3 conditions are true:
|
||||
// - An entry in the registry with an absolute path matches the current DLL
|
||||
// - An entry in the registry with a relative path matches the current DLL
|
||||
// - A DLL with the same name was found in the system directory
|
||||
//
|
||||
// We should not load this platform!
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
#include <dlfcn.h>
|
||||
|
||||
// If there is only one platform, load it.
|
||||
// If there is more than one platform, only load platforms that have visible devices
|
||||
// If all platforms have no devices available, only load the PAL platform
|
||||
static bool ShouldLoadPlatform() {
|
||||
bool shouldLoad = true;
|
||||
|
||||
if (!amd::Runtime::initialized()) {
|
||||
amd::Runtime::init();
|
||||
}
|
||||
const int numDevices = amd::Device::numDevices(CL_DEVICE_TYPE_GPU, false);
|
||||
|
||||
void *otherPlatform = nullptr;
|
||||
if (amd::IS_LEGACY) {
|
||||
otherPlatform = dlopen("libamdocl64.so", RTLD_LAZY);
|
||||
if (otherPlatform != nullptr) { // Present platform exists
|
||||
shouldLoad = numDevices > 0;
|
||||
}
|
||||
} else {
|
||||
otherPlatform = dlopen("libamdocl-orca64.so", RTLD_LAZY);
|
||||
if (otherPlatform != nullptr) { // Legacy platform exists
|
||||
// gcc4.8 doesn't support casting void* to a function pointer
|
||||
// Work around this by creating a typedef untill we upgrade the compiler
|
||||
typedef void*(*clGetFunctionAddress_t)(const char *);
|
||||
typedef cl_int(*clIcdGetPlatformIDs_t)(cl_uint, cl_platform_id *, cl_uint *);
|
||||
|
||||
clGetFunctionAddress_t legacyGetFunctionAddress =
|
||||
reinterpret_cast<clGetFunctionAddress_t>(dlsym(otherPlatform, "clGetExtensionFunctionAddress"));
|
||||
clIcdGetPlatformIDs_t legacyGetPlatformIDs =
|
||||
reinterpret_cast<clIcdGetPlatformIDs_t>(legacyGetFunctionAddress("clIcdGetPlatformIDsKHR"));
|
||||
|
||||
cl_uint numLegacyPlatforms = 0;
|
||||
legacyGetPlatformIDs(0, nullptr, &numLegacyPlatforms);
|
||||
|
||||
shouldLoad = (numDevices > 0) || (numLegacyPlatforms == 0);
|
||||
}
|
||||
}
|
||||
|
||||
if (otherPlatform != nullptr) {
|
||||
dlclose(otherPlatform);
|
||||
}
|
||||
|
||||
return shouldLoad;
|
||||
}
|
||||
|
||||
#endif // defined(ATI_OS_WIN)
|
||||
|
||||
// ICD entry point that reports this vendor's platform.
//
// Returns CL_INVALID_VALUE when the argument combination is inconsistent:
//   - platforms is NULL while num_entries > 0 or num_platforms is NULL, or
//   - platforms is non-NULL while num_entries is 0.
// Otherwise returns CL_SUCCESS and reports either zero platforms (when
// ShouldLoadPlatform() decided against loading) or exactly one platform, whose
// handle is the address of amd::PlatformID::Platform.
CL_API_ENTRY cl_int CL_API_CALL clIcdGetPlatformIDsKHR(cl_uint num_entries,
                                                       cl_platform_id* platforms,
                                                       cl_uint* num_platforms) {
  const bool hasPlatformArray = (platforms != NULL);
  const bool hasCountOut = (num_platforms != NULL);

  if (!hasPlatformArray) {
    // Without an output array only a pure count query is acceptable.
    if (num_entries > 0 || !hasCountOut) {
      return CL_INVALID_VALUE;
    }
  } else if (num_entries == 0) {
    // An output array with no room for any entry is rejected.
    return CL_INVALID_VALUE;
  }

  // The load decision is evaluated once and reused by later calls.
  static bool shouldLoad = true;
  static std::once_flag initOnce;
  std::call_once(initOnce, []() { shouldLoad = ShouldLoadPlatform(); });

  if (!shouldLoad) {
    *not_null(num_platforms) = 0;
    return CL_SUCCESS;
  }

  if (!amd::Runtime::initialized()) {
    amd::Runtime::init();
  }

  // Count-only query.
  if (hasCountOut && !hasPlatformArray) {
    *num_platforms = 1;
    return CL_SUCCESS;
  }

  assert(platforms != NULL && "check the code above");
  *platforms = reinterpret_cast<cl_platform_id>(&amd::PlatformID::Platform);

  *not_null(num_platforms) = 1;
  return CL_SUCCESS;
}
|
||||
@@ -0,0 +1,739 @@
|
||||
/*******************************************************************************
|
||||
* Copyright (c) 2008-2010 The Khronos Group Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and/or associated documentation files (the
|
||||
* "Materials"), to deal in the Materials without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Materials, and to
|
||||
* permit persons to whom the Materials are furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Materials.
|
||||
*
|
||||
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
|
||||
******************************************************************************/
|
||||
|
||||
#ifndef __OPENCL_CL_ICD_H
|
||||
#define __OPENCL_CL_ICD_H
|
||||
|
||||
#include <CL/cl.h>
|
||||
#include <CL/cl_gl.h>
|
||||
|
||||
#define cl_khr_icd 1
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif /* __cplusplus */
|
||||
|
||||
typedef cl_int(CL_API_CALL* clGetPlatformIDs_fn)(
|
||||
cl_uint /* num_entries */, cl_platform_id* /* platforms */,
|
||||
cl_uint* /* num_platforms */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef cl_int(CL_API_CALL* clGetPlatformInfo_fn)(
|
||||
cl_platform_id /* platform */, cl_platform_info /* param_name */, size_t /* param_value_size */,
|
||||
void* /* param_value */, size_t* /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef cl_int(CL_API_CALL* clGetDeviceIDs_fn)(
|
||||
cl_platform_id /* platform */, cl_device_type /* device_type */, cl_uint /* num_entries */,
|
||||
cl_device_id* /* devices */, cl_uint* /* num_devices */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef cl_int(CL_API_CALL* clGetDeviceInfo_fn)(
|
||||
cl_device_id /* device */, cl_device_info /* param_name */, size_t /* param_value_size */,
|
||||
void* /* param_value */, size_t* /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef cl_context(CL_API_CALL* clCreateContext_fn)(
|
||||
const cl_context_properties* /* properties */, cl_uint /* num_devices */,
|
||||
const cl_device_id* /* devices */,
|
||||
void(CL_CALLBACK* /* pfn_notify */)(const char*, const void*, size_t, void*),
|
||||
void* /* user_data */, cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef cl_context(CL_API_CALL* clCreateContextFromType_fn)(
|
||||
const cl_context_properties* /* properties */, cl_device_type /* device_type */,
|
||||
void(CL_CALLBACK* /* pfn_notify*/)(const char*, const void*, size_t, void*),
|
||||
void* /* user_data */, cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef cl_int(CL_API_CALL* clRetainContext_fn)(cl_context /* context */)
|
||||
CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef cl_int(CL_API_CALL* clReleaseContext_fn)(cl_context /* context */)
|
||||
CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef cl_int(CL_API_CALL* clGetContextInfo_fn)(
|
||||
cl_context /* context */, cl_context_info /* param_name */, size_t /* param_value_size */,
|
||||
void* /* param_value */, size_t* /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef cl_command_queue(CL_API_CALL* clCreateCommandQueue_fn)(
|
||||
cl_context /* context */, cl_device_id /* device */,
|
||||
cl_command_queue_properties /* properties */,
|
||||
cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef cl_int(CL_API_CALL* clRetainCommandQueue_fn)(cl_command_queue /* command_queue */)
|
||||
CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef cl_int(CL_API_CALL* clReleaseCommandQueue_fn)(cl_command_queue /* command_queue */)
|
||||
CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef cl_int(CL_API_CALL* clGetCommandQueueInfo_fn)(
|
||||
cl_command_queue /* command_queue */, cl_command_queue_info /* param_name */,
|
||||
size_t /* param_value_size */, void* /* param_value */,
|
||||
size_t* /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef cl_int(CL_API_CALL* clSetCommandQueueProperty_fn)(
|
||||
cl_command_queue /* command_queue */, cl_command_queue_properties /* properties */,
|
||||
cl_bool /* enable */,
|
||||
cl_command_queue_properties* /* old_properties */) /*CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED*/;
|
||||
|
||||
typedef cl_mem(CL_API_CALL* clCreateBuffer_fn)(
|
||||
cl_context /* context */, cl_mem_flags /* flags */, size_t /* size */, void* /* host_ptr */,
|
||||
cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef cl_mem(CL_API_CALL* clCreateSubBuffer_fn)(
|
||||
cl_mem /* buffer */, cl_mem_flags /* flags */, cl_buffer_create_type /* buffer_create_type */,
|
||||
const void* /* buffer_create_info */, cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_1_1;
|
||||
|
||||
typedef cl_mem(CL_API_CALL* clCreateImage2D_fn)(
|
||||
cl_context /* context */, cl_mem_flags /* flags */, const cl_image_format* /* image_format */,
|
||||
size_t /* image_width */, size_t /* image_height */, size_t /* image_row_pitch */,
|
||||
void* /* host_ptr */, cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef cl_mem(CL_API_CALL* clCreateImage3D_fn)(
|
||||
cl_context /* context */, cl_mem_flags /* flags */, const cl_image_format* /* image_format */,
|
||||
size_t /* image_width */, size_t /* image_height */, size_t /* image_depth */,
|
||||
size_t /* image_row_pitch */, size_t /* image_slice_pitch */, void* /* host_ptr */,
|
||||
cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef cl_int(CL_API_CALL* clRetainMemObject_fn)(cl_mem /* memobj */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef cl_int(CL_API_CALL* clReleaseMemObject_fn)(cl_mem /* memobj */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef cl_int(CL_API_CALL* clGetSupportedImageFormats_fn)(
|
||||
cl_context /* context */, cl_mem_flags /* flags */, cl_mem_object_type /* image_type */,
|
||||
cl_uint /* num_entries */, cl_image_format* /* image_formats */,
|
||||
cl_uint* /* num_image_formats */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef cl_int(CL_API_CALL* clGetMemObjectInfo_fn)(
|
||||
cl_mem /* memobj */, cl_mem_info /* param_name */, size_t /* param_value_size */,
|
||||
void* /* param_value */, size_t* /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef cl_int(CL_API_CALL* clGetImageInfo_fn)(
|
||||
cl_mem /* image */, cl_image_info /* param_name */, size_t /* param_value_size */,
|
||||
void* /* param_value */, size_t* /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
/* Function-pointer type for an entry shaped like clSetMemObjectDestructorCallback:
 * (memobj, pfn_notify, user_data) -> cl_int.
 * pfn_notify is itself a CL_CALLBACK function pointer taking (cl_mem, void*)
 * and returning void. */
typedef cl_int(CL_API_CALL* clSetMemObjectDestructorCallback_fn)(
|
||||
cl_mem /* memobj */,
|
||||
void(CL_CALLBACK* /* pfn_notify */)(cl_mem /* memobj */, void* /* user_data */),
|
||||
void* /* user_data */) CL_API_SUFFIX__VERSION_1_1;
|
||||
|
||||
/* Sampler APIs */
|
||||
typedef cl_sampler(CL_API_CALL* clCreateSampler_fn)(
|
||||
cl_context /* context */, cl_bool /* normalized_coords */,
|
||||
cl_addressing_mode /* addressing_mode */, cl_filter_mode /* filter_mode */,
|
||||
cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef cl_int(CL_API_CALL* clRetainSampler_fn)(cl_sampler /* sampler */)
|
||||
CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef cl_int(CL_API_CALL* clReleaseSampler_fn)(cl_sampler /* sampler */)
|
||||
CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef cl_int(CL_API_CALL* clGetSamplerInfo_fn)(
|
||||
cl_sampler /* sampler */, cl_sampler_info /* param_name */, size_t /* param_value_size */,
|
||||
void* /* param_value */, size_t* /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
/* Program Object APIs */
|
||||
typedef cl_program(CL_API_CALL* clCreateProgramWithSource_fn)(
|
||||
cl_context /* context */, cl_uint /* count */, const char** /* strings */,
|
||||
const size_t* /* lengths */, cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
/* Prototype of the clCreateProgramWithIL entry point itself. Note that this is
 * an extern function declaration, not a *_fn pointer typedef like the
 * surrounding declarations. It takes a context, an untyped IL buffer with its
 * byte length, and a cl_int* for the error code, and returns cl_program.
 * NOTE(review): tagged CL_EXT_SUFFIX__VERSION_2_0 while other entries in this
 * header use CL_API_SUFFIX__VERSION_* macros -- confirm this is intended. */
extern CL_API_ENTRY cl_program CL_API_CALL
|
||||
clCreateProgramWithIL(cl_context /* context */,
|
||||
const void * /* il */, size_t /* length */,
|
||||
cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_2_0;
|
||||
|
||||
/* Function-pointer type for an entry shaped like clCreateProgramWithILKHR:
 * (context, il buffer, length, errcode_ret) -> cl_program.
 * Same parameter list as the clCreateProgramWithIL prototype declared just
 * above, but tagged with the 1.2 API suffix. */
typedef cl_program(CL_API_CALL* clCreateProgramWithILKHR_fn)(
|
||||
cl_context /* context */, const void* /* il */, size_t /* length */,
|
||||
cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef cl_program(CL_API_CALL* clCreateProgramWithBinary_fn)(
|
||||
cl_context /* context */, cl_uint /* num_devices */, const cl_device_id* /* device_list */,
|
||||
const size_t* /* lengths */, const unsigned char** /* binaries */, cl_int* /* binary_status */,
|
||||
cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef cl_int(CL_API_CALL* clRetainProgram_fn)(cl_program /* program */)
|
||||
CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef cl_int(CL_API_CALL* clReleaseProgram_fn)(cl_program /* program */)
|
||||
CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef cl_int(CL_API_CALL* clBuildProgram_fn)(
|
||||
cl_program /* program */, cl_uint /* num_devices */, const cl_device_id* /* device_list */,
|
||||
const char* /* options */,
|
||||
void(CL_CALLBACK* /* pfn_notify */)(cl_program /* program */, void* /* user_data */),
|
||||
void* /* user_data */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef cl_int(CL_API_CALL* clUnloadCompiler_fn)(void) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef cl_int(CL_API_CALL* clGetProgramInfo_fn)(
|
||||
cl_program /* program */, cl_program_info /* param_name */, size_t /* param_value_size */,
|
||||
void* /* param_value */, size_t* /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef cl_int(CL_API_CALL* clGetProgramBuildInfo_fn)(
|
||||
cl_program /* program */, cl_device_id /* device */, cl_program_build_info /* param_name */,
|
||||
size_t /* param_value_size */, void* /* param_value */,
|
||||
size_t* /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
/* Kernel Object APIs */
|
||||
typedef cl_kernel(CL_API_CALL* clCreateKernel_fn)(
|
||||
cl_program /* program */, const char* /* kernel_name */,
|
||||
cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef cl_int(CL_API_CALL* clCreateKernelsInProgram_fn)(
|
||||
cl_program /* program */, cl_uint /* num_kernels */, cl_kernel* /* kernels */,
|
||||
cl_uint* /* num_kernels_ret */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef cl_int(CL_API_CALL* clRetainKernel_fn)(cl_kernel /* kernel */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef cl_int(CL_API_CALL* clReleaseKernel_fn)(cl_kernel /* kernel */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef cl_int(CL_API_CALL* clSetKernelArg_fn)(cl_kernel /* kernel */, cl_uint /* arg_index */,
|
||||
size_t /* arg_size */, const void* /* arg_value */)
|
||||
CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef cl_int(CL_API_CALL* clGetKernelInfo_fn)(
|
||||
cl_kernel /* kernel */, cl_kernel_info /* param_name */, size_t /* param_value_size */,
|
||||
void* /* param_value */, size_t* /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef cl_int(CL_API_CALL* clGetKernelWorkGroupInfo_fn)(
|
||||
cl_kernel /* kernel */, cl_device_id /* device */, cl_kernel_work_group_info /* param_name */,
|
||||
size_t /* param_value_size */, void* /* param_value */,
|
||||
size_t* /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
/* Event Object APIs */
|
||||
typedef cl_int(CL_API_CALL* clWaitForEvents_fn)(
|
||||
cl_uint /* num_events */, const cl_event* /* event_list */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef cl_int(CL_API_CALL* clGetEventInfo_fn)(
|
||||
cl_event /* event */, cl_event_info /* param_name */, size_t /* param_value_size */,
|
||||
void* /* param_value */, size_t* /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef cl_event(CL_API_CALL* clCreateUserEvent_fn)(
|
||||
cl_context /* context */, cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_1_1;
|
||||
|
||||
typedef cl_int(CL_API_CALL* clRetainEvent_fn)(cl_event /* event */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef cl_int(CL_API_CALL* clReleaseEvent_fn)(cl_event /* event */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef cl_int(CL_API_CALL* clSetUserEventStatus_fn)(
|
||||
cl_event /* event */, cl_int /* execution_status */) CL_API_SUFFIX__VERSION_1_1;
|
||||
|
||||
typedef cl_int(CL_API_CALL* clSetEventCallback_fn)(
|
||||
cl_event /* event */, cl_int /* command_exec_callback_type */,
|
||||
void(CL_CALLBACK* /* pfn_notify */)(cl_event, cl_int, void*),
|
||||
void* /* user_data */) CL_API_SUFFIX__VERSION_1_1;
|
||||
|
||||
/* Profiling APIs */
|
||||
typedef cl_int(CL_API_CALL* clGetEventProfilingInfo_fn)(
|
||||
cl_event /* event */, cl_profiling_info /* param_name */, size_t /* param_value_size */,
|
||||
void* /* param_value */, size_t* /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
/* Flush and Finish APIs */
|
||||
typedef cl_int(CL_API_CALL* clFlush_fn)(cl_command_queue /* command_queue */)
|
||||
CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef cl_int(CL_API_CALL* clFinish_fn)(cl_command_queue /* command_queue */)
|
||||
CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
/* Enqueued Commands APIs */
|
||||
typedef cl_int(CL_API_CALL* clEnqueueReadBuffer_fn)(
|
||||
cl_command_queue /* command_queue */, cl_mem /* buffer */, cl_bool /* blocking_read */,
|
||||
size_t /* offset */, size_t /* cb */, void* /* ptr */, cl_uint /* num_events_in_wait_list */,
|
||||
const cl_event* /* event_wait_list */, cl_event* /* event */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef cl_int(CL_API_CALL* clEnqueueReadBufferRect_fn)(
|
||||
cl_command_queue /* command_queue */, cl_mem /* buffer */, cl_bool /* blocking_read */,
|
||||
const size_t* /* buffer_offset */, const size_t* /* host_offset */, const size_t* /* region */,
|
||||
size_t /* buffer_row_pitch */, size_t /* buffer_slice_pitch */, size_t /* host_row_pitch */,
|
||||
size_t /* host_slice_pitch */, void* /* ptr */, cl_uint /* num_events_in_wait_list */,
|
||||
const cl_event* /* event_wait_list */, cl_event* /* event */) CL_API_SUFFIX__VERSION_1_1;
|
||||
|
||||
typedef cl_int(CL_API_CALL* clEnqueueWriteBuffer_fn)(
|
||||
cl_command_queue /* command_queue */, cl_mem /* buffer */, cl_bool /* blocking_write */,
|
||||
size_t /* offset */, size_t /* cb */, const void* /* ptr */,
|
||||
cl_uint /* num_events_in_wait_list */, const cl_event* /* event_wait_list */,
|
||||
cl_event* /* event */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
/* Function-pointer type for an entry shaped like clEnqueueWriteBufferRect.
 * Mirrors clEnqueueReadBufferRect_fn except that the host pointer is
 * const void* (the host side is the source of the transfer).
 * Parameters, in order: queue, buffer, blocking flag, three size_t[] origins /
 * region, four pitches, host pointer, then the usual wait-list triple.
 * Returns cl_int. */
typedef cl_int(CL_API_CALL* clEnqueueWriteBufferRect_fn)(
|
||||
cl_command_queue /* command_queue */, cl_mem /* buffer */, cl_bool /* blocking_write */,
|
||||
const size_t* /* buffer_offset */, const size_t* /* host_offset */, const size_t* /* region */,
|
||||
size_t /* buffer_row_pitch */, size_t /* buffer_slice_pitch */, size_t /* host_row_pitch */,
|
||||
size_t /* host_slice_pitch */, const void* /* ptr */, cl_uint /* num_events_in_wait_list */,
|
||||
const cl_event* /* event_wait_list */, cl_event* /* event */) CL_API_SUFFIX__VERSION_1_1;
|
||||
|
||||
typedef cl_int(CL_API_CALL* clEnqueueCopyBuffer_fn)(
|
||||
cl_command_queue /* command_queue */, cl_mem /* src_buffer */, cl_mem /* dst_buffer */,
|
||||
size_t /* src_offset */, size_t /* dst_offset */, size_t /* cb */,
|
||||
cl_uint /* num_events_in_wait_list */, const cl_event* /* event_wait_list */,
|
||||
cl_event* /* event */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef cl_int(CL_API_CALL* clEnqueueCopyBufferRect_fn)(
|
||||
cl_command_queue /* command_queue */, cl_mem /* src_buffer */, cl_mem /* dst_buffer */,
|
||||
const size_t* /* src_origin */, const size_t* /* dst_origin */, const size_t* /* region */,
|
||||
size_t /* src_row_pitch */, size_t /* src_slice_pitch */, size_t /* dst_row_pitch */,
|
||||
size_t /* dst_slice_pitch */, cl_uint /* num_events_in_wait_list */,
|
||||
const cl_event* /* event_wait_list */, cl_event* /* event */) CL_API_SUFFIX__VERSION_1_1;
|
||||
|
||||
typedef cl_int(CL_API_CALL* clEnqueueReadImage_fn)(
|
||||
cl_command_queue /* command_queue */, cl_mem /* image */, cl_bool /* blocking_read */,
|
||||
const size_t* /* origin[3] */, const size_t* /* region[3] */, size_t /* row_pitch */,
|
||||
size_t /* slice_pitch */, void* /* ptr */, cl_uint /* num_events_in_wait_list */,
|
||||
const cl_event* /* event_wait_list */, cl_event* /* event */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef cl_int(CL_API_CALL* clEnqueueWriteImage_fn)(
|
||||
cl_command_queue /* command_queue */, cl_mem /* image */, cl_bool /* blocking_write */,
|
||||
const size_t* /* origin[3] */, const size_t* /* region[3] */, size_t /* input_row_pitch */,
|
||||
size_t /* input_slice_pitch */, const void* /* ptr */, cl_uint /* num_events_in_wait_list */,
|
||||
const cl_event* /* event_wait_list */, cl_event* /* event */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef cl_int(CL_API_CALL* clEnqueueCopyImage_fn)(
|
||||
cl_command_queue /* command_queue */, cl_mem /* src_image */, cl_mem /* dst_image */,
|
||||
const size_t* /* src_origin[3] */, const size_t* /* dst_origin[3] */,
|
||||
const size_t* /* region[3] */, cl_uint /* num_events_in_wait_list */,
|
||||
const cl_event* /* event_wait_list */, cl_event* /* event */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef cl_int(CL_API_CALL* clEnqueueCopyImageToBuffer_fn)(
|
||||
cl_command_queue /* command_queue */, cl_mem /* src_image */, cl_mem /* dst_buffer */,
|
||||
const size_t* /* src_origin[3] */, const size_t* /* region[3] */, size_t /* dst_offset */,
|
||||
cl_uint /* num_events_in_wait_list */, const cl_event* /* event_wait_list */,
|
||||
cl_event* /* event */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef cl_int(CL_API_CALL* clEnqueueCopyBufferToImage_fn)(
|
||||
cl_command_queue /* command_queue */, cl_mem /* src_buffer */, cl_mem /* dst_image */,
|
||||
size_t /* src_offset */, const size_t* /* dst_origin[3] */, const size_t* /* region[3] */,
|
||||
cl_uint /* num_events_in_wait_list */, const cl_event* /* event_wait_list */,
|
||||
cl_event* /* event */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
/* Function-pointer type for an entry shaped like clEnqueueMapBuffer.
 * Unlike most clEnqueue* typedefs in this header, the return type is void*
 * rather than cl_int; a cl_int* errcode_ret is the last parameter instead. */
typedef void*(CL_API_CALL* clEnqueueMapBuffer_fn)(
|
||||
cl_command_queue /* command_queue */, cl_mem /* buffer */, cl_bool /* blocking_map */,
|
||||
cl_map_flags /* map_flags */, size_t /* offset */, size_t /* cb */,
|
||||
cl_uint /* num_events_in_wait_list */, const cl_event* /* event_wait_list */,
|
||||
cl_event* /* event */, cl_int* /* errcode_ret */)CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef void*(CL_API_CALL* clEnqueueMapImage_fn)(
|
||||
cl_command_queue /* command_queue */, cl_mem /* image */, cl_bool /* blocking_map */,
|
||||
cl_map_flags /* map_flags */, const size_t* /* origin[3] */, const size_t* /* region[3] */,
|
||||
size_t* /* image_row_pitch */, size_t* /* image_slice_pitch */,
|
||||
cl_uint /* num_events_in_wait_list */, const cl_event* /* event_wait_list */,
|
||||
cl_event* /* event */, cl_int* /* errcode_ret */)CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef cl_int(CL_API_CALL* clEnqueueUnmapMemObject_fn)(
|
||||
cl_command_queue /* command_queue */, cl_mem /* memobj */, void* /* mapped_ptr */,
|
||||
cl_uint /* num_events_in_wait_list */, const cl_event* /* event_wait_list */,
|
||||
cl_event* /* event */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef cl_int(CL_API_CALL* clEnqueueNDRangeKernel_fn)(
|
||||
cl_command_queue /* command_queue */, cl_kernel /* kernel */, cl_uint /* work_dim */,
|
||||
const size_t* /* global_work_offset */, const size_t* /* global_work_size */,
|
||||
const size_t* /* local_work_size */, cl_uint /* num_events_in_wait_list */,
|
||||
const cl_event* /* event_wait_list */, cl_event* /* event */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef cl_int(CL_API_CALL* clEnqueueTask_fn)(cl_command_queue /* command_queue */,
|
||||
cl_kernel /* kernel */,
|
||||
cl_uint /* num_events_in_wait_list */,
|
||||
const cl_event* /* event_wait_list */,
|
||||
cl_event* /* event */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
/* Function-pointer type for an entry shaped like clEnqueueNativeKernel.
 * The second parameter is a CL_CALLBACK function pointer taking a single
 * void* and returning void; it is the only parameter in this typedef that is
 * spelled with a real name (user_func) instead of a comment.
 * Returns cl_int. */
typedef cl_int(CL_API_CALL* clEnqueueNativeKernel_fn)(
|
||||
cl_command_queue /* command_queue */, void(CL_CALLBACK* user_func)(void*), void* /* args */,
|
||||
size_t /* cb_args */, cl_uint /* num_mem_objects */, const cl_mem* /* mem_list */,
|
||||
const void** /* args_mem_loc */, cl_uint /* num_events_in_wait_list */,
|
||||
const cl_event* /* event_wait_list */, cl_event* /* event */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef cl_int(CL_API_CALL* clEnqueueMarker_fn)(cl_command_queue /* command_queue */,
|
||||
cl_event* /* event */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef cl_int(CL_API_CALL* clEnqueueWaitForEvents_fn)(
|
||||
cl_command_queue /* command_queue */, cl_uint /* num_events */,
|
||||
const cl_event* /* event_list */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef cl_int(CL_API_CALL* clEnqueueBarrier_fn)(cl_command_queue /* command_queue */)
|
||||
CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef void*(CL_API_CALL* clGetExtensionFunctionAddress_fn)(const char* /* func_name */)
|
||||
CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
/* Function-pointer type for an entry shaped like clCreateFromGLBuffer:
 * (context, flags, GL buffer object name, errcode_ret) -> cl_mem.
 * errcode_ret is spelled cl_int* so this typedef agrees with the other
 * clCreateFromGL* typedefs in this header, which all use cl_int*. */
typedef cl_mem(CL_API_CALL* clCreateFromGLBuffer_fn)(
|
||||
cl_context /* context */, cl_mem_flags /* flags */, cl_GLuint /* bufobj */,
|
||||
cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef cl_mem(CL_API_CALL* clCreateFromGLTexture2D_fn)(
|
||||
cl_context /* context */, cl_mem_flags /* flags */, cl_GLenum /* target */,
|
||||
cl_GLint /* miplevel */, cl_GLuint /* texture */,
|
||||
cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef cl_mem(CL_API_CALL* clCreateFromGLTexture3D_fn)(
|
||||
cl_context /* context */, cl_mem_flags /* flags */, cl_GLenum /* target */,
|
||||
cl_GLint /* miplevel */, cl_GLuint /* texture */,
|
||||
cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef cl_mem(CL_API_CALL* clCreateFromGLRenderbuffer_fn)(
|
||||
cl_context /* context */, cl_mem_flags /* flags */, cl_GLuint /* renderbuffer */,
|
||||
cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef cl_int(CL_API_CALL* clGetGLObjectInfo_fn)(
|
||||
cl_mem /* memobj */, cl_gl_object_type* /* gl_object_type */,
|
||||
cl_GLuint* /* gl_object_name */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef cl_int(CL_API_CALL* clGetGLTextureInfo_fn)(
|
||||
cl_mem /* memobj */, cl_gl_texture_info /* param_name */, size_t /* param_value_size */,
|
||||
void* /* param_value */, size_t* /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
/* Function-pointer type for an entry shaped like clCreateEventFromGLsyncKHR:
 * (context, GL sync object, errcode_ret) -> cl_event. */
typedef cl_event(CL_API_CALL* clCreateEventFromGLsyncKHR_fn)(
|
||||
cl_context /* context */, cl_GLsync /* sync */,
|
||||
cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_1_1;
|
||||
|
||||
typedef cl_int(CL_API_CALL* clEnqueueAcquireGLObjects_fn)(
|
||||
cl_command_queue /* command_queue */, cl_uint /* num_objects */,
|
||||
const cl_mem* /* mem_objects */, cl_uint /* num_events_in_wait_list */,
|
||||
const cl_event* /* event_wait_list */, cl_event* /* event */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef cl_int(CL_API_CALL* clEnqueueReleaseGLObjects_fn)(
|
||||
cl_command_queue /* command_queue */, cl_uint /* num_objects */,
|
||||
const cl_mem* /* mem_objects */, cl_uint /* num_events_in_wait_list */,
|
||||
const cl_event* /* event_wait_list */, cl_event* /* event */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef cl_int(CL_API_CALL* clCreateSubDevices_fn)(
|
||||
cl_device_id /* in_device */, const cl_device_partition_property* /* properties */,
|
||||
cl_uint /* num_entries */, cl_device_id* /* out_devices */,
|
||||
cl_uint* /* num_devices */) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef cl_int(CL_API_CALL* clRetainDevice_fn)(cl_device_id /* device */)
|
||||
CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef cl_int(CL_API_CALL* clReleaseDevice_fn)(cl_device_id /* device */)
|
||||
CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
/* Function-pointer type for an entry shaped like clCreateImage:
 * (context, flags, image_format, image_desc, host_ptr, errcode_ret) -> cl_mem.
 * Takes a single cl_image_desc pointer where clCreateImage2D_fn /
 * clCreateImage3D_fn take individual width/height/depth/pitch arguments. */
typedef cl_mem(CL_API_CALL* clCreateImage_fn)(cl_context /* context */, cl_mem_flags /* flags */,
|
||||
const cl_image_format* /* image_format */,
|
||||
const cl_image_desc* /* image_desc */,
|
||||
void* /* host_ptr */,
|
||||
cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef cl_program(CL_API_CALL* clCreateProgramWithBuiltInKernels_fn)(
|
||||
cl_context /* context */, cl_uint /* num_devices */, const cl_device_id* /* device_list */,
|
||||
const char* /* kernel_names */, cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
/* Function-pointer type for an entry shaped like clCompileProgram.
 * Parameters, in order: program, device count + device list, options string,
 * header count + header programs + header include names, a CL_CALLBACK
 * notification function taking (cl_program, void*), and user_data.
 * Returns cl_int. */
typedef cl_int(CL_API_CALL* clCompileProgram_fn)(
|
||||
cl_program /* program */, cl_uint /* num_devices */, const cl_device_id* /* device_list */,
|
||||
const char* /* options */, cl_uint /* num_input_headers */,
|
||||
const cl_program* /* input_headers */, const char** /* header_include_names */,
|
||||
void(CL_CALLBACK* pfn_notify)(cl_program program, void* user_data),
|
||||
void* /* user_data */) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
/* Function-pointer type for an entry shaped like clLinkProgram.
 * Parameters, in order: context, device count + device list, options string,
 * input program count + input programs, a CL_CALLBACK notification function
 * taking (cl_program, void*), user_data, and errcode_ret.
 * Returns cl_program (not cl_int, unlike clCompileProgram_fn). */
typedef cl_program(CL_API_CALL* clLinkProgram_fn)(
|
||||
cl_context /* context */, cl_uint /* num_devices */, const cl_device_id* /* device_list */,
|
||||
const char* /* options */, cl_uint /* num_input_programs */,
|
||||
const cl_program* /* input_programs */,
|
||||
void(CL_CALLBACK* pfn_notify)(cl_program program, void* user_data), void* /* user_data */,
|
||||
cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef cl_int(CL_API_CALL* clUnloadPlatformCompiler_fn)(cl_platform_id /* platform */)
|
||||
CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef cl_int(CL_API_CALL* clGetKernelArgInfo_fn)(
|
||||
cl_kernel /* kernel */, cl_uint /* arg_indx */, cl_kernel_arg_info /* param_name */,
|
||||
size_t /* param_value_size */, void* /* param_value */,
|
||||
size_t* /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef cl_int(CL_API_CALL* clEnqueueFillBuffer_fn)(
|
||||
cl_command_queue /* command_queue */, cl_mem /* buffer */, const void* /* pattern */,
|
||||
size_t /* pattern_size */, size_t /* offset */, size_t /* size */,
|
||||
cl_uint /* num_events_in_wait_list */, const cl_event* /* event_wait_list */,
|
||||
cl_event* /* event */) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef cl_int(CL_API_CALL* clEnqueueFillImage_fn)(
|
||||
cl_command_queue /* command_queue */, cl_mem /* image */, const void* /* fill_color */,
|
||||
const size_t* /* origin */, const size_t* /* region */, cl_uint /* num_events_in_wait_list */,
|
||||
const cl_event* /* event_wait_list */, cl_event* /* event */) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef cl_int(CL_API_CALL* clEnqueueMigrateMemObjects_fn)(
|
||||
cl_command_queue /* command_queue */, cl_uint /* num_mem_objects */,
|
||||
const cl_mem* /* mem_objects */, cl_mem_migration_flags /* flags */,
|
||||
cl_uint /* num_events_in_wait_list */, const cl_event* /* event_wait_list */,
|
||||
cl_event* /* event */) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef cl_int(CL_API_CALL* clEnqueueMarkerWithWaitList_fn)(
|
||||
cl_command_queue /* command_queue */, cl_uint /* num_events_in_wait_list */,
|
||||
const cl_event* /* event_wait_list */, cl_event* /* event */) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef cl_int(CL_API_CALL* clEnqueueBarrierWithWaitList_fn)(
|
||||
cl_command_queue /* command_queue */, cl_uint /* num_events_in_wait_list */,
|
||||
const cl_event* /* event_wait_list */, cl_event* /* event */) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef void*(CL_API_CALL* clGetExtensionFunctionAddressForPlatform_fn)(
|
||||
cl_platform_id /* platform */, const char* /* funcname */)CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef cl_mem(CL_API_CALL* clCreateFromGLTexture_fn)(
|
||||
cl_context /* context */, cl_mem_flags /* flags */, cl_GLenum /* texture_target */,
|
||||
cl_GLint /* miplevel */, cl_GLuint /* texture */,
|
||||
cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef cl_command_queue(CL_API_CALL* clCreateCommandQueueWithProperties_fn)(
|
||||
cl_context /* context */, cl_device_id /* device */,
|
||||
const cl_queue_properties* /* properties */,
|
||||
cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_2_0;
|
||||
|
||||
typedef cl_sampler(CL_API_CALL* clCreateSamplerWithProperties_fn)(
|
||||
cl_context /* context */, const cl_sampler_properties* /* properties */,
|
||||
cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_2_0;
|
||||
|
||||
typedef void*(CL_API_CALL* clSVMAlloc_fn)(cl_context /* context */, cl_svm_mem_flags /* flags */,
|
||||
size_t /* size */,
|
||||
cl_uint /* alignment */)CL_API_SUFFIX__VERSION_2_0;
|
||||
|
||||
typedef void(CL_API_CALL* clSVMFree_fn)(cl_context /* context */,
|
||||
void* /* svm_pointer */) CL_API_SUFFIX__VERSION_2_0;
|
||||
|
||||
typedef cl_int(CL_API_CALL* clSetKernelArgSVMPointer_fn)(
|
||||
cl_kernel /* kernel */, cl_uint /* arg_index */,
|
||||
const void* /* arg_value */) CL_API_SUFFIX__VERSION_2_0;
|
||||
|
||||
typedef cl_int(CL_API_CALL* clSetKernelExecInfo_fn)(
|
||||
cl_kernel /* kernel */, cl_kernel_exec_info /* param_name */, size_t /* param_value_size */,
|
||||
const void* /* param_value */) CL_API_SUFFIX__VERSION_2_0;
|
||||
|
||||
/* Function-pointer type for an entry shaped like clEnqueueSVMFree.
 * Parameters, in order: queue, pointer count, array of SVM pointers, a
 * CL_CALLBACK free function, user_data, then the usual wait-list triple.
 * The callback takes (queue, pointer count, pointer array, user_data) and
 * returns void. Returns cl_int. */
typedef cl_int(CL_API_CALL* clEnqueueSVMFree_fn)(
|
||||
cl_command_queue /* command_queue */, cl_uint /* num_svm_pointers */,
|
||||
void* [] /* svm_pointers */,
|
||||
void(CL_CALLBACK* /* pfn_free_func */)(cl_command_queue /* queue */,
|
||||
cl_uint /* num_svm_pointers */,
|
||||
void* [] /* svm_pointers */, void* /* user_data */),
|
||||
void* /* user_data */, cl_uint /* num_events_in_wait_list */,
|
||||
const cl_event* /* event_wait_list */, cl_event* /* event */) CL_API_SUFFIX__VERSION_2_0;
|
||||
|
||||
typedef cl_int(CL_API_CALL* clEnqueueSVMMemcpy_fn)(
|
||||
cl_command_queue /* command_queue */, cl_bool /* blocking_copy */, void* /* dst_ptr */,
|
||||
const void* /* src_ptr */, size_t /* size */, cl_uint /* num_events_in_wait_list */,
|
||||
const cl_event* /* event_wait_list */, cl_event* /* event */) CL_API_SUFFIX__VERSION_2_0;
|
||||
|
||||
typedef cl_int(CL_API_CALL* clEnqueueSVMMemFill_fn)(
|
||||
cl_command_queue /* command_queue */, void* /* svm_ptr */, const void* /* pattern */,
|
||||
size_t /* pattern_size */, size_t /* size */, cl_uint /* num_events_in_wait_list */,
|
||||
const cl_event* /* event_wait_list */, cl_event* /* event */) CL_API_SUFFIX__VERSION_2_0;
|
||||
|
||||
typedef cl_int(CL_API_CALL* clEnqueueSVMMap_fn)(
|
||||
cl_command_queue /* command_queue */, cl_bool /* blocking_map */, cl_map_flags /* flags */,
|
||||
void* /* svm_ptr */, size_t /* size */, cl_uint /* num_events_in_wait_list */,
|
||||
const cl_event* /* event_wait_list */, cl_event* /* event */) CL_API_SUFFIX__VERSION_2_0;
|
||||
|
||||
typedef cl_int(CL_API_CALL* clEnqueueSVMUnmap_fn)(cl_command_queue /* command_queue */,
|
||||
void* /* svm_ptr */,
|
||||
cl_uint /* num_events_in_wait_list */,
|
||||
const cl_event* /* event_wait_list */,
|
||||
cl_event* /* event */) CL_API_SUFFIX__VERSION_2_0;
|
||||
|
||||
typedef cl_mem(CL_API_CALL* clCreatePipe_fn)(cl_context /* context */, cl_mem_flags /* flags */,
|
||||
cl_uint /* pipe_packet_size */,
|
||||
cl_uint /* pipe_max_packets */,
|
||||
const cl_pipe_properties* /* properties */,
|
||||
cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_2_0;
|
||||
|
||||
typedef cl_int(CL_API_CALL* clGetPipeInfo_fn)(
|
||||
cl_mem /* pipe */, cl_pipe_info /* param_name */, size_t /* param_value_size */,
|
||||
void* /* param_value */, size_t* /* param_value_size_ret */) CL_API_SUFFIX__VERSION_2_0;
|
||||
|
||||
/* Function-pointer type for an entry shaped like clGetKernelSubGroupInfoKHR.
 * Compared with the other clGet*Info typedefs in this header, it carries an
 * extra (input_value_size, input_value) pair before the usual
 * (param_value_size, param_value, param_value_size_ret) triple.
 * Returns cl_int. */
typedef cl_int(CL_API_CALL* clGetKernelSubGroupInfoKHR_fn)(
|
||||
cl_kernel /* kernel */, cl_device_id /* device */, cl_kernel_sub_group_info /* param_name */,
|
||||
size_t /* input_value_size */, const void* /* input_value */, size_t /* param_value_size */,
|
||||
void* /* param_value */, size_t* /* param_value_size_ret */) CL_API_SUFFIX__VERSION_2_0;
|
||||
|
||||
|
||||
typedef cl_int(CL_API_CALL* clSetDefaultDeviceCommandQueue_fn)(
|
||||
cl_context /* context */, cl_device_id /* device */,
|
||||
cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_2_1;
|
||||
|
||||
typedef cl_kernel(CL_API_CALL* clCloneKernel_fn)(
|
||||
cl_kernel /* source_kernel */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_2_1;
|
||||
|
||||
typedef cl_int (CL_API_CALL* clEnqueueSVMMigrateMem_fn)(
|
||||
cl_command_queue /* command_queue */, cl_uint /* num_svm_pointers */,
|
||||
const void ** /* svm_pointers */, const size_t * /* sizes */,
|
||||
cl_mem_migration_flags /* flags */, cl_uint /* num_events_in_wait_list */,
|
||||
const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_2_1;
|
||||
|
||||
typedef cl_int (CL_API_CALL* clGetDeviceAndHostTimer_fn)(
|
||||
cl_device_id /* device */, cl_ulong * /* device_timestamp */,
|
||||
cl_ulong * /* host_timestamp */) CL_API_SUFFIX__VERSION_2_1;
|
||||
|
||||
typedef cl_int (CL_API_CALL* clGetHostTimer_fn)(
|
||||
cl_device_id /* device */, cl_ulong * /* host_timestamp */) CL_API_SUFFIX__VERSION_2_1;
|
||||
|
||||
typedef cl_int (CL_API_CALL* clSetProgramSpecializationConstant_fn)(
|
||||
cl_program /* program */, cl_uint /* spec_id */, size_t /* spec_size */,
|
||||
const void* /* spec_value */) CL_API_SUFFIX__VERSION_2_2;
|
||||
|
||||
/* Function-pointer type of the SetProgramReleaseCallback dispatch slot.
 * The second argument is itself a callback pointer (pfn_notify) taking the
 * program and the caller-supplied user_data. */
typedef cl_int(CL_API_CALL* clSetProgramReleaseCallback_fn)(
    cl_program /* program */,
    void(CL_CALLBACK* /* pfn_notify */)(cl_program program, void* user_data),
    void* /* user_data */) CL_API_SUFFIX__VERSION_2_2;
|
||||
|
||||
typedef struct _cl_icd_dispatch_table {
|
||||
/* OpenCL 1.0 */
|
||||
clGetPlatformIDs_fn GetPlatformIDs;
|
||||
clGetPlatformInfo_fn GetPlatformInfo;
|
||||
clGetDeviceIDs_fn GetDeviceIDs;
|
||||
clGetDeviceInfo_fn GetDeviceInfo;
|
||||
clCreateContext_fn CreateContext;
|
||||
clCreateContextFromType_fn CreateContextFromType;
|
||||
clRetainContext_fn RetainContext;
|
||||
clReleaseContext_fn ReleaseContext;
|
||||
clGetContextInfo_fn GetContextInfo;
|
||||
clCreateCommandQueue_fn CreateCommandQueue;
|
||||
clRetainCommandQueue_fn RetainCommandQueue;
|
||||
clReleaseCommandQueue_fn ReleaseCommandQueue;
|
||||
clGetCommandQueueInfo_fn GetCommandQueueInfo;
|
||||
clSetCommandQueueProperty_fn SetCommandQueueProperty;
|
||||
clCreateBuffer_fn CreateBuffer;
|
||||
clCreateImage2D_fn CreateImage2D;
|
||||
clCreateImage3D_fn CreateImage3D;
|
||||
clRetainMemObject_fn RetainMemObject;
|
||||
clReleaseMemObject_fn ReleaseMemObject;
|
||||
clGetSupportedImageFormats_fn GetSupportedImageFormats;
|
||||
clGetMemObjectInfo_fn GetMemObjectInfo;
|
||||
clGetImageInfo_fn GetImageInfo;
|
||||
clCreateSampler_fn CreateSampler;
|
||||
clRetainSampler_fn RetainSampler;
|
||||
clReleaseSampler_fn ReleaseSampler;
|
||||
clGetSamplerInfo_fn GetSamplerInfo;
|
||||
clCreateProgramWithSource_fn CreateProgramWithSource;
|
||||
clCreateProgramWithBinary_fn CreateProgramWithBinary;
|
||||
clRetainProgram_fn RetainProgram;
|
||||
clReleaseProgram_fn ReleaseProgram;
|
||||
clBuildProgram_fn BuildProgram;
|
||||
clUnloadCompiler_fn UnloadCompiler;
|
||||
clGetProgramInfo_fn GetProgramInfo;
|
||||
clGetProgramBuildInfo_fn GetProgramBuildInfo;
|
||||
clCreateKernel_fn CreateKernel;
|
||||
clCreateKernelsInProgram_fn CreateKernelsInProgram;
|
||||
clRetainKernel_fn RetainKernel;
|
||||
clReleaseKernel_fn ReleaseKernel;
|
||||
clSetKernelArg_fn SetKernelArg;
|
||||
clGetKernelInfo_fn GetKernelInfo;
|
||||
clGetKernelWorkGroupInfo_fn GetKernelWorkGroupInfo;
|
||||
clWaitForEvents_fn WaitForEvents;
|
||||
clGetEventInfo_fn GetEventInfo;
|
||||
clRetainEvent_fn RetainEvent;
|
||||
clReleaseEvent_fn ReleaseEvent;
|
||||
clGetEventProfilingInfo_fn GetEventProfilingInfo;
|
||||
clFlush_fn Flush;
|
||||
clFinish_fn Finish;
|
||||
clEnqueueReadBuffer_fn EnqueueReadBuffer;
|
||||
clEnqueueWriteBuffer_fn EnqueueWriteBuffer;
|
||||
clEnqueueCopyBuffer_fn EnqueueCopyBuffer;
|
||||
clEnqueueReadImage_fn EnqueueReadImage;
|
||||
clEnqueueWriteImage_fn EnqueueWriteImage;
|
||||
clEnqueueCopyImage_fn EnqueueCopyImage;
|
||||
clEnqueueCopyImageToBuffer_fn EnqueueCopyImageToBuffer;
|
||||
clEnqueueCopyBufferToImage_fn EnqueueCopyBufferToImage;
|
||||
clEnqueueMapBuffer_fn EnqueueMapBuffer;
|
||||
clEnqueueMapImage_fn EnqueueMapImage;
|
||||
clEnqueueUnmapMemObject_fn EnqueueUnmapMemObject;
|
||||
clEnqueueNDRangeKernel_fn EnqueueNDRangeKernel;
|
||||
clEnqueueTask_fn EnqueueTask;
|
||||
clEnqueueNativeKernel_fn EnqueueNativeKernel;
|
||||
clEnqueueMarker_fn EnqueueMarker;
|
||||
clEnqueueWaitForEvents_fn EnqueueWaitForEvents;
|
||||
clEnqueueBarrier_fn EnqueueBarrier;
|
||||
clGetExtensionFunctionAddress_fn GetExtensionFunctionAddress;
|
||||
clCreateFromGLBuffer_fn CreateFromGLBuffer;
|
||||
clCreateFromGLTexture2D_fn CreateFromGLTexture2D;
|
||||
clCreateFromGLTexture3D_fn CreateFromGLTexture3D;
|
||||
clCreateFromGLRenderbuffer_fn CreateFromGLRenderbuffer;
|
||||
clGetGLObjectInfo_fn GetGLObjectInfo;
|
||||
clGetGLTextureInfo_fn GetGLTextureInfo;
|
||||
clEnqueueAcquireGLObjects_fn EnqueueAcquireGLObjects;
|
||||
clEnqueueReleaseGLObjects_fn EnqueueReleaseGLObjects;
|
||||
clGetGLContextInfoKHR_fn GetGLContextInfoKHR;
|
||||
void* _reservedForD3D10KHR[6];
|
||||
|
||||
/* OpenCL 1.1 */
|
||||
clSetEventCallback_fn SetEventCallback;
|
||||
clCreateSubBuffer_fn CreateSubBuffer;
|
||||
clSetMemObjectDestructorCallback_fn SetMemObjectDestructorCallback;
|
||||
clCreateUserEvent_fn CreateUserEvent;
|
||||
clSetUserEventStatus_fn SetUserEventStatus;
|
||||
clEnqueueReadBufferRect_fn EnqueueReadBufferRect;
|
||||
clEnqueueWriteBufferRect_fn EnqueueWriteBufferRect;
|
||||
clEnqueueCopyBufferRect_fn EnqueueCopyBufferRect;
|
||||
|
||||
void* _reservedForDeviceFissionEXT[3];
|
||||
clCreateEventFromGLsyncKHR_fn CreateEventFromGLsyncKHR;
|
||||
|
||||
/* OpenCL 1.2 */
|
||||
clCreateSubDevices_fn CreateSubDevices;
|
||||
clRetainDevice_fn RetainDevice;
|
||||
clReleaseDevice_fn ReleaseDevice;
|
||||
clCreateImage_fn CreateImage;
|
||||
clCreateProgramWithBuiltInKernels_fn CreateProgramWithBuiltInKernels;
|
||||
clCompileProgram_fn CompileProgram;
|
||||
clLinkProgram_fn LinkProgram;
|
||||
clUnloadPlatformCompiler_fn UnloadPlatformCompiler;
|
||||
clGetKernelArgInfo_fn GetKernelArgInfo;
|
||||
clEnqueueFillBuffer_fn EnqueueFillBuffer;
|
||||
clEnqueueFillImage_fn EnqueueFillImage;
|
||||
clEnqueueMigrateMemObjects_fn EnqueueMigrateMemObjects;
|
||||
clEnqueueMarkerWithWaitList_fn EnqueueMarkerWithWaitList;
|
||||
clEnqueueBarrierWithWaitList_fn EnqueueBarrierWithWaitList;
|
||||
clGetExtensionFunctionAddressForPlatform_fn GetExtensionFunctionAddressForPlatform;
|
||||
clCreateFromGLTexture_fn CreateFromGLTexture;
|
||||
|
||||
/* cl_khr_d3d11_sharing, cl_khr_dx9_media_sharing */
|
||||
void* _reservedForD3DExtensions[10];
|
||||
|
||||
/* cl_khr_egl_image, cl_khr_egl_event */
|
||||
void* _reservedForEGLExtensions[4];
|
||||
|
||||
/* OpenCL 2.0 */
|
||||
clCreateCommandQueueWithProperties_fn CreateCommandQueueWithProperties;
|
||||
clCreatePipe_fn CreatePipe;
|
||||
clGetPipeInfo_fn GetPipeInfo;
|
||||
clSVMAlloc_fn SVMAlloc;
|
||||
clSVMFree_fn SVMFree;
|
||||
clEnqueueSVMFree_fn EnqueueSVMFree;
|
||||
clEnqueueSVMMemcpy_fn EnqueueSVMMemcpy;
|
||||
clEnqueueSVMMemFill_fn EnqueueSVMMemFill;
|
||||
clEnqueueSVMMap_fn EnqueueSVMMap;
|
||||
clEnqueueSVMUnmap_fn EnqueueSVMUnmap;
|
||||
clCreateSamplerWithProperties_fn CreateSamplerWithProperties;
|
||||
clSetKernelArgSVMPointer_fn SetKernelArgSVMPointer;
|
||||
clSetKernelExecInfo_fn SetKernelExecInfo;
|
||||
/* cl_khr_sub_groups */
|
||||
clGetKernelSubGroupInfoKHR_fn GetKernelSubGroupInfoKHR;
|
||||
|
||||
/* OpenCL 2.1 */
|
||||
clCloneKernel_fn CloneKernel;
|
||||
clCreateProgramWithILKHR_fn CreateProgramWithILKHR;
|
||||
clEnqueueSVMMigrateMem_fn EnqueueSVMMigrateMem;
|
||||
clGetDeviceAndHostTimer_fn GetDeviceAndHostTimer;
|
||||
clGetHostTimer_fn GetHostTimer;
|
||||
clGetKernelSubGroupInfoKHR_fn GetKernelSubGroupInfo;
|
||||
clSetDefaultDeviceCommandQueue_fn SetDefaultDeviceCommandQueue;
|
||||
|
||||
/* OpenCL 2.2 */
|
||||
clSetProgramReleaseCallback_fn SetProgramReleaseCallback;
|
||||
clSetProgramSpecializationConstant_fn SetProgramSpecializationConstant;
|
||||
|
||||
} cl_icd_dispatch_table;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif /* __cplusplus */
|
||||
|
||||
#endif /* __OPENCL_CL_ICD_H */
|
||||
@@ -0,0 +1,165 @@
|
||||
/* Copyright (c) 2012-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef CL_KERNEL_H_
|
||||
#define CL_KERNEL_H_
|
||||
|
||||
struct clk_builtins_t;
|
||||
|
||||
// CPU scratch size. This must be a multiple of sizeof(cl_ulong16).
#define __CPU_SCRATCH_SIZE 128

// Private memory size constant: 16 * 1024 = 16384.
#define CLK_PRIVATE_MEMORY_SIZE (16 * 1024)
|
||||
|
||||
struct clk_thread_info_block_t {
|
||||
// Warning! The size of this struct needs to be a multiple
|
||||
// of 16 when compiling 64 bit
|
||||
|
||||
struct clk_builtins_t const* builtins;
|
||||
void* local_mem_base;
|
||||
void* local_scratch;
|
||||
const void* table_base;
|
||||
size_t pad;
|
||||
|
||||
uint work_dim;
|
||||
size_t global_offset[4]; /*dim0,dim1,dim2,invalid(dim<0||dim>2)*/
|
||||
size_t global_size[4]; /*dim0,dim1,dim2,invalid(dim<0||dim>2)*/
|
||||
|
||||
size_t enqueued_local_size[4];
|
||||
size_t local_size[4]; /*dim0,dim1,dim2,invalid(dim<0||dim>2)*/
|
||||
size_t local_id[4]; /*dim0,dim1,dim2,invalid(dim<0||dim>2)*/
|
||||
size_t group_id[4]; /*dim0,dim1,dim2,invalid(dim<0||dim>2)*/
|
||||
};
|
||||
|
||||
// Value type tags. Enumerators are numbered implicitly from 0 (T_VOID) to
// 42 (T_PAD); the order is therefore part of the encoding and must not change.
typedef enum clk_value_type_t {
  // scalars and pointer
  T_VOID,
  T_CHAR,
  T_SHORT,
  T_INT,
  T_LONG,
  T_FLOAT,
  T_DOUBLE,
  T_POINTER,
  // char vectors
  T_CHAR2,
  T_CHAR3,
  T_CHAR4,
  T_CHAR8,
  T_CHAR16,
  // short vectors
  T_SHORT2,
  T_SHORT3,
  T_SHORT4,
  T_SHORT8,
  T_SHORT16,
  // int vectors
  T_INT2,
  T_INT3,
  T_INT4,
  T_INT8,
  T_INT16,
  // long vectors
  T_LONG2,
  T_LONG3,
  T_LONG4,
  T_LONG8,
  T_LONG16,
  // float vectors
  T_FLOAT2,
  T_FLOAT3,
  T_FLOAT4,
  T_FLOAT8,
  T_FLOAT16,
  // double vectors
  T_DOUBLE2,
  T_DOUBLE3,
  T_DOUBLE4,
  T_DOUBLE8,
  T_DOUBLE16,
  // remaining kinds
  T_SAMPLER,
  T_SEMA,
  T_STRUCT,
  T_QUEUE,
  T_PAD
} clk_value_type_t;
|
||||
|
||||
// Address space tags, numbered implicitly from 0 (A_PRIVATE) to 4 (A_REGION).
typedef enum clk_address_space_t {
  A_PRIVATE,
  A_LOCAL,
  A_CONSTANT,
  A_GLOBAL,
  A_REGION
} clk_address_space_t;
|
||||
|
||||
// kernel arg access qualifier and type qualifier
// Each enumerator is a distinct bit, so qualifiers can be combined with |.
typedef enum clk_arg_qualifier_t {
  Q_NONE = 0,

  // for image type only, access qualifier
  Q_READ = 1,
  Q_WRITE = 2,

  // for pointer type only
  Q_CONST = 4,  // pointee
  Q_RESTRICT = 8,
  Q_VOLATILE = 16,  // pointee
  Q_PIPE = 32       // pipe

} clk_arg_qualifier_t;
|
||||
|
||||
#pragma pack(push, 4)
|
||||
struct clk_parameter_descriptor_t {
|
||||
clk_value_type_t type;
|
||||
clk_address_space_t space;
|
||||
uint qualifier;
|
||||
const char* name;
|
||||
};
|
||||
#pragma pack(pop)
|
||||
|
||||
//#define CLK_LOCAL_MEM_FENCE (1 << 0)
|
||||
//#define CLK_GLOBAL_MEM_FENCE (1 << 1)
|
||||
|
||||
struct clk_builtins_t {
|
||||
/* Synchronization functions */
|
||||
void (*barrier_ptr)(cl_mem_fence_flags flags);
|
||||
|
||||
/* AMD Only builtins: FIXME_lmoriche (extension) */
|
||||
void* reserved;
|
||||
int (*printf_ptr)(const char* format, ...);
|
||||
};
|
||||
|
||||
// Kernel nature flags; each enumerator is a separate bit.
enum clk_natures_t {
  KN_HAS_BARRIER = 1 << 0,
  KN_WG_LEVEL = 1 << 1
};
|
||||
|
||||
// MSVC warning 4200 (zero-sized array in struct) is disabled around the
// definition below because clk_pipe_t ends in a flexible array member.
#if defined(_MSC_VER)
#pragma warning(push)
#pragma warning(disable : 4200)
#endif

#if !defined(__OPENCL_VERSION__) || __OPENCL_VERSION__ >= 200

// Pipe header followed by the packet storage. The three indices plus
// `padding` occupy exactly 128 bytes, so `packets` starts at offset 128.
typedef struct clk_pipe_t {
  size_t read_idx;
  size_t write_idx;
  size_t end_idx;
  char padding[128 - 3 * sizeof(size_t)];
  char packets[];
} clk_pipe_t;

#endif

#if defined(_MSC_VER)
#pragma warning(pop)
#endif
|
||||
|
||||
#endif /*CL_KERNEL_H_*/
|
||||
@@ -0,0 +1,189 @@
|
||||
/* Copyright (c) 2009-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef __CL_PROFILE_AMD_H
|
||||
#define __CL_PROFILE_AMD_H
|
||||
|
||||
#include "CL/cl_platform.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif /*__cplusplus*/
|
||||
|
||||
typedef struct _cl_perfcounter_amd* cl_perfcounter_amd;
|
||||
typedef cl_ulong cl_perfcounter_property;
|
||||
typedef cl_uint cl_perfcounter_info;
|
||||
|
||||
/* cl_perfcounter_info */
/* CL_PERFCOUNTER_LAST has no explicit value; it follows the last real entry (0x6). */
enum PerfcounterInfo {
  CL_PERFCOUNTER_NONE = 0x0,
  CL_PERFCOUNTER_REFERENCE_COUNT = 0x1,
  CL_PERFCOUNTER_DATA = 0x2,
  CL_PERFCOUNTER_GPU_BLOCK_INDEX = 0x3,
  CL_PERFCOUNTER_GPU_COUNTER_INDEX = 0x4,
  CL_PERFCOUNTER_GPU_EVENT_INDEX = 0x5,
  CL_PERFCOUNTER_LAST
};
|
||||
|
||||
/*********************************
 * Set device clock mode data
 *********************************/
/* The typedef gives the enum a plain type name in C as well as C++: this
 * header is wrapped in extern "C" and uses `cl_DeviceClockMode_AMD` without
 * the `enum` keyword, which C only accepts for typedef names. The tag is kept
 * so `enum cl_DeviceClockMode_AMD` continues to work. */
typedef enum cl_DeviceClockMode_AMD {
  /* Device clocks and other power settings are restored to default */
  CL_DEVICE_CLOCK_MODE_DEFAULT_AMD = 0x0,
  /* Queries the current device clock ratios. Leaves the clock mode of the device unchanged */
  CL_DEVICE_CLOCK_MODE_QUERY_AMD = 0x1,
  /* Scale down from peak ratio */
  CL_DEVICE_CLOCK_MODE_PROFILING_AMD = 0x2,
  /* Memory clock is set to the lowest available level */
  CL_DEVICE_CLOCK_MODE_MINIMUMMEMORY_AMD = 0x3,
  /* Engine clock is set to the lowest available level */
  CL_DEVICE_CLOCK_MODE_MINIMUMENGINE_AMD = 0x4,
  /* Clocks set to maximum when possible. Fan set to maximum. */
  CL_DEVICE_CLOCK_MODE_PEAK_AMD = 0x5,
  /* Queries the profiling device clock ratios. Leaves the clock mode of the device unchanged */
  CL_DEVICE_CLOCK_MODE_QUERYPROFILING_AMD = 0x6,
  /* Queries the peak device clock ratios. Leaves the clock mode of the device unchanged */
  CL_DEVICE_CLOCK_MODE_QUERYPEAK_AMD = 0x7,
  /* Maximum count of device clock mode */
  CL_DEVICE_CLOCK_MODE_COUNT_AMD = 0x8
} cl_DeviceClockMode_AMD;
|
||||
|
||||
typedef struct _cl_set_device_clock_mode_input_amd
|
||||
{
|
||||
/* specify the clock mode for AMD GPU device*/
|
||||
cl_DeviceClockMode_AMD clock_mode;
|
||||
} cl_set_device_clock_mode_input_amd;
|
||||
|
||||
typedef struct _cl_set_device_clock_mode_output_amd
|
||||
{
|
||||
/*Ratio of current mem clock to peak clock as obtained from DeviceProperties::maxGpuClock*/
|
||||
cl_float memory_clock_ratio_to_peak;
|
||||
/*Ratio of current gpu core clock to peak clock as obtained from DeviceProperties::maxGpuClock*/
|
||||
cl_float engine_clock_ratio_to_peak;
|
||||
} cl_set_device_clock_mode_output_amd;
|
||||
|
||||
/*! \brief Creates a new HW performance counter
|
||||
* for the specified OpenCL context.
|
||||
*
|
||||
* \param device must be a valid OpenCL device.
|
||||
*
|
||||
* \param properties the list of properties of the hardware counter
|
||||
*
|
||||
* \param errcode_ret A non zero value if OpenCL failed to create PerfCounter
|
||||
* - CL_SUCCESS if the function is executed successfully.
|
||||
* - CL_INVALID_CONTEXT if the specified context is invalid.
|
||||
* - CL_OUT_OF_RESOURCES if we couldn't create the object
|
||||
*
|
||||
* \return the created perfcounter object
|
||||
*/
|
||||
extern CL_API_ENTRY cl_perfcounter_amd CL_API_CALL clCreatePerfCounterAMD(
|
||||
cl_device_id /* device */, cl_perfcounter_property* /* properties */, cl_int* /* errcode_ret */
|
||||
) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
/*! \brief Destroy a performance counter object.
|
||||
*
|
||||
* \param perf_counter the perfcounter object for release
|
||||
*
|
||||
* \return A non zero value if OpenCL failed to release PerfCounter
|
||||
* - CL_SUCCESS if the function is executed successfully.
|
||||
* - CL_INVALID_OPERATION if we failed to release the object
|
||||
*/
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL clReleasePerfCounterAMD(cl_perfcounter_amd /* perf_counter */
|
||||
) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
/*! \brief Increments the perfcounter object reference count.
|
||||
*
|
||||
* \param perf_counter the perfcounter object for retain
|
||||
*
|
||||
* \return A non zero value if OpenCL failed to retain PerfCounter
|
||||
* - CL_SUCCESS if the function is executed successfully.
|
||||
* - CL_INVALID_OPERATION if we failed to release the object
|
||||
*/
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL clRetainPerfCounterAMD(cl_perfcounter_amd /* perf_counter */
|
||||
) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
/*! \brief Enqueues the begin command for the specified counters.
|
||||
*
|
||||
* \param command_queue must be a valid OpenCL command queue.
|
||||
*
|
||||
* \param num_perf_counters the number of perfcounter objects in the array.
|
||||
*
|
||||
* \param perf_counters specifies an array of perfcounter objects.
|
||||
*
|
||||
* \return A non zero value if OpenCL failed to release PerfCounter
|
||||
* - CL_SUCCESS if the function is executed successfully.
|
||||
* - CL_INVALID_OPERATION if we failed to enqueue the begin operation
|
||||
*/
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueBeginPerfCounterAMD(
|
||||
cl_command_queue /* command_queue */, cl_uint /* num_perf_counters */,
|
||||
cl_perfcounter_amd* /* perf_counters */, cl_uint /* num_events_in_wait_list */,
|
||||
const cl_event* /* event_wait_list */, cl_event* /* event */
|
||||
) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
/*! \brief Enqueues the end command for the specified counters.
|
||||
*
|
||||
* \param command_queue must be a valid OpenCL command queue.
|
||||
*
|
||||
* \param num_perf_counters the number of perfcounter objects in the array.
|
||||
*
|
||||
* \param perf_counters specifies an array of perfcounter objects.
|
||||
*
|
||||
* \param event the event object associated with the end operation.
|
||||
*
|
||||
* \return A non zero value if OpenCL failed to release PerfCounter
|
||||
* - CL_SUCCESS if the function is executed successfully.
|
||||
* - CL_INVALID_OPERATION if we failed to enqueue the end operation
|
||||
*/
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueEndPerfCounterAMD(
|
||||
cl_command_queue /* command_queue */, cl_uint /* num_perf_counters */,
|
||||
cl_perfcounter_amd* /* perf_counters */, cl_uint /* num_events_in_wait_list */,
|
||||
const cl_event* /* event_wait_list */, cl_event* /* event */
|
||||
) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
/*! \brief Retrieves the results from the counter objects.
|
||||
*
|
||||
* \param perf_counter specifies a perfcounter objects for query.
|
||||
*
|
||||
* \param param_name specifies the information to query.
|
||||
*
|
||||
* \param param_value is a pointer to memory where the appropriate result
|
||||
* being queried is returned. If \a param_value is NULL, it is ignored.
|
||||
*
|
||||
* \param param_value_size is used to specify the size in bytes of memory
|
||||
* pointed to by \a param_value. This size must be >= size of return type.
|
||||
*
|
||||
* \param param_value_size_ret returns the actual size in bytes of data copied
|
||||
* to \a param_value. If \a param_value_size_ret is NULL, it is ignored.
|
||||
*
|
||||
* \param values must be a valid pointer to an array of 64-bit values
|
||||
* and the array size must be equal to num_perf_counters.
|
||||
*
|
||||
* \return
|
||||
* - CL_SUCCESS if the function is executed successfully.
|
||||
* - CL_PROFILING_INFO_NOT_AVAILABLE if event isn't finished.
|
||||
* - CL_INVALID_OPERATION if we failed to get the data
|
||||
*/
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL clGetPerfCounterInfoAMD(
|
||||
cl_perfcounter_amd /* perf_counter */, cl_perfcounter_info /* param_name */,
|
||||
size_t /* param_value_size */, void* /* param_value */, size_t* /* param_value_size_ret */
|
||||
) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
/*! \brief Applies or queries a device clock mode.
 *
 * \param device the device the request is addressed to.
 *
 * \param Clock_Mode_Input the requested cl_DeviceClockMode_AMD, passed by
 * value inside cl_set_device_clock_mode_input_amd.
 *
 * \param Clock_Mode_Output pointer to the struct holding the memory and
 * engine clock ratios to peak - presumably filled in by the call; TODO confirm
 * whether NULL is accepted.
 *
 * \return an OpenCL status code; the specific codes are not documented here.
 */
extern CL_API_ENTRY cl_int CL_API_CALL clSetDeviceClockModeAMD(
    cl_device_id /* device */,
    cl_set_device_clock_mode_input_amd /* Clock_Mode_Input */,
    cl_set_device_clock_mode_output_amd* /* Clock_Mode_Output */
    ) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /*extern "C"*/
|
||||
#endif /*__cplusplus*/
|
||||
|
||||
#endif /*__CL_PROFILE_AMD_H*/
|
||||
@@ -0,0 +1,363 @@
|
||||
/* Copyright (c) 2012-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef __CL_THREAD_TRACE_AMD_H
|
||||
#define __CL_THREAD_TRACE_AMD_H
|
||||
|
||||
#include "CL/cl_platform.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif /*__cplusplus*/
|
||||
|
||||
typedef struct _cl_threadtrace_amd* cl_threadtrace_amd;
|
||||
typedef cl_uint cl_thread_trace_param;
|
||||
typedef cl_uint cl_threadtrace_info;
|
||||
|
||||
/* cl_command_type */
#define CL_COMMAND_THREAD_TRACE_MEM 0x4500
#define CL_COMMAND_THREAD_TRACE 0x4501
|
||||
|
||||
/* cl_threadtrace_command_name_amd enumeration */
/* Enumerators are numbered implicitly from 0 (BEGIN) to 3 (RESUME). */
typedef enum _cl_threadtrace_command_name_amd {
  CL_THREAD_TRACE_BEGIN_COMMAND,
  CL_THREAD_TRACE_END_COMMAND,
  CL_THREAD_TRACE_PAUSE_COMMAND,
  CL_THREAD_TRACE_RESUME_COMMAND
} cl_threadtrace_command_name_amd;
|
||||
|
||||
// Thread trace parameters
// Enumerators are numbered implicitly from 0 (TOKEN_MASK) to 10 (IS_WRAPPED).
enum ThreadTraceParameter {
  CL_THREAD_TRACE_PARAM_TOKEN_MASK,
  CL_THREAD_TRACE_PARAM_REG_MASK,
  CL_THREAD_TRACE_PARAM_COMPUTE_UNIT_TARGET,
  CL_THREAD_TRACE_PARAM_SHADER_ARRAY_TARGET,
  CL_THREAD_TRACE_PARAM_SIMD_MASK,
  CL_THREAD_TRACE_PARAM_VM_ID_MASK,
  CL_THREAD_TRACE_PARAM_RANDOM_SEED,
  CL_THREAD_TRACE_PARAM_CAPTURE_MODE,
  CL_THREAD_TRACE_PARAM_INSTRUCTION_MASK,
  CL_THREAD_TRACE_PARAM_USER_DATA,
  CL_THREAD_TRACE_PARAM_IS_WRAPPED
};
|
||||
|
||||
// CL_THREAD_TRACE_PARAM_TOKEN_MASK data selects for SI
// One bit per token kind (bits 0..15); ALL_SI is the union of all sixteen.
enum CL_THREAD_TRACE_TOKEN_MASK {
  // Time passed
  CL_THREAD_TRACE_TOKEN_MASK_TIME_SI = 0x00000001,
  // Resync the timestamp
  CL_THREAD_TRACE_TOKEN_MASK_TIMESTAMP_SI = 0x00000002,
  // A register write has occurred
  CL_THREAD_TRACE_TOKEN_MASK_REG_SI = 0x00000004,
  // A wavefront has started
  CL_THREAD_TRACE_TOKEN_MASK_WAVE_START_SI = 0x00000008,
  // Output space has been allocated for color/Z [Should be used for cl-gl]
  CL_THREAD_TRACE_TOKEN_MASK_WAVE_PS_ALLOC_SI = 0x00000010,
  // Output space has been allocated for vertex position [Should be used for cl-gl]
  CL_THREAD_TRACE_TOKEN_MASK_WAVE_VS_ALLOC_SI = 0x00000020,
  // Wavefront completion
  CL_THREAD_TRACE_TOKEN_MASK_WAVE_END_SI = 0x00000040,
  // An event has reached the top of a shader stage. In-order with WAVE_START
  CL_THREAD_TRACE_TOKEN_MASK_EVENT_SI = 0x00000080,
  // An event has reached the top of a compute shader stage. In-order with WAVE_START
  CL_THREAD_TRACE_TOKEN_MASK_EVENT_CS_SI = 0x00000100,
  // An event has reached the top of a shader stage for the second GFX pipe. In-order with
  // WAVE_START.
  // [Should be used for cl-gl]
  CL_THREAD_TRACE_TOKEN_MASK_EVENT_GFX_SI = 0x00000200,
  // The kernel has executed an instruction
  CL_THREAD_TRACE_TOKEN_MASK_INST_SI = 0x00000400,
  // The kernel has explicitly written the PC value
  CL_THREAD_TRACE_TOKEN_MASK_INST_PC_SI = 0x00000800,
  // The kernel has written user data into the thread trace buffer
  CL_THREAD_TRACE_TOKEN_MASK_INST_USERDATA_SI = 0x00001000,
  // Provides information about instruction scheduling
  CL_THREAD_TRACE_TOKEN_MASK_ISSUE_SI = 0x00002000,
  // The performance counter delta has been updated
  CL_THREAD_TRACE_TOKEN_MASK_PERF_SI = 0x00004000,
  // A miscellaneous event has been sent
  CL_THREAD_TRACE_TOKEN_MASK_MISC_SI = 0x00008000,
  // All possible tokens
  CL_THREAD_TRACE_TOKEN_MASK_ALL_SI = 0x0000ffff
};
|
||||
|
||||
// CL_THREAD_TRACE_PARAM_REG_MASK data selects
// NOTE(review): no enumerator is defined for bit 0x10, yet ALL_SI (0xff)
// includes it - confirm whether that bit is reserved.
enum CL_THREAD_TRACE_REG_MASK {
  // Event initiator
  CL_THREAD_TRACE_REG_MASK_EVENT_SI = 0x00000001,
  // Draw initiator [Should be used for cl-gl]
  CL_THREAD_TRACE_REG_MASK_DRAW_SI = 0x00000002,
  // Dispatch initiator
  CL_THREAD_TRACE_REG_MASK_DISPATCH_SI = 0x00000004,
  // User data from host
  CL_THREAD_TRACE_REG_MASK_USERDATA_SI = 0x00000008,
  // GFXDEC register (8-state) [Should be used for cl-gl]
  CL_THREAD_TRACE_REG_MASK_GFXDEC_SI = 0x00000020,
  // SHDEC register (many state)
  CL_THREAD_TRACE_REG_MASK_SHDEC_SI = 0x00000040,
  // Other registers
  CL_THREAD_TRACE_REG_MASK_OTHER_SI = 0x00000080,
  // All possible register types
  CL_THREAD_TRACE_REG_MASK_ALL_SI = 0x000000ff
};
|
||||
|
||||
// CL_THREAD_TRACE_PARAM_VM_ID_MASK data selects
enum CL_THREAD_TRACE_VM_ID_MASK {
  // Capture only data from the VM_ID used to write {SQTT}_BASE
  CL_THREAD_TRACE_VM_ID_MASK_SINGLE = 0,
  // Capture all data from all VM_IDs
  CL_THREAD_TRACE_VM_ID_MASK_ALL = 1,
  // Capture all data but only get target (a.k.a. detail) data from VM_ID used to write {SQTT}_BASE
  CL_THREAD_TRACE_VM_ID_MASK_SINGLE_DETAIL = 2
};
|
||||
|
||||
// CL_THREAD_TRACE_PARAM_CAPTURE_MODE data
enum CL_THREAD_TRACE_CAPTURE_MODE {
  // Capture all data in the thread trace buffer
  CL_THREAD_TRACE_CAPTURE_ALL = 0,
  // Capture only data between THREAD_TRACE_START and THREAD_TRACE_STOP events
  CL_THREAD_TRACE_CAPTURE_SELECT = 1,
  // Capture data between THREAD_TRACE_START and THREAD_TRACE_STOP events,
  // and global/reference data at all times
  CL_THREAD_TRACE_CAPTURE_SELECT_DETAIL = 2
};
|
||||
|
||||
// CL_THREAD_TRACE_PARAM_INSTRUCTION_MASK data selects
// Enumerators are numbered implicitly from 0 (ALL) to 3 (IMMEDIATE_CI).
enum CL_THREAD_TRACE_INSTRUCTION_MASK {
  // Generate {SQTT}_TOKEN_INST tokens for all instructions
  CL_THREAD_TRACE_INST_MASK_ALL,
  // Generate {SQTT}_TOKEN_INST tokens for stalled instructions only
  CL_THREAD_TRACE_INST_MASK_STALLED,
  // Generate {SQTT}_TOKEN_INST messages for stalled and other (no op/wait/set prio/etc)
  // instructions
  CL_THREAD_TRACE_INST_MASK_STALLED_AND_IMMEDIATE,
  // Generate {SQTT}_TOKEN_INST messages for immediate instructions only [Should be used only
  // for CI]
  CL_THREAD_TRACE_INST_MASK_IMMEDIATE_CI
};
|
||||
|
||||
// Query selectors; the header declares cl_threadtrace_info as the matching
// parameter type of clGetThreadTraceInfoAMD. Numbered implicitly from 0.
enum ThreadTraceInfo {
  CL_THREAD_TRACE_SE,
  CL_THREAD_TRACE_BUFFERS_FILLED,
  CL_THREAD_TRACE_BUFFERS_SIZE
};
|
||||
|
||||
|
||||
/*! \brief Creates a new cl_threadtrace_amd object
|
||||
*
|
||||
* \param device must be a valid OpenCL device.
|
||||
*
|
||||
* \param errcode_ret A non zero value if OpenCL failed to create threadTrace
|
||||
* -CL_INVALID_DEVICE if devices contains an invalid device.
|
||||
* -CL_DEVICE_NOT_AVAILABLE if a device is currently not available even
|
||||
* though the device was returned by clGetDeviceIDs.
|
||||
* -CL_OUT_OF_RESOURCES if there is a failure to allocate resources required by the
|
||||
* OpenCL implementation on the device.
|
||||
* -CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required by the
|
||||
OpenCL implementation on the host.
|
||||
*
|
||||
* \return the created threadTrace object
|
||||
*/
|
||||
extern CL_API_ENTRY cl_threadtrace_amd CL_API_CALL clCreateThreadTraceAMD(
|
||||
cl_device_id /* device */, cl_int* /* errcode_ret */
|
||||
) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
/*! \brief Destroys a cl_threadtrace_amd object.
|
||||
*
|
||||
* \param threadTrace the cl_threadtrace_amd object for release
|
||||
*
|
||||
* \return A non zero value if OpenCL failed to release threadTrace
|
||||
* -CL_INVALID_VALUE if the thread_trace is not a valid OpenCL thread trace object
|
||||
(cl_threadtrace_amd) .
|
||||
* -CL_OUT_OF_RESOURCES if there is a failure to allocate resources required by the
|
||||
* OpenCL implementation on the device.
|
||||
* -CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required by the
|
||||
OpenCL implementation on the host.
|
||||
*/
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL clReleaseThreadTraceAMD(cl_threadtrace_amd /* threadTrace */
|
||||
) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
/*! \brief Increments the cl_threadtrace_amd object reference count.
|
||||
*
|
||||
* \param threadTrace the cl_threadtrace_amd object for retain
|
||||
*
|
||||
* \return A non zero value if OpenCL failed to retain threadTrace
|
||||
* -CL_INVALID_VALUE if the thread_trace is not a valid thread trace object (cl_threadtrace_amd) .
|
||||
* -CL_OUT_OF_RESOURCES if there is a failure to allocate resources required by the
|
||||
OpenCL implementation on the device.
|
||||
* -CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required by the
|
||||
OpenCL implementation on the host.
|
||||
*/
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL clRetainThreadTraceAMD(cl_threadtrace_amd /* threadTrace */
|
||||
) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
/*! \brief Sets the cl_threadtrace_amd object configuration parameter.
|
||||
*
|
||||
* \param thread_trace the cl_threadtrace_amd object to set configuration parameter
|
||||
*
|
||||
* \param config_param the cl_thread_trace_param
|
||||
*
|
||||
* \param param_value corresponding to configParam
|
||||
*
|
||||
* \return A non zero value if OpenCL failed to set threadTrace buffer parameter
|
||||
* - CL_INVALID_VALUE if the thread_trace is invalid thread trace object.
|
||||
* - CL_INVALID_VALUE if the invalid config_param or param_value enum values , are used.
|
||||
* - CL_INVALID_EVENT_WAIT_LIST if event_wait_list is NULL and num_events_in_wait_list > 0, or
|
||||
event_wait_list is not NULL and num_events_in_wait_list is 0,
|
||||
* - or if event objects in event_wait_list are not valid events.
|
||||
* - CL_OUT_OF_RESOURCES if there is a failure to allocate resources required by the OpenCL
|
||||
implementation on the device.
|
||||
* - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required by the
|
||||
OpenCL implementation on the host.
|
||||
*/
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL clSetThreadTraceParamAMD(
|
||||
cl_threadtrace_amd /*thread_trace*/, cl_thread_trace_param /*config_param*/,
|
||||
cl_uint /*param_value*/
|
||||
) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
/* \brief Enqueues the binding command to bind cl_threadtrace_amd to cl_mem object for trace
|
||||
* recording..
|
||||
*
|
||||
* \param command_queue must be a valid OpenCL command queue.
|
||||
*
|
||||
* \param thread_trace specifies the cl_threadtrace_amd object.
|
||||
*
|
||||
* \param mem_objects the cl_mem objects for trace recording
|
||||
*
|
||||
* \param mem_objects_num the number of cl_mem objects in the mem_objects
|
||||
*
|
||||
* \param buffer_size the size of each cl_mem object from mem_objects
|
||||
*
|
||||
* \param event_wait_list specify [is a pointer to] events that need to
|
||||
* complete before this particular command can be executed.
|
||||
* If \a event_wait_list is NULL, then this particular command does not wait
|
||||
* on any event to complete. If \a event_wait_list is NULL,
|
||||
* \a num_events_in_wait_list must be 0. If \a event_wait_list is not NULL,
|
||||
* the list of events pointed to by \a event_wait_list must be valid and
|
||||
* \a num_events_in_wait_list must be greater than 0. The events specified in
|
||||
* \a event_wait_list act as synchronization points.
|
||||
*
|
||||
* \param num_events_in_wait_list specify the number of events in
|
||||
* \a event_wait_list. It must be 0 if \a event_wait_list is NULL. It must be
|
||||
* greater than 0 if \a event_wait_list is not NULL.
|
||||
*
|
||||
* \param event returns an event object that identifies this particular
|
||||
* command and can be used to query or queue a wait for this particular
|
||||
* command to complete. \a event can be NULL in which case it will not be
|
||||
* possible for the application to query the status of this command or queue a
|
||||
* wait for this command to complete.
|
||||
* \return A non zero value if OpenCL failed to set threadTrace buffer parameter
|
||||
* - CL_INVALID_COMMAND_QUEUE if command_queue is not a valid command-queue.
|
||||
* - CL_INVALID_CONTEXT if the context associated with command_queue and events in event_wait_list
|
||||
* are not the same.
|
||||
* - CL_INVALID_VALUE if the thread_trace is invalid thread trace object.
|
||||
* - CL_INVALID_VALUE if the buffer_size is negative or zero.
|
||||
* - CL_INVALID_VALUE if the sub_buffers_num I less than 1.
|
||||
* - CL_INVALID_OPERATION if the mem_objects_num is not equal to the number of Shader Engines of
|
||||
* the [GPU] device.
|
||||
* - CL_INVALID_MEM_OBJECT if one on memory objects in the mem_objects array is not a valid memory
|
||||
* object or memory_objects is NULL.
|
||||
* - CL_MEM_OBJECT_ALLOCATION_FAILURE if there is a failure to allocate memory for the data store
|
||||
* associated from the memory objects of the mem_objects array.
|
||||
* - CL_INVALID_EVENT_WAIT_LIST if event_wait_list is NULL and num_events_in_wait_list > 0, or
|
||||
* event_wait_list is not NULL and num_events_in_wait_list is 0, or if event objects in
|
||||
* event_wait_list are not valid events.
|
||||
* - CL_OUT_OF_RESOURCES if there is a failure to allocate resources required by the OpenCL
|
||||
* implementation on the device.
|
||||
* - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required by the
|
||||
* OpenCL implementation on the host.
|
||||
*/
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueBindThreadTraceBufferAMD(
|
||||
cl_command_queue command_queue, cl_threadtrace_amd /*thread_trace*/, cl_mem* /*mem_objects*/,
|
||||
cl_uint /*mem_objects_num*/, cl_uint /*buffer_size*/, cl_uint /*num_events_in_wait_list*/,
|
||||
const cl_event* /*event_wait_list*/, cl_event* /*event*/
|
||||
) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
/*! \brief Get specific information about the OpenCL Thread Trace.
|
||||
*
|
||||
* \param thread_trace is the cl_threadtrace_amd object to query.
|
||||
*
|
||||
* \param thread_trace_info_param is an enum that identifies the Thread Trace information being
|
||||
* queried.
|
||||
*
|
||||
* \param param_value is a pointer to memory location where appropriate values
|
||||
* for a given \a thread_trace_info_param will be returned. If \a param_value is NULL,
|
||||
* it is ignored.
|
||||
*
|
||||
* \param param_value_size specifies the size in bytes of memory pointed to by
|
||||
* \a param_value. This size in bytes must be >= size of return type.
|
||||
*
|
||||
* \param param_value_size_ret returns the actual size in bytes of data being
|
||||
* queried by \a thread_trace_info_param. If \a param_value_size_ret is NULL, it is ignored.
|
||||
*
|
||||
* \return One of the following values:
|
||||
* - CL_INVALID_OPERATION if the cl_threadtrace_amd object is not valid.
|
||||
* - CL_INVALID_VALUE if \a thread_trace_info_param is not one of the supported
|
||||
* values or if size in bytes specified by \a param_value_size is < size of
|
||||
* return type and \a param_value is not a NULL value.
|
||||
* - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required by the
|
||||
* OpenCL implementation on the host.
|
||||
* - CL_SUCCESS if the function is executed successfully.
|
||||
*/
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL clGetThreadTraceInfoAMD(
|
||||
cl_threadtrace_amd /* thread_trace */, cl_threadtrace_info /*thread_trace_info_param*/,
|
||||
size_t /*param_value_size*/, void* /*param_value*/, size_t* /*param_value_size_ret*/
|
||||
) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
/*! \brief Enqueues the thread trace command for the specified thread trace object.
|
||||
*
|
||||
* \param command_queue must be a valid OpenCL command queue.
|
||||
*
|
||||
* \param thread_trace specifies the cl_threadtrace_amd object the command applies to.
|
||||
*
|
||||
* \param command_name specifies the command to enqueue; it must be one of the values
|
||||
* described in cl_threadtrace_command_name_amd.
|
||||
*
|
||||
* \return A non zero value if OpenCL failed to enqueue the thread trace command
|
||||
* - CL_INVALID_COMMAND_QUEUE if command_queue is not a valid command-queue.
|
||||
* - CL_INVALID_CONTEXT if the context associated with command_queue and events in event_wait_list
|
||||
* are not the same.
|
||||
* - CL_INVALID_VALUE if the thread_trace is not a valid thread trace object.
|
||||
* - CL_INVALID_VALUE if a command name enum value not described in
|
||||
* cl_threadtrace_command_name_amd is used.
|
||||
* - CL_INVALID_OPERATION if the command enqueue failed. It can happen in the following cases:
|
||||
* o BEGIN_COMMAND is queued for thread trace object for which memory object/s was/were not
|
||||
* bound.
|
||||
* o END_COMMAND is queued for thread trace object, for which BEGIN_COMMAND was not queued.
|
||||
* o PAUSE_COMMAND is queued for thread trace object, for which BEGIN_COMMAND was not
|
||||
* queued.
|
||||
* o RESUME_COMMAND is queued for thread trace object, for which PAUSE_COMMAND was not
|
||||
* queued.
|
||||
* - CL_INVALID_EVENT_WAIT_LIST if event_wait_list is NULL and num_events_in_wait_list > 0, or
|
||||
* event_wait_list is not NULL and num_events_in_wait_list is 0, or if event objects in
|
||||
* event_wait_list are not valid events.
|
||||
* - CL_OUT_OF_RESOURCES if there is a failure to allocate resources required by the OpenCL
|
||||
* implementation on the device.
|
||||
* - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required by the OpenCL
|
||||
* implementation on the host.
|
||||
*/
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueThreadTraceCommandAMD(
|
||||
cl_command_queue /*command_queue*/, cl_threadtrace_amd /*thread_trace*/,
|
||||
cl_threadtrace_command_name_amd /*command_name*/, cl_uint /*num_events_in_wait_list*/,
|
||||
const cl_event* /*event_wait_list*/, cl_event* /*event*/
|
||||
) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /*extern "C"*/
|
||||
#endif /*__cplusplus*/
|
||||
|
||||
#endif /*__CL_THREAD_TRACE_AMD_H*/
|
||||
@@ -0,0 +1,64 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
GLPREFIX(GLubyte*, glGetString, (GLenum name))
|
||||
|
||||
GLPREFIX(void, glBindBuffer, (GLenum target, GLuint buffer))
|
||||
//GLPREFIX(void, glBindFramebufferEXT, (GLenum target, GLuint framebuffer))
|
||||
GLPREFIX(void, glBindRenderbuffer, (GLenum target, GLuint renderbuffer))
|
||||
GLPREFIX(void, glBindTexture, (GLenum target, GLuint texture))
|
||||
GLPREFIX(void, glBufferData, (GLenum target, GLsizeiptr size, const GLvoid* data, GLenum usage))
|
||||
|
||||
GLPREFIX(GLenum, glCheckFramebufferStatusEXT, (GLenum target))
|
||||
|
||||
GLPREFIX(void, glDeleteBuffers, (GLsizei n, const GLuint* buffers))
|
||||
GLPREFIX(void, glDrawPixels, (GLsizei width, GLsizei height, GLenum format, GLenum type, const GLvoid *pixels))
|
||||
|
||||
//GLPREFIX(void, glFramebufferRenderbufferEXT, (GLenum target, GLenum attachment, GLenum renderbuffertarget, GLuint renderbuffer))
|
||||
|
||||
GLPREFIX(void, glGenBuffers, (GLsizei n, GLuint* buffers))
|
||||
//GLPREFIX(void, glGenFramebuffersEXT, (GLsizei n, GLuint* framebuffers))
|
||||
//10
|
||||
GLPREFIX(void, glGetBufferParameteriv, (GLenum target, GLenum pname, GLint* params))
|
||||
GLPREFIX(GLenum, glGetError, (void))
|
||||
GLPREFIX(void, glFinish, (void))
|
||||
GLPREFIX(void, glFlush, (void))
|
||||
GLPREFIX(GLenum, glClientWaitSync, (GLsync sync, GLbitfield flags, GLuint64 timeout))
|
||||
GLPREFIX(void, glGetIntegerv, (GLenum pname, GLint *params))
|
||||
GLPREFIX(void, glGetRenderbufferParameterivEXT, (GLenum target, GLenum pname, GLint* params))
|
||||
//GLPREFIX(GLubyte*, glGetString, (GLenum name))
|
||||
GLPREFIX(void, glGetTexImage, (GLenum target, GLint level, GLenum format, GLenum type, GLvoid *pixels))
|
||||
GLPREFIX(void, glGetTexLevelParameteriv, (GLenum target, GLint level, GLenum pname, GLint *params))
|
||||
GLPREFIX(void, glGetTexParameteriv, (GLenum target, GLenum pname, GLint *params))
|
||||
|
||||
GLPREFIX(GLboolean, glIsBuffer, (GLuint buffer))
|
||||
GLPREFIX(GLboolean, glIsRenderbufferEXT, (GLuint renderbuffer))
|
||||
GLPREFIX(GLboolean, glIsTexture, (GLuint texture))
|
||||
//20
|
||||
GLPREFIX(GLvoid*, glMapBuffer, (GLenum target, GLenum access))
|
||||
|
||||
GLPREFIX(void, glReadPixels, (GLint x, GLint y, GLsizei width, GLsizei height, GLenum format, GLenum type, GLvoid *pixels))
|
||||
|
||||
GLPREFIX(void, glTexImage2D, (GLenum target, GLint level, GLint internalformat, GLsizei width, GLsizei height, GLint border, GLenum format, GLenum type, const GLvoid *pixels))
|
||||
GLPREFIX(void, glTexImage3D, (GLenum target, GLint level, GLint internalformat, GLsizei width, GLsizei height, GLint border, GLenum format, GLenum type, const GLvoid *pixels)) // NOTE(review): parameter list is identical to glTexImage2D above; the OpenGL glTexImage3D entry point also takes a GLsizei depth after height — confirm before calling through this prototype
|
||||
|
||||
GLPREFIX(GLboolean, glUnmapBuffer, (GLenum target))
|
||||
|
||||
#undef GLPREFIX
|
||||
@@ -0,0 +1,108 @@
|
||||
/*
|
||||
* Copyright (c) 2016-2019 The Khronos Group Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
* OpenCL is a trademark of Apple Inc. used under license by Khronos.
|
||||
*/
|
||||
|
||||
#ifndef _ICD_DISPATCH_H_
|
||||
#define _ICD_DISPATCH_H_
|
||||
|
||||
#ifndef CL_USE_DEPRECATED_OPENCL_1_0_APIS
|
||||
#define CL_USE_DEPRECATED_OPENCL_1_0_APIS
|
||||
#endif
|
||||
|
||||
#ifndef CL_USE_DEPRECATED_OPENCL_1_1_APIS
|
||||
#define CL_USE_DEPRECATED_OPENCL_1_1_APIS
|
||||
#endif
|
||||
|
||||
#ifndef CL_USE_DEPRECATED_OPENCL_1_2_APIS
|
||||
#define CL_USE_DEPRECATED_OPENCL_1_2_APIS
|
||||
#endif
|
||||
|
||||
#ifndef CL_USE_DEPRECATED_OPENCL_2_0_APIS
|
||||
#define CL_USE_DEPRECATED_OPENCL_2_0_APIS
|
||||
#endif
|
||||
|
||||
// cl.h
|
||||
#include <CL/cl.h>
|
||||
|
||||
// cl_gl.h and required files
|
||||
#ifdef _WIN32
|
||||
#include <windows.h>
|
||||
#include <d3d9.h>
|
||||
#include <d3d10_1.h>
|
||||
#include <CL/cl_d3d10.h>
|
||||
#include <CL/cl_d3d11.h>
|
||||
#include <CL/cl_dx9_media_sharing.h>
|
||||
#endif
|
||||
#include <CL/cl_gl.h>
|
||||
#include <CL/cl_gl_ext.h>
|
||||
#include <CL/cl_ext.h>
|
||||
#include <CL/cl_egl.h>
|
||||
#include <CL/cl_icd.h>
|
||||
|
||||
/*
|
||||
*
|
||||
* vendor dispatch table structure
|
||||
*
|
||||
*/
|
||||
|
||||
struct _cl_platform_id
|
||||
{
|
||||
cl_icd_dispatch *dispatch;
|
||||
};
|
||||
|
||||
struct _cl_device_id
|
||||
{
|
||||
cl_icd_dispatch *dispatch;
|
||||
};
|
||||
|
||||
struct _cl_context
|
||||
{
|
||||
cl_icd_dispatch *dispatch;
|
||||
};
|
||||
|
||||
struct _cl_command_queue
|
||||
{
|
||||
cl_icd_dispatch *dispatch;
|
||||
};
|
||||
|
||||
struct _cl_mem
|
||||
{
|
||||
cl_icd_dispatch *dispatch;
|
||||
};
|
||||
|
||||
struct _cl_program
|
||||
{
|
||||
cl_icd_dispatch *dispatch;
|
||||
};
|
||||
|
||||
struct _cl_kernel
|
||||
{
|
||||
cl_icd_dispatch *dispatch;
|
||||
};
|
||||
|
||||
struct _cl_event
|
||||
{
|
||||
cl_icd_dispatch *dispatch;
|
||||
};
|
||||
|
||||
struct _cl_sampler
|
||||
{
|
||||
cl_icd_dispatch *dispatch;
|
||||
};
|
||||
|
||||
#endif // _ICD_DISPATCH_H_
|
||||
|
||||
+27
-14
@@ -222,12 +222,12 @@ if ($HIP_PLATFORM eq "clang") {
|
||||
$HIPCXXFLAGS .= " -Xclang -fallow-half-arguments-and-returns -D__HIP_HCC_COMPAT_MODE__=1";
|
||||
}
|
||||
|
||||
if ($HIP_RUNTIME eq "HCC" ) {
|
||||
$HSA_PATH=$ENV{'HSA_PATH'} // "$ROCM_PATH/hsa";
|
||||
$HIPCXXFLAGS .= " -isystem $HSA_PATH/include";
|
||||
$HIPCFLAGS .= " -isystem $HSA_PATH/include";
|
||||
} else {
|
||||
$HIPCXXFLAGS .= " -fhip-new-launch-api";
|
||||
$HSA_PATH=$ENV{'HSA_PATH'} // "$ROCM_PATH/hsa";
|
||||
$HIPCXXFLAGS .= " -isystem $HSA_PATH/include";
|
||||
$HIPCFLAGS .= " -isystem $HSA_PATH/include";
|
||||
if (!($HIP_RUNTIME eq "HCC")) {
|
||||
$HIPCXXFLAGS .= " -D__HIP_VDI__ -fhip-new-launch-api";
|
||||
$HIPCFLAGS .= " -D__HIP_VDI__ -fhip-new-launch-api";
|
||||
}
|
||||
|
||||
} elsif ($HIP_PLATFORM eq "hcc") {
|
||||
@@ -245,6 +245,9 @@ if ($HIP_PLATFORM eq "clang") {
|
||||
$HCC_VERSION_MAJOR=$HCC_VERSION;
|
||||
$HCC_VERSION_MAJOR=~s/\..*//;
|
||||
|
||||
$HIP_ATP_MARKER=$ENV{'HIP_ATP_MARKER'} // 1;
|
||||
$marker_path = "$ROCM_PATH/profiler/CXLActivityLogger";
|
||||
|
||||
# HCC* may be used to compile src/hip_hcc.o (and also feed the HIPCXXFLAGS below)
|
||||
$HCC = "$HCC_HOME/bin/hcc";
|
||||
$HCCFLAGS = "-hc -D__HIPCC__ -isystem $HCC_HOME/include ";
|
||||
@@ -292,6 +295,20 @@ if ($HIP_PLATFORM eq "clang") {
|
||||
$HIPLDFLAGS .= " -L$HSA_PATH/lib -L$ROCM_PATH/lib -lhsa-runtime64 -lhc_am ";
|
||||
# $HIPLDFLAGS .= " -L$HCC_HOME/compiler/lib -lLLVMAMDGPUDesc -lLLVMAMDGPUUtils -lLLVMMC -lLLVMCore -lLLVMSupport ";
|
||||
|
||||
# Add trace marker library:
|
||||
# TODO - once we cleanly separate the HIP API headers from HIP library headers this logic should move to CMakebuild option - apps do not need to see the marker library.
|
||||
if ($HIP_ATP_MARKER) {
|
||||
$marker_inc_path = "$marker_path/include";
|
||||
if (-e $marker_inc_path) {
|
||||
$HIPCXXFLAGS .= " -isystem $marker_inc_path";
|
||||
}
|
||||
}
|
||||
|
||||
$marker_lib_path = "$marker_path/bin/x86_64";
|
||||
if (-e $marker_lib_path) {
|
||||
$HIPLDFLAGS .= " -L$marker_lib_path -lCXLActivityLogger -Wl,--rpath=$marker_lib_path";
|
||||
}
|
||||
|
||||
if (not $isWindows) {
|
||||
$HIPLDFLAGS .= " -lm";
|
||||
}
|
||||
@@ -336,7 +353,10 @@ my $runCmd = 1;
|
||||
my $buildDeps = 0;
|
||||
my $linkType = 1;
|
||||
my $setLinkType = 0;
|
||||
my $coFormatv3 = 0;
|
||||
my $coFormatv3 = 1;
|
||||
if(defined $HIP_COMPILER and $HIP_COMPILER eq "hcc") {
|
||||
$coFormatv3 = 0;
|
||||
}
|
||||
my $funcSupp = 0; # enable function support
|
||||
|
||||
my @options = ();
|
||||
@@ -474,13 +494,6 @@ foreach $arg (@ARGV)
|
||||
$optArg = $arg;
|
||||
}
|
||||
|
||||
## This is a temporary workaround for CMake detection of OpenMP support.
|
||||
## It should be removed when the OpenMP detection c++ test in CMake is updated
|
||||
## and corrected CMake version is available.
|
||||
if((defined $HIP_COMPILER) and ($HIP_COMPILER eq "clang") and ($arg eq '-fopenmp')) {
|
||||
$HIPCXXFLAGS .= " -D_OPENMP "
|
||||
}
|
||||
|
||||
## process linker response file for hip-clang
|
||||
## extract object files from static library and pass them directly to
|
||||
## hip-clang in command line.
|
||||
|
||||
@@ -223,6 +223,10 @@ sub simpleSubstitutions {
|
||||
$ft{'memory'} += s/\bcuMemcpy2DAsync\b/hipMemcpyParam2DAsync/g;
|
||||
$ft{'memory'} += s/\bcuMemcpy2DAsync_v2\b/hipMemcpyParam2DAsync/g;
|
||||
$ft{'memory'} += s/\bcuMemcpy2D_v2\b/hipMemcpyParam2D/g;
|
||||
$ft{'memory'} += s/\bcuMemcpy3D\b/hipDrvMemcpy3D/g;
|
||||
$ft{'memory'} += s/\bcuMemcpy3DAsync\b/hipDrvMemcpy3DAsync/g;
|
||||
$ft{'memory'} += s/\bcuMemcpy3D_v2\b/hipDrvMemcpy3D/g;
|
||||
$ft{'memory'} += s/\bcuMemcpy3DAsync_v2\b/hipDrvMemcpy3DAsync/g;
|
||||
$ft{'memory'} += s/\bcuMemcpyAtoH\b/hipMemcpyAtoH/g;
|
||||
$ft{'memory'} += s/\bcuMemcpyAtoH_v2\b/hipMemcpyAtoH/g;
|
||||
$ft{'memory'} += s/\bcuMemcpyDtoD\b/hipMemcpyDtoD/g;
|
||||
@@ -979,6 +983,8 @@ sub simpleSubstitutions {
|
||||
$ft{'type'} += s/\bCUDA_ARRAY_DESCRIPTOR_st\b/HIP_ARRAY_DESCRIPTOR/g;
|
||||
$ft{'type'} += s/\bCUDA_MEMCPY2D\b/hip_Memcpy2D/g;
|
||||
$ft{'type'} += s/\bCUDA_MEMCPY2D_st\b/hip_Memcpy2D/g;
|
||||
$ft{'type'} += s/\bCUDA_MEMCPY3D\b/HIP_MEMCPY3D/g;
|
||||
$ft{'type'} += s/\bCUDA_MEMCPY3D_st\b/HIP_MEMCPY3D/g;
|
||||
$ft{'type'} += s/\bCUaddress_mode\b/hipTextureAddressMode/g;
|
||||
$ft{'type'} += s/\bCUaddress_mode_enum\b/hipTextureAddressMode/g;
|
||||
$ft{'type'} += s/\bCUarray\b/hipArray */g;
|
||||
|
||||
@@ -0,0 +1,16 @@
|
||||
# Try to find ROCR (Radeon Open Compute Runtime)
|
||||
#
|
||||
# Search roots, in order: the ROCM_PATH CMake variable (if set), the ROCM_PATH
|
||||
# environment variable (if set), then the default /opt/rocm prefix. Unset
|
||||
# variables expand to nothing, so the default-prefix lookup is unchanged.
|
||||
#
|
||||
# Once found, this will define:
|
||||
# - ROCR_FOUND - ROCR status (found or not found)
|
||||
# - ROCR_INCLUDES - Required ROCR include directories
|
||||
# - ROCR_LIBRARIES - Required ROCR libraries
|
||||
#
|
||||
# hsa.h is looked up under <root>/include[/hsa] and <root>/hsa/include[/hsa].
|
||||
find_path(FIND_ROCR_INCLUDES hsa.h HINTS ${ROCM_PATH} ENV ROCM_PATH /opt/rocm PATH_SUFFIXES include/hsa include hsa/include/hsa hsa/include)
|
||||
# hsa-runtime64 is looked up under <root>/lib and <root>/hsa/lib.
|
||||
find_library(FIND_ROCR_LIBRARIES hsa-runtime64 HINTS ${ROCM_PATH} ENV ROCM_PATH /opt/rocm PATH_SUFFIXES lib hsa/lib)
|
||||
|
||||
include(FindPackageHandleStandardArgs)
|
||||
# Sets ROCR_FOUND once both the header directory and the library were located.
|
||||
find_package_handle_standard_args(ROCR DEFAULT_MSG
|
||||
FIND_ROCR_INCLUDES FIND_ROCR_LIBRARIES)
|
||||
mark_as_advanced(FIND_ROCR_INCLUDES FIND_ROCR_LIBRARIES)
|
||||
|
||||
# Public result variables mirror the cached find results.
|
||||
set(ROCR_INCLUDES ${FIND_ROCR_INCLUDES})
|
||||
set(ROCR_LIBRARIES ${FIND_ROCR_LIBRARIES})
|
||||
@@ -0,0 +1,16 @@
|
||||
# Try to find ROCT (Radeon Open Compute Thunk)
|
||||
#
|
||||
# Search roots, in order: the ROCM_PATH CMake variable (if set), the ROCM_PATH
|
||||
# environment variable (if set), then the default /opt/rocm prefix. Unset
|
||||
# variables expand to nothing, so the default-prefix lookup is unchanged.
|
||||
#
|
||||
# Once found, this will define:
|
||||
# - ROCT_FOUND - ROCT status (found or not found)
|
||||
# - ROCT_INCLUDES - Required ROCT include directories
|
||||
# - ROCT_LIBRARIES - Required ROCT libraries
|
||||
#
|
||||
# hsakmt.h is looked up under <root>/include.
|
||||
find_path(FIND_ROCT_INCLUDES hsakmt.h HINTS ${ROCM_PATH} ENV ROCM_PATH /opt/rocm PATH_SUFFIXES include)
|
||||
# hsakmt is looked up under <root>/lib.
|
||||
find_library(FIND_ROCT_LIBRARIES hsakmt HINTS ${ROCM_PATH} ENV ROCM_PATH /opt/rocm PATH_SUFFIXES lib)
|
||||
|
||||
include(FindPackageHandleStandardArgs)
|
||||
# Sets ROCT_FOUND once both the header directory and the library were located.
|
||||
find_package_handle_standard_args(ROCT DEFAULT_MSG
|
||||
FIND_ROCT_INCLUDES FIND_ROCT_LIBRARIES)
|
||||
mark_as_advanced(FIND_ROCT_INCLUDES FIND_ROCT_LIBRARIES)
|
||||
|
||||
# Public result variables mirror the cached find results.
|
||||
set(ROCT_INCLUDES ${FIND_ROCT_INCLUDES})
|
||||
set(ROCT_LIBRARIES ${FIND_ROCT_LIBRARIES})
|
||||
Vendored
@@ -0,0 +1,279 @@
|
||||
# Profiling HIP Code
|
||||
|
||||
This section describes the profiling and debugging capabilities that HIP provides.
|
||||
Profiling information can be viewed in the CodeXL visualization tool or printed directly to stderr as the application runs.
|
||||
This document starts with some of the general capabilities of CodeXL and then describes some of the additional HIP marker and debug features.
|
||||
|
||||
<!-- toc -->
|
||||
|
||||
- [CodeXL Profiling](#codexl-profiling)
|
||||
* [Collecting and Viewing Traces](#collecting-and-viewing-traces)
|
||||
+ [Using rocm-profiler timestamp profiling](#using-rocm-profiler-timestamp-profiling)
|
||||
+ [Using rocm-profiler performance counter collection:](#using-rocm-profiler-performance-counter-collection)
|
||||
+ [Using CodeXL to view profiling results:](#using-codexl-to-view-profiling-results)
|
||||
+ [More information on CodeXL](#more-information-on-codexl)
|
||||
* [HIP Markers](#hip-markers)
|
||||
+ [Profiling HIP APIs](#profiling-hip-apis)
|
||||
+ [Adding markers to applications](#adding-markers-to-applications)
|
||||
* [Additional HIP Profiling Features](#additional-hip-profiling-features)
|
||||
+ [Demangling C++ Kernel Names](#demangling-c-kernel-names)
|
||||
+ [Controlling when profiling starts and ends](#controlling-when-profiling-starts-and-ends)
|
||||
+ [Reducing timeline trace output file size](#reducing-timeline-trace-output-file-size)
|
||||
+ [How to enable profiling at HIP build time](#how-to-enable-profiling-at-hip-build-time)
|
||||
- [Tracing and Debug](#tracing-and-debug)
|
||||
* [Tracing HIP APIs](#tracing-hip-apis)
|
||||
+ [Color](#color)
|
||||
|
||||
<!-- tocstop -->
|
||||
|
||||
## CodeXL Profiling
|
||||
|
||||
### Collecting and Viewing Traces
|
||||
|
||||
#### Using rocm-profiler timestamp profiling
|
||||
rocm-profiler is a command-line tool for tracing any application that uses ROCr API, including HCC and HIP.
|
||||
rocm-profiler's timeline trace will show the beginning and end for all kernel commands, data transfer commands, and HSA Runtime (ROCr) API calls. The trace results are saved into a file, which by convention uses the "atp" extension. Here is an example that shows how to run the command-line profiler:
|
||||
```shell
|
||||
$ /opt/rocm/bin/rocm-profiler -o <outputATPFileName> -A -T <applicationName> <applicationArguments>
|
||||
```
|
||||
|
||||
#### Using rocm-profiler performance counter collection:
|
||||
rocm-profiler can record performance counter information to provide greater insight inside a kernel, such as the memory bandwidth, ALU busy percentage, and cache statistics.
|
||||
Collecting the common set of useful counters requires passing the counter configuration files for two passes:
|
||||
```
|
||||
$ /opt/rocm/bin/rocm-profiler -C -O --counterfile /opt/rocm/profiler/counterfiles/counters_HSA_Fiji_pass1 --counterfile /opt/rocm/profiler/counterfiles/counters_HSA_Fiji_pass2 <applicationName> <applicationArguments>
|
||||
```
|
||||
|
||||
|
||||
#### Using CodeXL to view profiling results:
|
||||
The trace can be loaded and viewed in the CodeXL visualization tool:
|
||||
|
||||
- Open the CodeXL GUI, create a new project, and switch to "Profile Mode":
|
||||
- $ CodeXL &
|
||||
- [File->New Project, leave fields as is, just click "OK"]
|
||||
- [Profile->Switch to Profile Mode]
|
||||
- Load timestamp tracing results into a timeline view:
|
||||
- Right click on the project in the CodeXL Explorer view
|
||||
- Click "Import Session..."
|
||||
- Select $HOME/apitrace.atp (or the appropriate .atp file if you used another file name)
|
||||
|
||||
- Load the performance counter results
|
||||
- Right click on the project in the CodeXL Explorer view
|
||||
- Click "Import Session..."
|
||||
- Select $HOME/Session1.csv (or appropriate .csv file if you used another file name)
|
||||
|
||||
|
||||
#### More information on CodeXL
|
||||
rocm-profiler --help will show additional options and usage guidelines.
|
||||
|
||||
See this [blog](http://gpuopen.com/getting-up-to-speed-with-the-codexl-gpu-profiler-and-radeon-open-compute/) for more information on profiling ROCm apps (including HIP) with CodeXL.
|
||||
|
||||
The 2.2 version of Windows CodeXL does not correctly handle Linux line endings. If you are collecting a trace on Linux and then viewing it with the 2.2 Windows CodeXL, first convert the line endings in the .atp file to Windows-style line endings.
|
||||
|
||||
### HIP Markers
|
||||
#### Profiling HIP APIs
|
||||
HIP can generate markers at function beginning and end which are displayed on the CodeXL timeline view.
|
||||
HIP 1.0 compiles marker support by default, and you can enable it by setting the HIP_PROFILE_API environment variable and then running the rocm-profiler:
|
||||
|
||||
```shell
|
||||
|
||||
# Use profile to generate timeline view:
|
||||
export HIP_PROFILE_API=1
|
||||
$ /opt/rocm/bin/rocm-profiler -A -T <applicationName> <applicationArguments>
|
||||
|
||||
Or
|
||||
$ /opt/rocm/bin/rocm-profiler -e HIP_PROFILE_API=1 -A -T <applicationName> <applicationArguments>
|
||||
```
|
||||
|
||||
HIP_PROFILE_API supports two levels of information.
|
||||
- HIP_PROFILE_API=1 : Short format. Print name of API but no arguments. For example:
|
||||
`hipMemcpy`
|
||||
- HIP_PROFILE_API=2 : Long format. Print name of API + values of all function arguments. For example:
|
||||
`hipMemcpy (0x7f32154db010, 0x50446e000, 4000000, hipMemcpyDeviceToHost)`
|
||||
|
||||
#### Adding markers to applications
|
||||
|
||||
Markers can be used to define application-specific events that will be recorded in the ATP file and displayed in the CodeXL GUI.
|
||||
This can be particularly useful for visualizing how the higher-level phases of application behavior relate to the lower level HIP APIs, kernel launches, and data transfers.
|
||||
For example, an instrumented machine learning framework could show the beginning and ending of each layer in the network.
|
||||
|
||||
Markers have a specific begin and end time, and can be nested. Nested calls are displayed hierarchically in the CodeXL GUI, with each level of the hierarchy occupying a different row.
|
||||
|
||||
The HIP marker APIs are defined in "hip_profile.h":
|
||||
```
|
||||
#include <hip/hip_profile.h>
|
||||
|
||||
HIP_BEGIN_MARKER(const char *markerName, const char *groupName);
|
||||
HIP_END_MARKER();
|
||||
|
||||
HIP_BEGIN_MARKER("Setup", "MyAppGroup");
|
||||
// ...
|
||||
// application code for setup
|
||||
// ...
|
||||
HIP_END_MARKER();
|
||||
```
|
||||
|
||||
For C++ codes, HIP also provides a scoped marker which records the start time when constructed and the end time when the scoped marker is destructed at the end of the scope. This provides a convenient, single-line mechanism to record an event that neatly corresponds to a region of code.
|
||||
|
||||
```cxx
|
||||
void FunctionFoo(...)
|
||||
{
|
||||
HIP_SCOPED_MARKER("FunctionFoo", "MyAppGroup"); // Marker starts recording here.
|
||||
|
||||
// ...
|
||||
// Function implementation
|
||||
// ...
|
||||
|
||||
// Marker destroyed here and records end time stamp.
|
||||
};
|
||||
```
|
||||
|
||||
The HIP marker API is only supported on the ROCm platform. The marker macros are defined on CUDA platforms and will compile, but are silently ignored at runtime.
|
||||
|
||||
This [HIP sample](https://github.com/ROCm-Developer-Tools/HIP/tree/master/samples/2_Cookbook/2_Profiler) shows the profiler marker API used in a small application.
|
||||
|
||||
More information on the marker API can be found in the profiler header file and PDF in a ROCm installation:
|
||||
- /opt/rocm/profiler/CXLActivityLogger/include/CXLActivityLogger.h
|
||||
- /opt/rocm/profiler/CXLActivityLogger/doc/CXLActivityLogger.pdf
|
||||
|
||||
### Additional HIP Profiling Features
|
||||
#### Demangling C++ Kernel Names
|
||||
HIP includes the `hipdemangleatp` tool which can post-process an ATP file to "demangle" C++ names.
|
||||
Mangled kernel names encode the C++ arguments and other information, and are guaranteed to be unique even for cases such as operator overloading. However, the mangled names can be quite verbose. For example:
|
||||
|
||||
`ZZ39gemm_NoTransA_MICRO_NBK_M_N_K_TS16XMTS4RN2hc16accelerator_viewEPKflS3_lPfliiiiiiffEN3_EC__719__cxxamp_trampolineElililiiiiiiS3_iS3_S4_ff`
|
||||
|
||||
`hipdemangleatp` will convert this into the more readable:
|
||||
`gemm_NoTransA_MICRO_NBK_M_N_K_TS16XMTS4`
|
||||
|
||||
The `hipdemangleatp` tool operates on the ATP file "in-place" and thus replaces the input file with the demangled version.
|
||||
|
||||
```
|
||||
$ hipdemangleatp myfile.atp
|
||||
```
|
||||
|
||||
The kernel name is also shown in some of the summary HTML files (Top10 kernels). These can be regenerated from the demangled ATP file by re-running rocm-profiler:
|
||||
```
|
||||
$ rocm-profiler -T --atpfile myfile.atp
|
||||
```
|
||||
|
||||
A future version of CodeXL may directly integrate demangle functionality.
|
||||
|
||||
|
||||
#### Controlling when profiling starts and ends
|
||||
hipProfilerStart() and hipProfilerStop() can be inserted into an application to control which phases of the application are profiled.
|
||||
These APIs can be used to skip initialization code or to focus profiling on a desired region, and are particularly useful for large long-running applications.
|
||||
See the API documentation for more information. These APIs work on both ROCm and CUDA paths.
|
||||
|
||||
On ROCm, the following environment variables can be used to control when profiling occurs:
|
||||
|
||||
```
|
||||
HIP_DB_START_API : Comma-separated list of tid.api_seq_num for when to start debug and profiling.
|
||||
HIP_DB_STOP_API : Comma-separated list of tid.api_seq_num for when to stop debug and profiling.
|
||||
```
|
||||
|
||||
HIP/ROCm assigns a monotonically increasing sequence number to the APIs called from each thread. The thread and API sequence number can be used with the above environment variables to control when tracing starts and stops. These flags also control the HIP_DB messages (described below).
|
||||
|
||||
When using these options, start the profiler with profiling disabled:
|
||||
```
|
||||
# ROCm:
|
||||
$ rocm-profiler --startdisabled ...
|
||||
|
||||
# CUDA:
|
||||
$ nvprof --profile-from-start-off ...
|
||||
```
|
||||
|
||||
This feature is under development.
|
||||
|
||||
#### Reducing timeline trace output file size
|
||||
If the application is already recording the HIP APIs, the HSA APIs are somewhat redundant and the ATP file size can be substantially reduced by not recording these APIs. HIP includes a text file that lists all of the HSA APIs and can assist in this filtering:
|
||||
|
||||
```
|
||||
$ rocm-profiler -F hip/bin/hsa-api-filter-cxl.txt
|
||||
```
|
||||
|
||||
This file can be copied and edited to provide more selective HSA event recording.
|
||||
|
||||
|
||||
#### How to enable profiling at HIP build time
|
||||
Pre-built packages of HIP are not built with profiling support enabled. You must enable marker support manually when compiling HIP.
|
||||
|
||||
1. Build HIP with ATP markers enabled
|
||||
HIP pre-built packages are enabled with ATP marker support by default.
|
||||
To enable ATP marker support when building HIP from source, use the option ```-DCOMPILE_HIP_ATP_MARKER=1``` during the cmake configure step. Build and install HIP.
|
||||
```shell
|
||||
$ mkdir build && cd build
|
||||
$ cmake .. -DCOMPILE_HIP_ATP_MARKER=1
|
||||
$ make install
|
||||
```
|
||||
|
||||
2. Install ROCm-Profiler
|
||||
Installing HIP from the [rocm](http://gpuopen.com/getting-started-with-boltzmann-components-platforms-installation/) pre-built packages, installs the ROCm-Profiler as well.
|
||||
Alternatively, you can build ROCm-Profiler using the instructions [here](https://github.com/RadeonOpenCompute/ROCm-Profiler#building-the-rocm-profiler).
|
||||
|
||||
3. Recompile the target application
|
||||
|
||||
Then follow the steps above to collect a marker-enabled trace.
|
||||
|
||||
|
||||
## Tracing and Debug
|
||||
|
||||
### Tracing HIP APIs
|
||||
The HIP runtime can print the HIP function strings to stderr using HIP_TRACE_API environment variable.
|
||||
The trace prints two messages for each API - one at the beginning of the API call (line starts with "<<") and one at the end of the API call (line ends with ">>").
|
||||
Here's an example for one API followed by a description for the sections of the trace:
|
||||
|
||||
```
|
||||
<<hip-api tid:1.6 hipMemcpy (0x7f32154db010, 0x50446e000, 4000000, hipMemcpyDeviceToHost)
|
||||
hip-api tid:1.6 hipMemcpy ret= 0 (hipSuccess)>>
|
||||
```
|
||||
|
||||
- `<<hip-api` is the header used for all HIP API debug messages. The message is also shown in a specific color. This can be used to distinguish this API from other HIP or application messages.
|
||||
- `tid:1.6` indicates that this API call came from thread #1 and is the 6th API call in that thread. When the first API in a new thread is called, HIP will associate a short sequential ID with that thread. You can see the full thread ID (reported by C++) as 0x7f6183b097c0 in the example below.
|
||||
- `hipMemcpy` is the name of the API.
|
||||
- The first line then prints a comma-separated list of the arguments to the function. APIs which return values to the caller by writing to pointers will show the pointer addresses rather than the pointer contents. This behavior may change in the future.
|
||||
- The second line shows the completion of the API, including the numeric return value (`ret= 0`) as well as a string representation for the error code (`hipSuccess`). If the returned error code is non-zero, then the second line message is shown in red (unless HIP_TRACE_API_COLOR is "none" - see below).
|
||||
|
||||
|
||||
Here's a specific example showing the output of the [square](https://github.com/ROCm-Developer-Tools/HIP/tree/master/samples/0_Intro/square) program running on HIP:
|
||||
|
||||
```
|
||||
$ HIP_TRACE_API=1 ./square.hip.out
|
||||
hip-api tid:1:HIP initialized short_tid#1 (maps to full_tid: 0x7f6183b097c0)
|
||||
<<hip-api tid:1.1 hipGetDeviceProperties (0x7ffddb673e08, 0)
|
||||
hip-api tid:1.1 hipGetDeviceProperties ret= 0 (hipSuccess)>>
|
||||
info: running on device gfx803
|
||||
info: allocate host mem ( 7.63 MB)
|
||||
info: allocate device mem ( 7.63 MB)
|
||||
<<hip-api tid:1.2 hipMalloc (0x7ffddb673fb8, 4000000)
|
||||
hip-api tid:1.2 hipMalloc ret= 0 (hipSuccess)>>
|
||||
<<hip-api tid:1.3 hipMalloc (0x7ffddb673fb0, 4000000)
|
||||
hip-api tid:1.3 hipMalloc ret= 0 (hipSuccess)>>
|
||||
info: copy Host2Device
|
||||
<<hip-api tid:1.4 hipMemcpy (0x50409d000, 0x7f32158ac010, 4000000, hipMemcpyHostToDevice)
|
||||
hip-api tid:1.4 hipMemcpy ret= 0 (hipSuccess)>>
|
||||
info: launch 'vector_square' kernel
|
||||
1.5 hipLaunchKernel 'HIP_KERNEL_NAME(vector_square)' gridDim:{512,1,1} groupDim:{256,1,1} sharedMem:+0 stream#0.0
|
||||
info: copy Device2Host
|
||||
<<hip-api tid:1.6 hipMemcpy (0x7f32154db010, 0x50446e000, 4000000, hipMemcpyDeviceToHost)
|
||||
hip-api tid:1.6 hipMemcpy ret= 0 (hipSuccess)>>
|
||||
info: check result
|
||||
PASSED!
|
||||
```
|
||||
|
||||
HIP_TRACE_API supports multiple levels of debug information:
|
||||
- 0x1 = print all HIP APIs. This is the most verbose setting; the flags below allow selecting a subset.
|
||||
- 0x2 = print HIP APIs which initiate GPU kernel commands. Includes hipLaunchKernel, hipModuleLaunchKernel.
|
||||
- 0x4 = print HIP APIs which initiate GPU memory commands. Includes hipMemcpy*, hipMemset*.
|
||||
- 0x8 = print HIP APIs which allocate or free memory. Includes hipMalloc, hipHostMalloc, hipFree, hipHostFree.
|
||||
|
||||
These can be combined. For example, HIP_TRACE_API=6 shows a concise view of the HIP commands (both kernel and memory) that are sent to the GPU.
|
||||
|
||||
|
||||
#### Color
|
||||
Note this trace mode uses colors. "less -r" can handle raw control characters and will display the debug output in proper colors.
|
||||
You can change the color used for the trace mode with the HIP_TRACE_API_COLOR environment variable. Possible values are None/Red/Green/Yellow/Blue/Magenta/Cyan/White.
|
||||
None will disable use of color control codes for both the opening and closing and may be useful when saving the trace file or when a pure text trace is desired.
|
||||
|
||||
|
||||
|
||||
@@ -137,11 +137,11 @@ if(HIP_COMPILER STREQUAL "clang")
|
||||
)
|
||||
|
||||
set_property(TARGET hip::device APPEND PROPERTY
|
||||
INTERFACE_INCLUDE_DIRECTORIES "${HIP_CLANG_INCLUDE_PATH}"
|
||||
INTERFACE_INCLUDE_DIRECTORIES "${HIP_CLANG_INCLUDE_PATH}/.."
|
||||
)
|
||||
|
||||
set_property(TARGET hip::device APPEND PROPERTY
|
||||
INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "${HIP_CLANG_INCLUDE_PATH}"
|
||||
INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "${HIP_CLANG_INCLUDE_PATH}/.."
|
||||
)
|
||||
|
||||
foreach(GPU_TARGET ${GPU_TARGETS})
|
||||
|
||||
@@ -29,8 +29,14 @@ THE SOFTWARE.
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
||||
#if __HIP_VDI__
|
||||
extern "C" {
|
||||
#endif
|
||||
HIP_PUBLIC_API
|
||||
hipChannelFormatDesc hipCreateChannelDesc(int x, int y, int z, int w, hipChannelFormatKind f);
|
||||
#if __HIP_VDI__
|
||||
}
|
||||
#endif
|
||||
|
||||
static inline hipChannelFormatDesc hipCreateChannelDescHalf() {
|
||||
int e = (int)sizeof(unsigned short) * 8;
|
||||
|
||||
@@ -135,6 +135,47 @@ typedef enum hipResourceType {
|
||||
hipResourceTypePitch2D = 0x03
|
||||
}hipResourceType;
|
||||
|
||||
/**
 * Resource type selector; HIP_RESOURCE_DESC stores one of these in its resType field.
 */
typedef enum HIPresourcetype_enum {
|
||||
HIP_RESOURCE_TYPE_ARRAY = 0x00, /**< Array resource */
|
||||
HIP_RESOURCE_TYPE_MIPMAPPED_ARRAY = 0x01, /**< Mipmapped array resource */
|
||||
HIP_RESOURCE_TYPE_LINEAR = 0x02, /**< Linear resource */
|
||||
HIP_RESOURCE_TYPE_PITCH2D = 0x03 /**< Pitch 2D resource */
|
||||
} HIPresourcetype;
|
||||
|
||||
/**
|
||||
* hip address modes
|
||||
*/
|
||||
typedef enum HIPaddress_mode_enum {
|
||||
HIP_TR_ADDRESS_MODE_WRAP = 0,
|
||||
HIP_TR_ADDRESS_MODE_CLAMP = 1,
|
||||
HIP_TR_ADDRESS_MODE_MIRROR = 2,
|
||||
HIP_TR_ADDRESS_MODE_BORDER = 3
|
||||
} HIPaddress_mode;
|
||||
|
||||
/**
|
||||
* hip filter modes
|
||||
*/
|
||||
typedef enum HIPfilter_mode_enum {
|
||||
HIP_TR_FILTER_MODE_POINT = 0,
|
||||
HIP_TR_FILTER_MODE_LINEAR = 1
|
||||
} HIPfilter_mode;
|
||||
|
||||
/**
|
||||
* Texture descriptor
|
||||
*/
|
||||
typedef struct HIP_TEXTURE_DESC_st {
|
||||
HIPaddress_mode addressMode[3]; /**< Address modes */
|
||||
HIPfilter_mode filterMode; /**< Filter mode */
|
||||
unsigned int flags; /**< Flags */
|
||||
unsigned int maxAnisotropy; /**< Maximum anisotropy ratio */
|
||||
HIPfilter_mode mipmapFilterMode; /**< Mipmap filter mode */
|
||||
float mipmapLevelBias; /**< Mipmap level bias */
|
||||
float minMipmapLevelClamp; /**< Mipmap minimum level clamp */
|
||||
float maxMipmapLevelClamp; /**< Mipmap maximum level clamp */
|
||||
float borderColor[4]; /**< Border Color */
|
||||
int reserved[12];
|
||||
} HIP_TEXTURE_DESC;
|
||||
|
||||
/**
|
||||
* hip texture resource view formats
|
||||
*/
|
||||
@@ -176,6 +217,45 @@ typedef enum hipResourceViewFormat {
|
||||
hipResViewFormatUnsignedBlockCompressed7 = 0x22
|
||||
}hipResourceViewFormat;
|
||||
|
||||
typedef enum HIPresourceViewFormat_enum
|
||||
{
|
||||
HIP_RES_VIEW_FORMAT_NONE = 0x00, /**< No resource view format (use underlying resource format) */
|
||||
HIP_RES_VIEW_FORMAT_UINT_1X8 = 0x01, /**< 1 channel unsigned 8-bit integers */
|
||||
HIP_RES_VIEW_FORMAT_UINT_2X8 = 0x02, /**< 2 channel unsigned 8-bit integers */
|
||||
HIP_RES_VIEW_FORMAT_UINT_4X8 = 0x03, /**< 4 channel unsigned 8-bit integers */
|
||||
HIP_RES_VIEW_FORMAT_SINT_1X8 = 0x04, /**< 1 channel signed 8-bit integers */
|
||||
HIP_RES_VIEW_FORMAT_SINT_2X8 = 0x05, /**< 2 channel signed 8-bit integers */
|
||||
HIP_RES_VIEW_FORMAT_SINT_4X8 = 0x06, /**< 4 channel signed 8-bit integers */
|
||||
HIP_RES_VIEW_FORMAT_UINT_1X16 = 0x07, /**< 1 channel unsigned 16-bit integers */
|
||||
HIP_RES_VIEW_FORMAT_UINT_2X16 = 0x08, /**< 2 channel unsigned 16-bit integers */
|
||||
HIP_RES_VIEW_FORMAT_UINT_4X16 = 0x09, /**< 4 channel unsigned 16-bit integers */
|
||||
HIP_RES_VIEW_FORMAT_SINT_1X16 = 0x0a, /**< 1 channel signed 16-bit integers */
|
||||
HIP_RES_VIEW_FORMAT_SINT_2X16 = 0x0b, /**< 2 channel signed 16-bit integers */
|
||||
HIP_RES_VIEW_FORMAT_SINT_4X16 = 0x0c, /**< 4 channel signed 16-bit integers */
|
||||
HIP_RES_VIEW_FORMAT_UINT_1X32 = 0x0d, /**< 1 channel unsigned 32-bit integers */
|
||||
HIP_RES_VIEW_FORMAT_UINT_2X32 = 0x0e, /**< 2 channel unsigned 32-bit integers */
|
||||
HIP_RES_VIEW_FORMAT_UINT_4X32 = 0x0f, /**< 4 channel unsigned 32-bit integers */
|
||||
HIP_RES_VIEW_FORMAT_SINT_1X32 = 0x10, /**< 1 channel signed 32-bit integers */
|
||||
HIP_RES_VIEW_FORMAT_SINT_2X32 = 0x11, /**< 2 channel signed 32-bit integers */
|
||||
HIP_RES_VIEW_FORMAT_SINT_4X32 = 0x12, /**< 4 channel signed 32-bit integers */
|
||||
HIP_RES_VIEW_FORMAT_FLOAT_1X16 = 0x13, /**< 1 channel 16-bit floating point */
|
||||
HIP_RES_VIEW_FORMAT_FLOAT_2X16 = 0x14, /**< 2 channel 16-bit floating point */
|
||||
HIP_RES_VIEW_FORMAT_FLOAT_4X16 = 0x15, /**< 4 channel 16-bit floating point */
|
||||
HIP_RES_VIEW_FORMAT_FLOAT_1X32 = 0x16, /**< 1 channel 32-bit floating point */
|
||||
HIP_RES_VIEW_FORMAT_FLOAT_2X32 = 0x17, /**< 2 channel 32-bit floating point */
|
||||
HIP_RES_VIEW_FORMAT_FLOAT_4X32 = 0x18, /**< 4 channel 32-bit floating point */
|
||||
HIP_RES_VIEW_FORMAT_UNSIGNED_BC1 = 0x19, /**< Block compressed 1 */
|
||||
HIP_RES_VIEW_FORMAT_UNSIGNED_BC2 = 0x1a, /**< Block compressed 2 */
|
||||
HIP_RES_VIEW_FORMAT_UNSIGNED_BC3 = 0x1b, /**< Block compressed 3 */
|
||||
HIP_RES_VIEW_FORMAT_UNSIGNED_BC4 = 0x1c, /**< Block compressed 4 unsigned */
|
||||
HIP_RES_VIEW_FORMAT_SIGNED_BC4 = 0x1d, /**< Block compressed 4 signed */
|
||||
HIP_RES_VIEW_FORMAT_UNSIGNED_BC5 = 0x1e, /**< Block compressed 5 unsigned */
|
||||
HIP_RES_VIEW_FORMAT_SIGNED_BC5 = 0x1f, /**< Block compressed 5 signed */
|
||||
HIP_RES_VIEW_FORMAT_UNSIGNED_BC6H = 0x20, /**< Block compressed 6 unsigned half-float */
|
||||
HIP_RES_VIEW_FORMAT_SIGNED_BC6H = 0x21, /**< Block compressed 6 signed half-float */
|
||||
HIP_RES_VIEW_FORMAT_UNSIGNED_BC7 = 0x22 /**< Block compressed 7 */
|
||||
} HIPresourceViewFormat;
|
||||
|
||||
/**
|
||||
* HIP resource descriptor
|
||||
*/
|
||||
@@ -204,6 +284,39 @@ typedef struct hipResourceDesc {
|
||||
} res;
|
||||
}hipResourceDesc;
|
||||
|
||||
typedef struct HIP_RESOURCE_DESC_st
|
||||
{
|
||||
HIPresourcetype resType; /**< Resource type */
|
||||
|
||||
union {
|
||||
struct {
|
||||
hipArray_t hArray; /**< HIP array */
|
||||
} array;
|
||||
struct {
|
||||
hipMipmappedArray_t hMipmappedArray; /**< HIP mipmapped array */
|
||||
} mipmap;
|
||||
struct {
|
||||
hipDeviceptr_t devPtr; /**< Device pointer */
|
||||
hipArray_Format format; /**< Array format */
|
||||
unsigned int numChannels; /**< Channels per array element */
|
||||
size_t sizeInBytes; /**< Size in bytes */
|
||||
} linear;
|
||||
struct {
|
||||
hipDeviceptr_t devPtr; /**< Device pointer */
|
||||
hipArray_Format format; /**< Array format */
|
||||
unsigned int numChannels; /**< Channels per array element */
|
||||
size_t width; /**< Width of the array in elements */
|
||||
size_t height; /**< Height of the array in elements */
|
||||
size_t pitchInBytes; /**< Pitch between two rows in bytes */
|
||||
} pitch2D;
|
||||
struct {
|
||||
int reserved[32];
|
||||
} reserved;
|
||||
} res;
|
||||
|
||||
unsigned int flags; /**< Flags (must be zero) */
|
||||
} HIP_RESOURCE_DESC;
|
||||
|
||||
/**
|
||||
* hip resource view descriptor
|
||||
*/
|
||||
@@ -218,6 +331,22 @@ struct hipResourceViewDesc {
|
||||
unsigned int lastLayer;
|
||||
};
|
||||
|
||||
/**
|
||||
* Resource view descriptor
|
||||
*/
|
||||
typedef struct HIP_RESOURCE_VIEW_DESC_st
|
||||
{
|
||||
HIPresourceViewFormat format; /**< Resource view format */
|
||||
size_t width; /**< Width of the resource view */
|
||||
size_t height; /**< Height of the resource view */
|
||||
size_t depth; /**< Depth of the resource view */
|
||||
unsigned int firstMipmapLevel; /**< First defined mipmap level */
|
||||
unsigned int lastMipmapLevel; /**< Last defined mipmap level */
|
||||
unsigned int firstLayer; /**< First layer index */
|
||||
unsigned int lastLayer; /**< Last layer index */
|
||||
unsigned int reserved[16];
|
||||
} HIP_RESOURCE_VIEW_DESC;
|
||||
|
||||
/**
|
||||
* Memory copy types
|
||||
*
|
||||
@@ -263,26 +392,29 @@ typedef struct hipMemcpy3DParms {
|
||||
} hipMemcpy3DParms;
|
||||
|
||||
typedef struct HIP_MEMCPY3D {
|
||||
size_t Depth;
|
||||
size_t Height;
|
||||
size_t WidthInBytes;
|
||||
hipDeviceptr_t dstDevice;
|
||||
size_t dstHeight;
|
||||
void* dstHost;
|
||||
size_t dstLOD;
|
||||
hipMemoryType dstMemoryType;
|
||||
size_t dstPitch;
|
||||
size_t dstXInBytes;
|
||||
size_t dstY;
|
||||
size_t dstZ;
|
||||
void* reserved0;
|
||||
void* reserved1;
|
||||
hipDeviceptr_t srcDevice;
|
||||
size_t srcHeight;
|
||||
const void* srcHost;
|
||||
size_t srcLOD;
|
||||
hipMemoryType srcMemoryType;
|
||||
size_t srcPitch;
|
||||
unsigned int srcXInBytes;
|
||||
unsigned int srcY;
|
||||
unsigned int srcZ;
|
||||
unsigned int srcLOD;
|
||||
hipMemoryType srcMemoryType;
|
||||
const void* srcHost;
|
||||
hipDeviceptr_t srcDevice;
|
||||
hipArray_t srcArray;
|
||||
unsigned int srcPitch;
|
||||
unsigned int srcHeight;
|
||||
unsigned int dstXInBytes;
|
||||
unsigned int dstY;
|
||||
unsigned int dstZ;
|
||||
unsigned int dstLOD;
|
||||
hipMemoryType dstMemoryType;
|
||||
void* dstHost;
|
||||
hipDeviceptr_t dstDevice;
|
||||
hipArray_t dstArray;
|
||||
unsigned int dstPitch;
|
||||
unsigned int dstHeight;
|
||||
unsigned int WidthInBytes;
|
||||
unsigned int Height;
|
||||
unsigned int Depth;
|
||||
} HIP_MEMCPY3D;
|
||||
|
||||
static inline struct hipPitchedPtr make_hipPitchedPtr(void* d, size_t p, size_t xsz,
|
||||
|
||||
@@ -192,16 +192,6 @@ void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks,
|
||||
stream, &config[0]);
|
||||
}
|
||||
|
||||
inline
|
||||
__attribute__((visibility("hidden")))
|
||||
hipError_t hipExtLaunchMultiKernelMultiDevice(hipLaunchParams* launchParamsList,
|
||||
int numDevices, unsigned int flags) {
|
||||
hip_impl::hip_init();
|
||||
auto& ps = hip_impl::get_program_state();
|
||||
return ihipExtLaunchMultiKernelMultiDevice(launchParamsList, numDevices, flags, ps);
|
||||
|
||||
}
|
||||
|
||||
template <typename F>
|
||||
inline
|
||||
__attribute__((visibility("hidden")))
|
||||
|
||||
@@ -229,7 +229,7 @@ THE SOFTWARE.
|
||||
__host__ __device__
|
||||
operator __half_raw() const { return __half_raw{data}; }
|
||||
__host__ __device__
|
||||
operator volatile __half_raw() const volatile
|
||||
operator __half_raw() const volatile
|
||||
{
|
||||
return __half_raw{data};
|
||||
}
|
||||
|
||||
@@ -108,9 +108,12 @@ extern int HIP_TRACE_API;
|
||||
#include <hip/hcc_detail/host_defines.h>
|
||||
#include <hip/hcc_detail/device_functions.h>
|
||||
#include <hip/hcc_detail/surface_functions.h>
|
||||
#include <hip/hcc_detail/texture_functions.h>
|
||||
#if __HCC__
|
||||
#include <hip/hcc_detail/math_functions.h>
|
||||
#include <hip/hcc_detail/texture_functions.h>
|
||||
#else
|
||||
#include <hip/hcc_detail/texture_fetch_functions.h>
|
||||
#include <hip/hcc_detail/texture_indirect_functions.h>
|
||||
#endif
|
||||
// TODO-HCC remove old definitions ; ~1602 hcc supports __HCC_ACCELERATOR__ define.
|
||||
#if defined(__KALMAR_ACCELERATOR__) && !defined(__HCC_ACCELERATOR__)
|
||||
@@ -385,7 +388,7 @@ extern void ihipPostLaunchKernel(const char* kernelName, hipStream_t stream, gri
|
||||
#elif defined(__clang__) && defined(__HIP__)
|
||||
|
||||
#define HIP_KERNEL_NAME(...) __VA_ARGS__
|
||||
#define HIP_SYMBOL(X) #X
|
||||
#define HIP_SYMBOL(X) X
|
||||
|
||||
typedef int hipLaunchParm;
|
||||
|
||||
|
||||
@@ -1482,18 +1482,18 @@ hipError_t hipMemcpyDtoDAsync(hipDeviceptr_t dst, hipDeviceptr_t src, size_t siz
|
||||
hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes,
|
||||
hipModule_t hmod, const char* name);
|
||||
|
||||
hipError_t hipGetSymbolAddress(void** devPtr, const void* symbolName);
|
||||
hipError_t hipGetSymbolSize(size_t* size, const void* symbolName);
|
||||
hipError_t hipMemcpyToSymbol(const void* symbolName, const void* src,
|
||||
hipError_t hipGetSymbolAddress(void** devPtr, const void* symbol);
|
||||
hipError_t hipGetSymbolSize(size_t* size, const void* symbol);
|
||||
hipError_t hipMemcpyToSymbol(const void* symbol, const void* src,
|
||||
size_t sizeBytes, size_t offset __dparm(0),
|
||||
hipMemcpyKind kind __dparm(hipMemcpyHostToDevice));
|
||||
hipError_t hipMemcpyToSymbolAsync(const void* symbolName, const void* src,
|
||||
hipError_t hipMemcpyToSymbolAsync(const void* symbol, const void* src,
|
||||
size_t sizeBytes, size_t offset,
|
||||
hipMemcpyKind kind, hipStream_t stream __dparm(0));
|
||||
hipError_t hipMemcpyFromSymbol(void* dst, const void* symbolName,
|
||||
hipError_t hipMemcpyFromSymbol(void* dst, const void* symbol,
|
||||
size_t sizeBytes, size_t offset __dparm(0),
|
||||
hipMemcpyKind kind __dparm(hipMemcpyDeviceToHost));
|
||||
hipError_t hipMemcpyFromSymbolAsync(void* dst, const void* symbolName,
|
||||
hipError_t hipMemcpyFromSymbolAsync(void* dst, const void* symbol,
|
||||
size_t sizeBytes, size_t offset,
|
||||
hipMemcpyKind kind,
|
||||
hipStream_t stream __dparm(0));
|
||||
@@ -1933,6 +1933,15 @@ hipError_t hipMalloc3D(hipPitchedPtr* pitchedDevPtr, hipExtent extent);
|
||||
*/
|
||||
hipError_t hipFreeArray(hipArray* array);
|
||||
|
||||
/**
|
||||
* @brief Frees a mipmapped array on the device
|
||||
*
|
||||
* @param[in] mipmappedArray - Pointer to mipmapped array to free
|
||||
*
|
||||
* @return #hipSuccess, #hipErrorInvalidValue
|
||||
*/
|
||||
hipError_t hipFreeMipmappedArray(hipMipmappedArray_t mipmappedArray);
|
||||
|
||||
/**
|
||||
* @brief Allocate an array on the device.
|
||||
*
|
||||
@@ -1947,6 +1956,39 @@ hipError_t hipFreeArray(hipArray* array);
|
||||
|
||||
hipError_t hipMalloc3DArray(hipArray** array, const struct hipChannelFormatDesc* desc,
|
||||
struct hipExtent extent, unsigned int flags);
|
||||
|
||||
/**
|
||||
* @brief Allocate a mipmapped array on the device
|
||||
*
|
||||
* @param[out] mipmappedArray - Pointer to allocated mipmapped array in device memory
|
||||
* @param[in] desc - Requested channel format
|
||||
* @param[in] extent - Requested allocation size (width field in elements)
|
||||
* @param[in] numLevels - Number of mipmap levels to allocate
|
||||
* @param[in] flags - Flags for extensions
|
||||
*
|
||||
* @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryAllocation
|
||||
*/
|
||||
hipError_t hipMallocMipmappedArray(
|
||||
hipMipmappedArray_t *mipmappedArray,
|
||||
const struct hipChannelFormatDesc* desc,
|
||||
struct hipExtent extent,
|
||||
unsigned int numLevels,
|
||||
unsigned int flags __dparm(0));
|
||||
|
||||
/**
|
||||
* @brief Gets a mipmap level of a HIP mipmapped array
|
||||
*
|
||||
* @param[out] levelArray - Returned mipmap level HIP array
|
||||
* @param[in] mipmappedArray - HIP mipmapped array
|
||||
* @param[in] level - Mipmap level
|
||||
*
|
||||
* @return #hipSuccess, #hipErrorInvalidValue
|
||||
*/
|
||||
hipError_t hipGetMipmappedArrayLevel(
|
||||
hipArray_t *levelArray,
|
||||
hipMipmappedArray_const_t mipmappedArray,
|
||||
unsigned int level);
|
||||
|
||||
/**
|
||||
* @brief Copies data between host and device.
|
||||
*
|
||||
@@ -2159,6 +2201,31 @@ hipError_t hipMemcpy3D(const struct hipMemcpy3DParms* p);
|
||||
*/
|
||||
hipError_t hipMemcpy3DAsync(const struct hipMemcpy3DParms* p, hipStream_t stream __dparm(0));
|
||||
|
||||
/**
|
||||
* @brief Copies data between host and device.
|
||||
*
|
||||
* @param[in] pCopy 3D memory copy parameters
|
||||
* @return #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue,
|
||||
* #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection
|
||||
*
|
||||
* @see hipMemcpy, hipMemcpy2DToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol,
|
||||
* hipMemcpyAsync
|
||||
*/
|
||||
hipError_t hipDrvMemcpy3D(const HIP_MEMCPY3D* pCopy);
|
||||
|
||||
/**
|
||||
* @brief Copies data between host and device asynchronously.
|
||||
*
|
||||
* @param[in] pCopy 3D memory copy parameters
|
||||
* @param[in] stream Stream to use
|
||||
* @return #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue,
|
||||
* #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection
|
||||
*
|
||||
* @see hipMemcpy, hipMemcpy2DToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol,
|
||||
* hipMemcpyAsync
|
||||
*/
|
||||
hipError_t hipDrvMemcpy3DAsync(const HIP_MEMCPY3D* pCopy, hipStream_t stream);
|
||||
|
||||
// doxygen end Memory
|
||||
/**
|
||||
* @}
|
||||
@@ -2957,17 +3024,6 @@ hipError_t hipModuleOccupancyMaxPotentialBlockSizeWithFlags(int* gridSize, int*
|
||||
hipFunction_t f, size_t dynSharedMemPerBlk,
|
||||
int blockSizeLimit, unsigned int flags);
|
||||
|
||||
/**
|
||||
* @brief Returns occupancy for a device function.
|
||||
*
|
||||
* @param [out] numBlocks Returned occupancy
|
||||
* @param [in] func Kernel function for which occupancy is calculated
|
||||
* @param [in] blockSize Block size the kernel is intended to be launched with
|
||||
* @param [in] dynSharedMemPerBlk dynamic shared memory usage (in bytes) intended for each block
|
||||
*/
|
||||
hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor(
|
||||
int* numBlocks, const void* f, int blockSize, size_t dynSharedMemPerBlk);
|
||||
|
||||
/**
|
||||
* @brief Returns occupancy for a device function.
|
||||
*
|
||||
@@ -2979,6 +3035,29 @@ hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor(
|
||||
hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessor(
|
||||
int* numBlocks, hipFunction_t f, int blockSize, size_t dynSharedMemPerBlk);
|
||||
|
||||
/**
|
||||
* @brief Returns occupancy for a device function.
|
||||
*
|
||||
* @param [out] numBlocks Returned occupancy
|
||||
* @param [in] f Kernel function(hipFunction_t) for which occupancy is calculated
|
||||
* @param [in] blockSize Block size the kernel is intended to be launched with
|
||||
* @param [in] dynSharedMemPerBlk dynamic shared memory usage (in bytes) intended for each block
|
||||
* @param [in] flags Extra flags for occupancy calculation (only default supported)
|
||||
*/
|
||||
hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(
|
||||
int* numBlocks, hipFunction_t f, int blockSize, size_t dynSharedMemPerBlk, unsigned int flags);
|
||||
|
||||
/**
|
||||
* @brief Returns occupancy for a device function.
|
||||
*
|
||||
* @param [out] numBlocks Returned occupancy
|
||||
* @param [in] func Kernel function for which occupancy is calculated
|
||||
* @param [in] blockSize Block size the kernel is intended to be launched with
|
||||
* @param [in] dynSharedMemPerBlk dynamic shared memory usage (in bytes) intended for each block
|
||||
*/
|
||||
hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor(
|
||||
int* numBlocks, const void* f, int blockSize, size_t dynSharedMemPerBlk);
|
||||
|
||||
/**
|
||||
* @brief Returns occupancy for a device function.
|
||||
*
|
||||
@@ -2992,18 +3071,20 @@ hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(
|
||||
int* numBlocks, const void* f, int blockSize, size_t dynSharedMemPerBlk, unsigned int flags __dparm(hipOccupancyDefault));
|
||||
|
||||
/**
|
||||
* @brief Returns occupancy for a device function.
|
||||
* @brief Determine the grid and block sizes that achieve maximum occupancy for a kernel
|
||||
*
|
||||
* @param [out] numBlocks Returned occupancy
|
||||
* @param [in] f Kernel function(hipFunction_t) for which occupancy is calculated
|
||||
* @param [in] blockSize Block size the kernel is intended to be launched with
|
||||
* @param [out] gridSize minimum grid size for maximum potential occupancy
|
||||
* @param [out] blockSize block size for maximum potential occupancy
|
||||
* @param [in] f kernel function for which occupancy is calculated
|
||||
* @param [in] dynSharedMemPerBlk dynamic shared memory usage (in bytes) intended for each block
|
||||
* @param [in] flags Extra flags for occupancy calculation (only default supported)
|
||||
* @param [in] blockSizeLimit the maximum block size for the kernel, use 0 for no limit
|
||||
*
|
||||
* @returns hipSuccess, hipErrorInvalidDevice, hipErrorInvalidValue
|
||||
*/
|
||||
hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(
|
||||
int* numBlocks, hipFunction_t f, int blockSize, size_t dynSharedMemPerBlk, unsigned int flags);
|
||||
hipError_t hipOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize,
|
||||
const void* f, size_t dynSharedMemPerBlk,
|
||||
int blockSizeLimit);
|
||||
|
||||
#if __HIP_VDI__ && !defined(__HCC__)
|
||||
/**
|
||||
* @brief Launches kernels on multiple devices and guarantees all specified kernels are dispatched
|
||||
* on respective streams before enqueuing any other work on the specified streams from any other threads
|
||||
@@ -3018,7 +3099,6 @@ hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(
|
||||
hipError_t hipExtLaunchMultiKernelMultiDevice(hipLaunchParams* launchParamsList,
|
||||
int numDevices, unsigned int flags);
|
||||
|
||||
#endif
|
||||
|
||||
// doxygen end Version Management
|
||||
/**
|
||||
@@ -3260,6 +3340,206 @@ hipError_t hipLaunchKernel(const void* function_address,
|
||||
size_t sharedMemBytes __dparm(0),
|
||||
hipStream_t stream __dparm(0));
|
||||
|
||||
#if __HIP_VDI__
|
||||
hipError_t hipBindTexture(
|
||||
size_t* offset,
|
||||
const textureReference* tex,
|
||||
const void* devPtr,
|
||||
const hipChannelFormatDesc* desc,
|
||||
size_t size = UINT_MAX);
|
||||
|
||||
hipError_t hipBindTexture2D(
|
||||
size_t* offset,
|
||||
const textureReference* tex,
|
||||
const void* devPtr,
|
||||
const hipChannelFormatDesc* desc,
|
||||
size_t width,
|
||||
size_t height,
|
||||
size_t pitch);
|
||||
|
||||
hipError_t hipBindTextureToArray(
|
||||
const textureReference* tex,
|
||||
hipArray_const_t array,
|
||||
const hipChannelFormatDesc* desc);
|
||||
|
||||
hipError_t hipBindTextureToMipmappedArray(
|
||||
const textureReference* tex,
|
||||
hipMipmappedArray_const_t mipmappedArray,
|
||||
const hipChannelFormatDesc* desc);
|
||||
|
||||
hipError_t hipGetTextureAlignmentOffset(
|
||||
size_t* offset,
|
||||
const textureReference* texref);
|
||||
|
||||
hipError_t hipGetTextureReference(
|
||||
const textureReference** texref,
|
||||
const void* symbol);
|
||||
|
||||
hipError_t hipUnbindTexture(const textureReference* tex);
|
||||
|
||||
hipError_t hipCreateTextureObject(
|
||||
hipTextureObject_t* pTexObject,
|
||||
const hipResourceDesc* pResDesc,
|
||||
const hipTextureDesc* pTexDesc,
|
||||
const hipResourceViewDesc* pResViewDesc);
|
||||
|
||||
hipError_t hipDestroyTextureObject(hipTextureObject_t textureObject);
|
||||
|
||||
hipError_t hipGetChannelDesc(
|
||||
hipChannelFormatDesc* desc,
|
||||
hipArray_const_t array);
|
||||
|
||||
hipError_t hipGetTextureObjectResourceDesc(
|
||||
hipResourceDesc* pResDesc,
|
||||
hipTextureObject_t textureObject);
|
||||
|
||||
hipError_t hipGetTextureObjectResourceViewDesc(
|
||||
hipResourceViewDesc* pResViewDesc,
|
||||
hipTextureObject_t textureObject);
|
||||
|
||||
hipError_t hipGetTextureObjectTextureDesc(
|
||||
hipTextureDesc* pTexDesc,
|
||||
hipTextureObject_t textureObject);
|
||||
|
||||
hipError_t hipTexRefGetAddress(
|
||||
hipDeviceptr_t* dev_ptr,
|
||||
const textureReference* texRef);
|
||||
|
||||
hipError_t hipTexRefGetAddressMode(
|
||||
hipTextureAddressMode* pam,
|
||||
const textureReference* texRef,
|
||||
int dim);
|
||||
|
||||
hipError_t hipTexRefGetFilterMode(
|
||||
hipTextureFilterMode* pfm,
|
||||
const textureReference* texRef);
|
||||
|
||||
hipError_t hipTexRefGetFlags(
|
||||
unsigned int* pFlags,
|
||||
const textureReference* texRef);
|
||||
|
||||
hipError_t hipTexRefGetFormat(
|
||||
hipArray_Format* pFormat,
|
||||
int* pNumChannels,
|
||||
const textureReference* texRef);
|
||||
|
||||
hipError_t hipTexRefGetMaxAnisotropy(
|
||||
int* pmaxAnsio,
|
||||
const textureReference* texRef);
|
||||
|
||||
hipError_t hipTexRefGetMipmapFilterMode(
|
||||
hipTextureFilterMode* pfm,
|
||||
const textureReference* texRef);
|
||||
|
||||
hipError_t hipTexRefGetMipmapLevelBias(
|
||||
float* pbias,
|
||||
const textureReference* texRef);
|
||||
|
||||
hipError_t hipTexRefGetMipmapLevelClamp(
|
||||
float* pminMipmapLevelClamp,
|
||||
float* pmaxMipmapLevelClamp,
|
||||
const textureReference* texRef);
|
||||
|
||||
hipError_t hipTexRefGetMipMappedArray(
|
||||
hipMipmappedArray_t* pArray,
|
||||
const textureReference* texRef);
|
||||
|
||||
hipError_t hipTexRefSetAddress(
|
||||
size_t* ByteOffset,
|
||||
textureReference* texRef,
|
||||
hipDeviceptr_t dptr,
|
||||
size_t bytes);
|
||||
|
||||
hipError_t hipTexRefSetAddress2D(
|
||||
textureReference* texRef,
|
||||
const HIP_ARRAY_DESCRIPTOR* desc,
|
||||
hipDeviceptr_t dptr,
|
||||
size_t Pitch);
|
||||
|
||||
hipError_t hipTexRefSetAddressMode(
|
||||
textureReference* texRef,
|
||||
int dim,
|
||||
hipTextureAddressMode am);
|
||||
|
||||
hipError_t hipTexRefSetArray(
|
||||
textureReference* tex,
|
||||
hipArray_const_t array,
|
||||
unsigned int flags);
|
||||
|
||||
hipError_t hipTexRefSetBorderColor(
|
||||
textureReference* texRef,
|
||||
float* pBorderColor);
|
||||
|
||||
hipError_t hipTexRefSetFilterMode(
|
||||
textureReference* texRef,
|
||||
hipTextureFilterMode fm);
|
||||
|
||||
hipError_t hipTexRefSetFlags(
|
||||
textureReference* texRef,
|
||||
unsigned int Flags);
|
||||
|
||||
hipError_t hipTexRefSetFormat(
|
||||
textureReference* texRef,
|
||||
hipArray_Format fmt,
|
||||
int NumPackedComponents);
|
||||
|
||||
hipError_t hipTexRefSetMaxAnisotropy(
|
||||
textureReference* texRef,
|
||||
unsigned int maxAniso);
|
||||
|
||||
hipError_t hipTexRefSetMipmapFilterMode(
|
||||
textureReference* texRef,
|
||||
hipTextureFilterMode fm);
|
||||
|
||||
hipError_t hipTexRefSetMipmapLevelBias(
|
||||
textureReference* texRef,
|
||||
float bias);
|
||||
|
||||
hipError_t hipTexRefSetMipmapLevelClamp(
|
||||
textureReference* texRef,
|
||||
float minMipMapLevelClamp,
|
||||
float maxMipMapLevelClamp);
|
||||
|
||||
hipError_t hipTexRefSetMipmappedArray(
|
||||
textureReference* texRef,
|
||||
hipMipmappedArray* mipmappedArray,
|
||||
unsigned int Flags);
|
||||
|
||||
hipError_t hipMipmappedArrayCreate(
|
||||
hipMipmappedArray_t* pHandle,
|
||||
HIP_ARRAY3D_DESCRIPTOR* pMipmappedArrayDesc,
|
||||
unsigned int numMipmapLevels);
|
||||
|
||||
hipError_t hipMipmappedArrayDestroy(
|
||||
hipMipmappedArray_t hMipmappedArray);
|
||||
|
||||
hipError_t hipMipmappedArrayGetLevel(
|
||||
hipArray_t* pLevelArray,
|
||||
hipMipmappedArray_t hMipMappedArray,
|
||||
unsigned int level);
|
||||
|
||||
hipError_t hipTexObjectCreate(
|
||||
hipTextureObject_t* pTexObject,
|
||||
const HIP_RESOURCE_DESC* pResDesc,
|
||||
const HIP_TEXTURE_DESC* pTexDesc,
|
||||
const HIP_RESOURCE_VIEW_DESC* pResViewDesc);
|
||||
|
||||
hipError_t hipTexObjectDestroy(
|
||||
hipTextureObject_t texObject);
|
||||
|
||||
hipError_t hipTexObjectGetResourceDesc(
|
||||
HIP_RESOURCE_DESC* pResDesc,
|
||||
hipTextureObject_t texObject);
|
||||
|
||||
hipError_t hipTexObjectGetResourceViewDesc(
|
||||
HIP_RESOURCE_VIEW_DESC* pResViewDesc,
|
||||
hipTextureObject_t texObject);
|
||||
|
||||
hipError_t hipTexObjectGetTextureDesc(
|
||||
HIP_TEXTURE_DESC* pTexDesc,
|
||||
hipTextureObject_t texObject);
|
||||
#endif
|
||||
|
||||
/**
|
||||
* @}
|
||||
*/
|
||||
@@ -3269,6 +3549,60 @@ hipError_t hipLaunchKernel(const void* function_address,
|
||||
} /* extern "c" */
|
||||
#endif
|
||||
|
||||
#if defined(__cplusplus) && !defined(__HCC__) && defined(__clang__) && defined(__HIP__)
|
||||
/**
 * C++ convenience overload of hipOccupancyMaxPotentialBlockSize that accepts the kernel
 * as an arbitrary type T.
 *
 * @p f is converted to const void* with reinterpret_cast and all arguments are forwarded
 * to hipOccupancyMaxPotentialBlockSize; the call's result is returned unchanged.
 * dynSharedMemPerBlk and blockSizeLimit both default to 0.
 */
template <typename T>
|
||||
static hipError_t __host__ inline hipOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize,
|
||||
T f, size_t dynSharedMemPerBlk = 0, int blockSizeLimit = 0) {
|
||||
return hipOccupancyMaxPotentialBlockSize(gridSize, blockSize, reinterpret_cast<const void*>(f),dynSharedMemPerBlk,blockSizeLimit);
|
||||
}
|
||||
|
||||
/**
 * C++ convenience wrapper with the same parameters as the templated
 * hipOccupancyMaxPotentialBlockSize plus a trailing @p flags argument (default 0).
 *
 * @p f is converted to const void* with reinterpret_cast and the call is forwarded to
 * hipOccupancyMaxPotentialBlockSize.
 *
 * NOTE(review): @p flags is accepted but never passed on; the forwarded call has no
 * flags argument, so any value supplied by the caller has no effect here. Confirm this
 * is intended.
 */
template <typename T>
|
||||
static hipError_t __host__ inline hipOccupancyMaxPotentialBlockSizeWithFlags(int* gridSize, int* blockSize,
|
||||
T f, size_t dynSharedMemPerBlk = 0, int blockSizeLimit = 0, unsigned int flags = 0 ) {
|
||||
return hipOccupancyMaxPotentialBlockSize(gridSize, blockSize, reinterpret_cast<const void*>(f),dynSharedMemPerBlk,blockSizeLimit);
|
||||
}
|
||||
#endif // defined(__cplusplus) && !defined(__HCC__) && defined(__clang__) && defined(__HIP__)
|
||||
|
||||
#if defined(__cplusplus) && !defined(__HCC__)
|
||||
|
||||
/**
 * Reference-taking overload of hipGetSymbolAddress.
 *
 * Takes the address of @p symbol, casts it to const void*, and forwards it together
 * with @p devPtr to the global-namespace ::hipGetSymbolAddress. The result of that
 * call is returned unchanged.
 */
template <typename T>
|
||||
hipError_t hipGetSymbolAddress(void** devPtr, const T &symbol) {
|
||||
return ::hipGetSymbolAddress(devPtr, (const void *)&symbol);
|
||||
}
|
||||
|
||||
/**
 * Reference-taking overload of hipGetSymbolSize.
 *
 * Takes the address of @p symbol, casts it to const void*, and forwards it together
 * with @p size to the global-namespace ::hipGetSymbolSize. The result of that call is
 * returned unchanged.
 */
template <typename T>
|
||||
hipError_t hipGetSymbolSize(size_t* size, const T &symbol) {
|
||||
return ::hipGetSymbolSize(size, (const void *)&symbol);
|
||||
}
|
||||
|
||||
/**
 * Reference-taking overload of hipMemcpyToSymbol.
 *
 * Passes the address of @p symbol (cast to const void*) plus the remaining arguments to
 * the global-namespace ::hipMemcpyToSymbol and returns its result. @p offset is declared
 * with __dparm(0) and @p kind with __dparm(hipMemcpyHostToDevice).
 */
template <typename T>
|
||||
hipError_t hipMemcpyToSymbol(const T& symbol, const void* src, size_t sizeBytes,
|
||||
size_t offset __dparm(0),
|
||||
hipMemcpyKind kind __dparm(hipMemcpyHostToDevice)) {
|
||||
return ::hipMemcpyToSymbol((const void*)&symbol, src, sizeBytes, offset, kind);
|
||||
}
|
||||
|
||||
/**
 * Reference-taking overload of hipMemcpyToSymbolAsync.
 *
 * Passes the address of @p symbol (cast to const void*) plus the remaining arguments,
 * including @p stream, to the global-namespace ::hipMemcpyToSymbolAsync and returns its
 * result. @p stream is declared with __dparm(0).
 */
template <typename T>
|
||||
hipError_t hipMemcpyToSymbolAsync(const T& symbol, const void* src, size_t sizeBytes, size_t offset,
|
||||
hipMemcpyKind kind, hipStream_t stream __dparm(0)) {
|
||||
return ::hipMemcpyToSymbolAsync((const void*)&symbol, src, sizeBytes, offset, kind, stream);
|
||||
}
|
||||
|
||||
/**
 * Reference-taking overload of hipMemcpyFromSymbol.
 *
 * Passes @p dst, the address of @p symbol (cast to const void*) and the remaining
 * arguments to the global-namespace ::hipMemcpyFromSymbol and returns its result.
 * @p offset is declared with __dparm(0) and @p kind with __dparm(hipMemcpyDeviceToHost).
 */
template <typename T>
|
||||
hipError_t hipMemcpyFromSymbol(void* dst, const T &symbol,
|
||||
size_t sizeBytes, size_t offset __dparm(0),
|
||||
hipMemcpyKind kind __dparm(hipMemcpyDeviceToHost)) {
|
||||
return ::hipMemcpyFromSymbol(dst, (const void*)&symbol, sizeBytes, offset, kind);
|
||||
}
|
||||
|
||||
/**
 * Reference-taking overload of hipMemcpyFromSymbolAsync.
 *
 * Passes @p dst, the address of @p symbol (cast to const void*) and the remaining
 * arguments, including @p stream, to the global-namespace ::hipMemcpyFromSymbolAsync and
 * returns its result. @p stream is declared with __dparm(0).
 */
template <typename T>
|
||||
hipError_t hipMemcpyFromSymbolAsync(void* dst, const T& symbol, size_t sizeBytes, size_t offset,
|
||||
hipMemcpyKind kind, hipStream_t stream __dparm(0)) {
|
||||
return ::hipMemcpyFromSymbolAsync(dst, (const void*)&symbol, sizeBytes, offset, kind, stream);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if USE_PROF_API
|
||||
#include <hip/hcc_detail/hip_prof_str.h>
|
||||
#endif
|
||||
@@ -3307,12 +3641,16 @@ inline hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(
|
||||
|
||||
class TlsData;
|
||||
|
||||
#if !__HIP_VDI__
|
||||
hipError_t hipBindTexture(size_t* offset, textureReference* tex, const void* devPtr,
|
||||
const hipChannelFormatDesc* desc, size_t size = UINT_MAX);
|
||||
#endif
|
||||
|
||||
#if !__HIP_VDI__
|
||||
hipError_t ihipBindTextureImpl(TlsData *tls, int dim, enum hipTextureReadMode readMode, size_t* offset,
|
||||
const void* devPtr, const struct hipChannelFormatDesc* desc,
|
||||
size_t size, textureReference* tex);
|
||||
#endif
|
||||
|
||||
/*
|
||||
* @brief hipBindTexture Binds size bytes of the memory area pointed to by @p devPtr to the texture
|
||||
@@ -3329,11 +3667,13 @@ hipError_t ihipBindTextureImpl(TlsData *tls, int dim, enum hipTextureReadMode re
|
||||
* @param[in] size - Size of the memory area pointed to by devPtr
|
||||
* @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree, #hipErrorUnknown
|
||||
**/
|
||||
#if !__HIP_VDI__
|
||||
template <class T, int dim, enum hipTextureReadMode readMode>
|
||||
hipError_t hipBindTexture(size_t* offset, struct texture<T, dim, readMode>& tex, const void* devPtr,
|
||||
const struct hipChannelFormatDesc& desc, size_t size = UINT_MAX) {
|
||||
return ihipBindTextureImpl(nullptr, dim, readMode, offset, devPtr, &desc, size, &tex);
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* @brief hipBindTexture Binds size bytes of the memory area pointed to by @p devPtr to the texture
|
||||
@@ -3349,81 +3689,114 @@ hipError_t hipBindTexture(size_t* offset, struct texture<T, dim, readMode>& tex,
|
||||
* @param[in] size - Size of the memory area pointed to by devPtr
|
||||
* @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree, #hipErrorUnknown
|
||||
**/
|
||||
#if !__HIP_VDI__
|
||||
template <class T, int dim, enum hipTextureReadMode readMode>
|
||||
hipError_t hipBindTexture(size_t* offset, struct texture<T, dim, readMode>& tex, const void* devPtr,
|
||||
size_t size = UINT_MAX) {
|
||||
return ihipBindTextureImpl(nullptr, dim, readMode, offset, devPtr, &(tex.channelDesc), size, &tex);
|
||||
}
|
||||
#endif
|
||||
|
||||
// C API
|
||||
#if !__HIP_VDI__
|
||||
hipError_t hipBindTexture2D(size_t* offset, textureReference* tex, const void* devPtr,
|
||||
const hipChannelFormatDesc* desc, size_t width, size_t height,
|
||||
size_t pitch);
|
||||
#endif
|
||||
|
||||
#if !__HIP_VDI__
|
||||
hipError_t ihipBindTexture2DImpl(int dim, enum hipTextureReadMode readMode, size_t* offset,
|
||||
const void* devPtr, const struct hipChannelFormatDesc* desc,
|
||||
size_t width, size_t height, textureReference* tex, size_t pitch);
|
||||
#endif
|
||||
|
||||
#if !__HIP_VDI__
|
||||
template <class T, int dim, enum hipTextureReadMode readMode>
|
||||
hipError_t hipBindTexture2D(size_t* offset, struct texture<T, dim, readMode>& tex,
|
||||
const void* devPtr, size_t width, size_t height, size_t pitch) {
|
||||
return ihipBindTexture2DImpl(dim, readMode, offset, devPtr, &(tex.channelDesc), width, height,
|
||||
&tex);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if !__HIP_VDI__
|
||||
template <class T, int dim, enum hipTextureReadMode readMode>
|
||||
hipError_t hipBindTexture2D(size_t* offset, struct texture<T, dim, readMode>& tex,
|
||||
const void* devPtr, const struct hipChannelFormatDesc& desc,
|
||||
size_t width, size_t height, size_t pitch) {
|
||||
return ihipBindTexture2DImpl(dim, readMode, offset, devPtr, &desc, width, height, &tex);
|
||||
}
|
||||
#endif
|
||||
|
||||
// C API
|
||||
#if !__HIP_VDI__
|
||||
hipError_t hipBindTextureToArray(textureReference* tex, hipArray_const_t array,
|
||||
const hipChannelFormatDesc* desc);
|
||||
#endif
|
||||
|
||||
#if !__HIP_VDI__
|
||||
hipError_t ihipBindTextureToArrayImpl(TlsData *tls, int dim, enum hipTextureReadMode readMode,
|
||||
hipArray_const_t array,
|
||||
const struct hipChannelFormatDesc& desc,
|
||||
textureReference* tex);
|
||||
#endif
|
||||
|
||||
#if !__HIP_VDI__
|
||||
template <class T, int dim, enum hipTextureReadMode readMode>
|
||||
hipError_t hipBindTextureToArray(struct texture<T, dim, readMode>& tex, hipArray_const_t array) {
|
||||
return ihipBindTextureToArrayImpl(nullptr, dim, readMode, array, tex.channelDesc, &tex);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if !__HIP_VDI__
|
||||
template <class T, int dim, enum hipTextureReadMode readMode>
|
||||
hipError_t hipBindTextureToArray(struct texture<T, dim, readMode>& tex, hipArray_const_t array,
|
||||
const struct hipChannelFormatDesc& desc) {
|
||||
return ihipBindTextureToArrayImpl(nullptr, dim, readMode, array, desc, &tex);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if !__HIP_VDI__
|
||||
template <class T, int dim, enum hipTextureReadMode readMode>
|
||||
inline static hipError_t hipBindTextureToArray(struct texture<T, dim, readMode> *tex,
|
||||
hipArray_const_t array,
|
||||
const struct hipChannelFormatDesc* desc) {
|
||||
return ihipBindTextureToArrayImpl(nullptr, dim, readMode, array, *desc, tex);
|
||||
}
|
||||
#endif
|
||||
|
||||
// C API
|
||||
#if !__HIP_VDI__
|
||||
hipError_t hipBindTextureToMipmappedArray(const textureReference* tex,
|
||||
hipMipmappedArray_const_t mipmappedArray,
|
||||
const hipChannelFormatDesc* desc);
|
||||
#endif
|
||||
|
||||
#if !__HIP_VDI__
|
||||
template <class T, int dim, enum hipTextureReadMode readMode>
|
||||
hipError_t hipBindTextureToMipmappedArray(const texture<T, dim, readMode>& tex,
|
||||
hipMipmappedArray_const_t mipmappedArray) {
|
||||
return hipSuccess;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if !__HIP_VDI__
|
||||
template <class T, int dim, enum hipTextureReadMode readMode>
|
||||
hipError_t hipBindTextureToMipmappedArray(const texture<T, dim, readMode>& tex,
|
||||
hipMipmappedArray_const_t mipmappedArray,
|
||||
const hipChannelFormatDesc& desc) {
|
||||
return hipSuccess;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if __HIP_VDI__ && !defined(__HCC__)
|
||||
|
||||
template <typename F>
|
||||
inline hipError_t hipOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize,
|
||||
F kernel, size_t dynSharedMemPerBlk, uint32_t blockSizeLimit) {
|
||||
return hipOccupancyMaxPotentialBlockSize(gridSize, blockSize,(hipFunction_t)kernel, dynSharedMemPerBlk, blockSizeLimit);
|
||||
}
|
||||
|
||||
template <class T>
|
||||
inline hipError_t hipLaunchCooperativeKernel(T f, dim3 gridDim, dim3 blockDim,
|
||||
void** kernelParams, unsigned int sharedMemBytes, hipStream_t stream) {
|
||||
@@ -3453,15 +3826,22 @@ inline hipError_t hipExtLaunchMultiKernelMultiDevice(hipLaunchParams* launchPara
|
||||
*
|
||||
* @return #hipSuccess
|
||||
**/
|
||||
#if !__HIP_VDI__
|
||||
hipError_t hipUnbindTexture(const textureReference* tex);
|
||||
#endif
|
||||
|
||||
#if !__HIP_VDI__
|
||||
extern hipError_t ihipUnbindTextureImpl(const hipTextureObject_t& textureObject);
|
||||
#endif
|
||||
|
||||
#if !__HIP_VDI__
|
||||
template <class T, int dim, enum hipTextureReadMode readMode>
|
||||
hipError_t hipUnbindTexture(struct texture<T, dim, readMode>& tex) {
|
||||
return ihipUnbindTextureImpl(tex.textureObject);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if !__HIP_VDI__
|
||||
hipError_t hipGetChannelDesc(hipChannelFormatDesc* desc, hipArray_const_t array);
|
||||
hipError_t hipGetTextureAlignmentOffset(size_t* offset, const textureReference* texref);
|
||||
hipError_t hipGetTextureReference(const textureReference** texref, const void* symbol);
|
||||
@@ -3499,11 +3879,110 @@ hipError_t hipTexRefGetAddress(hipDeviceptr_t* dev_ptr, textureReference tex);
|
||||
|
||||
hipError_t hipTexRefSetAddress2D(textureReference* tex, const HIP_ARRAY_DESCRIPTOR* desc,
|
||||
hipDeviceptr_t devPtr, size_t pitch);
|
||||
#endif
|
||||
|
||||
// Surface object entry points, available regardless of __HIP_VDI__. Only declared
// in this header. NOTE(review): presumably `pSurfObject` is an out-parameter
// receiving the new handle -- confirm against the implementation.
hipError_t hipCreateSurfaceObject(hipSurfaceObject_t* pSurfObject,
                                  const hipResourceDesc* pResDesc);

hipError_t hipDestroySurfaceObject(hipSurfaceObject_t surfaceObject);
|
||||
|
||||
#if __HIP_VDI__
|
||||
template<class T, int dim, enum hipTextureReadMode readMode>
|
||||
static inline hipError_t hipBindTexture(
|
||||
size_t *offset,
|
||||
const struct texture<T, dim, readMode> &tex,
|
||||
const void *devPtr,
|
||||
size_t size = UINT_MAX)
|
||||
{
|
||||
return hipBindTexture(offset, tex, devPtr, tex.channelDesc, size);
|
||||
}
|
||||
|
||||
template<class T, int dim, enum hipTextureReadMode readMode>
|
||||
static inline hipError_t hipBindTexture(
|
||||
size_t *offset,
|
||||
const struct texture<T, dim, readMode> &tex,
|
||||
const void *devPtr,
|
||||
const struct hipChannelFormatDesc &desc,
|
||||
size_t size = UINT_MAX)
|
||||
{
|
||||
return hipBindTexture(offset, &tex, devPtr, &desc, size);
|
||||
}
|
||||
|
||||
template<class T, int dim, enum hipTextureReadMode readMode>
|
||||
static inline hipError_t hipBindTexture2D(
|
||||
size_t *offset,
|
||||
const struct texture<T, dim, readMode> &tex,
|
||||
const void *devPtr,
|
||||
size_t width,
|
||||
size_t height,
|
||||
size_t pitch)
|
||||
{
|
||||
return hipBindTexture2D(offset, &tex, devPtr, &tex.channelDesc, width, height, pitch);
|
||||
}
|
||||
|
||||
template<class T, int dim, enum hipTextureReadMode readMode>
|
||||
static inline hipError_t hipBindTexture2D(
|
||||
size_t *offset,
|
||||
const struct texture<T, dim, readMode> &tex,
|
||||
const void *devPtr,
|
||||
const struct hipChannelFormatDesc &desc,
|
||||
size_t width,
|
||||
size_t height,
|
||||
size_t pitch)
|
||||
{
|
||||
return hipBindTexture2D(offset, &tex, devPtr, &desc, width, height, pitch);
|
||||
}
|
||||
|
||||
template<class T, int dim, enum hipTextureReadMode readMode>
|
||||
static inline hipError_t hipBindTextureToArray(
|
||||
const struct texture<T, dim, readMode> &tex,
|
||||
hipArray_const_t array)
|
||||
{
|
||||
struct cudaChannelFormatDesc desc;
|
||||
hipError_t err = hipGetChannelDesc(&desc, array);
|
||||
return (err == hipSuccess) ? hipBindTextureToArray(tex, array, desc) : err;
|
||||
}
|
||||
|
||||
template<class T, int dim, enum hipTextureReadMode readMode>
|
||||
static inline hipError_t hipBindTextureToArray(
|
||||
const struct texture<T, dim, readMode> &tex,
|
||||
hipArray_const_t array,
|
||||
const struct hipChannelFormatDesc &desc)
|
||||
{
|
||||
return hipBindTextureToArray(&tex, array, &desc);
|
||||
}
|
||||
|
||||
template<class T, int dim, enum hipTextureReadMode readMode>
|
||||
static inline hipError_t hipBindTextureToMipmappedArray(
|
||||
const struct texture<T, dim, readMode> &tex,
|
||||
hipMipmappedArray_const_t mipmappedArray)
|
||||
{
|
||||
struct hipChannelFormatDesc desc;
|
||||
hipArray_t levelArray;
|
||||
hipError_t err = hipGetMipmappedArrayLevel(&levelArray, mipmappedArray, 0);
|
||||
if (err != hipSuccess) {
|
||||
return err;
|
||||
}
|
||||
err = hipGetChannelDesc(&desc, levelArray);
|
||||
return (err == hipSuccess) ? hipBindTextureToMipmappedArray(tex, mipmappedArray, desc) : err;
|
||||
}
|
||||
|
||||
template<class T, int dim, enum hipTextureReadMode readMode>
|
||||
static inline hipError_t hipBindTextureToMipmappedArray(
|
||||
const struct texture<T, dim, readMode> &tex,
|
||||
hipMipmappedArray_const_t mipmappedArray,
|
||||
const struct cudaChannelFormatDesc &desc)
|
||||
{
|
||||
return hipBindTextureToMipmappedArray(&tex, mipmappedArray, &desc);
|
||||
}
|
||||
|
||||
template<class T, int dim, enum hipTextureReadMode readMode>
|
||||
static inline hipError_t hipUnbindTexture(
|
||||
const struct texture<T, dim, readMode> &tex)
|
||||
{
|
||||
return hipUnbindTexture(&tex);
|
||||
}
|
||||
#endif
|
||||
|
||||
// doxygen end Texture
|
||||
/**
|
||||
* @}
|
||||
|
||||
@@ -57,25 +57,27 @@ struct __HIP_TEXTURE_ATTRIB texture : public textureReference {
|
||||
texture(int norm = 0, enum hipTextureFilterMode fMode = hipFilterModePoint,
|
||||
enum hipTextureAddressMode aMode = hipAddressModeClamp) {
|
||||
normalized = norm;
|
||||
readMode = hipReadModeNormalizedFloat;
|
||||
readMode = mode;
|
||||
filterMode = fMode;
|
||||
addressMode[0] = aMode;
|
||||
addressMode[1] = aMode;
|
||||
addressMode[2] = aMode;
|
||||
channelDesc = hipCreateChannelDesc<T>();
|
||||
sRGB = 0;
|
||||
textureObject = nullptr;
|
||||
}
|
||||
|
||||
texture(int norm, enum hipTextureFilterMode fMode, enum hipTextureAddressMode aMode,
|
||||
struct hipChannelFormatDesc desc) {
|
||||
normalized = norm;
|
||||
readMode = hipReadModeNormalizedFloat;
|
||||
readMode = mode;
|
||||
filterMode = fMode;
|
||||
addressMode[0] = aMode;
|
||||
addressMode[1] = aMode;
|
||||
addressMode[2] = aMode;
|
||||
channelDesc = desc;
|
||||
sRGB = 0;
|
||||
textureObject = nullptr;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -28,7 +28,9 @@ extern "C" {
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#if !defined(_WIN32)
|
||||
#pragma GCC visibility push (default)
|
||||
#endif
|
||||
|
||||
enum hiprtcResult {
|
||||
HIPRTC_SUCCESS = 0,
|
||||
@@ -81,7 +83,9 @@ hiprtcResult hiprtcGetCode(hiprtcProgram prog, char* code);
|
||||
|
||||
hiprtcResult hiprtcGetCodeSize(hiprtcProgram prog, size_t* codeSizeRet);
|
||||
|
||||
#if !defined(_WIN32)
|
||||
#pragma GCC visibility pop
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
||||
@@ -0,0 +1,135 @@
|
||||
/*
|
||||
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <hip/hip_vector_types.h>
|
||||
|
||||
extern "C" {
|
||||
|
||||
#define ADDRESS_SPACE_CONSTANT __attribute__((address_space(4)))
|
||||
|
||||
__device__ float4::Native_vec_ __ockl_image_load_1D(unsigned int ADDRESS_SPACE_CONSTANT*i, int c);
|
||||
|
||||
__device__ float4::Native_vec_ __ockl_image_load_1Db(unsigned int ADDRESS_SPACE_CONSTANT*i, int c);
|
||||
|
||||
__device__ float4::Native_vec_ __ockl_image_load_1Da(unsigned int ADDRESS_SPACE_CONSTANT*i, int2::Native_vec_ c);
|
||||
|
||||
__device__ float4::Native_vec_ __ockl_image_load_2D(unsigned int ADDRESS_SPACE_CONSTANT*i, int2::Native_vec_ c);
|
||||
|
||||
__device__ float4::Native_vec_ __ockl_image_load_2Da(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c);
|
||||
|
||||
__device__ float4::Native_vec_ __ockl_image_load_3D(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c);
|
||||
|
||||
__device__ float4::Native_vec_ __ockl_image_load_CM(unsigned int ADDRESS_SPACE_CONSTANT*i, int2::Native_vec_ c, int f);
|
||||
|
||||
__device__ float4::Native_vec_ __ockl_image_load_CMa(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, int f);
|
||||
|
||||
__device__ float4::Native_vec_ __ockl_image_load_lod_1D(unsigned int ADDRESS_SPACE_CONSTANT*i, int c, int l);
|
||||
|
||||
__device__ float4::Native_vec_ __ockl_image_load_lod_1Da(unsigned int ADDRESS_SPACE_CONSTANT*i, int2::Native_vec_ c, int l);
|
||||
|
||||
__device__ float4::Native_vec_ __ockl_image_load_lod_2D(unsigned int ADDRESS_SPACE_CONSTANT*i, int2::Native_vec_ c, int l);
|
||||
|
||||
__device__ float4::Native_vec_ __ockl_image_load_lod_2Da(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, int l);
|
||||
|
||||
__device__ float4::Native_vec_ __ockl_image_load_lod_3D(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, int l);
|
||||
|
||||
__device__ float4::Native_vec_ __ockl_image_load_lod_CM(unsigned int ADDRESS_SPACE_CONSTANT*i, int2::Native_vec_ c, int f, int l);
|
||||
|
||||
__device__ float4::Native_vec_ __ockl_image_load_lod_CMa(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, int f, int l);
|
||||
|
||||
__device__ void __ockl_image_store_1D(unsigned int ADDRESS_SPACE_CONSTANT*i, int c, float4::Native_vec_ p);
|
||||
|
||||
__device__ void __ockl_image_store_1Da(unsigned int ADDRESS_SPACE_CONSTANT*i, int2::Native_vec_ c, float4::Native_vec_ p);
|
||||
|
||||
__device__ void __ockl_image_store_2D(unsigned int ADDRESS_SPACE_CONSTANT*i, int2::Native_vec_ c, float4::Native_vec_ p);
|
||||
|
||||
__device__ void __ockl_image_store_2Da(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, float4::Native_vec_ p);
|
||||
|
||||
__device__ void __ockl_image_store_3D(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, float4::Native_vec_ p);
|
||||
|
||||
__device__ void __ockl_image_store_CM(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, float4::Native_vec_ p);
|
||||
|
||||
__device__ void __ockl_image_store_CMa(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, float4::Native_vec_ p);
|
||||
|
||||
__device__ void __ockl_image_store_lod_1D(unsigned int ADDRESS_SPACE_CONSTANT*i, int c, int l, float4::Native_vec_ p);
|
||||
|
||||
__device__ void __ockl_image_store_lod_1Da(unsigned int ADDRESS_SPACE_CONSTANT*i, int2::Native_vec_ c, int l, float4::Native_vec_ p);
|
||||
|
||||
__device__ void __ockl_image_store_lod_2D(unsigned int ADDRESS_SPACE_CONSTANT*i, int2::Native_vec_ c, int l, float4::Native_vec_ p);
|
||||
|
||||
__device__ void __ockl_image_store_lod_2Da(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, int l, float4::Native_vec_ p);
|
||||
|
||||
__device__ void __ockl_image_store_lod_3D(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, int l, float4::Native_vec_ p);
|
||||
|
||||
__device__ void __ockl_image_store_lod_CM(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, int l, float4::Native_vec_ p);
|
||||
|
||||
__device__ void __ockl_image_store_lod_CMa(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, int l, float4::Native_vec_ p);
|
||||
|
||||
__device__ float4::Native_vec_ __ockl_image_sample_1D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float c);
|
||||
|
||||
__device__ float4::Native_vec_ __ockl_image_sample_1Da(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float2::Native_vec_ c);
|
||||
|
||||
__device__ float4::Native_vec_ __ockl_image_sample_2D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float2::Native_vec_ c);
|
||||
|
||||
__device__ float4::Native_vec_ __ockl_image_sample_2Da(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float4::Native_vec_ c);
|
||||
|
||||
__device__ float4::Native_vec_ __ockl_image_sample_3D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float4::Native_vec_ c);
|
||||
|
||||
__device__ float4::Native_vec_ __ockl_image_sample_CM(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float4::Native_vec_ c);
|
||||
|
||||
__device__ float4::Native_vec_ __ockl_image_sample_CMa(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float4::Native_vec_ c);
|
||||
|
||||
__device__ float4::Native_vec_ __ockl_image_sample_grad_1D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float c, float dx, float dy);
|
||||
|
||||
__device__ float4::Native_vec_ __ockl_image_sample_grad_1Da(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float2::Native_vec_ c, float dx, float dy);
|
||||
|
||||
__device__ float4::Native_vec_ __ockl_image_sample_grad_2D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float2::Native_vec_ c, float2::Native_vec_ dx, float2::Native_vec_ dy);
|
||||
|
||||
__device__ float4::Native_vec_ __ockl_image_sample_grad_2Da(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float4::Native_vec_ c, float2::Native_vec_ dx, float2::Native_vec_ dy);
|
||||
|
||||
__device__ float4::Native_vec_ __ockl_image_sample_grad_3D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float4::Native_vec_ c, float4::Native_vec_ dx, float4::Native_vec_ dy);
|
||||
|
||||
__device__ float4::Native_vec_ __ockl_image_sample_lod_1D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float c, float l);
|
||||
|
||||
__device__ float4::Native_vec_ __ockl_image_sample_lod_1Da(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float2::Native_vec_ c, float l);
|
||||
|
||||
__device__ float4::Native_vec_ __ockl_image_sample_lod_2D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float2::Native_vec_ c, float l);
|
||||
|
||||
__device__ float4::Native_vec_ __ockl_image_sample_lod_2Da(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float4::Native_vec_ c, float l);
|
||||
|
||||
__device__ float4::Native_vec_ __ockl_image_sample_lod_3D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float4::Native_vec_ c, float l);
|
||||
|
||||
__device__ float4::Native_vec_ __ockl_image_sample_lod_CM(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float4::Native_vec_ c, float l);
|
||||
|
||||
__device__ float4::Native_vec_ __ockl_image_sample_lod_CMa(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float4::Native_vec_ c, float l);
|
||||
|
||||
__device__ float4::Native_vec_ __ockl_image_gather4r_2D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float2::Native_vec_ c);
|
||||
|
||||
__device__ float4::Native_vec_ __ockl_image_gather4g_2D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float2::Native_vec_ c);
|
||||
|
||||
__device__ float4::Native_vec_ __ockl_image_gather4b_2D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float2::Native_vec_ c);
|
||||
|
||||
__device__ float4::Native_vec_ __ockl_image_gather4a_2D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float2::Native_vec_ c);
|
||||
|
||||
};
|
||||
@@ -0,0 +1,386 @@
|
||||
/*
|
||||
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#if defined(__cplusplus)
|
||||
|
||||
#include <hip/hip_vector_types.h>
|
||||
#include <hip/texture_types.h>
|
||||
#include <hip/hcc_detail/ockl_image.h>
|
||||
|
||||
#include <type_traits>
|
||||
|
||||
// Expands, inside a fetch function with a texture named `t` in scope, to two locals:
//   i - t.textureObject cast to a pointer to constant-address-space dwords
//   s - i advanced by HIP_SAMPLER_OBJECT_OFFSET_DWORD dwords
// The expansion already ends in ';', so `TEXTURE_PARAMETERS_INIT;` leaves one
// harmless empty statement.
#define TEXTURE_PARAMETERS_INIT                                                        \
    unsigned int ADDRESS_SPACE_CONSTANT* i =                                           \
        (unsigned int ADDRESS_SPACE_CONSTANT*)t.textureObject;                         \
    unsigned int ADDRESS_SPACE_CONSTANT* s = i + HIP_SAMPLER_OBJECT_OFFSET_DWORD;
|
||||
|
||||
// Trait: `value` is true exactly when T is one of the scalar types accepted as a
// texture channel: char, unsigned char, short, unsigned short, int, unsigned int
// or float.
template<typename T>
struct __hip_is_tex_channel_type
{
    static constexpr bool value =
        // 8-bit channels
        (std::is_same<T, char>::value || std::is_same<T, unsigned char>::value) ||
        // 16-bit channels
        (std::is_same<T, short>::value || std::is_same<T, unsigned short>::value) ||
        // 32-bit integer channels
        (std::is_same<T, int>::value || std::is_same<T, unsigned int>::value) ||
        // floating point
        std::is_same<T, float>::value;
};
|
||||
|
||||
template<
|
||||
typename T,
|
||||
unsigned int rank>
|
||||
struct __hip_is_tex_channel_type<HIP_vector_type<T, rank>>
|
||||
{
|
||||
static constexpr bool value =
|
||||
__hip_is_tex_channel_type<T>::value &&
|
||||
((rank == 1) ||
|
||||
(rank == 2) ||
|
||||
(rank == 4));
|
||||
};
|
||||
|
||||
// Trait: `value` is true exactly when T is one of char, unsigned char, short or
// unsigned short -- the scalar types for which a hipReadModeNormalizedFloat
// return type is defined further below.
template<typename T>
struct __hip_is_tex_normalized_channel_type
{
    static constexpr bool value =
        // 8-bit channels
        (std::is_same<T, char>::value || std::is_same<T, unsigned char>::value) ||
        // 16-bit channels
        (std::is_same<T, short>::value || std::is_same<T, unsigned short>::value);
};
|
||||
|
||||
template<
|
||||
typename T,
|
||||
unsigned int rank>
|
||||
struct __hip_is_tex_normalized_channel_type<HIP_vector_type<T, rank>>
|
||||
{
|
||||
static constexpr bool value =
|
||||
__hip_is_tex_normalized_channel_type<T>::value &&
|
||||
((rank == 1) ||
|
||||
(rank == 2) ||
|
||||
(rank == 4));
|
||||
};
|
||||
|
||||
template <
|
||||
typename T,
|
||||
hipTextureReadMode readMode,
|
||||
typename Enable = void>
|
||||
struct __hip_tex_ret
|
||||
{
|
||||
static_assert(std::is_same<Enable, void>::value, "Invalid channel type!");
|
||||
};
|
||||
|
||||
template <
|
||||
typename T,
|
||||
hipTextureReadMode readMode>
|
||||
using __hip_tex_ret_t = typename __hip_tex_ret<T, readMode, bool>::type;
|
||||
|
||||
template <typename T>
|
||||
struct __hip_tex_ret<
|
||||
T,
|
||||
hipReadModeElementType,
|
||||
typename std::enable_if<__hip_is_tex_channel_type<T>::value, bool>::type>
|
||||
{
|
||||
using type = T;
|
||||
};
|
||||
|
||||
template<
|
||||
typename T,
|
||||
unsigned int rank>
|
||||
struct __hip_tex_ret<
|
||||
HIP_vector_type<T, rank>,
|
||||
hipReadModeElementType,
|
||||
typename std::enable_if<__hip_is_tex_channel_type<HIP_vector_type<T, rank>>::value, bool>::type>
|
||||
{
|
||||
using type = HIP_vector_type<__hip_tex_ret_t<T, hipReadModeElementType>, rank>;
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
struct __hip_tex_ret<
|
||||
T,
|
||||
hipReadModeNormalizedFloat,
|
||||
typename std::enable_if<__hip_is_tex_normalized_channel_type<T>::value, bool>::type>
|
||||
{
|
||||
using type = float;
|
||||
};
|
||||
|
||||
template<
|
||||
typename T,
|
||||
unsigned int rank>
|
||||
struct __hip_tex_ret<
|
||||
HIP_vector_type<T, rank>,
|
||||
hipReadModeNormalizedFloat,
|
||||
typename std::enable_if<__hip_is_tex_normalized_channel_type<HIP_vector_type<T, rank>>::value, bool>::type>
|
||||
{
|
||||
using type = HIP_vector_type<__hip_tex_ret_t<T, hipReadModeNormalizedFloat>, rank>;
|
||||
};
|
||||
|
||||
template <typename T, hipTextureReadMode readMode>
|
||||
static __forceinline__ __device__ __hip_tex_ret_t<T, readMode> tex1Dfetch(texture<T, hipTextureType1D, readMode> t, int x)
|
||||
{
|
||||
TEXTURE_PARAMETERS_INIT;
|
||||
auto tmp = __ockl_image_load_1Db(i, x);
|
||||
return *reinterpret_cast<__hip_tex_ret_t<T, readMode>*>(&tmp);
|
||||
}
|
||||
|
||||
template <typename T, hipTextureReadMode readMode>
|
||||
static __forceinline__ __device__ __hip_tex_ret_t<T, readMode> tex1D(texture<T, hipTextureType1D, readMode> t, float x)
|
||||
{
|
||||
TEXTURE_PARAMETERS_INIT;
|
||||
auto tmp = __ockl_image_sample_1D(i, s, x);
|
||||
return *reinterpret_cast<__hip_tex_ret_t<T, readMode>*>(&tmp);
|
||||
}
|
||||
|
||||
template <typename T, hipTextureReadMode readMode>
|
||||
static __forceinline__ __device__ __hip_tex_ret_t<T, readMode> tex2D(texture<T, hipTextureType2D, readMode> t, float x, float y)
|
||||
{
|
||||
TEXTURE_PARAMETERS_INIT;
|
||||
auto tmp = __ockl_image_sample_2D(i, s, float2(x, y).data);
|
||||
return *reinterpret_cast<__hip_tex_ret_t<T, readMode>*>(&tmp);
|
||||
}
|
||||
|
||||
template <typename T, hipTextureReadMode readMode>
|
||||
static __forceinline__ __device__ __hip_tex_ret_t<T, readMode> tex1DLayered(texture<T, hipTextureType1DLayered, readMode> t, float x, int layer)
|
||||
{
|
||||
TEXTURE_PARAMETERS_INIT;
|
||||
auto tmp = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
|
||||
return *reinterpret_cast<__hip_tex_ret_t<T, readMode>*>(&tmp);
|
||||
}
|
||||
|
||||
template <typename T, hipTextureReadMode readMode>
|
||||
static __forceinline__ __device__ __hip_tex_ret_t<T, readMode> tex2DLayered(texture<T, hipTextureType2DLayered, readMode> t, float x, float y, int layer)
|
||||
{
|
||||
TEXTURE_PARAMETERS_INIT;
|
||||
auto tmp = __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
|
||||
return *reinterpret_cast<__hip_tex_ret_t<T, readMode>*>(&tmp);
|
||||
}
|
||||
|
||||
template <typename T, hipTextureReadMode readMode>
|
||||
static __forceinline__ __device__ __hip_tex_ret_t<T, readMode> tex3D(texture<T, hipTextureType3D, readMode> t, float x, float y, float z)
|
||||
{
|
||||
TEXTURE_PARAMETERS_INIT;
|
||||
auto tmp = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
|
||||
return *reinterpret_cast<__hip_tex_ret_t<T, readMode>*>(&tmp);
|
||||
}
|
||||
|
||||
template <typename T, hipTextureReadMode readMode>
|
||||
static __forceinline__ __device__ __hip_tex_ret_t<T, readMode> texCubemap(texture<T, hipTextureTypeCubemap, readMode> t, float x, float y, float z)
|
||||
{
|
||||
TEXTURE_PARAMETERS_INIT;
|
||||
auto tmp = __ockl_image_sample_CM(i, s, float4(x, y, z, 0.0f).data);
|
||||
return *reinterpret_cast<__hip_tex_ret_t<T, readMode>*>(&tmp);
|
||||
}
|
||||
|
||||
template <typename T, hipTextureReadMode readMode>
|
||||
static __forceinline__ __device__ __hip_tex_ret_t<T, readMode> tex1DLod(texture<T, hipTextureType1D, readMode> t, float x, float level)
|
||||
{
|
||||
TEXTURE_PARAMETERS_INIT;
|
||||
auto tmp = __ockl_image_sample_lod_1D(i, s, x, level);
|
||||
return *reinterpret_cast<__hip_tex_ret_t<T, readMode>*>(&tmp);
|
||||
}
|
||||
|
||||
template <typename T, hipTextureReadMode readMode>
|
||||
static __forceinline__ __device__ __hip_tex_ret_t<T, readMode> tex2DLod(texture<T, hipTextureType2D, readMode> t, float x, float y, float level)
|
||||
{
|
||||
TEXTURE_PARAMETERS_INIT;
|
||||
auto tmp = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
|
||||
return *reinterpret_cast<__hip_tex_ret_t<T, readMode>*>(&tmp);
|
||||
}
|
||||
|
||||
template <typename T, hipTextureReadMode readMode>
|
||||
static __forceinline__ __device__ __hip_tex_ret_t<T, readMode> tex1DLayeredLod(texture<T, hipTextureType1DLayered, readMode> t, float x, int layer, float level)
|
||||
{
|
||||
TEXTURE_PARAMETERS_INIT;
|
||||
auto tmp = __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
|
||||
return *reinterpret_cast<__hip_tex_ret_t<T, readMode>*>(&tmp);
|
||||
}
|
||||
|
||||
template <typename T, hipTextureReadMode readMode>
|
||||
static __forceinline__ __device__ __hip_tex_ret_t<T, readMode> tex2DLayeredLod(texture<T, hipTextureType2DLayered, readMode> t, float x, float y, int layer, float level)
|
||||
{
|
||||
TEXTURE_PARAMETERS_INIT;
|
||||
auto tmp = __ockl_image_sample_lod_2Da(i, s, float4(x, y, layer, 0.0f).data, level);
|
||||
return *reinterpret_cast<__hip_tex_ret_t<T, readMode>*>(&tmp);
|
||||
}
|
||||
|
||||
template <typename T, hipTextureReadMode readMode>
|
||||
static __forceinline__ __device__ __hip_tex_ret_t<T, readMode> tex3DLod(texture<T, hipTextureType3D, readMode> t, float x, float y, float z, float level)
|
||||
{
|
||||
TEXTURE_PARAMETERS_INIT;
|
||||
auto tmp = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, level);
|
||||
return *reinterpret_cast<__hip_tex_ret_t<T, readMode>*>(&tmp);
|
||||
}
|
||||
|
||||
template <typename T, hipTextureReadMode readMode>
|
||||
static __forceinline__ __device__ __hip_tex_ret_t<T, readMode> texCubemapLod(texture<T, hipTextureTypeCubemap, readMode> t, float x, float y, float z, float level)
|
||||
{
|
||||
TEXTURE_PARAMETERS_INIT;
|
||||
auto tmp = __ockl_image_sample_lod_CM(i, s, float4(x, y, z, 0.0f).data, level);
|
||||
return *reinterpret_cast<__hip_tex_ret_t<T, readMode>*>(&tmp);
|
||||
}
|
||||
|
||||
template <typename T, hipTextureReadMode readMode>
|
||||
static __forceinline__ __device__ __hip_tex_ret_t<T, readMode> texCubemapLayered(texture<T, hipTextureTypeCubemapLayered, readMode> t, float x, float y, float z, int layer)
|
||||
{
|
||||
TEXTURE_PARAMETERS_INIT;
|
||||
auto tmp = __ockl_image_sample_CMa(i, s, float4(x, y, z, layer).data);
|
||||
return *reinterpret_cast<__hip_tex_ret_t<T, readMode>*>(&tmp);
|
||||
}
|
||||
|
||||
template <typename T, hipTextureReadMode readMode>
|
||||
static __forceinline__ __device__ __hip_tex_ret_t<T, readMode> texCubemapLayeredLod(texture<T, hipTextureTypeCubemapLayered, readMode> t, float x, float y, float z, int layer, float level)
|
||||
{
|
||||
TEXTURE_PARAMETERS_INIT;
|
||||
auto tmp = __ockl_image_sample_lod_CMa(i, s, float4(x, y, z, layer).data, level);
|
||||
return *reinterpret_cast<__hip_tex_ret_t<T, readMode>*>(&tmp);
|
||||
}
|
||||
|
||||
template <typename T, hipTextureReadMode readMode>
|
||||
static __forceinline__ __device__ __hip_tex_ret_t<T, readMode> texCubemapGrad(texture<T, hipTextureTypeCubemap, readMode> t, float x, float y, float z, float4 dPdx, float4 dPdy)
|
||||
{
|
||||
TEXTURE_PARAMETERS_INIT;
|
||||
// TODO missing in device libs.
|
||||
// auto tmp = __ockl_image_sample_grad_CM(i, s, float4(x, y, z, 0.0f).data, float4(dPdx.x, dPdx.y, dPdx.z, 0.0f).data, float4(dPdy.x, dPdy.y, dPdy.z, 0.0f).data);
|
||||
// return *reinterpret_cast<__hip_tex_ret_t<T, readMode>*>(&tmp);
|
||||
return {};
|
||||
}
|
||||
|
||||
template <typename T, hipTextureReadMode readMode>
|
||||
static __forceinline__ __device__ __hip_tex_ret_t<T, readMode> texCubemapLayeredGrad(texture<T, hipTextureTypeCubemapLayered, readMode> t, float x, float y, float z, int layer, float4 dPdx, float4 dPdy)
|
||||
{
|
||||
TEXTURE_PARAMETERS_INIT;
|
||||
// TODO missing in device libs.
|
||||
// auto tmp = __ockl_image_sample_grad_CMa(i, s, float4(x, y, z, layer).data, float4(dPdx.x, dPdx.y, dPdx.z, 0.0f).data, float4(dPdy.x, dPdy.y, dPdy.z, 0.0f).data);
|
||||
// return *reinterpret_cast<__hip_tex_ret_t<T, readMode>*>(&tmp);
|
||||
return {};
|
||||
}
|
||||
|
||||
template <typename T, hipTextureReadMode readMode>
|
||||
static __forceinline__ __device__ __hip_tex_ret_t<T, readMode> tex1DGrad(texture<T, hipTextureType1D, readMode> t, float x, float dPdx, float dPdy)
|
||||
{
|
||||
TEXTURE_PARAMETERS_INIT;
|
||||
auto tmp = __ockl_image_sample_grad_1D(i, s, x, dPdx, dPdy);
|
||||
return *reinterpret_cast<__hip_tex_ret_t<T, readMode>*>(&tmp);
|
||||
}
|
||||
|
||||
template <typename T, hipTextureReadMode readMode>
|
||||
static __forceinline__ __device__ __hip_tex_ret_t<T, readMode> tex2DGrad(texture<T, hipTextureType2D, readMode> t, float x, float y, float2 dPdx, float2 dPdy)
|
||||
{
|
||||
TEXTURE_PARAMETERS_INIT;
|
||||
auto tmp = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, float2(dPdx.x, dPdx.y).data, float2(dPdy.x, dPdy.y).data);
|
||||
return *reinterpret_cast<__hip_tex_ret_t<T, readMode>*>(&tmp);
|
||||
}
|
||||
|
||||
template <typename T, hipTextureReadMode readMode>
|
||||
static __forceinline__ __device__ __hip_tex_ret_t<T, readMode> tex1DLayeredGrad(texture<T, hipTextureType1DLayered, readMode> t, float x, int layer, float dPdx, float dPdy)
|
||||
{
|
||||
TEXTURE_PARAMETERS_INIT;
|
||||
auto tmp = __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dPdx, dPdy);
|
||||
return *reinterpret_cast<__hip_tex_ret_t<T, readMode>*>(&tmp);
|
||||
}
|
||||
|
||||
template <typename T, hipTextureReadMode readMode>
|
||||
static __forceinline__ __device__ __hip_tex_ret_t<T, readMode> tex2DLayeredGrad(texture<T, hipTextureType2DLayered, readMode> t, float x, float y, int layer, float2 dPdx, float2 dPdy)
|
||||
{
|
||||
TEXTURE_PARAMETERS_INIT;
|
||||
auto tmp = __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, float2(dPdx.x, dPdx.y).data, float2(dPdy.x, dPdy.y).data);
|
||||
return *reinterpret_cast<__hip_tex_ret_t<T, readMode>*>(&tmp);
|
||||
}
|
||||
|
||||
template <typename T, hipTextureReadMode readMode>
|
||||
static __forceinline__ __device__ __hip_tex_ret_t<T, readMode> tex3DGrad(texture<T, hipTextureType3D, readMode> t, float x, float y, float z, float4 dPdx, float4 dPdy)
|
||||
{
|
||||
TEXTURE_PARAMETERS_INIT;
|
||||
auto tmp = __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, float4(dPdx.x, dPdx.y, dPdx.z, 0.0f).data, float4(dPdy.x, dPdy.y, dPdy.z, 0.0f).data);
|
||||
return *reinterpret_cast<__hip_tex_ret_t<T, readMode>*>(&tmp);
|
||||
}
|
||||
|
||||
template <
|
||||
typename T,
|
||||
hipTextureReadMode readMode,
|
||||
typename Enable = void>
|
||||
struct __hip_tex2dgather_ret
|
||||
{
|
||||
static_assert(std::is_same<Enable, void>::value, "Invalid channel type!");
|
||||
};
|
||||
|
||||
template <
|
||||
typename T,
|
||||
hipTextureReadMode readMode>
|
||||
using __hip_tex2dgather_ret_t = typename __hip_tex2dgather_ret<T, readMode, bool>::type;
|
||||
|
||||
template <typename T>
|
||||
struct __hip_tex2dgather_ret<
|
||||
T,
|
||||
hipReadModeElementType,
|
||||
typename std::enable_if<__hip_is_tex_channel_type<T>::value, bool>::type>
|
||||
{
|
||||
using type = HIP_vector_type<T, 4>;
|
||||
};
|
||||
|
||||
template<
|
||||
typename T,
|
||||
unsigned int rank>
|
||||
struct __hip_tex2dgather_ret<
|
||||
HIP_vector_type<T, rank>,
|
||||
hipReadModeElementType,
|
||||
typename std::enable_if<__hip_is_tex_channel_type<HIP_vector_type<T, rank>>::value, bool>::type>
|
||||
{
|
||||
using type = HIP_vector_type<T, 4>;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct __hip_tex2dgather_ret<
|
||||
T,
|
||||
hipReadModeNormalizedFloat,
|
||||
typename std::enable_if<__hip_is_tex_normalized_channel_type<T>::value, bool>::type>
|
||||
{
|
||||
using type = float4;
|
||||
};
|
||||
|
||||
template <typename T, hipTextureReadMode readMode>
|
||||
static __forceinline__ __device__ __hip_tex2dgather_ret_t<T, readMode> tex2Dgather(texture<T, hipTextureType2D, readMode> t, float x, float y, int comp=0)
|
||||
{
|
||||
TEXTURE_PARAMETERS_INIT;
|
||||
switch (comp) {
|
||||
case 1: {
|
||||
auto tmp = __ockl_image_gather4g_2D(i, s, float2(x, y).data);
|
||||
return *reinterpret_cast<__hip_tex2dgather_ret_t<T, readMode>*>(&tmp);
|
||||
}
|
||||
case 2: {
|
||||
auto tmp = __ockl_image_gather4b_2D(i, s, float2(x, y).data);
|
||||
return *reinterpret_cast<__hip_tex2dgather_ret_t<T, readMode>*>(&tmp);
|
||||
}
|
||||
case 3: {
|
||||
auto tmp = __ockl_image_gather4a_2D(i, s, float2(x, y).data);
|
||||
return *reinterpret_cast<__hip_tex2dgather_ret_t<T, readMode>*>(&tmp);
|
||||
}
|
||||
default: {
|
||||
auto tmp = __ockl_image_gather4r_2D(i, s, float2(x, y).data);
|
||||
return *reinterpret_cast<__hip_tex2dgather_ret_t<T, readMode>*>(&tmp);
|
||||
}
|
||||
}
|
||||
return {};
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,501 @@
|
||||
/*
|
||||
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#if defined(__cplusplus)
|
||||
|
||||
#include <hip/hip_vector_types.h>
|
||||
#include <hip/hip_texture_types.h>
|
||||
#include <hip/hcc_detail/ockl_image.h>
|
||||
|
||||
#include <type_traits>
|
||||
|
||||
#define TEXTURE_OBJECT_PARAMETERS_INIT \
|
||||
unsigned int ADDRESS_SPACE_CONSTANT* i = (unsigned int ADDRESS_SPACE_CONSTANT*)textureObject; \
|
||||
unsigned int ADDRESS_SPACE_CONSTANT* s = i + HIP_SAMPLER_OBJECT_OFFSET_DWORD;
|
||||
|
||||
template<typename T>
|
||||
struct __hip_is_itex_channel_type
|
||||
{
|
||||
static constexpr bool value =
|
||||
std::is_same<T, char>::value ||
|
||||
std::is_same<T, unsigned char>::value ||
|
||||
std::is_same<T, short>::value ||
|
||||
std::is_same<T, unsigned short>::value ||
|
||||
std::is_same<T, int>::value ||
|
||||
std::is_same<T, unsigned int>::value ||
|
||||
std::is_same<T, float>::value;
|
||||
};
|
||||
|
||||
template<
|
||||
typename T,
|
||||
unsigned int rank>
|
||||
struct __hip_is_itex_channel_type<HIP_vector_type<T, rank>>
|
||||
{
|
||||
static constexpr bool value =
|
||||
__hip_is_itex_channel_type<T>::value &&
|
||||
((rank == 1) ||
|
||||
(rank == 2) ||
|
||||
(rank == 4));
|
||||
};
|
||||
|
||||
template <
|
||||
typename T,
|
||||
typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
|
||||
static __device__ T tex1Dfetch(hipTextureObject_t textureObject, int x)
|
||||
{
|
||||
TEXTURE_OBJECT_PARAMETERS_INIT
|
||||
auto tmp = __ockl_image_load_1Db(i, x);
|
||||
return *reinterpret_cast<T*>(&tmp);
|
||||
}
|
||||
|
||||
template <
|
||||
typename T,
|
||||
typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
|
||||
static __device__ void tex1Dfetch(T *ptr, hipTextureObject_t textureObject, int x)
|
||||
{
|
||||
*ptr = tex1Dfetch<T>(textureObject, x);
|
||||
}
|
||||
|
||||
template <
|
||||
typename T,
|
||||
typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
|
||||
static __device__ T tex1D(hipTextureObject_t textureObject, float x)
|
||||
{
|
||||
TEXTURE_OBJECT_PARAMETERS_INIT
|
||||
auto tmp = __ockl_image_sample_1D(i, s, x);
|
||||
return *reinterpret_cast<T*>(&tmp);
|
||||
}
|
||||
|
||||
template <
|
||||
typename T,
|
||||
typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
|
||||
static __device__ void tex1D(T *ptr, hipTextureObject_t textureObject, float x)
|
||||
{
|
||||
*ptr = tex1D<T>(textureObject, x);
|
||||
}
|
||||
|
||||
template <
|
||||
typename T,
|
||||
typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
|
||||
static __device__ T tex2D(hipTextureObject_t textureObject, float x, float y)
|
||||
{
|
||||
TEXTURE_OBJECT_PARAMETERS_INIT
|
||||
auto tmp = __ockl_image_sample_2D(i, s, float2(x, y).data);
|
||||
return *reinterpret_cast<T*>(&tmp);
|
||||
}
|
||||
|
||||
template <
|
||||
typename T,
|
||||
typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
|
||||
static __device__ void tex2D(T *ptr, hipTextureObject_t textureObject, float x, float y)
|
||||
{
|
||||
*ptr = tex2D<T>(textureObject, x, y);
|
||||
}
|
||||
|
||||
template <
|
||||
typename T,
|
||||
typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
|
||||
static __device__ T tex3D(hipTextureObject_t textureObject, float x, float y, float z)
|
||||
{
|
||||
TEXTURE_OBJECT_PARAMETERS_INIT
|
||||
auto tmp = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
|
||||
return *reinterpret_cast<T*>(&tmp);
|
||||
}
|
||||
|
||||
template <
|
||||
typename T,
|
||||
typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
|
||||
static __device__ void tex3D(T *ptr, hipTextureObject_t textureObject, float x, float y, float z)
|
||||
{
|
||||
*ptr = tex3D<T>(textureObject, x, y, z);
|
||||
}
|
||||
|
||||
template <
|
||||
typename T,
|
||||
typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
|
||||
static __device__ T tex1DLayered(hipTextureObject_t textureObject, float x, int layer)
|
||||
{
|
||||
TEXTURE_OBJECT_PARAMETERS_INIT
|
||||
auto tmp = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
|
||||
return *reinterpret_cast<T*>(&tmp);
|
||||
}
|
||||
|
||||
template <
|
||||
typename T,
|
||||
typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
|
||||
static __device__ void tex1DLayered(T *ptr, hipTextureObject_t textureObject, float x, int layer)
|
||||
{
|
||||
*ptr = tex1DLayered<T>(textureObject, x, layer);
|
||||
}
|
||||
|
||||
template <
|
||||
typename T,
|
||||
typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
|
||||
static __device__ T tex2DLayered(hipTextureObject_t textureObject, float x, float y, int layer)
|
||||
{
|
||||
TEXTURE_OBJECT_PARAMETERS_INIT
|
||||
auto tmp = __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
|
||||
return *reinterpret_cast<T*>(&tmp);
|
||||
}
|
||||
|
||||
template <
|
||||
typename T,
|
||||
typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
|
||||
static __device__ void tex2DLayered(T *ptr, hipTextureObject_t textureObject, float x, float y, int layer)
|
||||
{
|
||||
*ptr = tex1DLayered<T>(textureObject, x, y, layer);
|
||||
}
|
||||
|
||||
template <
|
||||
typename T,
|
||||
typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
|
||||
static __device__ T texCubemap(hipTextureObject_t textureObject, float x, float y, float z)
|
||||
{
|
||||
TEXTURE_OBJECT_PARAMETERS_INIT
|
||||
auto tmp = __ockl_image_sample_CM(i, s, float4(x, y, z, 0.0f).data);
|
||||
return *reinterpret_cast<T*>(&tmp);
|
||||
}
|
||||
|
||||
template <
|
||||
typename T,
|
||||
typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
|
||||
static __device__ void texCubemap(T *ptr, hipTextureObject_t textureObject, float x, float y, float z)
|
||||
{
|
||||
*ptr = texCubemap<T>(textureObject, x, y, z);
|
||||
}
|
||||
|
||||
template <
|
||||
typename T,
|
||||
typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
|
||||
static __device__ T texCubemapLayered(hipTextureObject_t textureObject, float x, float y, float z, int layer)
|
||||
{
|
||||
TEXTURE_OBJECT_PARAMETERS_INIT
|
||||
auto tmp = __ockl_image_sample_CMa(i, s, float4(x, y, z, layer).data);
|
||||
return *reinterpret_cast<T*>(&tmp);
|
||||
}
|
||||
|
||||
template <
|
||||
typename T,
|
||||
typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
|
||||
static __device__ void texCubemapLayered(T *ptr, hipTextureObject_t textureObject, float x, float y, float z, int layer)
|
||||
{
|
||||
*ptr = texCubemapLayered<T>(textureObject, x, y, z, layer);
|
||||
}
|
||||
|
||||
template <
|
||||
typename T,
|
||||
typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
|
||||
static __device__ T tex2Dgather(hipTextureObject_t textureObject, float x, float y, int comp = 0)
|
||||
{
|
||||
TEXTURE_OBJECT_PARAMETERS_INIT
|
||||
switch (comp) {
|
||||
case 1: {
|
||||
auto tmp = __ockl_image_gather4r_2D(i, s, float2(x, y).data);
|
||||
return *reinterpret_cast<T*>(&tmp);
|
||||
break;
|
||||
}
|
||||
case 2: {
|
||||
auto tmp = __ockl_image_gather4g_2D(i, s, float2(x, y).data);
|
||||
return *reinterpret_cast<T*>(&tmp);
|
||||
break;
|
||||
}
|
||||
case 3: {
|
||||
auto tmp = __ockl_image_gather4b_2D(i, s, float2(x, y).data);
|
||||
return *reinterpret_cast<T*>(&tmp);
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
auto tmp = __ockl_image_gather4a_2D(i, s, float2(x, y).data);
|
||||
return *reinterpret_cast<T*>(&tmp);
|
||||
break;
|
||||
}
|
||||
};
|
||||
return {};
|
||||
}
|
||||
|
||||
template <
|
||||
typename T,
|
||||
typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
|
||||
static __device__ void tex2Dgather(T *ptr, hipTextureObject_t textureObject, float x, float y, int comp = 0)
|
||||
{
|
||||
*ptr = texCubemapLayered<T>(textureObject, x, y, comp);
|
||||
}
|
||||
|
||||
template <
|
||||
typename T,
|
||||
typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
|
||||
static __device__ T tex1DLod(hipTextureObject_t textureObject, float x, float level)
|
||||
{
|
||||
TEXTURE_OBJECT_PARAMETERS_INIT
|
||||
auto tmp = __ockl_image_sample_lod_1D(i, s, x, level);
|
||||
return *reinterpret_cast<T*>(&tmp);
|
||||
}
|
||||
|
||||
template <
|
||||
typename T,
|
||||
typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
|
||||
static __device__ void tex1DLod(T *ptr, hipTextureObject_t textureObject, float x, float level)
|
||||
{
|
||||
*ptr = tex1DLod<T>(textureObject, x, level);
|
||||
}
|
||||
|
||||
template <
|
||||
typename T,
|
||||
typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
|
||||
static __device__ T tex2DLod(hipTextureObject_t textureObject, float x, float y, float level)
|
||||
{
|
||||
TEXTURE_OBJECT_PARAMETERS_INIT
|
||||
auto tmp = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
|
||||
return *reinterpret_cast<T*>(&tmp);
|
||||
}
|
||||
|
||||
template <
|
||||
typename T,
|
||||
typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
|
||||
static __device__ void tex2DLod(T *ptr, hipTextureObject_t textureObject, float x, float y, float level)
|
||||
{
|
||||
*ptr = tex2DLod<T>(textureObject, x, y, level);
|
||||
}
|
||||
|
||||
template <
|
||||
typename T,
|
||||
typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
|
||||
static __device__ T tex3DLod(hipTextureObject_t textureObject, float x, float y, float z, float level)
|
||||
{
|
||||
TEXTURE_OBJECT_PARAMETERS_INIT
|
||||
auto tmp = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, level);
|
||||
return *reinterpret_cast<T*>(&tmp);
|
||||
}
|
||||
|
||||
template <
|
||||
typename T,
|
||||
typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
|
||||
static __device__ void tex3DLod(T *ptr, hipTextureObject_t textureObject, float x, float y, float z, float level)
|
||||
{
|
||||
*ptr = tex3DLod<T>(textureObject, x, y, z, level);
|
||||
}
|
||||
|
||||
template <
|
||||
typename T,
|
||||
typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
|
||||
static __device__ T tex1DLayeredLod(hipTextureObject_t textureObject, float x, int layer, float level)
|
||||
{
|
||||
TEXTURE_OBJECT_PARAMETERS_INIT
|
||||
auto tmp = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
|
||||
return *reinterpret_cast<T*>(&tmp);
|
||||
}
|
||||
|
||||
template <
|
||||
typename T,
|
||||
typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
|
||||
static __device__ void tex1DLayeredLod(T *ptr, hipTextureObject_t textureObject, float x, int layer, float level)
|
||||
{
|
||||
*ptr = tex1DLayeredLod<T>(textureObject, x, layer, level);
|
||||
}
|
||||
|
||||
template <
|
||||
typename T,
|
||||
typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
|
||||
static __device__ T tex2DLayeredLod(hipTextureObject_t textureObject, float x, float y, int layer, float level)
|
||||
{
|
||||
TEXTURE_OBJECT_PARAMETERS_INIT
|
||||
auto tmp = __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
|
||||
return *reinterpret_cast<T*>(&tmp);
|
||||
}
|
||||
|
||||
template <
|
||||
typename T,
|
||||
typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
|
||||
static __device__ void tex2DLayeredLod(T *ptr, hipTextureObject_t textureObject, float x, float y, int layer, float level)
|
||||
{
|
||||
*ptr = tex2DLayeredLod<T>(textureObject, x, y, layer, level);
|
||||
}
|
||||
|
||||
template <
|
||||
typename T,
|
||||
typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
|
||||
static __device__ T texCubemapLod(hipTextureObject_t textureObject, float x, float y, float z, float level)
|
||||
{
|
||||
TEXTURE_OBJECT_PARAMETERS_INIT
|
||||
auto tmp = __ockl_image_sample_lod_CM(i, s, float4(x, y, z, 0.0f).data, level);
|
||||
return *reinterpret_cast<T*>(&tmp);
|
||||
}
|
||||
|
||||
template <
|
||||
typename T,
|
||||
typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
|
||||
static __device__ void texCubemapLod(T *ptr, hipTextureObject_t textureObject, float x, float y, float z, float level)
|
||||
{
|
||||
*ptr = texCubemapLod<T>(textureObject, x, y, z, level);
|
||||
}
|
||||
|
||||
template <
|
||||
typename T,
|
||||
typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
|
||||
static __device__ T texCubemapGrad(hipTextureObject_t textureObject, float x, float y, float z, float4 dPdx, float4 dPdy)
|
||||
{
|
||||
TEXTURE_OBJECT_PARAMETERS_INIT
|
||||
// TODO missing in device libs.
|
||||
// auto tmp = __ockl_image_sample_grad_CM(i, s, float4(x, y, z, 0.0f).data, float4(dPdx.x, dPdx.y, dPdx.z, 0.0f).data, float4(dPdy.x, dPdy.y, dPdy.z, 0.0f).data);
|
||||
// return *reinterpret_cast<T*>(&tmp);
|
||||
return {};
|
||||
}
|
||||
|
||||
template <
|
||||
typename T,
|
||||
typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
|
||||
static __device__ void texCubemapGrad(T *ptr, hipTextureObject_t textureObject, float x, float y, float z, float4 dPdx, float4 dPdy)
|
||||
{
|
||||
*ptr = texCubemapGrad<T>(textureObject, x, y, z, dPdx, dPdy);
|
||||
}
|
||||
|
||||
template <
|
||||
typename T,
|
||||
typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
|
||||
static __device__ T texCubemapLayeredLod(hipTextureObject_t textureObject, float x, float y, float z, int layer, float level)
|
||||
{
|
||||
TEXTURE_OBJECT_PARAMETERS_INIT
|
||||
auto tmp = __ockl_image_sample_lod_CMa(i, s, float4(x, y, z, layer).data, level);
|
||||
return *reinterpret_cast<T*>(&tmp);
|
||||
}
|
||||
|
||||
template <
|
||||
typename T,
|
||||
typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
|
||||
static __device__ void texCubemapLayeredLod(T *ptr, hipTextureObject_t textureObject, float x, float y, float z, int layer, float level)
|
||||
{
|
||||
*ptr = texCubemapLayeredLod<T>(textureObject, x, y, z, layer, level);
|
||||
}
|
||||
|
||||
template <
|
||||
typename T,
|
||||
typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
|
||||
static __device__ T tex1DGrad(hipTextureObject_t textureObject, float x, float dPdx, float dPdy)
|
||||
{
|
||||
TEXTURE_OBJECT_PARAMETERS_INIT
|
||||
auto tmp = __ockl_image_sample_grad_1D(i, s, x, dPdx, dPdy);
|
||||
return *reinterpret_cast<T*>(&tmp);
|
||||
}
|
||||
|
||||
template <
|
||||
typename T,
|
||||
typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
|
||||
static __device__ void tex1DGrad(T *ptr, hipTextureObject_t textureObject, float x, float dPdx, float dPdy)
|
||||
{
|
||||
*ptr = tex1DGrad<T>(textureObject, x, dPdx, dPdy);
|
||||
}
|
||||
|
||||
template <
|
||||
typename T,
|
||||
typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
|
||||
static __device__ T tex2DGrad(hipTextureObject_t textureObject, float x, float y, float2 dPdx, float2 dPdy)
|
||||
{
|
||||
TEXTURE_OBJECT_PARAMETERS_INIT
|
||||
auto tmp = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, float2(dPdx.x, dPdx.y).data, float2(dPdy.x, dPdy.y).data);
|
||||
return *reinterpret_cast<T*>(&tmp);
|
||||
}
|
||||
|
||||
template <
|
||||
typename T,
|
||||
typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
|
||||
static __device__ void tex2DGrad(T *ptr, hipTextureObject_t textureObject, float x, float y, float2 dPdx, float2 dPdy)
|
||||
{
|
||||
*ptr = tex2DGrad<T>(textureObject, x, y, dPdx, dPdy);
|
||||
}
|
||||
|
||||
template <
|
||||
typename T,
|
||||
typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
|
||||
static __device__ T tex3DGrad(hipTextureObject_t textureObject, float x, float y, float z, float4 dPdx, float4 dPdy)
|
||||
{
|
||||
TEXTURE_OBJECT_PARAMETERS_INIT
|
||||
auto tmp = __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, float4(dPdx.x, dPdx.y, dPdx.z, 0.0f).data, float4(dPdy.x, dPdy.y, dPdy.z, 0.0f).data);
|
||||
return *reinterpret_cast<T*>(&tmp);
|
||||
}
|
||||
|
||||
template <
|
||||
typename T,
|
||||
typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
|
||||
static __device__ void tex3DGrad(T *ptr, hipTextureObject_t textureObject, float x, float y, float z, float4 dPdx, float4 dPdy)
|
||||
{
|
||||
*ptr = tex3DGrad<T>(textureObject, x, y, z, dPdx, dPdy);
|
||||
}
|
||||
|
||||
template <
|
||||
typename T,
|
||||
typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
|
||||
static __device__ T tex1DLayeredGrad(hipTextureObject_t textureObject, float x, int layer, float dPdx, float dPdy)
|
||||
{
|
||||
TEXTURE_OBJECT_PARAMETERS_INIT
|
||||
auto tmp = __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dPdx, dPdy);
|
||||
return *reinterpret_cast<T*>(&tmp);
|
||||
}
|
||||
|
||||
template <
|
||||
typename T,
|
||||
typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
|
||||
static __device__ void tex1DLayeredGrad(T *ptr, hipTextureObject_t textureObject, float x, int layer, float dPdx, float dPdy)
|
||||
{
|
||||
*ptr = tex1DLayeredGrad<T>(textureObject, x, layer, dPdx, dPdy);
|
||||
}
|
||||
|
||||
template <
|
||||
typename T,
|
||||
typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
|
||||
static __device__ T tex2DLayeredGrad(hipTextureObject_t textureObject, float x, float y, int layer, float2 dPdx, float2 dPdy)
|
||||
{
|
||||
TEXTURE_OBJECT_PARAMETERS_INIT
|
||||
auto tmp = __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, float2(dPdx.x, dPdx.y).data, float2(dPdy.x, dPdy.y).data);
|
||||
return *reinterpret_cast<T*>(&tmp);
|
||||
}
|
||||
|
||||
template <
|
||||
typename T,
|
||||
typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
|
||||
static __device__ void tex2DLayeredGrad(T *ptr, hipTextureObject_t textureObject, float x, float y, int layer, float2 dPdx, float2 dPdy)
|
||||
{
|
||||
*ptr = tex2DLayeredGrad<T>(textureObject, x, y, layer, dPdx, dPdy);
|
||||
}
|
||||
|
||||
template <
|
||||
typename T,
|
||||
typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
|
||||
static __device__ T texCubemapLayeredGrad(hipTextureObject_t textureObject, float x, float y, float z, int layer, float4 dPdx, float4 dPdy)
|
||||
{
|
||||
TEXTURE_OBJECT_PARAMETERS_INIT
|
||||
// TODO missing in device libs.
|
||||
// auto tmp = __ockl_image_sample_grad_CMa(i, s, float4(x, y, z, layer).data, float4(dPdx.x, dPdx.y, dPdx.z, 0.0f).data, float4(dPdy.x, dPdy.y, dPdy.z, 0.0f).data);
|
||||
// return *reinterpret_cast<T*>(&tmp);
|
||||
return {};
|
||||
}
|
||||
|
||||
template <
|
||||
typename T,
|
||||
typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
|
||||
static __device__ void texCubemapLayeredGrad(T *ptr, hipTextureObject_t textureObject, float x, float y, float z, int layer, float4 dPdx, float4 dPdy)
|
||||
{
|
||||
*ptr = texCubemapLayeredGrad<T>(textureObject, x, y, z, layer, dPdx, dPdy);
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -109,7 +109,9 @@ hipError_t hipHccModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX,
|
||||
hipEvent_t stopEvent = nullptr)
|
||||
__attribute__((deprecated("use hipExtModuleLaunchKernel instead")));
|
||||
|
||||
#if !__HIP_VDI__ && defined(__cplusplus)
|
||||
//#if !__HIP_VDI__ && defined(__cplusplus)
|
||||
#if defined(__HIP_PLATFORM_HCC__) && GENERIC_GRID_LAUNCH == 1 && defined(__HCC__)
|
||||
//kernel_descriptor and hip_impl::make_kernarg are in "grid_launch_GGL.hpp"
|
||||
|
||||
namespace hip_impl {
|
||||
inline
|
||||
|
||||
@@ -336,7 +336,7 @@ typedef enum hipDeviceAttribute_t {
|
||||
hipDeviceAttributeTexturePitchAlignment, ///<Pitch alignment requirement for 2D texture references bound to pitched memory;
|
||||
hipDeviceAttributeKernelExecTimeout, ///<Run time limit for kernels executed on the device
|
||||
hipDeviceAttributeCanMapHostMemory, ///<Device can map host memory into device address space
|
||||
hipDeviceAttributeEccEnabled, ///<Device has ECC support enabled
|
||||
hipDeviceAttributeEccEnabled, ///<Device has ECC support enabled
|
||||
|
||||
hipDeviceAttributeCooperativeMultiDeviceUnmatchedFunc, ///< Supports cooperative launch on multiple
|
||||
///devices with unmatched functions
|
||||
@@ -344,9 +344,8 @@ typedef enum hipDeviceAttribute_t {
|
||||
///devices with unmatched grid dimensions
|
||||
hipDeviceAttributeCooperativeMultiDeviceUnmatchedBlockDim, ///< Supports cooperative launch on multiple
|
||||
///devices with unmatched block dimensions
|
||||
hipDeviceAttributeCooperativeMultiDeviceUnmatchedSharedMem, ///< Supports cooperative launch on multiple
|
||||
hipDeviceAttributeCooperativeMultiDeviceUnmatchedSharedMem ///< Supports cooperative launch on multiple
|
||||
///devices with unmatched shared memories
|
||||
|
||||
} hipDeviceAttribute_t;
|
||||
|
||||
enum hipComputeMode {
|
||||
|
||||
@@ -26,7 +26,7 @@ set(CPACK_BINARY_RPM "ON")
|
||||
set(CPACK_RPM_PACKAGE_ARCHITECTURE "${CMAKE_SYSTEM_PROCESSOR}")
|
||||
set(CPACK_RPM_PACKAGE_AUTOREQPROV " no")
|
||||
string(REPLACE "-" "_" HIP_BASE_VERSION ${CPACK_PACKAGE_VERSION})
|
||||
set(CPACK_RPM_PACKAGE_REQUIRES "hip-base = ${HIP_BASE_VERSION}")
|
||||
set(CPACK_RPM_PACKAGE_REQUIRES "hip-vdi = ${HIP_BASE_VERSION}")
|
||||
set(CPACK_RPM_PACKAGE_OBSOLETES "hip_samples")
|
||||
set(CPACK_RPM_PACKAGE_CONFLICTS "hip_samples")
|
||||
set(CPACK_RPM_EXCLUDE_FROM_AUTO_FILELIST_ADDITION "/opt")
|
||||
|
||||
Executable
+29
@@ -0,0 +1,29 @@
|
||||
#!/bin/bash
# Post-install step: creates relative symlinks in $ROCMDIR/lib that point at
# the libraries installed under $ROCMDIR/hip/lib, plus a lib/cmake/hip link
# to the HIP CMake package directory.

# Quiet wrappers: suppress the directory-stack listing printed by the builtins.
pushd () {
    command pushd "$@" > /dev/null
}

popd () {
    command popd "$@" > /dev/null
}


ROCMDIR="@ROCM_PATH@"
ROCMLIBDIR="$ROCMDIR/lib"
HIPDIR="$ROCMDIR/hip"
HIPLIBDIR="$ROCMDIR/hip/lib"

# Soft-link to library files
# The pattern is quoted so the shell passes it to grep verbatim instead of
# attempting pathname expansion on the bracket expression. Entries whose name
# contains '-', '/' or '$' are excluded.
HIPLIBFILES=$(ls -A "$HIPLIBDIR" | grep -v '[-/$]')
mkdir -p "$ROCMLIBDIR"
mkdir -p "$ROCMLIBDIR/cmake"
pushd "$ROCMLIBDIR"
# $HIPLIBFILES is intentionally unquoted: it is split into one name per word.
for f in $HIPLIBFILES
do
    ln -s -r -f "$HIPLIBDIR/$f" "$(basename "$f")"
done
# Make the hip cmake directory link.
pushd cmake
ln -s -r -f "$HIPLIBDIR/cmake/hip" hip
popd
popd
|
||||
Executable
+31
@@ -0,0 +1,31 @@
|
||||
#!/bin/bash
# Pre-remove step: deletes the symlinks in $ROCMDIR/lib that correspond to
# entries of $ROCMDIR/hip/lib, removes the lib/cmake/hip link, and removes the
# lib and lib/cmake directories if they end up empty.

# Quiet wrappers: suppress the directory-stack listing printed by the builtins.
pushd () {
    command pushd "$@" > /dev/null
}

popd () {
    command popd "$@" > /dev/null
}

ROCMDIR="@ROCM_PATH@"
ROCMLIBDIR="$ROCMDIR/lib"
HIPDIR="$ROCMDIR/hip"
HIPLIBDIR="$ROCMDIR/hip/lib"
# Nothing to clean up if any of the expected directories is missing.
([ ! -d "$ROCMDIR" ] || [ ! -d "$HIPDIR" ]) && exit 0
([ ! -d "$ROCMLIBDIR" ] || [ ! -d "$HIPLIBDIR" ]) && exit 0

# Remove soft-links to libraries
# The pattern is quoted so the shell passes it to grep verbatim instead of
# attempting pathname expansion on the bracket expression.
HIPLIBFILES=$(ls -A "$HIPLIBDIR" | grep -v '[-/$]')
pushd "$ROCMLIBDIR"
# $HIPLIBFILES is intentionally unquoted: it is split into one name per word.
for f in $HIPLIBFILES; do
    # -L also catches links whose target is already gone (-e follows links).
    [ -e "$f" ] || [ -L "$f" ] || continue
    rm "$(basename "$f")"
done
# Only touch the cmake link if we actually managed to enter the directory;
# otherwise `unlink hip` and `popd` would act on the wrong directory.
if pushd cmake; then
    unlink hip
    popd
    rmdir --ignore-fail-on-non-empty cmake
fi
popd
rmdir --ignore-fail-on-non-empty "$ROCMLIBDIR"
|
||||
|
||||
@@ -0,0 +1,57 @@
|
||||
cmake_minimum_required(VERSION 2.8.3)
project(hip_vdi)

# Libraries taken from the main build tree.
install(FILES
    @PROJECT_BINARY_DIR@/lib/libamdhip64.so
    @PROJECT_BINARY_DIR@/lib/libamdhip64_static.a
    @PROJECT_BINARY_DIR@/lib/libhip_hcc.so
    @PROJECT_BINARY_DIR@/lib/libhiprtc.so
    DESTINATION lib)

# Build-info file and the CMake package configuration files.
install(FILES @PROJECT_BINARY_DIR@/.hipInfo DESTINATION lib)
install(FILES
    @PROJECT_BINARY_DIR@/hip-config.cmake
    @PROJECT_BINARY_DIR@/hip-config-version.cmake
    DESTINATION lib/cmake/hip)

#############################
# Packaging steps
#############################
set(CPACK_SET_DESTDIR TRUE)
set(CPACK_INSTALL_PREFIX "/opt/rocm/hip")

## CMake-generated target files contain IMPORTED_LOCATION_RELEASE etc., which
## hold the installation path used when building the project. That path may
## differ from the installation path used for packaging, so it is rewritten to
## the package installation path here; otherwise apps using pkg-config will fail.
file(GLOB _target_files @CONFIG_PACKAGE_INSTALL_DIR@/hip-targets*.cmake)
foreach(_target_file ${_target_files})
    # In-place substitution of the build-time prefix with the package prefix.
    execute_process(
        COMMAND sed -i "s:@CMAKE_INSTALL_PREFIX@:${CPACK_INSTALL_PREFIX}:g" "${_target_file}")
endforeach()
install(FILES ${_target_files} DESTINATION lib/cmake/hip)

# --- Package identity ---------------------------------------------------
set(CPACK_PACKAGE_NAME "hip-vdi")
set(HCC_PACKAGE_NAME "vdi")
set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "HIP: Heterogenous-computing Interface for Portability [VDI]")
set(CPACK_PACKAGE_VENDOR "Advanced Micro Devices, Inc.")
set(CPACK_PACKAGE_CONTACT "Maneesh Gupta <maneesh.gupta@amd.com>")

# --- Version ------------------------------------------------------------
set(CPACK_PACKAGE_VERSION "@HIP_VERSION_MAJOR@.@HIP_VERSION_MINOR@.@HIP_VERSION_PATCH@")
set(CPACK_PACKAGE_VERSION_MAJOR @HIP_VERSION_MAJOR@)
set(CPACK_PACKAGE_VERSION_MINOR @HIP_VERSION_MINOR@)
set(CPACK_PACKAGE_VERSION_PATCH @HIP_VERSION_PATCH@)
set(CPACK_PACKAGE_FILE_NAME
    "${CPACK_PACKAGE_NAME}-${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}.${CPACK_PACKAGE_VERSION_PATCH}")
set(CPACK_GENERATOR "TGZ;DEB;RPM")

# --- Debian package -----------------------------------------------------
set(CPACK_BINARY_DEB "ON")
# postinst/prerm maintainer scripts are picked up from this project's binary dir.
set(CPACK_DEBIAN_PACKAGE_CONTROL_EXTRA "${PROJECT_BINARY_DIR}/postinst;${PROJECT_BINARY_DIR}/prerm")
set(CPACK_DEBIAN_PACKAGE_DEPENDS "hsa-rocr-dev, hsa-ext-rocr-dev, rocm-utils, hip-base (= ${CPACK_PACKAGE_VERSION}), comgr (>= 1.1), llvm-amdgpu")
set(CPACK_DEBIAN_PACKAGE_PROVIDES "hip_vdi, hip-hcc (= ${CPACK_PACKAGE_VERSION})")
set(CPACK_DEBIAN_PACKAGE_REPLACES "hip_vdi")
set(CPACK_DEBIAN_PACKAGE_CONFLICTS "hip_vdi")

# --- RPM package --------------------------------------------------------
set(CPACK_BINARY_RPM "ON")
set(CPACK_RPM_PACKAGE_ARCHITECTURE "${CMAKE_SYSTEM_PROCESSOR}")
set(CPACK_RPM_POST_INSTALL_SCRIPT_FILE "${PROJECT_BINARY_DIR}/postinst")
set(CPACK_RPM_PRE_UNINSTALL_SCRIPT_FILE "${PROJECT_BINARY_DIR}/prerm")
# NOTE(review): the leading space in " no" is kept as-is; presumably it stops
# the value from being treated as a false constant - confirm before changing.
set(CPACK_RPM_PACKAGE_AUTOREQPROV " no")
# The RPM dependency strings below use the version with '-' replaced by '_'.
string(REPLACE "-" "_" HIP_BASE_VERSION "${CPACK_PACKAGE_VERSION}")
set(CPACK_RPM_PACKAGE_REQUIRES "hsa-rocr-dev, hsa-ext-rocr-dev, rocm-utils, hip-base = ${HIP_BASE_VERSION}, comgr >= 1.1, llvm-amdgpu")
set(CPACK_RPM_PACKAGE_PROVIDES "hip_vdi, hip-hcc = ${HIP_BASE_VERSION}")
set(CPACK_RPM_PACKAGE_OBSOLETES "hip_vdi")
set(CPACK_RPM_PACKAGE_CONFLICTS "hip_vdi")
set(CPACK_RPM_EXCLUDE_FROM_AUTO_FILELIST_ADDITION "/opt")

set(CPACK_SOURCE_GENERATOR "TGZ")
include(CPack)
|
||||
@@ -109,7 +109,7 @@ int main() {
|
||||
/***********************************************************************************/
|
||||
|
||||
//Timing directly the dispatch
|
||||
#ifdef __HIP_PLATFORM_HCC__
|
||||
#if defined(__HIP_PLATFORM_HCC__) && GENERIC_GRID_LAUNCH == 1 && defined(__HCC__)
|
||||
for (auto i = 0; i < TOTAL_RUN_COUNT; ++i) {
|
||||
hipExtLaunchKernelGGL((EmptyKernel), dim3(NUM_GROUPS), dim3(GROUP_SIZE), 0, stream0, start, stop, 0);
|
||||
hipEventSynchronize(stop);
|
||||
|
||||
@@ -44,6 +44,8 @@ void multiplyCPU(float* C, float* A, float* B, int N){
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(__HIP_PLATFORM_HCC__) && GENERIC_GRID_LAUNCH == 1 && defined(__HCC__)
|
||||
|
||||
void launchKernel(float* C, float* A, float* B, bool manual){
|
||||
|
||||
hipDeviceProp_t devProp;
|
||||
@@ -93,8 +95,10 @@ void launchKernel(float* C, float* A, float* B, bool manual){
|
||||
std::cout << "Theoretical Occupancy is " << (double)numBlock* blockSize/devProp.maxThreadsPerMultiProcessor * 100 << "%" << std::endl;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
int main() {
|
||||
#if defined(__HIP_PLATFORM_HCC__) && GENERIC_GRID_LAUNCH == 1 && defined(__HCC__)
|
||||
float *A, *B, *C0, *C1, *cpuC;
|
||||
float *Ad, *Bd, *C0d, *C1d;
|
||||
int errors=0;
|
||||
@@ -173,4 +177,8 @@ int main() {
|
||||
free(C0);
|
||||
free(C1);
|
||||
free(cpuC);
|
||||
#else
|
||||
std::cout <<"hipOccupancyMaxPotentialBlockSize template not support for Clang compiler"<<std::endl;
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -0,0 +1,53 @@
|
||||
# Builds, runs and profiles the MatrixTranspose HIP sample.
HIP_PATH?= $(wildcard /opt/rocm/hip)
HIPCC=$(HIP_PATH)/bin/hipcc

# Profiler invocation; the post command is run on the generated MT.atp trace.
HIPPROFILER=/opt/rocm/bin/rocm-profiler
PROFILER_OPT=-A -o MT.atp -e HIP_PROFILE_API=1
HIPPROFILER_POST_CMD=$(HIP_PATH)/bin/hipdemangleatp MT.atp

TARGET=hcc

SOURCES = MatrixTranspose.cpp
OBJECTS = $(SOURCES:.cpp=.o)

EXECUTABLE=./MatrixTranspose

# These targets do not produce a file of the same name, so they are declared
# phony: a stray file called e.g. "clean" or "run" must not make them appear
# up to date. "test" is kept from the original declaration.
.PHONY: all profile profile_trigger run clean test

all: $(EXECUTABLE) profile

# Objects are built by make's implicit .cpp -> .o rule using CXX/CXXFLAGS.
OPT =-g
CXXFLAGS =$(OPT)
CXX=$(HIPCC)

$(EXECUTABLE): $(OBJECTS)
	$(HIPCC) $(OBJECTS) -o $@

profile: $(EXECUTABLE)
	$(HIPPROFILER) $(PROFILER_OPT) $(EXECUTABLE)
	$(HIPPROFILER_POST_CMD)

# Pass option to control start and stop iterations for profiling - see MatrixTranspose.cpp for implementation:
# Note we start profiler in --startdisabled mode - no timing collected until app enabled it via hipProfilerStart()
profile_trigger: $(EXECUTABLE)
	$(HIPPROFILER) $(PROFILER_OPT) --startdisabled $(EXECUTABLE) 3 6
	$(HIPPROFILER_POST_CMD)

run: $(EXECUTABLE)
	$(EXECUTABLE)

clean:
	rm -f $(EXECUTABLE)
	rm -f $(OBJECTS)
	rm -f $(HIP_PATH)/src/*.o
|
||||
|
||||
@@ -0,0 +1,219 @@
|
||||
/*
|
||||
Copyright (c) 2015-present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <cmath>     // std::abs(float)
#include <cstdio>    // printf
#include <cstdlib>   // atoi, malloc, free
#include <iostream>

// hip header file
#include "hip/hip_runtime.h"
#include "hip/hip_profile.h"
|
||||
|
||||
#define WIDTH 1024
|
||||
|
||||
#define NUM (WIDTH * WIDTH)
|
||||
|
||||
#define THREADS_PER_BLOCK_X 4
|
||||
#define THREADS_PER_BLOCK_Y 4
|
||||
#define THREADS_PER_BLOCK_Z 1
|
||||
|
||||
#define ITERATIONS 10
|
||||
|
||||
// Cmdline parms to control start and stop triggers
|
||||
int startTriggerIteration = -1;
|
||||
int stopTriggerIteration = -1;
|
||||
|
||||
// Device (Kernel) function, it must be void
|
||||
__global__ void matrixTranspose(float* out, float* in, const int width) {
|
||||
int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
|
||||
int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y;
|
||||
|
||||
out[y * width + x] = in[x * width + y];
|
||||
}
|
||||
|
||||
// CPU reference transpose: element (row, col) of the width x width row-major
// `input` is written to position (col, row) of `output`.
void matrixTransposeCPUReference(float* output, float* input, const unsigned int width) {
    for (unsigned int row = 0; row < width; ++row) {
        // Start of the current source row.
        const float* sourceRow = input + row * width;
        for (unsigned int col = 0; col < width; ++col) {
            output[col * width + row] = sourceRow[col];
        }
    }
}
|
||||
|
||||
|
||||
// Use a separate function to demonstrate how to use function name as part of scoped marker:
|
||||
void runGPU(float* Matrix, float* TransposeMatrix, float* gpuMatrix, float* gpuTransposeMatrix) {
|
||||
// __func__ is a standard C++ macro which expands to the name of the function, in this case
|
||||
// "runGPU"
|
||||
HIP_SCOPED_MARKER(__func__, "MyGroup");
|
||||
|
||||
for (int i = 0; i < ITERATIONS; i++) {
|
||||
if (i == startTriggerIteration) {
|
||||
hipProfilerStart();
|
||||
}
|
||||
if (i == stopTriggerIteration) {
|
||||
hipProfilerStop();
|
||||
}
|
||||
|
||||
float eventMs = 0.0f;
|
||||
|
||||
hipEvent_t start, stop;
|
||||
hipEventCreate(&start);
|
||||
hipEventCreate(&stop);
|
||||
|
||||
|
||||
// Record the start event
|
||||
hipEventRecord(start, NULL);
|
||||
|
||||
// Memory transfer from host to device
|
||||
hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice);
|
||||
|
||||
// Record the stop event
|
||||
hipEventRecord(stop, NULL);
|
||||
hipEventSynchronize(stop);
|
||||
|
||||
hipEventElapsedTime(&eventMs, start, stop);
|
||||
|
||||
printf("hipMemcpyHostToDevice time taken = %6.3fms\n", eventMs);
|
||||
|
||||
// Record the start event
|
||||
hipEventRecord(start, NULL);
|
||||
|
||||
// Lauching kernel from host
|
||||
hipLaunchKernelGGL(matrixTranspose,
|
||||
dim3(WIDTH / THREADS_PER_BLOCK_X, WIDTH / THREADS_PER_BLOCK_Y),
|
||||
dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y), 0, 0, gpuTransposeMatrix,
|
||||
gpuMatrix, WIDTH);
|
||||
|
||||
// Record the stop event
|
||||
hipEventRecord(stop, NULL);
|
||||
hipEventSynchronize(stop);
|
||||
hipEventElapsedTime(&eventMs, start, stop);
|
||||
|
||||
printf("kernel Execution time = %6.3fms\n", eventMs);
|
||||
|
||||
// Record the start event
|
||||
hipEventRecord(start, NULL);
|
||||
|
||||
// Memory transfer from device to host
|
||||
hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost);
|
||||
|
||||
// Record the stop event
|
||||
hipEventRecord(stop, NULL);
|
||||
hipEventSynchronize(stop);
|
||||
|
||||
hipEventElapsedTime(&eventMs, start, stop);
|
||||
|
||||
printf("hipMemcpyDeviceToHost time taken = %6.3fms\n", eventMs);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
if (argc >= 2) {
|
||||
startTriggerIteration = atoi(argv[1]);
|
||||
printf("info : will start tracing at iteration:%d\n", startTriggerIteration);
|
||||
}
|
||||
if (argc >= 3) {
|
||||
stopTriggerIteration = atoi(argv[2]);
|
||||
printf("info : will stop tracing at iteration:%d\n", stopTriggerIteration);
|
||||
}
|
||||
|
||||
float* Matrix;
|
||||
float* TransposeMatrix;
|
||||
float* cpuTransposeMatrix;
|
||||
|
||||
float* gpuMatrix;
|
||||
float* gpuTransposeMatrix;
|
||||
|
||||
hipDeviceProp_t devProp;
|
||||
hipGetDeviceProperties(&devProp, 0);
|
||||
|
||||
std::cout << "Device name " << devProp.name << std::endl;
|
||||
|
||||
{
|
||||
// Show example of how to create a "scoped marker".
|
||||
// The scoped marker records the time spent inside the { scope } of the marker - the begin
|
||||
// timestamp is at the beginning of the code scope, and the end is recorded when the SCOPE
|
||||
// exits. This can be viewed in CodeXL timeline relative to other GPU and CPU events. This
|
||||
// marker captures the time spent in setup including host allocation, initialization, and
|
||||
// device memory allocation.
|
||||
HIP_SCOPED_MARKER("Setup", "MyGroup");
|
||||
|
||||
|
||||
Matrix = (float*)malloc(NUM * sizeof(float));
|
||||
TransposeMatrix = (float*)malloc(NUM * sizeof(float));
|
||||
cpuTransposeMatrix = (float*)malloc(NUM * sizeof(float));
|
||||
|
||||
// initialize the input data
|
||||
for (int i = 0; i < NUM; i++) {
|
||||
Matrix[i] = (float)i * 10.0f;
|
||||
}
|
||||
|
||||
|
||||
// allocate the memory on the device side
|
||||
hipMalloc((void**)&gpuMatrix, NUM * sizeof(float));
|
||||
hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float));
|
||||
|
||||
// FYI, the scoped-marker will be destroyed here when the scope exits, and will record its
|
||||
// "end" timestamp.
|
||||
}
|
||||
|
||||
runGPU(Matrix, TransposeMatrix, gpuMatrix, gpuTransposeMatrix);
|
||||
|
||||
|
||||
// show how to use explicit begin/end markers:
|
||||
// We begin the timed region with HIP_BEGIN_MARKER, passing in the markerName and group:
|
||||
// The region will stop when HIP_END_MARKER is called
|
||||
// This is another way to mark begin/end - as an alternative to scoped markers.
|
||||
HIP_BEGIN_MARKER("Check&TearDown", "MyGroup");
|
||||
|
||||
int errors = 0;
|
||||
|
||||
// CPU MatrixTranspose computation
|
||||
matrixTransposeCPUReference(cpuTransposeMatrix, Matrix, WIDTH);
|
||||
|
||||
// verify the results
|
||||
double eps = 1.0E-6;
|
||||
for (int i = 0; i < NUM; i++) {
|
||||
if (std::abs(TransposeMatrix[i] - cpuTransposeMatrix[i]) > eps) {
|
||||
errors++;
|
||||
}
|
||||
}
|
||||
if (errors != 0) {
|
||||
printf("FAILED: %d errors\n", errors);
|
||||
} else {
|
||||
printf("PASSED!\n");
|
||||
}
|
||||
|
||||
// free the resources on device side
|
||||
hipFree(gpuMatrix);
|
||||
hipFree(gpuTransposeMatrix);
|
||||
|
||||
// free the resources on host side
|
||||
free(Matrix);
|
||||
free(TransposeMatrix);
|
||||
free(cpuTransposeMatrix);
|
||||
|
||||
// This ends the last marker started in this thread, in this case "Check&TearDown"
|
||||
HIP_END_MARKER();
|
||||
|
||||
return errors;
|
||||
}
|
||||
@@ -0,0 +1,47 @@
|
||||
## Using hipEvents to measure performance ###
|
||||
|
||||
This tutorial is a follow-up to the previous two tutorials: in the first we learned how to write our first HIP program, which computes a matrix transpose, and in the second we added a feature to measure the time taken for memory transfers and kernel execution. In this tutorial, we'll explain how to use CodeXL/rocm-profiler for HIP timeline tracing. We will also augment the source code with additional markers so we can see the high-level application flow alongside the information that CodeXL automatically collects.
|
||||
|
||||
|
||||
## Introduction:
|
||||
|
||||
CodeXL and rocm-profiler are the tools used for profiling the application. Profiling is chiefly useful for optimizing an application, for example by finding memory bottlenecks.
|
||||
|
||||
## Requirement:
|
||||
[CodeXL Installation](http://gpuopen.com/compute-product/codexl/)
|
||||
|
||||
## Prerequisite knowledge:
|
||||
|
||||
Programmers familiar with CUDA or OpenCL will be able to quickly learn and start coding with the HIP API. In case you are not, don't worry: you have chosen a good place to start. We'll explain everything assuming you are completely new to GPGPU programming.
|
||||
|
||||
## Simple Matrix Transpose
|
||||
|
||||
We will be using the Simple Matrix Transpose source code from the previous tutorial as it is.
|
||||
|
||||
## Using CodeXL markers for HIP Functions
|
||||
|
||||
HIP can generate markers at function begin/end which are displayed on the CodeXL timeline view. To do this, you need to install ROCm-Profiler and enable HIP to generate the markers:
|
||||
|
||||
1. Install ROCm-Profiler. Installing HIP from the ROCm pre-built packages installs ROCm-Profiler as well. Alternatively, you can build ROCm-Profiler using the instructions given below.
|
||||
|
||||
|
||||
2. Run with profiler enabled to generate ATP file.
|
||||
(These steps are also captured in the Makefile)
|
||||
HIP_PROFILE_API enables display of the HIP APIs on the CodeXL timeline view.
|
||||
`/opt/rocm/bin/rocm-profiler -o <outputATPFileName> -A <applicationName> -e HIP_PROFILE_API=1 <applicationArguments>`
|
||||
|
||||
## Using HIP_TRACE_API
|
||||
|
||||
You can also print the HIP function strings to stderr using HIP_TRACE_API environment variable. This can also be combined with the more detailed debug information provided by the HIP_DB switch. For example:
|
||||
`HIP_TRACE_API=1 HIP_DB=0x2 ./myHipApp`
|
||||
Note this trace mode uses colors. "less -r" can handle raw control characters and will display the debug output in proper colors.
|
||||
|
||||
## More Info:
|
||||
- [HIP FAQ](https://github.com/ROCm-Developer-Tools/HIP/blob/master/docs/markdown/hip_faq.md)
|
||||
- [HIP Kernel Language](https://github.com/ROCm-Developer-Tools/HIP/blob/master/docs/markdown/hip_kernel_language.md)
|
||||
- [HIP Runtime API (Doxygen)](http://rocm-developer-tools.github.io/HIP)
|
||||
- [HIP Porting Guide](https://github.com/ROCm-Developer-Tools/HIP/blob/master/docs/markdown/hip_porting_guide.md)
|
||||
- [HIP Terminology](https://github.com/ROCm-Developer-Tools/HIP/blob/master/docs/markdown/hip_terms.md) (including Rosetta Stone of GPU computing terms across CUDA/HIP/HC/AMP/OpenCL)
|
||||
- [HIPIFY](https://github.com/ROCm-Developer-Tools/HIP/blob/master/hipify-clang/README.md)
|
||||
- [Developer/CONTRIBUTING Info](https://github.com/ROCm-Developer-Tools/HIP/blob/master/CONTRIBUTING.md)
|
||||
- [Release Notes](https://github.com/ROCm-Developer-Tools/HIP/blob/master/RELEASE.md)
|
||||
@@ -57,12 +57,14 @@ static inline std::uint32_t __convert_float_to_half(float a) noexcept {
|
||||
|
||||
// On machines without fp16 instructions, clang lowers llvm.convert.from.fp16
|
||||
// to call of this function.
|
||||
extern "C" float __gnu_h2f_ieee(unsigned short h){
|
||||
extern "C" __attribute__((visibility("default")))
|
||||
float __gnu_h2f_ieee(unsigned short h){
|
||||
return __convert_half_to_float((std::uint32_t) h);
|
||||
}
|
||||
|
||||
// On machines without fp16 instructions, clang lowers llvm.convert.to.fp16
|
||||
// to call of this function.
|
||||
extern "C" unsigned short __gnu_f2h_ieee(float f){
|
||||
extern "C" __attribute__((visibility("default")))
|
||||
unsigned short __gnu_f2h_ieee(float f){
|
||||
return (unsigned short)__convert_float_to_half(f);
|
||||
}
|
||||
|
||||
+247
-10
@@ -28,6 +28,7 @@ THE SOFTWARE.
|
||||
#include "hip_hcc_internal.h"
|
||||
#include "hip_fatbin.h"
|
||||
#include "trace_helper.h"
|
||||
#include "program_state.inl"
|
||||
|
||||
#ifdef __GNUC__
|
||||
#pragma GCC visibility push (default)
|
||||
@@ -94,8 +95,10 @@ __hipRegisterFatBinary(const void* data)
|
||||
module->executable, agent);
|
||||
|
||||
if (module->executable.handle) {
|
||||
modules->at(deviceId) = module;
|
||||
tprintf(DB_FB, "Loaded code object for %s\n", name);
|
||||
hip_impl::program_state_impl::read_kernarg_metadata(image, module->kernargs);
|
||||
modules->at(deviceId) = module;
|
||||
|
||||
tprintf(DB_FB, "Loaded code object for %s, args size=%ld\n", name, module->kernargs.size());
|
||||
} else {
|
||||
fprintf(stderr, "Failed to load code object for %s\n", name);
|
||||
abort();
|
||||
@@ -157,16 +160,215 @@ extern "C" void __hipRegisterFunction(
|
||||
g_functions.insert(std::make_pair(hostFunction, std::move(functions)));
|
||||
}
|
||||
|
||||
static inline const char* hsa_strerror(hsa_status_t status) {
|
||||
const char* str = nullptr;
|
||||
if (hsa_status_string(status, &str) == HSA_STATUS_SUCCESS) {
|
||||
return str;
|
||||
}
|
||||
return "Unknown error";
|
||||
}
|
||||
|
||||
struct RegisteredVar {
|
||||
public:
|
||||
RegisteredVar(): size_(0), devicePtr_(nullptr) {}
|
||||
~RegisteredVar() {}
|
||||
|
||||
static inline const char* hsa_strerror(hsa_status_t status) {
|
||||
const char* str = nullptr;
|
||||
if (hsa_status_string(status, &str) == HSA_STATUS_SUCCESS) {
|
||||
return str;
|
||||
}
|
||||
return "Unknown error";
|
||||
}
|
||||
|
||||
hipDeviceptr_t getdeviceptr() const { return devicePtr_; };
|
||||
size_t getvarsize() const { return size_; };
|
||||
|
||||
size_t size_; // Size of the variable
|
||||
hipDeviceptr_t devicePtr_; //Device Memory Address of the variable.
|
||||
};
|
||||
|
||||
// Bookkeeping record for one device-side global variable registered through
// __hipRegisterVar; instances are stored in g_vars keyed by the host variable name.
// The member order matters: __hipRegisterVar builds this struct with positional
// aggregate initialisation.
struct DeviceVar {
|
||||
void* shadowVptr;  // shadow variable in host code
|
||||
std::string hostVar;  // variable name in host code
|
||||
size_t size;  // size of the variable as passed at registration
|
||||
std::vector<hipModule_t>* modules;  // device modules containing the code object, indexed by device id
|
||||
std::vector<RegisteredVar> rvars;  // per-device address/size, indexed by device id
|
||||
bool dyn_undef;  // findVar treats entries with this set as shadow vars, not the original
|
||||
};
|
||||
|
||||
std::unordered_multimap<std::string, DeviceVar > g_vars;
|
||||
|
||||
//The logic follows PlatformState::getGlobalVar in VDI RT
|
||||
static DeviceVar* findVar(std::string hostVar, int deviceId, hipModule_t hmod) {
|
||||
DeviceVar* dvar = nullptr;
|
||||
if (hmod != nullptr) {
|
||||
// If module is provided, then get the var only from that module
|
||||
auto var_range = g_vars.equal_range(hostVar);
|
||||
for (auto it = var_range.first; it != var_range.second; ++it) {
|
||||
if ((*it->second.modules)[deviceId] == hmod) {
|
||||
dvar = &(it->second);
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// If var count is < 2, return the var
|
||||
if (g_vars.count(hostVar) < 2) {
|
||||
auto it = g_vars.find(hostVar);
|
||||
dvar = ((it == g_vars.end()) ? nullptr : &(it->second));
|
||||
} else {
|
||||
// If var count is > 2, return the original var,
|
||||
// if original var count != 1, return g_vars.end()/Invalid
|
||||
size_t orig_global_count = 0;
|
||||
auto var_range = g_vars.equal_range(hostVar);
|
||||
for (auto it = var_range.first; it != var_range.second; ++it) {
|
||||
// when dyn_undef is set, it is a shadow var
|
||||
if (it->second.dyn_undef == false) {
|
||||
++orig_global_count;
|
||||
dvar = &(it->second);
|
||||
}
|
||||
}
|
||||
dvar = ((orig_global_count == 1) ? dvar : nullptr);
|
||||
}
|
||||
}
|
||||
return dvar;
|
||||
}
|
||||
|
||||
// Looks up the device address and size of the registered global variable
// named hostVar on the current default context's device.
//
// dev_ptr  [out] receives the device address; skipped when nullptr.
// size_ptr [out] receives the variable size; skipped when nullptr.
// hostVar  [in]  name of the variable; must not be nullptr.
// hmod     [in]  when non-null, only the variable belonging to this module matches.
//
// Returns hipSuccess, or hipErrorInvalidValue when the name is null, there is
// no usable context/device, the variable is unknown, or it has no device
// address recorded for the current device.
hipError_t ihipGetGlobalVar(hipDeviceptr_t* dev_ptr, size_t* size_ptr,
                            const char* hostVar, hipModule_t hmod) {
    GET_TLS();

    // Constructing a std::string from a null pointer is undefined behaviour.
    if (hostVar == nullptr) return hipErrorInvalidValue;

    auto ctx = ihipGetTlsDefaultCtx();

    if (!ctx) return hipErrorInvalidValue;

    auto device = ctx->getDevice();

    if (!device) return hipErrorInvalidValue;

    ihipDevice_t* currentDevice = ihipGetDevice(device->_deviceId);

    if (!currentDevice) return hipErrorInvalidValue;

    int deviceId = device->_deviceId;

    DeviceVar* dvar = findVar(std::string(hostVar), deviceId, hmod);
    if (dvar == nullptr) return hipErrorInvalidValue;

    // Guard the per-device table before indexing it.
    if (deviceId < 0 || static_cast<size_t>(deviceId) >= dvar->rvars.size()) {
        return hipErrorInvalidValue;
    }

    const RegisteredVar& rvar = dvar->rvars[deviceId];
    if (rvar.getdeviceptr() == nullptr) return hipErrorInvalidValue;

    // Only write through the output pointers the caller actually supplied.
    if (size_ptr != nullptr) *size_ptr = rvar.getvarsize();
    if (dev_ptr != nullptr) *dev_ptr = rvar.getdeviceptr();
    return hipSuccess;
}
|
||||
|
||||
static bool createGlobalVarObj(const hsa_executable_t& hsaExecutable, const hsa_agent_t& hasAgent,
|
||||
const char* global_name, void** device_pptr, size_t* bytes) {
|
||||
hsa_status_t status = HSA_STATUS_SUCCESS;
|
||||
hsa_symbol_kind_t sym_type;
|
||||
hsa_executable_symbol_t global_symbol;
|
||||
std::string buildLog;
|
||||
|
||||
/* Find HSA Symbol by name */
|
||||
status = hsa_executable_get_symbol_by_name(hsaExecutable, global_name, &hasAgent,
|
||||
&global_symbol);
|
||||
if (status != HSA_STATUS_SUCCESS) {
|
||||
buildLog += "Error: Failed to find the Symbol by Name: ";
|
||||
buildLog += hsa_strerror(status);
|
||||
tprintf(DB_FB, "createGlobalVarObj: %s\n", buildLog.c_str());
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Find HSA Symbol Type */
|
||||
status = hsa_executable_symbol_get_info(global_symbol, HSA_EXECUTABLE_SYMBOL_INFO_TYPE,
|
||||
&sym_type);
|
||||
if (status != HSA_STATUS_SUCCESS) {
|
||||
buildLog += "Error: Failed to find the Symbol Type : ";
|
||||
buildLog += hsa_strerror(status);
|
||||
tprintf(DB_FB, "createGlobalVarObj: %s\n", buildLog.c_str());
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Make sure symbol type is VARIABLE */
|
||||
if (sym_type != HSA_SYMBOL_KIND_VARIABLE) {
|
||||
buildLog += "Error: Symbol is not of type VARIABLE : ";
|
||||
buildLog += hsa_strerror(status);
|
||||
tprintf(DB_FB, "createGlobalVarObj: %s\n", buildLog.c_str());
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Retrieve the size of the variable */
|
||||
status = hsa_executable_symbol_get_info(global_symbol, HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_SIZE, bytes);
|
||||
|
||||
if (status != HSA_STATUS_SUCCESS) {
|
||||
buildLog += "Error: Failed to retrieve the Symbol Size : ";
|
||||
buildLog += hsa_strerror(status);
|
||||
tprintf(DB_FB, "createGlobalVarObj: %s\n", buildLog.c_str());
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Find HSA Symbol Address */
|
||||
status = hsa_executable_symbol_get_info(global_symbol,
|
||||
HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ADDRESS, device_pptr);
|
||||
if (status != HSA_STATUS_SUCCESS) {
|
||||
buildLog += "Error: Failed to find the Symbol Address : ";
|
||||
buildLog += hsa_strerror(status);
|
||||
tprintf(DB_FB, "createGlobalVarObj: %s\n", buildLog.c_str());
|
||||
return false;
|
||||
} else {
|
||||
tprintf(DB_FB, "createGlobalVarObj: var %s : device=%p, size=%zu\n", global_name, *device_pptr, *bytes);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Registers a device-side global variable.
|
||||
// For each global variable in device code, there is a corresponding shadow
|
||||
// global variable in host code. The shadow host variable is used to keep
|
||||
// track of the value of the device side global variable between kernel
|
||||
// executions.
|
||||
// The basic logic is taken from VDI RT, but there is much difference.
|
||||
extern "C" void __hipRegisterVar(
|
||||
std::vector<hipModule_t>* modules,
|
||||
char* hostVar,
|
||||
char* deviceVar,
|
||||
const char* deviceName,
|
||||
int ext,
|
||||
int size,
|
||||
int constant,
|
||||
int global)
|
||||
std::vector<hipModule_t>* modules, // The device modules containing code object
|
||||
char* var, // The shadow variable in host code
|
||||
char* hostVar, // Variable name in host code
|
||||
const char* deviceVar, // Variable name in device code
|
||||
int ext, // Whether this variable is external
|
||||
int size, // Size of the variable
|
||||
int constant, // Whether this variable is constant
|
||||
int global) // Unknown, always 0
|
||||
{
|
||||
HIP_INIT_API(__hipRegisterVar, modules, var, hostVar, deviceVar, ext, size, constant, global);
|
||||
|
||||
DeviceVar dvar{var, std::string{ hostVar }, static_cast<size_t>(size), modules,
|
||||
std::vector<RegisteredVar>{ g_deviceCnt }, false };
|
||||
|
||||
for (int deviceId = 0; deviceId < g_deviceCnt; deviceId++) {
|
||||
auto device = ihipGetDevice(deviceId);
|
||||
if(!device) {
|
||||
continue;
|
||||
}
|
||||
hsa_executable_t& executable = (*modules)[deviceId]->executable;
|
||||
hsa_agent_t& agent = g_allAgents[deviceId + 1];
|
||||
size_t bytes = 0;
|
||||
hipDeviceptr_t devicePtr = nullptr;
|
||||
|
||||
bool success = createGlobalVarObj(executable, agent, hostVar, &devicePtr, &bytes);
|
||||
if(!success) {
|
||||
return;
|
||||
}
|
||||
dvar.rvars[deviceId].devicePtr_ = devicePtr;
|
||||
dvar.rvars[deviceId].size_ = bytes;
|
||||
|
||||
hc::AmPointerInfo ptrInfo(nullptr, devicePtr, devicePtr, bytes, device->_acc, true, false);
|
||||
hc::am_memtracker_add(devicePtr, ptrInfo);
|
||||
|
||||
#if USE_APP_PTR_FOR_CTX
|
||||
hc::am_memtracker_update(devicePtr, device->_deviceId, 0u, ihipGetTlsDefaultCtx());
|
||||
#else
|
||||
hc::am_memtracker_update(devicePtr, device->_deviceId, 0u);
|
||||
#endif
|
||||
}
|
||||
g_vars.insert(std::make_pair(std::string(hostVar), dvar));
|
||||
}
|
||||
|
||||
extern "C" void __hipUnregisterFatBinary(std::vector<hipModule_t>* modules)
|
||||
@@ -226,6 +428,41 @@ extern "C" hipError_t __hipPopCallConfiguration(
|
||||
return hipSuccess;
|
||||
}
|
||||
|
||||
int getCurrentDeviceId()
|
||||
{
|
||||
GET_TLS();
|
||||
|
||||
int deviceId = 0;
|
||||
auto ctx = ihipGetTlsDefaultCtx();
|
||||
|
||||
if(!ctx) return deviceId;
|
||||
|
||||
LockedAccessor_CtxCrit_t crit(ctx->criticalData());
|
||||
|
||||
if(crit->_execStack.size() != 0)
|
||||
{
|
||||
auto &exec = crit->_execStack.top();
|
||||
|
||||
if (exec._hStream) {
|
||||
deviceId = exec._hStream->getDevice()->_deviceId;
|
||||
} else if (ctx->getDevice()) {
|
||||
deviceId = ctx->getDevice()->_deviceId;
|
||||
}
|
||||
} else if (ctx->getDevice()) {
|
||||
deviceId = ctx->getDevice()->_deviceId;
|
||||
}
|
||||
return deviceId;
|
||||
}
|
||||
|
||||
// Maps a host kernel function pointer to the device function registered for
// the current device. Returns nullptr when the host function is unknown or has
// no device function for that device.
hipFunction_t ihipGetDeviceFunction(const void *hostFunction)
{
    const int currentDevice = getCurrentDeviceId();

    auto registered = g_functions.find(hostFunction);
    if (registered == g_functions.end()) {
        return nullptr;
    }
    if (!registered->second[currentDevice]) {
        return nullptr;
    }
    return registered->second[currentDevice];
}
|
||||
|
||||
hipError_t hipSetupArgument(
|
||||
const void *arg,
|
||||
|
||||
@@ -33,7 +33,7 @@ THE SOFTWARE.
|
||||
#include "hip_prof_api.h"
|
||||
#include "hip_util.h"
|
||||
#include "env.h"
|
||||
|
||||
#include <unordered_map>
|
||||
|
||||
#if (__hcc_workweek__ < 16354)
|
||||
#error("This version of HIP requires a newer version of HCC.");
|
||||
@@ -1009,6 +1009,18 @@ hipError_t hipModuleGetFunctionEx(hipFunction_t* hfunc, hipModule_t hmod,
|
||||
hipStream_t ihipSyncAndResolveStream(hipStream_t, bool lockAcquired = 0);
|
||||
hipError_t ihipStreamSynchronize(TlsData *tls, hipStream_t stream);
|
||||
|
||||
/**
|
||||
* @brief Copies the memory address and size of symbol @p hostVar
|
||||
*
|
||||
* @param[in] hostVar - Name of the symbol on device
|
||||
* @param[out] dev_ptr - Pointer to a pointer to the memory referred to by the symbol
|
||||
* @param[out] size_ptr - Pointer to the size of the symbol
* @param[in] hmod - Optional module to restrict the lookup to (defaults to nullptr)
|
||||
* @return #hipSuccess, #hipErrorInvalidValue
|
||||
*
|
||||
*/
|
||||
hipError_t ihipGetGlobalVar(hipDeviceptr_t* dev_ptr, size_t* size_ptr, const char* hostVar,
|
||||
hipModule_t hmod = nullptr);
|
||||
|
||||
// Stream printf functions:
|
||||
inline std::ostream& operator<<(std::ostream& os, const ihipStream_t& s) {
|
||||
os << "stream:";
|
||||
@@ -1080,4 +1092,14 @@ static inline ihipCtx_t* iihipGetTlsDefaultCtx(TlsData* tls) {
|
||||
return tls->defaultCtx;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Get device function from host kernel function pointer
|
||||
* Needed only for clang + HIP-HCC RT
|
||||
*
|
||||
* @param [in] hostFunction host kernel function pointer
|
||||
*
|
||||
* @returns hipFunction_t, nullptr
|
||||
*/
|
||||
hipFunction_t ihipGetDeviceFunction(const void *hostFunction);
|
||||
|
||||
#endif
|
||||
|
||||
@@ -344,6 +344,8 @@ hipError_t ihipExtLaunchMultiKernelMultiDevice(hipLaunchParams* launchParamsList
|
||||
if (kds[i] == nullptr) {
|
||||
return hipErrorInvalidValue;
|
||||
}
|
||||
if (!kds[i]->_kernarg_layout.empty()) continue;
|
||||
|
||||
hip_impl::kernargs_size_align kargs = ps.get_kernargs_size_align(
|
||||
reinterpret_cast<std::uintptr_t>(lp.func));
|
||||
kds[i]->_kernarg_layout = *reinterpret_cast<const std::vector<std::pair<std::size_t, std::size_t>>*>(
|
||||
@@ -397,6 +399,14 @@ hipError_t ihipExtLaunchMultiKernelMultiDevice(hipLaunchParams* launchParamsList
|
||||
return result;
|
||||
}
|
||||
|
||||
__attribute__((visibility("default")))
|
||||
hipError_t hipExtLaunchMultiKernelMultiDevice(hipLaunchParams* launchParamsList,
|
||||
int numDevices, unsigned int flags) {
|
||||
HIP_INIT_API(hipExtLaunchMultiKernelMultiDevice, launchParamsList, numDevices, flags);
|
||||
auto& ps = hip_impl::get_program_state();
|
||||
return ihipExtLaunchMultiKernelMultiDevice(launchParamsList, numDevices, flags, ps);
|
||||
}
|
||||
|
||||
void getGprsLdsUsage(hipFunction_t f, size_t* usedVGPRS, size_t* usedSGPRS, size_t* usedLDS)
|
||||
{
|
||||
if (f->_is_code_object_v3) {
|
||||
@@ -736,7 +746,6 @@ hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsL
|
||||
mg_sync *mg_sync_ptr = 0;
|
||||
vector<mg_info *> mg_info_ptr;
|
||||
|
||||
|
||||
result = hip_internal::ihipHostMalloc(tls, (void **)&mg_sync_ptr, sizeof(mg_sync), hipHostMallocDefault, true);
|
||||
if (result != hipSuccess) {
|
||||
return hipErrorInvalidValue;
|
||||
@@ -1091,7 +1100,12 @@ namespace hip_impl {
|
||||
|
||||
// Looks up the agent global `name` and reports its device address in `dptr`
// and its size in `bytes`.
//
// The lookup is first delegated to the implementation object. When that does
// not return hipSuccess, ihipGetGlobalVar is tried as a fallback (needed for
// the Clang compiler + HCC runtime combination). The status of the last
// lookup that was attempted is returned.
hipError_t agent_globals::read_agent_global_from_process(hipDeviceptr_t* dptr, size_t* bytes,
                                                         const char* name) {
    hipError_t result = impl->read_agent_global_from_process(dptr, bytes, name);
    if (result != hipSuccess) {
        // For Clang Compiler + Hcc Rt
        result = ihipGetGlobalVar(dptr, bytes, name);
    }
    return result;
}
|
||||
|
||||
} // Namespace hip_impl.
|
||||
@@ -1259,19 +1273,34 @@ hipError_t ihipModuleGetFunction(TlsData *tls, hipFunction_t* func, hipModule_t
|
||||
if (!*func) return hipErrorInvalidValue;
|
||||
|
||||
std::string name_str(name);
|
||||
std::string namekd_str(name_str + ".kd");
|
||||
bool kernel_by_namekd = false;
|
||||
|
||||
auto kernel = find_kernel_by_name(hmod->executable, name_str.c_str(), agent);
|
||||
|
||||
if (kernel.handle == 0u) {
|
||||
name_str.append(".kd");
|
||||
kernel = find_kernel_by_name(hmod->executable, name_str.c_str(), agent);
|
||||
kernel_by_namekd = true; //Find kernel by namekd_str
|
||||
kernel = find_kernel_by_name(hmod->executable, namekd_str.c_str(), agent);
|
||||
}
|
||||
|
||||
if (kernel.handle == 0u) return hipErrorNotFound;
|
||||
|
||||
//For hipModuleLoad(), hmod->kernargs must contain an args with key
|
||||
//name_str or namekd_str.
|
||||
//For hipLaunchKernelGGL(), hmod->kernargs is empty, thus we need
|
||||
//insert hmod->kernargs[name_str]
|
||||
auto it = hmod->kernargs.find(name_str); //Look up args from the original name
|
||||
if (it == hmod->kernargs.end()) {
|
||||
it = hmod->kernargs.find(namekd_str); //Look up args from .kd name
|
||||
}
|
||||
|
||||
// TODO: refactor the whole ihipThisThat, which is a mess and yields the
|
||||
// below, due to hipFunction_t being a pointer to ihipModuleSymbol_t.
|
||||
|
||||
func[0][0] = *static_cast<hipFunction_t>(
|
||||
Kernel_descriptor{kernel_object(kernel), name_str, hmod->kernargs[name_str]});
|
||||
Kernel_descriptor{kernel_object(kernel),
|
||||
kernel_by_namekd ? namekd_str : name_str,
|
||||
it != hmod->kernargs.end() ? it->second : hmod->kernargs[name_str]});
|
||||
|
||||
return hipSuccess;
|
||||
}
|
||||
|
||||
@@ -250,7 +250,7 @@ struct _hiprtcProgram {
|
||||
|
||||
const auto it{find_if(reader.sections.begin(), reader.sections.end(),
|
||||
[](const section* x) {
|
||||
return x->get_name() == ".kernel";
|
||||
return (x->get_name() == ".hip_fatbin") || (x->get_name() == ".kernel");
|
||||
})};
|
||||
|
||||
if (it == reader.sections.end()) return false;
|
||||
@@ -513,7 +513,7 @@ extern "C" hiprtcResult hiprtcCompileProgram(hiprtcProgram p, int n, const char*
|
||||
|
||||
const auto src{p->writeTemporaryFiles(tmp.path())};
|
||||
|
||||
vector<string> args{hipcc, "-shared"};
|
||||
vector<string> args{hipcc, "-fPIC -shared"};
|
||||
if (n) args.insert(args.cend(), o, o + n);
|
||||
|
||||
handleTarget(args);
|
||||
|
||||
@@ -19,6 +19,8 @@
|
||||
#include <hsa/hsa_ven_amd_loader.h>
|
||||
#include <amd_comgr.h>
|
||||
#include "hc.hpp"
|
||||
#include "hip_hcc_internal.h"
|
||||
#include "trace_helper.h"
|
||||
|
||||
#include <link.h>
|
||||
|
||||
@@ -734,6 +736,27 @@ public:
|
||||
!= AMD_COMGR_STATUS_SUCCESS)
|
||||
return;
|
||||
|
||||
//Look up “.value_kind” to decide whether to ignore it
|
||||
//See http://llvm.org/docs/AMDGPUUsage.html#code-object-v3-metadata-mattr-code-object-v3
|
||||
amd_comgr_metadata_node_t arg_value_kind_md;
|
||||
if (amd_comgr_metadata_lookup(arg_md, ".value_kind", &arg_value_kind_md)
|
||||
!= AMD_COMGR_STATUS_SUCCESS)
|
||||
return;
|
||||
|
||||
std::string arg_value_kind{ metadata_to_string(arg_value_kind_md) };
|
||||
|
||||
if (amd_comgr_destroy_metadata(arg_value_kind_md)
|
||||
!= AMD_COMGR_STATUS_SUCCESS)
|
||||
return;
|
||||
|
||||
if (arg_value_kind.find("hidden_") == 0) {
|
||||
if (amd_comgr_destroy_metadata(arg_md)
|
||||
!= AMD_COMGR_STATUS_SUCCESS)
|
||||
return;
|
||||
|
||||
continue; //Ignore hidden arg
|
||||
}
|
||||
|
||||
amd_comgr_metadata_node_t arg_size_md;
|
||||
if (amd_comgr_metadata_lookup(arg_md, ".size", &arg_size_md)
|
||||
!= AMD_COMGR_STATUS_SUCCESS)
|
||||
@@ -937,14 +960,16 @@ public:
|
||||
|
||||
auto it0 = get_functions(agent).find(function_address);
|
||||
|
||||
if (it0 == get_functions(agent).cend()) {
|
||||
hip_throw(std::runtime_error{
|
||||
if (it0 != get_functions(agent).cend()) return it0->second;
|
||||
|
||||
// For hip-clang compiler + Hcc RT
|
||||
hipFunction_t f = ihipGetDeviceFunction((const void*)function_address);
|
||||
if (f) return reinterpret_cast<Kernel_descriptor&>(*f);
|
||||
|
||||
hip_throw(std::runtime_error{
|
||||
"No device code available for function: " +
|
||||
std::string(name(function_address)) +
|
||||
", for agent: " + name(agent)});
|
||||
}
|
||||
|
||||
return it0->second;
|
||||
}
|
||||
|
||||
const std::vector<std::pair<std::size_t, std::size_t>>&
|
||||
|
||||
@@ -2,8 +2,8 @@ cmake_minimum_required(VERSION 2.8.3)
|
||||
project(hip_tests)
|
||||
|
||||
# Setup
|
||||
set(HIP_PATH @CMAKE_INSTALL_PREFIX@)
|
||||
set(ENV{HIP_PATH} ${HIP_PATH})
|
||||
#set(HIP_PATH @CMAKE_INSTALL_PREFIX@)
|
||||
#set(ENV{HIP_PATH} ${HIP_PATH})
|
||||
set(HIP_SRC_PATH @hip_SOURCE_DIR@)
|
||||
set(CMAKE_MODULE_PATH "${HIP_PATH}/cmake" ${CMAKE_MODULE_PATH})
|
||||
include(${HIP_SRC_PATH}/tests/hit/HIT.cmake)
|
||||
|
||||
@@ -109,11 +109,11 @@ macro(PARSE_BUILD_COMMAND _target _sources _hipcc_options _hcc_options _nvcc_opt
|
||||
elseif(_link_options_found)
|
||||
list(APPEND ${_link_options} ${arg})
|
||||
elseif(_exclude_platforms_found)
|
||||
set(${_exclude_platforms} ${arg})
|
||||
list(APPEND ${_exclude_platforms} ${arg})
|
||||
elseif(_exclude_runtime_found)
|
||||
set(${_exclude_runtime} ${arg})
|
||||
list(APPEND ${_exclude_runtime} ${arg})
|
||||
elseif(_exclude_compiler_found)
|
||||
set(${_exclude_compiler} ${arg})
|
||||
list(APPEND ${_exclude_compiler} ${arg})
|
||||
elseif(_depends_found)
|
||||
list(APPEND ${_depends} ${arg})
|
||||
else()
|
||||
@@ -160,11 +160,11 @@ macro(PARSE_CUSTOMBUILD_COMMAND _target _buildcmd _exclude_platforms _exclude_ru
|
||||
set(_depends_found TRUE)
|
||||
else()
|
||||
if(_exclude_platforms_found)
|
||||
set(${_exclude_platforms} ${arg})
|
||||
list(APPEND ${_exclude_platforms} ${arg})
|
||||
elseif(_exclude_runtime_found)
|
||||
set(${_exclude_runtime} ${arg})
|
||||
list(APPEND ${_exclude_runtime} ${arg})
|
||||
elseif(_exclude_compiler_found)
|
||||
set(${_exclude_compiler} ${arg})
|
||||
list(APPEND ${_exclude_compiler} ${arg})
|
||||
elseif(_depends_found)
|
||||
list(APPEND ${_depends} ${arg})
|
||||
else()
|
||||
@@ -203,11 +203,11 @@ macro(PARSE_TEST_COMMAND _target _arguments _exclude_platforms _exclude_runtime
|
||||
set(_exclude_compiler_found TRUE)
|
||||
else()
|
||||
if(_exclude_platforms_found)
|
||||
set(${_exclude_platforms} ${arg})
|
||||
list(APPEND ${_exclude_platforms} ${arg})
|
||||
elseif(_exclude_runtime_found)
|
||||
set(${_exclude_runtime} ${arg})
|
||||
list(APPEND ${_exclude_runtime} ${arg})
|
||||
elseif(_exclude_compiler_found)
|
||||
set(${_exclude_compiler} ${arg})
|
||||
list(APPEND ${_exclude_compiler} ${arg})
|
||||
else()
|
||||
list(APPEND ${_arguments} ${arg})
|
||||
endif()
|
||||
@@ -246,11 +246,11 @@ macro(PARSE_TEST_NAMED_COMMAND _target _testname _arguments _exclude_platforms _
|
||||
set(_exclude_compiler_found TRUE)
|
||||
else()
|
||||
if(_exclude_platforms_found)
|
||||
set(${_exclude_platforms} ${arg})
|
||||
list(APPEND ${_exclude_platforms} ${arg})
|
||||
elseif(_exclude_runtime_found)
|
||||
set(${_exclude_runtime} ${arg})
|
||||
list(APPEND ${_exclude_runtime} ${arg})
|
||||
elseif(_exclude_compiler_found)
|
||||
set(${_exclude_compiler} ${arg})
|
||||
list(APPEND ${_exclude_compiler} ${arg})
|
||||
else()
|
||||
list(APPEND ${_arguments} ${arg})
|
||||
endif()
|
||||
@@ -297,13 +297,13 @@ macro(HIT_ADD_FILES _dir _label _parent)
|
||||
string(REGEX REPLACE " " ";" _cmd "${_cmd}")
|
||||
parse_build_command(_target _sources _hipcc_options _hcc_options _nvcc_options _link_options _exclude_platforms _exclude_runtime _exclude_compiler _depends ${_dir} ${_cmd})
|
||||
string(REGEX REPLACE "/" "." target ${_label}/${_target})
|
||||
if(_exclude_platforms STREQUAL "all" OR _exclude_platforms STREQUAL ${HIP_PLATFORM})
|
||||
if("all" IN_LIST _exclude_platforms OR ${HIP_PLATFORM} IN_LIST _exclude_platforms)
|
||||
insert_into_map("_exclude" "${target}" TRUE)
|
||||
elseif(NOT _exclude_runtime AND _exclude_compiler STREQUAL ${HIP_COMPILER})
|
||||
elseif(NOT _exclude_runtime AND ${HIP_COMPILER} IN_LIST _exclude_compiler)
|
||||
insert_into_map("_exclude" "${target}" TRUE)
|
||||
elseif(NOT _exclude_compiler AND _exclude_runtime STREQUAL ${HIP_RUNTIME})
|
||||
elseif(NOT _exclude_compiler AND ${HIP_RUNTIME} IN_LIST _exclude_runtime)
|
||||
insert_into_map("_exclude" "${target}" TRUE)
|
||||
elseif(_exclude_runtime STREQUAL ${HIP_RUNTIME} AND _exclude_compiler STREQUAL ${HIP_COMPILER})
|
||||
# Exclude when both the active runtime and the active compiler are listed.
# The exclude variables are accumulated with list(APPEND ...), so membership
# must be tested with IN_LIST; STREQUAL only matches a single-element list.
elseif(${HIP_RUNTIME} IN_LIST _exclude_runtime AND ${HIP_COMPILER} IN_LIST _exclude_compiler)
|
||||
insert_into_map("_exclude" "${target}" TRUE)
|
||||
else()
|
||||
set_source_files_properties(${_sources} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
|
||||
@@ -336,13 +336,13 @@ macro(HIT_ADD_FILES _dir _label _parent)
|
||||
string(REGEX REPLACE " " ";" _cmd "${_cmd}")
|
||||
parse_custombuild_command(_target _buildcmd _exclude_platforms _exclude_runtime _exclude_compiler _depends ${_cmd})
|
||||
string(REGEX REPLACE "/" "." target ${_label}/${_target})
|
||||
if(_exclude_platforms STREQUAL "all" OR _exclude_platforms STREQUAL ${HIP_PLATFORM})
|
||||
if("all" IN_LIST _exclude_platforms OR ${HIP_PLATFORM} IN_LIST _exclude_platforms)
|
||||
insert_into_map("_exclude" "${target}" TRUE)
|
||||
elseif(NOT _exclude_runtime AND _exclude_compiler STREQUAL ${HIP_COMPILER})
|
||||
elseif(NOT _exclude_runtime AND ${HIP_COMPILER} IN_LIST _exclude_compiler)
|
||||
insert_into_map("_exclude" "${target}" TRUE)
|
||||
elseif(NOT _exclude_compiler AND _exclude_runtime STREQUAL ${HIP_RUNTIME})
|
||||
elseif(NOT _exclude_compiler AND ${HIP_RUNTIME} IN_LIST _exclude_runtime)
|
||||
insert_into_map("_exclude" "${target}" TRUE)
|
||||
elseif(_exclude_runtime STREQUAL ${HIP_RUNTIME} AND _exclude_compiler STREQUAL ${HIP_COMPILER})
|
||||
# Exclude when both the active runtime and the active compiler are listed.
# The exclude variables are accumulated with list(APPEND ...), so membership
# must be tested with IN_LIST; STREQUAL only matches a single-element list.
elseif(${HIP_RUNTIME} IN_LIST _exclude_runtime AND ${HIP_COMPILER} IN_LIST _exclude_compiler)
|
||||
insert_into_map("_exclude" "${target}" TRUE)
|
||||
else()
|
||||
string(REGEX REPLACE ";" " " _buildcmd "${_buildcmd}")
|
||||
@@ -370,10 +370,10 @@ macro(HIT_ADD_FILES _dir _label _parent)
|
||||
parse_test_command(_target _arguments _exclude_platforms _exclude_runtime _exclude_compiler ${_cmd})
|
||||
string(REGEX REPLACE "/" "." target ${_label}/${_target})
|
||||
read_from_map("_exclude" "${target}" _exclude_test_from_build)
|
||||
if(_exclude_platforms STREQUAL "all" OR _exclude_platforms STREQUAL ${HIP_PLATFORM})
|
||||
elseif(NOT _exclude_runtime AND _exclude_compiler STREQUAL ${HIP_COMPILER})
|
||||
elseif(NOT _exclude_compiler AND _exclude_runtime STREQUAL ${HIP_RUNTIME})
|
||||
elseif(_exclude_runtime STREQUAL ${HIP_RUNTIME} AND _exclude_compiler STREQUAL ${HIP_COMPILER})
|
||||
if("all" IN_LIST _exclude_platforms OR ${HIP_PLATFORM} IN_LIST _exclude_platforms)
|
||||
elseif(NOT _exclude_runtime AND ${HIP_COMPILER} IN_LIST _exclude_compiler)
|
||||
elseif(NOT _exclude_compiler AND ${HIP_RUNTIME} IN_LIST _exclude_runtime)
|
||||
elseif(${HIP_RUNTIME} IN_LIST _exclude_runtime AND ${HIP_COMPILER} IN_LIST _exclude_compiler)
|
||||
elseif(_exclude_test_from_build STREQUAL TRUE)
|
||||
else()
|
||||
make_test(${_label}/${_target} ${_arguments})
|
||||
@@ -392,10 +392,10 @@ macro(HIT_ADD_FILES _dir _label _parent)
|
||||
parse_test_named_command(_target _testname _arguments _exclude_platforms _exclude_runtime _exclude_compiler ${_cmd})
|
||||
string(REGEX REPLACE "/" "." target ${_label}/${_target})
|
||||
read_from_map("_exclude" "${target}" _exclude_test_from_build)
|
||||
if(_exclude_platforms STREQUAL "all" OR _exclude_platforms STREQUAL ${HIP_PLATFORM})
|
||||
elseif(NOT _exclude_runtime AND _exclude_compiler STREQUAL ${HIP_COMPILER})
|
||||
elseif(NOT _exclude_compiler AND _exclude_runtime STREQUAL ${HIP_RUNTIME})
|
||||
elseif(_exclude_runtime STREQUAL ${HIP_RUNTIME} AND _exclude_compiler STREQUAL ${HIP_COMPILER})
|
||||
if("all" IN_LIST _exclude_platforms OR ${HIP_PLATFORM} IN_LIST _exclude_platforms)
|
||||
elseif(NOT _exclude_runtime AND ${HIP_COMPILER} IN_LIST _exclude_compiler)
|
||||
elseif(NOT _exclude_compiler AND ${HIP_RUNTIME} IN_LIST _exclude_runtime)
|
||||
elseif(${HIP_RUNTIME} IN_LIST _exclude_runtime AND ${HIP_COMPILER} IN_LIST _exclude_compiler)
|
||||
elseif(_exclude_test_from_build STREQUAL TRUE)
|
||||
else()
|
||||
make_named_test(${_label}/${_target} ${_label}/${_testname}.tst ${_arguments})
|
||||
|
||||
@@ -18,7 +18,7 @@ THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/* HIT_START
|
||||
* BUILD: %t %s ../../test_common.cpp
|
||||
* BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM vdi
|
||||
* TEST: %t
|
||||
* HIT_END
|
||||
*/
|
||||
|
||||
@@ -18,7 +18,7 @@ THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/* HIT_START
|
||||
* BUILD: %t %s ../../test_common.cpp
|
||||
* BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM vdi
|
||||
* TEST: %t
|
||||
* HIT_END
|
||||
*/
|
||||
|
||||
@@ -22,7 +22,7 @@ THE SOFTWARE.
|
||||
|
||||
/* HIT_START
|
||||
* BUILD_CMD: libfoo_amd %hc %S/%s -o libfoo.so -Xcompiler -fPIC -lpthread -shared -DTEST_SHARED_LIBRARY EXCLUDE_HIP_PLATFORM nvcc
|
||||
* BUILD_CMD: libfoo_nvidia %hc %S/%s -o libfoo.so -Xcompiler -fPIC -lpthread -shared -DTEST_SHARED_LIBRARY EXCLUDE_HIP_PLATFORM hcc
|
||||
* BUILD_CMD: libfoo_nvidia %hc %S/%s -o libfoo.so -Xcompiler -fPIC -lpthread -shared -DTEST_SHARED_LIBRARY EXCLUDE_HIP_PLATFORM hcc vdi
|
||||
* BUILD_CMD: %t %hc %S/%s -o %T/%t -ldl
|
||||
* TEST: %t
|
||||
* HIT_END
|
||||
|
||||
@@ -19,10 +19,10 @@
|
||||
|
||||
|
||||
/* HIT_START
|
||||
* BUILD_CMD: gpu.o %hc -I%hip-path/include -g -c %S/gpu.cpp -o %T/gpu.o EXCLUDE_HIP_PLATFORM nvcc
|
||||
* BUILD_CMD: launchkernel.o %cc -D__HIP_PLATFORM_HCC__ -g -I%hip-path/include -c %S/LaunchKernel.c -o %T/launchkernel.o EXCLUDE_HIP_PLATFORM nvcc
|
||||
* BUILD_CMD: LaunchKernel %hc %T/launchkernel.o %T/gpu.o -g -Wl,--rpath=%hip-path/lib %hip-path/lib/libhip_hcc.so -o %T/%t DEPENDS gpu.o launchkernel.o EXCLUDE_HIP_PLATFORM nvcc
|
||||
* TEST: %t EXCLUDE_HIP_PLATFORM nvcc
|
||||
* BUILD_CMD: gpu.o %hc -I%hip-path/include -g -c %S/gpu.cpp -o %T/gpu.o EXCLUDE_HIP_PLATFORM nvcc vdi
|
||||
* BUILD_CMD: launchkernel.o %hc -D__HIP_PLATFORM_HCC__ -g -I%hip-path/include -c %S/LaunchKernel.c -o %T/launchkernel.o EXCLUDE_HIP_PLATFORM nvcc vdi
|
||||
* BUILD_CMD: LaunchKernel %hc %T/launchkernel.o %T/gpu.o -g -Wl,--rpath=%hip-path/lib %hip-path/lib/libhip_hcc.so -o %T/%t DEPENDS gpu.o launchkernel.o EXCLUDE_HIP_PLATFORM nvcc vdi
|
||||
* TEST: %t EXCLUDE_HIP_PLATFORM nvcc vdi
|
||||
* HIT_END
|
||||
*/
|
||||
|
||||
@@ -36,7 +36,7 @@ bool LaunchKernelArg()
|
||||
dim3 blocks = {1,1,1};
|
||||
dim3 threads = {1,1,1};
|
||||
|
||||
HIPCHECK(hipLaunchKernel(kernel, blocks, threads, NULL, 0, 0));
|
||||
HIPCHECK(hipLaunchKernel((const void *)kernel, blocks, threads, NULL, 0, 0));
|
||||
|
||||
return true;
|
||||
}
|
||||
@@ -52,7 +52,7 @@ bool LaunchKernelArg1()
|
||||
HIPCHECK(hipMalloc((void**)&A_d, sizeof(int)));
|
||||
|
||||
void* Args[]={&A_d};
|
||||
HIPCHECK(hipLaunchKernel(kernel1, blocks, threads, Args, 0, 0));
|
||||
HIPCHECK(hipLaunchKernel((const void *)kernel1, blocks, threads, Args, 0, 0));
|
||||
|
||||
// Get the result back to host memory
|
||||
HIPCHECK(hipMemcpy(&A, A_d, sizeof(int), hipMemcpyDeviceToHost));
|
||||
@@ -84,7 +84,7 @@ bool LaunchKernelArg2()
|
||||
HIPCHECK(hipMemcpy(B_d, &B, sizeof(int), hipMemcpyHostToDevice));
|
||||
|
||||
void* Args[]={&A_d, &B_d};
|
||||
HIPCHECK(hipLaunchKernel(kernel2, blocks, threads, Args,0,0));
|
||||
HIPCHECK(hipLaunchKernel((const void *)kernel2, blocks, threads, Args,0,0));
|
||||
|
||||
// Get the result back to host memory
|
||||
HIPCHECK(hipMemcpy(&A, A_d, sizeof(int), hipMemcpyDeviceToHost));
|
||||
@@ -123,7 +123,7 @@ bool LaunchKernelArg3()
|
||||
HIPCHECK(hipMemcpy(B_d, &B, sizeof(int), hipMemcpyHostToDevice));
|
||||
|
||||
void* Args[]={&A_d, &B_d, &C_d};
|
||||
HIPCHECK(hipLaunchKernel(kernel3, blocks, threads, Args,0,0));
|
||||
HIPCHECK(hipLaunchKernel((const void *)kernel3, blocks, threads, Args,0,0));
|
||||
|
||||
// Get the result back to host memory
|
||||
HIPCHECK(hipMemcpy(&C, C_d, sizeof(int), hipMemcpyDeviceToHost));
|
||||
@@ -154,7 +154,7 @@ bool LaunchKernelArg4()
|
||||
struct things t = {2,20,200};
|
||||
|
||||
void* Args[]={&A_d, &c, &s, &i, &t};
|
||||
HIPCHECK(hipLaunchKernel(kernel4, blocks, threads, Args, 0, 0));
|
||||
HIPCHECK(hipLaunchKernel((const void *)kernel4, blocks, threads, Args, 0, 0));
|
||||
|
||||
// Get the result back to host memory
|
||||
HIPCHECK(hipMemcpy(&A, A_d, sizeof(int), hipMemcpyDeviceToHost));
|
||||
|
||||
@@ -18,10 +18,10 @@
|
||||
* */
|
||||
|
||||
/* HIT_START
|
||||
* BUILD_CMD: hipMalloc %cc -D__HIP_PLATFORM_NVCC__ -I%hip-path/include -I/usr/local/cuda/include %S/%s -o %T/hipMalloc_nv -L/usr/local/cuda/lib64 -lcudart EXCLUDE_HIP_PLATFORM hcc
|
||||
* BUILD_CMD: hipMalloc %cc -D__HIP_PLATFORM_HCC__ -I%hip-path/include %S/%s -Wl,--rpath=%hip-path/lib %hip-path/lib/libhip_hcc.so -o %T/hipMalloc_hcc EXCLUDE_HIP_PLATFORM nvcc
|
||||
* TEST: hipMalloc_nv EXCLUDE_HIP_PLATFORM hcc
|
||||
* TEST: hipMalloc_hcc EXCLUDE_HIP_PLATFORM nvcc
|
||||
* BUILD_CMD: hipMalloc %cc -D__HIP_PLATFORM_NVCC__ -I%hip-path/include -I/usr/local/cuda/include %S/%s -o %T/hipMalloc_nv -L/usr/local/cuda/lib64 -lcudart EXCLUDE_HIP_PLATFORM hcc vdi
|
||||
* BUILD_CMD: hipMalloc %cc -D__HIP_PLATFORM_HCC__ -I%hip-path/include %S/%s -Wl,--rpath=%hip-path/lib %hip-path/lib/libhip_hcc.so -o %T/hipMalloc_hcc EXCLUDE_HIP_PLATFORM nvcc vdi
|
||||
* TEST: hipMalloc_nv EXCLUDE_HIP_PLATFORM hcc vdi
|
||||
* TEST: hipMalloc_hcc EXCLUDE_HIP_PLATFORM nvcc vdi
|
||||
* HIT_END
|
||||
*/
|
||||
|
||||
|
||||
@@ -20,7 +20,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
/* HIT_START
|
||||
* BUILD: %t %s ../test_common.cpp LINK_OPTIONS hiprtc EXCLUDE_HIP_PLATFORM nvcc
|
||||
* BUILD: %t %s ../test_common.cpp LINK_OPTIONS hiprtc EXCLUDE_HIP_PLATFORM nvcc vdi
|
||||
* TEST: %t
|
||||
* HIT_END
|
||||
*/
|
||||
|
||||
@@ -20,7 +20,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
/* HIT_START
|
||||
* BUILD: %t %s ../test_common.cpp LINK_OPTIONS hiprtc EXCLUDE_HIP_PLATFORM nvcc
|
||||
* BUILD: %t %s ../test_common.cpp LINK_OPTIONS hiprtc EXCLUDE_HIP_PLATFORM nvcc vdi
|
||||
* TEST: %t
|
||||
* HIT_END
|
||||
*/
|
||||
@@ -143,7 +143,7 @@ int main()
|
||||
hipMemcpyDtoH(hOut.get(), dOut, bufferSize);
|
||||
|
||||
for (size_t i = 0; i < n; ++i) {
|
||||
if (a * hX[i] + hY[i] != hOut[i]) { failed("Validation failed."); }
|
||||
if (fabs(a * hX[i] + hY[i] - hOut[i]) > fabs(hOut[i])* 1e-6) { failed("Validation failed."); }
|
||||
}
|
||||
|
||||
hipFree(dX);
|
||||
|
||||
@@ -21,7 +21,7 @@ THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/* HIT_START
|
||||
* BUILD: %t %s EXCLUDE_HIP_PLATFORM all
|
||||
* BUILD: %t %s EXCLUDE_HIP_PLATFORM nvcc EXCLUDE_HIP_RUNTIME HCC EXCLUDE_HIP_COMPILER hcc
|
||||
* HIT_END
|
||||
*/
|
||||
|
||||
|
||||
@@ -21,7 +21,7 @@ THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/* HIT_START
|
||||
* BUILD: %t %s EXCLUDE_HIP_PLATFORM all
|
||||
* BUILD: %t %s EXCLUDE_HIP_PLATFORM nvcc EXCLUDE_HIP_RUNTIME HCC EXCLUDE_HIP_COMPILER hcc
|
||||
* HIT_END
|
||||
*/
|
||||
|
||||
|
||||
@@ -30,7 +30,7 @@ THE SOFTWARE.
|
||||
|
||||
void test(size_t N) {
|
||||
size_t Nbytes = N * sizeof(int);
|
||||
|
||||
#if defined(__HIP_PLATFORM_HCC__) && GENERIC_GRID_LAUNCH == 1 && defined(__HCC__)
|
||||
int *A_d, *B_d, *C_d;
|
||||
int *A_h, *B_h, *C_h;
|
||||
|
||||
@@ -51,6 +51,7 @@ void test(size_t N) {
|
||||
HIPCHECK(hipDeviceSynchronize());
|
||||
|
||||
HipTest::checkVectorADD(A_h, B_h, C_h, N);
|
||||
#endif
|
||||
}
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
|
||||
@@ -24,9 +24,9 @@ THE SOFTWARE.
|
||||
|
||||
/* HIT_START
|
||||
* BUILD: %t %s ../test_common.cpp
|
||||
* TEST: %t EXCLUDE_HIP_PLATFORM hcc
|
||||
* TEST: %t --memcpyWithPeer EXCLUDE_HIP_PLATFORM hcc
|
||||
* TEST: %t --mirrorPeers EXCLUDE_HIP_PLATFORM hcc
|
||||
* TEST: %t EXCLUDE_HIP_PLATFORM hcc vdi
|
||||
* TEST: %t --memcpyWithPeer EXCLUDE_HIP_PLATFORM hcc vdi
|
||||
* TEST: %t --mirrorPeers EXCLUDE_HIP_PLATFORM hcc vdi
|
||||
* HIT_END
|
||||
*/
|
||||
|
||||
|
||||
@@ -0,0 +1,76 @@
|
||||
/*
|
||||
Copyright (c) 2020 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/* HIT_START
|
||||
* BUILD: %t %s EXCLUDE_HIP_PLATFORM nvcc EXCLUDE_HIP_RUNTIME HCC EXCLUDE_HIP_COMPILER hcc
|
||||
* TEST: %t EXCLUDE_HIP_PLATFORM nvcc EXCLUDE_HIP_RUNTIME HCC EXCLUDE_HIP_COMPILER hcc
|
||||
* HIT_END
|
||||
*/
|
||||
|
||||
#include "test_common.h"
|
||||
#include "printf_common.h"
|
||||
|
||||
// Device kernel that prints one line per conversion using the '#'
// (alternate form) printf flag. The order of the calls defines the order of
// the lines the host side compares against.
__global__ void test_kernel() {
    // Integer conversions: octal and hexadecimal, plus zero padding.
    printf("%#o\n", 042);
    printf("%#x\n", 0x42);
    printf("%#X\n", 0x42);
    printf("%#08x\n", 0x42);

    // Floating-point conversions: fixed, exponent, general and hex-float.
    printf("%#f\n", -123.456);
    printf("%#F\n", 123.456);
    printf("%#e\n", 123.456);
    printf("%#E\n", -123.456);
    printf("%#g\n", -123.456);
    printf("%#G\n", 123.456);
    printf("%#a\n", 123.456);
    printf("%#A\n", -123.456);

    // Hexadecimal combined with precision, field width and left-justify.
    printf("%#.8x\n", 0x42);
    printf("%#16.8x\n", 0x42);
    printf("%-#16.8x\n", 0x42);
}
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
std::string reference(R"here(042
|
||||
0x42
|
||||
0X42
|
||||
0x000042
|
||||
-123.456000
|
||||
123.456000
|
||||
1.234560e+02
|
||||
-1.234560E+02
|
||||
-123.456
|
||||
123.456
|
||||
0x1.edd2f1a9fbe77p+6
|
||||
-0X1.EDD2F1A9FBE77P+6
|
||||
0x00000042
|
||||
0x00000042
|
||||
0x00000042
|
||||
)here");
|
||||
|
||||
CaptureStream captured(stdout);
|
||||
hipLaunchKernelGGL(test_kernel, dim3(1), dim3(1), 0, 0);
|
||||
hipStreamSynchronize(0);
|
||||
auto CapturedData = captured.getCapturedData();
|
||||
std::string device_output = gulp(CapturedData);
|
||||
|
||||
HIPASSERT(device_output == reference);
|
||||
passed();
|
||||
}
|
||||
@@ -0,0 +1,275 @@
|
||||
/*
|
||||
Copyright (c) 2020 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/* HIT_START
|
||||
* BUILD: %t %s EXCLUDE_HIP_PLATFORM nvcc EXCLUDE_HIP_RUNTIME HCC EXCLUDE_HIP_COMPILER hcc
|
||||
* TEST: %t EXCLUDE_HIP_PLATFORM nvcc EXCLUDE_HIP_RUNTIME HCC EXCLUDE_HIP_COMPILER hcc
|
||||
* HIT_END
|
||||
*/
|
||||
|
||||
#include "test_common.h"
|
||||
#include "printf_common.h"
|
||||
#include <vector>
|
||||
|
||||
// Global string constants don't work inside device functions, so we
|
||||
// use a macro to repeat the declaration in host and device contexts.
|
||||
DECLARE_DATA();
|
||||
|
||||
// Every thread prints the same constant line and stores the value returned
// by printf in its own slot of retval.
__global__ void kernel_uniform0(int *retval) {
    const uint global_id = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
    const int written = printf("Hello World\n");
    retval[global_id] = written;
}
|
||||
|
||||
static void test_uniform0(int *retval, uint num_blocks,
|
||||
uint threads_per_block) {
|
||||
CaptureStream captured(stdout);
|
||||
|
||||
uint num_threads = num_blocks * threads_per_block;
|
||||
for (uint i = 0; i != num_threads; ++i) {
|
||||
retval[i] = 0x23232323;
|
||||
}
|
||||
|
||||
hipLaunchKernelGGL(kernel_uniform0, dim3(num_blocks), dim3(threads_per_block),
|
||||
0, 0, retval);
|
||||
hipStreamSynchronize(0);
|
||||
auto CapturedData = captured.getCapturedData();
|
||||
|
||||
for (uint ii = 0; ii != num_threads; ++ii) {
|
||||
HIPASSERT(retval[ii] == strlen("Hello World\n"));
|
||||
}
|
||||
|
||||
std::map<std::string, int> linecount;
|
||||
for (std::string line; std::getline(CapturedData, line);) {
|
||||
linecount[line]++;
|
||||
}
|
||||
|
||||
HIPASSERT(linecount.size() == 1);
|
||||
HIPASSERT(linecount["Hello World"] == num_threads);
|
||||
}
|
||||
|
||||
// Every thread prints the same formatted line (one integer argument) and
// stores the value returned by printf in its own slot of retval.
__global__ void kernel_uniform1(int *retval) {
    const uint global_id = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
    const int written = printf("Six times Eight is %d\n", 42);
    retval[global_id] = written;
}
|
||||
|
||||
static void test_uniform1(int *retval, uint num_blocks,
|
||||
uint threads_per_block) {
|
||||
CaptureStream captured(stdout);
|
||||
|
||||
uint num_threads = num_blocks * threads_per_block;
|
||||
for (uint i = 0; i != num_threads; ++i) {
|
||||
retval[i] = 0x23232323;
|
||||
}
|
||||
|
||||
hipLaunchKernelGGL(kernel_uniform1, dim3(num_blocks), dim3(threads_per_block),
|
||||
0, 0, retval);
|
||||
hipStreamSynchronize(0);
|
||||
auto CapturedData = captured.getCapturedData();
|
||||
|
||||
for (uint ii = 0; ii != num_threads; ++ii) {
|
||||
HIPASSERT(retval[ii] == strlen("Six times Eight is 42") + 1);
|
||||
}
|
||||
|
||||
std::map<std::string, int> linecount;
|
||||
for (std::string line; std::getline(CapturedData, line);) {
|
||||
linecount[line]++;
|
||||
}
|
||||
|
||||
HIPASSERT(linecount.size() == 1);
|
||||
HIPASSERT(linecount["Six times Eight is 42"] == num_threads);
|
||||
}
|
||||
|
||||
// Every thread prints its own global thread id, so the output differs per
// thread, and stores the value returned by printf in its slot of retval.
__global__ void kernel_divergent0(int *retval) {
    const uint global_id = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
    const int written = printf("Thread ID: %d\n", global_id);
    retval[global_id] = written;
}
|
||||
|
||||
static void test_divergent0(int *retval, uint num_blocks,
|
||||
uint threads_per_block) {
|
||||
CaptureStream captured(stdout);
|
||||
|
||||
uint num_threads = num_blocks * threads_per_block;
|
||||
for (uint i = 0; i != num_threads; ++i) {
|
||||
retval[i] = 0x23232323;
|
||||
}
|
||||
|
||||
hipLaunchKernelGGL(kernel_divergent0, dim3(num_blocks),
|
||||
dim3(threads_per_block), 0, 0, retval);
|
||||
hipStreamSynchronize(0);
|
||||
auto CapturedData = captured.getCapturedData();
|
||||
|
||||
for (uint ii = 0; ii != 10; ++ii) {
|
||||
HIPASSERT(retval[ii] == 13);
|
||||
}
|
||||
|
||||
for (uint ii = 10; ii != num_threads; ++ii) {
|
||||
HIPASSERT(retval[ii] == 14);
|
||||
}
|
||||
|
||||
std::vector<uint> threadIds;
|
||||
for (std::string line; std::getline(CapturedData, line);) {
|
||||
auto pos = line.find(':');
|
||||
HIPASSERT(line.substr(0, pos) == "Thread ID");
|
||||
threadIds.push_back(std::stoul(line.substr(pos + 2)));
|
||||
}
|
||||
|
||||
std::sort(threadIds.begin(), threadIds.end());
|
||||
HIPASSERT(threadIds.size() == num_threads);
|
||||
HIPASSERT(threadIds.back() == num_threads - 1);
|
||||
}
|
||||
|
||||
// Only threads with an odd global id print; they store printf's return value
// in retval. Threads with an even id print nothing and store -1.
__global__ void kernel_divergent1(int *retval) {
    const uint global_id = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
    const bool is_odd = (global_id % 2) != 0;
    if (!is_odd) {
        retval[global_id] = -1;
        return;
    }
    retval[global_id] = printf("Hello World\n");
}
|
||||
|
||||
static void test_divergent1(int *retval, uint num_blocks,
|
||||
uint threads_per_block) {
|
||||
CaptureStream captured(stdout);
|
||||
|
||||
uint num_threads = num_blocks * threads_per_block;
|
||||
for (uint i = 0; i != num_threads; ++i) {
|
||||
retval[i] = 0x23232323;
|
||||
}
|
||||
|
||||
hipLaunchKernelGGL(kernel_divergent1, dim3(num_blocks),
|
||||
dim3(threads_per_block), 0, 0, retval);
|
||||
hipStreamSynchronize(0);
|
||||
auto CapturedData = captured.getCapturedData();
|
||||
|
||||
for (uint ii = 0; ii != num_threads; ++ii) {
|
||||
if (ii % 2) {
|
||||
HIPASSERT(retval[ii] == strlen("Hello World\n"));
|
||||
} else {
|
||||
HIPASSERT(retval[ii] == -1);
|
||||
}
|
||||
}
|
||||
|
||||
std::map<std::string, int> linecount;
|
||||
for (std::string line; std::getline(CapturedData, line);) {
|
||||
linecount[line]++;
|
||||
}
|
||||
|
||||
HIPASSERT(linecount.size() == 1);
|
||||
HIPASSERT(linecount["Hello World"] == num_threads / 2);
|
||||
}
|
||||
|
||||
// Every thread prints three strings in sequence (long, short, long) and
// stores the sum of the three printf return values in its slot of retval.
__global__ void kernel_series(int *retval) {
    // Brings the msg_* strings into device scope.
    DECLARE_DATA();

    const uint global_id = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;

    int written = printf("%s\n", msg_long1);
    written += printf("%s\n", msg_short);
    written += printf("%s\n", msg_long2);

    retval[global_id] = written;
}
|
||||
|
||||
static void test_series(int *retval, uint num_blocks, uint threads_per_block) {
|
||||
CaptureStream captured(stdout);
|
||||
|
||||
uint num_threads = num_blocks * threads_per_block;
|
||||
for (uint i = 0; i != num_threads; ++i) {
|
||||
retval[i] = 0x23232323;
|
||||
}
|
||||
|
||||
hipLaunchKernelGGL(kernel_series, dim3(num_blocks), dim3(threads_per_block),
|
||||
0, 0, retval);
|
||||
hipStreamSynchronize(0);
|
||||
auto CapturedData = captured.getCapturedData();
|
||||
|
||||
for (uint ii = 0; ii != num_threads; ++ii) {
|
||||
HIPASSERT(retval[ii] ==
|
||||
strlen(msg_long1) + strlen(msg_short) + strlen(msg_long2) + 3);
|
||||
}
|
||||
|
||||
std::map<std::string, int> linecount;
|
||||
for (std::string line; std::getline(CapturedData, line);) {
|
||||
linecount[line]++;
|
||||
}
|
||||
|
||||
HIPASSERT(linecount.size() == 3);
|
||||
HIPASSERT(linecount[msg_long1] == num_threads);
|
||||
HIPASSERT(linecount[msg_long2] == num_threads);
|
||||
HIPASSERT(linecount[msg_short] == num_threads);
|
||||
}
|
||||
|
||||
// Thread `tid` prints the integers 0..tid, one per line, so the number of
// printf calls differs from thread to thread.
__global__ void kernel_divergent_loop() {
  const uint tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;

  // An unsigned counter avoids comparing a signed int with the unsigned tid;
  // the cast keeps the argument type in line with the "%d" conversion.
  for (uint i = 0; i <= tid; ++i) {
    printf("%d\n", static_cast<int>(i));
  }
}
|
||||
|
||||
static void test_divergent_loop(uint num_blocks, uint threads_per_block) {
|
||||
CaptureStream captured(stdout);
|
||||
|
||||
uint num_threads = num_blocks * threads_per_block;
|
||||
|
||||
hipLaunchKernelGGL(kernel_divergent_loop, dim3(num_blocks), dim3(threads_per_block),
|
||||
0, 0);
|
||||
hipStreamSynchronize(0);
|
||||
auto CapturedData = captured.getCapturedData();
|
||||
|
||||
std::map<int, int> count;
|
||||
while (true) {
|
||||
int i;
|
||||
CapturedData >> i;
|
||||
if (CapturedData.fail())
|
||||
break;
|
||||
count[i]++;
|
||||
}
|
||||
|
||||
HIPASSERT(count.size() == num_threads);
|
||||
for (int i = 0; i != num_threads; ++i) {
|
||||
HIPASSERT(count[i] == num_threads - i);
|
||||
}
|
||||
}
|
||||
|
||||
int main() {
|
||||
uint num_blocks = 1;
|
||||
uint threads_per_block = 64;
|
||||
uint num_threads = num_blocks * threads_per_block;
|
||||
|
||||
void *retval_void;
|
||||
HIPCHECK(hipHostMalloc(&retval_void, 4 * num_threads));
|
||||
auto retval = reinterpret_cast<int *>(retval_void);
|
||||
|
||||
test_uniform0(retval, num_blocks, threads_per_block);
|
||||
test_uniform1(retval, num_blocks, threads_per_block);
|
||||
test_divergent0(retval, num_blocks, threads_per_block);
|
||||
test_divergent1(retval, num_blocks, threads_per_block);
|
||||
test_series(retval, num_blocks, threads_per_block);
|
||||
test_divergent_loop(num_blocks, threads_per_block);
|
||||
|
||||
passed();
|
||||
}
|
||||
@@ -0,0 +1,68 @@
|
||||
/*
|
||||
Copyright (c) 2020 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/* HIT_START
|
||||
* BUILD: %t %s EXCLUDE_HIP_PLATFORM nvcc EXCLUDE_HIP_RUNTIME HCC EXCLUDE_HIP_COMPILER hcc
|
||||
* TEST: %t EXCLUDE_HIP_PLATFORM nvcc EXCLUDE_HIP_RUNTIME HCC EXCLUDE_HIP_COMPILER hcc
|
||||
* HIT_END
|
||||
*/
|
||||
|
||||
#include "test_common.h"
|
||||
#include "printf_common.h"
|
||||
|
||||
// Kernel that exercises the printf flag characters '0', '+', '-' and ' ',
// alone and combined with a field width or precision.
__global__ void test_kernel() {
  // '0' flag with a field width of 8 on integer and floating conversions.
  printf("%08d\n", 42);
  printf("%08i\n", -42);
  printf("%08u\n", 42);
  printf("%08g\n", 123.456);

  // '+' flag, on its own and together with '0' in both orders.
  printf("%0+8d\n", 42);
  printf("%+d\n", -42);
  printf("%+08d\n", 42);

  // '-' flag on a string and ' ' flag on a negative integer.
  printf("%-8s\n", "xyzzy");
  printf("% i\n", -42);

  // Width 16 with precision 8, with and without the '-' flag.
  printf("%-16.8d\n", 42);
  printf("%16.8d\n", 42);
}
|
||||
|
||||
// Runs test_kernel on one thread with stdout captured and compares the
// captured text with the `reference` raw string literal.
// NOTE(review): runs of spaces inside `reference` look collapsed in this copy
// of the file (the padded conversions in test_kernel would need them) —
// confirm against the pristine source before editing the literal.
int main(int argc, char **argv) {
|
||||
std::string reference(R"here(00000042
|
||||
-0000042
|
||||
00000042
|
||||
0123.456
|
||||
+0000042
|
||||
-42
|
||||
+0000042
|
||||
xyzzy
|
||||
-42
|
||||
00000042
|
||||
00000042
|
||||
)here");
|
||||
|
||||
// Capture starts here; getCapturedData() below ends it.
CaptureStream captured(stdout);
|
||||
hipLaunchKernelGGL(test_kernel, dim3(1), dim3(1), 0, 0);
|
||||
// NOTE(review): the hipError_t returned by this call is discarded.
hipStreamSynchronize(0);
|
||||
auto CapturedData = captured.getCapturedData();
|
||||
std::string device_output = gulp(CapturedData);
|
||||
|
||||
HIPASSERT(device_output == reference);
|
||||
passed();
|
||||
}
|
||||
@@ -0,0 +1,77 @@
|
||||
/*
|
||||
Copyright (c) 2020 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/* HIT_START
|
||||
* BUILD: %t %s EXCLUDE_HIP_PLATFORM nvcc EXCLUDE_HIP_RUNTIME HCC EXCLUDE_HIP_COMPILER hcc
|
||||
* TEST: %t EXCLUDE_HIP_PLATFORM nvcc EXCLUDE_HIP_RUNTIME HCC EXCLUDE_HIP_COMPILER hcc
|
||||
* HIT_END
|
||||
*/
|
||||
|
||||
#include "test_common.h"
|
||||
#include "printf_common.h"
|
||||
|
||||
DECLARE_DATA();
|
||||
|
||||
// Each thread prints the three messages in an order rotated by its id;
// threads whose id is a multiple of 3 print the short message once more
// after the first line.
__global__ void print_things() {
  DECLARE_DATA();

  const char *table[] = {msg_short, msg_long1, msg_long2};
  const uint gid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
  const uint phase = gid % 3;

  printf("%s\n", table[phase]);
  if (phase == 0) {
    printf("%s\n", msg_short);
  }
  printf("%s\n", table[(gid + 1) % 3]);
  printf("%s\n", table[(gid + 2) % 3]);
}
|
||||
|
||||
int main() {
|
||||
uint num_blocks = 14;
|
||||
uint threads_per_block = 250;
|
||||
uint threads_per_device = num_blocks * threads_per_block;
|
||||
|
||||
int num_devices = 0;
|
||||
hipGetDeviceCount(&num_devices);
|
||||
|
||||
CaptureStream captured(stdout);
|
||||
for (int i = 0; i != num_devices; ++i) {
|
||||
hipSetDevice(i);
|
||||
hipLaunchKernelGGL(print_things, dim3(num_blocks), dim3(threads_per_block),
|
||||
0, 0);
|
||||
hipDeviceSynchronize();
|
||||
}
|
||||
auto CapturedData = captured.getCapturedData();
|
||||
|
||||
std::map<std::string, int> linecount;
|
||||
for (std::string line; std::getline(CapturedData, line);) {
|
||||
linecount[line]++;
|
||||
}
|
||||
|
||||
uint num_threads = threads_per_device * num_devices;
|
||||
HIPASSERT(linecount.size() == 3);
|
||||
HIPASSERT(linecount[msg_long1] == num_threads);
|
||||
HIPASSERT(linecount[msg_long2] == num_threads);
|
||||
HIPASSERT(linecount[msg_short] ==
|
||||
num_threads + ((threads_per_device + 2) / 3) * num_devices);
|
||||
|
||||
passed();
|
||||
}
|
||||
@@ -0,0 +1,301 @@
|
||||
/*
|
||||
Copyright (c) 2020 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/* HIT_START
|
||||
* BUILD: %t %s EXCLUDE_HIP_PLATFORM nvcc EXCLUDE_HIP_RUNTIME HCC EXCLUDE_HIP_COMPILER hcc
|
||||
* TEST: %t EXCLUDE_HIP_PLATFORM nvcc EXCLUDE_HIP_RUNTIME HCC EXCLUDE_HIP_COMPILER hcc
|
||||
* HIT_END
|
||||
*/
|
||||
|
||||
#include "test_common.h"
|
||||
#include "printf_common.h"
|
||||
#include <vector>
|
||||
|
||||
// Global string constants don't work inside device functions, so we
|
||||
// use a macro to repeat the declaration in host and device contexts.
|
||||
DECLARE_DATA();
|
||||
|
||||
// Each thread selects one of three strings from its id and passes it to a
// single printf call site, storing printf's return value in retval.
__global__ void kernel_mixed0(int *retval) {
  DECLARE_DATA();

  uint tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;

  // Three strings passed as divergent values to the same hostcall.
  // Initialised so the pointer never holds an indeterminate value; one of
  // the cases below always assigns it because tid % 3 is 0, 1 or 2.
  const char *msg = nullptr;
  switch (tid % 3) {
  case 0:
    msg = msg_short;
    break;
  case 1:
    msg = msg_long1;
    break;
  case 2:
    msg = msg_long2;
    break;
  }

  retval[tid] = printf("%s\n", msg);
}
|
||||
|
||||
static void test_mixed0(int *retval, uint num_blocks, uint threads_per_block) {
|
||||
CaptureStream captured(stdout);
|
||||
|
||||
uint num_threads = num_blocks * threads_per_block;
|
||||
for (uint i = 0; i != num_threads; ++i) {
|
||||
retval[i] = 0x23232323;
|
||||
}
|
||||
|
||||
hipLaunchKernelGGL(kernel_mixed0, dim3(num_blocks), dim3(threads_per_block),
|
||||
0, 0, retval);
|
||||
hipStreamSynchronize(0);
|
||||
auto CapturedData = captured.getCapturedData();
|
||||
|
||||
for (uint ii = 0; ii != num_threads; ++ii) {
|
||||
switch (ii % 3) {
|
||||
case 0:
|
||||
HIPASSERT(retval[ii] == strlen(msg_short) + 1);
|
||||
break;
|
||||
case 1:
|
||||
HIPASSERT(retval[ii] == strlen(msg_long1) + 1);
|
||||
break;
|
||||
case 2:
|
||||
HIPASSERT(retval[ii] == strlen(msg_long2) + 1);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
std::map<std::string, int> linecount;
|
||||
for (std::string line; std::getline(CapturedData, line);) {
|
||||
linecount[line]++;
|
||||
}
|
||||
|
||||
HIPASSERT(linecount.size() == 3);
|
||||
HIPASSERT(linecount[msg_short] == (num_threads + 2) / 3);
|
||||
HIPASSERT(linecount[msg_long1] == (num_threads + 1) / 3);
|
||||
HIPASSERT(linecount[msg_long2] == (num_threads + 0) / 3);
|
||||
}
|
||||
|
||||
// Each thread takes one of three branches, each with its own printf call
// site and its own string, and stores that call's return value in retval.
__global__ void kernel_mixed1(int *retval) {
  DECLARE_DATA();

  const uint gid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
  const uint lane = gid % 3;

  // Three strings passed to divergent hostcalls.
  if (lane == 0) {
    retval[gid] = printf("%s\n", msg_short);
  } else if (lane == 1) {
    retval[gid] = printf("%s\n", msg_long1);
  } else {
    retval[gid] = printf("%s\n", msg_long2);
  }
}
|
||||
|
||||
static void test_mixed1(int *retval, uint num_blocks, uint threads_per_block) {
|
||||
CaptureStream captured(stdout);
|
||||
|
||||
uint num_threads = num_blocks * threads_per_block;
|
||||
for (uint i = 0; i != num_threads; ++i) {
|
||||
retval[i] = 0x23232323;
|
||||
}
|
||||
|
||||
hipLaunchKernelGGL(kernel_mixed1, dim3(num_blocks), dim3(threads_per_block),
|
||||
0, 0, retval);
|
||||
hipStreamSynchronize(0);
|
||||
auto CapturedData = captured.getCapturedData();
|
||||
|
||||
for (uint ii = 0; ii != num_threads; ++ii) {
|
||||
switch (ii % 3) {
|
||||
case 0:
|
||||
HIPASSERT(retval[ii] == strlen(msg_short) + 1);
|
||||
break;
|
||||
case 1:
|
||||
HIPASSERT(retval[ii] == strlen(msg_long1) + 1);
|
||||
break;
|
||||
case 2:
|
||||
HIPASSERT(retval[ii] == strlen(msg_long2) + 1);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
std::map<std::string, int> linecount;
|
||||
for (std::string line; std::getline(CapturedData, line);) {
|
||||
linecount[line]++;
|
||||
}
|
||||
|
||||
HIPASSERT(linecount.size() == 3);
|
||||
HIPASSERT(linecount[msg_short] == (num_threads + 2) / 3);
|
||||
HIPASSERT(linecount[msg_long1] == (num_threads + 1) / 3);
|
||||
HIPASSERT(linecount[msg_long2] == (num_threads + 0) / 3);
|
||||
}
|
||||
|
||||
// Every thread prints all three messages on one line through a single printf
// call; the order of the three is rotated by the thread id.
__global__ void kernel_mixed2(int *retval) {
  DECLARE_DATA();

  const uint gid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;

  // Three different strings. All workitems print all three, but
  // in different orders.
  const char *table[] = {msg_short, msg_long1, msg_long2};
  const char *first = table[gid % 3];
  const char *second = table[(gid + 1) % 3];
  const char *third = table[(gid + 2) % 3];

  retval[gid] = printf("%s%s%s\n", first, second, third);
}
|
||||
|
||||
static void test_mixed2(int *retval, uint num_blocks, uint threads_per_block) {
|
||||
CaptureStream captured(stdout);
|
||||
|
||||
uint num_threads = num_blocks * threads_per_block;
|
||||
for (uint i = 0; i != num_threads; ++i) {
|
||||
retval[i] = 0x23232323;
|
||||
}
|
||||
|
||||
hipLaunchKernelGGL(kernel_mixed2, dim3(num_blocks), dim3(threads_per_block),
|
||||
0, 0, retval);
|
||||
hipStreamSynchronize(0);
|
||||
auto CapturedData = captured.getCapturedData();
|
||||
|
||||
for (uint ii = 0; ii != num_threads; ++ii) {
|
||||
HIPASSERT(retval[ii] ==
|
||||
strlen(msg_short) + strlen(msg_long1) + strlen(msg_long2) + 1);
|
||||
}
|
||||
|
||||
std::map<std::string, int> linecount;
|
||||
for (std::string line; std::getline(CapturedData, line);) {
|
||||
linecount[line]++;
|
||||
}
|
||||
|
||||
std::string str1 =
|
||||
std::string(msg_short) + std::string(msg_long1) + std::string(msg_long2);
|
||||
std::string str2 =
|
||||
std::string(msg_long1) + std::string(msg_long2) + std::string(msg_short);
|
||||
std::string str3 =
|
||||
std::string(msg_long2) + std::string(msg_short) + std::string(msg_long1);
|
||||
|
||||
HIPASSERT(linecount.size() == 3);
|
||||
HIPASSERT(linecount[str1] == (num_threads + 2) / 3);
|
||||
HIPASSERT(linecount[str2] == (num_threads + 1) / 3);
|
||||
HIPASSERT(linecount[str3] == (num_threads + 0) / 3);
|
||||
}
|
||||
|
||||
// Every thread prints the two long messages; threads whose id is a multiple
// of 3 also print the short message in between. The sum of the printf return
// values is stored in retval.
__global__ void kernel_mixed3(int *retval) {
  DECLARE_DATA();

  const uint gid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
  const bool prints_short = (gid % 3 == 0);

  int written = printf("%s\n", msg_long1);
  if (prints_short) {
    written += printf("%s\n", msg_short);
  }
  written += printf("%s\n", msg_long2);

  retval[gid] = written;
}
|
||||
|
||||
static void test_mixed3(int *retval, uint num_blocks, uint threads_per_block) {
|
||||
CaptureStream captured(stdout);
|
||||
|
||||
uint num_threads = num_blocks * threads_per_block;
|
||||
for (uint i = 0; i != num_threads; ++i) {
|
||||
retval[i] = 0x23232323;
|
||||
}
|
||||
|
||||
hipLaunchKernelGGL(kernel_mixed3, dim3(num_blocks), dim3(threads_per_block),
|
||||
0, 0, retval);
|
||||
hipStreamSynchronize(0);
|
||||
auto CapturedData = captured.getCapturedData();
|
||||
|
||||
for (uint ii = 0; ii != num_threads; ++ii) {
|
||||
if (ii % 3 == 0) {
|
||||
HIPASSERT(retval[ii] ==
|
||||
strlen(msg_long1) + strlen(msg_short) + strlen(msg_long2) + 3);
|
||||
} else {
|
||||
HIPASSERT(retval[ii] == strlen(msg_long1) + strlen(msg_long2) + 2);
|
||||
}
|
||||
}
|
||||
|
||||
std::map<std::string, int> linecount;
|
||||
for (std::string line; std::getline(CapturedData, line);) {
|
||||
linecount[line]++;
|
||||
}
|
||||
|
||||
HIPASSERT(linecount.size() == 3);
|
||||
HIPASSERT(linecount[msg_long1] == num_threads);
|
||||
HIPASSERT(linecount[msg_long2] == num_threads);
|
||||
HIPASSERT(linecount[msg_short] == (num_threads + 2) / 3);
|
||||
}
|
||||
|
||||
// Each thread prints seven lines of three consecutive integers, covering the
// 21 values tid * 21 .. tid * 21 + 20.
__global__ void kernel_numbers() {
  uint tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
  for (uint i = 0; i != 7; ++i) {
    uint base = tid * 21 + i * 3;
    // The arguments are unsigned, so use the matching %u conversion.
    printf("%u %u %u\n", base, base + 1, base + 2);
  }
}
|
||||
|
||||
static void test_numbers(uint num_blocks, uint threads_per_block) {
|
||||
CaptureStream captured(stdout);
|
||||
uint num_threads = num_blocks * threads_per_block;
|
||||
|
||||
hipLaunchKernelGGL(kernel_numbers, dim3(num_blocks), dim3(threads_per_block),
|
||||
0, 0);
|
||||
hipStreamSynchronize(0);
|
||||
auto CapturedData = captured.getCapturedData();
|
||||
|
||||
std::vector<uint> points;
|
||||
while (true) {
|
||||
uint i;
|
||||
CapturedData >> i;
|
||||
if (CapturedData.fail())
|
||||
break;
|
||||
points.push_back(i);
|
||||
}
|
||||
|
||||
std::sort(points.begin(), points.end());
|
||||
points.erase(std::unique(points.begin(), points.end()), points.end());
|
||||
HIPASSERT(points.size() == 21 * num_threads);
|
||||
HIPASSERT(points.back() == 21 * num_threads - 1);
|
||||
|
||||
passed();
|
||||
}
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
uint num_blocks = 150;
|
||||
uint threads_per_block = 250;
|
||||
uint num_threads = num_blocks * threads_per_block;
|
||||
|
||||
void *retval_void;
|
||||
HIPCHECK(hipHostMalloc(&retval_void, 4 * num_threads));
|
||||
auto retval = reinterpret_cast<int *>(retval_void);
|
||||
|
||||
test_mixed0(retval, num_blocks, threads_per_block);
|
||||
test_mixed1(retval, num_blocks, threads_per_block);
|
||||
test_mixed2(retval, num_blocks, threads_per_block);
|
||||
test_mixed3(retval, num_blocks, threads_per_block);
|
||||
test_numbers(num_blocks, threads_per_block);
|
||||
|
||||
passed();
|
||||
}
|
||||
@@ -0,0 +1,90 @@
|
||||
/*
|
||||
Copyright (c) 2020 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/* HIT_START
|
||||
* BUILD: %t %s EXCLUDE_HIP_PLATFORM nvcc EXCLUDE_HIP_RUNTIME HCC EXCLUDE_HIP_COMPILER hcc
|
||||
* TEST: %t EXCLUDE_HIP_PLATFORM nvcc EXCLUDE_HIP_RUNTIME HCC EXCLUDE_HIP_COMPILER hcc
|
||||
* HIT_END
|
||||
*/
|
||||
|
||||
#include "test_common.h"
|
||||
#include "printf_common.h"
|
||||
|
||||
// Kernel that exercises plain text, "%%" escapes and the basic printf
// conversion specifiers, including null pointers and '*' width/precision.
__global__ void test_kernel() {
  const char *null_text = nullptr;
  const char *greeting = "hello world";

  // Literal text and escaped percent signs.
  printf("xyzzy\n");
  printf("%%\n");
  printf("hello %% world\n");
  printf("%%s\n");

  // Two special tests to make sure that the compiler pass correctly
  // skips over a '%%' without affecting the logic for locating
  // string arguments.
  printf("%%s%p\n", (void *)0xf01dab1eca55e77e);
  printf("%%c%s\n", "xyzzy");

  // Character, integer and floating-point conversions.
  printf("%c%c%c\n", 's', 'e', 'p');
  printf("%d\n", -42);
  printf("%u\n", 42);
  printf("%f\n", 123.456);
  printf("%F\n", -123.456);
  printf("%e\n", -123.456);
  printf("%E\n", 123.456);
  printf("%g\n", 123.456);
  printf("%G\n", -123.456);
  printf("%c\n", 'x');

  // The same null pointer as a string and as a pointer.
  printf("%s\n", null_text);
  printf("%p\n", null_text);

  // Width and precision supplied as arguments, mixed with a pointer.
  printf("%.*f %*.*s %p\n", 8, 3.14159, 8, 5, greeting,
         (void *)0xf01dab1eca55e77e);
}
|
||||
|
||||
// Runs test_kernel on one thread with stdout captured and compares the
// captured text with the `reference` raw string literal.
// NOTE(review): runs of spaces inside `reference` may have been collapsed in
// this copy of the file — confirm against the pristine source before editing
// the literal.
int main(int argc, char **argv) {
|
||||
std::string reference(R"here(xyzzy
|
||||
%
|
||||
hello % world
|
||||
%s
|
||||
%s0xf01dab1eca55e77e
|
||||
%cxyzzy
|
||||
sep
|
||||
-42
|
||||
42
|
||||
123.456000
|
||||
-123.456000
|
||||
-1.234560e+02
|
||||
1.234560E+02
|
||||
123.456
|
||||
-123.456
|
||||
x
|
||||
|
||||
(nil)
|
||||
3.14159000 hello 0xf01dab1eca55e77e
|
||||
)here");
|
||||
|
||||
// Capture starts here; getCapturedData() below ends it.
CaptureStream captured(stdout);
|
||||
hipLaunchKernelGGL(test_kernel, dim3(1), dim3(1), 0, 0);
|
||||
// NOTE(review): the hipError_t returned by this call is discarded.
hipStreamSynchronize(0);
|
||||
auto CapturedData = captured.getCapturedData();
|
||||
std::string device_output = gulp(CapturedData);
|
||||
|
||||
HIPASSERT(device_output == reference);
|
||||
passed();
|
||||
}
|
||||
@@ -0,0 +1,54 @@
|
||||
/*
|
||||
Copyright (c) 2020 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/* HIT_START
|
||||
* BUILD: %t %s EXCLUDE_HIP_PLATFORM nvcc EXCLUDE_HIP_RUNTIME HCC EXCLUDE_HIP_COMPILER hcc
|
||||
* TEST: %t EXCLUDE_HIP_PLATFORM nvcc EXCLUDE_HIP_RUNTIME HCC EXCLUDE_HIP_COMPILER hcc
|
||||
* HIT_END
|
||||
*/
|
||||
|
||||
#include "test_common.h"
|
||||
#include "printf_common.h"
|
||||
|
||||
// Kernel that exercises '*' field widths and precisions, i.e. values taken
// from the argument list instead of the format string.
__global__ void test_kernel() {
  // Width only, precision only, then both (with a negative width).
  printf("%*d\n", 16, 42);
  printf("%.*d\n", 8, 42);
  printf("%*.*d\n", -16, 8, 42);

  // '*' arguments mixed with ordinary arguments and a literal '*'.
  printf("%*.*f %s * %.*s\n", 16, 8, 123.456, "hello", 5, "worldxyz");
}
|
||||
|
||||
// Runs test_kernel on one thread with stdout captured and compares the
// captured text with the `reference` raw string literal.
// NOTE(review): runs of spaces inside `reference` look collapsed in this copy
// of the file (the width-16 conversions in test_kernel would need them) —
// confirm against the pristine source before editing the literal.
int main(int argc, char **argv) {
|
||||
std::string reference(R"here( 42
|
||||
00000042
|
||||
00000042
|
||||
123.45600000 hello * world
|
||||
)here");
|
||||
|
||||
// Capture starts here; getCapturedData() below ends it.
CaptureStream captured(stdout);
|
||||
hipLaunchKernelGGL(test_kernel, dim3(1), dim3(1), 0, 0);
|
||||
// NOTE(review): the hipError_t returned by this call is discarded.
hipStreamSynchronize(0);
|
||||
auto CapturedData = captured.getCapturedData();
|
||||
std::string device_output = gulp(CapturedData);
|
||||
|
||||
HIPASSERT(device_output == reference);
|
||||
passed();
|
||||
}
|
||||
@@ -0,0 +1,74 @@
|
||||
/*
|
||||
Copyright (c) 2020 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/* HIT_START
|
||||
* BUILD: %t %s EXCLUDE_HIP_PLATFORM nvcc EXCLUDE_HIP_RUNTIME HCC EXCLUDE_HIP_COMPILER hcc
|
||||
* TEST: %t EXCLUDE_HIP_PLATFORM nvcc EXCLUDE_HIP_RUNTIME HCC EXCLUDE_HIP_COMPILER hcc
|
||||
* HIT_END
|
||||
*/
|
||||
|
||||
#include "test_common.h"
|
||||
#include "printf_common.h"
|
||||
|
||||
// Kernel that exercises explicit field widths and precisions written in the
// format string, on integer, floating-point and string conversions.
__global__ void test_kernel() {
  // Integer conversions: width, precision, and both together.
  printf("%16d\n", 42);
  printf("%.8d\n", 42);
  printf("%16.5d\n", -42);
  printf("%.8x\n", 0x42);
  printf("%.8o\n", 042);

  // Floating-point conversions with width 16 and precision 8.
  printf("%16.8e\n", 12345.67891);
  printf("%16.8f\n", -12345.67891);
  printf("%16.8g\n", 12345.67891);

  // Floating-point conversions with width 8 and precision 4.
  printf("%8.4e\n", -12345.67891);
  printf("%8.4f\n", 12345.67891);
  printf("%8.4g\n", 12345.67891);

  // Smaller widths/precisions on %f.
  printf("%4.2f\n", 12345.67891);
  printf("%.1f\n", 12345.67891);

  // Precision on a string conversion.
  printf("%.5s\n", "helloxyz");
}
|
||||
|
||||
// Runs test_kernel on one thread with stdout captured and compares the
// captured text with the `reference` raw string literal.
// NOTE(review): runs of spaces inside `reference` look collapsed in this copy
// of the file (the width-16 conversions in test_kernel would need them) —
// confirm against the pristine source before editing the literal.
int main(int argc, char **argv) {
|
||||
std::string reference(R"here( 42
|
||||
00000042
|
||||
-00042
|
||||
00000042
|
||||
00000042
|
||||
1.23456789e+04
|
||||
-12345.67891000
|
||||
12345.679
|
||||
-1.2346e+04
|
||||
12345.6789
|
||||
1.235e+04
|
||||
12345.68
|
||||
12345.7
|
||||
hello
|
||||
)here");
|
||||
|
||||
// Capture starts here; getCapturedData() below ends it.
CaptureStream captured(stdout);
|
||||
hipLaunchKernelGGL(test_kernel, dim3(1), dim3(1), 0, 0);
|
||||
// NOTE(review): the hipError_t returned by this call is discarded.
hipStreamSynchronize(0);
|
||||
auto CapturedData = captured.getCapturedData();
|
||||
std::string device_output = gulp(CapturedData);
|
||||
|
||||
HIPASSERT(device_output == reference);
|
||||
passed();
|
||||
}
|
||||
@@ -0,0 +1,94 @@
|
||||
#ifndef COMMON_H
|
||||
#define COMMON_H
|
||||
|
||||
#include <errno.h>
|
||||
#include <error.h>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <map>
|
||||
#include <stdlib.h>
|
||||
#include <string>
|
||||
#include <unistd.h>
|
||||
|
||||
// Redirects the file descriptor behind a stdio stream into a temporary file
// created with mkstemp, so that everything written to the stream can be read
// back later. The redirection ends at the first call to restoreStream() or
// getCapturedData(); the temporary file is removed by the destructor.
//
// Failures are detected from the return value of each call. errno is only
// consulted after a call has actually failed, because a non-zero errno left
// behind by some earlier, unrelated call does not indicate an error here.
struct CaptureStream {
  int saved_fd;  // duplicate of the original descriptor; -1 once restored
  int orig_fd;   // descriptor underlying the captured FILE
  int temp_fd;   // descriptor returned by mkstemp; closed by the constructor

  char tempname[13] = "mytestXXXXXX";  // mkstemp template, filled in place

  // Starts capturing everything written to `original`.
  CaptureStream(FILE *original) {
    orig_fd = fileno(original);
    checkCall(orig_fd != -1);
    saved_fd = dup(orig_fd);
    checkCall(saved_fd != -1);

    temp_fd = mkstemp(tempname);
    checkCall(temp_fd != -1);

    // Flush all stdio buffers so earlier output is not written to the file.
    fflush(nullptr);
    checkCall(dup2(temp_fd, orig_fd) != -1);
    checkCall(close(temp_fd) == 0);
  }

  // Points the stream back at its original destination. Calling it again
  // after the stream has been restored does nothing.
  void restoreStream() {
    if (saved_fd == -1)
      return;
    // Flush so buffered output reaches the temporary file before the swap.
    fflush(nullptr);
    checkCall(dup2(saved_fd, orig_fd) != -1);
    checkCall(close(saved_fd) == 0);
    saved_fd = -1;
  }

  // Ends the capture and returns an input stream opened on the temporary
  // file.
  std::ifstream getCapturedData() {
    restoreStream();
    return std::ifstream(tempname);
  }

  // Ends the capture if it is still active and deletes the temporary file.
  ~CaptureStream() {
    restoreStream();
    checkCall(remove(tempname) == 0);
  }

private:
  // Reports the current errno and asserts when `ok` is false.
  static void checkCall(bool ok) {
    if (!ok) {
      error(0, errno, "Error");
      assert(false);
    }
  }
};
|
||||
|
||||
// Reads the whole content of `input` into a string and closes the stream.
// Returns an empty string when the stream is empty or its size cannot be
// determined (for example because the file could not be opened).
static std::string gulp(std::ifstream &input) {
  std::string retval;

  input.seekg(0, std::ios_base::end);
  // tellg() yields -1 on failure; resizing with that value would throw.
  const std::streamoff length = input.tellg();
  if (length > 0) {
    retval.resize(static_cast<std::string::size_type>(length));
    input.seekg(0, std::ios_base::beg);
    input.read(&retval[0], static_cast<std::streamsize>(retval.size()));
    // Keep only what was actually read in case the read came up short.
    retval.resize(static_cast<std::string::size_type>(input.gcount()));
  }

  input.close();
  return retval;
}
|
||||
|
||||
// Declares the three message strings msg_short, msg_long1 and msg_long2 in
// whatever scope the macro is expanded in. The expansion already ends in a
// semicolon, so `DECLARE_DATA();` leaves one extra empty statement behind.
#define DECLARE_DATA() \
|
||||
const char *msg_short = "Carpe diem."; \
|
||||
const char *msg_long1 = "Lorem ipsum dolor sit amet, consectetur nullam. " \
|
||||
"In mollis imperdiet nibh nec ullamcorper."; \
|
||||
const char *msg_long2 = "Curabitur nec metus sit amet augue vehicula " \
|
||||
"ultrices ut id leo. Lorem ipsum dolor sit amet, " \
|
||||
"consectetur adipiscing elit amet.";
|
||||
|
||||
#endif
|
||||
@@ -24,7 +24,7 @@ THE SOFTWARE.
|
||||
// forces synchronization : set
|
||||
|
||||
/* HIT_START
|
||||
* BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc
|
||||
* BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc vdi
|
||||
* TEST: %t --iterations 10
|
||||
* HIT_END
|
||||
*/
|
||||
|
||||
@@ -18,7 +18,7 @@
|
||||
* */
|
||||
|
||||
/* HIT_START
|
||||
* BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc
|
||||
* BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc vdi
|
||||
* TEST: %t
|
||||
* HIT_END
|
||||
*/
|
||||
|
||||
@@ -60,16 +60,16 @@ int main() {
|
||||
HIPCHECK(hipDeviceSynchronize());
|
||||
HipTest::checkVectorADD(A_h, B_h, C_h, N);
|
||||
|
||||
HIPCHECK(hipStreamCreate(&s));
|
||||
HIPCHECK(hipSetDevice(1));
|
||||
HIPCHECK(hipStreamCreate(&s));
|
||||
HIPCHECK(hipMemcpyPeerAsync(X_d, 1, A_d, 0, Nbytes, s));
|
||||
HIPCHECK(hipMemcpyPeerAsync(Y_d, 1, B_d, 0, Nbytes, s));
|
||||
|
||||
hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0,
|
||||
static_cast<const int*>(X_d), static_cast<const int*>(Y_d), Z_d, N);
|
||||
HIPCHECK(hipMemcpy(C_h, Z_d, Nbytes, hipMemcpyDeviceToHost));
|
||||
HIPCHECK(hipDeviceSynchronize());
|
||||
HIPCHECK(hipStreamSynchronize(s));
|
||||
HIPCHECK(hipDeviceSynchronize());
|
||||
HipTest::checkVectorADD(A_h, B_h, C_h, N);
|
||||
|
||||
HIPCHECK(hipStreamDestroy(s));
|
||||
|
||||
@@ -26,17 +26,19 @@ THE SOFTWARE.
|
||||
* BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS --std=c++11
|
||||
* TEST: %t EXCLUDE_HIP_PLATFORM all
|
||||
* HIT_END
|
||||
|
||||
*/
|
||||
|
||||
#include "hip/hip_runtime.h"
|
||||
#include "test_common.h"
|
||||
|
||||
#ifdef __HIP_PLATFORM_HCC__
|
||||
#include <hc_am.hpp>
|
||||
#endif
|
||||
|
||||
#define USE_HCC_MEMTRACKER 0 /* Debug flag to show the memtracker periodically */
|
||||
|
||||
#if defined(__HIP_PLATFORM_HCC__) && !defined(__HIP_VDI__)
|
||||
#include <hc_am.hpp>
|
||||
#else
|
||||
#define USE_HCC_MEMTRACKER 0
|
||||
#endif
|
||||
|
||||
int elementSizes[] = {1, 16, 1024, 524288, 16 * 1000 * 1000};
|
||||
int nSizes = sizeof(elementSizes) / sizeof(int);
|
||||
@@ -201,7 +203,8 @@ int main(int argc, char* argv[]) {
|
||||
};
|
||||
|
||||
for (int index = 0; index < nSizes; index++) {
|
||||
testMultiGpu(dev0, dev1, elementSizes[index], false /*GPU Synchronization*/);
|
||||
//ToDo: Enable when verified on all platforms
|
||||
//testMultiGpu(dev0, dev1, elementSizes[index], false /*GPU Synchronization*/);
|
||||
testMultiGpu(dev0, dev1, elementSizes[index], true /*Host Synchronization*/);
|
||||
}
|
||||
|
||||
|
||||
@@ -19,7 +19,7 @@ THE SOFTWARE.
|
||||
|
||||
/* HIT_START
|
||||
* BUILD_CMD: matmul.code %hc --genco %S/matmul.cpp -o matmul.code EXCLUDE_HIP_PLATFORM nvcc
|
||||
* BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc
|
||||
* BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc vdi
|
||||
* TEST: %t
|
||||
* HIT_END
|
||||
*/
|
||||
|
||||
@@ -20,7 +20,7 @@ THE SOFTWARE.
|
||||
// Simple test for hipLaunchCooperativeKernelMultiDevice API.
|
||||
|
||||
/* HIT_START
|
||||
* BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM all
|
||||
* BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc
|
||||
* TEST: %t
|
||||
* HIT_END
|
||||
*/
|
||||
@@ -178,8 +178,6 @@ int main() {
|
||||
|
||||
hipLaunchCooperativeKernelMultiDevice(launchParamsList, nGpu, 0);
|
||||
|
||||
HIPCHECK(hipMemcpy(init, dC, sizeof(long), hipMemcpyDeviceToHost));
|
||||
|
||||
if (*dC != (((long)(BufferSizeInDwords) * (BufferSizeInDwords - 1)) / 2)) {
|
||||
std::cout << "Data validation failed for grid size = " << dimGrid.x << " and block size = " << dimBlock.x << "\n";
|
||||
std::cout << "Test failed! \n";
|
||||
|
||||
@@ -22,7 +22,7 @@ THE SOFTWARE.
|
||||
// Simple test for hipLaunchCooperativeKernel API.
|
||||
|
||||
/* HIT_START
|
||||
* BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM all
|
||||
* BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc
|
||||
* TEST: %t
|
||||
* HIT_END
|
||||
*/
|
||||
|
||||
@@ -18,7 +18,7 @@ THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/* HIT_START
|
||||
* BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS -std=c++11
|
||||
* BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS -std=c++11 EXCLUDE_HIP_PLATFORM vdi
|
||||
* TEST: %t
|
||||
* HIT_END
|
||||
*/
|
||||
|
||||
@@ -21,7 +21,7 @@ THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/* HIT_START
|
||||
* BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc
|
||||
* BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc vdi
|
||||
* TEST: %t
|
||||
* HIT_END
|
||||
*/
|
||||
@@ -33,6 +33,9 @@ THE SOFTWARE.
|
||||
|
||||
#define fileName "tex2d_kernel.code"
|
||||
|
||||
#if __HIP__
|
||||
__hip_pinned_shadow__
|
||||
#endif
|
||||
texture<float, 2, hipReadModeElementType> tex;
|
||||
bool testResult = false;
|
||||
|
||||
|
||||
@@ -21,11 +21,15 @@ THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/* HIT_START
|
||||
* BUILD_CMD: tex2d_kernel.code %hc --genco %S/tex2d_kernel.cpp -o tex2d_kernel.code
|
||||
* BUILD_CMD: tex2d_kernel.code %hc --genco %S/tex2d_kernel.cpp -o tex2d_kernel.code EXCLUDE_HIP_PLATFORM vdi
|
||||
* HIT_END
|
||||
*/
|
||||
|
||||
#include "hip/hip_runtime.h"
|
||||
|
||||
#if __HIP__
|
||||
__hip_pinned_shadow__
|
||||
#endif
|
||||
extern texture<float, 2, hipReadModeElementType> tex;
|
||||
|
||||
extern "C" __global__ void tex2dKernel(float* outputData, int width, int height) {
|
||||
|
||||
+1
-1
@@ -22,7 +22,7 @@ THE SOFTWARE.
|
||||
// Test the Grid_Launch syntax.
|
||||
|
||||
/* HIT_START
|
||||
* BUILD: %t %s ../../test_common.cpp
|
||||
* BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM vdi
|
||||
* TEST: %t
|
||||
* HIT_END
|
||||
*/
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
#include "test_common.h"
|
||||
|
||||
/* HIT_START
|
||||
* BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS -std=c++11
|
||||
* BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS -std=c++11 EXCLUDE_HIP_PLATFORM vdi
|
||||
* TEST: %t
|
||||
* HIT_END
|
||||
*/
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/* HIT_START
|
||||
* BUILD: %t %s ../test_common.cpp
|
||||
* BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM vdi
|
||||
* TEST: %t
|
||||
* HIT_END
|
||||
*/
|
||||
|
||||
@@ -55,11 +55,15 @@ THE SOFTWARE.
|
||||
printf("%sPASSED!%s\n", KGRN, KNRM); \
|
||||
exit(0);
|
||||
|
||||
// The real "assert" would have written to stderr. But it is
|
||||
// sufficient to just fflush here without getting pedantic. This also
|
||||
// ensures that we don't lose any earlier writes to stdout.
|
||||
#define failed(...) \
|
||||
printf("%serror: ", KRED); \
|
||||
printf(__VA_ARGS__); \
|
||||
printf("\n"); \
|
||||
printf("error: TEST FAILED\n%s", KNRM); \
|
||||
fflush(NULL); \
|
||||
abort();
|
||||
|
||||
#define warn(...) \
|
||||
|
||||
@@ -18,7 +18,7 @@ THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*HIT_START
|
||||
* BUILD: %t %s ../test_common.cpp
|
||||
* BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM vdi
|
||||
* TEST: %t
|
||||
* HIT_END
|
||||
*/
|
||||
|
||||
@@ -22,7 +22,7 @@ THE SOFTWARE.
|
||||
|
||||
|
||||
/* HIT_START
|
||||
* BUILD: %t %s ../test_common.cpp
|
||||
* BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM vdi
|
||||
* TEST: %t
|
||||
* HIT_END
|
||||
*/
|
||||
|
||||
@@ -21,7 +21,7 @@ THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/* HIT_START
|
||||
* BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc
|
||||
* BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc hcc vdi
|
||||
* TEST: %t
|
||||
* HIT_END
|
||||
*/
|
||||
@@ -30,78 +30,92 @@ THE SOFTWARE.
|
||||
#define SIZE 10
|
||||
|
||||
static float getNormalizedValue(const float value,
|
||||
const enum hipArray_Format texFormat) {
|
||||
switch (texFormat) {
|
||||
case HIP_AD_FORMAT_SIGNED_INT8:
|
||||
return (value / SCHAR_MAX);
|
||||
case HIP_AD_FORMAT_UNSIGNED_INT8:
|
||||
return (value / UCHAR_MAX);
|
||||
case HIP_AD_FORMAT_SIGNED_INT16:
|
||||
return (value / SHRT_MAX);
|
||||
case HIP_AD_FORMAT_UNSIGNED_INT16:
|
||||
return (value / USHRT_MAX);
|
||||
default:
|
||||
return value;
|
||||
}
|
||||
const hipChannelFormatDesc& desc) {
|
||||
if ((desc.x == 8) && (desc.f == hipChannelFormatKindSigned))
|
||||
return (value / SCHAR_MAX);
|
||||
if ((desc.x == 8) && (desc.f == hipChannelFormatKindUnsigned))
|
||||
return (value / UCHAR_MAX);
|
||||
if ((desc.x == 16) && (desc.f == hipChannelFormatKindSigned))
|
||||
return (value / SHRT_MAX);
|
||||
if ((desc.x == 16) && (desc.f == hipChannelFormatKindUnsigned))
|
||||
return (value / USHRT_MAX);
|
||||
return value;
|
||||
}
|
||||
|
||||
#if __HIP__
|
||||
__hip_pinned_shadow__
|
||||
#endif
|
||||
texture<float, hipTextureType1D, hipReadModeElementType> textureNormalizedVal_1D;
|
||||
texture<char, hipTextureType1D, hipReadModeNormalizedFloat> texc;
|
||||
|
||||
#if __HIP__
|
||||
__hip_pinned_shadow__
|
||||
#endif
|
||||
texture<unsigned char, hipTextureType1D, hipReadModeNormalizedFloat> texuc;
|
||||
|
||||
#if __HIP__
|
||||
__hip_pinned_shadow__
|
||||
#endif
|
||||
texture<short, hipTextureType1D, hipReadModeNormalizedFloat> texs;
|
||||
|
||||
#if __HIP__
|
||||
__hip_pinned_shadow__
|
||||
#endif
|
||||
texture<unsigned short, hipTextureType1D, hipReadModeNormalizedFloat> texus;
|
||||
|
||||
|
||||
template<typename T>
|
||||
__global__ void normalizedValTextureTest(unsigned int numElements, float* pDst)
|
||||
{
|
||||
unsigned int elementID = hipThreadIdx_x;
|
||||
if(elementID >= numElements)
|
||||
return;
|
||||
float coord =(float) elementID/(numElements-1);
|
||||
pDst[elementID] = tex1D(textureNormalizedVal_1D, coord);
|
||||
return;
|
||||
float coord =(float) elementID/numElements;
|
||||
if(std::is_same<T, char>::value)
|
||||
pDst[elementID] = tex1D(texc, coord);
|
||||
else if(std::is_same<T, unsigned char>::value)
|
||||
pDst[elementID] = tex1D(texuc, coord);
|
||||
else if(std::is_same<T, short>::value)
|
||||
pDst[elementID] = tex1D(texs, coord);
|
||||
else if(std::is_same<T, unsigned short>::value)
|
||||
pDst[elementID] = tex1D(texus, coord);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
bool textureTest(enum hipArray_Format texFormat)
|
||||
bool textureTest(texture<T, hipTextureType1D, hipReadModeNormalizedFloat> *tex)
|
||||
{
|
||||
T hData[] = {65, 66, 67, 68, 69, 70, 71, 72,73,74};
|
||||
T *dData = NULL;
|
||||
HIPCHECK(hipMalloc((void **) &dData, sizeof(T)*SIZE));
|
||||
HIPCHECK(hipMemcpyHtoD((hipDeviceptr_t)dData, hData, sizeof(T)*SIZE));
|
||||
textureReference* texRef = &textureNormalizedVal_1D;
|
||||
HIPCHECK(hipTexRefSetAddressMode(texRef, 0, hipAddressModeClamp));
|
||||
HIPCHECK(hipTexRefSetAddressMode(texRef, 1, hipAddressModeClamp));
|
||||
HIPCHECK(hipTexRefSetFilterMode(texRef, hipFilterModePoint));
|
||||
HIPCHECK(hipTexRefSetFlags(texRef, HIP_TRSF_NORMALIZED_COORDINATES));
|
||||
HIPCHECK(hipTexRefSetFormat(texRef, texFormat, 1));
|
||||
|
||||
HIP_ARRAY_DESCRIPTOR desc;
|
||||
desc.Width = SIZE;
|
||||
desc.Height = 1;
|
||||
desc.Format = texFormat;
|
||||
desc.NumChannels = 1;
|
||||
HIPCHECK(hipTexRefSetAddress2D(texRef, &desc, (hipDeviceptr_t)dData, sizeof(T)*SIZE));
|
||||
|
||||
bool testResult = true;
|
||||
hipChannelFormatDesc desc = hipCreateChannelDesc<T>();
|
||||
hipArray_t dData;
|
||||
HIPCHECK(hipMallocArray(&dData, &desc, SIZE, 1, hipArrayDefault));
|
||||
|
||||
T hData[] = {65, 66, 67, 68, 69, 70, 71, 72, 73, 74};
|
||||
HIPCHECK(hipMemcpy2DToArray(dData, 0, 0, hData, sizeof(T)*SIZE, sizeof(T)*SIZE, 1, hipMemcpyHostToDevice));
|
||||
|
||||
tex->normalized = true;
|
||||
tex->channelDesc = desc;
|
||||
HIPCHECK(hipBindTextureToArray(tex, dData, &desc));
|
||||
|
||||
float *dOutputData = NULL;
|
||||
HIPCHECK(hipMalloc((void **) &dOutputData, sizeof(float)*SIZE));
|
||||
|
||||
hipLaunchKernelGGL(HIP_KERNEL_NAME(normalizedValTextureTest), dim3(1,1,1), dim3(SIZE,1,1), 0, 0, SIZE, dOutputData);
|
||||
|
||||
hipLaunchKernelGGL(normalizedValTextureTest<T>, dim3(1,1,1), dim3(SIZE,1,1), 0, 0, SIZE, dOutputData);
|
||||
|
||||
float *hOutputData = new float[SIZE];
|
||||
HIPCHECK(hipMemcpyDtoH(hOutputData, (hipDeviceptr_t)dOutputData, (sizeof(float)*SIZE)));
|
||||
|
||||
HIPCHECK(hipMemcpy(hOutputData, dOutputData, (sizeof(float)*SIZE), hipMemcpyDeviceToHost));
|
||||
|
||||
bool testResult = true;
|
||||
for(int i = 0; i < SIZE; i++)
|
||||
{
|
||||
float expected = getNormalizedValue(float(hData[i]), texFormat);
|
||||
float expected = getNormalizedValue(float(hData[i]), desc);
|
||||
if(expected != hOutputData[i])
|
||||
{
|
||||
printf("mismatch at index:%d for texType:%d output:%f\n",i,texFormat,hOutputData[i]);
|
||||
printf("mismatch at index:%d output:%f expected:%f\n",i,hOutputData[i],expected);
|
||||
testResult = false;
|
||||
break;
|
||||
break;
|
||||
}
|
||||
}
|
||||
hipFree(dData);
|
||||
hipFree(dOutputData);
|
||||
hipUnbindTexture(textureNormalizedVal_1D);
|
||||
|
||||
HIPCHECK(hipFreeArray(dData));
|
||||
HIPCHECK(hipFree(dOutputData));
|
||||
delete [] hOutputData;
|
||||
return testResult;
|
||||
}
|
||||
@@ -118,12 +132,11 @@ int main(int argc, char** argv)
|
||||
std::cout << "Arch - AMD GPU :: " << props.gcnArch << std::endl;
|
||||
#endif
|
||||
|
||||
status &= textureTest<char> (HIP_AD_FORMAT_SIGNED_INT8);
|
||||
status &= textureTest<unsigned char> (HIP_AD_FORMAT_UNSIGNED_INT8);
|
||||
status &= textureTest<short> (HIP_AD_FORMAT_SIGNED_INT16);
|
||||
status &= textureTest<unsigned short>(HIP_AD_FORMAT_UNSIGNED_INT16);
|
||||
status &= textureTest<float> (HIP_AD_FORMAT_FLOAT);
|
||||
|
||||
status &= textureTest<char> (&texc);
|
||||
status &= textureTest<unsigned char> (&texuc);
|
||||
status &= textureTest<short> (&texs);
|
||||
status &= textureTest<unsigned short>(&texus);
|
||||
|
||||
if(status){
|
||||
passed();
|
||||
}
|
||||
|
||||
@@ -17,8 +17,8 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*HIT_START
|
||||
* BUILD: %t %s ../test_common.cpp
|
||||
/* HIT_START
|
||||
* BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM vdi
|
||||
* TEST: %t
|
||||
* HIT_END
|
||||
*/
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/* HIT_START
|
||||
* BUILD: %t %s ../test_common.cpp
|
||||
* BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM vdi
|
||||
* TEST: %t
|
||||
* HIT_END
|
||||
*/
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user