diff --git a/projects/hip/CONTRIBUTING.md b/projects/hip/CONTRIBUTING.md index 7bc946c576..685683e697 100644 --- a/projects/hip/CONTRIBUTING.md +++ b/projects/hip/CONTRIBUTING.md @@ -110,11 +110,6 @@ executing: ../../test/myocyte/run0.cmd... PASSED! executing: ../../test/nn/run0.cmd... PASSED! --TESTING: nw executing: ../../test/nw/run0.cmd... PASSED! ---TESTING: particlefilter -executing: ../../test/particlefilter/run0.cmd... *** Error in `./particlefilter_naive': free(): invalid next size (fast): 0x0000000001ad89d0 *** - FAILED! -executing: ../../test/particlefilter/run1.cmd... *** Error in `./particlefilter_float': free(): invalid next size (fast): 0x0000000001a7e890 *** - FAILED! --TESTING: pathfinder executing: ../../test/pathfinder/run0.cmd... PASSED! --TESTING: srad diff --git a/projects/hip/README.md b/projects/hip/README.md index 309951ddb8..9e52b7cd20 100644 --- a/projects/hip/README.md +++ b/projects/hip/README.md @@ -30,6 +30,7 @@ HIP code can be developed either on AMD HSA or Boltzmann platform using hcc comp * Install [hcc](https://bitbucket.org/multicoreware/hcc/wiki/Home) including supporting HSA kernel and runtime driver stack * By default HIP looks for hcc in /opt/hcc (can be overridden by setting HCC_HOME environment variable) * By default HIP looks for HSA in /opt/hsa (can be overridden by setting HSA_PATH environment variable) +* Ensure that ROCR runtime is installed and added to LD_LIBRARY_PATH #### NVIDIA (nvcc) * Install CUDA SDK from manufacturer website diff --git a/projects/hip/RELEASE.md b/projects/hip/RELEASE.md index 605c2ee69f..ae0a0d2b4e 100644 --- a/projects/hip/RELEASE.md +++ b/projects/hip/RELEASE.md @@ -6,10 +6,31 @@ We have attempted to document known bugs and limitations - in particular the [HI - Async memory copies. - hipStream support. - Multi-GPU -- Shared-scope atomic operations. (due to compiler limitation, shared-scope map atomics map to global scope) +- Shared-scope atomic operations. 
(due to compiler limitation, shared-scope atomics map to global scope) - Tuning built-in functions, including shfl. - Performance optimization. Stay tuned - the work for many of these features is already in-flight. + +## Revision History: + +=================================================================================================== +Release:0.80.01 +Date: 2016.02.18 +- Improve reporting and support for device-side math functions. +- Update Runtime Documentation. +- Improve implementations of cross-lane operations (_ballot, _any, _all). +- Provide shuffle intrinsics (performance optimization in-progress). +- Support hipDeviceAttribute for querying "one-shot" device attributes, as an alternative to hipDeviceGetProperties. + + +=================================================================================================== +Release:0.80.00 : +Date: 2016.01.25 + +Initial release with GPUOpen Launch. + + + diff --git a/projects/hip/bin/findcode.sh b/projects/hip/bin/findcode.sh index 087e4873c0..a2334b3e2d 100755 --- a/projects/hip/bin/findcode.sh +++ b/projects/hip/bin/findcode.sh @@ -1,5 +1,5 @@ #!/bin/bash -SEARCH_DIR=$1 +SEARCH_DIRS=$@ -find $SEARCH_DIR -name '*.cpp' -o -name '*.h' -o -name '*.cu' -o -name '*.cuh' -o -name '*.c' -o -name '*.hpp' +find $SEARCH_DIRS -name '*.cpp' -o -name '*.h' -o -name '*.cu' -o -name '*.cuh' -o -name '*.c' -o -name '*.hpp' diff --git a/projects/hip/bin/hipcc b/projects/hip/bin/hipcc index d001c6febe..29163d7c02 100755 --- a/projects/hip/bin/hipcc +++ b/projects/hip/bin/hipcc @@ -19,6 +19,10 @@ use File::Basename; # HCC_HOME : Path to HCC SDK (default /opt/hcc). Used on AMD platforms only. # HSA_PATH : Path to HSA dir (default /opt/hsa). Used on AMD platforms only. 
+if(scalar @ARGV == 0){ +print "No Arguments passed, exiting ...\n"; +exit(-1); +} $verbose = $ENV{'HIPCC_VERBOSE'}; $verbose = 0 unless defined $verbose; @@ -71,6 +75,7 @@ if ($HIP_PLATFORM eq "hcc") { $HIPLDFLAGS .= " -L$HSA_PATH/lib -lhsa-runtime64 -lhc_am"; # Add C++ libs for GCC. $HIPLDFLAGS .= " -lstdc++"; + $HIPLDFLAGS .= " -lm"; if ($verbose & 0x2) { print ("HSA_PATH=$HSA_PATH\n"); @@ -163,7 +168,7 @@ if ($needHipHcc) { if ((not -e $object) or ((stat($source))[9] > (stat($object))[9])) { my $CMD = "$HCC $HCCFLAGS -I$HSA_PATH/include -I$HIP_PATH/include -Wall -c $source -o $object"; if ($verbose & 0x10) { - $CMD .= " -g" ; + $CMD .= " -g -O0" ; } else { $CMD .= " -O3" ; } diff --git a/projects/hip/bin/hipconfig b/projects/hip/bin/hipconfig index db53d6014e..b369a83e52 100755 --- a/projects/hip/bin/hipconfig +++ b/projects/hip/bin/hipconfig @@ -14,7 +14,7 @@ GetOptions( ,"compiler|c" => \$p_compiler ,"platform|P" => \$p_platform ,"cpp_config|cxx_config|C" => \$p_cpp_config - ,"full|f" => \$p_full, + ,"full|f|info" => \$p_full, ,"newline|n" => \$p_newline ); @@ -82,6 +82,7 @@ if ($p_full) { system("$HCC_HOME/bin/hcc-config --cxxflags"); print ("HCC-ldflags : "); system("$HCC_HOME/bin/hcc-config --ldflags"); + printf("\n"); } if ($HIP_PLATFORM eq "nvcc") { print "\n" ; @@ -93,11 +94,15 @@ if ($p_full) { print "\n" ; print "=== Environment Variables\n"; - system("env | egrep '^HIP|^HSA|^HCC|^CUDA'"); + system("echo PATH=\$PATH"); + system("env | egrep '^HIP|^HSA|^HCC|^CUDA|^LD_LIBRARY_PATH'"); + print "\n" ; print "== Linux Kernel\n"; system ("uname -a"); + + print "\n" ; $printed = 1; } diff --git a/projects/hip/bin/hipify b/projects/hip/bin/hipify index f4de89aab2..c49598f4ef 100755 --- a/projects/hip/bin/hipify +++ b/projects/hip/bin/hipify @@ -277,6 +277,8 @@ while (@ARGV) { $ft{'mem'} += s/\bcudaMemcpyKind\b/hipMemcpyKind/g; + $ft{'mem'} += s/\bcudaPointerAttributes\b/hipPointerAttribute_t/g; + #-------- # Memory management: @@ -364,6 +366,12 @@ while 
(@ARGV) { $ft{'err'} += s/\bcudaDevAttrMaxThreadsPerMultiProcessor\b/hipDeviceAttributeMaxThreadsPerMultiProcessor/g; $ft{'err'} += s/\bcudaDevAttrComputeCapabilityMajor\b/hipDeviceAttributeComputeCapabilityMajor/g; $ft{'err'} += s/\bcudaDevAttrComputeCapabilityMinor\b/hipDeviceAttributeComputeCapabilityMinor/g; + $ft{'err'} += s/\bcudaDevAttrConcurrentKernels\b/hipDeviceAttributeConcurrentKernels/g; + $ft{'err'} += s/\bcudaDevAttrPciBusId\b/hipDeviceAttributePciBusId/g; + $ft{'err'} += s/\bcudaDevAttrPciDeviceId\b/hipDeviceAttributePciDeviceId/g; + $ft{'err'} += s/\bcudaDevAttrMaxSharedMemoryPerMultiprocessor\b/hipDeviceAttributeMaxSharedMemoryPerMultiprocessor/g; + $ft{'err'} += s/\bcudaDevAttrMemoryClockRate\b/hipDeviceAttributeMemoryClockRate/g; + $ft{'err'} += s/\bcudaDevAttrGlobalMemoryBusWidth\b/hipDeviceAttributeMemoryBusWidth/g; $ft{'dev'} += s/\bcudaDeviceAttr\b/hipDeviceAttribute_t/g; $ft{'dev'} += s/\bcudaDeviceGetAttribute\b/hipDeviceGetAttribute/g; diff --git a/projects/hip/docs/RuntimeAPI/html/Synchonization.html b/projects/hip/docs/RuntimeAPI/html/Synchonization.html index c21879b667..f60f0cc1e4 100644 --- a/projects/hip/docs/RuntimeAPI/html/Synchonization.html +++ b/projects/hip/docs/RuntimeAPI/html/Synchonization.html @@ -79,8 +79,8 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');

The following commands are "host-asynchronous" - meaning they do not wait for any preceding commands to complete, and may return control to the host thread before the requested operation completes:

"Host-synchronous" commands have the following properties:

@@ -109,7 +109,7 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search'); diff --git a/projects/hip/docs/RuntimeAPI/html/annotated.html b/projects/hip/docs/RuntimeAPI/html/annotated.html index 49ef4eac76..4a61698f75 100644 --- a/projects/hip/docs/RuntimeAPI/html/annotated.html +++ b/projects/hip/docs/RuntimeAPI/html/annotated.html @@ -104,7 +104,7 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search'); diff --git a/projects/hip/docs/RuntimeAPI/html/bug.html b/projects/hip/docs/RuntimeAPI/html/bug.html index bdc31de0f8..d9c64d7eb8 100644 --- a/projects/hip/docs/RuntimeAPI/html/bug.html +++ b/projects/hip/docs/RuntimeAPI/html/bug.html @@ -95,7 +95,7 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search'); diff --git a/projects/hip/docs/RuntimeAPI/html/classes.html b/projects/hip/docs/RuntimeAPI/html/classes.html index 6bd01f36c7..70491ef32f 100644 --- a/projects/hip/docs/RuntimeAPI/html/classes.html +++ b/projects/hip/docs/RuntimeAPI/html/classes.html @@ -105,7 +105,7 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search'); diff --git a/projects/hip/docs/RuntimeAPI/html/dir_68267d1309a1af8e8297ef4c3efbcdba.html b/projects/hip/docs/RuntimeAPI/html/dir_68267d1309a1af8e8297ef4c3efbcdba.html index fd08fac040..7d6a9d0e72 100644 --- a/projects/hip/docs/RuntimeAPI/html/dir_68267d1309a1af8e8297ef4c3efbcdba.html +++ b/projects/hip/docs/RuntimeAPI/html/dir_68267d1309a1af8e8297ef4c3efbcdba.html @@ -4,7 +4,7 @@ -HIP: Heterogenous-computing Interface for Portability: /home/fpadmin/ben/HIP6/src Directory Reference +HIP: Heterogenous-computing Interface for Portability: /home/bensander/HIP.public/src Directory Reference @@ -92,7 +92,7 @@ Files diff --git a/projects/hip/docs/RuntimeAPI/html/dir_6d8604cb65fa6b83549668eb0ce09cac.html b/projects/hip/docs/RuntimeAPI/html/dir_6d8604cb65fa6b83549668eb0ce09cac.html index 7d2d877c9b..9fb345b393 100644 --- 
a/projects/hip/docs/RuntimeAPI/html/dir_6d8604cb65fa6b83549668eb0ce09cac.html +++ b/projects/hip/docs/RuntimeAPI/html/dir_6d8604cb65fa6b83549668eb0ce09cac.html @@ -4,7 +4,7 @@ -HIP: Heterogenous-computing Interface for Portability: /home/fpadmin/ben/HIP6/include/hcc_detail Directory Reference +HIP: Heterogenous-computing Interface for Portability: /home/bensander/HIP.public/include/hcc_detail Directory Reference @@ -87,20 +87,25 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');

Files

file  hip_runtime.h [code] + Contains definitions of APIs for HIP runtime.
  -file  hip_runtime_api.h [code] +file  hip_runtime_api.h [code] + Contains C function APIs for HIP runtime. This file does not use any HCC builtin or special language extensions (-hc mode) ; those functions in hip_runtime.h.
  -file  hip_texture.h [code] +file  hip_texture.h [code] + HIP C++ Texture API for hcc compiler.
  -file  hip_vector_types.h [code] +file  hip_vector_types.h [code] + Defines the different newt vector types for HIP runtime.
  -file  host_defines.h [code] +file  host_defines.h [code] + TODO-doc.
  diff --git a/projects/hip/docs/RuntimeAPI/html/dir_d44c64559bbebec7f509842c48db8b23.html b/projects/hip/docs/RuntimeAPI/html/dir_d44c64559bbebec7f509842c48db8b23.html index 1de6328c9c..34ef4a1333 100644 --- a/projects/hip/docs/RuntimeAPI/html/dir_d44c64559bbebec7f509842c48db8b23.html +++ b/projects/hip/docs/RuntimeAPI/html/dir_d44c64559bbebec7f509842c48db8b23.html @@ -4,7 +4,7 @@ -HIP: Heterogenous-computing Interface for Portability: /home/fpadmin/ben/HIP6/include Directory Reference +HIP: Heterogenous-computing Interface for Portability: /home/bensander/HIP.public/include Directory Reference @@ -103,7 +103,7 @@ Files diff --git a/projects/hip/docs/RuntimeAPI/html/files.html b/projects/hip/docs/RuntimeAPI/html/files.html index 06f026ba5f..bbb7037a0e 100644 --- a/projects/hip/docs/RuntimeAPI/html/files.html +++ b/projects/hip/docs/RuntimeAPI/html/files.html @@ -89,11 +89,11 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
[detail level 123]
- - - - - + + + + + @@ -105,7 +105,7 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search'); diff --git a/projects/hip/docs/RuntimeAPI/html/functions.html b/projects/hip/docs/RuntimeAPI/html/functions.html index 5f2b3f24eb..ce1f2c7bf7 100644 --- a/projects/hip/docs/RuntimeAPI/html/functions.html +++ b/projects/hip/docs/RuntimeAPI/html/functions.html @@ -81,6 +81,7 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
  • l
  • m
  • n
  • +
  • p
  • r
  • s
  • t
  • @@ -125,6 +126,9 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
  • computeMode : hipDeviceProp_t
  • +
  • concurrentKernels +: hipDeviceProp_t +
  • @@ -197,6 +201,9 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
  • maxGridSize : hipDeviceProp_t
  • +
  • maxSharedMemoryPerMultiProcessor +: hipDeviceProp_t +
  • maxThreadsDim : hipDeviceProp_t
  • @@ -206,6 +213,9 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
  • maxThreadsPerMultiProcessor : hipDeviceProp_t
  • +
  • memoryClockRate +: hipDeviceProp_t +
  • minor : hipDeviceProp_t
  • @@ -222,6 +232,16 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search'); +

    - p -

    + +

    - r -

    @@ -197,6 +201,9 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
  • maxGridSize : hipDeviceProp_t
  • +
  • maxSharedMemoryPerMultiProcessor +: hipDeviceProp_t +
  • maxThreadsDim : hipDeviceProp_t
  • @@ -206,6 +213,9 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
  • maxThreadsPerMultiProcessor : hipDeviceProp_t
  • +
  • memoryClockRate +: hipDeviceProp_t +
  • minor : hipDeviceProp_t
  • @@ -222,6 +232,16 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search'); +

    - p -

    + +

    - r -

    @@ -96,141 +102,298 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
    Here is a list of all documented file members with links to the documentation:
    +

    - _ -

    + + +

    - d -

    + +

    - h -

    + + +

    - o -

    diff --git a/projects/hip/docs/RuntimeAPI/html/globals_defs.html b/projects/hip/docs/RuntimeAPI/html/globals_defs.html index 35a768bc8c..1d2aada594 100644 --- a/projects/hip/docs/RuntimeAPI/html/globals_defs.html +++ b/projects/hip/docs/RuntimeAPI/html/globals_defs.html @@ -70,6 +70,9 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
  • All
  • Functions
  • Variables
  • +
  • Typedefs
  • +
  • Enumerations
  • +
  • Enumerator
  • Macros
  • @@ -90,14 +93,38 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
     
    diff --git a/projects/hip/docs/RuntimeAPI/html/globals_enum.html b/projects/hip/docs/RuntimeAPI/html/globals_enum.html new file mode 100644 index 0000000000..76cd3b9e6f --- /dev/null +++ b/projects/hip/docs/RuntimeAPI/html/globals_enum.html @@ -0,0 +1,120 @@ + + + + + + +HIP: Heterogenous-computing Interface for Portability: File Members + + + + + + + + + +
    +
    +
    o-include
    |o-hcc_detail
    ||o*hip_runtime.h
    ||o*hip_runtime_api.h
    ||o*hip_texture.h
    ||o*hip_vector_types.h
    ||\*host_defines.h
    ||o*hip_runtime.hContains definitions of APIs for HIP runtime
    ||o*hip_runtime_api.hContains C function APIs for HIP runtime. This file does not use any HCC builtin or special language extensions (-hc mode); those functions are in hip_runtime.h
    ||o*hip_texture.hHIP C++ Texture API for hcc compiler
    ||o*hip_vector_types.hDefines the different new vector types for HIP runtime
    ||\*host_defines.hTODO-doc
    |o*hip_common.h
    |o*hip_runtime.h
    |o*hip_runtime_api.h
    + + + + + +
    +
    HIP: Heterogenous-computing Interface for Portability +
    +
    + + + + + + + + + +
    + All Classes Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
    + + +
    + +
    + +
    +
    + + + + diff --git a/projects/hip/docs/RuntimeAPI/html/globals_eval.html b/projects/hip/docs/RuntimeAPI/html/globals_eval.html new file mode 100644 index 0000000000..5e5e2c4e57 --- /dev/null +++ b/projects/hip/docs/RuntimeAPI/html/globals_eval.html @@ -0,0 +1,147 @@ + + + + + + +HIP: Heterogenous-computing Interface for Portability: File Members + + + + + + + + + +
    +
    + + + + + + +
    +
    HIP: Heterogenous-computing Interface for Portability +
    +
    +
    + + + + + + +
    + +
    + All Classes Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
    + + +
    + +
    + +
    +
    + + + + diff --git a/projects/hip/docs/RuntimeAPI/html/globals_func.html b/projects/hip/docs/RuntimeAPI/html/globals_func.html index dff1f0b16e..17fe7c5f66 100644 --- a/projects/hip/docs/RuntimeAPI/html/globals_func.html +++ b/projects/hip/docs/RuntimeAPI/html/globals_func.html @@ -70,6 +70,9 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
  • All
  • Functions
  • Variables
  • +
  • Typedefs
  • +
  • Enumerations
  • +
  • Enumerator
  • Macros
  • @@ -98,124 +101,185 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');

    - h -

    diff --git a/projects/hip/docs/RuntimeAPI/html/globals_type.html b/projects/hip/docs/RuntimeAPI/html/globals_type.html new file mode 100644 index 0000000000..2c2504da8c --- /dev/null +++ b/projects/hip/docs/RuntimeAPI/html/globals_type.html @@ -0,0 +1,114 @@ + + + + + + +HIP: Heterogenous-computing Interface for Portability: File Members + + + + + + + + + +
    +
    + + + + + + +
    +
    HIP: Heterogenous-computing Interface for Portability +
    +
    +
    + + + + + + +
    + +
    + All Classes Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
    + + +
    + +
    + +
    +
    + + + + diff --git a/projects/hip/docs/RuntimeAPI/html/globals_vars.html b/projects/hip/docs/RuntimeAPI/html/globals_vars.html index 8117f6041c..eb20781be3 100644 --- a/projects/hip/docs/RuntimeAPI/html/globals_vars.html +++ b/projects/hip/docs/RuntimeAPI/html/globals_vars.html @@ -70,6 +70,9 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
  • All
  • Functions
  • Variables
  • +
  • Typedefs
  • +
  • Enumerations
  • +
  • Enumerator
  • Macros
  • @@ -106,7 +109,7 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search'); diff --git a/projects/hip/docs/RuntimeAPI/html/group__API.html b/projects/hip/docs/RuntimeAPI/html/group__API.html index ad2ed4ea23..895c153259 100644 --- a/projects/hip/docs/RuntimeAPI/html/group__API.html +++ b/projects/hip/docs/RuntimeAPI/html/group__API.html @@ -110,7 +110,7 @@ Modules diff --git a/projects/hip/docs/RuntimeAPI/html/group__Device.html b/projects/hip/docs/RuntimeAPI/html/group__Device.html index c5b4996e69..5f42bdb840 100644 --- a/projects/hip/docs/RuntimeAPI/html/group__Device.html +++ b/projects/hip/docs/RuntimeAPI/html/group__Device.html @@ -99,6 +99,9 @@ Functions hipError_t hipGetDeviceCount (int *count)  Return number of compute-capable devices. More...
      +hipError_t hipDeviceGetAttribute (int *pi, hipDeviceAttribute_t attr, int device) + Query device attribute. More...
    hipError_t hipDeviceGetProperties (hipDeviceProp_t *prop, int device)  Returns device properties. More...
      @@ -121,6 +124,48 @@ Functions

    Detailed Description

    ----------------------------------------------------------------------------------------------—

    Function Documentation

    + +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + +
    hipError_t hipDeviceGetAttribute (int * pi,
    hipDeviceAttribute_t attr,
    int device 
    )
    +
    + +

    Query device attribute.

    +
    Parameters
    + + + + +
    [out]pipointer to value to return
    [in]attrattribute to query
    [in]devicewhich device to query for information
    +
    +
    + +
    +
    @@ -136,7 +181,7 @@ Functions

    Set Cache configuration for a specific function.

    -

    Note: AMD devices and recent NVIDIA GPUS do not support reconfigurable cache. This hint is ignored on those architectures.

    +

Note: AMD devices and recent Nvidia GPUs do not support reconfigurable cache. This hint is ignored on those architectures.

    Returns
    hipSuccess
    @@ -197,7 +242,7 @@ Functions

    Get Shared memory bank configuration.

    -

    Note: AMD devices and recent NVIDIA GPUS do not support shared cache banking, and the hint is ignored on those architectures.

    +

Note: AMD devices and recent Nvidia GPUs do not support shared cache banking, and the hint is ignored on those architectures.

    Returns
    hipSuccess
    @@ -240,7 +285,7 @@ Functions

    Set L1/Shared cache partition.

    -

    Note: AMD devices and recent NVIDIA GPUS do not support reconfigurable cache. This hint is ignored on those architectures.

    +

Note: AMD devices and recent Nvidia GPUs do not support reconfigurable cache. This hint is ignored on those architectures.

    Returns
    hipSuccess
    @@ -260,7 +305,7 @@ Functions

    Set Shared memory bank configuration.

    -

    Note: AMD devices and recent NVIDIA GPUS do not support shared cache banking, and the hint is ignored on those architectures.

    +

Note: AMD devices and recent Nvidia GPUs do not support shared cache banking, and the hint is ignored on those architectures.

    Returns
    hipSuccess
    @@ -301,7 +346,7 @@ Functions

    Set Cache configuration for a specific function.

    -

    Note: AMD devices and recent NVIDIA GPUS do not support reconfigurable cache. This hint is ignored on those architectures.

    +

Note: AMD devices and recent Nvidia GPUs do not support reconfigurable cache. This hint is ignored on those architectures.

    Returns
    hipSuccess
    @@ -354,8 +399,8 @@ Functions -

    Returns in *count the number of devices that have ability to run compute commands. If there are no such devices, then hipGetDeviceCount will return hipErrorNoDevice. If 1 or more devices can be found, then hipGetDeviceCount returns hipSuccess.

    -
    Returns
    hipSuccess, hipErrorNoDevice
    +

    Returns in *count the number of devices that have ability to run compute commands. If there are no such devices, then hipGetDeviceCount will return hipErrorNoDevice. If 1 or more devices can be found, then hipGetDeviceCount returns hipSuccess.

    +
    Returns
    hipSuccess, hipErrorNoDevice
    @@ -397,7 +442,7 @@ Functions diff --git a/projects/hip/docs/RuntimeAPI/html/group__Error.html b/projects/hip/docs/RuntimeAPI/html/group__Error.html index cea57daed9..f6aeb04eb5 100644 --- a/projects/hip/docs/RuntimeAPI/html/group__Error.html +++ b/projects/hip/docs/RuntimeAPI/html/group__Error.html @@ -197,7 +197,7 @@ Functions diff --git a/projects/hip/docs/RuntimeAPI/html/group__Event.html b/projects/hip/docs/RuntimeAPI/html/group__Event.html index 07c053b451..3b54300dd3 100644 --- a/projects/hip/docs/RuntimeAPI/html/group__Event.html +++ b/projects/hip/docs/RuntimeAPI/html/group__Event.html @@ -209,11 +209,11 @@ Functions -
    Returns
    : hipSuccess, hipErrorInvalidResourceHandle, hipErrorNotReady,
    +
    Returns
    : hipSuccess, hipErrorInvalidResourceHandle, hipErrorNotReady,

    Computes the elapsed time between two events. Time is computed in ms, with a resolution of approximately 1 us.

    Events which are recorded in a NULL stream will block until all commands on all other streams complete execution, and then record the timestamp.

    Events which are recorded in a non-NULL stream will record their timestamp when they reach the head of the specified stream, after all previous commands in that stream have completed executing. Thus the time that the event recorded may be significantly after the host calls hipEventRecord.

    -

    If hipEventRecord has not been called on either event, then hipErrorInvalidResourceHandle is returned. If hipEventRecord has been called on both events, but the timestamp has not yet been recorded on one or both events (that is, hipEventQuery would return hipErrorNotReady on at least one of the events), then hipErrorNotReady is returned.

    +

    If hipEventRecord has not been called on either event, then hipErrorInvalidResourceHandle is returned. If hipEventRecord has been called on both events, but the timestamp has not yet been recorded on one or both events (that is, hipEventQuery would return hipErrorNotReady on at least one of the events), then hipErrorNotReady is returned.

    @@ -239,7 +239,7 @@ Functions
    Returns
    hipSuccess, hipErrorNotReady
    -

    Query the status of the specified event. This function will return hipErrorNotReady if all commands in the appropriate stream (specified to hipEventRecord) have completed. If that work has not completed, or if hipEventRecord was not called on the event, then cudaSuccess is returned.

    +

Query the status of the specified event. This function will return hipSuccess if all commands in the appropriate stream (specified to hipEventRecord) have completed, or if hipEventRecord was not called on the event. If that work has not completed, then hipErrorNotReady is returned.

    @@ -315,7 +315,7 @@ Functions diff --git a/projects/hip/docs/RuntimeAPI/html/group__GlobalDefs.html b/projects/hip/docs/RuntimeAPI/html/group__GlobalDefs.html index 7ec1058aed..53d57a58c1 100644 --- a/projects/hip/docs/RuntimeAPI/html/group__GlobalDefs.html +++ b/projects/hip/docs/RuntimeAPI/html/group__GlobalDefs.html @@ -119,6 +119,9 @@ Typedefs typedef enum hipError_t hipError_t   + +typedef enum hipDeviceAttribute_t hipDeviceAttribute_t +  typedef enum hipFuncCache hipFuncCache   typedef enum hipSharedMemConfig hipSharedMemConfig @@ -146,6 +149,38 @@ Enumerations
    }   +enum  hipDeviceAttribute_t {
    +  hipDeviceAttributeMaxThreadsPerBlock, +hipDeviceAttributeMaxBlockDimX, +hipDeviceAttributeMaxBlockDimY, +hipDeviceAttributeMaxBlockDimZ, +
    +  hipDeviceAttributeMaxGridDimX, +hipDeviceAttributeMaxGridDimY, +hipDeviceAttributeMaxGridDimZ, +hipDeviceAttributeMaxSharedMemoryPerBlock, +
    +  hipDeviceAttributeTotalConstantMemory, +hipDeviceAttributeWarpSize, +hipDeviceAttributeMaxRegistersPerBlock, +hipDeviceAttributeClockRate, +
    +  hipDeviceAttributeMemoryClockRate, +hipDeviceAttributeMultiprocessorCount, +hipDeviceAttributeComputeMode, +hipDeviceAttributeL2CacheSize, +
    +  hipDeviceAttributeMaxThreadsPerMultiProcessor, +hipDeviceAttributeComputeCapabilityMajor, +hipDeviceAttributeComputeCapabilityMinor, +hipDeviceAttributeConcurrentKernels, +
    +  hipDeviceAttributePciBusId, +hipDeviceAttributePciDeviceId, +hipDeviceAttributeMaxSharedMemoryPerMultiprocessor +
    + } +  enum  hipFuncCache { hipFuncCachePreferNone, hipFuncCachePreferShared, hipFuncCachePreferL1, @@ -238,7 +273,7 @@ Enumerations
    -
    Warning
    On AMD devices and recent NVIDIA devices, these hints and controls are ignored.
    +
    Warning
    On AMD devices and recent Nvidia devices, these hints and controls are ignored.
    @@ -251,11 +286,94 @@ Enumerations
    -
    Warning
    On AMD devices and recent NVIDIA devices, these hints and controls are ignored.
    +
    Warning
    On AMD devices and recent Nvidia devices, these hints and controls are ignored.

    Enumeration Type Documentation

    + +
    +
    + + + + +
    enum hipDeviceAttribute_t
    +
    + + + + + + + + + + + + + + + + + + + + + + + + +
    Enumerator
    hipDeviceAttributeMaxThreadsPerBlock  +

    Maximum number of threads per block.

    +
    hipDeviceAttributeMaxBlockDimX  +

    Maximum x-dimension of a block.

    +
    hipDeviceAttributeMaxBlockDimY  +

    Maximum y-dimension of a block.

    +
    hipDeviceAttributeMaxBlockDimZ  +

    Maximum z-dimension of a block.

    +
    hipDeviceAttributeMaxGridDimX  +

    Maximum x-dimension of a grid.

    +
    hipDeviceAttributeMaxGridDimY  +

    Maximum y-dimension of a grid.

    +
    hipDeviceAttributeMaxGridDimZ  +

    Maximum z-dimension of a grid.

    +
    hipDeviceAttributeMaxSharedMemoryPerBlock  +

    Maximum shared memory available per block in bytes.

    +
    hipDeviceAttributeTotalConstantMemory  +

    Constant memory size in bytes.

    +
    hipDeviceAttributeWarpSize  +

    Warp size in threads.

    +
    hipDeviceAttributeMaxRegistersPerBlock  +

    Maximum number of 32-bit registers available to a thread block. This number is shared by all thread blocks simultaneously resident on a multiprocessor.

    +
    hipDeviceAttributeClockRate  +

    Peak clock frequency in kilohertz.

    +
    hipDeviceAttributeMemoryClockRate  +

    Peak memory clock frequency in kilohertz.

    +
    hipDeviceAttributeMultiprocessorCount  +

    Number of multiprocessors on the device.

    +
    hipDeviceAttributeComputeMode  +

    Compute mode that device is currently in.

    +
    hipDeviceAttributeL2CacheSize  +

    Size of L2 cache in bytes. 0 if the device doesn't have L2 cache.

    +
    hipDeviceAttributeMaxThreadsPerMultiProcessor  +

    Maximum resident threads per multiprocessor.

    +
    hipDeviceAttributeComputeCapabilityMajor  +

    Major compute capability version number.

    +
    hipDeviceAttributeComputeCapabilityMinor  +

    Minor compute capability version number.

    +
    hipDeviceAttributeConcurrentKernels  +

    Device can possibly execute multiple kernels concurrently.

    +
    hipDeviceAttributePciBusId  +

    PCI Bus ID.

    +
    hipDeviceAttributePciDeviceId  +

    PCI Device ID.

    +
    hipDeviceAttributeMaxSharedMemoryPerMultiprocessor  +

    Maximum Shared Memory Per Multiprocessor.

    +
    + +
    +
    @@ -282,7 +400,7 @@ Enumerations

    Out of resources error.

    hipErrorInvalidValue  -

    One or more of the paramters passed to the API call is NULL or not in an acceptable range.

    +

    One or more of the parameters passed to the API call is NULL or not in an acceptable range.

    hipErrorInvalidResourceHandle 

    Resource handle (hipEvent_t or hipStream_t) invalid.

    @@ -291,10 +409,10 @@ Enumerations

    DeviceID must be in range 0...#compute-devices.

    hipErrorNoDevice  -

    Call to cudaGetDeviceCount returned 0 devices.

    +

    Call to hipGetDeviceCount returned 0 devices.

    hipErrorNotReady  -

    indicates that asynchronous operations enqueued earlier are not ready. This is not actually an error, but is used to distinguish from hipSuccess (which indicates completion). APIs that return this error include hipEventQuery and hipStreamQuery.

    +

    Indicates that asynchronous operations enqueued earlier are not ready. This is not actually an error, but is used to distinguish from hipSuccess (which indicates completion). APIs that return this error include hipEventQuery and hipStreamQuery.

    hipErrorUnknown 

    Unknown error.

    @@ -315,7 +433,7 @@ Enumerations
    -
    Warning
    On AMD devices and recent NVIDIA devices, these hints and controls are ignored.
    +
    Warning
    On AMD devices and recent Nvidia devices, these hints and controls are ignored.
    Enumerator
    hipFuncCachePreferNone 

    no preference for shared memory or L1 (default)

    @@ -372,7 +490,7 @@ Enumerations
    -
    Warning
    On AMD devices and recent NVIDIA devices, these hints and controls are ignored.
    +
    Warning
    On AMD devices and recent Nvidia devices, these hints and controls are ignored.
    diff --git a/projects/hip/docs/RuntimeAPI/html/group__HCC__Specific.html b/projects/hip/docs/RuntimeAPI/html/group__HCC__Specific.html index dda77995f2..10eb0d5298 100644 --- a/projects/hip/docs/RuntimeAPI/html/group__HCC__Specific.html +++ b/projects/hip/docs/RuntimeAPI/html/group__HCC__Specific.html @@ -85,10 +85,10 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search'); - + - +
    Enumerator
    hipSharedMemBankSizeDefault 

    The compiler selects a device-specific value for the banking.

    @@ -390,7 +508,7 @@ Enumerations

    Functions

    hipError_t hipHccGetAccelerator (int deviceId, hc::accelerator *acc)
     Return hc::acclerator associated with the specified deviceId. More...
     Return hc::accelerator associated with the specified deviceId. More...
     
    hipError_t hipHccGetAcceleratorView (hipStream_t stream, hc::accelerator_view **av)
     Return hc::acclerator_view associated with the specified stream. More...
     Return hc::accelerator_view associated with the specified stream. More...
     

    Detailed Description

    @@ -123,7 +123,7 @@ Functions
    -

    Return hc::acclerator associated with the specified deviceId.

    +

    Return hc::accelerator associated with the specified deviceId.

    Returns
    hipSuccess, hipErrorInvalidDevice
    @@ -152,7 +152,7 @@ Functions
    -

    Return hc::acclerator_view associated with the specified stream.

    +

    Return hc::accelerator_view associated with the specified stream.

    Returns
    hipSuccess
    @@ -160,7 +160,7 @@ Functions diff --git a/projects/hip/docs/RuntimeAPI/html/group__HIP-ENV.html b/projects/hip/docs/RuntimeAPI/html/group__HIP-ENV.html index 49583a5127..b4f0537b8b 100644 --- a/projects/hip/docs/RuntimeAPI/html/group__HIP-ENV.html +++ b/projects/hip/docs/RuntimeAPI/html/group__HIP-ENV.html @@ -101,7 +101,7 @@ int  diff --git a/projects/hip/docs/RuntimeAPI/html/group__Memory.html b/projects/hip/docs/RuntimeAPI/html/group__Memory.html index f2d06c6a49..362fb6a8d8 100644 --- a/projects/hip/docs/RuntimeAPI/html/group__Memory.html +++ b/projects/hip/docs/RuntimeAPI/html/group__Memory.html @@ -85,25 +85,31 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');

    Functions

    hipError_t hipMalloc (void **ptr, size_t size) + Allocate memory on the default accelerator. More...
      hipError_t hipMallocHost (void **ptr, size_t size) + Allocate pinned host memory. More...
      hipError_t hipFree (void *ptr) + Free memory allocated by the hcc hip memory allocation API. This API performs an implicit hipDeviceSynchronize() call. More...
      hipError_t hipFreeHost (void *ptr) + Free memory allocated by the hcc hip host memory allocation API. More...
      hipError_t hipMemcpy (void *dst, const void *src, size_t sizeBytes, hipMemcpyKind kind) + Copy data from src to dst. More...
      - -hipError_t hipMemcpyToSymbol (const char *symbolName, const void *src, size_t sizeBytes, size_t offset, hipMemcpyKind kind) +hipError_t hipMemcpyToSymbol (const char *symbolName, const void *src, size_t sizeBytes, size_t offset, hipMemcpyKind kind) + Copies sizeBytes bytes from the memory area pointed to by src to the memory area pointed to by offset bytes from the start of symbol symbol. More...
      hipError_t hipMemcpyAsync (void *dst, const void *src, size_t sizeBytes, hipMemcpyKind kind, hipStream_t stream=0) + Copy data from src to dst asynchronously. More...
      - -hipError_t hipMemset (void *dst, int value, size_t sizeBytes) +hipError_t hipMemset (void *dst, int value, size_t sizeBytes) + Copy data from src to dst asynchronously. More...
      - -hipError_t hipMemsetAsync (void *dst, int value, size_t sizeBytes, hipStream_t=0) +hipError_t hipMemsetAsync (void *dst, int value, size_t sizeBytes, hipStream_t=0) + Fills the first sizeBytes bytes of the memory area pointed to by dev with the constant byte value value. More...
      hipError_t hipMemGetInfo (size_t *free, size_t *total) @@ -131,14 +137,15 @@ Functions
    -

    Free memory allocated by the hcc hip memory allocation API. This API performs an implicit hipDeviceSynchronize() call.

    + +

    Free memory allocated by the hcc hip memory allocation API. This API performs an implicit hipDeviceSynchronize() call.

    Parameters
    [in]ptrPointer to memory to be freed
    -
    Returns
    Error code
    +
    Returns
    hipSuccess, hipErrorMemoryFree
    @@ -155,14 +162,15 @@ Functions
    -

    Free memory allocated by the hcc hip host memory allocation API

    + +

    Free memory allocated by the hcc hip host memory allocation API.

    Parameters
    [in]ptrPointer to memory to be freed
    -
    Returns
    Error code
    +
    Returns
    hipSuccess, hipErrorMemoryFree
    @@ -189,7 +197,8 @@ Functions
    -

    Allocate memory on the default accelerator

    + +

    Allocate memory on the default accelerator.

    Parameters
    @@ -224,11 +233,12 @@ Functions
    [out]ptrPointer to the allocated memory
    -

    Allocate pinned host memory

    + +

    Allocate pinned host memory.

    Parameters
    - - + +
    [in]ptrPointer to the allocated host pinned memory
    [out]sizeRequested memory size
    [out]ptrPointer to the allocated host pinned memory
    [in]sizeRequested memory size
    @@ -271,18 +281,19 @@ Functions
    -

    Copy data from src to dst. It supports memory from host to device, device to host, device to device and host to host The src and dst must not overlap. If the

    -

    This function is host-synchronous for most inputs. It uses the default NULL stream and will synchronize with other blocking streams on the same device.

    + +

    Copy data from src to dst.

    +

    It supports memory from host to device, device to host, device to device and host to host. The src and dst must not overlap. TODO: cudaErrorInvalidMemcpyDirection error code is not supported right now; use hipErrorUnknown for now.

    Parameters
    - +
    being copy to
    [out]dstData being copied to
    [in]srcData being copied from
    [in]sizeBytesData size in bytes
    [in]copyTypeMemory copy type
    -
    Returns
    Error code
    +
    Returns
    hipSuccess, hipErrorInvalidValue, hipErrorMemoryFree, hipErrorUnknown
    @@ -327,7 +338,9 @@ Functions
    -

    Copy data from src to dst asynchronously. It supports memory from host to device, device to host, device to device and host to host.

    + +

    Copy data from src to dst asynchronously.

    +

    TODO: cudaErrorInvalidMemcpyDirection error code is not supported right now, use hipErrorUnknown for now

    Parameters
    @@ -337,14 +350,168 @@ Functions
    [out]dstData being copied to
    -
    Returns
    Error code
    +
    Returns
    hipSuccess, hipErrorInvalidValue, hipErrorMemoryFree, hipErrorUnknown
    + +
    + + +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    hipError_t hipMemcpyToSymbol (const char * symbolName,
    const void * src,
    size_t sizeBytes,
    size_t offset,
    hipMemcpyKind kind 
    )
    +
    + +

    Copies sizeBytes bytes from the memory area pointed to by src to the memory area pointed to by offset bytes from the start of symbol symbol.

    +

    The memory areas may not overlap. Symbol can either be a variable that resides in global or constant memory space, or it can be a character string, naming a variable that resides in global or constant memory space. Kind can be either hipMemcpyHostToDevice or hipMemcpyDeviceToDevice. TODO: cudaErrorInvalidSymbol and cudaErrorInvalidMemcpyDirection are not supported, use hipErrorUnknown for now.

    +
    Parameters
    + + + + + + +
    [in]symbolName- Symbol destination on device
    [in]src- Data being copied from
    [in]sizeBytes- Data size in bytes
    [in]offset- Offset from start of symbol in bytes
    [in]kind- Type of transfer
    +
    +
    +
    Returns
    hipSuccess, hipErrorInvalidValue, hipErrorMemoryFree, hipErrorUnknown
    + +
    +
    + +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + +
    hipError_t hipMemset (void * dst,
    int value,
    size_t sizeBytes 
    )
    +
    + +

    Fills the first sizeBytes bytes of the memory area pointed to by dst with the constant byte value value.

    +

    It supports memory from host to device, device to host, device to device and host to host.

    +
    Parameters
    + + + + + +
    [out]dstData being copy to
    [in]srcData being copy from
    [in]sizeBytesData size in bytes
    [in]accelerator_viewAccelerator view which the copy is being enqueued
    +
    +
    +
    Returns
    hipSuccess, hipErrorInvalidValue, hipErrorMemoryFree
    + +
    +
    + +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    hipError_t hipMemsetAsync (void * dst,
    int value,
    size_t sizeBytes,
    hipStream_t  = 0 
    )
    +
    + +

    Fills the first sizeBytes bytes of the memory area pointed to by dst with the constant byte value value.

    +

    hipMemsetAsync() is asynchronous with respect to the host, so the call may return before the memset is complete. The operation can optionally be associated to a stream by passing a non-zero stream argument. If stream is non-zero, the operation may overlap with operations in other streams.

    +
    Parameters
    + + + + + +
    [out]dstPointer to device memory
    [in]value- Value to set for each byte of specified memory
    [in]sizeBytes- Size in bytes to set
    [in]stream- Stream identifier
    +
    +
    +
    Returns
    hipSuccess, hipErrorInvalidValue, hipErrorMemoryFree
    diff --git a/projects/hip/docs/RuntimeAPI/html/group__PeerToPeer.html b/projects/hip/docs/RuntimeAPI/html/group__PeerToPeer.html index 9b2174c2bd..c9417b25c0 100644 --- a/projects/hip/docs/RuntimeAPI/html/group__PeerToPeer.html +++ b/projects/hip/docs/RuntimeAPI/html/group__PeerToPeer.html @@ -88,13 +88,16 @@ Functions  Determine if a device can access a peer's memory. More...
      hipError_t hipDeviceDisablePeerAccess (int peerDevice) + Disables registering memory on peerDevice for direct access from the current device. More...
      hipError_t hipDeviceEnablePeerAccess (int peerDevice, unsigned int flags) + Enables registering memory on peerDevice for direct access from the current device. More...
      - -hipError_t hipMemcpyPeer (void *dst, int dstDevice, const void *src, int srcDevice, size_t sizeBytes) +hipError_t hipMemcpyPeer (void *dst, int dstDevice, const void *src, int srcDevice, size_t sizeBytes) + Copies memory from one device to memory on another device. More...
      hipError_t hipMemcpyPeerAsync (void *dst, int dstDevice, const void *src, int srcDevice, size_t sizeBytes, hipStream_t stream=0) + Copies memory from one device to memory on another device. More...
     

    Detailed Description

    @@ -135,7 +138,7 @@ Functions - +
    [out]canAccessPeerreturns true if specified devices are peers.
    [in]device
    [in]peerDeviceReturns "1" in canAccessPeer if the specified device is capable of directly accessing memory phyically located on peerDevice , or "0" if not.
    [in]peerDeviceReturns "1" in canAccessPeer if the specified device is capable of directly accessing memory physically located on peerDevice , or "0" if not.
    @@ -156,6 +159,15 @@ Functions
    + +

    Disables registering memory on peerDevice for direct access from the current device.

    +

    If there are any allocations on peerDevice which were registered in the current device using hipPeerRegister() then these allocations will be automatically unregistered. Returns hipErrorPeerAccessNotEnabled if direct access to memory on peerDevice has not yet been enabled from the current device.

    +
    Parameters
    + + +
    [in]peerDeviceTODO:cudaErrorPeerAccessNotEnabled and cudaErrorInvalidDevice error not supported in HIP, return hipErrorUnknown Returns hipSuccess, hipErrorUnknown
    +
    +
    Warning
    Need to update this function when RT supports P2P
    @@ -183,8 +195,74 @@ Functions
    + +

    Enables registering memory on peerDevice for direct access from the current device.

    +
    Parameters
    + + + +
    [in]peerDevice
    [in]flagsTODO:cudaErrorInvalidDevice error not supported in HIP, return hipErrorUnknown Returns hipSuccess, hipErrorInvalidDevice, hipErrorInvalidValue, hipErrorUnknown
    +
    +
    Warning
    Need to update this function when RT supports P2P
    +
    + + +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    hipError_t hipMemcpyPeer (void * dst,
    int dstDevice,
    const void * src,
    int srcDevice,
    size_t sizeBytes 
    )
    +
    + +

    Copies memory from one device to memory on another device.

    +
    Parameters
    + + + + + + +
    [out]dst- Destination device pointer.
    [in]dstDevice- Destination device
    [in]src- Source device pointer
    [in]srcDevice- Source device
    [in]sizeBytes- Size of memory copy in bytes
    +
    +
    +

    Returns hipSuccess, hipErrorInvalidValue, hipErrorInvalidDevice

    +
    @@ -234,6 +312,20 @@ Functions
    + +

    Copies memory from one device to memory on another device.

    +
    Parameters
    + + + + + + + +
    [out]dst- Destination device pointer.
    [in]dstDevice- Destination device
    [in]src- Source device pointer
    [in]srcDevice- Source device
    [in]sizeBytes- Size of memory copy in bytes
    [in]stream- Stream identifier
    +
    +
    +

    Returns hipSuccess, hipErrorInvalidValue, hipErrorInvalidDevice

    Bug:
    This function uses a synchronous copy
    @@ -241,7 +333,7 @@ Functions diff --git a/projects/hip/docs/RuntimeAPI/html/group__Profiler.html b/projects/hip/docs/RuntimeAPI/html/group__Profiler.html index 354d17569d..30e28d3ac5 100644 --- a/projects/hip/docs/RuntimeAPI/html/group__Profiler.html +++ b/projects/hip/docs/RuntimeAPI/html/group__Profiler.html @@ -80,12 +80,12 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');

    ----------------------------------------------------------------------------------------------—

    -

    The cudaProfilerInitialize API format for "configFile" is not supported.

    +
    Warning
    The cudaProfilerInitialize API format for "configFile" is not supported.

    On AMD platforms, hipProfilerStart and hipProfilerStop require installation of AMD's GPU perf counter API and defining GPU_PERF

    diff --git a/projects/hip/docs/RuntimeAPI/html/group__Stream.html b/projects/hip/docs/RuntimeAPI/html/group__Stream.html index 6f2efc9edc..7f54af7b00 100644 --- a/projects/hip/docs/RuntimeAPI/html/group__Stream.html +++ b/projects/hip/docs/RuntimeAPI/html/group__Stream.html @@ -140,7 +140,7 @@ Functions -
    Returns
    hipSuccess, hipErrorInvalidValue
    +
    Returns
    hipSuccess, hipErrorInvalidValue

    Create a new asynchronous stream. Flags controls behavior of the stream. See hipStreamDefault, hipStreamNonBlocking. hipStream_t are under development - with current HIP use the NULL stream.

    @@ -206,7 +206,7 @@ Functions -
    Returns
    hipSuccess, hipErrorInvalidValue, hipErrorInvalidResourceHandle
    +
    Returns
    hipSuccess, hipErrorInvalidValue, hipErrorInvalidResourceHandle

    Return flags associated with this stream in *flags.

    See Also
    hipStreamCreateWithFlags
    Returns
    hipSuccess
    @@ -283,7 +283,7 @@ Functions diff --git a/projects/hip/docs/RuntimeAPI/html/group__Texture.html b/projects/hip/docs/RuntimeAPI/html/group__Texture.html index a19d108f0a..97808148e7 100644 --- a/projects/hip/docs/RuntimeAPI/html/group__Texture.html +++ b/projects/hip/docs/RuntimeAPI/html/group__Texture.html @@ -121,7 +121,7 @@ template<class T , int dim, enum hipTextureReadMode readMode> diff --git a/projects/hip/docs/RuntimeAPI/html/group__Version.html b/projects/hip/docs/RuntimeAPI/html/group__Version.html index f5cb9f0063..21f2a17ee4 100644 --- a/projects/hip/docs/RuntimeAPI/html/group__Version.html +++ b/projects/hip/docs/RuntimeAPI/html/group__Version.html @@ -85,7 +85,7 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');

    Functions

    hipError_t hipDriverGetVersion (int *driverVersion) - Returns the approximate HIP driver versin. More...
    + Returns the approximate HIP driver version. More...
     

    Detailed Description

    @@ -105,8 +105,8 @@ Functions
    -

    Returns the approximate HIP driver versin.

    -
    Warning
    The HIP feature set does not correpond to an exact CUDA SDK driver revision. This function always set *driverVersion to 4 as an approximation though HIP supports some features which were introduced in later CUDA SDK revisions. HIP apps code should not rely on the driver revision number here and should use arch feature flags to test device capabiliies or conditional compilation.
    +

    Returns the approximate HIP driver version.

    +
    Warning
    The HIP feature set does not correspond to an exact CUDA SDK driver revision. This function always sets *driverVersion to 4 as an approximation, though HIP supports some features which were introduced in later CUDA SDK revisions. HIP apps code should not rely on the driver revision number here and should use arch feature flags to test device capabilities or conditional compilation.
    Returns
    hipSuccess
    @@ -114,7 +114,7 @@ Functions diff --git a/projects/hip/docs/RuntimeAPI/html/hcc__detail_2hip__runtime_8h.html b/projects/hip/docs/RuntimeAPI/html/hcc__detail_2hip__runtime_8h.html index 2b02e80742..984fe07ceb 100644 --- a/projects/hip/docs/RuntimeAPI/html/hcc__detail_2hip__runtime_8h.html +++ b/projects/hip/docs/RuntimeAPI/html/hcc__detail_2hip__runtime_8h.html @@ -4,7 +4,7 @@ -HIP: Heterogenous-computing Interface for Portability: /home/fpadmin/ben/HIP6/include/hcc_detail/hip_runtime.h File Reference +HIP: Heterogenous-computing Interface for Portability: /home/bensander/HIP.public/include/hcc_detail/hip_runtime.h File Reference @@ -93,15 +93,19 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
    hip_runtime.h File Reference
    + +

    Contains definitions of APIs for HIP runtime. +More...

    #include <cstring>
    #include <cmath>
    #include <string.h>
    #include <stddef.h>
    -#include <hip_runtime_api.h>
    +#include <hip_runtime_api.h>
    #include <hc.hpp>
    #include <grid_launch.h>
    -#include <hcc_detail/hip_texture.h>
    -#include <hcc_detail/host_defines.h>
    +#include <hcc_detail/hip_texture.h>
    +#include <hcc_detail/host_defines.h>
    +#include <hc_math.hpp>

    Go to the source code of this file.

    @@ -113,9 +117,6 @@ Macros - - @@ -224,12 +225,6 @@ __device__ unsigned int  - - - - @@ -290,6 +285,12 @@ __device__ unsigned int  + + + + @@ -305,9 +306,95 @@ __device__ int  + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    #define hipLaunchParm   grid_launch_parm
     
    -#define __HIP_DEVICE_COMPILE__   0
     
    #define __launch_bounds__(requiredMaxThreadsPerBlock, minBlocksPerMultiprocessor)
     
    at
    __device__ unsigned long long int atomicMax (unsigned long long int *address, unsigned long long int val)
     
    -__device__ unsigned int atomicInc (unsigned int *address)
     
    -__device__ unsigned int atomicDec (unsigned int *address)
     
    __device__ int atomicCAS (int *address, int compare, int val)
     
    __
    __device__ unsigned int __ffsll (unsigned long long int input)
     
    +__device__ unsigned int __ffs (int input)
     
    +__device__ unsigned int __ffsll (long long int input)
     
    __device__ unsigned int __brev (unsigned int input)
     
    __any (
    __device__ unsigned long long int __ballot (int input)
     
    +__device__ int __shfl (int input, int lane, int width)
     
    +__device__ int __shfl_up (int input, unsigned int lane_delta, int width)
     
    +__device__ int __shfl_down (int input, unsigned int lane_delta, int width)
     
    +__device__ int __shfl_xor (int input, int lane_mask, int width)
     
    +__device__ float __shfl (float input, int lane, int width)
     
    +__device__ float __shfl_up (float input, unsigned int lane_delta, int width)
     
    +__device__ float __shfl_down (float input, unsigned int lane_delta, int width)
     
    +__device__ float __shfl_xor (float input, int lane_mask, int width)
     
    +int min (int arg1, int arg2) __attribute((hc
     
    +int max (int arg1, int arg2) __attribute((hc
     
    +__device__ float __cosf (float x)
     
    +__device__ float __expf (float x)
     
    +__device__ float __frsqrt_rn (float x)
     
    +__device__ float __fsqrt_rd (float x)
     
    +__device__ float __fsqrt_rn (float x)
     
    +__device__ float __fsqrt_ru (float x)
     
    +__device__ float __fsqrt_rz (float x)
     
    +__device__ float __log10f (float x)
     
    +__device__ float __log2f (float x)
     
    +__device__ float __logf (float x)
     
    +__device__ float __powf (float base, float exponent)
     
    +__device__ void __sincosf (float x, float *s, float *c)
     
    +__device__ float __sinf (float x)
     
    +__device__ float __tanf (float x)
     
    +__device__ float __dsqrt_rd (double x)
     
    +__device__ float __dsqrt_rn (double x)
     
    +__device__ float __dsqrt_ru (double x)
     
    +__device__ float __dsqrt_rz (double x)
     
    + + @@ -324,7 +411,9 @@ int 

    Variables

    int cpu
     
    int warpSize
     
     Make all HIP APIs host-synchronous.
     
    -

    Macro Definition Documentation

    +

    Detailed Description

    +

    Contains definitions of APIs for HIP runtime.

    +

    Macro Definition Documentation

    @@ -384,10 +473,25 @@ int Variable Documentation + +
    +
    + + + + +
    int cpu
    +
    +Initial value:
    {
    +
    return (int)(hc::precise_math::fmin((float)arg1, (float)arg2))
    +
    +
    +
    diff --git a/projects/hip/docs/RuntimeAPI/html/hcc__detail_2hip__runtime_8h_source.html b/projects/hip/docs/RuntimeAPI/html/hcc__detail_2hip__runtime_8h_source.html index 667d8a38e2..a6c1b5210d 100644 --- a/projects/hip/docs/RuntimeAPI/html/hcc__detail_2hip__runtime_8h_source.html +++ b/projects/hip/docs/RuntimeAPI/html/hcc__detail_2hip__runtime_8h_source.html @@ -4,7 +4,7 @@ -HIP: Heterogenous-computing Interface for Portability: /home/fpadmin/ben/HIP6/include/hcc_detail/hip_runtime.h Source File +HIP: Heterogenous-computing Interface for Portability: /home/bensander/HIP.public/include/hcc_detail/hip_runtime.h Source File @@ -124,7 +124,7 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
    38 
    39 #define CUDA_SUCCESS hipSuccess
    40 
    -
    41 #include <hip_runtime_api.h>
    +
    41 #include <hip_runtime_api.h>
    42 
    43 //---
    44 // Remainder of this file only compiles with HCC
    @@ -136,8 +136,8 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
    50 //typedef grid_launch_parm hipLaunchParm ;
    51 #define hipLaunchParm grid_launch_parm
    52 
    -
    53 #include <hcc_detail/hip_texture.h>
    -
    54 #include <hcc_detail/host_defines.h>
    +
    53 #include <hcc_detail/hip_texture.h>
    +
    55 
    56 // TODO-HCC remove old definitions ; ~1602 hcc supports __HCC_ACCELERATOR__ define.
    57 #if defined (__KALMAR_ACCELERATOR__) && not defined (__HCC_ACCELERATOR__)
    @@ -148,488 +148,530 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
    62 #if defined(__HCC_ACCELERATOR__) and (__HCC_ACCELERATOR__ != 0)
    63 // Device compile and not host compile:
    64 
    -
    65 
    -
    66 #define __HIP_DEVICE_COMPILE__ 1
    -
    67 
    -
    68 //TODO-HCC enable __HIP_ARCH_HAS_ATOMICS__ when HCC supports these.
    -
    69  // 32-bit Atomics:
    -
    70 #define __HIP_ARCH_HAS_GLOBAL_INT32_ATOMICS__ (1)
    -
    71 #define __HIP_ARCH_HAS_GLOBAL_FLOAT_ATOMIC_EXCH__ (1)
    -
    72 #define __HIP_ARCH_HAS_SHARED_INT32_ATOMICS__ (0)
    -
    73 #define __HIP_ARCH_HAS_SHARED_FLOAT_ATOMIC_EXCH__ (0)
    -
    74 #define __HIP_ARCH_HAS_FLOAT_ATOMIC_ADD__ (0)
    -
    75 
    -
    76 // 64-bit Atomics:
    -
    77 #define __HIP_ARCH_HAS_GLOBAL_INT64_ATOMICS__ (1)
    -
    78 #define __HIP_ARCH_HAS_SHARED_INT64_ATOMICS__ (0)
    +
    65 //TODO-HCC enable __HIP_ARCH_HAS_ATOMICS__ when HCC supports these.
    +
    66  // 32-bit Atomics:
    +
    67 #define __HIP_ARCH_HAS_GLOBAL_INT32_ATOMICS__ (1)
    +
    68 #define __HIP_ARCH_HAS_GLOBAL_FLOAT_ATOMIC_EXCH__ (1)
    +
    69 #define __HIP_ARCH_HAS_SHARED_INT32_ATOMICS__ (0)
    +
    70 #define __HIP_ARCH_HAS_SHARED_FLOAT_ATOMIC_EXCH__ (0)
    +
    71 #define __HIP_ARCH_HAS_FLOAT_ATOMIC_ADD__ (0)
    +
    72 
    +
    73 // 64-bit Atomics:
    +
    74 #define __HIP_ARCH_HAS_GLOBAL_INT64_ATOMICS__ (1)
    +
    75 #define __HIP_ARCH_HAS_SHARED_INT64_ATOMICS__ (0)
    +
    76 
    +
    77 // Doubles
    +
    78 #define __HIP_ARCH_HAS_DOUBLES__ (1)
    79 
    -
    80 // Doubles
    -
    81 #define __HIP_ARCH_HAS_DOUBLES__ (1)
    -
    82 
    -
    83 //warp cross-lane operations:
    -
    84 #define __HIP_ARCH_HAS_WARP_VOTE__ (1)
    -
    85 #define __HIP_ARCH_HAS_WARP_BALLOT__ (1)
    -
    86 #define __HIP_ARCH_HAS_WARP_SHUFFLE__ (1)
    -
    87 #define __HIP_ARCH_HAS_WARP_FUNNEL_SHIFT__ (0)
    -
    88 
    -
    89 //sync
    -
    90 #define __HIP_ARCH_HAS_THREAD_FENCE_SYSTEM__ (0)
    -
    91 #define __HIP_ARCH_HAS_SYNC_THREAD_EXT__ (0)
    -
    92 
    -
    93 // misc
    -
    94 #define __HIP_ARCH_HAS_SURFACE_FUNCS__ (0)
    -
    95 #define __HIP_ARCH_HAS_3DGRID__ (1)
    -
    96 #define __HIP_ARCH_HAS_DYNAMIC_PARALLEL__ (0)
    -
    97 
    -
    98 #else
    -
    99 // Host compile and not device compile:
    -
    100 #define __HIP_DEVICE_COMPILE__ 0
    -
    101 
    -
    102 #endif
    +
    80 //warp cross-lane operations:
    +
    81 #define __HIP_ARCH_HAS_WARP_VOTE__ (1)
    +
    82 #define __HIP_ARCH_HAS_WARP_BALLOT__ (1)
    +
    83 #define __HIP_ARCH_HAS_WARP_SHUFFLE__ (1)
    +
    84 #define __HIP_ARCH_HAS_WARP_FUNNEL_SHIFT__ (0)
    +
    85 
    +
    86 //sync
    +
    87 #define __HIP_ARCH_HAS_THREAD_FENCE_SYSTEM__ (0)
    +
    88 #define __HIP_ARCH_HAS_SYNC_THREAD_EXT__ (0)
    +
    89 
    +
    90 // misc
    +
    91 #define __HIP_ARCH_HAS_SURFACE_FUNCS__ (0)
    +
    92 #define __HIP_ARCH_HAS_3DGRID__ (1)
    +
    93 #define __HIP_ARCH_HAS_DYNAMIC_PARALLEL__ (0)
    +
    94 
    +
    95 #endif
    +
    96 
    +
    97 
    +
    98 
    +
    99 
    +
    100 
    +
    101 //TODO-HCC this is currently ignored by HCC target of HIP
    +
    102 #define __launch_bounds__(requiredMaxThreadsPerBlock, minBlocksPerMultiprocessor)
    103 
    -
    104 
    -
    105 
    -
    106 
    -
    107 
    -
    108 //TODO-HCC this is currently ignored by HCC target of HIP
    -
    109 #define __launch_bounds__(requiredMaxThreadsPerBlock, minBlocksPerMultiprocessor)
    +
    104 // Detect if we are compiling C++ mode or C mode
    +
    105 #if defined(__cplusplus)
    +
    106 #define __HCC_CPP__
    +
    107 #elif defined(__STDC_VERSION__)
    +
    108 #define __HCC_C__
    +
    109 #endif
    110 
    -
    111 // Detect if we are compiling C++ mode or C mode
    -
    112 #if defined(__cplusplus)
    -
    113 #define __HCC_CPP__
    -
    114 #elif defined(__STDC_VERSION__)
    -
    115 #define __HCC_C__
    -
    116 #endif
    -
    117 
    -
    118 #define clock_t long long int
    -
    119 __device__ inline long long int clock64() { return (long long int)hc::__clock_u64(); };
    -
    120 __device__ inline clock_t clock() { return (clock_t)hc::__clock_u64(); };
    -
    121 
    -
    122 //atomicAdd()
    -
    123 __device__ inline int atomicAdd(int* address, int val)
    -
    124 {
    -
    125  return hc::atomic_fetch_add(address,val);
    -
    126 }
    -
    127 __device__ inline unsigned int atomicAdd(unsigned int* address,
    -
    128  unsigned int val)
    -
    129 {
    -
    130  return hc::atomic_fetch_add(address,val);
    -
    131 }
    -
    132 __device__ inline unsigned long long int atomicAdd(unsigned long long int* address,
    -
    133  unsigned long long int val)
    -
    134 {
    -
    135  return (long long int)hc::atomic_fetch_add((uint64_t*)address,(uint64_t)val);
    -
    136 }
    -
    137 __device__ inline float atomicAdd(float* address, float val)
    -
    138 {
    -
    139  return hc::atomic_fetch_add(address,val);
    -
    140 }
    -
    141 
    -
    142 //atomicSub()
    -
    143 __device__ inline int atomicSub(int* address, int val)
    -
    144 {
    -
    145  return hc::atomic_fetch_sub(address,val);
    -
    146 }
    -
    147 __device__ inline unsigned int atomicSub(unsigned int* address,
    -
    148  unsigned int val)
    -
    149 {
    -
    150  return hc::atomic_fetch_sub(address,val);
    -
    151 }
    -
    152 
    -
    153 //atomicExch()
    -
    154 __device__ inline int atomicExch(int* address, int val)
    -
    155 {
    -
    156  return hc::atomic_exchange(address,val);
    -
    157 }
    -
    158 __device__ inline unsigned int atomicExch(unsigned int* address,
    -
    159  unsigned int val)
    -
    160 {
    -
    161  return hc::atomic_exchange(address,val);
    -
    162 }
    -
    163 __device__ inline unsigned long long int atomicExch(unsigned long long int* address,
    -
    164  unsigned long long int val)
    -
    165 {
    -
    166  return (long long int)hc::atomic_exchange((uint64_t*)address,(uint64_t)val);
    -
    167 }
    -
    168 __device__ inline float atomicExch(float* address, float val)
    -
    169 {
    -
    170  return hc::atomic_exchange(address,val);
    -
    171 }
    -
    172 
    -
    173 //atomicMin()
    -
    174 __device__ inline int atomicMin(int* address, int val)
    -
    175 {
    -
    176  return hc::atomic_fetch_min(address,val);
    -
    177 }
    -
    178 __device__ inline unsigned int atomicMin(unsigned int* address,
    -
    179  unsigned int val)
    -
    180 {
    -
    181  return hc::atomic_fetch_min(address,val);
    -
    182 }
    -
    183 __device__ inline unsigned long long int atomicMin(unsigned long long int* address,
    -
    184  unsigned long long int val)
    -
    185 {
    -
    186  return (long long int)hc::atomic_fetch_min((uint64_t*)address,(uint64_t)val);
    -
    187 }
    -
    188 
    -
    189 //atomicMax()
    -
    190 __device__ inline int atomicMax(int* address, int val)
    -
    191 {
    -
    192  return hc::atomic_fetch_max(address,val);
    -
    193 }
    -
    194 __device__ inline unsigned int atomicMax(unsigned int* address,
    -
    195  unsigned int val)
    -
    196 {
    -
    197  return hc::atomic_fetch_max(address,val);
    -
    198 }
    -
    199 __device__ inline unsigned long long int atomicMax(unsigned long long int* address,
    -
    200  unsigned long long int val)
    -
    201 {
    -
    202  return (long long int)hc::atomic_fetch_max((uint64_t*)address,(uint64_t)val);
    +
    111 #define clock_t long long int
    +
    112 __device__ inline long long int clock64() { return (long long int)hc::__clock_u64(); };
    +
    113 __device__ inline clock_t clock() { return (clock_t)hc::__clock_u64(); };
    +
    114 
    +
    115 //atomicAdd()
    +
    116 __device__ inline int atomicAdd(int* address, int val)
    +
    117 {
    +
    118  return hc::atomic_fetch_add(address,val);
    +
    119 }
    +
    120 __device__ inline unsigned int atomicAdd(unsigned int* address,
    +
    121  unsigned int val)
    +
    122 {
    +
    123  return hc::atomic_fetch_add(address,val);
    +
    124 }
    +
    125 __device__ inline unsigned long long int atomicAdd(unsigned long long int* address,
    +
    126  unsigned long long int val)
    +
    127 {
    +
    128  return (long long int)hc::atomic_fetch_add((uint64_t*)address,(uint64_t)val);
    +
    129 }
    +
    130 __device__ inline float atomicAdd(float* address, float val)
    +
    131 {
    +
    132  return hc::atomic_fetch_add(address,val);
    +
    133 }
    +
    134 
    +
    135 //atomicSub()
    +
    136 __device__ inline int atomicSub(int* address, int val)
    +
    137 {
    +
    138  return hc::atomic_fetch_sub(address,val);
    +
    139 }
    +
    140 __device__ inline unsigned int atomicSub(unsigned int* address,
    +
    141  unsigned int val)
    +
    142 {
    +
    143  return hc::atomic_fetch_sub(address,val);
    +
    144 }
    +
    145 
    +
    146 //atomicExch()
    +
    147 __device__ inline int atomicExch(int* address, int val)
    +
    148 {
    +
    149  return hc::atomic_exchange(address,val);
    +
    150 }
    +
    151 __device__ inline unsigned int atomicExch(unsigned int* address,
    +
    152  unsigned int val)
    +
    153 {
    +
    154  return hc::atomic_exchange(address,val);
    +
    155 }
    +
    156 __device__ inline unsigned long long int atomicExch(unsigned long long int* address,
    +
    157  unsigned long long int val)
    +
    158 {
    +
    159  return (long long int)hc::atomic_exchange((uint64_t*)address,(uint64_t)val);
    +
    160 }
    +
    161 __device__ inline float atomicExch(float* address, float val)
    +
    162 {
    +
    163  return hc::atomic_exchange(address,val);
    +
    164 }
    +
    165 
    +
    166 //atomicMin()
    +
    167 __device__ inline int atomicMin(int* address, int val)
    +
    168 {
    +
    169  return hc::atomic_fetch_min(address,val);
    +
    170 }
    +
    171 __device__ inline unsigned int atomicMin(unsigned int* address,
    +
    172  unsigned int val)
    +
    173 {
    +
    174  return hc::atomic_fetch_min(address,val);
    +
    175 }
    +
    176 __device__ inline unsigned long long int atomicMin(unsigned long long int* address,
    +
    177  unsigned long long int val)
    +
    178 {
    +
    179  return (long long int)hc::atomic_fetch_min((uint64_t*)address,(uint64_t)val);
    +
    180 }
    +
    181 
    +
    182 //atomicMax()
    +
    183 __device__ inline int atomicMax(int* address, int val)
    +
    184 {
    +
    185  return hc::atomic_fetch_max(address,val);
    +
    186 }
    +
    187 __device__ inline unsigned int atomicMax(unsigned int* address,
    +
    188  unsigned int val)
    +
    189 {
    +
    190  return hc::atomic_fetch_max(address,val);
    +
    191 }
    +
    192 __device__ inline unsigned long long int atomicMax(unsigned long long int* address,
    +
    193  unsigned long long int val)
    +
    194 {
    +
    195  return (long long int)hc::atomic_fetch_max((uint64_t*)address,(uint64_t)val);
    +
    196 }
    +
    197 
    +
    198 //atomicCAS()
    +
    199 __device__ inline int atomicCAS(int* address, int compare, int val)
    +
    200 {
    +
    201  hc::atomic_compare_exchange(address,&compare,val);
    +
    202  return *address;
    203 }
    -
    204 
    -
    205 //atomicInc()
    -
    206 __device__ inline unsigned int atomicInc(unsigned int* address)
    +
    204 __device__ inline unsigned int atomicCAS(unsigned int* address,
    +
    205  unsigned int compare,
    +
    206  unsigned int val)
    207 {
    -
    208  return hc::atomic_fetch_inc(address);
    -
    209 }
    -
    210 
    -
    211 //atomicDec()
    -
    212 __device__ inline unsigned int atomicDec(unsigned int* address)
    -
    213 {
    -
    214  return hc::atomic_fetch_dec(address);
    -
    215 }
    -
    216 
    -
    217 //atomicCAS()
    -
    218 __device__ inline int atomicCAS(int* address, int compare, int val)
    -
    219 {
    -
    220  hc::atomic_compare_exchange(address,&compare,val);
    -
    221  return *address;
    -
    222 }
    -
    223 __device__ inline unsigned int atomicCAS(unsigned int* address,
    -
    224  unsigned int compare,
    +
    208  hc::atomic_compare_exchange(address,&compare,val);
    +
    209  return *address;
    +
    210 }
    +
    211 __device__ inline unsigned long long int atomicCAS(unsigned long long int* address,
    +
    212  unsigned long long int compare,
    +
    213  unsigned long long int val)
    +
    214 {
    +
    215  hc::atomic_compare_exchange((uint64_t*)address,(uint64_t*)&compare,(uint64_t)val);
    +
    216  return *address;
    +
    217 }
    +
    218 
    +
    219 //atomicAnd()
    +
    220 __device__ inline int atomicAnd(int* address, int val)
    +
    221 {
    +
    222  return hc::atomic_fetch_and(address,val);
    +
    223 }
    +
    224 __device__ inline unsigned int atomicAnd(unsigned int* address,
    225  unsigned int val)
    226 {
    -
    227  hc::atomic_compare_exchange(address,&compare,val);
    -
    228  return *address;
    -
    229 }
    -
    230 __device__ inline unsigned long long int atomicCAS(unsigned long long int* address,
    -
    231  unsigned long long int compare,
    -
    232  unsigned long long int val)
    -
    233 {
    -
    234  hc::atomic_compare_exchange((uint64_t*)address,(uint64_t*)&compare,(uint64_t)val);
    -
    235  return *address;
    -
    236 }
    -
    237 
    -
    238 //atomicAnd()
    -
    239 __device__ inline int atomicAnd(int* address, int val)
    -
    240 {
    -
    241  return hc::atomic_fetch_and(address,val);
    -
    242 }
    -
    243 __device__ inline unsigned int atomicAnd(unsigned int* address,
    -
    244  unsigned int val)
    -
    245 {
    -
    246  return hc::atomic_fetch_and(address,val);
    -
    247 }
    -
    248 __device__ inline unsigned long long int atomicAnd(unsigned long long int* address,
    -
    249  unsigned long long int val)
    -
    250 {
    -
    251  return (long long int)hc::atomic_fetch_and((uint64_t*)address,(uint64_t)val);
    -
    252 }
    -
    253 
    -
    254 //atomicOr()
    -
    255 __device__ inline int atomicOr(int* address, int val)
    -
    256 {
    -
    257  return hc::atomic_fetch_or(address,val);
    -
    258 }
    -
    259 __device__ inline unsigned int atomicOr(unsigned int* address,
    -
    260  unsigned int val)
    -
    261 {
    -
    262  return hc::atomic_fetch_or(address,val);
    -
    263 }
    -
    264 __device__ inline unsigned long long int atomicOr(unsigned long long int* address,
    -
    265  unsigned long long int val)
    -
    266 {
    -
    267  return (long long int)hc::atomic_fetch_or((uint64_t*)address,(uint64_t)val);
    -
    268 }
    -
    269 
    -
    270 //atomicXor()
    -
    271 __device__ inline int atomicXor(int* address, int val)
    -
    272 {
    -
    273  return hc::atomic_fetch_xor(address,val);
    -
    274 }
    -
    275 __device__ inline unsigned int atomicXor(unsigned int* address,
    -
    276  unsigned int val)
    -
    277 {
    -
    278  return hc::atomic_fetch_xor(address,val);
    -
    279 }
    -
    280 __device__ inline unsigned long long int atomicXor(unsigned long long int* address,
    -
    281  unsigned long long int val)
    -
    282 {
    -
    283  return (long long int)hc::atomic_fetch_xor((uint64_t*)address,(uint64_t)val);
    -
    284 }
    -
    285 
    -
    286 #ifdef __HCC__
    -
    287 #include <hc.hpp>
    -
    288 // integer intrinsic function __poc __clz __ffs __brev
    -
    289 __device__ inline unsigned int __popc( unsigned int input)
    +
    227  return hc::atomic_fetch_and(address,val);
    +
    228 }
    +
    229 __device__ inline unsigned long long int atomicAnd(unsigned long long int* address,
    +
    230  unsigned long long int val)
    +
    231 {
    +
    232  return (long long int)hc::atomic_fetch_and((uint64_t*)address,(uint64_t)val);
    +
    233 }
    +
    234 
    +
    235 //atomicOr()
    +
    236 __device__ inline int atomicOr(int* address, int val)
    +
    237 {
    +
    238  return hc::atomic_fetch_or(address,val);
    +
    239 }
    +
    240 __device__ inline unsigned int atomicOr(unsigned int* address,
    +
    241  unsigned int val)
    +
    242 {
    +
    243  return hc::atomic_fetch_or(address,val);
    +
    244 }
    +
    245 __device__ inline unsigned long long int atomicOr(unsigned long long int* address,
    +
    246  unsigned long long int val)
    +
    247 {
    +
    248  return (long long int)hc::atomic_fetch_or((uint64_t*)address,(uint64_t)val);
    +
    249 }
    +
    250 
    +
    251 //atomicXor()
    +
    252 __device__ inline int atomicXor(int* address, int val)
    +
    253 {
    +
    254  return hc::atomic_fetch_xor(address,val);
    +
    255 }
    +
    256 __device__ inline unsigned int atomicXor(unsigned int* address,
    +
    257  unsigned int val)
    +
    258 {
    +
    259  return hc::atomic_fetch_xor(address,val);
    +
    260 }
    +
    261 __device__ inline unsigned long long int atomicXor(unsigned long long int* address,
    +
    262  unsigned long long int val)
    +
    263 {
    +
    264  return (long long int)hc::atomic_fetch_xor((uint64_t*)address,(uint64_t)val);
    +
    265 }
    +
    266 
    +
    267 #include <hc.hpp>
    +
    268 // integer intrinsic function __poc __clz __ffs __brev
    +
    269 __device__ inline unsigned int __popc( unsigned int input)
    +
    270 {
    +
    271  return hc::__popcount_u32_b32( input);
    +
    272 }
    +
    273 
    +
    274 __device__ inline unsigned int __popcll( unsigned long long int input)
    +
    275 {
    +
    276  return hc::__popcount_u32_b64(input);
    +
    277 }
    +
    278 
    +
    279 __device__ inline unsigned int __clz(unsigned int input)
    +
    280 {
    +
    281  return hc::__firstbit_u32_u32( input);
    +
    282 }
    +
    283 
    +
    284 __device__ inline unsigned int __clzll(unsigned long long int input)
    +
    285 {
    +
    286  return hc::__firstbit_u32_u64( input);
    +
    287 }
    +
    288 
    +
    289 __device__ inline unsigned int __clz(int input)
    290 {
    -
    291  return hc::__popcount_u32_b32( input);
    +
    291  return hc::__firstbit_u32_s32( input);
    292 }
    293 
    -
    294 __device__ inline unsigned int __popcll( unsigned long long int input)
    +
    294 __device__ inline unsigned int __clzll(long long int input)
    295 {
    -
    296  return hc::__popcount_u32_b64(input);
    +
    296  return hc::__firstbit_u32_s64( input);
    297 }
    298 
    -
    299 __device__ inline unsigned int __clz(unsigned int input)
    +
    299 __device__ inline unsigned int __ffs(unsigned int input)
    300 {
    -
    301  return hc::__firstbit_u32_u32( input);
    +
    301  return hc::__lastbit_u32_u32( input)+1;
    302 }
    303 
    -
    304 __device__ inline unsigned int __clzll(unsigned long long int input)
    +
    304 __device__ inline unsigned int __ffsll(unsigned long long int input)
    305 {
    -
    306  return hc::__firstbit_u32_u64( input);
    +
    306  return hc::__lastbit_u32_u64( input)+1;
    307 }
    308 
    -
    309 __device__ inline unsigned int __clz(int input)
    +
    309 __device__ inline unsigned int __ffs(int input)
    310 {
    -
    311  return hc::__firstbit_u32_s32( input);
    +
    311  return hc::__lastbit_u32_s32( input)+1;
    312 }
    313 
    -
    314 __device__ inline unsigned int __clzll(long long int input)
    +
    314 __device__ inline unsigned int __ffsll(long long int input)
    315 {
    -
    316  return hc::__firstbit_u32_s64( input);
    +
    316  return hc::__lastbit_u32_s64( input)+1;
    317 }
    318 
    -
    319 __device__ inline unsigned int __ffs(unsigned int input)
    +
    319 __device__ inline unsigned int __brev( unsigned int input)
    320 {
    -
    321  return hc::__lastbit_u32_u32( input)+1;
    +
    321  return hc::__bitrev_b32( input);
    322 }
    323 
    -
    324 __device__ inline unsigned int __ffsll(unsigned long long int input)
    +
    324 __device__ inline unsigned long long int __brevll( unsigned long long int input)
    325 {
    -
    326  return hc::__lastbit_u32_u64( input)+1;
    +
    326  return hc::__bitrev_b64( input);
    327 }
    328 
    -
    329 __device__ inline unsigned int __brev( unsigned int input)
    -
    330 {
    -
    331  return hc::__bitrev_b32( input);
    -
    332 }
    -
    333 
    -
    334 __device__ inline unsigned long long int __brevll( unsigned long long int input)
    -
    335 {
    -
    336  return hc::__bitrev_b64( input);
    -
    337 }
    -
    338 
    -
    339 // warp vote function __all __any __ballot
    +
    329 // warp vote function __all __any __ballot
    +
    330 __device__ inline int __all( int input)
    +
    331 {
    +
    332  return hc::__all( input);
    +
    333 }
    +
    334 
    +
    335 __device__ inline int __any( int input)
    +
    336 {
    +
    337  if( hc::__any( input)!=0) return 1;
    +
    338  else return 0;
    +
    339 }
    340 
    -
    341 __device__ inline int __all( int input)
    +
    341 __device__ inline unsigned long long int __ballot( int input)
    342 {
    -
    343  return hc::__all( input);
    +
    343  return hc::__ballot( input);
    344 }
    345 
    -
    346 
    -
    347 __device__ inline int __any( int input)
    +
    346 // warp shuffle functions
    +
    347 __device__ inline int __shfl(int input, int lane, int width)
    348 {
    -
    349  return hc::__any( input);
    +
    349  return hc::__shfl(input,lane,width);
    350 }
    351 
    -
    352 
    -
    353 __device__ inline unsigned long long int __ballot( int input)
    -
    354 {
    -
    355  return hc::__ballot( input);
    -
    356 }
    -
    357 
    -
    358 #endif
    -
    359 
    -
    360 
    +
    352 __device__ inline int __shfl_up(int input, unsigned int lane_delta, int width)
    +
    353 {
    +
    354  return hc::__shfl_up(input,lane_delta,width);
    +
    355 }
    +
    356 
    +
    357 __device__ inline int __shfl_down(int input, unsigned int lane_delta, int width)
    +
    358 {
    +
    359  return hc::__shfl_down(input,lane_delta,width);
    +
    360 }
    361 
    -
    362 #ifdef __HCC_ACCELERATOR__
    -
    363 #include <hc_math.hpp>
    -
    364 // TODO: Choose whether default is precise math or fast math based on compilation flag.
    -
    365 using namespace hc::precise_math;
    +
    362 __device__ inline int __shfl_xor(int input, int lane_mask, int width)
    +
    363 {
    +
    364  return hc::__shfl_xor(input,lane_mask,width);
    +
    365 }
    366 
    -
    367 //TODO: Undo this once min/max functions are supported by hc
    -
    368 inline int min(int arg1, int arg2) __attribute((hc,cpu)) { \
    -
    369  return (int)(hc::precise_math::fmin((float)arg1, (float)arg2));}
    -
    370 inline int max(int arg1, int arg2) __attribute((hc,cpu)) { \
    -
    371  return (int)(hc::precise_math::fmax((float)arg1, (float)arg2));}
    -
    372 
    -
    373 
    -
    374 //TODO - add a couple fast math operations here, the set here will grow :
    -
    375 __device__ inline float __log2f(float x) {return hc::fast_math::log2(x); };
    -
    376 __device__ inline float __powf(float base, float exponent) {return hc::fast_math::powf(base, exponent); };
    -
    377 
    -
    378 #endif
    -
    379 
    -
    380 
    +
    367 __device__ inline float __shfl(float input, int lane, int width)
    +
    368 {
    +
    369  return hc::__shfl(input,lane,width);
    +
    370 }
    +
    371 
    +
    372 __device__ inline float __shfl_up(float input, unsigned int lane_delta, int width)
    +
    373 {
    +
    374  return hc::__shfl_up(input,lane_delta,width);
    +
    375 }
    +
    376 
    +
    377 __device__ inline float __shfl_down(float input, unsigned int lane_delta, int width)
    +
    378 {
    +
    379  return hc::__shfl_down(input,lane_delta,width);
    +
    380 }
    381 
    -
    385 #define hipThreadIdx_x (amp_get_local_id(2))
    -
    386 #define hipThreadIdx_y (amp_get_local_id(1))
    -
    387 #define hipThreadIdx_z (amp_get_local_id(0))
    -
    388 
    -
    389 #define hipBlockIdx_x (hc_get_group_id(2))
    -
    390 #define hipBlockIdx_y (hc_get_group_id(1))
    -
    391 #define hipBlockIdx_z (hc_get_group_id(0))
    -
    392 
    -
    393 #define hipBlockDim_x (amp_get_local_size(2))
    -
    394 #define hipBlockDim_y (amp_get_local_size(1))
    -
    395 #define hipBlockDim_z (amp_get_local_size(0))
    -
    396 
    -
    397 #define hipGridDim_x (hc_get_num_groups(2))
    -
    398 #define hipGridDim_y (hc_get_num_groups(1))
    -
    399 #define hipGridDim_z (hc_get_num_groups(0))
    -
    400 
    -
    401 
    -
    402 extern int warpSize ;
    -
    403 
    -
    404 
    -
    405 #define __syncthreads() hc_barrier(CLK_LOCAL_MEM_FENCE)
    -
    406 
    -
    407 
    -
    408 #if 0
    -
    409 #define KALMAR_PFE_BEGIN() \
    -
    410  hc::extent<3> ext(lp.gridDim.x, lp.gridDim.y, lp.gridDim.z);\
    -
    411  auto __hipExtTile = ext.tile(lp.groupDim.x, lp.groupDim.y, lp.groupDim.z);\
    -
    412  __hipExtTile.set_dynamic_group_segment_size(lp.groupMemBytes);\
    -
    413  \
    -
    414  hc::completion_future cf = hc::parallel_for_each (\
    -
    415  *lp.av,\
    -
    416  __hipExtTile,\
    -
    417  [=] (hc::tiled_index<3> __hipIdx) mutable [[hc]]
    -
    418 
    -
    419 
    +
    382 __device__ inline float __shfl_xor(float input, int lane_mask, int width)
    +
    383 {
    +
    384  return hc::__shfl_xor(input,lane_mask,width);
    +
    385 }
    +
    386 
    +
    387 
    +
    388 #include <hc_math.hpp>
    +
    389 // TODO: Choose whether default is precise math or fast math based on compilation flag.
    +
    390 #ifdef __HCC_ACCELERATOR__
    +
    391 using namespace hc::precise_math;
    +
    392 #endif
    +
    393 
    +
    394 //TODO: Undo this once min/max functions are supported by hc
    +
    395 inline int min(int arg1, int arg2) __attribute((hc,cpu)) { \
    +
    396  return (int)(hc::precise_math::fmin((float)arg1, (float)arg2));}
    +
    397 inline int max(int arg1, int arg2) __attribute((hc,cpu)) { \
    +
    398  return (int)(hc::precise_math::fmax((float)arg1, (float)arg2));}
    +
    399 
    +
    400 
    +
    401 //TODO - add a couple fast math operations here, the set here will grow :
    +
    402 __device__ inline float __cosf(float x) {return hc::fast_math::cosf(x); };
    +
    403 __device__ inline float __expf(float x) {return hc::fast_math::expf(x); };
    +
    404 __device__ inline float __frsqrt_rn(float x) {return hc::fast_math::rsqrt(x); };
    +
    405 __device__ inline float __fsqrt_rd(float x) {return hc::fast_math::sqrt(x); };
    +
    406 __device__ inline float __fsqrt_rn(float x) {return hc::fast_math::sqrt(x); };
    +
    407 __device__ inline float __fsqrt_ru(float x) {return hc::fast_math::sqrt(x); };
    +
    408 __device__ inline float __fsqrt_rz(float x) {return hc::fast_math::sqrt(x); };
    +
    409 __device__ inline float __log10f(float x) {return hc::fast_math::log10f(x); };
    +
    410 __device__ inline float __log2f(float x) {return hc::fast_math::log2f(x); };
    +
    411 __device__ inline float __logf(float x) {return hc::fast_math::logf(x); };
    +
    412 __device__ inline float __powf(float base, float exponent) {return hc::fast_math::powf(base, exponent); };
    +
    413 __device__ inline void __sincosf(float x, float *s, float *c) {return hc::fast_math::sincosf(x, s, c); };
    +
    414 __device__ inline float __sinf(float x) {return hc::fast_math::sinf(x); };
    +
    415 __device__ inline float __tanf(float x) {return hc::fast_math::tanf(x); };
    +
    416 __device__ inline float __dsqrt_rd(double x) {return hc::fast_math::sqrt(x); };
    +
    417 __device__ inline float __dsqrt_rn(double x) {return hc::fast_math::sqrt(x); };
    +
    418 __device__ inline float __dsqrt_ru(double x) {return hc::fast_math::sqrt(x); };
    +
    419 __device__ inline float __dsqrt_rz(double x) {return hc::fast_math::sqrt(x); };
    420 
    -
    421 #define KALMAR_PFE_END \
    -
    422  ); \
    -
    423  if (HIP_LAUNCH_BLOCKING) {\
    -
    424  if (HIP_TRACE_API) {\
    -
    425  fprintf(stderr, "hiptrace1: HIP_LAUNCH_BLOCKING ...\n");\
    -
    426  }\
    -
    427  cf.wait(); \
    -
    428  if (HIP_TRACE_API) {\
    -
    429  fprintf(stderr, "hiptrace1: ...completed.\n");\
    -
    430  }\
    -
    431  }
    -
    432 #endif
    -
    433 
    -
    434 
    -
    435 
    -
    436 #define HIP_KERNEL_NAME(...) __VA_ARGS__
    -
    437 
    -
    438 
    -
    439 #ifdef __HCC_CPP__
    -
    440 hc::accelerator_view *ihipLaunchKernel(hipStream_t stream);
    -
    441 
    -
    442 #if not defined(DISABLE_GRID_LAUNCH)
    -
    443 #define hipLaunchKernel(_kernelName, _numBlocks3D, _blockDim3D, _groupMemBytes, _stream, ...) \
    -
    444 do {\
    -
    445  grid_launch_parm lp;\
    -
    446  lp.gridDim.x = _numBlocks3D.x; \
    -
    447  lp.gridDim.y = _numBlocks3D.y; \
    -
    448  lp.gridDim.z = _numBlocks3D.z; \
    -
    449  lp.groupDim.x = _blockDim3D.x; \
    -
    450  lp.groupDim.y = _blockDim3D.y; \
    -
    451  lp.groupDim.z = _blockDim3D.z; \
    -
    452  lp.groupMemBytes = _groupMemBytes;\
    -
    453  hc::completion_future cf;\
    -
    454  lp.cf = &cf; \
    -
    455  lp.av = (ihipLaunchKernel(_stream)); \
    -
    456  if (HIP_TRACE_API) {\
    -
    457  fprintf(stderr, "hiptrace1: launch '%s' gridDim:[%d.%d.%d] groupDim:[%d.%d.%d] groupMem:+%d stream=%p\n", \
    -
    458  #_kernelName, lp.gridDim.z, lp.gridDim.y, lp.gridDim.x, lp.groupDim.z, lp.groupDim.y, lp.groupDim.x, lp.groupMemBytes, (void*)(_stream));\
    -
    459  }\
    -
    460  _kernelName (lp, __VA_ARGS__);\
    -
    461 } while(0)
    -
    462 
    -
    463 #else
    -
    464 #warning(DISABLE_GRID_LAUNCH set)
    -
    465 
    -
    466 #define hipLaunchKernel(_kernelName, _numBlocks3D, _blockDim3D, _groupMemBytes, _stream, ...) \
    -
    467 do {\
    -
    468  grid_launch_parm lp;\
    -
    469  lp.gridDim.x = _numBlocks3D.x * _blockDim3D.x;/*Convert from #blocks to #threads*/ \
    -
    470  lp.gridDim.y = _numBlocks3D.y * _blockDim3D.y;/*Convert from #blocks to #threads*/ \
    -
    471  lp.gridDim.z = _numBlocks3D.z * _blockDim3D.z;/*Convert from #blocks to #threads*/ \
    -
    472  lp.groupDim.x = _blockDim3D.x; \
    -
    473  lp.groupDim.y = _blockDim3D.y; \
    -
    474  lp.groupDim.z = _blockDim3D.z; \
    -
    475  lp.groupMemBytes = _groupMemBytes;\
    -
    476  hc::completion_future cf;\
    -
    477  lp.cf = &cf; \
    -
    478  lp.av = (ihipLaunchKernel(_stream)); \
    -
    479  if (HIP_TRACE_API) {\
    -
    480  fprintf(stderr, "hiptrace1: launch '%s' gridDim:[%d.%d.%d] groupDim:[%d.%d.%d] groupMem:+%d stream=%p\n", \
    -
    481  #_kernelName, lp.gridDim.z, lp.gridDim.y, lp.gridDim.x, lp.groupDim.z, lp.groupDim.y, lp.groupDim.x, lp.groupMemBytes, (void*)(_stream));\
    -
    482  }\
    -
    483  _kernelName (lp, __VA_ARGS__);\
    -
    484 } while(0)
    -
    485 /*end hipLaunchKernel */
    -
    486 #endif
    -
    487 
    -
    488 #elif defined (__HCC_C__)
    -
    489 
    -
    490 //TODO - develop C interface.
    -
    491 
    -
    492 #endif
    -
    493 
    -
    494 
    -
    495 #if not defined(DISABLE_GRID_LAUNCH)
    -
    496 // TODO -In GL these are no-ops and can be removed:
    -
    497 // Keep them around for a little while as a fallback.
    -
    498 #define KERNELBEGIN
    -
    499 #define KERNELEND
    -
    500 
    -
    501 #else
    -
    502 
    -
    503 // TODO-GL:
    -
    504 // These wrap the kernel in a PFE loop with macros.
    -
    505 // Not required with GL but exist here as a fallback.
    -
    506 #define KERNELBEGIN \
    -
    507  hc::extent<3> ext(lp.gridDim.x, lp.gridDim.y, lp.gridDim.z);\
    -
    508  auto __hipExtTile = ext.tile(lp.groupDim.x, lp.groupDim.y, lp.groupDim.z);\
    -
    509  __hipExtTile.set_dynamic_group_segment_size(lp.groupMemBytes);\
    -
    510  \
    -
    511  hc::completion_future cf = \
    -
    512  hc::parallel_for_each (\
    -
    513  *lp.av,\
    -
    514  __hipExtTile,\
    -
    515  [=] (hc::tiled_index<3> __hipIdx) mutable [[hc]] \
    -
    516  {
    -
    517 
    -
    518 
    -
    519 #define KERNELEND \
    -
    520  }); \
    -
    521  if (HIP_LAUNCH_BLOCKING) {\
    -
    522  if (HIP_TRACE_API) {\
    -
    523  fprintf(stderr, "hiptrace1: HIP_LAUNCH_BLOCKING ...\n");\
    -
    524  }\
    -
    525  cf.wait(); \
    -
    526  if (HIP_TRACE_API) {\
    -
    527  fprintf(stderr, "hiptrace1: ...completed.\n");\
    -
    528  }\
    -
    529  }
    -
    530 
    -
    531 #endif /*DISABLE_GRID_LAUNCH*/
    +
    424 #define hipThreadIdx_x (amp_get_local_id(2))
    +
    425 #define hipThreadIdx_y (amp_get_local_id(1))
    +
    426 #define hipThreadIdx_z (amp_get_local_id(0))
    +
    427 
    +
    428 #define hipBlockIdx_x (hc_get_group_id(2))
    +
    429 #define hipBlockIdx_y (hc_get_group_id(1))
    +
    430 #define hipBlockIdx_z (hc_get_group_id(0))
    +
    431 
    +
    432 #define hipBlockDim_x (amp_get_local_size(2))
    +
    433 #define hipBlockDim_y (amp_get_local_size(1))
    +
    434 #define hipBlockDim_z (amp_get_local_size(0))
    +
    435 
    +
    436 #define hipGridDim_x (hc_get_num_groups(2))
    +
    437 #define hipGridDim_y (hc_get_num_groups(1))
    +
    438 #define hipGridDim_z (hc_get_num_groups(0))
    +
    439 
    +
    440 
    +
    441 extern int warpSize ;
    +
    442 
    +
    443 
    +
    444 #define __syncthreads() hc_barrier(CLK_LOCAL_MEM_FENCE)
    +
    445 
    +
    446 
    +
    447 #if 0
    +
    448 #define KALMAR_PFE_BEGIN() \
    +
    449  hc::extent<3> ext(lp.gridDim.x, lp.gridDim.y, lp.gridDim.z);\
    +
    450  auto __hipExtTile = ext.tile(lp.groupDim.x, lp.groupDim.y, lp.groupDim.z);\
    +
    451  __hipExtTile.set_dynamic_group_segment_size(lp.groupMemBytes);\
    +
    452  \
    +
    453  hc::completion_future cf = hc::parallel_for_each (\
    +
    454  *lp.av,\
    +
    455  __hipExtTile,\
    +
    456  [=] (hc::tiled_index<3> __hipIdx) mutable [[hc]]
    +
    457 
    +
    458 
    +
    459 
    +
    460 #define KALMAR_PFE_END \
    +
    461  ); \
    +
    462  if (HIP_LAUNCH_BLOCKING) {\
    +
    463  if (HIP_TRACE_API) {\
    +
    464  fprintf(stderr, "hiptrace1: HIP_LAUNCH_BLOCKING ...\n");\
    +
    465  }\
    +
    466  cf.wait(); \
    +
    467  if (HIP_TRACE_API) {\
    +
    468  fprintf(stderr, "hiptrace1: ...completed.\n");\
    +
    469  }\
    +
    470  }
    +
    471 #endif
    +
    472 
    +
    473 
    +
    474 
    +
    475 #define HIP_KERNEL_NAME(...) __VA_ARGS__
    +
    476 
    +
    477 
    +
    478 #ifdef __HCC_CPP__
    +
    479 hc::accelerator_view *ihipLaunchKernel(hipStream_t stream);
    +
    480 
    +
    481 #if not defined(DISABLE_GRID_LAUNCH)
    +
    482 #define hipLaunchKernel(_kernelName, _numBlocks3D, _blockDim3D, _groupMemBytes, _stream, ...) \
    +
    483 do {\
    +
    484  grid_launch_parm lp;\
    +
    485  lp.gridDim.x = _numBlocks3D.x; \
    +
    486  lp.gridDim.y = _numBlocks3D.y; \
    +
    487  lp.gridDim.z = _numBlocks3D.z; \
    +
    488  lp.groupDim.x = _blockDim3D.x; \
    +
    489  lp.groupDim.y = _blockDim3D.y; \
    +
    490  lp.groupDim.z = _blockDim3D.z; \
    +
    491  lp.groupMemBytes = _groupMemBytes;\
    +
    492  hc::completion_future cf;\
    +
    493  lp.cf = &cf; \
    +
    494  lp.av = (ihipLaunchKernel(_stream)); \
    +
    495  if (HIP_TRACE_API) {\
    +
    496  fprintf(stderr, "hiptrace1: launch '%s' gridDim:[%d.%d.%d] groupDim:[%d.%d.%d] groupMem:+%d stream=%p\n", \
    +
    497  #_kernelName, lp.gridDim.z, lp.gridDim.y, lp.gridDim.x, lp.groupDim.z, lp.groupDim.y, lp.groupDim.x, lp.groupMemBytes, (void*)(_stream));\
    +
    498  }\
    +
    499  _kernelName (lp, __VA_ARGS__);\
    +
    500 } while(0)
    +
    501 
    +
    502 #else
    +
    503 #warning(DISABLE_GRID_LAUNCH set)
    +
    504 
    +
    505 #define hipLaunchKernel(_kernelName, _numBlocks3D, _blockDim3D, _groupMemBytes, _stream, ...) \
    +
    506 do {\
    +
    507  grid_launch_parm lp;\
    +
    508  lp.gridDim.x = _numBlocks3D.x * _blockDim3D.x;/*Convert from #blocks to #threads*/ \
    +
    509  lp.gridDim.y = _numBlocks3D.y * _blockDim3D.y;/*Convert from #blocks to #threads*/ \
    +
    510  lp.gridDim.z = _numBlocks3D.z * _blockDim3D.z;/*Convert from #blocks to #threads*/ \
    +
    511  lp.groupDim.x = _blockDim3D.x; \
    +
    512  lp.groupDim.y = _blockDim3D.y; \
    +
    513  lp.groupDim.z = _blockDim3D.z; \
    +
    514  lp.groupMemBytes = _groupMemBytes;\
    +
    515  hc::completion_future cf;\
    +
    516  lp.cf = &cf; \
    +
    517  lp.av = (ihipLaunchKernel(_stream)); \
    +
    518  if (HIP_TRACE_API) {\
    +
    519  fprintf(stderr, "hiptrace1: launch '%s' gridDim:[%d.%d.%d] groupDim:[%d.%d.%d] groupMem:+%d stream=%p\n", \
    +
    520  #_kernelName, lp.gridDim.z, lp.gridDim.y, lp.gridDim.x, lp.groupDim.z, lp.groupDim.y, lp.groupDim.x, lp.groupMemBytes, (void*)(_stream));\
    +
    521  }\
    +
    522  _kernelName (lp, __VA_ARGS__);\
    +
    523 } while(0)
    +
    524 /*end hipLaunchKernel */
    +
    525 #endif
    +
    526 
    +
    527 #elif defined (__HCC_C__)
    +
    528 
    +
    529 //TODO - develop C interface.
    +
    530 
    +
    531 #endif
    532 
    533 
    -
    534 #endif // __HCC__
    -
    535 
    -
    536 
    -
    541 extern int HIP_PRINT_ENV ;
    -
    542 extern int HIP_TRACE_API;
    -
    543 extern int HIP_LAUNCH_BLOCKING ;
    -
    544 
    -
    550 // End doxygen API:
    +
    534 #if not defined(DISABLE_GRID_LAUNCH)
    +
    535 // TODO -In GL these are no-ops and can be removed:
    +
    536 // Keep them around for a little while as a fallback.
    +
    537 #define KERNELBEGIN
    +
    538 #define KERNELEND
    +
    539 
    +
    540 #else
    +
    541 
    +
    542 // TODO-GL:
    +
    543 // These wrap the kernel in a PFE loop with macros.
    +
    544 // Not required with GL but exist here as a fallback.
    +
    545 #define KERNELBEGIN \
    +
    546  hc::extent<3> ext(lp.gridDim.x, lp.gridDim.y, lp.gridDim.z);\
    +
    547  auto __hipExtTile = ext.tile(lp.groupDim.x, lp.groupDim.y, lp.groupDim.z);\
    +
    548  __hipExtTile.set_dynamic_group_segment_size(lp.groupMemBytes);\
    +
    549  \
    +
    550  hc::completion_future cf = \
    +
    551  hc::parallel_for_each (\
    +
    552  *lp.av,\
    +
    553  __hipExtTile,\
    +
    554  [=] (hc::tiled_index<3> __hipIdx) mutable [[hc]] \
    +
    555  {
    +
    556 
    +
    557 
    +
    558 #define KERNELEND \
    +
    559  }); \
    +
    560  if (HIP_LAUNCH_BLOCKING) {\
    +
    561  if (HIP_TRACE_API) {\
    +
    562  fprintf(stderr, "hiptrace1: HIP_LAUNCH_BLOCKING ...\n");\
    +
    563  }\
    +
    564  cf.wait(); \
    +
    565  if (HIP_TRACE_API) {\
    +
    566  fprintf(stderr, "hiptrace1: ...completed.\n");\
    +
    567  }\
    +
    568  }
    +
    569 
    +
    570 #endif /*DISABLE_GRID_LAUNCH*/
    +
    571 
    +
    572 
    +
    573 #endif // __HCC__
    +
    574 
    +
    575 
    +
    580 extern int HIP_PRINT_ENV ;
    +
    581 extern int HIP_TRACE_API;
    +
    582 extern int HIP_LAUNCH_BLOCKING ;
    +
    583 
    +
    589 // End doxygen API:
    int HIP_TRACE_API
    Trace HIP APIs.
    Definition: hip_hcc.cpp:57
    +
    TODO-doc.
    Definition: hip_hcc.cpp:82
    +
    HIP C++ Texture API for hcc compiler.
    int HIP_PRINT_ENV
    Print all HIP-related environment variables.
    Definition: hip_hcc.cpp:56
    +
    Contains C function APIs for HIP runtime. This file does not use any HCC builtin or special language ...
    int HIP_LAUNCH_BLOCKING
    Make all HIP APIs host-synchronous.
    Definition: hip_hcc.cpp:58
    diff --git a/projects/hip/docs/RuntimeAPI/html/hcc__detail_2hip__runtime__api_8h.html b/projects/hip/docs/RuntimeAPI/html/hcc__detail_2hip__runtime__api_8h.html new file mode 100644 index 0000000000..374a22be9b --- /dev/null +++ b/projects/hip/docs/RuntimeAPI/html/hcc__detail_2hip__runtime__api_8h.html @@ -0,0 +1,326 @@ + + + + + + +HIP: Heterogenous-computing Interface for Portability: /home/bensander/HIP.public/include/hcc_detail/hip_runtime_api.h File Reference + + + + + + + + + +
    +
    + + + + + + +
    +
    HIP: Heterogenous-computing Interface for Portability +
    +
    +
    + + + + + + +
    + All Classes Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
    + + +
    + +
    + + +
    +
    +
    +Classes | +Macros | +Typedefs | +Enumerations | +Functions
    +
    +
    hip_runtime_api.h File Reference
    +
    +
    + +

    Contains C function APIs for HIP runtime. This file does not use any HCC builtin or special language extensions (-hc mode) ; those functions in hip_runtime.h. +More...

    +
    #include <stdint.h>
    +#include <stddef.h>
    +#include <hcc_detail/host_defines.h>
    +#include <hc.hpp>
    +
    +

    Go to the source code of this file.

    + + + + + + +

    +Classes

    struct  dim3
     
    struct  hipEvent_t
     
    + + + + + + + + + + + + + + + + + + + +

    +Macros

    #define hipStreamDefault   0x00
     Flags that can be used with hipStreamCreateWithFlags. More...
     
    +#define hipStreamNonBlocking   0x01
     Stream does not implicitly synchronize with null stream.
     
    #define hipEventDefault   0x0
     Flags that can be used with hipEventCreateWithFlags: More...
     
    +#define hipEventBlockingSync   0x1
     Waiting will yield CPU. Power-friendly and usage-friendly but may increase latency.
     
    +#define hipEventDisableTiming   0x2
     Disable event's capability to record timing information. May improve performance.
     
    #define hipEventInterprocess   0x4
     Event can support IPC. More...
     
    + + + + + + + + + + + +

    +Typedefs

    typedef enum hipFuncCache hipFuncCache
     
    typedef enum hipSharedMemConfig hipSharedMemConfig
     
    typedef struct dim3 dim3
     
    +typedef struct ihipStream_thipStream_t
     
    +typedef struct hipEvent_t hipEvent_t
     
    + + + + + + + +

    +Enumerations

    enum  hipFuncCache { hipFuncCachePreferNone, +hipFuncCachePreferShared, +hipFuncCachePreferL1, +hipFuncCachePreferEqual + }
     
    enum  hipSharedMemConfig { hipSharedMemBankSizeDefault, +hipSharedMemBankSizeFourByte, +hipSharedMemBankSizeEightByte + }
     
    enum  hipMemcpyKind {
    +  hipMemcpyHostToHost = 0, +hipMemcpyHostToDevice = 1, +hipMemcpyDeviceToHost = 2, +hipMemcpyDeviceToDevice =3, +
    +  hipMemcpyDefault = 4 +
    + }
     
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Functions

    hipError_t hipDeviceSynchronize (void)
     Blocks until the default device has completed all preceding requested tasks. More...
     
    hipError_t hipDeviceReset (void)
     Destroy all resources and reset all state on the default device in the current process. More...
     
    hipError_t hipSetDevice (int device)
     Set default device to be used for subsequent hip API calls from this thread. More...
     
    hipError_t hipGetDevice (int *device)
     Return the default device id for the calling host thread. More...
     
    hipError_t hipGetDeviceCount (int *count)
     Return number of compute-capable devices. More...
     
    hipError_t hipDeviceGetAttribute (int *pi, hipDeviceAttribute_t attr, int device)
     Query device attribute. More...
     
    hipError_t hipDeviceGetProperties (hipDeviceProp_t *prop, int device)
     Returns device properties. More...
     
    hipError_t hipDeviceSetCacheConfig (hipFuncCache cacheConfig)
     Set L1/Shared cache partition. More...
     
    hipError_t hipDeviceGetCacheConfig (hipFuncCache *cacheConfig)
     Get Cache configuration for the current device. More...
     
    hipError_t hipFuncSetCacheConfig (hipFuncCache config)
     Set Cache configuration for a specific function. More...
     
    hipError_t hipDeviceGetSharedMemConfig (hipSharedMemConfig *pConfig)
     Get Shared memory bank configuration. More...
     
    hipError_t hipDeviceSetSharedMemConfig (hipSharedMemConfig config)
     Set Shared memory bank configuration. More...
     
    hipError_t hipGetLastError (void)
     Return last error returned by any HIP runtime API call and resets the stored error code to hipSuccess. More...
     
    hipError_t hipPeekAtLastError (void)
     Return last error returned by any HIP runtime API call. More...
     
    const char * hipGetErrorName (hipError_t hip_error)
     Return name of the specified error code in text form. More...
     
    const char * hipGetErrorString (hipError_t hip_error)
     Return handy text string message to explain the error which occurred. More...
     
    hipError_t hipStreamCreateWithFlags (hipStream_t *stream, unsigned int flags)
     Create an asynchronous stream. More...
     
    hipError_t hipStreamWaitEvent (hipStream_t stream, hipEvent_t event, unsigned int flags)
     Make the specified compute stream wait for an event. More...
     
    hipError_t hipStreamSynchronize (hipStream_t stream)
     Wait for all commands in stream to complete. More...
     
    hipError_t hipStreamDestroy (hipStream_t stream)
     Destroys the specified stream. More...
     
    hipError_t hipStreamGetFlags (hipStream_t stream, unsigned int *flags)
     Return flags associated with this stream. More...
     
    hipError_t hipEventCreateWithFlags (hipEvent_t *event, unsigned flags)
     Create an event with the specified flags. More...
     
    hipError_t hipEventRecord (hipEvent_t event, hipStream_t stream=NULL)
     Record an event in the specified stream. More...
     
    hipError_t hipEventDestroy (hipEvent_t event)
     Destroy the specified event. More...
     
    hipError_t hipEventSynchronize (hipEvent_t event)
     Wait for an event to complete. More...
     
    hipError_t hipEventElapsedTime (float *ms, hipEvent_t start, hipEvent_t stop)
     Return the elapsed time between two events. More...
     
    hipError_t hipEventQuery (hipEvent_t event)
     Query event status. More...
     
    hipError_t hipMalloc (void **ptr, size_t size)
     Allocate memory on the default accelerator. More...
     
    hipError_t hipMallocHost (void **ptr, size_t size)
     Allocate pinned host memory. More...
     
    hipError_t hipFree (void *ptr)
     Free memory allocated by the hcc hip memory allocation API. This API performs an implicit hipDeviceSynchronize() call. More...
     
    hipError_t hipFreeHost (void *ptr)
     Free memory allocated by the hcc hip host memory allocation API. More...
     
    hipError_t hipMemcpy (void *dst, const void *src, size_t sizeBytes, hipMemcpyKind kind)
     Copy data from src to dst. More...
     
    hipError_t hipMemcpyToSymbol (const char *symbolName, const void *src, size_t sizeBytes, size_t offset, hipMemcpyKind kind)
     Copies sizeBytes bytes from the memory area pointed to by src to the memory area pointed to by offset bytes from the start of symbol symbol. More...
     
    hipError_t hipMemcpyAsync (void *dst, const void *src, size_t sizeBytes, hipMemcpyKind kind, hipStream_t stream=0)
     Copy data from src to dst asynchronously. More...
     
    hipError_t hipMemset (void *dst, int value, size_t sizeBytes)
     Fills the first sizeBytes bytes of the memory area pointed to by dst with the constant byte value value. More...
     
    hipError_t hipMemsetAsync (void *dst, int value, size_t sizeBytes, hipStream_t=0)
     Fills the first sizeBytes bytes of the memory area pointed to by dst with the constant byte value value. More...
     
    +hipError_t hipMemGetInfo (size_t *free, size_t *total)
     
    hipError_t hipDeviceCanAccessPeer (int *canAccessPeer, int device, int peerDevice)
     Determine if a device can access a peer's memory. More...
     
    hipError_t hipDeviceDisablePeerAccess (int peerDevice)
     Disables registering memory on peerDevice for direct access from the current device. More...
     
    hipError_t hipDeviceEnablePeerAccess (int peerDevice, unsigned int flags)
     Enables registering memory on peerDevice for direct access from the current device. More...
     
    hipError_t hipMemcpyPeer (void *dst, int dstDevice, const void *src, int srcDevice, size_t sizeBytes)
     Copies memory from one device to memory on another device. More...
     
    hipError_t hipMemcpyPeerAsync (void *dst, int dstDevice, const void *src, int srcDevice, size_t sizeBytes, hipStream_t stream=0)
     Copies memory from one device to memory on another device. More...
     
    hipError_t hipDriverGetVersion (int *driverVersion)
     Returns the approximate HIP driver version. More...
     
    hipError_t hipHccGetAccelerator (int deviceId, hc::accelerator *acc)
     Return hc::accelerator associated with the specified deviceId. More...
     
    hipError_t hipHccGetAcceleratorView (hipStream_t stream, hc::accelerator_view **av)
     Return hc::accelerator_view associated with the specified stream. More...
     
    +

    Detailed Description

    +

    Contains C function APIs for HIP runtime. This file does not use any HCC builtin or special language extensions (-hc mode); those functions are in hip_runtime.h.

    +
    + + + + diff --git a/projects/hip/docs/RuntimeAPI/html/hcc__detail_2hip__runtime__api_8h_source.html b/projects/hip/docs/RuntimeAPI/html/hcc__detail_2hip__runtime__api_8h_source.html index 45bbf45deb..876afac7f7 100644 --- a/projects/hip/docs/RuntimeAPI/html/hcc__detail_2hip__runtime__api_8h_source.html +++ b/projects/hip/docs/RuntimeAPI/html/hcc__detail_2hip__runtime__api_8h_source.html @@ -4,7 +4,7 @@ -HIP: Heterogenous-computing Interface for Portability: /home/fpadmin/ben/HIP6/include/hcc_detail/hip_runtime_api.h Source File +HIP: Heterogenous-computing Interface for Portability: /home/bensander/HIP.public/include/hcc_detail/hip_runtime_api.h Source File @@ -89,7 +89,7 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
    hip_runtime_api.h
    -
    1 /*
    +Go to the documentation of this file.
    1 /*
    2 Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
    3 
    4 Permission is hereby granted, free of charge, to any person obtaining a copy
    @@ -112,356 +112,345 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
    21 */
    22 #pragma once
    23 
    -
    24 
    -
    25 #include <stdint.h>
    -
    26 #include <stddef.h>
    -
    27 
    -
    28 #include <hcc_detail/host_defines.h>
    -
    29 
    -
    30 #if defined (__HCC__) && (__hcc_workweek__ < 1602)
    -
    31 #error("This version of HIP requires a newer version of HCC.");
    -
    32 #endif
    -
    33 
    -
    34 
    -
    35 // hip_api_hcc.h
    -
    36 // Contains C function APIs for HIP runtime.
    -
    37 // This file does not use any HCC builtins or special language extensions (-hc mode) ; those functions in hip_hcc.h.
    -
    38 
    -
    39 
    -
    40 // Structure definitions:
    -
    41 #ifdef __cplusplus
    -
    42 extern "C" {
    -
    43 #endif
    -
    44 
    -
    49 #define hipStreamDefault 0x00
    -
    51 #define hipStreamNonBlocking 0x01
    -
    52 
    -
    53 
    -
    55 #define hipEventDefault 0x0
    -
    56 #define hipEventBlockingSync 0x1
    -
    57 #define hipEventDisableTiming 0x2
    -
    58 #define hipEventInterprocess 0x4
    -
    59 
    -
    60 
    -
    64 typedef enum hipFuncCache {
    - - - - -
    69 } hipFuncCache;
    -
    70 
    -
    71 
    -
    75 typedef enum hipSharedMemConfig {
    - - - - +
    29 #include <stdint.h>
    +
    30 #include <stddef.h>
    +
    31 
    + +
    33 
    +
    34 #if defined (__HCC__) && (__hcc_workweek__ < 1602)
    +
    35 #error("This version of HIP requires a newer version of HCC.");
    +
    36 #endif
    +
    37 
    +
    38 // Structure definitions:
    +
    39 #ifdef __cplusplus
    +
    40 extern "C" {
    +
    41 #endif
    +
    42 
    +
    47 #define hipStreamDefault 0x00
    +
    49 #define hipStreamNonBlocking 0x01
    +
    50 
    +
    51 
    +
    53 #define hipEventDefault 0x0
    +
    54 #define hipEventBlockingSync 0x1
    +
    55 #define hipEventDisableTiming 0x2
    +
    56 #define hipEventInterprocess 0x4
    +
    57 
    +
    58 
    +
    62 typedef enum hipFuncCache {
    + + + + +
    67 } hipFuncCache;
    +
    68 
    +
    69 
    +
    73 typedef enum hipSharedMemConfig {
    + + + + +
    78 
    +
    79 
    80 
    -
    81 
    -
    82 
    -
    87 typedef struct dim3 {
    -
    88  uint32_t x;
    -
    89  uint32_t y;
    -
    90  uint32_t z;
    -
    91 
    -
    92  dim3(uint32_t _x=1, uint32_t _y=1, uint32_t _z=1) : x(_x), y(_y), z(_z) {};
    -
    93 } dim3;
    +
    85 typedef struct dim3 {
    +
    86  uint32_t x;
    +
    87  uint32_t y;
    +
    88  uint32_t z;
    +
    89 
    +
    90  dim3(uint32_t _x=1, uint32_t _y=1, uint32_t _z=1) : x(_x), y(_y), z(_z) {};
    +
    91 } dim3;
    +
    92 
    +
    93 
    94 
    -
    95 
    -
    96 
    - - - - - - -
    107 } ;
    -
    108 
    -
    109 
    -
    110 // Doxygen end group GlobalDefs
    -
    114 //-------------------------------------------------------------------------------------------------
    -
    115 
    -
    116 
    -
    117 // The handle allows the async commands to use the stream even if the parent hipStream_t goes out-of-scope.
    -
    118 typedef struct ihipStream_t * hipStream_t;
    -
    119 
    -
    120 
    -
    121 /*
    -
    122  * Opaque structure allows the true event (pointed at by the handle) to remain "live" even if the surrounding hipEvent_t goes out-of-scope.
    -
    123  * This is handy for cases where the hipEvent_t goes out-of-scope but the true event is being written by some async queue or device */
    -
    124 typedef struct hipEvent_t {
    -
    125  struct ihipEvent_t *_handle;
    -
    126 } hipEvent_t;
    + + + + + + +
    105 } ;
    +
    106 
    +
    107 
    +
    108 // Doxygen end group GlobalDefs
    +
    112 //-------------------------------------------------------------------------------------------------
    +
    113 
    +
    114 
    +
    115 // The handle allows the async commands to use the stream even if the parent hipStream_t goes out-of-scope.
    +
    116 typedef struct ihipStream_t * hipStream_t;
    +
    117 
    +
    118 
    +
    119 /*
    +
    120  * Opaque structure allows the true event (pointed at by the handle) to remain "live" even if the surrounding hipEvent_t goes out-of-scope.
    +
    121  * This is handy for cases where the hipEvent_t goes out-of-scope but the true event is being written by some async queue or device */
    +
    122 typedef struct hipEvent_t {
    +
    123  struct ihipEvent_t *_handle;
    +
    124 } hipEvent_t;
    +
    125 
    +
    126 
    127 
    128 
    129 
    130 
    -
    131 
    -
    132 
    -
    133 #ifdef __cplusplus
    -
    134 } /* extern "C" */
    -
    135 #endif
    -
    136 
    +
    131 #ifdef __cplusplus
    +
    132 } /* extern "C" */
    +
    133 #endif
    +
    134 
    +
    135 
    +
    136 
    137 
    -
    138 
    -
    139 
    -
    140 //==================================================================================================
    -
    141 #ifdef __cplusplus
    -
    142 extern "C" {
    -
    143 #endif
    -
    144 
    - +
    138 //==================================================================================================
    +
    139 #ifdef __cplusplus
    +
    140 extern "C" {
    +
    141 #endif
    +
    142 
    + +
    167 
    +
    168 
    169 
    -
    170 
    -
    171 
    - -
    183 
    -
    184 
    -
    209 hipError_t hipSetDevice(int device);
    -
    210 
    -
    211 
    -
    223 hipError_t hipGetDevice(int *device);
    -
    224 
    -
    225 
    -
    233 hipError_t hipGetDeviceCount(int *count);
    -
    234 
    -
    235 
    - -
    245 
    -
    246 
    -
    247 
    -
    248 //Cache partitioning functions:
    -
    249 
    - -
    257 
    -
    258 
    - -
    266 
    -
    267 
    - -
    275 
    -
    276 //---
    -
    277 //Shared bank config functions:
    -
    278 
    - -
    286 
    -
    287 
    - -
    295 
    -
    296 
    -
    297 // end doxygen Device
    - -
    318 
    -
    319 
    - -
    332 
    -
    333 
    -
    334 
    -
    343 const char *hipGetErrorName(hipError_t hip_error);
    -
    344 
    -
    345 
    -
    356 const char *hipGetErrorString(hipError_t hip_error);
    -
    357 
    -
    358 // end doxygen Error
    -
    388 hipError_t hipStreamCreateWithFlags(hipStream_t *stream, unsigned int flags);
    -
    389 
    -
    390 
    -
    391 
    -
    401 static inline hipError_t hipStreamCreate(hipStream_t *stream)
    -
    402 {
    - -
    404 }
    -
    405 
    -
    406 
    -
    422 hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int flags);
    -
    423 
    -
    424 
    - -
    437 
    -
    438 
    - -
    453 
    -
    454 
    -
    468 hipError_t hipStreamGetFlags(hipStream_t stream, unsigned int *flags);
    -
    469 
    -
    470 
    -
    471 // end doxygen Stream
    -
    496 hipError_t hipEventCreateWithFlags(hipEvent_t* event, unsigned flags);
    -
    497 
    -
    498 
    -
    505 static inline hipError_t hipEventCreate(hipEvent_t* event)
    -
    506 {
    -
    507  return hipEventCreateWithFlags(event, 0);
    -
    508 }
    -
    509 
    -
    510 
    -
    536 hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream = NULL);
    -
    537 
    -
    538 
    - -
    550 
    -
    551 
    - -
    566 
    -
    567 
    -
    592 hipError_t hipEventElapsedTime(float *ms, hipEvent_t start, hipEvent_t stop);
    -
    593 
    -
    594 
    - -
    608 
    -
    609 
    -
    610 // end doxygen Events
    -
    640 hipError_t hipMalloc(void** ptr, size_t size) ;
    -
    641 
    -
    642 
    -
    650 hipError_t hipMallocHost(void** ptr, size_t size) ;
    -
    651 
    -
    652 
    -
    653 
    -
    654 // TODO-doc (error codes)
    -
    662 hipError_t hipFree(void* ptr);
    -
    663 
    -
    664 
    -
    665 
    -
    666 // TODO-doc (error codes)
    -
    673 hipError_t hipFreeHost(void* ptr);
    -
    674 
    -
    675 
    + +
    181 
    +
    182 
    +
    207 hipError_t hipSetDevice(int device);
    +
    208 
    +
    209 
    +
    221 hipError_t hipGetDevice(int *device);
    +
    222 
    +
    223 
    +
    231 hipError_t hipGetDeviceCount(int *count);
    +
    232 
    +
    239 hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device);
    +
    240 
    + +
    250 
    +
    251 
    +
    252 
    +
    253 //Cache partitioning functions:
    +
    254 
    + +
    262 
    +
    263 
    + +
    271 
    +
    272 
    + +
    280 
    +
    281 //---
    +
    282 //Shared bank config functions:
    +
    283 
    + +
    291 
    +
    292 
    + +
    300 
    +
    301 
    +
    302 // end doxygen Device
    + +
    323 
    +
    324 
    + +
    337 
    +
    338 
    +
    339 
    +
    348 const char *hipGetErrorName(hipError_t hip_error);
    +
    349 
    +
    350 
    +
    361 const char *hipGetErrorString(hipError_t hip_error);
    +
    362 
    +
    363 // end doxygen Error
    +
    393 hipError_t hipStreamCreateWithFlags(hipStream_t *stream, unsigned int flags);
    +
    394 
    +
    395 
    +
    396 
    +
    406 static inline hipError_t hipStreamCreate(hipStream_t *stream)
    +
    407 {
    + +
    409 }
    +
    410 
    +
    411 
    +
    427 hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int flags);
    +
    428 
    +
    429 
    + +
    442 
    +
    443 
    + +
    458 
    +
    459 
    +
    473 hipError_t hipStreamGetFlags(hipStream_t stream, unsigned int *flags);
    +
    474 
    +
    475 
    +
    476 // end doxygen Stream
    +
    501 hipError_t hipEventCreateWithFlags(hipEvent_t* event, unsigned flags);
    +
    502 
    +
    503 
    +
    510 static inline hipError_t hipEventCreate(hipEvent_t* event)
    +
    511 {
    +
    512  return hipEventCreateWithFlags(event, 0);
    +
    513 }
    +
    514 
    +
    515 
    +
    541 hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream = NULL);
    +
    542 
    +
    543 
    + +
    555 
    +
    556 
    + +
    571 
    +
    572 
    +
    597 hipError_t hipEventElapsedTime(float *ms, hipEvent_t start, hipEvent_t stop);
    +
    598 
    +
    599 
    + +
    613 
    +
    614 
    +
    615 // end doxygen Events
    +
    645 hipError_t hipMalloc(void** ptr, size_t size) ;
    +
    646 
    +
    647 
    +
    655 hipError_t hipMallocHost(void** ptr, size_t size) ;
    +
    656 
    +
    657 
    +
    665 hipError_t hipFree(void* ptr);
    +
    666 
    +
    667 
    +
    668 
    +
    675 hipError_t hipFreeHost(void* ptr);
    676 
    677 
    -
    678 // TODO-doc (error codes)
    -
    694 hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind);
    +
    678 
    +
    693 hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind);
    +
    694 
    695 
    -
    696 
    -
    697 hipError_t hipMemcpyToSymbol(const char* symbolName, const void *src, size_t sizeBytes, size_t offset, hipMemcpyKind kind);
    -
    698 
    -
    699 
    -
    700 
    -
    701 // TODO-doc (error codes)
    -
    712 hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind, hipStream_t stream=0);
    -
    713 
    -
    714 
    -
    715 // TODO-doc
    -
    716 /*
    -
    717  * This function is host-asynchronous and may return before the memset operation completes.
    -
    718  * Same as hipMemsetAsync with null stream.
    -
    719  *
    -
    720  * */
    -
    721 hipError_t hipMemset(void* dst, int value, size_t sizeBytes );
    -
    722 hipError_t hipMemsetAsync(void* dst, int value, size_t sizeBytes, hipStream_t = 0 );
    -
    723 
    -
    724 
    -
    725 /*
    -
    726  * @brief Query memory info. Return snapshot of free memory, and total allocatable memory on the device.
    -
    727  *
    -
    728  * Returns in *free a snapshot of the current free memory o
    -
    729  **/
    -
    730 hipError_t hipMemGetInfo (size_t * free, size_t * total) ;
    -
    731 
    -
    732 // doxygen end Memory
    -
    757 hipError_t hipDeviceCanAccessPeer ( int* canAccessPeer, int device, int peerDevice );
    -
    758 
    -
    759 // TODO-DOC
    -
    760 hipError_t hipDeviceDisablePeerAccess ( int peerDevice );
    -
    761 
    -
    762 // TODO-DOC
    -
    763 hipError_t hipDeviceEnablePeerAccess ( int peerDevice, unsigned int flags );
    -
    764 
    -
    765 // TODO-DOC
    -
    766 hipError_t hipMemcpyPeer ( void* dst, int dstDevice, const void* src, int srcDevice, size_t sizeBytes );
    -
    767 
    -
    768 // TODO-DOC
    -
    769 hipError_t hipMemcpyPeerAsync ( void* dst, int dstDevice, const void* src, int srcDevice, size_t sizeBytes, hipStream_t stream=0 );
    -
    770 // doxygen end PeerToPeer
    -
    794 hipError_t hipDriverGetVersion(int *driverVersion) ;
    -
    795 
    -
    796 
    -
    797 
    -
    798 // doxygen end Version Management
    -
    825 #ifdef __cplusplus
    -
    826 } /* extern "c" */
    -
    827 #endif
    -
    828 
    -
    829 
    -
    847 #ifdef __HCC__
    -
    848 #include <hc.hpp>
    -
    852 hipError_t hipHccGetAccelerator(int deviceId, hc::accelerator *acc);
    -
    853 
    -
    857 hipError_t hipHccGetAcceleratorView(hipStream_t stream, hc::accelerator_view **av);
    -
    858 #endif
    -
    859 
    -
    860 
    -
    861 // end-group HCC_Specific
    -
    868 // doxygen end HIP API
    -
    hipError_t hipDeviceEnablePeerAccess(int peerDevice, unsigned int flags)
    Definition: hip_hcc.cpp:1575
    -
    hipError_t hipDeviceCanAccessPeer(int *canAccessPeer, int device, int peerDevice)
    Determine if a device can access a peer's memory.
    Definition: hip_hcc.cpp:1551
    +
    710 hipError_t hipMemcpyToSymbol(const char* symbolName, const void *src, size_t sizeBytes, size_t offset, hipMemcpyKind kind);
    +
    711 
    +
    712 
    +
    724 hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind, hipStream_t stream=0);
    +
    725 
    +
    726 
    +
    739 hipError_t hipMemset(void* dst, int value, size_t sizeBytes );
    +
    740 
    +
    741 
    +
    755 hipError_t hipMemsetAsync(void* dst, int value, size_t sizeBytes, hipStream_t = 0 );
    +
    756 
    +
    757 
    +
    758 /*
    +
    759  * @brief Query memory info.
    +
    760  * Return snapshot of free memory, and total allocatable memory on the device.
    +
    761  *
    +
    762  * Returns in *free a snapshot of the current free memory o
    +
    763  **/
    +
    764 hipError_t hipMemGetInfo (size_t * free, size_t * total) ;
    +
    765 
    +
    766 // doxygen end Memory
    +
    791 hipError_t hipDeviceCanAccessPeer ( int* canAccessPeer, int device, int peerDevice );
    +
    792 
    +
    793 
    +
    794 
    +
    805 hipError_t hipDeviceDisablePeerAccess ( int peerDevice );
    +
    806 
    +
    816 hipError_t hipDeviceEnablePeerAccess ( int peerDevice, unsigned int flags );
    +
    817 
    +
    829 hipError_t hipMemcpyPeer ( void* dst, int dstDevice, const void* src, int srcDevice, size_t sizeBytes );
    +
    830 
    +
    843 hipError_t hipMemcpyPeerAsync ( void* dst, int dstDevice, const void* src, int srcDevice, size_t sizeBytes, hipStream_t stream=0 );
    +
    844 // doxygen end PeerToPeer
    +
    868 hipError_t hipDriverGetVersion(int *driverVersion) ;
    +
    869 
    +
    870 
    +
    871 
    +
    872 // doxygen end Version Management
    +
    899 #ifdef __cplusplus
    +
    900 } /* extern "c" */
    +
    901 #endif
    +
    902 
    +
    903 
    +
    921 #ifdef __HCC__
    +
    922 #include <hc.hpp>
    +
    926 hipError_t hipHccGetAccelerator(int deviceId, hc::accelerator *acc);
    +
    927 
    +
    931 hipError_t hipHccGetAcceleratorView(hipStream_t stream, hc::accelerator_view **av);
    +
    932 #endif
    +
    933 
    +
    934 
    +
    935 // end-group HCC_Specific
    +
    942 // doxygen end HIP API
    +
    hipError_t hipDeviceEnablePeerAccess(int peerDevice, unsigned int flags)
    Enables registering memory on peerDevice for direct access from the current device.
    Definition: hip_hcc.cpp:1658
    +
    hipError_t hipDeviceCanAccessPeer(int *canAccessPeer, int device, int peerDevice)
    Determine if a device can access a peer's memory.
    Definition: hip_hcc.cpp:1634
    hipError_t hipPeekAtLastError(void)
    Return last error returned by any HIP runtime API call.
    -
    hipError_t hipHccGetAcceleratorView(hipStream_t stream, hc::accelerator_view **av)
    Return hc::acclerator_view associated with the specified stream.
    Definition: hip_hcc.cpp:1647
    +
    hipError_t hipHccGetAcceleratorView(hipStream_t stream, hc::accelerator_view **av)
    Return hc::accelerator_view associated with the specified stream.
    Definition: hip_hcc.cpp:1730
    struct dim3 dim3
    -
    hipError_t hipFreeHost(void *ptr)
    Definition: hip_hcc.cpp:1529
    -
    hipError_t hipFuncSetCacheConfig(hipFuncCache config)
    Set Cache configuration for a specific function.
    Definition: hip_hcc.cpp:704
    -
    no preference for shared memory or L1 (default)
    Definition: hip_runtime_api.h:65
    -
    uint32_t x
    x
    Definition: hip_runtime_api.h:88
    -
    Host-to-Device Copy.
    Definition: hip_runtime_api.h:103
    -
    hipError_t hipDeviceGetSharedMemConfig(hipSharedMemConfig *pConfig)
    Get Shared memory bank configuration.
    Definition: hip_hcc.cpp:734
    -
    hipError_t hipSetDevice(int device)
    Set default device to be used for subsequent hip API calls from this thread.
    Definition: hip_hcc.cpp:747
    -
    Device-to-Host Copy.
    Definition: hip_runtime_api.h:104
    -
    hipError_t hipEventSynchronize(hipEvent_t event)
    : Wait for an event to complete.
    Definition: hip_hcc.cpp:1074
    -
    hipError_t hipDeviceGetProperties(hipDeviceProp_t *prop, int device)
    Returns device properties.
    Definition: hip_hcc.cpp:801
    -
    hipFuncCache
    Definition: hip_runtime_api.h:64
    -
    hipError_t hipEventQuery(hipEvent_t event)
    Query event status.
    Definition: hip_hcc.cpp:1169
    -
    hipError_t hipDeviceGetCacheConfig(hipFuncCache *cacheConfig)
    Set Cache configuration for a specific function.
    Definition: hip_hcc.cpp:690
    -
    hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream=NULL)
    Record an event in the specified stream.
    Definition: hip_hcc.cpp:1025
    -
    hipError_t hipGetDevice(int *device)
    Return the default device id for the calling host thread.
    Definition: hip_hcc.cpp:645
    -
    hipError_t hipEventDestroy(hipEvent_t event)
    Destroy the specified event.
    Definition: hip_hcc.cpp:1059
    -
    hipError_t hipStreamCreateWithFlags(hipStream_t *stream, unsigned int flags)
    Create an asynchronous stream.
    Definition: hip_hcc.cpp:891
    -
    hipError_t hipDeviceDisablePeerAccess(int peerDevice)
    Definition: hip_hcc.cpp:1563
    -
    Definition: hip_runtime_api.h:87
    -
    uint32_t y
    y
    Definition: hip_runtime_api.h:89
    -
    prefer equal size L1 cache and shared memory
    Definition: hip_runtime_api.h:68
    -
    hipError_t hipEventCreateWithFlags(hipEvent_t *event, unsigned flags)
    Create an event with the specified flags.
    Definition: hip_hcc.cpp:1002
    -
    hipError_t hipMallocHost(void **ptr, size_t size)
    Definition: hip_hcc.cpp:1289
    -
    hipError_t hipEventElapsedTime(float *ms, hipEvent_t start, hipEvent_t stop)
    Return the elapsed time between two events.
    Definition: hip_hcc.cpp:1124
    -
    hipError_t hipMemcpyPeerAsync(void *dst, int dstDevice, const void *src, int srcDevice, size_t sizeBytes, hipStream_t stream=0)
    Definition: hip_hcc.cpp:1596
    -
    hipError_t hipGetDeviceCount(int *count)
    Return number of compute-capable devices.
    Definition: hip_hcc.cpp:658
    -
    hipError_t hipStreamDestroy(hipStream_t stream)
    Destroys the specified stream.
    Definition: hip_hcc.cpp:955
    -
    hipError_t hipStreamSynchronize(hipStream_t stream)
    Wait for all commands in stream to complete.
    Definition: hip_hcc.cpp:932
    +
    TODO-doc.
    +
    hipError_t hipMemsetAsync(void *dst, int value, size_t sizeBytes, hipStream_t=0)
    Fills the first sizeBytes bytes of the memory area pointed to by dev with the constant byte value val...
    Definition: hip_hcc.cpp:1513
    +
    hipError_t hipFreeHost(void *ptr)
    Free memory allocated by the hcc hip host memory allocation API.
    Definition: hip_hcc.cpp:1612
    +
    hipError_t hipMemcpyToSymbol(const char *symbolName, const void *src, size_t sizeBytes, size_t offset, hipMemcpyKind kind)
    Copies sizeBytes bytes from the memory area pointed to by src to the memory area pointed to by offset...
    Definition: hip_hcc.cpp:1410
    +
    hipError_t hipFuncSetCacheConfig(hipFuncCache config)
    Set Cache configuration for a specific function.
    Definition: hip_hcc.cpp:721
    +
    no preference for shared memory or L1 (default)
    Definition: hip_runtime_api.h:63
    +
    uint32_t x
    x
    Definition: hip_runtime_api.h:86
    +
    Host-to-Device Copy.
    Definition: hip_runtime_api.h:101
    +
    hipError_t hipDeviceGetSharedMemConfig(hipSharedMemConfig *pConfig)
    Get Shared memory bank configuration.
    Definition: hip_hcc.cpp:751
    +
    hipError_t hipSetDevice(int device)
    Set default device to be used for subsequent hip API calls from this thread.
    Definition: hip_hcc.cpp:764
    +
    Device-to-Host Copy.
    Definition: hip_runtime_api.h:102
    +
    hipError_t hipEventSynchronize(hipEvent_t event)
    Wait for an event to complete.
    Definition: hip_hcc.cpp:1157
    +
    hipError_t hipDeviceGetProperties(hipDeviceProp_t *prop, int device)
    Returns device properties.
    Definition: hip_hcc.cpp:884
    +
    hipFuncCache
    Definition: hip_runtime_api.h:62
    +
    hipError_t hipEventQuery(hipEvent_t event)
    Query event status.
    Definition: hip_hcc.cpp:1252
    +
    hipError_t hipDeviceGetCacheConfig(hipFuncCache *cacheConfig)
    Get Cache configuration for the current device.
    Definition: hip_hcc.cpp:707
    +
    hipError_t hipMemcpyPeer(void *dst, int dstDevice, const void *src, int srcDevice, size_t sizeBytes)
    Copies memory from one device to memory on another device.
    Definition: hip_hcc.cpp:1667
    +
    hipError_t hipDeviceGetAttribute(int *pi, hipDeviceAttribute_t attr, int device)
    Query device attribute.
    Definition: hip_hcc.cpp:812
    +
    hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream=NULL)
    Record an event in the specified stream.
    Definition: hip_hcc.cpp:1108
    +
    hipError_t hipGetDevice(int *device)
    Return the default device id for the calling host thread.
    Definition: hip_hcc.cpp:662
    +
    hipDeviceAttribute_t
    Definition: hip_runtime_api.h:134
    +
    hipError_t hipEventDestroy(hipEvent_t event)
    Destroy the specified event.
    Definition: hip_hcc.cpp:1142
    +
    hipError_t hipStreamCreateWithFlags(hipStream_t *stream, unsigned int flags)
    Create an asynchronous stream.
    Definition: hip_hcc.cpp:974
    +
    hipError_t hipDeviceDisablePeerAccess(int peerDevice)
    Disables registering memory on peerDevice for direct access from the current device.
    Definition: hip_hcc.cpp:1646
    +
    Definition: hip_runtime_api.h:85
    +
    uint32_t y
    y
    Definition: hip_runtime_api.h:87
    +
    prefer equal size L1 cache and shared memory
    Definition: hip_runtime_api.h:66
    +
    hipError_t hipEventCreateWithFlags(hipEvent_t *event, unsigned flags)
    Create an event with the specified flags.
    Definition: hip_hcc.cpp:1085
    +
    hipError_t hipMallocHost(void **ptr, size_t size)
    Allocate pinned host memory.
    Definition: hip_hcc.cpp:1372
    +
    hipError_t hipEventElapsedTime(float *ms, hipEvent_t start, hipEvent_t stop)
    Return the elapsed time between two events.
    Definition: hip_hcc.cpp:1207
    +
    hipError_t hipMemcpyPeerAsync(void *dst, int dstDevice, const void *src, int srcDevice, size_t sizeBytes, hipStream_t stream=0)
    Copies memory from one device to memory on another device.
    Definition: hip_hcc.cpp:1679
    +
    hipError_t hipGetDeviceCount(int *count)
    Return number of compute-capable devices.
    Definition: hip_hcc.cpp:675
    +
    hipError_t hipMemset(void *dst, int value, size_t sizeBytes)
    Fill sizeBytes bytes of the memory area pointed to by dst with the byte value.
    Definition: hip_hcc.cpp:1558
    +
    hipError_t hipStreamDestroy(hipStream_t stream)
    Destroys the specified stream.
    Definition: hip_hcc.cpp:1038
    +
    hipError_t hipStreamSynchronize(hipStream_t stream)
    Wait for all commands in stream to complete.
    Definition: hip_hcc.cpp:1015
    Definition: hip_hcc.cpp:82
    -
    Shared mem is banked at 4-bytes intervals and performs best when adjacent threads access data 4 bytes...
    Definition: hip_runtime_api.h:77
    -
    hipError_t
    Definition: hip_runtime_api.h:112
    -
    hipMemcpyKind
    Definition: hip_runtime_api.h:101
    -
    prefer larger L1 cache and smaller shared memory
    Definition: hip_runtime_api.h:67
    -
    hipError_t hipDriverGetVersion(int *driverVersion)
    Returns the approximate HIP driver versin.
    Definition: hip_hcc.cpp:1608
    -
    hipError_t hipDeviceSynchronize(void)
    Blocks until the default device has completed all preceding requested tasks.
    Definition: hip_hcc.cpp:764
    -
    Definition: hip_runtime_api.h:124
    -
    hipError_t hipDeviceSetCacheConfig(hipFuncCache cacheConfig)
    Set L1/Shared cache partition.
    Definition: hip_hcc.cpp:676
    -
    hipError_t hipMalloc(void **ptr, size_t size)
    Definition: hip_hcc.cpp:1268
    -
    const char * hipGetErrorName(hipError_t hip_error)
    Return name of the specified error code in text form.
    Definition: hip_hcc.cpp:848
    -
    hipError_t hipGetLastError(void)
    Return last error returned by any HIP runtime API call and resets the stored error code to hipSuccess...
    Definition: hip_hcc.cpp:829
    -
    hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int flags)
    Make the specified compute stream wait for an event.
    Definition: hip_hcc.cpp:915
    -
    hipError_t hipStreamGetFlags(hipStream_t stream, unsigned int *flags)
    Return flags associated with this stream.
    Definition: hip_hcc.cpp:979
    -
    #define hipStreamDefault
    Flags that can be used with hipStreamCreateWithFlags.
    Definition: hip_runtime_api.h:50
    -
    hipError_t hipFree(void *ptr)
    Definition: hip_hcc.cpp:1513
    -
    uint32_t z
    z
    Definition: hip_runtime_api.h:90
    -
    hipError_t hipDeviceReset(void)
    Destroy all resources and reset all state on the default device in the current process.
    Definition: hip_hcc.cpp:779
    +
    Shared mem is banked at 4-byte intervals and performs best when adjacent threads access data 4 bytes...
    Definition: hip_runtime_api.h:75
    +
    hipError_t
    Definition: hip_runtime_api.h:114
    +
    hipMemcpyKind
    Definition: hip_runtime_api.h:99
    +
    prefer larger L1 cache and smaller shared memory
    Definition: hip_runtime_api.h:65
    +
    hipError_t hipDriverGetVersion(int *driverVersion)
    Returns the approximate HIP driver version.
    Definition: hip_hcc.cpp:1691
    +
    hipError_t hipDeviceSynchronize(void)
    Blocks until the default device has completed all preceding requested tasks.
    Definition: hip_hcc.cpp:781
    +
    Definition: hip_runtime_api.h:122
    +
    hipError_t hipDeviceSetCacheConfig(hipFuncCache cacheConfig)
    Set L1/Shared cache partition.
    Definition: hip_hcc.cpp:693
    +
    hipError_t hipMalloc(void **ptr, size_t size)
    Allocate memory on the default accelerator.
    Definition: hip_hcc.cpp:1351
    +
    const char * hipGetErrorName(hipError_t hip_error)
    Return name of the specified error code in text form.
    Definition: hip_hcc.cpp:931
    +
    hipError_t hipGetLastError(void)
    Return last error returned by any HIP runtime API call and resets the stored error code to hipSuccess...
    Definition: hip_hcc.cpp:912
    +
    hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int flags)
    Make the specified compute stream wait for an event.
    Definition: hip_hcc.cpp:998
    +
    hipError_t hipStreamGetFlags(hipStream_t stream, unsigned int *flags)
    Return flags associated with this stream.
    Definition: hip_hcc.cpp:1062
    +
    #define hipStreamDefault
    Flags that can be used with hipStreamCreateWithFlags.
    Definition: hip_runtime_api.h:48
    +
    hipError_t hipFree(void *ptr)
    Free memory allocated by the hcc hip memory allocation API. This API performs an implicit hipDeviceSy...
    Definition: hip_hcc.cpp:1596
    +
    uint32_t z
    z
    Definition: hip_runtime_api.h:88
    +
    hipError_t hipDeviceReset(void)
    Destroy all resources and reset all state on the default device in the current process.
    Definition: hip_hcc.cpp:796
    Definition: hip_runtime_api.h:74
    -
    hipError_t hipMemcpyAsync(void *dst, const void *src, size_t sizeBytes, hipMemcpyKind kind, hipStream_t stream=0)
    Definition: hip_hcc.cpp:1388
    -
    The compiler selects a device-specific value for the banking.
    Definition: hip_runtime_api.h:76
    -
    Device-to-Device Copy.
    Definition: hip_runtime_api.h:105
    +
    hipError_t hipMemcpyAsync(void *dst, const void *src, size_t sizeBytes, hipMemcpyKind kind, hipStream_t stream=0)
    Copy data from src to dst asynchronously.
    Definition: hip_hcc.cpp:1471
    +
    The compiler selects a device-specific value for the banking.
    Definition: hip_runtime_api.h:74
    +
    Device-to-Device Copy.
    Definition: hip_runtime_api.h:103
    Definition: hip_hcc.cpp:107
    -
    Runtime will automatically determine copy-kind based on virtual addresses.
    Definition: hip_runtime_api.h:106
    -
    hipSharedMemConfig
    Definition: hip_runtime_api.h:75
    -
    hipError_t hipMemcpy(void *dst, const void *src, size_t sizeBytes, hipMemcpyKind kind)
    Definition: hip_hcc.cpp:1346
    -
    hipError_t hipDeviceSetSharedMemConfig(hipSharedMemConfig config)
    Set Shared memory bank configuration.
    Definition: hip_hcc.cpp:719
    -
    prefer larger shared memory and smaller L1 cache
    Definition: hip_runtime_api.h:66
    -
    Host-to-Host Copy.
    Definition: hip_runtime_api.h:102
    -
    hipError_t hipHccGetAccelerator(int deviceId, hc::accelerator *acc)
    Return hc::acclerator associated with the specified deviceId.
    Definition: hip_hcc.cpp:1627
    -
    Shared mem is banked at 8-byte intervals and performs best when adjacent threads access data 4 bytes ...
    Definition: hip_runtime_api.h:78
    -
    const char * hipGetErrorString(hipError_t hip_error)
    Return handy text string message to explain the error which occurred.
    Definition: hip_hcc.cpp:875
    +
    Runtime will automatically determine copy-kind based on virtual addresses.
    Definition: hip_runtime_api.h:104
    +
    hipSharedMemConfig
    Definition: hip_runtime_api.h:73
    +
    hipError_t hipMemcpy(void *dst, const void *src, size_t sizeBytes, hipMemcpyKind kind)
    Copy data from src to dst.
    Definition: hip_hcc.cpp:1429
    +
    hipError_t hipDeviceSetSharedMemConfig(hipSharedMemConfig config)
    Set Shared memory bank configuration.
    Definition: hip_hcc.cpp:736
    +
    prefer larger shared memory and smaller L1 cache
    Definition: hip_runtime_api.h:64
    +
    Host-to-Host Copy.
    Definition: hip_runtime_api.h:100
    +
    hipError_t hipHccGetAccelerator(int deviceId, hc::accelerator *acc)
    Return hc::accelerator associated with the specified deviceId.
    Definition: hip_hcc.cpp:1710
    +
    Shared mem is banked at 8-byte intervals and performs best when adjacent threads access data 4 bytes ...
    Definition: hip_runtime_api.h:76
    +
    const char * hipGetErrorString(hipError_t hip_error)
    Return handy text string message to explain the error which occurred.
    Definition: hip_hcc.cpp:958
    diff --git a/projects/hip/docs/RuntimeAPI/html/hcc__detail_2hip__vector__types_8h.html b/projects/hip/docs/RuntimeAPI/html/hcc__detail_2hip__vector__types_8h.html new file mode 100644 index 0000000000..7e73a981b1 --- /dev/null +++ b/projects/hip/docs/RuntimeAPI/html/hcc__detail_2hip__vector__types_8h.html @@ -0,0 +1,452 @@ + + + + + + +HIP: Heterogenous-computing Interface for Portability: /home/bensander/HIP.public/include/hcc_detail/hip_vector_types.h File Reference + + + + + + + + + +
    +
    + + + + + + +
    +
    HIP: Heterogenous-computing Interface for Portability +
    +
    +
    + + + + + + + + + +
    + +
    + + +
    +
    + +
    +
    hip_vector_types.h File Reference
    +
    +
    + +

    Defines the different vector types for the HIP runtime. +More...

    +
    #include <hc_short_vector.hpp>
    +
    +

    Go to the source code of this file.

    + + + + + + + + + + + +

    +Macros

    +#define ONE_COMPONENT_ACCESS(T, VT)   inline VT make_ ##VT (T x) { VT t; t.x = x; return t; };
     
    +
     
    +#define TWO_COMPONENT_ACCESS(T, VT)   inline VT make_ ##VT (T x, T y) { VT t; t.x=x; t.y=y; return t; };
     
    +#define THREE_COMPONENT_ACCESS(T, VT)   inline VT make_ ##VT (T x, T y, T z) { VT t; t.x=x; t.y=y; t.z=z; return t; };
     
    +#define FOUR_COMPONENT_ACCESS(T, VT)   inline VT make_ ##VT (T x, T y, T z, T w) { VT t; t.x=x; t.y=y; t.z=z; t.w=w; return t; };
     
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Typedefs

    +typedef hc::short_vector::char1 char1
     
    +typedef hc::short_vector::char2 char2
     
    +typedef hc::short_vector::char3 char3
     
    +typedef hc::short_vector::char4 char4
     
    +typedef hc::short_vector::short1 short1
     
    +typedef hc::short_vector::short2 short2
     
    +typedef hc::short_vector::short3 short3
     
    +typedef hc::short_vector::short4 short4
     
    +typedef hc::short_vector::int1 int1
     
    +typedef hc::short_vector::int2 int2
     
    +typedef hc::short_vector::int3 int3
     
    +typedef hc::short_vector::int4 int4
     
    +typedef hc::short_vector::long1 long1
     
    +typedef hc::short_vector::long2 long2
     
    +typedef hc::short_vector::long3 long3
     
    +typedef hc::short_vector::long4 long4
     
    +typedef hc::short_vector::longlong1 longlong1
     
    +typedef hc::short_vector::longlong2 longlong2
     
    +typedef hc::short_vector::longlong3 longlong3
     
    +typedef hc::short_vector::longlong4 longlong4
     
    +typedef hc::short_vector::uchar1 uchar1
     
    +typedef hc::short_vector::uchar2 uchar2
     
    +typedef hc::short_vector::uchar3 uchar3
     
    +typedef hc::short_vector::uchar4 uchar4
     
    +typedef hc::short_vector::ushort1 ushort1
     
    +typedef hc::short_vector::ushort2 ushort2
     
    +typedef hc::short_vector::ushort3 ushort3
     
    +typedef hc::short_vector::ushort4 ushort4
     
    +typedef hc::short_vector::uint1 uint1
     
    +typedef hc::short_vector::uint2 uint2
     
    +typedef hc::short_vector::uint3 uint3
     
    +typedef hc::short_vector::uint4 uint4
     
    +typedef hc::short_vector::ulong1 ulong1
     
    +typedef hc::short_vector::ulong2 ulong2
     
    +typedef hc::short_vector::ulong3 ulong3
     
    +typedef hc::short_vector::ulong4 ulong4
     
    +typedef
    +hc::short_vector::ulonglong1 
    ulonglong1
     
    +typedef
    +hc::short_vector::ulonglong2 
    ulonglong2
     
    +typedef
    +hc::short_vector::ulonglong3 
    ulonglong3
     
    +typedef
    +hc::short_vector::ulonglong4 
    ulonglong4
     
    +typedef hc::short_vector::float1 float1
     
    +typedef hc::short_vector::float2 float2
     
    +typedef hc::short_vector::float3 float3
     
    +typedef hc::short_vector::float4 float4
     
    +typedef hc::short_vector::double1 double1
     
    +typedef hc::short_vector::double2 double2
     
    +typedef hc::short_vector::double3 double3
     
    +typedef hc::short_vector::double4 double4
     
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Functions

    ONE_COMPONENT_ACCESS (signed char, char1)
     
    TWO_COMPONENT_ACCESS (signed char, char2)
     
    THREE_COMPONENT_ACCESS (signed char, char3)
     
    FOUR_COMPONENT_ACCESS (signed char, char4)
     
    ONE_COMPONENT_ACCESS (short, short1)
     
    TWO_COMPONENT_ACCESS (short, short2)
     
    THREE_COMPONENT_ACCESS (short, short3)
     
    FOUR_COMPONENT_ACCESS (short, short4)
     
    ONE_COMPONENT_ACCESS (int, int1)
     
    TWO_COMPONENT_ACCESS (int, int2)
     
    THREE_COMPONENT_ACCESS (int, int3)
     
    FOUR_COMPONENT_ACCESS (int, int4)
     
    ONE_COMPONENT_ACCESS (long int, long1)
     
    TWO_COMPONENT_ACCESS (long int, long2)
     
    THREE_COMPONENT_ACCESS (long int, long3)
     
    FOUR_COMPONENT_ACCESS (long int, long4)
     
    ONE_COMPONENT_ACCESS (long long int, ulong1)
     
    TWO_COMPONENT_ACCESS (long long int, ulong2)
     
    THREE_COMPONENT_ACCESS (long long int, ulong3)
     
    FOUR_COMPONENT_ACCESS (long long int, ulong4)
     
    ONE_COMPONENT_ACCESS (long long int, longlong1)
     
    TWO_COMPONENT_ACCESS (long long int, longlong2)
     
    THREE_COMPONENT_ACCESS (long long int, longlong3)
     
    FOUR_COMPONENT_ACCESS (long long int, longlong4)
     
    ONE_COMPONENT_ACCESS (unsigned char, uchar1)
     
    TWO_COMPONENT_ACCESS (unsigned char, uchar2)
     
    THREE_COMPONENT_ACCESS (unsigned char, uchar3)
     
    FOUR_COMPONENT_ACCESS (unsigned char, uchar4)
     
    ONE_COMPONENT_ACCESS (unsigned short, ushort1)
     
    TWO_COMPONENT_ACCESS (unsigned short, ushort2)
     
    THREE_COMPONENT_ACCESS (unsigned short, ushort3)
     
    FOUR_COMPONENT_ACCESS (unsigned short, ushort4)
     
    ONE_COMPONENT_ACCESS (unsigned int, uint1)
     
    TWO_COMPONENT_ACCESS (unsigned int, uint2)
     
    THREE_COMPONENT_ACCESS (unsigned int, uint3)
     
    FOUR_COMPONENT_ACCESS (unsigned int, uint4)
     
    ONE_COMPONENT_ACCESS (unsigned long int, ulong1)
     
    TWO_COMPONENT_ACCESS (unsigned long int, ulong2)
     
    THREE_COMPONENT_ACCESS (unsigned long int, ulong3)
     
    FOUR_COMPONENT_ACCESS (unsigned long int, ulong4)
     
    ONE_COMPONENT_ACCESS (unsigned long long int, ulong1)
     
    TWO_COMPONENT_ACCESS (unsigned long long int, ulong2)
     
    THREE_COMPONENT_ACCESS (unsigned long long int, ulong3)
     
    FOUR_COMPONENT_ACCESS (unsigned long long int, ulong4)
     
    ONE_COMPONENT_ACCESS (unsigned long long int, ulonglong1)
     
    TWO_COMPONENT_ACCESS (unsigned long long int, ulonglong2)
     
    THREE_COMPONENT_ACCESS (unsigned long long int, ulonglong3)
     
    FOUR_COMPONENT_ACCESS (unsigned long long int, ulonglong4)
     
    ONE_COMPONENT_ACCESS (float, float1)
     
    TWO_COMPONENT_ACCESS (float, float2)
     
    THREE_COMPONENT_ACCESS (float, float3)
     
    FOUR_COMPONENT_ACCESS (float, float4)
     
    ONE_COMPONENT_ACCESS (double, double1)
     
    TWO_COMPONENT_ACCESS (double, double2)
     
    THREE_COMPONENT_ACCESS (double, double3)
     
    FOUR_COMPONENT_ACCESS (double, double4)
     
    +

    Detailed Description

    +

    Defines the different vector types for the HIP runtime.

    +
    + + + + diff --git a/projects/hip/docs/RuntimeAPI/html/hcc__detail_2hip__vector__types_8h_source.html b/projects/hip/docs/RuntimeAPI/html/hcc__detail_2hip__vector__types_8h_source.html index 73911b9082..9bd0f090ab 100644 --- a/projects/hip/docs/RuntimeAPI/html/hcc__detail_2hip__vector__types_8h_source.html +++ b/projects/hip/docs/RuntimeAPI/html/hcc__detail_2hip__vector__types_8h_source.html @@ -4,7 +4,7 @@ -HIP: Heterogenous-computing Interface for Portability: /home/fpadmin/ben/HIP6/include/hcc_detail/hip_vector_types.h Source File +HIP: Heterogenous-computing Interface for Portability: /home/bensander/HIP.public/include/hcc_detail/hip_vector_types.h Source File @@ -89,7 +89,7 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
    hip_vector_types.h
    -
    1 /*
    +Go to the documentation of this file.
    1 /*
    2 Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
    3 
    4 Permission is hereby granted, free of charge, to any person obtaining a copy
    @@ -110,147 +110,177 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
    19 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    20 THE SOFTWARE.
    21 */
    -
    22 #if defined (__HCC__) && (__hcc_workweek__ < 16032)
    -
    23 #error("This version of HIP requires a newer version of HCC.");
    -
    24 #endif
    -
    25 
    -
    26 #include <hc_short_vector.hpp>
    -
    27 
    -
    28 // Define char vector types
    -
    29 typedef hc::short_vector::char1 char1;
    -
    30 typedef hc::short_vector::char2 char2;
    -
    31 typedef hc::short_vector::char3 char3;
    -
    32 typedef hc::short_vector::char4 char4;
    +
    22 
    +
    28 #if defined (__HCC__) && (__hcc_workweek__ < 16032)
    +
    29 #error("This version of HIP requires a newer version of HCC.");
    +
    30 #endif
    +
    31 
    +
    32 #include <hc_short_vector.hpp>
    33 
    -
    34 // Define uchar vector types
    -
    35 typedef hc::short_vector::uchar1 uchar1;
    -
    36 typedef hc::short_vector::uchar2 uchar2;
    -
    37 typedef hc::short_vector::uchar3 uchar3;
    -
    38 typedef hc::short_vector::uchar4 uchar4;
    -
    39 
    -
    40 // Define short vector types
    -
    41 typedef hc::short_vector::short1 short1;
    -
    42 typedef hc::short_vector::short2 short2;
    -
    43 typedef hc::short_vector::short3 short3;
    -
    44 typedef hc::short_vector::short4 short4;
    -
    45 
    -
    46 // Define ushort vector types
    -
    47 typedef hc::short_vector::ushort1 ushort1;
    -
    48 typedef hc::short_vector::ushort2 ushort2;
    -
    49 typedef hc::short_vector::ushort3 ushort3;
    -
    50 typedef hc::short_vector::ushort4 ushort4;
    -
    51 
    -
    52 // Define int vector types
    -
    53 typedef hc::short_vector::int1 int1;
    -
    54 typedef hc::short_vector::int2 int2;
    -
    55 typedef hc::short_vector::int3 int3;
    -
    56 typedef hc::short_vector::int4 int4;
    -
    57 
    -
    58 // Define uint vector types
    -
    59 typedef hc::short_vector::uint1 uint1;
    -
    60 typedef hc::short_vector::uint2 uint2;
    -
    61 typedef hc::short_vector::uint3 uint3;
    -
    62 typedef hc::short_vector::uint4 uint4;
    -
    63 
    -
    64 // Define long vector types
    -
    65 typedef hc::short_vector::long1 long1;
    -
    66 typedef hc::short_vector::long2 long2;
    -
    67 typedef hc::short_vector::long3 long3;
    -
    68 typedef hc::short_vector::long4 long4;
    -
    69 
    -
    70 // Define ulong vector types
    -
    71 typedef hc::short_vector::ulong1 ulong1;
    -
    72 typedef hc::short_vector::ulong2 ulong2;
    -
    73 typedef hc::short_vector::ulong3 ulong3;
    -
    74 typedef hc::short_vector::ulong4 ulong4;
    -
    75 
    -
    76 // Define longlong vector types
    -
    77 typedef hc::short_vector::longlong1 longlong1;
    -
    78 typedef hc::short_vector::longlong2 longlong2;
    -
    79 typedef hc::short_vector::longlong3 longlong3;
    -
    80 typedef hc::short_vector::longlong4 longlong4;
    -
    81 
    -
    82 // Define ulonglong vector types
    -
    83 typedef hc::short_vector::ulonglong1 ulonglong1;
    -
    84 typedef hc::short_vector::ulonglong2 ulonglong2;
    -
    85 typedef hc::short_vector::ulonglong3 ulonglong3;
    -
    86 typedef hc::short_vector::ulonglong4 ulonglong4;
    -
    87 
    -
    88 // Define float vector types
    -
    89 typedef hc::short_vector::float1 float1;
    -
    90 typedef hc::short_vector::float2 float2;
    -
    91 typedef hc::short_vector::float3 float3;
    -
    92 typedef hc::short_vector::float4 float4;
    -
    93 
    -
    94 // Define double vector types
    -
    95 typedef hc::short_vector::double1 double1;
    -
    96 typedef hc::short_vector::double2 double2;
    -
    97 typedef hc::short_vector::double3 double3;
    -
    98 typedef hc::short_vector::double4 double4;
    -
    99 
    -
    100 // Inline functions for creating vector types from basic types
    -
    101 inline char1 make_char1(signed char x) { char1 t; t.x = x; return t; };
    -
    102 inline char2 make_char2(signed char x, signed char y) { char2 t; t.x = x; t.y = y; return t; };
    -
    103 inline char3 make_char3(signed char x, signed char y, signed char z) { char3 t; t.x = x; t.y = y; t.z = z; return t; };
    -
    104 inline char4 make_char4(signed char x, signed char y, signed char z, signed char w) { char4 t; t.x = x; t.y = y; t.z = z; t.w = w; return t; };
    -
    105 
    -
    106 inline uchar1 make_uchar1(unsigned char x) { uchar1 t; t.x = x; return t; };
    -
    107 inline uchar2 make_uchar2(unsigned char x, unsigned char y) { uchar2 t; t.x = x; t.y = y; return t; };
    -
    108 inline uchar3 make_uchar3(unsigned char x, unsigned char y, unsigned char z) { uchar3 t; t.x = x; t.y = y; t.z = z; return t; };
    -
    109 inline uchar4 make_uchar4(unsigned char x, unsigned char y, unsigned char z, unsigned char w) { uchar4 t; t.x = x; t.y = y; t.z = z; t.w = w; return t; };
    +
    34 //-- Signed
    +
    35 // Define char vector types
    +
    36 typedef hc::short_vector::char1 char1;
    +
    37 typedef hc::short_vector::char2 char2;
    +
    38 typedef hc::short_vector::char3 char3;
    +
    39 typedef hc::short_vector::char4 char4;
    +
    40 
    +
    41 // Define short vector types
    +
    42 typedef hc::short_vector::short1 short1;
    +
    43 typedef hc::short_vector::short2 short2;
    +
    44 typedef hc::short_vector::short3 short3;
    +
    45 typedef hc::short_vector::short4 short4;
    +
    46 
    +
    47 // Define int vector types
    +
    48 typedef hc::short_vector::int1 int1;
    +
    49 typedef hc::short_vector::int2 int2;
    +
    50 typedef hc::short_vector::int3 int3;
    +
    51 typedef hc::short_vector::int4 int4;
    +
    52 
    +
    53 // Define long vector types
    +
    54 typedef hc::short_vector::long1 long1;
    +
    55 typedef hc::short_vector::long2 long2;
    +
    56 typedef hc::short_vector::long3 long3;
    +
    57 typedef hc::short_vector::long4 long4;
    +
    58 
    +
    59 // Define longlong vector types
    +
    60 typedef hc::short_vector::longlong1 longlong1;
    +
    61 typedef hc::short_vector::longlong2 longlong2;
    +
    62 typedef hc::short_vector::longlong3 longlong3;
    +
    63 typedef hc::short_vector::longlong4 longlong4;
    +
    64 
    +
    65 
    +
    66 //-- Unsigned
    +
    67 // Define uchar vector types
    +
    68 typedef hc::short_vector::uchar1 uchar1;
    +
    69 typedef hc::short_vector::uchar2 uchar2;
    +
    70 typedef hc::short_vector::uchar3 uchar3;
    +
    71 typedef hc::short_vector::uchar4 uchar4;
    +
    72 
    +
    73 // Define ushort vector types
    +
    74 typedef hc::short_vector::ushort1 ushort1;
    +
    75 typedef hc::short_vector::ushort2 ushort2;
    +
    76 typedef hc::short_vector::ushort3 ushort3;
    +
    77 typedef hc::short_vector::ushort4 ushort4;
    +
    78 
    +
    79 // Define uint vector types
    +
    80 typedef hc::short_vector::uint1 uint1;
    +
    81 typedef hc::short_vector::uint2 uint2;
    +
    82 typedef hc::short_vector::uint3 uint3;
    +
    83 typedef hc::short_vector::uint4 uint4;
    +
    84 
    +
    85 // Define ulong vector types
    +
    86 typedef hc::short_vector::ulong1 ulong1;
    +
    87 typedef hc::short_vector::ulong2 ulong2;
    +
    88 typedef hc::short_vector::ulong3 ulong3;
    +
    89 typedef hc::short_vector::ulong4 ulong4;
    +
    90 
    +
    91 // Define ulonglong vector types
    +
    92 typedef hc::short_vector::ulonglong1 ulonglong1;
    +
    93 typedef hc::short_vector::ulonglong2 ulonglong2;
    +
    94 typedef hc::short_vector::ulonglong3 ulonglong3;
    +
    95 typedef hc::short_vector::ulonglong4 ulonglong4;
    +
    96 
    +
    97 
    +
    98 //-- Floating point
    +
    99 // Define float vector types
    +
    100 typedef hc::short_vector::float1 float1;
    +
    101 typedef hc::short_vector::float2 float2;
    +
    102 typedef hc::short_vector::float3 float3;
    +
    103 typedef hc::short_vector::float4 float4;
    +
    104 
    +
    105 // Define double vector types
    +
    106 typedef hc::short_vector::double1 double1;
    +
    107 typedef hc::short_vector::double2 double2;
    +
    108 typedef hc::short_vector::double3 double3;
    +
    109 typedef hc::short_vector::double4 double4;
    110 
    -
    111 inline short1 make_short1(short x) { short1 t; t.x = x; return t; };
    -
    112 inline short2 make_short2(short x, short y) { short2 t; t.x = x; t.y = y; return t; };
    -
    113 inline short3 make_short3(short x,short y, short z) { short3 t; t.x = x; t.y = y; t.z = z; return t; };
    -
    114 inline short4 make_short4(short x, short y, short z, short w) { short4 t; t.x = x; t.y = y; t.z = z; t.w = w; return t; };
    -
    115 
    -
    116 inline ushort1 make_ushort1(unsigned short x) { ushort1 t; t.x = x; return t; };
    -
    117 inline ushort2 make_ushort2(unsigned short x, unsigned short y) { ushort2 t; t.x = x; t.y = y; return t; };
    -
    118 inline ushort3 make_ushort3(unsigned short x, unsigned short y, unsigned short z) { ushort3 t; t.x = x; t.y = y; t.z = z; return t; };
    -
    119 inline ushort4 make_ushort4(unsigned short x, unsigned short y, unsigned short z, unsigned short w) { ushort4 t; t.x = x; t.y = y; t.z = z; t.w = w; return t; };
    -
    120 
    -
    121 inline int1 make_int1(int x) { int1 t; t.x = x; return t; };
    -
    122 inline int2 make_int2(int x, int y) { int2 t; t.x = x; t.y = y; return t; };
    -
    123 inline int3 make_int3(int x, int y, int z) { int3 t; t.x = x; t.y = y; t.z = z; return t; };
    -
    124 inline int4 make_int4(int x, int y, int z, int w) { int4 t; t.x = x; t.y = y; t.z = z; t.w = w; return t; };
    -
    125 inline uint1 make_uint1(unsigned int x) { uint1 t; t.x = x; return t; };
    -
    126 inline uint2 make_uint2(unsigned int x, unsigned int y) { uint2 t; t.x = x; t.y = y; return t; };
    -
    127 inline uint3 make_uint3(unsigned int x, unsigned int y, unsigned int z) { uint3 t; t.x = x; t.y = y; t.z = z; return t; };
    -
    128 inline uint4 make_uint4(unsigned int x, unsigned int y, unsigned int z, unsigned int w) { uint4 t; t.x = x; t.y = y; t.z = z; t.w = w; return t; };
    -
    129 
    -
    130 inline long1 make_long1(long int x) { long1 t; t.x = x; return t; };
    -
    131 inline long2 make_long2(long int x, long int y) { long2 t; t.x = x; t.y = y; return t; };
    -
    132 inline long3 make_long3(long int x, long int y, long int z) { long3 t; t.x = x; t.y = y; t.z = z; return t; };
    -
    133 inline long4 make_long4(long int x, long int y, long int z, long int w) { long4 t; t.x = x; t.y = y; t.z = z; t.w = w; return t; };
    -
    134 
    -
    135 inline ulong1 make_ulong1(unsigned long int x) { ulong1 t; t.x = x; return t; };
    -
    136 inline ulong2 make_ulong2(unsigned long int x, unsigned long int y) { ulong2 t; t.x = x; t.y = y; return t; };
    -
    137 inline ulong3 make_ulong3(unsigned long int x, unsigned long int y, unsigned long int z) { ulong3 t; t.x = x; t.y = y; t.z = z; return t; };
    -
    138 inline ulong4 make_ulong4(unsigned long int x, unsigned long int y, unsigned long int z, unsigned long int w) { ulong4 t; t.x = x; t.y = y; t.z = z; t.w = w; return t; };
    -
    139 
    -
    140 inline longlong1 make_longlong1(long long int x) { longlong1 t; t.x = x; return t; };
    -
    141 inline longlong2 make_longlong2(long long int x, long long int y) { longlong2 t; t.x = x; t.y = y; return t; };
    -
    142 inline longlong3 make_longlong3(long long int x, long long int y, long long int z) { longlong3 t; t.x = x; t.y = y; t.z = z; return t; };
    -
    143 inline longlong4 make_longlong4(long long int x, long long int y, long long int z, long long int w) { longlong4 t; t.x = x; t.y = y; t.z = z; t.w = w; return t; };
    -
    144 
    -
    145 inline ulonglong1 make_ulonglong1(unsigned long long int x) { ulonglong1 t; t.x = x; return t; };
    -
    146 inline ulonglong2 make_ulonglong2(unsigned long long int x, unsigned long long int y) { ulonglong2 t; t.x = x; t.y = y; return t; };
    -
    147 inline ulonglong3 make_ulonglong3(unsigned long long int x, unsigned long long int y, unsigned long long int z) { ulonglong3 t; t.x = x; t.y = y; t.z = z; return t; };
    -
    148 inline ulonglong4 make_ulonglong4(unsigned long long int x, unsigned long long int y, unsigned long long int z, unsigned long long int w) { ulonglong4 t; t.x = x; t.y = y; t.z = z; t.w = w; return t; };
    -
    149 
    -
    150 inline float1 make_float1(float x) { float1 t; t.x = x; return t; };
    -
    151 inline float2 make_float2(float x, float y) { float2 t; t.x = x; t.y = y; return t; };
    -
    152 inline float3 make_float3(float x, float y, float z) { float3 t; t.x = x; t.y = y; t.z = z; return t; };
    -
    153 inline float4 make_float4(float x, float y, float z, float w) { float4 t; t.x = x; t.y = y; t.z = z; t.w = w; return t; };
    -
    154 
    -
    155 inline double1 make_double1(double x) { double1 t; t.x = x; return t; };
    -
    156 inline double2 make_double2(double x, double y) { double2 t; t.x = x; t.y = y; return t; };
    -
    157 inline double3 make_double3(double x, double y, double z) { double3 t; t.x = x; t.y = y; t.z = z; return t; };
    -
    158 inline double4 make_double4(double x, double y, double z, double w) { double4 t; t.x = x; t.y = y; t.z = z; t.w = w; return t; };
    +
    111 
    +
    113 // Inline functions for creating vector types from basic types
    +
    114 #define ONE_COMPONENT_ACCESS(T, VT) inline VT make_ ##VT (T x) { VT t; t.x = x; return t; };
    +
    115 #define TWO_COMPONENT_ACCESS(T, VT) inline VT make_ ##VT (T x, T y) { VT t; t.x=x; t.y=y; return t; };
    +
    116 #define THREE_COMPONENT_ACCESS(T, VT) inline VT make_ ##VT (T x, T y, T z) { VT t; t.x=x; t.y=y; t.z=z; return t; };
    +
    117 #define FOUR_COMPONENT_ACCESS(T, VT) inline VT make_ ##VT (T x, T y, T z, T w) { VT t; t.x=x; t.y=y; t.z=z; t.w=w; return t; };
    +
    118 
    +
    119 
    +
    120 //signed:
    +
    121 ONE_COMPONENT_ACCESS (signed char, char1);
    +
    122 TWO_COMPONENT_ACCESS (signed char, char2);
    +
    123 THREE_COMPONENT_ACCESS(signed char, char3);
    +
    124 FOUR_COMPONENT_ACCESS (signed char, char4);
    +
    125 
    +
    126 ONE_COMPONENT_ACCESS (short, short1);
    +
    127 TWO_COMPONENT_ACCESS (short, short2);
    +
    128 THREE_COMPONENT_ACCESS(short, short3);
    +
    129 FOUR_COMPONENT_ACCESS (short, short4);
    +
    130 
    +
    131 ONE_COMPONENT_ACCESS (int, int1);
    +
    132 TWO_COMPONENT_ACCESS (int, int2);
    +
    133 THREE_COMPONENT_ACCESS(int, int3);
    +
    134 FOUR_COMPONENT_ACCESS (int, int4);
    +
    135 
    +
    136 ONE_COMPONENT_ACCESS (long int, long1);
    +
    137 TWO_COMPONENT_ACCESS (long int, long2);
    +
    138 THREE_COMPONENT_ACCESS(long int, long3);
    +
    139 FOUR_COMPONENT_ACCESS (long int, long4);
    +
    140 
    +
    141 ONE_COMPONENT_ACCESS (long long int, ulong1);
    +
    142 TWO_COMPONENT_ACCESS (long long int, ulong2);
    +
    143 THREE_COMPONENT_ACCESS(long long int, ulong3);
    +
    144 FOUR_COMPONENT_ACCESS (long long int, ulong4);
    +
    145 
    +
    146 ONE_COMPONENT_ACCESS (long long int, longlong1);
    +
    147 TWO_COMPONENT_ACCESS (long long int, longlong2);
    +
    148 THREE_COMPONENT_ACCESS(long long int, longlong3);
    +
    149 FOUR_COMPONENT_ACCESS (long long int, longlong4);
    +
    150 
    +
    151 
    +
    152 // unsigned:
    +
    153 ONE_COMPONENT_ACCESS (unsigned char, uchar1);
    +
    154 TWO_COMPONENT_ACCESS (unsigned char, uchar2);
    +
    155 THREE_COMPONENT_ACCESS(unsigned char, uchar3);
    +
    156 FOUR_COMPONENT_ACCESS (unsigned char, uchar4);
    +
    157 
    +
    158 ONE_COMPONENT_ACCESS (unsigned short, ushort1);
    +
    159 TWO_COMPONENT_ACCESS (unsigned short, ushort2);
    +
    160 THREE_COMPONENT_ACCESS(unsigned short, ushort3);
    +
    161 FOUR_COMPONENT_ACCESS (unsigned short, ushort4);
    +
    162 
    +
    163 ONE_COMPONENT_ACCESS (unsigned int, uint1);
    +
    164 TWO_COMPONENT_ACCESS (unsigned int, uint2);
    +
    165 THREE_COMPONENT_ACCESS(unsigned int, uint3);
    +
    166 FOUR_COMPONENT_ACCESS (unsigned int, uint4);
    +
    167 
    +
    168 ONE_COMPONENT_ACCESS (unsigned long int, ulong1);
    +
    169 TWO_COMPONENT_ACCESS (unsigned long int, ulong2);
    +
    170 THREE_COMPONENT_ACCESS(unsigned long int, ulong3);
    +
    171 FOUR_COMPONENT_ACCESS (unsigned long int, ulong4);
    +
    172 
    +
    173 ONE_COMPONENT_ACCESS (unsigned long long int, ulong1);
    +
    174 TWO_COMPONENT_ACCESS (unsigned long long int, ulong2);
    +
    175 THREE_COMPONENT_ACCESS(unsigned long long int, ulong3);
    +
    176 FOUR_COMPONENT_ACCESS (unsigned long long int, ulong4);
    +
    177 
    +
    178 ONE_COMPONENT_ACCESS (unsigned long long int, ulonglong1);
    +
    179 TWO_COMPONENT_ACCESS (unsigned long long int, ulonglong2);
    +
    180 THREE_COMPONENT_ACCESS(unsigned long long int, ulonglong3);
    +
    181 FOUR_COMPONENT_ACCESS (unsigned long long int, ulonglong4);
    +
    182 
    +
    183 
    +
    184 //Floating point
    +
    185 ONE_COMPONENT_ACCESS (float, float1);
    +
    186 TWO_COMPONENT_ACCESS (float, float2);
    +
    187 THREE_COMPONENT_ACCESS(float, float3);
    +
    188 FOUR_COMPONENT_ACCESS (float, float4);
    +
    189 
    +
    190 ONE_COMPONENT_ACCESS (double, double1);
    +
    191 TWO_COMPONENT_ACCESS (double, double2);
    +
    192 THREE_COMPONENT_ACCESS(double, double3);
    +
    193 FOUR_COMPONENT_ACCESS (double, double4);
    +
    #define ONE_COMPONENT_ACCESS(T, VT)
    Definition: hip_vector_types.h:114
    diff --git a/projects/hip/docs/RuntimeAPI/html/hierarchy.html b/projects/hip/docs/RuntimeAPI/html/hierarchy.html index f10211f59f..801ddcd197 100644 --- a/projects/hip/docs/RuntimeAPI/html/hierarchy.html +++ b/projects/hip/docs/RuntimeAPI/html/hierarchy.html @@ -104,7 +104,7 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
    diff --git a/projects/hip/docs/RuntimeAPI/html/hip__common_8h_source.html b/projects/hip/docs/RuntimeAPI/html/hip__common_8h_source.html index aa62aa5504..5a5a586bdf 100644 --- a/projects/hip/docs/RuntimeAPI/html/hip__common_8h_source.html +++ b/projects/hip/docs/RuntimeAPI/html/hip__common_8h_source.html @@ -4,7 +4,7 @@ -HIP: Heterogenous-computing Interface for Portability: /home/fpadmin/ben/HIP6/include/hip_common.h Source File +HIP: Heterogenous-computing Interface for Portability: /home/bensander/HIP.public/include/hip_common.h Source File @@ -123,61 +123,65 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
    32 #define __HIP_PLATFORM_HCC__
    33 #define __HIPCC__
    34 
    -
    35 # if defined __HCC_ACCELERATOR__
    -
    36 # define __HIP_DEVICE_COMPILE__ 1
    -
    37 # endif
    -
    38 #endif
    -
    39 
    -
    40 // Auto enable __HIP_PLATFORM_NVCC__ if compiling with NVCC
    -
    41 #if defined(__NVCC__)
    -
    42 #define __HIP_PLATFORM_NVCC__
    -
    43 # ifdef __CUDACC__
    -
    44 # define __HIPCC__
    -
    45 # endif
    -
    46 
    -
    47 # ifdef __CUDA_ARCH__
    -
    48 # define __HIP_DEVICE_COMPILE__ 1
    -
    49 # endif
    -
    50 
    -
    51 #endif
    -
    52 
    -
    53 
    -
    54 
    -
    55 
    -
    56 #if __HIP_DEVICE_COMPILE__ == 0
    -
    57 // 32-bit Atomics
    -
    58 #define __HIP_ARCH_HAS_GLOBAL_INT32_ATOMICS__ (0)
    -
    59 #define __HIP_ARCH_HAS_GLOBAL_FLOAT_ATOMIC_EXCH__ (0)
    -
    60 #define __HIP_ARCH_HAS_SHARED_INT32_ATOMICS__ (0)
    -
    61 #define __HIP_ARCH_HAS_SHARED_FLOAT_ATOMIC_EXCH__ (0)
    -
    62 #define __HIP_ARCH_HAS_FLOAT_ATOMIC_ADD__ (0)
    -
    63 
    -
    64 // 64-bit Atomics
    -
    65 #define __HIP_ARCH_HAS_GLOBAL_INT64_ATOMICS__ (0)
    -
    66 #define __HIP_ARCH_HAS_SHARED_INT64_ATOMICS__ (0)
    +
    35 #if defined(__HCC_ACCELERATOR__) and (__HCC_ACCELERATOR__ != 0)
    +
    36 #define __HIP_DEVICE_COMPILE__ 1
    +
    37 #else
    +
    38 #define __HIP_DEVICE_COMPILE__ 0
    +
    39 #endif
    +
    40 #endif
    +
    41 
    +
    42 // Auto enable __HIP_PLATFORM_NVCC__ if compiling with NVCC
    +
    43 #if defined(__NVCC__)
    +
    44 #define __HIP_PLATFORM_NVCC__
    +
    45 # ifdef __CUDACC__
    +
    46 # define __HIPCC__
    +
    47 # endif
    +
    48 
    +
    49 #if defined(__CUDA_ARCH__) and (__CUDA_ARCH__ != 0)
    +
    50 #define __HIP_DEVICE_COMPILE__ 1
    +
    51 #else
    +
    52 #define __HIP_DEVICE_COMPILE__ 0
    +
    53 #endif
    +
    54 
    +
    55 #endif
    +
    56 
    +
    57 
    +
    58 
    +
    59 
    +
    60 #if __HIP_DEVICE_COMPILE__ == 0
    +
    61 // 32-bit Atomics
    +
    62 #define __HIP_ARCH_HAS_GLOBAL_INT32_ATOMICS__ (0)
    +
    63 #define __HIP_ARCH_HAS_GLOBAL_FLOAT_ATOMIC_EXCH__ (0)
    +
    64 #define __HIP_ARCH_HAS_SHARED_INT32_ATOMICS__ (0)
    +
    65 #define __HIP_ARCH_HAS_SHARED_FLOAT_ATOMIC_EXCH__ (0)
    +
    66 #define __HIP_ARCH_HAS_FLOAT_ATOMIC_ADD__ (0)
    67 
    -
    68 // Doubles
    -
    69 #define __HIP_ARCH_HAS_DOUBLES__ (0)
    -
    70 
    -
    71 // Warp cross-lane operations
    -
    72 #define __HIP_ARCH_HAS_WARP_VOTE__ (0)
    -
    73 #define __HIP_ARCH_HAS_WARP_BALLOT__ (0)
    -
    74 #define __HIP_ARCH_HAS_WARP_SHUFFLE__ (0)
    -
    75 #define __HIP_ARCH_HAS_WARP_FUNNEL_SHIFT__ (0)
    -
    76 
    -
    77 // Sync
    -
    78 #define __HIP_ARCH_HAS_THREAD_FENCE_SYSTEM__ (0)
    -
    79 #define __HIP_ARCH_HAS_SYNC_THREAD_EXT__ (0)
    +
    68 // 64-bit Atomics
    +
    69 #define __HIP_ARCH_HAS_GLOBAL_INT64_ATOMICS__ (0)
    +
    70 #define __HIP_ARCH_HAS_SHARED_INT64_ATOMICS__ (0)
    +
    71 
    +
    72 // Doubles
    +
    73 #define __HIP_ARCH_HAS_DOUBLES__ (0)
    +
    74 
    +
    75 // Warp cross-lane operations
    +
    76 #define __HIP_ARCH_HAS_WARP_VOTE__ (0)
    +
    77 #define __HIP_ARCH_HAS_WARP_BALLOT__ (0)
    +
    78 #define __HIP_ARCH_HAS_WARP_SHUFFLE__ (0)
    +
    79 #define __HIP_ARCH_HAS_WARP_FUNNEL_SHIFT__ (0)
    80 
    -
    81 // Misc
    -
    82 #define __HIP_ARCH_HAS_SURFACE_FUNCS__ (0)
    -
    83 #define __HIP_ARCH_HAS_3DGRID__ (0)
    -
    84 #define __HIP_ARCH_HAS_DYNAMIC_PARALLEL__ (0)
    -
    85 #endif
    +
    81 // Sync
    +
    82 #define __HIP_ARCH_HAS_THREAD_FENCE_SYSTEM__ (0)
    +
    83 #define __HIP_ARCH_HAS_SYNC_THREAD_EXT__ (0)
    +
    84 
    +
    85 // Misc
    +
    86 #define __HIP_ARCH_HAS_SURFACE_FUNCS__ (0)
    +
    87 #define __HIP_ARCH_HAS_3DGRID__ (0)
    +
    88 #define __HIP_ARCH_HAS_DYNAMIC_PARALLEL__ (0)
    +
    89 #endif
    diff --git a/projects/hip/docs/RuntimeAPI/html/hip__hcc_8cpp.html b/projects/hip/docs/RuntimeAPI/html/hip__hcc_8cpp.html index 733d1a79a7..adfc13a1df 100644 --- a/projects/hip/docs/RuntimeAPI/html/hip__hcc_8cpp.html +++ b/projects/hip/docs/RuntimeAPI/html/hip__hcc_8cpp.html @@ -4,7 +4,7 @@ -HIP: Heterogenous-computing Interface for Portability: /home/fpadmin/ben/HIP6/src/hip_hcc.cpp File Reference +HIP: Heterogenous-computing Interface for Portability: /home/bensander/HIP.public/src/hip_hcc.cpp File Reference @@ -223,6 +223,9 @@ hc::accelerator_view * ihi hipError_t hipDeviceReset (void)  Destroy all resources and reset all state on the default device in the current process. More...
      +hipError_t hipDeviceGetAttribute (int *pi, hipDeviceAttribute_t attr, int device) + Query device attribute. More...
    hipError_t hipDeviceGetProperties (hipDeviceProp_t *props, int device)  Returns device properties. More...
      @@ -283,49 +286,58 @@ template<typename T > hc::completion_future ihipMemsetKernel (hipStream_t stream, T *ptr, T val, size_t sizeBytes)   hipError_t hipMalloc (void **ptr, size_t sizeBytes) + Allocate memory on the default accelerator. More...
      hipError_t hipMallocHost (void **ptr, size_t sizeBytes) + Allocate pinned host memory. More...
      - -hipError_t hipMemcpyToSymbol (const char *symbolName, const void *src, size_t count, size_t offset, hipMemcpyKind kind) +hipError_t hipMemcpyToSymbol (const char *symbolName, const void *src, size_t count, size_t offset, hipMemcpyKind kind) + Copies sizeBytes bytes from the memory area pointed to by src to the memory area pointed to by offset bytes from the start of symbol symbol. More...
      hipError_t hipMemcpy (void *dst, const void *src, size_t sizeBytes, hipMemcpyKind kind) + Copy data from src to dst. More...
      hipError_t hipMemcpyAsync (void *dst, const void *src, size_t sizeBytes, hipMemcpyKind kind, hipStream_t stream) + Copy data from src to dst asynchronously. More...
      - -hipError_t hipMemsetAsync (void *dst, int value, size_t sizeBytes, hipStream_t stream) +hipError_t hipMemsetAsync (void *dst, int value, size_t sizeBytes, hipStream_t stream) + Fills the first sizeBytes bytes of the memory area pointed to by dst with the constant byte value value. More...
      - -hipError_t hipMemset (void *dst, int value, size_t sizeBytes) +hipError_t hipMemset (void *dst, int value, size_t sizeBytes) + Fills the first sizeBytes bytes of the memory area pointed to by dst with the constant byte value value. More...
      hipError_t hipMemGetInfo (size_t *free, size_t *total)   hipError_t hipFree (void *ptr) + Free memory allocated by the hcc hip memory allocation API. This API performs an implicit hipDeviceSynchronize() call. More...
      hipError_t hipFreeHost (void *ptr) + Free memory allocated by the hcc hip host memory allocation API. More...
      hipError_t hipDeviceCanAccessPeer (int *canAccessPeer, int device, int peerDevice)  Determine if a device can access a peer's memory. More...
      hipError_t hipDeviceDisablePeerAccess (int peerDevice) + Disables registering memory on peerDevice for direct access from the current device. More...
      hipError_t hipDeviceEnablePeerAccess (int peerDevice, unsigned int flags) + Enables registering memory on peerDevice for direct access from the current device. More...
      - -hipError_t hipMemcpyPeer (void *dst, int dstDevice, const void *src, int srcDevice, size_t sizeBytes) +hipError_t hipMemcpyPeer (void *dst, int dstDevice, const void *src, int srcDevice, size_t sizeBytes) + Copies memory from one device to memory on another device. More...
      hipError_t hipMemcpyPeerAsync (void *dst, int dstDevice, const void *src, int srcDevice, size_t sizeBytes, hipStream_t stream) + Copies memory from one device to memory on another device. More...
      hipError_t hipDriverGetVersion (int *driverVersion) - Returns the approximate HIP driver versin. More...
    + Returns the approximate HIP driver version. More...
      hipError_t hipHccGetAccelerator (int deviceId, hc::accelerator *acc) - Return hc::acclerator associated with the specified deviceId. More...
    + Return hc::accelerator associated with the specified deviceId. More...
      hipError_t hipHccGetAcceleratorView (hipStream_t stream, hc::accelerator_view **av) - Return hc::acclerator_view associated with the specified stream. More...
    + Return hc::accelerator_view associated with the specified stream. More...
     

    @@ -466,7 +478,7 @@ std::vector< ihipDevice_t & diff --git a/projects/hip/docs/RuntimeAPI/html/hip__runtime_8h_source.html b/projects/hip/docs/RuntimeAPI/html/hip__runtime_8h_source.html index d79618630c..40c09acfeb 100644 --- a/projects/hip/docs/RuntimeAPI/html/hip__runtime_8h_source.html +++ b/projects/hip/docs/RuntimeAPI/html/hip__runtime_8h_source.html @@ -4,7 +4,7 @@ -HIP: Heterogenous-computing Interface for Portability: /home/fpadmin/ben/HIP6/include/hip_runtime.h Source File +HIP: Heterogenous-computing Interface for Portability: /home/bensander/HIP.public/include/hip_runtime.h Source File @@ -135,14 +135,16 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
    54 #endif
    55 
    56 
    -
    57 #include <hip_runtime_api.h>
    -
    58 #include <hip_vector_types.h>
    +
    57 #include <hip_runtime_api.h>
    +
    58 #include <hip_vector_types.h>
    59 
    - + +
    Contains definitions of APIs for HIP runtime.
    + diff --git a/projects/hip/docs/RuntimeAPI/html/hip__runtime__api_8h_source.html b/projects/hip/docs/RuntimeAPI/html/hip__runtime__api_8h_source.html index 66d3cc5cfa..4ef99226aa 100644 --- a/projects/hip/docs/RuntimeAPI/html/hip__runtime__api_8h_source.html +++ b/projects/hip/docs/RuntimeAPI/html/hip__runtime__api_8h_source.html @@ -4,7 +4,7 @@ -HIP: Heterogenous-computing Interface for Portability: /home/fpadmin/ben/HIP6/include/hip_runtime_api.h Source File +HIP: Heterogenous-computing Interface for Portability: /home/bensander/HIP.public/include/hip_runtime_api.h Source File @@ -117,21 +117,21 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
    33 #include <hip_common.h>
    34 
    35 typedef struct {
    -
    36  // 32-bit Atomics:
    +
    36  // 32-bit Atomics
    37  unsigned hasGlobalInt32Atomics : 1;
    38  unsigned hasGlobalFloatAtomicExch : 1;
    39  unsigned hasSharedInt32Atomics : 1;
    40  unsigned hasSharedFloatAtomicExch : 1;
    41  unsigned hasFloatAtomicAdd : 1;
    42 
    -
    43  // 64-bit Atomics:
    +
    43  // 64-bit Atomics
    44  unsigned hasGlobalInt64Atomics : 1;
    45  unsigned hasSharedInt64Atomics : 1;
    46 
    47  // Doubles
    48  unsigned hasDoubles : 1;
    49 
    -
    50  // Warp cross-lane operations:
    +
    50  // Warp cross-lane operations
    51  unsigned hasWarpVote : 1;
    52  unsigned hasWarpBallot : 1;
    53  unsigned hasWarpShuffle : 1;
    @@ -152,115 +152,175 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
    68 // Common headers for both NVCC and HCC paths:
    69 
    74 typedef struct hipDeviceProp_t {
    -
    75  char name[256];
    -
    76  size_t totalGlobalMem;
    - -
    78  int regsPerBlock ;
    -
    79  int warpSize ;
    - -
    81  int maxThreadsDim[3];
    -
    82  int maxGridSize[3];
    -
    83  int clockRate ;
    -
    84 
    -
    85  size_t totalConstMem;
    -
    86  int major ;
    -
    87  int minor;
    - - - - -
    92 
    - -
    94 
    - - -
    97 
    -
    98 
    -
    99 // hack to get these to show up in Doxygen:
    -
    107 /*
    -
    108  * @brief hipError_t
    -
    109  * @enum
    -
    110  * @ingroup Enumerations
    -
    111  */
    -
    112 typedef enum hipError_t {
    - - - - - - - - - - -
    123 
    - - -
    126 } hipError_t;
    -
    127 
    +
    75  char name[256];
    +
    76  size_t totalGlobalMem;
    + + +
    79  int warpSize;
    + +
    81  int maxThreadsDim[3];
    +
    82  int maxGridSize[3];
    +
    83  int clockRate;
    + +
    85  size_t totalConstMem;
    +
    86  int major;
    +
    87  int minor;
    + + + + + + + +
    95  int pciBusID;
    + + + +
    99 
    +
    100 
    +
    101 // hack to get these to show up in Doxygen:
    +
    109 /*
    +
    110  * @brief hipError_t
    +
    111  * @enum
    +
    112  * @ingroup Enumerations
    +
    113  */
    +
    114 typedef enum hipError_t {
    + + + + + + + + + + + + +
    127 } hipError_t;
    128 
    -
    129 
    -
    134 #if defined(__HIP_PLATFORM_HCC__) and not defined (__HIP_PLATFORM_NVCC__)
    -
    135 #include "hcc_detail/hip_runtime_api.h"
    -
    136 #elif defined(__HIP_PLATFORM_NVCC__) and not defined (__HIP_PLATFORM_HCC__)
    -
    137 #include "nvcc_detail/hip_runtime_api.h"
    -
    138 #else
    -
    139 #error("Must define exactly one of __HIP_PLATFORM_HCC__ or __HIP_PLATFORM_NVCC__");
    -
    140 #endif
    -
    141 
    -
    142 
    -
    150 #ifdef __cplusplus
    -
    151 template<class T>
    -
    152 static inline hipError_t hipMalloc ( T** devPtr, size_t size)
    -
    153 {
    -
    154  return hipMalloc((void**)devPtr, size);
    -
    155 }
    -
    156 
    -
    157 template<class T>
    -
    158 static inline hipError_t hipMallocHost ( T** ptr, size_t size)
    -
    159 {
    -
    160  return hipMallocHost((void**)ptr, size);
    -
    161 }
    -
    162 #endif
    -
    Call to cudaGetDeviceCount returned 0 devices.
    Definition: hip_runtime_api.h:121
    -
    size_t totalConstMem
    Size of shared memory region (in bytes)
    Definition: hip_runtime_api.h:85
    -
    Unknown symbol.
    Definition: hip_runtime_api.h:116
    -
    Successful completion.
    Definition: hip_runtime_api.h:113
    +
    129 /*
    +
    130  * @brief hipDeviceAttribute_t
    +
    131  * @enum
    +
    132  * @ingroup Enumerations
    +
    133  */
    +
    134 typedef enum hipDeviceAttribute_t {
    + + + + + + + + + + + + + + + + + + + + + + + + +
    159 
    +
    164 #if defined(__HIP_PLATFORM_HCC__) and not defined (__HIP_PLATFORM_NVCC__)
    + +
    166 #elif defined(__HIP_PLATFORM_NVCC__) and not defined (__HIP_PLATFORM_HCC__)
    +
    167 #include "nvcc_detail/hip_runtime_api.h"
    +
    168 #else
    +
    169 #error("Must define exactly one of __HIP_PLATFORM_HCC__ or __HIP_PLATFORM_NVCC__");
    +
    170 #endif
    +
    171 
    +
    172 
    +
    180 #ifdef __cplusplus
    +
    181 template<class T>
    +
    182 static inline hipError_t hipMalloc ( T** devPtr, size_t size)
    +
    183 {
    +
    184  return hipMalloc((void**)devPtr, size);
    +
    185 }
    +
    186 
    +
    187 template<class T>
    +
    188 static inline hipError_t hipMallocHost ( T** ptr, size_t size)
    +
    189 {
    +
    190  return hipMallocHost((void**)ptr, size);
    +
    191 }
    +
    192 #endif
    +
    Call to hipGetDeviceCount returned 0 devices.
    Definition: hip_runtime_api.h:123
    +
    size_t totalConstMem
    Size of constant memory region (in bytes).
    Definition: hip_runtime_api.h:85
    +
    Maximum Shared Memory Per Multiprocessor.
    Definition: hip_runtime_api.h:157
    +
    Maximum x-dimension of a block.
    Definition: hip_runtime_api.h:136
    +
    Maximum x-dimension of a grid.
    Definition: hip_runtime_api.h:139
    +
    Unknown symbol.
    Definition: hip_runtime_api.h:118
    +
    Successful completion.
    Definition: hip_runtime_api.h:115
    int minor
    Minor compute capability. On HCC, this is an approximation and features may differ from CUDA CC...
    Definition: hip_runtime_api.h:87
    -
    int regsPerBlock
    registers per block
    Definition: hip_runtime_api.h:78
    -
    DeviceID must be in range 0...#compute-devices.
    Definition: hip_runtime_api.h:120
    -
    int clockRate
    max clock frequency of the multiProcessors, in khz.
    Definition: hip_runtime_api.h:83
    -
    Out of resources error.
    Definition: hip_runtime_api.h:117
    -
    Unknown error.
    Definition: hip_runtime_api.h:124
    -
    int maxThreadsPerBlock
    max work items per work group or workgroup max size
    Definition: hip_runtime_api.h:80
    -
    size_t sharedMemPerBlock
    Size of shared memory region (in bytes)
    Definition: hip_runtime_api.h:77
    +
    Maximum number of 32-bit registers available to a thread block. This number is shared by all thread b...
    Definition: hip_runtime_api.h:145
    +
    int regsPerBlock
    Registers per block.
    Definition: hip_runtime_api.h:78
    +
    Size of L2 cache in bytes. 0 if the device doesn't have L2 cache.
    Definition: hip_runtime_api.h:150
    +
    DeviceID must be in range 0...#compute-devices.
    Definition: hip_runtime_api.h:122
    +
    Peak clock frequency in kilohertz.
    Definition: hip_runtime_api.h:146
    +
    int clockRate
    Max clock frequency of the multiProcessors in khz.
    Definition: hip_runtime_api.h:83
    +
    Maximum z-dimension of a grid.
    Definition: hip_runtime_api.h:141
    +
    Out of resources error.
    Definition: hip_runtime_api.h:119
    +
    Minor compute capability version number.
    Definition: hip_runtime_api.h:153
    +
    Maximum shared memory available per block in bytes.
    Definition: hip_runtime_api.h:142
    +
    int pciBusID
    PCI Bus ID.
    Definition: hip_runtime_api.h:95
    +
    Maximum y-dimension of a grid.
    Definition: hip_runtime_api.h:140
    +
    Unknown error.
    Definition: hip_runtime_api.h:125
    +
    int maxThreadsPerBlock
    Max work items per work group or workgroup max size.
    Definition: hip_runtime_api.h:80
    +
    Maximum y-dimension of a block.
    Definition: hip_runtime_api.h:137
    +
    size_t sharedMemPerBlock
    Size of shared memory region (in bytes).
    Definition: hip_runtime_api.h:77
    int maxThreadsPerMultiProcessor
    Maximum resident threads per multi-processor.
    Definition: hip_runtime_api.h:90
    int l2CacheSize
    L2 cache size.
    Definition: hip_runtime_api.h:89
    -
    Resource handle (hipEvent_t or hipStream_t) invalid.
    Definition: hip_runtime_api.h:119
    -
    Memory allocation error.
    Definition: hip_runtime_api.h:114
    -
    hipDeviceArch_t arch
    Architectural feature flags. New for HIP.
    Definition: hip_runtime_api.h:95
    -
    int maxGridSize[3]
    max grid dimensions (XYZ)
    Definition: hip_runtime_api.h:82
    +
    hipDeviceAttribute_t
    Definition: hip_runtime_api.h:134
    +
    Major compute capability version number.
    Definition: hip_runtime_api.h:152
    +
    Maximum number of threads per block.
    Definition: hip_runtime_api.h:135
    +
    Resource handle (hipEvent_t or hipStream_t) invalid.
    Definition: hip_runtime_api.h:121
    +
    Memory allocation error.
    Definition: hip_runtime_api.h:116
    +
    hipDeviceArch_t arch
    Architectural feature flags. New for HIP.
    Definition: hip_runtime_api.h:93
    +
    int maxGridSize[3]
    Max grid dimensions (XYZ).
    Definition: hip_runtime_api.h:82
    int computeMode
    Compute mode.
    Definition: hip_runtime_api.h:91
    -
    hipError_t hipMallocHost(void **ptr, size_t size)
    Definition: hip_hcc.cpp:1289
    -
    Marker that more error codes are needed.
    Definition: hip_runtime_api.h:125
    +
    Maximum z-dimension of a block.
    Definition: hip_runtime_api.h:138
    +
    PCI Bus ID.
    Definition: hip_runtime_api.h:155
    +
    hipError_t hipMallocHost(void **ptr, size_t size)
    Allocate pinned host memory.
    Definition: hip_hcc.cpp:1372
    +
    Marker that more error codes are needed.
    Definition: hip_runtime_api.h:126
    +
    Warp size in threads.
    Definition: hip_runtime_api.h:144
    int major
    Major compute capability. On HCC, this is an approximation and features may differ from CUDA CC...
    Definition: hip_runtime_api.h:86
    -
    hipError_t
    Definition: hip_runtime_api.h:112
    -
    int clockInstructionRate
    Frequency in khz of the timer used by the device-side "clock*" instructions. New for HIP...
    Definition: hip_runtime_api.h:93
    -
    Memory free error.
    Definition: hip_runtime_api.h:115
    -
    int warpSize
    warp size
    Definition: hip_runtime_api.h:79
    -
    size_t totalGlobalMem
    Size of global memory region (in bytes)
    Definition: hip_runtime_api.h:76
    -
    hipError_t hipMalloc(void **ptr, size_t size)
    Definition: hip_hcc.cpp:1268
    -
    int maxThreadsDim[3]
    max number of threads in each dimension (XYZ) of a block
    Definition: hip_runtime_api.h:81
    -
    One or more of the paramters passed to the API call is NULL or not in an acceptable range...
    Definition: hip_runtime_api.h:118
    +
    Peak memory clock frequency in kilohertz.
    Definition: hip_runtime_api.h:147
    +
    Maximum resident threads per multiprocessor.
    Definition: hip_runtime_api.h:151
    +
    hipError_t
    Definition: hip_runtime_api.h:114
    +
    int clockInstructionRate
    Frequency in khz of the timer used by the device-side "clock*" instructions. New for HIP...
    Definition: hip_runtime_api.h:92
    +
    Constant memory size in bytes.
    Definition: hip_runtime_api.h:143
    +
    Memory free error.
    Definition: hip_runtime_api.h:117
    +
    int warpSize
    Warp size.
    Definition: hip_runtime_api.h:79
    +
    int concurrentKernels
    Device can possibly execute multiple kernels concurrently.
    Definition: hip_runtime_api.h:94
    +
    size_t totalGlobalMem
    Size of global memory region (in bytes).
    Definition: hip_runtime_api.h:76
    +
    hipError_t hipMalloc(void **ptr, size_t size)
    Allocate memory on the default accelerator.
    Definition: hip_hcc.cpp:1351
    +
    Compute mode that device is currently in.
    Definition: hip_runtime_api.h:149
    +
    PCI Device ID.
    Definition: hip_runtime_api.h:156
    +
    int maxThreadsDim[3]
    Max number of threads in each dimension (XYZ) of a block.
    Definition: hip_runtime_api.h:81
    +
    Number of multiprocessors on the device.
    Definition: hip_runtime_api.h:148
    +
    One or more of the parameters passed to the API call is NULL or not in an acceptable range...
    Definition: hip_runtime_api.h:120
    Definition: hip_runtime_api.h:74
    -
    indicates that asynchronous operations enqueued earlier are not ready. This is not actually an error...
    Definition: hip_runtime_api.h:122
    +
    Indicates that asynchronous operations enqueued earlier are not ready. This is not actually an error...
    Definition: hip_runtime_api.h:124
    +
    size_t maxSharedMemoryPerMultiProcessor
    Maximum Shared Memory Per Multiprocessor.
    Definition: hip_runtime_api.h:97
    +
    int pciDeviceID
    PCI Device ID.
    Definition: hip_runtime_api.h:96
    char name[256]
    Device name.
    Definition: hip_runtime_api.h:75
    Definition: hip_runtime_api.h:35
    -
    int multiProcessorCount
    number of multi-processors (compute units)
    Definition: hip_runtime_api.h:88
    +
    Contains C function APIs for HIP runtime. This file does not use any HCC builtin or special language ...
    +
    int memoryClockRate
    Max memory clock frequency in khz.
    Definition: hip_runtime_api.h:84
    +
    Device can possibly execute multiple kernels concurrently.
    Definition: hip_runtime_api.h:154
    +
    int multiProcessorCount
    Number of multi-processors (compute units).
    Definition: hip_runtime_api.h:88
    diff --git a/projects/hip/docs/RuntimeAPI/html/hip__texture_8h.html b/projects/hip/docs/RuntimeAPI/html/hip__texture_8h.html new file mode 100644 index 0000000000..d95b127f85 --- /dev/null +++ b/projects/hip/docs/RuntimeAPI/html/hip__texture_8h.html @@ -0,0 +1,209 @@ + + + + + + +HIP: Heterogenous-computing Interface for Portability: /home/bensander/HIP.public/include/hcc_detail/hip_texture.h File Reference + + + + + + + + + +
    +
    + + + + + + +
    +
    HIP: Heterogenous-computing Interface for Portability +
    +
    +
    + + + + + + + + + +
    + +
    + + +
    +
    + +
    +
    hip_texture.h File Reference
    +
    +
    + +

    HIP C++ Texture API for hcc compiler. +More...

    +
    #include <limits.h>
    +#include <hip_runtime.h>
    +
    +

    Go to the source code of this file.

    + + + + + + + + +

    +Classes

    struct  hipChannelFormatDesc
     
    struct  textureReference
     
    struct  texture< T, texType, hipTextureReadMode >
     
    + + + + + +

    +Macros

    +#define hipTextureType1D   1
     
    +#define tex1Dfetch(_tex, _addr)   (_tex._dataPtr[_addr])
     
    + + + + + + + +

    +Typedefs

    +typedef struct hipChannelFormatDesc hipChannelFormatDesc
     
    +typedef enum hipTextureReadMode hipTextureReadMode
     
    +typedef enum hipTextureFilterMode hipTextureFilterMode
     
    + + + + + +

    +Enumerations

    enum  hipTextureReadMode { hipReadModeElementType + }
     
    enum  hipTextureFilterMode { hipFilterModePoint + }
     
    + + + + + + + + + + + + + +

    +Functions

    +template<class T >
    hipChannelFormatDesc hipCreateChannelDesc ()
     
    +template<class T , int dim, enum hipTextureReadMode readMode>
    hipError_t hipBindTexture (size_t *offset, struct texture< T, dim, readMode > &tex, const void *devPtr, const struct hipChannelFormatDesc *desc, size_t size=UINT_MAX)
     
    +template<class T , int dim, enum hipTextureReadMode readMode>
    hipError_t hipBindTexture (size_t *offset, struct texture< T, dim, readMode > &tex, const void *devPtr, size_t size=UINT_MAX)
     
    +template<class T , int dim, enum hipTextureReadMode readMode>
    hipError_t hipUnbindTexture (struct texture< T, dim, readMode > *tex)
     
    +

    Detailed Description

    +

    HIP C++ Texture API for hcc compiler.

    +

    Enumeration Type Documentation

    + +
    +
    + + + + +
    enum hipTextureFilterMode
    +
    + + +
    Enumerator
    hipFilterModePoint  +

    Point filter mode.

    +
    + +
    +
    + +
    +
    + + + + +
    enum hipTextureReadMode
    +
    + + +
    Enumerator
    hipReadModeElementType  +

    Read texture as specified element type

    +
    + +
    +
    +
    + + + + diff --git a/projects/hip/docs/RuntimeAPI/html/hip__texture_8h_source.html b/projects/hip/docs/RuntimeAPI/html/hip__texture_8h_source.html index 9888a5c9a5..cdf660b728 100644 --- a/projects/hip/docs/RuntimeAPI/html/hip__texture_8h_source.html +++ b/projects/hip/docs/RuntimeAPI/html/hip__texture_8h_source.html @@ -4,7 +4,7 @@ -HIP: Heterogenous-computing Interface for Portability: /home/fpadmin/ben/HIP6/include/hcc_detail/hip_texture.h Source File +HIP: Heterogenous-computing Interface for Portability: /home/bensander/HIP.public/include/hcc_detail/hip_texture.h Source File @@ -89,7 +89,7 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
    hip_texture.h
    -
    1 /*
    +Go to the documentation of this file.
    1 /*
    2 Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
    3 
    4 Permission is hereby granted, free of charge, to any person obtaining a copy
    @@ -112,7 +112,7 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
    21 */
    22 #pragma once
    23 
    -
    28 #include <limits.h>
    +
    28 #include <limits.h>
    29 
    30 #include <hip_runtime.h>
    31 
    @@ -125,18 +125,18 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
    38  int _dummy;
    40 
    -
    41 typedef enum hipTextureReadMode
    +
    41 typedef enum hipTextureReadMode
    42 {
    -
    43  hipReadModeElementType,
    -
    44 } hipTextureReadMode;
    + +
    46 
    -
    47 typedef enum hipTextureFilterMode
    +
    48 {
    -
    49  hipFilterModePoint,
    -
    50 } hipTextureFilterMode;
    + +
    52 
    -
    54  hipTextureFilterMode filterMode;
    +
    54  hipTextureFilterMode filterMode;
    55  bool normalized;
    56  hipChannelFormatDesc channelDesc;
    57 };
    @@ -160,7 +160,7 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
    82 // These are C++ APIs - maybe belong in separate file.
    106 // C API:
    107 #if 0
    -
    108 hipChannelFormatDesc hipBindTexture(size_t *offset, struct textureReference *tex, const void *devPtr, const struct hipChannelFormatDesc *desc, size_t size=UINT_MAX)
    +
    108 hipChannelFormatDesc hipBindTexture(size_t *offset, struct textureReference *tex, const void *devPtr, const struct hipChannelFormatDesc *desc, size_t size=UINT_MAX)
    109 {
    110  tex->_dataPtr = devPtr;
    111 }
    @@ -171,7 +171,7 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
    116  **/
    117 // TODO
    118 template <class T>
    -
    119 hipChannelFormatDesc hipCreateChannelDesc()
    +
    119 hipChannelFormatDesc hipCreateChannelDesc()
    120 {
    122  return desc;
    @@ -182,11 +182,11 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
    127  **/
    128 // TODO-doc
    129 template <class T, int dim, enum hipTextureReadMode readMode>
    -
    130 hipError_t hipBindTexture(size_t *offset,
    -
    131  struct texture<T, dim, readMode> &tex,
    -
    132  const void *devPtr,
    -
    133  const struct hipChannelFormatDesc *desc,
    -
    134  size_t size=UINT_MAX)
    +
    130 hipError_t hipBindTexture(size_t *offset,
    +
    131  struct texture<T, dim, readMode> &tex,
    +
    132  const void *devPtr,
    +
    133  const struct hipChannelFormatDesc *desc,
    +
    134  size_t size=UINT_MAX)
    135 {
    136  tex._dataPtr = static_cast<const T*>(devPtr);
    137 
    @@ -199,10 +199,10 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
    144  **/
    145 // TODO-doc
    146 template <class T, int dim, enum hipTextureReadMode readMode>
    -
    147 hipError_t hipBindTexture(size_t *offset,
    -
    148  struct texture<T, dim, readMode> &tex,
    -
    149  const void *devPtr,
    -
    150  size_t size=UINT_MAX)
    +
    147 hipError_t hipBindTexture(size_t *offset,
    +
    148  struct texture<T, dim, readMode> &tex,
    +
    149  const void *devPtr,
    +
    150  size_t size=UINT_MAX)
    151 {
    152  return hipBindTexture(offset, tex, devPtr, &tex.channelDesc, size);
    153 }
    @@ -224,16 +224,20 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
    169 
    170 // doxygen end Texture
    176 // End doxygen API:
    -
    Successful completion.
    Definition: hip_runtime_api.h:113
    +
    Definition: hip_texture.h:43
    +
    Successful completion.
    Definition: hip_runtime_api.h:115
    +
    Definition: hip_texture.h:49
    Definition: hip_texture.h:53
    - -
    hipError_t
    Definition: hip_runtime_api.h:112
    +
    Contains definitions of APIs for HIP runtime.
    +
    hipError_t
    Definition: hip_runtime_api.h:114
    +
    hipTextureReadMode
    Definition: hip_texture.h:41
    +
    hipTextureFilterMode
    Definition: hip_texture.h:47
    Definition: hip_texture.h:36
    Definition: hip_texture.h:60
    diff --git a/projects/hip/docs/RuntimeAPI/html/hip__vector__types_8h_source.html b/projects/hip/docs/RuntimeAPI/html/hip__vector__types_8h_source.html index 15eba01435..b12e96e36d 100644 --- a/projects/hip/docs/RuntimeAPI/html/hip__vector__types_8h_source.html +++ b/projects/hip/docs/RuntimeAPI/html/hip__vector__types_8h_source.html @@ -4,7 +4,7 @@ -HIP: Heterogenous-computing Interface for Portability: /home/fpadmin/ben/HIP6/include/hip_vector_types.h Source File +HIP: Heterogenous-computing Interface for Portability: /home/bensander/HIP.public/include/hip_vector_types.h Source File @@ -117,16 +117,17 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
    27 
    28 
    29 #if defined(__HIP_PLATFORM_HCC__) and not defined (__HIP_PLATFORM_NVCC__)
    -
    30 #include <hcc_detail/hip_vector_types.h>
    +
    31 #elif defined(__HIP_PLATFORM_NVCC__) and not defined (__HIP_PLATFORM_HCC__)
    32 #include <vector_types.h>
    33 #else
    34 #error("Must define exactly one of __HIP_PLATFORM_HCC__ or __HIP_PLATFORM_NVCC__");
    35 #endif
    +
    Defines the different newt vector types for HIP runtime.
    diff --git a/projects/hip/docs/RuntimeAPI/html/host__defines_8h.html b/projects/hip/docs/RuntimeAPI/html/host__defines_8h.html new file mode 100644 index 0000000000..9d7b976271 --- /dev/null +++ b/projects/hip/docs/RuntimeAPI/html/host__defines_8h.html @@ -0,0 +1,147 @@ + + + + + + +HIP: Heterogenous-computing Interface for Portability: /home/bensander/HIP.public/include/hcc_detail/host_defines.h File Reference + + + + + + + + + +
    +
    + + + + + + +
    +
    HIP: Heterogenous-computing Interface for Portability +
    +
    +
    + + + + + + + + + +
    + +
    + + +
    +
    + +
    +
    host_defines.h File Reference
    +
    +
    + +

    TODO-doc. +More...

    + +

    Go to the source code of this file.

    + + + + + + + + + + + + + + + + +

    +Macros

    #define __host__   __attribute__((cpu))
     
    +#define __device__   __attribute__((hc))
     
    +#define __global__   __attribute__((hc_grid_launch))
     
    +#define __noinline__   __attribute__((noinline))
     
    +#define __forceinline__   __attribute__((always_inline))
     
    +#define __shared__   tile_static
     
    +#define __constant__   __attribute__((address_space(2)))
     
    +

    Detailed Description

    +

    TODO-doc.

    +

    Macro Definition Documentation

    + +
    +
    + + + + +
    #define __host__   __attribute__((cpu))
    +
    +

    Function and kernel markers

    + +
    +
    +
    + + + + diff --git a/projects/hip/docs/RuntimeAPI/html/host__defines_8h_source.html b/projects/hip/docs/RuntimeAPI/html/host__defines_8h_source.html index c10f6c5202..f4430d5190 100644 --- a/projects/hip/docs/RuntimeAPI/html/host__defines_8h_source.html +++ b/projects/hip/docs/RuntimeAPI/html/host__defines_8h_source.html @@ -4,7 +4,7 @@ -HIP: Heterogenous-computing Interface for Portability: /home/fpadmin/ben/HIP6/include/hcc_detail/host_defines.h Source File +HIP: Heterogenous-computing Interface for Portability: /home/bensander/HIP.public/include/hcc_detail/host_defines.h Source File @@ -89,7 +89,7 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
    host_defines.h
    -
    1 /*
    +Go to the documentation of this file.
    1 /*
    2 Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
    3 
    4 Permission is hereby granted, free of charge, to any person obtaining a copy
    @@ -110,47 +110,48 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
    19 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    20 THE SOFTWARE.
    21 */
    -
    22 #ifdef __HCC__
    -
    23 
    -
    26 #define __host__ __attribute__((cpu))
    -
    27 #define __device__ __attribute__((hc))
    -
    28 
    -
    29 #ifndef DISABLE_GRID_LAUNCH
    -
    30 #define __global__ __attribute__((hc_grid_launch))
    -
    31 #else
    -
    32 #define __global__
    -
    33 #endif
    +
    22 
    +
    28 #ifdef __HCC__
    +
    29 
    +
    32 #define __host__ __attribute__((cpu))
    +
    33 #define __device__ __attribute__((hc))
    34 
    -
    35 #define __noinline__ __attribute__((noinline))
    -
    36 #define __forceinline__ __attribute__((always_inline))
    -
    37 
    -
    38 
    -
    39 
    -
    40 /*
    -
    41  * Variable Type Qualifiers:
    -
    42  */
    -
    43 // _restrict is supported by the compiler
    -
    44 #define __shared__ tile_static
    -
    45 #define __constant__ __attribute__((address_space(2)))
    -
    46 
    -
    47 #else
    -
    48 // Non-HCC compiler
    -
    52 #define __host__
    -
    53 #define __device__
    -
    54 
    -
    55 #define __global__
    -
    56 
    -
    57 #define __noinline__
    -
    58 #define __forceinline__
    -
    59 
    -
    60 #define __shared__
    -
    61 #define __constant__
    +
    35 #ifndef DISABLE_GRID_LAUNCH
    +
    36 #define __global__ __attribute__((hc_grid_launch))
    +
    37 #else
    +
    38 #define __global__
    +
    39 #endif
    +
    40 
    +
    41 #define __noinline__ __attribute__((noinline))
    +
    42 #define __forceinline__ __attribute__((always_inline))
    +
    43 
    +
    44 
    +
    45 
    +
    46 /*
    +
    47  * Variable Type Qualifiers:
    +
    48  */
    +
    49 // _restrict is supported by the compiler
    +
    50 #define __shared__ tile_static
    +
    51 #define __constant__ __attribute__((address_space(2)))
    +
    52 
    +
    53 #else
    +
    54 // Non-HCC compiler
    +
    58 #define __host__
    +
    59 #define __device__
    +
    60 
    +
    61 #define __global__
    62 
    -
    63 #endif
    +
    63 #define __noinline__
    +
    64 #define __forceinline__
    +
    65 
    +
    66 #define __shared__
    +
    67 #define __constant__
    +
    68 
    +
    69 #endif
    diff --git a/projects/hip/docs/RuntimeAPI/html/index.html b/projects/hip/docs/RuntimeAPI/html/index.html index 7fc6c60b4d..8ba01822a2 100644 --- a/projects/hip/docs/RuntimeAPI/html/index.html +++ b/projects/hip/docs/RuntimeAPI/html/index.html @@ -91,7 +91,7 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
    diff --git a/projects/hip/docs/RuntimeAPI/html/modules.html b/projects/hip/docs/RuntimeAPI/html/modules.html index d2d046519d..6c65a6cbdd 100644 --- a/projects/hip/docs/RuntimeAPI/html/modules.html +++ b/projects/hip/docs/RuntimeAPI/html/modules.html @@ -99,7 +99,7 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search'); diff --git a/projects/hip/docs/RuntimeAPI/html/pages.html b/projects/hip/docs/RuntimeAPI/html/pages.html index 1ac5403b3f..ecccd28e6d 100644 --- a/projects/hip/docs/RuntimeAPI/html/pages.html +++ b/projects/hip/docs/RuntimeAPI/html/pages.html @@ -88,7 +88,7 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search'); diff --git a/projects/hip/docs/RuntimeAPI/html/search/all_0.js b/projects/hip/docs/RuntimeAPI/html/search/all_0.js index 2fba32867b..00fe08ccff 100644 --- a/projects/hip/docs/RuntimeAPI/html/search/all_0.js +++ b/projects/hip/docs/RuntimeAPI/html/search/all_0.js @@ -1,4 +1,4 @@ var searchData= [ - ['arch',['arch',['../structhipDeviceProp__t.html#afc58158e44bef6ad26f2be401434b049',1,'hipDeviceProp_t']]] + ['_5f_5fhost_5f_5f',['__host__',['../host__defines_8h.html#a803050db3c78e0db3ea59a0c35499622',1,'host_defines.h']]] ]; diff --git a/projects/hip/docs/RuntimeAPI/html/search/all_1.js b/projects/hip/docs/RuntimeAPI/html/search/all_1.js index 8d42650d49..2fba32867b 100644 --- a/projects/hip/docs/RuntimeAPI/html/search/all_1.js +++ b/projects/hip/docs/RuntimeAPI/html/search/all_1.js @@ -1,4 +1,4 @@ var searchData= [ - ['bug_20list',['Bug List',['../bug.html',1,'']]] + ['arch',['arch',['../structhipDeviceProp__t.html#afc58158e44bef6ad26f2be401434b049',1,'hipDeviceProp_t']]] ]; diff --git a/projects/hip/docs/RuntimeAPI/html/search/all_10.js b/projects/hip/docs/RuntimeAPI/html/search/all_10.js index 133dd9dc6e..9043ae6945 100644 --- a/projects/hip/docs/RuntimeAPI/html/search/all_10.js +++ b/projects/hip/docs/RuntimeAPI/html/search/all_10.js @@ -1,4 +1,7 @@ var searchData= [ - 
['y',['y',['../structdim3.html#a83e60e072f7e8bdfde6ac05053cbb370',1,'dim3']]] + ['texture',['texture',['../structtexture.html',1,'texture< T, texType, hipTextureReadMode >'],['../group__Texture.html',1,'(Global Namespace)']]], + ['texturereference',['textureReference',['../structtextureReference.html',1,'']]], + ['totalconstmem',['totalConstMem',['../structhipDeviceProp__t.html#a29880232c56120be3455ce00d5379665',1,'hipDeviceProp_t']]], + ['totalglobalmem',['totalGlobalMem',['../structhipDeviceProp__t.html#acedd6a2d23423441e4bf51c4a1b719f9',1,'hipDeviceProp_t']]] ]; diff --git a/projects/hip/docs/RuntimeAPI/html/search/all_11.js b/projects/hip/docs/RuntimeAPI/html/search/all_11.js index e8bf38b99c..46a1400a7b 100644 --- a/projects/hip/docs/RuntimeAPI/html/search/all_11.js +++ b/projects/hip/docs/RuntimeAPI/html/search/all_11.js @@ -1,4 +1,4 @@ var searchData= [ - ['z',['z',['../structdim3.html#a866e38993ecc4e76fd47311236c16b04',1,'dim3']]] + ['warpsize',['warpSize',['../structhipDeviceProp__t.html#af3357d33c004608bf05bc21a352be81b',1,'hipDeviceProp_t']]] ]; diff --git a/projects/hip/docs/RuntimeAPI/html/search/all_2.js b/projects/hip/docs/RuntimeAPI/html/search/all_2.js index f928b2e2ec..8d42650d49 100644 --- a/projects/hip/docs/RuntimeAPI/html/search/all_2.js +++ b/projects/hip/docs/RuntimeAPI/html/search/all_2.js @@ -1,7 +1,4 @@ var searchData= [ - ['clockinstructionrate',['clockInstructionRate',['../structhipDeviceProp__t.html#a6fbf3b08a1a08ae700f1a06265f6666b',1,'hipDeviceProp_t']]], - ['clockrate',['clockRate',['../structhipDeviceProp__t.html#a1dd15bee43692b8649dfbdc1adbaaf96',1,'hipDeviceProp_t']]], - ['computemode',['computeMode',['../structhipDeviceProp__t.html#ae7d9216f8583a703359d0b9373823f5d',1,'hipDeviceProp_t']]], - ['control',['Control',['../group__Profiler.html',1,'']]] + ['bug_20list',['Bug List',['../bug.html',1,'']]] ]; diff --git a/projects/hip/docs/RuntimeAPI/html/search/all_3.js b/projects/hip/docs/RuntimeAPI/html/search/all_3.js index 
d715da215b..e6f0e7edea 100644 --- a/projects/hip/docs/RuntimeAPI/html/search/all_3.js +++ b/projects/hip/docs/RuntimeAPI/html/search/all_3.js @@ -1,6 +1,8 @@ var searchData= [ - ['device_20management',['Device Management',['../group__Device.html',1,'']]], - ['dim3',['dim3',['../structdim3.html',1,'dim3'],['../group__GlobalDefs.html#gacb37281795c3567d0b10a61c056d512b',1,'dim3(): hip_runtime_api.h']]], - ['device_20memory_20access',['Device Memory Access',['../group__PeerToPeer.html',1,'']]] + ['clockinstructionrate',['clockInstructionRate',['../structhipDeviceProp__t.html#a6fbf3b08a1a08ae700f1a06265f6666b',1,'hipDeviceProp_t']]], + ['clockrate',['clockRate',['../structhipDeviceProp__t.html#a1dd15bee43692b8649dfbdc1adbaaf96',1,'hipDeviceProp_t']]], + ['computemode',['computeMode',['../structhipDeviceProp__t.html#ae7d9216f8583a703359d0b9373823f5d',1,'hipDeviceProp_t']]], + ['concurrentkernels',['concurrentKernels',['../structhipDeviceProp__t.html#ad8461a28caf9c38c58cf358583b5bee3',1,'hipDeviceProp_t']]], + ['control',['Control',['../group__Profiler.html',1,'']]] ]; diff --git a/projects/hip/docs/RuntimeAPI/html/search/all_4.js b/projects/hip/docs/RuntimeAPI/html/search/all_4.js index c9fd17e83e..d715da215b 100644 --- a/projects/hip/docs/RuntimeAPI/html/search/all_4.js +++ b/projects/hip/docs/RuntimeAPI/html/search/all_4.js @@ -1,5 +1,6 @@ var searchData= [ - ['error_20handling',['Error Handling',['../group__Error.html',1,'']]], - ['event_20management',['Event Management',['../group__Event.html',1,'']]] + ['device_20management',['Device Management',['../group__Device.html',1,'']]], + ['dim3',['dim3',['../structdim3.html',1,'dim3'],['../group__GlobalDefs.html#gacb37281795c3567d0b10a61c056d512b',1,'dim3(): hip_runtime_api.h']]], + ['device_20memory_20access',['Device Memory Access',['../group__PeerToPeer.html',1,'']]] ]; diff --git a/projects/hip/docs/RuntimeAPI/html/search/all_5.js b/projects/hip/docs/RuntimeAPI/html/search/all_5.js index 529e9394c3..c9fd17e83e 100644 
--- a/projects/hip/docs/RuntimeAPI/html/search/all_5.js +++ b/projects/hip/docs/RuntimeAPI/html/search/all_5.js @@ -1,4 +1,5 @@ var searchData= [ - ['global_20enum_20and_20defines',['Global enum and defines',['../group__GlobalDefs.html',1,'']]] + ['error_20handling',['Error Handling',['../group__Error.html',1,'']]], + ['event_20management',['Event Management',['../group__Event.html',1,'']]] ]; diff --git a/projects/hip/docs/RuntimeAPI/html/search/all_6.js b/projects/hip/docs/RuntimeAPI/html/search/all_6.js index 84510111be..529e9394c3 100644 --- a/projects/hip/docs/RuntimeAPI/html/search/all_6.js +++ b/projects/hip/docs/RuntimeAPI/html/search/all_6.js @@ -1,107 +1,4 @@ var searchData= [ - ['hip_20api',['HIP API',['../group__API.html',1,'']]], - ['has3dgrid',['has3dGrid',['../structhipDeviceArch__t.html#aa5e22d295cce0d9a34ee6e7e7e378c26',1,'hipDeviceArch_t']]], - ['hasdoubles',['hasDoubles',['../structhipDeviceArch__t.html#a2d624e3d85e615b71d1182f8912893b4',1,'hipDeviceArch_t']]], - ['hasdynamicparallelism',['hasDynamicParallelism',['../structhipDeviceArch__t.html#a9114bfc718bf0648b54ff9a319a36b35',1,'hipDeviceArch_t']]], - ['hasfloatatomicadd',['hasFloatAtomicAdd',['../structhipDeviceArch__t.html#a2321d6ef74aac91c044f3289d25b2d41',1,'hipDeviceArch_t']]], - ['hasfunnelshift',['hasFunnelShift',['../structhipDeviceArch__t.html#aaec4f2d983d4602858fae8e9ddeee3ff',1,'hipDeviceArch_t']]], - ['hasglobalfloatatomicexch',['hasGlobalFloatAtomicExch',['../structhipDeviceArch__t.html#a8f213ae9a4729dff1c636ac5de0e2fa2',1,'hipDeviceArch_t']]], - ['hasglobalint32atomics',['hasGlobalInt32Atomics',['../structhipDeviceArch__t.html#a8d00c3ab98869b602c714fe7abe68e93',1,'hipDeviceArch_t']]], - ['hasglobalint64atomics',['hasGlobalInt64Atomics',['../structhipDeviceArch__t.html#ad5aa54dbab22dbcd8cf98f57a96c6636',1,'hipDeviceArch_t']]], - ['hassharedfloatatomicexch',['hasSharedFloatAtomicExch',['../structhipDeviceArch__t.html#aff005558b4edabd27b27f286ac5b2f2b',1,'hipDeviceArch_t']]], - 
['hassharedint32atomics',['hasSharedInt32Atomics',['../structhipDeviceArch__t.html#a1596330b1cb9cc73f142aee11b2ab853',1,'hipDeviceArch_t']]], - ['hassharedint64atomics',['hasSharedInt64Atomics',['../structhipDeviceArch__t.html#a00c2b930fcdcad9ea7b54b449db13966',1,'hipDeviceArch_t']]], - ['hassurfacefuncs',['hasSurfaceFuncs',['../structhipDeviceArch__t.html#a9eb2462148686d4c048b69b6e09f835e',1,'hipDeviceArch_t']]], - ['hassyncthreadsext',['hasSyncThreadsExt',['../structhipDeviceArch__t.html#ade6a3b21ad5f344dcd92c52102c274ba',1,'hipDeviceArch_t']]], - ['hasthreadfencesystem',['hasThreadFenceSystem',['../structhipDeviceArch__t.html#ac2818e3b91cba8beb36741e9867bb887',1,'hipDeviceArch_t']]], - ['haswarpballot',['hasWarpBallot',['../structhipDeviceArch__t.html#af1e934a8a5106995bcc256287585564c',1,'hipDeviceArch_t']]], - ['haswarpshuffle',['hasWarpShuffle',['../structhipDeviceArch__t.html#a3d922e8fc97ca1e8ecc39600b138fa2d',1,'hipDeviceArch_t']]], - ['haswarpvote',['hasWarpVote',['../structhipDeviceArch__t.html#a35bde017352eca1d4e0eceb3bf79f274',1,'hipDeviceArch_t']]], - ['hcc_2dspecific_20accessors',['HCC-Specific Accessors',['../group__HCC__Specific.html',1,'']]], - ['hip_20environment_20variables',['HIP Environment Variables',['../group__HIP-ENV.html',1,'']]], - ['hip_5fhcc_2ecpp',['hip_hcc.cpp',['../hip__hcc_8cpp.html',1,'']]], - ['hip_5flaunch_5fblocking',['HIP_LAUNCH_BLOCKING',['../group__HIP-ENV.html#ga8049b329f2663b4572d81e7a9aa8a155',1,'HIP_LAUNCH_BLOCKING(): hip_hcc.cpp'],['../group__HIP-ENV.html#ga8049b329f2663b4572d81e7a9aa8a155',1,'HIP_LAUNCH_BLOCKING(): hip_hcc.cpp']]], - ['hip_5fprint_5fenv',['HIP_PRINT_ENV',['../group__HIP-ENV.html#ga1e1c85dbb250f1acfb484c1be1f3b28a',1,'HIP_PRINT_ENV(): hip_hcc.cpp'],['../group__HIP-ENV.html#ga1e1c85dbb250f1acfb484c1be1f3b28a',1,'HIP_PRINT_ENV(): hip_hcc.cpp']]], - ['hip_5fruntime_2eh',['hip_runtime.h',['../hcc__detail_2hip__runtime_8h.html',1,'']]], - 
['hip_5ftrace_5fapi',['HIP_TRACE_API',['../group__HIP-ENV.html#gaae9c541f3e25b8f002762337a03fec28',1,'HIP_TRACE_API(): hip_hcc.cpp'],['../group__HIP-ENV.html#gaae9c541f3e25b8f002762337a03fec28',1,'HIP_TRACE_API(): hip_hcc.cpp']]], - ['hipchannelformatdesc',['hipChannelFormatDesc',['../structhipChannelFormatDesc.html',1,'']]], - ['hipdevicearch_5ft',['hipDeviceArch_t',['../structhipDeviceArch__t.html',1,'']]], - ['hipdevicecanaccesspeer',['hipDeviceCanAccessPeer',['../group__PeerToPeer.html#gab53a55dbc087ff659918fd04287de3d3',1,'hipDeviceCanAccessPeer(int *canAccessPeer, int device, int peerDevice): hip_hcc.cpp'],['../group__PeerToPeer.html#gab53a55dbc087ff659918fd04287de3d3',1,'hipDeviceCanAccessPeer(int *canAccessPeer, int device, int peerDevice): hip_hcc.cpp']]], - ['hipdevicedisablepeeraccess',['hipDeviceDisablePeerAccess',['../group__PeerToPeer.html#ga41e60c01f63597529da1cd77bdd55379',1,'hipDeviceDisablePeerAccess(int peerDevice): hip_hcc.cpp'],['../group__PeerToPeer.html#ga41e60c01f63597529da1cd77bdd55379',1,'hipDeviceDisablePeerAccess(int peerDevice): hip_hcc.cpp']]], - ['hipdeviceenablepeeraccess',['hipDeviceEnablePeerAccess',['../group__PeerToPeer.html#ga098e0d626edbfb69b66d141a5a8b7dc6',1,'hipDeviceEnablePeerAccess(int peerDevice, unsigned int flags): hip_hcc.cpp'],['../group__PeerToPeer.html#ga098e0d626edbfb69b66d141a5a8b7dc6',1,'hipDeviceEnablePeerAccess(int peerDevice, unsigned int flags): hip_hcc.cpp']]], - ['hipdevicegetcacheconfig',['hipDeviceGetCacheConfig',['../group__Device.html#gaeeffa2456c5430400bea75ecd6ad1e68',1,'hipDeviceGetCacheConfig(hipFuncCache *cacheConfig): hip_hcc.cpp'],['../group__Device.html#gaeeffa2456c5430400bea75ecd6ad1e68',1,'hipDeviceGetCacheConfig(hipFuncCache *cacheConfig): hip_hcc.cpp']]], - ['hipdevicegetproperties',['hipDeviceGetProperties',['../group__Device.html#gad9ee6822e3e55431811fb6a00f7a1c10',1,'hipDeviceGetProperties(hipDeviceProp_t *prop, int device): 
hip_hcc.cpp'],['../group__Device.html#gad9ee6822e3e55431811fb6a00f7a1c10',1,'hipDeviceGetProperties(hipDeviceProp_t *props, int device): hip_hcc.cpp']]], - ['hipdevicegetsharedmemconfig',['hipDeviceGetSharedMemConfig',['../group__Device.html#ga1bb08f774a34a468d969a8a04791c9bb',1,'hipDeviceGetSharedMemConfig(hipSharedMemConfig *pConfig): hip_hcc.cpp'],['../group__Device.html#ga1bb08f774a34a468d969a8a04791c9bb',1,'hipDeviceGetSharedMemConfig(hipSharedMemConfig *pConfig): hip_hcc.cpp']]], - ['hipdeviceprop_5ft',['hipDeviceProp_t',['../structhipDeviceProp__t.html',1,'']]], - ['hipdevicereset',['hipDeviceReset',['../group__Device.html#ga8d57161ae56a8edc46eeda447417bf6c',1,'hipDeviceReset(void): hip_hcc.cpp'],['../group__Device.html#ga8d57161ae56a8edc46eeda447417bf6c',1,'hipDeviceReset(void): hip_hcc.cpp']]], - ['hipdevicesetcacheconfig',['hipDeviceSetCacheConfig',['../group__Device.html#gac2b282179f29c4c0ca7b5391242c6a4c',1,'hipDeviceSetCacheConfig(hipFuncCache cacheConfig): hip_hcc.cpp'],['../group__Device.html#gac2b282179f29c4c0ca7b5391242c6a4c',1,'hipDeviceSetCacheConfig(hipFuncCache cacheConfig): hip_hcc.cpp']]], - ['hipdevicesetsharedmemconfig',['hipDeviceSetSharedMemConfig',['../group__Device.html#ga9b1f279084e76691cedfbfadf9c717ee',1,'hipDeviceSetSharedMemConfig(hipSharedMemConfig config): hip_hcc.cpp'],['../group__Device.html#ga9b1f279084e76691cedfbfadf9c717ee',1,'hipDeviceSetSharedMemConfig(hipSharedMemConfig config): hip_hcc.cpp']]], - ['hipdevicesynchronize',['hipDeviceSynchronize',['../group__Device.html#gaefdc2847fb1d6c3fb1354e827a191ebd',1,'hipDeviceSynchronize(void): hip_hcc.cpp'],['../group__Device.html#gaefdc2847fb1d6c3fb1354e827a191ebd',1,'hipDeviceSynchronize(void): hip_hcc.cpp']]], - ['hipdrivergetversion',['hipDriverGetVersion',['../group__Version.html#gaf6c342f52d2a29a0aca5cdd89b4dd47c',1,'hipDriverGetVersion(int *driverVersion): hip_hcc.cpp'],['../group__Version.html#gaf6c342f52d2a29a0aca5cdd89b4dd47c',1,'hipDriverGetVersion(int *driverVersion): 
hip_hcc.cpp']]], - ['hiperror_5ft',['hipError_t',['../group__GlobalDefs.html#gadf5010f6e140a53ecbdf949e73e87594',1,'hip_runtime_api.h']]], - ['hiperrorinvaliddevice',['hipErrorInvalidDevice',['../group__GlobalDefs.html#ggadf5010f6e140a53ecbdf949e73e87594a07ab9b704ea693c1781a52741c60cd0d',1,'hip_runtime_api.h']]], - ['hiperrorinvalidresourcehandle',['hipErrorInvalidResourceHandle',['../group__GlobalDefs.html#ggadf5010f6e140a53ecbdf949e73e87594a88e525a7c8f35552dfada58e9f2f6d3a',1,'hip_runtime_api.h']]], - ['hiperrorinvalidvalue',['hipErrorInvalidValue',['../group__GlobalDefs.html#ggadf5010f6e140a53ecbdf949e73e87594a1e8215fe1108a508bad3944bce7b4d83',1,'hip_runtime_api.h']]], - ['hiperrormemoryallocation',['hipErrorMemoryAllocation',['../group__GlobalDefs.html#ggadf5010f6e140a53ecbdf949e73e87594a8293288a10109874749afe2562db09f2',1,'hip_runtime_api.h']]], - ['hiperrormemoryfree',['hipErrorMemoryFree',['../group__GlobalDefs.html#ggadf5010f6e140a53ecbdf949e73e87594a813b3f014e2a3932d1f0e3e712cf9d3c',1,'hip_runtime_api.h']]], - ['hiperrornodevice',['hipErrorNoDevice',['../group__GlobalDefs.html#ggadf5010f6e140a53ecbdf949e73e87594ad4406972c318df36d231310a15131c24',1,'hip_runtime_api.h']]], - ['hiperrornotready',['hipErrorNotReady',['../group__GlobalDefs.html#ggadf5010f6e140a53ecbdf949e73e87594aa9638063c8746a9d1fda2b2069a0a9f1',1,'hip_runtime_api.h']]], - ['hiperroroutofresources',['hipErrorOutOfResources',['../group__GlobalDefs.html#ggadf5010f6e140a53ecbdf949e73e87594a60c1c080b79bdde9ef5e808f974ac9ed',1,'hip_runtime_api.h']]], - ['hiperrortbd',['hipErrorTbd',['../group__GlobalDefs.html#ggadf5010f6e140a53ecbdf949e73e87594ab556409e11ddb0c4cf77a2f4fc91ea9e',1,'hip_runtime_api.h']]], - ['hiperrorunknown',['hipErrorUnknown',['../group__GlobalDefs.html#ggadf5010f6e140a53ecbdf949e73e87594aa74e64c5b2f5fb0d6a92681f5b234073',1,'hip_runtime_api.h']]], - 
['hiperrorunknownsymbol',['hipErrorUnknownSymbol',['../group__GlobalDefs.html#ggadf5010f6e140a53ecbdf949e73e87594a45b297e6c3b2029dce1348658421481b',1,'hip_runtime_api.h']]], - ['hipevent_5ft',['hipEvent_t',['../structhipEvent__t.html',1,'']]], - ['hipeventblockingsync',['hipEventBlockingSync',['../group__GlobalDefs.html#gafa1c076a5b991763a98695063f1ea11d',1,'hip_runtime_api.h']]], - ['hipeventcreatewithflags',['hipEventCreateWithFlags',['../group__Event.html#gae86a5acb1b22b61bc9ecb9c28fc71b75',1,'hipEventCreateWithFlags(hipEvent_t *event, unsigned flags): hip_hcc.cpp'],['../group__Event.html#gae86a5acb1b22b61bc9ecb9c28fc71b75',1,'hipEventCreateWithFlags(hipEvent_t *event, unsigned flags): hip_hcc.cpp']]], - ['hipeventdefault',['hipEventDefault',['../group__GlobalDefs.html#ga122a5853359eba97cf047ddd153740f0',1,'hip_runtime_api.h']]], - ['hipeventdestroy',['hipEventDestroy',['../group__Event.html#ga83260357dce0c39e8c6a3c74ec97484c',1,'hipEventDestroy(hipEvent_t event): hip_hcc.cpp'],['../group__Event.html#ga83260357dce0c39e8c6a3c74ec97484c',1,'hipEventDestroy(hipEvent_t event): hip_hcc.cpp']]], - ['hipeventdisabletiming',['hipEventDisableTiming',['../group__GlobalDefs.html#ga3c0f44a85e36a4c67671da6bcdad0351',1,'hip_runtime_api.h']]], - ['hipeventelapsedtime',['hipEventElapsedTime',['../group__Event.html#gad4128b815cb475c8e13c7e66ff6250b7',1,'hipEventElapsedTime(float *ms, hipEvent_t start, hipEvent_t stop): hip_hcc.cpp'],['../group__Event.html#gad4128b815cb475c8e13c7e66ff6250b7',1,'hipEventElapsedTime(float *ms, hipEvent_t start, hipEvent_t stop): hip_hcc.cpp']]], - ['hipeventinterprocess',['hipEventInterprocess',['../group__GlobalDefs.html#ga0f01d74059baa704e42aeff8222166bb',1,'hip_runtime_api.h']]], - ['hipeventquery',['hipEventQuery',['../group__Event.html#ga5d12d7b798b5ceb5932d1ac21f5ac776',1,'hipEventQuery(hipEvent_t event): hip_hcc.cpp'],['../group__Event.html#ga5d12d7b798b5ceb5932d1ac21f5ac776',1,'hipEventQuery(hipEvent_t event): hip_hcc.cpp']]], - 
['hipeventrecord',['hipEventRecord',['../group__Event.html#gace88ebd8c7ec42a6c2cebda2e8b0cb38',1,'hipEventRecord(hipEvent_t event, hipStream_t stream=NULL): hip_hcc.cpp'],['../group__Event.html#gace88ebd8c7ec42a6c2cebda2e8b0cb38',1,'hipEventRecord(hipEvent_t event, hipStream_t stream): hip_hcc.cpp']]], - ['hipeventsynchronize',['hipEventSynchronize',['../group__Event.html#ga1f72d98ba5d6f7dc3da54e0c41fe38b1',1,'hipEventSynchronize(hipEvent_t event): hip_hcc.cpp'],['../group__Event.html#ga1f72d98ba5d6f7dc3da54e0c41fe38b1',1,'hipEventSynchronize(hipEvent_t event): hip_hcc.cpp']]], - ['hipfree',['hipFree',['../group__Memory.html#ga740d08da65cae1441ba32f8fedb863d1',1,'hipFree(void *ptr): hip_hcc.cpp'],['../group__Memory.html#ga740d08da65cae1441ba32f8fedb863d1',1,'hipFree(void *ptr): hip_hcc.cpp']]], - ['hipfreehost',['hipFreeHost',['../group__Memory.html#ga28d7d92836116dfadeb62e416ee887d3',1,'hipFreeHost(void *ptr): hip_hcc.cpp'],['../group__Memory.html#ga28d7d92836116dfadeb62e416ee887d3',1,'hipFreeHost(void *ptr): hip_hcc.cpp']]], - ['hipfunccache',['hipFuncCache',['../group__GlobalDefs.html#gac7e4bfd88340fc06642136c839a3d822',1,'hipFuncCache(): hip_runtime_api.h'],['../group__GlobalDefs.html#gaad15dc7939a0a25b16e4aa161fb41eee',1,'hipFuncCache(): hip_runtime_api.h']]], - ['hipfunccachepreferequal',['hipFuncCachePreferEqual',['../group__GlobalDefs.html#ggac7e4bfd88340fc06642136c839a3d822a0ddab0e840107634a152033103be44d7',1,'hip_runtime_api.h']]], - ['hipfunccachepreferl1',['hipFuncCachePreferL1',['../group__GlobalDefs.html#ggac7e4bfd88340fc06642136c839a3d822a636a3c140db6b9d4a8bf7d5a61c398c5',1,'hip_runtime_api.h']]], - ['hipfunccacheprefernone',['hipFuncCachePreferNone',['../group__GlobalDefs.html#ggac7e4bfd88340fc06642136c839a3d822a0813fbaa008ce1231ff9fed3911eb3af',1,'hip_runtime_api.h']]], - ['hipfunccacheprefershared',['hipFuncCachePreferShared',['../group__GlobalDefs.html#ggac7e4bfd88340fc06642136c839a3d822a9b34337dfbadba25ed2aa270bbcabc43',1,'hip_runtime_api.h']]], 
- ['hipfuncsetcacheconfig',['hipFuncSetCacheConfig',['../group__Device.html#gadd94a910c2b840833cc325b1e5425702',1,'hipFuncSetCacheConfig(hipFuncCache config): hip_hcc.cpp'],['../group__Device.html#gadd94a910c2b840833cc325b1e5425702',1,'hipFuncSetCacheConfig(hipFuncCache cacheConfig): hip_hcc.cpp']]], - ['hipgetdevice',['hipGetDevice',['../group__Device.html#gaffc83567f2df3bbe2d37a19872d60f24',1,'hipGetDevice(int *device): hip_hcc.cpp'],['../group__Device.html#gaffc83567f2df3bbe2d37a19872d60f24',1,'hipGetDevice(int *device): hip_hcc.cpp']]], - ['hipgetdevicecount',['hipGetDeviceCount',['../group__Device.html#ga8555d5c76d88c50ddbf54ae70b568394',1,'hipGetDeviceCount(int *count): hip_hcc.cpp'],['../group__Device.html#ga8555d5c76d88c50ddbf54ae70b568394',1,'hipGetDeviceCount(int *count): hip_hcc.cpp']]], - ['hipgeterrorname',['hipGetErrorName',['../group__Error.html#ga88c474d77635523dbf6ca67be7b56999',1,'hipGetErrorName(hipError_t hip_error): hip_hcc.cpp'],['../group__Error.html#ga88c474d77635523dbf6ca67be7b56999',1,'hipGetErrorName(hipError_t hip_error): hip_hcc.cpp']]], - ['hipgeterrorstring',['hipGetErrorString',['../group__Error.html#ga5959779a654bbc98ffe6d36ab536740a',1,'hipGetErrorString(hipError_t hip_error): hip_hcc.cpp'],['../group__Error.html#ga5959779a654bbc98ffe6d36ab536740a',1,'hipGetErrorString(hipError_t hip_error): hip_hcc.cpp']]], - ['hipgetlasterror',['hipGetLastError',['../group__Error.html#ga533daeb9114d7fc2db8d867adf9e419b',1,'hipGetLastError(void): hip_hcc.cpp'],['../group__Error.html#ga533daeb9114d7fc2db8d867adf9e419b',1,'hipGetLastError(): hip_hcc.cpp']]], - ['hiphccgetaccelerator',['hipHccGetAccelerator',['../group__HCC__Specific.html#ga0d24b3157fd1b16d38672bb157ec4cd4',1,'hipHccGetAccelerator(int deviceId, hc::accelerator *acc): hip_hcc.cpp'],['../group__HCC__Specific.html#ga0d24b3157fd1b16d38672bb157ec4cd4',1,'hipHccGetAccelerator(int deviceId, hc::accelerator *acc): hip_hcc.cpp']]], - 
['hiphccgetacceleratorview',['hipHccGetAcceleratorView',['../group__HCC__Specific.html#ga1a7087ea9c3c3323270d7cce73650b44',1,'hipHccGetAcceleratorView(hipStream_t stream, hc::accelerator_view **av): hip_hcc.cpp'],['../group__HCC__Specific.html#ga1a7087ea9c3c3323270d7cce73650b44',1,'hipHccGetAcceleratorView(hipStream_t stream, hc::accelerator_view **av): hip_hcc.cpp']]], - ['hipmalloc',['hipMalloc',['../group__Memory.html#ga4c6fcfe80010069d2792780d00dcead2',1,'hipMalloc(void **ptr, size_t size): hip_hcc.cpp'],['../group__Memory.html#ga4c6fcfe80010069d2792780d00dcead2',1,'hipMalloc(void **ptr, size_t sizeBytes): hip_hcc.cpp']]], - ['hipmallochost',['hipMallocHost',['../group__Memory.html#ga66399e729223ff5b66ffc16297c0710e',1,'hipMallocHost(void **ptr, size_t size): hip_hcc.cpp'],['../group__Memory.html#ga66399e729223ff5b66ffc16297c0710e',1,'hipMallocHost(void **ptr, size_t sizeBytes): hip_hcc.cpp']]], - ['hipmemcpy',['hipMemcpy',['../group__Memory.html#gac1a055d288302edd641c6d7416858e1e',1,'hipMemcpy(void *dst, const void *src, size_t sizeBytes, hipMemcpyKind kind): hip_hcc.cpp'],['../group__Memory.html#gac1a055d288302edd641c6d7416858e1e',1,'hipMemcpy(void *dst, const void *src, size_t sizeBytes, hipMemcpyKind kind): hip_hcc.cpp']]], - ['hipmemcpyasync',['hipMemcpyAsync',['../group__Memory.html#ga8ad5a0b13458917e1b9437732b21af54',1,'hipMemcpyAsync(void *dst, const void *src, size_t sizeBytes, hipMemcpyKind kind, hipStream_t stream=0): hip_hcc.cpp'],['../group__Memory.html#ga8ad5a0b13458917e1b9437732b21af54',1,'hipMemcpyAsync(void *dst, const void *src, size_t sizeBytes, hipMemcpyKind kind, hipStream_t stream): hip_hcc.cpp']]], - ['hipmemcpydefault',['hipMemcpyDefault',['../group__GlobalDefs.html#gga232e222db36b1fc672ba98054d036a18a4e37107e416f79a2edf2b6534163c823',1,'hip_runtime_api.h']]], - ['hipmemcpydevicetodevice',['hipMemcpyDeviceToDevice',['../group__GlobalDefs.html#gga232e222db36b1fc672ba98054d036a18abd05a09d3105e0ce25b34dd91cf83f88',1,'hip_runtime_api.h']]], 
- ['hipmemcpydevicetohost',['hipMemcpyDeviceToHost',['../group__GlobalDefs.html#gga232e222db36b1fc672ba98054d036a18aba2505e9ce1e5382f17730bc670917d1',1,'hip_runtime_api.h']]], - ['hipmemcpyhosttodevice',['hipMemcpyHostToDevice',['../group__GlobalDefs.html#gga232e222db36b1fc672ba98054d036a18aff32175ecb0c7113200286eff8211008',1,'hip_runtime_api.h']]], - ['hipmemcpyhosttohost',['hipMemcpyHostToHost',['../group__GlobalDefs.html#gga232e222db36b1fc672ba98054d036a18a9d66b705aa85a9c83f0f533cef70d0af',1,'hip_runtime_api.h']]], - ['hipmemcpykind',['hipMemcpyKind',['../group__GlobalDefs.html#ga232e222db36b1fc672ba98054d036a18',1,'hip_runtime_api.h']]], - ['hipmemcpypeerasync',['hipMemcpyPeerAsync',['../group__PeerToPeer.html#gab6211c18ca1e23252ef080cd6be855ca',1,'hipMemcpyPeerAsync(void *dst, int dstDevice, const void *src, int srcDevice, size_t sizeBytes, hipStream_t stream=0): hip_hcc.cpp'],['../group__PeerToPeer.html#gab6211c18ca1e23252ef080cd6be855ca',1,'hipMemcpyPeerAsync(void *dst, int dstDevice, const void *src, int srcDevice, size_t sizeBytes, hipStream_t stream): hip_hcc.cpp']]], - ['hippeekatlasterror',['hipPeekAtLastError',['../group__Error.html#ga1dd660bc739f7e13edd34615660f0148',1,'hip_runtime_api.h']]], - ['hipsetdevice',['hipSetDevice',['../group__Device.html#ga8ec0b093af0adadc7fe98bf33fa21620',1,'hipSetDevice(int device): hip_hcc.cpp'],['../group__Device.html#ga8ec0b093af0adadc7fe98bf33fa21620',1,'hipSetDevice(int device): hip_hcc.cpp']]], - ['hipsharedmembanksizedefault',['hipSharedMemBankSizeDefault',['../group__GlobalDefs.html#gga2e17b71d94ac350f2ccd914fd49d104eaf5b325c9b7bde878913f768eaba5014d',1,'hip_runtime_api.h']]], - ['hipsharedmembanksizeeightbyte',['hipSharedMemBankSizeEightByte',['../group__GlobalDefs.html#gga2e17b71d94ac350f2ccd914fd49d104ea64518b4f5a25f536c883330167e79258',1,'hip_runtime_api.h']]], - 
['hipsharedmembanksizefourbyte',['hipSharedMemBankSizeFourByte',['../group__GlobalDefs.html#gga2e17b71d94ac350f2ccd914fd49d104ea0a95a6e0c33106c42d66ab9476ff954a',1,'hip_runtime_api.h']]], - ['hipsharedmemconfig',['hipSharedMemConfig',['../group__GlobalDefs.html#ga2e17b71d94ac350f2ccd914fd49d104e',1,'hipSharedMemConfig(): hip_runtime_api.h'],['../group__GlobalDefs.html#ga6b1ca424fa26a5fb718937d662eaee7f',1,'hipSharedMemConfig(): hip_runtime_api.h']]], - ['hipstreamcreatewithflags',['hipStreamCreateWithFlags',['../group__Stream.html#gaf2382e3cc6632332a8983a0f58e43494',1,'hipStreamCreateWithFlags(hipStream_t *stream, unsigned int flags): hip_hcc.cpp'],['../group__Stream.html#gaf2382e3cc6632332a8983a0f58e43494',1,'hipStreamCreateWithFlags(hipStream_t *stream, unsigned int flags): hip_hcc.cpp']]], - ['hipstreamdefault',['hipStreamDefault',['../group__GlobalDefs.html#ga6df5f70eb976836ab3598cacf0ffcdf9',1,'hip_runtime_api.h']]], - ['hipstreamdestroy',['hipStreamDestroy',['../group__Stream.html#ga3076a3499ed2c7821311006100bb95ec',1,'hipStreamDestroy(hipStream_t stream): hip_hcc.cpp'],['../group__Stream.html#ga3076a3499ed2c7821311006100bb95ec',1,'hipStreamDestroy(hipStream_t stream): hip_hcc.cpp']]], - ['hipstreamgetflags',['hipStreamGetFlags',['../group__Stream.html#ga3249555a26439591b8873f70b39bb116',1,'hipStreamGetFlags(hipStream_t stream, unsigned int *flags): hip_hcc.cpp'],['../group__Stream.html#ga3249555a26439591b8873f70b39bb116',1,'hipStreamGetFlags(hipStream_t stream, unsigned int *flags): hip_hcc.cpp']]], - ['hipstreamnonblocking',['hipStreamNonBlocking',['../group__GlobalDefs.html#gaaba9ae995d9b43b7d1ee70c6fa12c57d',1,'hip_runtime_api.h']]], - ['hipstreamsynchronize',['hipStreamSynchronize',['../group__Stream.html#gabbfb9f573a6ebe8c478605ecb5504a74',1,'hipStreamSynchronize(hipStream_t stream): hip_hcc.cpp'],['../group__Stream.html#gabbfb9f573a6ebe8c478605ecb5504a74',1,'hipStreamSynchronize(hipStream_t stream): hip_hcc.cpp']]], - 
['hipstreamwaitevent',['hipStreamWaitEvent',['../group__Stream.html#gacdd84c8f8ef1539c96c57c1d5bcae633',1,'hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int flags): hip_hcc.cpp'],['../group__Stream.html#gacdd84c8f8ef1539c96c57c1d5bcae633',1,'hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int flags): hip_hcc.cpp']]], - ['hipsuccess',['hipSuccess',['../group__GlobalDefs.html#ggadf5010f6e140a53ecbdf949e73e87594aadfbdb847b149723c684ebd764556063',1,'hip_runtime_api.h']]], - ['hipthreadidx_5fx',['hipThreadIdx_x',['../hcc__detail_2hip__runtime_8h.html#a48f5f9da77c5fab1fbcf0205bb347d89',1,'hip_runtime.h']]], - ['heterogeneous_2dcomputing_20interface_20for_20portability_20_28hip_29',['Heterogeneous-computing Interface for Portability (HIP)',['../index.html',1,'']]] + ['global_20enum_20and_20defines',['Global enum and defines',['../group__GlobalDefs.html',1,'']]] ]; diff --git a/projects/hip/docs/RuntimeAPI/html/search/all_7.js b/projects/hip/docs/RuntimeAPI/html/search/all_7.js index 69beb95adf..be2e145b56 100644 --- a/projects/hip/docs/RuntimeAPI/html/search/all_7.js +++ b/projects/hip/docs/RuntimeAPI/html/search/all_7.js @@ -1,6 +1,144 @@ var searchData= [ - ['ihipdevice_5ft',['ihipDevice_t',['../structihipDevice__t.html',1,'']]], - ['ihipevent_5ft',['ihipEvent_t',['../structihipEvent__t.html',1,'']]], - ['ihipstream_5ft',['ihipStream_t',['../structihipStream__t.html',1,'']]] + ['hip_20api',['HIP API',['../group__API.html',1,'']]], + ['has3dgrid',['has3dGrid',['../structhipDeviceArch__t.html#aa5e22d295cce0d9a34ee6e7e7e378c26',1,'hipDeviceArch_t']]], + ['hasdoubles',['hasDoubles',['../structhipDeviceArch__t.html#a2d624e3d85e615b71d1182f8912893b4',1,'hipDeviceArch_t']]], + ['hasdynamicparallelism',['hasDynamicParallelism',['../structhipDeviceArch__t.html#a9114bfc718bf0648b54ff9a319a36b35',1,'hipDeviceArch_t']]], + 
['hasfloatatomicadd',['hasFloatAtomicAdd',['../structhipDeviceArch__t.html#a2321d6ef74aac91c044f3289d25b2d41',1,'hipDeviceArch_t']]], + ['hasfunnelshift',['hasFunnelShift',['../structhipDeviceArch__t.html#aaec4f2d983d4602858fae8e9ddeee3ff',1,'hipDeviceArch_t']]], + ['hasglobalfloatatomicexch',['hasGlobalFloatAtomicExch',['../structhipDeviceArch__t.html#a8f213ae9a4729dff1c636ac5de0e2fa2',1,'hipDeviceArch_t']]], + ['hasglobalint32atomics',['hasGlobalInt32Atomics',['../structhipDeviceArch__t.html#a8d00c3ab98869b602c714fe7abe68e93',1,'hipDeviceArch_t']]], + ['hasglobalint64atomics',['hasGlobalInt64Atomics',['../structhipDeviceArch__t.html#ad5aa54dbab22dbcd8cf98f57a96c6636',1,'hipDeviceArch_t']]], + ['hassharedfloatatomicexch',['hasSharedFloatAtomicExch',['../structhipDeviceArch__t.html#aff005558b4edabd27b27f286ac5b2f2b',1,'hipDeviceArch_t']]], + ['hassharedint32atomics',['hasSharedInt32Atomics',['../structhipDeviceArch__t.html#a1596330b1cb9cc73f142aee11b2ab853',1,'hipDeviceArch_t']]], + ['hassharedint64atomics',['hasSharedInt64Atomics',['../structhipDeviceArch__t.html#a00c2b930fcdcad9ea7b54b449db13966',1,'hipDeviceArch_t']]], + ['hassurfacefuncs',['hasSurfaceFuncs',['../structhipDeviceArch__t.html#a9eb2462148686d4c048b69b6e09f835e',1,'hipDeviceArch_t']]], + ['hassyncthreadsext',['hasSyncThreadsExt',['../structhipDeviceArch__t.html#ade6a3b21ad5f344dcd92c52102c274ba',1,'hipDeviceArch_t']]], + ['hasthreadfencesystem',['hasThreadFenceSystem',['../structhipDeviceArch__t.html#ac2818e3b91cba8beb36741e9867bb887',1,'hipDeviceArch_t']]], + ['haswarpballot',['hasWarpBallot',['../structhipDeviceArch__t.html#af1e934a8a5106995bcc256287585564c',1,'hipDeviceArch_t']]], + ['haswarpshuffle',['hasWarpShuffle',['../structhipDeviceArch__t.html#a3d922e8fc97ca1e8ecc39600b138fa2d',1,'hipDeviceArch_t']]], + ['haswarpvote',['hasWarpVote',['../structhipDeviceArch__t.html#a35bde017352eca1d4e0eceb3bf79f274',1,'hipDeviceArch_t']]], + ['hcc_2dspecific_20accessors',['HCC-Specific 
Accessors',['../group__HCC__Specific.html',1,'']]], + ['hip_20environment_20variables',['HIP Environment Variables',['../group__HIP-ENV.html',1,'']]], + ['hip_5fhcc_2ecpp',['hip_hcc.cpp',['../hip__hcc_8cpp.html',1,'']]], + ['hip_5flaunch_5fblocking',['HIP_LAUNCH_BLOCKING',['../group__HIP-ENV.html#ga8049b329f2663b4572d81e7a9aa8a155',1,'HIP_LAUNCH_BLOCKING(): hip_hcc.cpp'],['../group__HIP-ENV.html#ga8049b329f2663b4572d81e7a9aa8a155',1,'HIP_LAUNCH_BLOCKING(): hip_hcc.cpp']]], + ['hip_5fprint_5fenv',['HIP_PRINT_ENV',['../group__HIP-ENV.html#ga1e1c85dbb250f1acfb484c1be1f3b28a',1,'HIP_PRINT_ENV(): hip_hcc.cpp'],['../group__HIP-ENV.html#ga1e1c85dbb250f1acfb484c1be1f3b28a',1,'HIP_PRINT_ENV(): hip_hcc.cpp']]], + ['hip_5fruntime_2eh',['hip_runtime.h',['../hcc__detail_2hip__runtime_8h.html',1,'']]], + ['hip_5fruntime_5fapi_2eh',['hip_runtime_api.h',['../hcc__detail_2hip__runtime__api_8h.html',1,'']]], + ['hip_5ftexture_2eh',['hip_texture.h',['../hip__texture_8h.html',1,'']]], + ['hip_5ftrace_5fapi',['HIP_TRACE_API',['../group__HIP-ENV.html#gaae9c541f3e25b8f002762337a03fec28',1,'HIP_TRACE_API(): hip_hcc.cpp'],['../group__HIP-ENV.html#gaae9c541f3e25b8f002762337a03fec28',1,'HIP_TRACE_API(): hip_hcc.cpp']]], + ['hip_5fvector_5ftypes_2eh',['hip_vector_types.h',['../hcc__detail_2hip__vector__types_8h.html',1,'']]], + ['hipchannelformatdesc',['hipChannelFormatDesc',['../structhipChannelFormatDesc.html',1,'']]], + ['hipdevicearch_5ft',['hipDeviceArch_t',['../structhipDeviceArch__t.html',1,'']]], + ['hipdeviceattribute_5ft',['hipDeviceAttribute_t',['../group__GlobalDefs.html#gacc0acd7b9bda126c6bb3dfd6e2796d7c',1,'hip_runtime_api.h']]], + ['hipdeviceattributeclockrate',['hipDeviceAttributeClockRate',['../group__GlobalDefs.html#ggacc0acd7b9bda126c6bb3dfd6e2796d7ca2300e077e020e7967592065561373b00',1,'hip_runtime_api.h']]], + 
['hipdeviceattributecomputecapabilitymajor',['hipDeviceAttributeComputeCapabilityMajor',['../group__GlobalDefs.html#ggacc0acd7b9bda126c6bb3dfd6e2796d7ca2735739cf977b7d303266f6781131e8d',1,'hip_runtime_api.h']]], + ['hipdeviceattributecomputecapabilityminor',['hipDeviceAttributeComputeCapabilityMinor',['../group__GlobalDefs.html#ggacc0acd7b9bda126c6bb3dfd6e2796d7ca38edc4fcae456e47160d349da3249b85',1,'hip_runtime_api.h']]], + ['hipdeviceattributecomputemode',['hipDeviceAttributeComputeMode',['../group__GlobalDefs.html#ggacc0acd7b9bda126c6bb3dfd6e2796d7ca4d0369a6ef7bd7890fdcabc16ed3385d',1,'hip_runtime_api.h']]], + ['hipdeviceattributeconcurrentkernels',['hipDeviceAttributeConcurrentKernels',['../group__GlobalDefs.html#ggacc0acd7b9bda126c6bb3dfd6e2796d7cad9f45254d0d048677f560032532d5504',1,'hip_runtime_api.h']]], + ['hipdeviceattributel2cachesize',['hipDeviceAttributeL2CacheSize',['../group__GlobalDefs.html#ggacc0acd7b9bda126c6bb3dfd6e2796d7ca582ae5a26a7148504878890028e4b64c',1,'hip_runtime_api.h']]], + ['hipdeviceattributemaxblockdimx',['hipDeviceAttributeMaxBlockDimX',['../group__GlobalDefs.html#ggacc0acd7b9bda126c6bb3dfd6e2796d7cac1e4ac589db0d8adbbc241e3d0fcd594',1,'hip_runtime_api.h']]], + ['hipdeviceattributemaxblockdimy',['hipDeviceAttributeMaxBlockDimY',['../group__GlobalDefs.html#ggacc0acd7b9bda126c6bb3dfd6e2796d7ca187dbffe12db09a56c0f75c340d879c9',1,'hip_runtime_api.h']]], + ['hipdeviceattributemaxblockdimz',['hipDeviceAttributeMaxBlockDimZ',['../group__GlobalDefs.html#ggacc0acd7b9bda126c6bb3dfd6e2796d7caf811f51e03d1ffb025d80ac1da088675',1,'hip_runtime_api.h']]], + ['hipdeviceattributemaxgriddimx',['hipDeviceAttributeMaxGridDimX',['../group__GlobalDefs.html#ggacc0acd7b9bda126c6bb3dfd6e2796d7ca03db8df0e7a9fbdaae683d97e8ac9c87',1,'hip_runtime_api.h']]], + ['hipdeviceattributemaxgriddimy',['hipDeviceAttributeMaxGridDimY',['../group__GlobalDefs.html#ggacc0acd7b9bda126c6bb3dfd6e2796d7ca5b5cc49972679c5ccf62b79425ee99df',1,'hip_runtime_api.h']]], + 
['hipdeviceattributemaxgriddimz',['hipDeviceAttributeMaxGridDimZ',['../group__GlobalDefs.html#ggacc0acd7b9bda126c6bb3dfd6e2796d7ca6c206ac083999caf4640e5d91dae24f7',1,'hip_runtime_api.h']]], + ['hipdeviceattributemaxregistersperblock',['hipDeviceAttributeMaxRegistersPerBlock',['../group__GlobalDefs.html#ggacc0acd7b9bda126c6bb3dfd6e2796d7ca82289b170192b6ea742be0efc6f95107',1,'hip_runtime_api.h']]], + ['hipdeviceattributemaxsharedmemoryperblock',['hipDeviceAttributeMaxSharedMemoryPerBlock',['../group__GlobalDefs.html#ggacc0acd7b9bda126c6bb3dfd6e2796d7ca7bca3aa18b26d40eba043ae93e15c7e5',1,'hip_runtime_api.h']]], + ['hipdeviceattributemaxsharedmemorypermultiprocessor',['hipDeviceAttributeMaxSharedMemoryPerMultiprocessor',['../group__GlobalDefs.html#ggacc0acd7b9bda126c6bb3dfd6e2796d7cad3e7f3d01533b32e12211172fcf410ba',1,'hip_runtime_api.h']]], + ['hipdeviceattributemaxthreadsperblock',['hipDeviceAttributeMaxThreadsPerBlock',['../group__GlobalDefs.html#ggacc0acd7b9bda126c6bb3dfd6e2796d7ca8327aa23782d9c994bdef33a6d62e02e',1,'hip_runtime_api.h']]], + ['hipdeviceattributemaxthreadspermultiprocessor',['hipDeviceAttributeMaxThreadsPerMultiProcessor',['../group__GlobalDefs.html#ggacc0acd7b9bda126c6bb3dfd6e2796d7caddc08922b491eb1f6a583833cbf4e2f0',1,'hip_runtime_api.h']]], + ['hipdeviceattributememoryclockrate',['hipDeviceAttributeMemoryClockRate',['../group__GlobalDefs.html#ggacc0acd7b9bda126c6bb3dfd6e2796d7ca6b68deafd65f036b30dc8051573eb000',1,'hip_runtime_api.h']]], + ['hipdeviceattributemultiprocessorcount',['hipDeviceAttributeMultiprocessorCount',['../group__GlobalDefs.html#ggacc0acd7b9bda126c6bb3dfd6e2796d7ca5c1519870733ccf0b83f722678240e5f',1,'hip_runtime_api.h']]], + ['hipdeviceattributepcibusid',['hipDeviceAttributePciBusId',['../group__GlobalDefs.html#ggacc0acd7b9bda126c6bb3dfd6e2796d7ca572b29c44f1322aa7657fdd784832f88',1,'hip_runtime_api.h']]], + 
['hipdeviceattributepcideviceid',['hipDeviceAttributePciDeviceId',['../group__GlobalDefs.html#ggacc0acd7b9bda126c6bb3dfd6e2796d7ca955d90286e87be9e3528f0b817ab32ff',1,'hip_runtime_api.h']]], + ['hipdeviceattributetotalconstantmemory',['hipDeviceAttributeTotalConstantMemory',['../group__GlobalDefs.html#ggacc0acd7b9bda126c6bb3dfd6e2796d7cac6089ac3a0f9c77cc382fb0eaa73ae9c',1,'hip_runtime_api.h']]], + ['hipdeviceattributewarpsize',['hipDeviceAttributeWarpSize',['../group__GlobalDefs.html#ggacc0acd7b9bda126c6bb3dfd6e2796d7caffd94133e823247a6f1215343232f6ec',1,'hip_runtime_api.h']]], + ['hipdevicecanaccesspeer',['hipDeviceCanAccessPeer',['../group__PeerToPeer.html#gab53a55dbc087ff659918fd04287de3d3',1,'hipDeviceCanAccessPeer(int *canAccessPeer, int device, int peerDevice): hip_hcc.cpp'],['../group__PeerToPeer.html#gab53a55dbc087ff659918fd04287de3d3',1,'hipDeviceCanAccessPeer(int *canAccessPeer, int device, int peerDevice): hip_hcc.cpp']]], + ['hipdevicedisablepeeraccess',['hipDeviceDisablePeerAccess',['../group__PeerToPeer.html#ga41e60c01f63597529da1cd77bdd55379',1,'hipDeviceDisablePeerAccess(int peerDevice): hip_hcc.cpp'],['../group__PeerToPeer.html#ga41e60c01f63597529da1cd77bdd55379',1,'hipDeviceDisablePeerAccess(int peerDevice): hip_hcc.cpp']]], + ['hipdeviceenablepeeraccess',['hipDeviceEnablePeerAccess',['../group__PeerToPeer.html#ga098e0d626edbfb69b66d141a5a8b7dc6',1,'hipDeviceEnablePeerAccess(int peerDevice, unsigned int flags): hip_hcc.cpp'],['../group__PeerToPeer.html#ga098e0d626edbfb69b66d141a5a8b7dc6',1,'hipDeviceEnablePeerAccess(int peerDevice, unsigned int flags): hip_hcc.cpp']]], + ['hipdevicegetattribute',['hipDeviceGetAttribute',['../group__Device.html#gac49518ff2b26b98ea2ec9e9268761a24',1,'hipDeviceGetAttribute(int *pi, hipDeviceAttribute_t attr, int device): hip_hcc.cpp'],['../group__Device.html#gac49518ff2b26b98ea2ec9e9268761a24',1,'hipDeviceGetAttribute(int *pi, hipDeviceAttribute_t attr, int device): hip_hcc.cpp']]], + 
['hipdevicegetcacheconfig',['hipDeviceGetCacheConfig',['../group__Device.html#gaeeffa2456c5430400bea75ecd6ad1e68',1,'hipDeviceGetCacheConfig(hipFuncCache *cacheConfig): hip_hcc.cpp'],['../group__Device.html#gaeeffa2456c5430400bea75ecd6ad1e68',1,'hipDeviceGetCacheConfig(hipFuncCache *cacheConfig): hip_hcc.cpp']]], + ['hipdevicegetproperties',['hipDeviceGetProperties',['../group__Device.html#gad9ee6822e3e55431811fb6a00f7a1c10',1,'hipDeviceGetProperties(hipDeviceProp_t *prop, int device): hip_hcc.cpp'],['../group__Device.html#gad9ee6822e3e55431811fb6a00f7a1c10',1,'hipDeviceGetProperties(hipDeviceProp_t *props, int device): hip_hcc.cpp']]], + ['hipdevicegetsharedmemconfig',['hipDeviceGetSharedMemConfig',['../group__Device.html#ga1bb08f774a34a468d969a8a04791c9bb',1,'hipDeviceGetSharedMemConfig(hipSharedMemConfig *pConfig): hip_hcc.cpp'],['../group__Device.html#ga1bb08f774a34a468d969a8a04791c9bb',1,'hipDeviceGetSharedMemConfig(hipSharedMemConfig *pConfig): hip_hcc.cpp']]], + ['hipdeviceprop_5ft',['hipDeviceProp_t',['../structhipDeviceProp__t.html',1,'']]], + ['hipdevicereset',['hipDeviceReset',['../group__Device.html#ga8d57161ae56a8edc46eeda447417bf6c',1,'hipDeviceReset(void): hip_hcc.cpp'],['../group__Device.html#ga8d57161ae56a8edc46eeda447417bf6c',1,'hipDeviceReset(void): hip_hcc.cpp']]], + ['hipdevicesetcacheconfig',['hipDeviceSetCacheConfig',['../group__Device.html#gac2b282179f29c4c0ca7b5391242c6a4c',1,'hipDeviceSetCacheConfig(hipFuncCache cacheConfig): hip_hcc.cpp'],['../group__Device.html#gac2b282179f29c4c0ca7b5391242c6a4c',1,'hipDeviceSetCacheConfig(hipFuncCache cacheConfig): hip_hcc.cpp']]], + ['hipdevicesetsharedmemconfig',['hipDeviceSetSharedMemConfig',['../group__Device.html#ga9b1f279084e76691cedfbfadf9c717ee',1,'hipDeviceSetSharedMemConfig(hipSharedMemConfig config): hip_hcc.cpp'],['../group__Device.html#ga9b1f279084e76691cedfbfadf9c717ee',1,'hipDeviceSetSharedMemConfig(hipSharedMemConfig config): hip_hcc.cpp']]], + 
['hipdevicesynchronize',['hipDeviceSynchronize',['../group__Device.html#gaefdc2847fb1d6c3fb1354e827a191ebd',1,'hipDeviceSynchronize(void): hip_hcc.cpp'],['../group__Device.html#gaefdc2847fb1d6c3fb1354e827a191ebd',1,'hipDeviceSynchronize(void): hip_hcc.cpp']]], + ['hipdrivergetversion',['hipDriverGetVersion',['../group__Version.html#gaf6c342f52d2a29a0aca5cdd89b4dd47c',1,'hipDriverGetVersion(int *driverVersion): hip_hcc.cpp'],['../group__Version.html#gaf6c342f52d2a29a0aca5cdd89b4dd47c',1,'hipDriverGetVersion(int *driverVersion): hip_hcc.cpp']]], + ['hiperror_5ft',['hipError_t',['../group__GlobalDefs.html#gadf5010f6e140a53ecbdf949e73e87594',1,'hip_runtime_api.h']]], + ['hiperrorinvaliddevice',['hipErrorInvalidDevice',['../group__GlobalDefs.html#ggadf5010f6e140a53ecbdf949e73e87594a07ab9b704ea693c1781a52741c60cd0d',1,'hip_runtime_api.h']]], + ['hiperrorinvalidresourcehandle',['hipErrorInvalidResourceHandle',['../group__GlobalDefs.html#ggadf5010f6e140a53ecbdf949e73e87594a88e525a7c8f35552dfada58e9f2f6d3a',1,'hip_runtime_api.h']]], + ['hiperrorinvalidvalue',['hipErrorInvalidValue',['../group__GlobalDefs.html#ggadf5010f6e140a53ecbdf949e73e87594a1e8215fe1108a508bad3944bce7b4d83',1,'hip_runtime_api.h']]], + ['hiperrormemoryallocation',['hipErrorMemoryAllocation',['../group__GlobalDefs.html#ggadf5010f6e140a53ecbdf949e73e87594a8293288a10109874749afe2562db09f2',1,'hip_runtime_api.h']]], + ['hiperrormemoryfree',['hipErrorMemoryFree',['../group__GlobalDefs.html#ggadf5010f6e140a53ecbdf949e73e87594a813b3f014e2a3932d1f0e3e712cf9d3c',1,'hip_runtime_api.h']]], + ['hiperrornodevice',['hipErrorNoDevice',['../group__GlobalDefs.html#ggadf5010f6e140a53ecbdf949e73e87594ad4406972c318df36d231310a15131c24',1,'hip_runtime_api.h']]], + ['hiperrornotready',['hipErrorNotReady',['../group__GlobalDefs.html#ggadf5010f6e140a53ecbdf949e73e87594aa9638063c8746a9d1fda2b2069a0a9f1',1,'hip_runtime_api.h']]], + 
['hiperroroutofresources',['hipErrorOutOfResources',['../group__GlobalDefs.html#ggadf5010f6e140a53ecbdf949e73e87594a60c1c080b79bdde9ef5e808f974ac9ed',1,'hip_runtime_api.h']]], + ['hiperrortbd',['hipErrorTbd',['../group__GlobalDefs.html#ggadf5010f6e140a53ecbdf949e73e87594ab556409e11ddb0c4cf77a2f4fc91ea9e',1,'hip_runtime_api.h']]], + ['hiperrorunknown',['hipErrorUnknown',['../group__GlobalDefs.html#ggadf5010f6e140a53ecbdf949e73e87594aa74e64c5b2f5fb0d6a92681f5b234073',1,'hip_runtime_api.h']]], + ['hiperrorunknownsymbol',['hipErrorUnknownSymbol',['../group__GlobalDefs.html#ggadf5010f6e140a53ecbdf949e73e87594a45b297e6c3b2029dce1348658421481b',1,'hip_runtime_api.h']]], + ['hipevent_5ft',['hipEvent_t',['../structhipEvent__t.html',1,'']]], + ['hipeventblockingsync',['hipEventBlockingSync',['../group__GlobalDefs.html#gafa1c076a5b991763a98695063f1ea11d',1,'hip_runtime_api.h']]], + ['hipeventcreatewithflags',['hipEventCreateWithFlags',['../group__Event.html#gae86a5acb1b22b61bc9ecb9c28fc71b75',1,'hipEventCreateWithFlags(hipEvent_t *event, unsigned flags): hip_hcc.cpp'],['../group__Event.html#gae86a5acb1b22b61bc9ecb9c28fc71b75',1,'hipEventCreateWithFlags(hipEvent_t *event, unsigned flags): hip_hcc.cpp']]], + ['hipeventdefault',['hipEventDefault',['../group__GlobalDefs.html#ga122a5853359eba97cf047ddd153740f0',1,'hip_runtime_api.h']]], + ['hipeventdestroy',['hipEventDestroy',['../group__Event.html#ga83260357dce0c39e8c6a3c74ec97484c',1,'hipEventDestroy(hipEvent_t event): hip_hcc.cpp'],['../group__Event.html#ga83260357dce0c39e8c6a3c74ec97484c',1,'hipEventDestroy(hipEvent_t event): hip_hcc.cpp']]], + ['hipeventdisabletiming',['hipEventDisableTiming',['../group__GlobalDefs.html#ga3c0f44a85e36a4c67671da6bcdad0351',1,'hip_runtime_api.h']]], + ['hipeventelapsedtime',['hipEventElapsedTime',['../group__Event.html#gad4128b815cb475c8e13c7e66ff6250b7',1,'hipEventElapsedTime(float *ms, hipEvent_t start, hipEvent_t stop): 
hip_hcc.cpp'],['../group__Event.html#gad4128b815cb475c8e13c7e66ff6250b7',1,'hipEventElapsedTime(float *ms, hipEvent_t start, hipEvent_t stop): hip_hcc.cpp']]], + ['hipeventinterprocess',['hipEventInterprocess',['../group__GlobalDefs.html#ga0f01d74059baa704e42aeff8222166bb',1,'hip_runtime_api.h']]], + ['hipeventquery',['hipEventQuery',['../group__Event.html#ga5d12d7b798b5ceb5932d1ac21f5ac776',1,'hipEventQuery(hipEvent_t event): hip_hcc.cpp'],['../group__Event.html#ga5d12d7b798b5ceb5932d1ac21f5ac776',1,'hipEventQuery(hipEvent_t event): hip_hcc.cpp']]], + ['hipeventrecord',['hipEventRecord',['../group__Event.html#gace88ebd8c7ec42a6c2cebda2e8b0cb38',1,'hipEventRecord(hipEvent_t event, hipStream_t stream=NULL): hip_hcc.cpp'],['../group__Event.html#gace88ebd8c7ec42a6c2cebda2e8b0cb38',1,'hipEventRecord(hipEvent_t event, hipStream_t stream): hip_hcc.cpp']]], + ['hipeventsynchronize',['hipEventSynchronize',['../group__Event.html#ga1f72d98ba5d6f7dc3da54e0c41fe38b1',1,'hipEventSynchronize(hipEvent_t event): hip_hcc.cpp'],['../group__Event.html#ga1f72d98ba5d6f7dc3da54e0c41fe38b1',1,'hipEventSynchronize(hipEvent_t event): hip_hcc.cpp']]], + ['hipfiltermodepoint',['hipFilterModePoint',['../hip__texture_8h.html#aa2f0b6002b81d0a43a808cb880bb21e6a56ede038ab7c805ec4b5b61d2b678dfc',1,'hip_texture.h']]], + ['hipfree',['hipFree',['../group__Memory.html#ga740d08da65cae1441ba32f8fedb863d1',1,'hipFree(void *ptr): hip_hcc.cpp'],['../group__Memory.html#ga740d08da65cae1441ba32f8fedb863d1',1,'hipFree(void *ptr): hip_hcc.cpp']]], + ['hipfreehost',['hipFreeHost',['../group__Memory.html#ga28d7d92836116dfadeb62e416ee887d3',1,'hipFreeHost(void *ptr): hip_hcc.cpp'],['../group__Memory.html#ga28d7d92836116dfadeb62e416ee887d3',1,'hipFreeHost(void *ptr): hip_hcc.cpp']]], + ['hipfunccache',['hipFuncCache',['../group__GlobalDefs.html#gac7e4bfd88340fc06642136c839a3d822',1,'hipFuncCache(): hip_runtime_api.h'],['../group__GlobalDefs.html#gaad15dc7939a0a25b16e4aa161fb41eee',1,'hipFuncCache(): 
hip_runtime_api.h']]], + ['hipfunccachepreferequal',['hipFuncCachePreferEqual',['../group__GlobalDefs.html#ggac7e4bfd88340fc06642136c839a3d822a0ddab0e840107634a152033103be44d7',1,'hip_runtime_api.h']]], + ['hipfunccachepreferl1',['hipFuncCachePreferL1',['../group__GlobalDefs.html#ggac7e4bfd88340fc06642136c839a3d822a636a3c140db6b9d4a8bf7d5a61c398c5',1,'hip_runtime_api.h']]], + ['hipfunccacheprefernone',['hipFuncCachePreferNone',['../group__GlobalDefs.html#ggac7e4bfd88340fc06642136c839a3d822a0813fbaa008ce1231ff9fed3911eb3af',1,'hip_runtime_api.h']]], + ['hipfunccacheprefershared',['hipFuncCachePreferShared',['../group__GlobalDefs.html#ggac7e4bfd88340fc06642136c839a3d822a9b34337dfbadba25ed2aa270bbcabc43',1,'hip_runtime_api.h']]], + ['hipfuncsetcacheconfig',['hipFuncSetCacheConfig',['../group__Device.html#gadd94a910c2b840833cc325b1e5425702',1,'hipFuncSetCacheConfig(hipFuncCache config): hip_hcc.cpp'],['../group__Device.html#gadd94a910c2b840833cc325b1e5425702',1,'hipFuncSetCacheConfig(hipFuncCache cacheConfig): hip_hcc.cpp']]], + ['hipgetdevice',['hipGetDevice',['../group__Device.html#gaffc83567f2df3bbe2d37a19872d60f24',1,'hipGetDevice(int *device): hip_hcc.cpp'],['../group__Device.html#gaffc83567f2df3bbe2d37a19872d60f24',1,'hipGetDevice(int *device): hip_hcc.cpp']]], + ['hipgetdevicecount',['hipGetDeviceCount',['../group__Device.html#ga8555d5c76d88c50ddbf54ae70b568394',1,'hipGetDeviceCount(int *count): hip_hcc.cpp'],['../group__Device.html#ga8555d5c76d88c50ddbf54ae70b568394',1,'hipGetDeviceCount(int *count): hip_hcc.cpp']]], + ['hipgeterrorname',['hipGetErrorName',['../group__Error.html#ga88c474d77635523dbf6ca67be7b56999',1,'hipGetErrorName(hipError_t hip_error): hip_hcc.cpp'],['../group__Error.html#ga88c474d77635523dbf6ca67be7b56999',1,'hipGetErrorName(hipError_t hip_error): hip_hcc.cpp']]], + ['hipgeterrorstring',['hipGetErrorString',['../group__Error.html#ga5959779a654bbc98ffe6d36ab536740a',1,'hipGetErrorString(hipError_t hip_error): 
hip_hcc.cpp'],['../group__Error.html#ga5959779a654bbc98ffe6d36ab536740a',1,'hipGetErrorString(hipError_t hip_error): hip_hcc.cpp']]], + ['hipgetlasterror',['hipGetLastError',['../group__Error.html#ga533daeb9114d7fc2db8d867adf9e419b',1,'hipGetLastError(void): hip_hcc.cpp'],['../group__Error.html#ga533daeb9114d7fc2db8d867adf9e419b',1,'hipGetLastError(): hip_hcc.cpp']]], + ['hiphccgetaccelerator',['hipHccGetAccelerator',['../group__HCC__Specific.html#ga0d24b3157fd1b16d38672bb157ec4cd4',1,'hipHccGetAccelerator(int deviceId, hc::accelerator *acc): hip_hcc.cpp'],['../group__HCC__Specific.html#ga0d24b3157fd1b16d38672bb157ec4cd4',1,'hipHccGetAccelerator(int deviceId, hc::accelerator *acc): hip_hcc.cpp']]], + ['hiphccgetacceleratorview',['hipHccGetAcceleratorView',['../group__HCC__Specific.html#ga1a7087ea9c3c3323270d7cce73650b44',1,'hipHccGetAcceleratorView(hipStream_t stream, hc::accelerator_view **av): hip_hcc.cpp'],['../group__HCC__Specific.html#ga1a7087ea9c3c3323270d7cce73650b44',1,'hipHccGetAcceleratorView(hipStream_t stream, hc::accelerator_view **av): hip_hcc.cpp']]], + ['hipmalloc',['hipMalloc',['../group__Memory.html#ga4c6fcfe80010069d2792780d00dcead2',1,'hipMalloc(void **ptr, size_t size): hip_hcc.cpp'],['../group__Memory.html#ga4c6fcfe80010069d2792780d00dcead2',1,'hipMalloc(void **ptr, size_t sizeBytes): hip_hcc.cpp']]], + ['hipmallochost',['hipMallocHost',['../group__Memory.html#ga66399e729223ff5b66ffc16297c0710e',1,'hipMallocHost(void **ptr, size_t size): hip_hcc.cpp'],['../group__Memory.html#ga66399e729223ff5b66ffc16297c0710e',1,'hipMallocHost(void **ptr, size_t sizeBytes): hip_hcc.cpp']]], + ['hipmemcpy',['hipMemcpy',['../group__Memory.html#gac1a055d288302edd641c6d7416858e1e',1,'hipMemcpy(void *dst, const void *src, size_t sizeBytes, hipMemcpyKind kind): hip_hcc.cpp'],['../group__Memory.html#gac1a055d288302edd641c6d7416858e1e',1,'hipMemcpy(void *dst, const void *src, size_t sizeBytes, hipMemcpyKind kind): hip_hcc.cpp']]], + 
['hipmemcpyasync',['hipMemcpyAsync',['../group__Memory.html#ga8ad5a0b13458917e1b9437732b21af54',1,'hipMemcpyAsync(void *dst, const void *src, size_t sizeBytes, hipMemcpyKind kind, hipStream_t stream=0): hip_hcc.cpp'],['../group__Memory.html#ga8ad5a0b13458917e1b9437732b21af54',1,'hipMemcpyAsync(void *dst, const void *src, size_t sizeBytes, hipMemcpyKind kind, hipStream_t stream): hip_hcc.cpp']]], + ['hipmemcpydefault',['hipMemcpyDefault',['../group__GlobalDefs.html#gga232e222db36b1fc672ba98054d036a18a4e37107e416f79a2edf2b6534163c823',1,'hip_runtime_api.h']]], + ['hipmemcpydevicetodevice',['hipMemcpyDeviceToDevice',['../group__GlobalDefs.html#gga232e222db36b1fc672ba98054d036a18abd05a09d3105e0ce25b34dd91cf83f88',1,'hip_runtime_api.h']]], + ['hipmemcpydevicetohost',['hipMemcpyDeviceToHost',['../group__GlobalDefs.html#gga232e222db36b1fc672ba98054d036a18aba2505e9ce1e5382f17730bc670917d1',1,'hip_runtime_api.h']]], + ['hipmemcpyhosttodevice',['hipMemcpyHostToDevice',['../group__GlobalDefs.html#gga232e222db36b1fc672ba98054d036a18aff32175ecb0c7113200286eff8211008',1,'hip_runtime_api.h']]], + ['hipmemcpyhosttohost',['hipMemcpyHostToHost',['../group__GlobalDefs.html#gga232e222db36b1fc672ba98054d036a18a9d66b705aa85a9c83f0f533cef70d0af',1,'hip_runtime_api.h']]], + ['hipmemcpykind',['hipMemcpyKind',['../group__GlobalDefs.html#ga232e222db36b1fc672ba98054d036a18',1,'hip_runtime_api.h']]], + ['hipmemcpypeer',['hipMemcpyPeer',['../group__PeerToPeer.html#ga72ae9e7f498ab5684580892a5d7d8e2d',1,'hipMemcpyPeer(void *dst, int dstDevice, const void *src, int srcDevice, size_t sizeBytes): hip_hcc.cpp'],['../group__PeerToPeer.html#ga72ae9e7f498ab5684580892a5d7d8e2d',1,'hipMemcpyPeer(void *dst, int dstDevice, const void *src, int srcDevice, size_t sizeBytes): hip_hcc.cpp']]], + ['hipmemcpypeerasync',['hipMemcpyPeerAsync',['../group__PeerToPeer.html#gab6211c18ca1e23252ef080cd6be855ca',1,'hipMemcpyPeerAsync(void *dst, int dstDevice, const void *src, int srcDevice, size_t sizeBytes, hipStream_t 
stream=0): hip_hcc.cpp'],['../group__PeerToPeer.html#gab6211c18ca1e23252ef080cd6be855ca',1,'hipMemcpyPeerAsync(void *dst, int dstDevice, const void *src, int srcDevice, size_t sizeBytes, hipStream_t stream): hip_hcc.cpp']]], + ['hipmemcpytosymbol',['hipMemcpyToSymbol',['../group__Memory.html#ga131ac5c1ba04e186112491cb9bf964bc',1,'hipMemcpyToSymbol(const char *symbolName, const void *src, size_t sizeBytes, size_t offset, hipMemcpyKind kind): hip_hcc.cpp'],['../group__Memory.html#ga131ac5c1ba04e186112491cb9bf964bc',1,'hipMemcpyToSymbol(const char *symbolName, const void *src, size_t count, size_t offset, hipMemcpyKind kind): hip_hcc.cpp']]], + ['hipmemset',['hipMemset',['../group__Memory.html#gac7441e74affcce4b8b69dba996c5ebc4',1,'hipMemset(void *dst, int value, size_t sizeBytes): hip_hcc.cpp'],['../group__Memory.html#gac7441e74affcce4b8b69dba996c5ebc4',1,'hipMemset(void *dst, int value, size_t sizeBytes): hip_hcc.cpp']]], + ['hipmemsetasync',['hipMemsetAsync',['../group__Memory.html#gaee4ed665ce0a60c661a809c175320a0c',1,'hipMemsetAsync(void *dst, int value, size_t sizeBytes, hipStream_t=0): hip_hcc.cpp'],['../group__Memory.html#gaee4ed665ce0a60c661a809c175320a0c',1,'hipMemsetAsync(void *dst, int value, size_t sizeBytes, hipStream_t stream): hip_hcc.cpp']]], + ['hippeekatlasterror',['hipPeekAtLastError',['../group__Error.html#ga1dd660bc739f7e13edd34615660f0148',1,'hip_runtime_api.h']]], + ['hipreadmodeelementtype',['hipReadModeElementType',['../hip__texture_8h.html#a442e950774f7306dc33692e358c92c94a829645801202174d052d667ffa4e1b8d',1,'hip_texture.h']]], + ['hipsetdevice',['hipSetDevice',['../group__Device.html#ga8ec0b093af0adadc7fe98bf33fa21620',1,'hipSetDevice(int device): hip_hcc.cpp'],['../group__Device.html#ga8ec0b093af0adadc7fe98bf33fa21620',1,'hipSetDevice(int device): hip_hcc.cpp']]], + 
['hipsharedmembanksizedefault',['hipSharedMemBankSizeDefault',['../group__GlobalDefs.html#gga2e17b71d94ac350f2ccd914fd49d104eaf5b325c9b7bde878913f768eaba5014d',1,'hip_runtime_api.h']]], + ['hipsharedmembanksizeeightbyte',['hipSharedMemBankSizeEightByte',['../group__GlobalDefs.html#gga2e17b71d94ac350f2ccd914fd49d104ea64518b4f5a25f536c883330167e79258',1,'hip_runtime_api.h']]], + ['hipsharedmembanksizefourbyte',['hipSharedMemBankSizeFourByte',['../group__GlobalDefs.html#gga2e17b71d94ac350f2ccd914fd49d104ea0a95a6e0c33106c42d66ab9476ff954a',1,'hip_runtime_api.h']]], + ['hipsharedmemconfig',['hipSharedMemConfig',['../group__GlobalDefs.html#ga2e17b71d94ac350f2ccd914fd49d104e',1,'hipSharedMemConfig(): hip_runtime_api.h'],['../group__GlobalDefs.html#ga6b1ca424fa26a5fb718937d662eaee7f',1,'hipSharedMemConfig(): hip_runtime_api.h']]], + ['hipstreamcreatewithflags',['hipStreamCreateWithFlags',['../group__Stream.html#gaf2382e3cc6632332a8983a0f58e43494',1,'hipStreamCreateWithFlags(hipStream_t *stream, unsigned int flags): hip_hcc.cpp'],['../group__Stream.html#gaf2382e3cc6632332a8983a0f58e43494',1,'hipStreamCreateWithFlags(hipStream_t *stream, unsigned int flags): hip_hcc.cpp']]], + ['hipstreamdefault',['hipStreamDefault',['../group__GlobalDefs.html#ga6df5f70eb976836ab3598cacf0ffcdf9',1,'hip_runtime_api.h']]], + ['hipstreamdestroy',['hipStreamDestroy',['../group__Stream.html#ga3076a3499ed2c7821311006100bb95ec',1,'hipStreamDestroy(hipStream_t stream): hip_hcc.cpp'],['../group__Stream.html#ga3076a3499ed2c7821311006100bb95ec',1,'hipStreamDestroy(hipStream_t stream): hip_hcc.cpp']]], + ['hipstreamgetflags',['hipStreamGetFlags',['../group__Stream.html#ga3249555a26439591b8873f70b39bb116',1,'hipStreamGetFlags(hipStream_t stream, unsigned int *flags): hip_hcc.cpp'],['../group__Stream.html#ga3249555a26439591b8873f70b39bb116',1,'hipStreamGetFlags(hipStream_t stream, unsigned int *flags): hip_hcc.cpp']]], + 
['hipstreamnonblocking',['hipStreamNonBlocking',['../group__GlobalDefs.html#gaaba9ae995d9b43b7d1ee70c6fa12c57d',1,'hip_runtime_api.h']]], + ['hipstreamsynchronize',['hipStreamSynchronize',['../group__Stream.html#gabbfb9f573a6ebe8c478605ecb5504a74',1,'hipStreamSynchronize(hipStream_t stream): hip_hcc.cpp'],['../group__Stream.html#gabbfb9f573a6ebe8c478605ecb5504a74',1,'hipStreamSynchronize(hipStream_t stream): hip_hcc.cpp']]], + ['hipstreamwaitevent',['hipStreamWaitEvent',['../group__Stream.html#gacdd84c8f8ef1539c96c57c1d5bcae633',1,'hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int flags): hip_hcc.cpp'],['../group__Stream.html#gacdd84c8f8ef1539c96c57c1d5bcae633',1,'hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int flags): hip_hcc.cpp']]], + ['hipsuccess',['hipSuccess',['../group__GlobalDefs.html#ggadf5010f6e140a53ecbdf949e73e87594aadfbdb847b149723c684ebd764556063',1,'hip_runtime_api.h']]], + ['hiptexturefiltermode',['hipTextureFilterMode',['../hip__texture_8h.html#aa2f0b6002b81d0a43a808cb880bb21e6',1,'hip_texture.h']]], + ['hiptexturereadmode',['hipTextureReadMode',['../hip__texture_8h.html#a442e950774f7306dc33692e358c92c94',1,'hip_texture.h']]], + ['hipthreadidx_5fx',['hipThreadIdx_x',['../hcc__detail_2hip__runtime_8h.html#a48f5f9da77c5fab1fbcf0205bb347d89',1,'hip_runtime.h']]], + ['host_5fdefines_2eh',['host_defines.h',['../host__defines_8h.html',1,'']]], + ['heterogeneous_2dcomputing_20interface_20for_20portability_20_28hip_29',['Heterogeneous-computing Interface for Portability (HIP)',['../index.html',1,'']]] ]; diff --git a/projects/hip/docs/RuntimeAPI/html/search/all_8.js b/projects/hip/docs/RuntimeAPI/html/search/all_8.js index 41a7c59602..69beb95adf 100644 --- a/projects/hip/docs/RuntimeAPI/html/search/all_8.js +++ b/projects/hip/docs/RuntimeAPI/html/search/all_8.js @@ -1,4 +1,6 @@ var searchData= [ - 
['l2cachesize',['l2CacheSize',['../structhipDeviceProp__t.html#a24404decccc16833973c803ced6f3a51',1,'hipDeviceProp_t']]] + ['ihipdevice_5ft',['ihipDevice_t',['../structihipDevice__t.html',1,'']]], + ['ihipevent_5ft',['ihipEvent_t',['../structihipEvent__t.html',1,'']]], + ['ihipstream_5ft',['ihipStream_t',['../structihipStream__t.html',1,'']]] ]; diff --git a/projects/hip/docs/RuntimeAPI/html/search/all_9.js b/projects/hip/docs/RuntimeAPI/html/search/all_9.js index fd7cb073be..41a7c59602 100644 --- a/projects/hip/docs/RuntimeAPI/html/search/all_9.js +++ b/projects/hip/docs/RuntimeAPI/html/search/all_9.js @@ -1,12 +1,4 @@ var searchData= [ - ['major',['major',['../structhipDeviceProp__t.html#aec9e4173c2e34cc232300c415dbd5e4f',1,'hipDeviceProp_t']]], - ['maxgridsize',['maxGridSize',['../structhipDeviceProp__t.html#ae529c23929f592120081fed31d877a55',1,'hipDeviceProp_t']]], - ['maxthreadsdim',['maxThreadsDim',['../structhipDeviceProp__t.html#a8ebba6fc12f80c9a9cf9b9193f0da465',1,'hipDeviceProp_t']]], - ['maxthreadsperblock',['maxThreadsPerBlock',['../structhipDeviceProp__t.html#af971cf1ca3ec1f68ad09036c0cc672e0',1,'hipDeviceProp_t']]], - ['maxthreadspermultiprocessor',['maxThreadsPerMultiProcessor',['../structhipDeviceProp__t.html#a23a39f4fd795addb3b125e9c3f6295ea',1,'hipDeviceProp_t']]], - ['memory_20management',['Memory Management',['../group__Memory.html',1,'']]], - ['minor',['minor',['../structhipDeviceProp__t.html#abb51208e2509a7a1d107f0da69108938',1,'hipDeviceProp_t']]], - ['multiprocessorcount',['multiProcessorCount',['../structhipDeviceProp__t.html#add8d9d2ad52aece9fd1dbe25c18d9d57',1,'hipDeviceProp_t']]], - ['management',['Management',['../group__Version.html',1,'']]] + ['l2cachesize',['l2CacheSize',['../structhipDeviceProp__t.html#a24404decccc16833973c803ced6f3a51',1,'hipDeviceProp_t']]] ]; diff --git a/projects/hip/docs/RuntimeAPI/html/search/all_a.js b/projects/hip/docs/RuntimeAPI/html/search/all_a.js index 124bf0ddb8..ea8ffca597 100644 --- 
a/projects/hip/docs/RuntimeAPI/html/search/all_a.js +++ b/projects/hip/docs/RuntimeAPI/html/search/all_a.js @@ -1,4 +1,14 @@ var searchData= [ - ['name',['name',['../structhipDeviceProp__t.html#a5b44bf8fa46faefcde989942b1d11a5e',1,'hipDeviceProp_t']]] + ['major',['major',['../structhipDeviceProp__t.html#aec9e4173c2e34cc232300c415dbd5e4f',1,'hipDeviceProp_t']]], + ['maxgridsize',['maxGridSize',['../structhipDeviceProp__t.html#ae529c23929f592120081fed31d877a55',1,'hipDeviceProp_t']]], + ['maxsharedmemorypermultiprocessor',['maxSharedMemoryPerMultiProcessor',['../structhipDeviceProp__t.html#aa1a32a7f387f6da845db7b228711fce8',1,'hipDeviceProp_t']]], + ['maxthreadsdim',['maxThreadsDim',['../structhipDeviceProp__t.html#a8ebba6fc12f80c9a9cf9b9193f0da465',1,'hipDeviceProp_t']]], + ['maxthreadsperblock',['maxThreadsPerBlock',['../structhipDeviceProp__t.html#af971cf1ca3ec1f68ad09036c0cc672e0',1,'hipDeviceProp_t']]], + ['maxthreadspermultiprocessor',['maxThreadsPerMultiProcessor',['../structhipDeviceProp__t.html#a23a39f4fd795addb3b125e9c3f6295ea',1,'hipDeviceProp_t']]], + ['memory_20management',['Memory Management',['../group__Memory.html',1,'']]], + ['memoryclockrate',['memoryClockRate',['../structhipDeviceProp__t.html#a6db0ab8e7e8cc13c84d7bb7f70226d5e',1,'hipDeviceProp_t']]], + ['minor',['minor',['../structhipDeviceProp__t.html#abb51208e2509a7a1d107f0da69108938',1,'hipDeviceProp_t']]], + ['multiprocessorcount',['multiProcessorCount',['../structhipDeviceProp__t.html#add8d9d2ad52aece9fd1dbe25c18d9d57',1,'hipDeviceProp_t']]], + ['management',['Management',['../group__Version.html',1,'']]] ]; diff --git a/projects/hip/docs/RuntimeAPI/html/search/all_b.js b/projects/hip/docs/RuntimeAPI/html/search/all_b.js index 44ba50e0b7..124bf0ddb8 100644 --- a/projects/hip/docs/RuntimeAPI/html/search/all_b.js +++ b/projects/hip/docs/RuntimeAPI/html/search/all_b.js @@ -1,4 +1,4 @@ var searchData= [ - 
['regsperblock',['regsPerBlock',['../structhipDeviceProp__t.html#a73c1c21648a901799ff6bef83c11135b',1,'hipDeviceProp_t']]] + ['name',['name',['../structhipDeviceProp__t.html#a5b44bf8fa46faefcde989942b1d11a5e',1,'hipDeviceProp_t']]] ]; diff --git a/projects/hip/docs/RuntimeAPI/html/search/all_c.js b/projects/hip/docs/RuntimeAPI/html/search/all_c.js index 559f8252a3..3eaae3688b 100644 --- a/projects/hip/docs/RuntimeAPI/html/search/all_c.js +++ b/projects/hip/docs/RuntimeAPI/html/search/all_c.js @@ -1,5 +1,4 @@ var searchData= [ - ['sharedmemperblock',['sharedMemPerBlock',['../structhipDeviceProp__t.html#a3b9138678a0795c2677eddcfb1c67156',1,'hipDeviceProp_t']]], - ['stream_20management',['Stream Management',['../group__Stream.html',1,'']]] + ['one_5fcomponent_5faccess',['ONE_COMPONENT_ACCESS',['../hcc__detail_2hip__vector__types_8h.html#add5d9c0f058c5a52c2b9165a66035d0e',1,'hip_vector_types.h']]] ]; diff --git a/projects/hip/docs/RuntimeAPI/html/search/all_d.js b/projects/hip/docs/RuntimeAPI/html/search/all_d.js index 9043ae6945..71b6a5df56 100644 --- a/projects/hip/docs/RuntimeAPI/html/search/all_d.js +++ b/projects/hip/docs/RuntimeAPI/html/search/all_d.js @@ -1,7 +1,5 @@ var searchData= [ - ['texture',['texture',['../structtexture.html',1,'texture< T, texType, hipTextureReadMode >'],['../group__Texture.html',1,'(Global Namespace)']]], - ['texturereference',['textureReference',['../structtextureReference.html',1,'']]], - ['totalconstmem',['totalConstMem',['../structhipDeviceProp__t.html#a29880232c56120be3455ce00d5379665',1,'hipDeviceProp_t']]], - ['totalglobalmem',['totalGlobalMem',['../structhipDeviceProp__t.html#acedd6a2d23423441e4bf51c4a1b719f9',1,'hipDeviceProp_t']]] + ['pcibusid',['pciBusID',['../structhipDeviceProp__t.html#a1350f64d49b717ed3a06458f7549ccb0',1,'hipDeviceProp_t']]], + ['pcideviceid',['pciDeviceID',['../structhipDeviceProp__t.html#ae6aa845dc2d540f85098ea30be35f4eb',1,'hipDeviceProp_t']]] ]; diff --git 
a/projects/hip/docs/RuntimeAPI/html/search/all_e.js b/projects/hip/docs/RuntimeAPI/html/search/all_e.js index 46a1400a7b..44ba50e0b7 100644 --- a/projects/hip/docs/RuntimeAPI/html/search/all_e.js +++ b/projects/hip/docs/RuntimeAPI/html/search/all_e.js @@ -1,4 +1,4 @@ var searchData= [ - ['warpsize',['warpSize',['../structhipDeviceProp__t.html#af3357d33c004608bf05bc21a352be81b',1,'hipDeviceProp_t']]] + ['regsperblock',['regsPerBlock',['../structhipDeviceProp__t.html#a73c1c21648a901799ff6bef83c11135b',1,'hipDeviceProp_t']]] ]; diff --git a/projects/hip/docs/RuntimeAPI/html/search/all_f.js b/projects/hip/docs/RuntimeAPI/html/search/all_f.js index 250c203caf..559f8252a3 100644 --- a/projects/hip/docs/RuntimeAPI/html/search/all_f.js +++ b/projects/hip/docs/RuntimeAPI/html/search/all_f.js @@ -1,4 +1,5 @@ var searchData= [ - ['x',['x',['../structdim3.html#ac866c05f83a28dac20a153fc65b3b16c',1,'dim3']]] + ['sharedmemperblock',['sharedMemPerBlock',['../structhipDeviceProp__t.html#a3b9138678a0795c2677eddcfb1c67156',1,'hipDeviceProp_t']]], + ['stream_20management',['Stream Management',['../group__Stream.html',1,'']]] ]; diff --git a/projects/hip/docs/RuntimeAPI/html/search/defines_0.js b/projects/hip/docs/RuntimeAPI/html/search/defines_0.js index c7c61558ca..00fe08ccff 100644 --- a/projects/hip/docs/RuntimeAPI/html/search/defines_0.js +++ b/projects/hip/docs/RuntimeAPI/html/search/defines_0.js @@ -1,4 +1,4 @@ var searchData= [ - ['hipthreadidx_5fx',['hipThreadIdx_x',['../hcc__detail_2hip__runtime_8h.html#a48f5f9da77c5fab1fbcf0205bb347d89',1,'hip_runtime.h']]] + ['_5f_5fhost_5f_5f',['__host__',['../host__defines_8h.html#a803050db3c78e0db3ea59a0c35499622',1,'host_defines.h']]] ]; diff --git a/projects/hip/docs/RuntimeAPI/html/search/defines_1.html b/projects/hip/docs/RuntimeAPI/html/search/defines_1.html new file mode 100644 index 0000000000..9e1d9fa116 --- /dev/null +++ b/projects/hip/docs/RuntimeAPI/html/search/defines_1.html @@ -0,0 +1,26 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/projects/hip/docs/RuntimeAPI/html/search/defines_1.js b/projects/hip/docs/RuntimeAPI/html/search/defines_1.js new file mode 100644 index 0000000000..c7c61558ca --- /dev/null +++ b/projects/hip/docs/RuntimeAPI/html/search/defines_1.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['hipthreadidx_5fx',['hipThreadIdx_x',['../hcc__detail_2hip__runtime_8h.html#a48f5f9da77c5fab1fbcf0205bb347d89',1,'hip_runtime.h']]] +]; diff --git a/projects/hip/docs/RuntimeAPI/html/search/defines_2.html b/projects/hip/docs/RuntimeAPI/html/search/defines_2.html new file mode 100644 index 0000000000..6ef4b980d7 --- /dev/null +++ b/projects/hip/docs/RuntimeAPI/html/search/defines_2.html @@ -0,0 +1,26 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/projects/hip/docs/RuntimeAPI/html/search/defines_2.js b/projects/hip/docs/RuntimeAPI/html/search/defines_2.js new file mode 100644 index 0000000000..3eaae3688b --- /dev/null +++ b/projects/hip/docs/RuntimeAPI/html/search/defines_2.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['one_5fcomponent_5faccess',['ONE_COMPONENT_ACCESS',['../hcc__detail_2hip__vector__types_8h.html#add5d9c0f058c5a52c2b9165a66035d0e',1,'hip_vector_types.h']]] +]; diff --git a/projects/hip/docs/RuntimeAPI/html/search/enums_0.js b/projects/hip/docs/RuntimeAPI/html/search/enums_0.js index c47574066e..8258fd4f05 100644 --- a/projects/hip/docs/RuntimeAPI/html/search/enums_0.js +++ b/projects/hip/docs/RuntimeAPI/html/search/enums_0.js @@ -1,7 +1,10 @@ var searchData= [ + ['hipdeviceattribute_5ft',['hipDeviceAttribute_t',['../group__GlobalDefs.html#gacc0acd7b9bda126c6bb3dfd6e2796d7c',1,'hip_runtime_api.h']]], ['hiperror_5ft',['hipError_t',['../group__GlobalDefs.html#gadf5010f6e140a53ecbdf949e73e87594',1,'hip_runtime_api.h']]], ['hipfunccache',['hipFuncCache',['../group__GlobalDefs.html#gac7e4bfd88340fc06642136c839a3d822',1,'hip_runtime_api.h']]], ['hipmemcpykind',['hipMemcpyKind',['../group__GlobalDefs.html#ga232e222db36b1fc672ba98054d036a18',1,'hip_runtime_api.h']]], - ['hipsharedmemconfig',['hipSharedMemConfig',['../group__GlobalDefs.html#ga2e17b71d94ac350f2ccd914fd49d104e',1,'hip_runtime_api.h']]] + ['hipsharedmemconfig',['hipSharedMemConfig',['../group__GlobalDefs.html#ga2e17b71d94ac350f2ccd914fd49d104e',1,'hip_runtime_api.h']]], + ['hiptexturefiltermode',['hipTextureFilterMode',['../hip__texture_8h.html#aa2f0b6002b81d0a43a808cb880bb21e6',1,'hip_texture.h']]], + ['hiptexturereadmode',['hipTextureReadMode',['../hip__texture_8h.html#a442e950774f7306dc33692e358c92c94',1,'hip_texture.h']]] ]; diff --git a/projects/hip/docs/RuntimeAPI/html/search/enumvalues_0.js b/projects/hip/docs/RuntimeAPI/html/search/enumvalues_0.js index bb82e15f9b..5fadbd65af 100644 --- 
a/projects/hip/docs/RuntimeAPI/html/search/enumvalues_0.js +++ b/projects/hip/docs/RuntimeAPI/html/search/enumvalues_0.js @@ -1,5 +1,28 @@ var searchData= [ + ['hipdeviceattributeclockrate',['hipDeviceAttributeClockRate',['../group__GlobalDefs.html#ggacc0acd7b9bda126c6bb3dfd6e2796d7ca2300e077e020e7967592065561373b00',1,'hip_runtime_api.h']]], + ['hipdeviceattributecomputecapabilitymajor',['hipDeviceAttributeComputeCapabilityMajor',['../group__GlobalDefs.html#ggacc0acd7b9bda126c6bb3dfd6e2796d7ca2735739cf977b7d303266f6781131e8d',1,'hip_runtime_api.h']]], + ['hipdeviceattributecomputecapabilityminor',['hipDeviceAttributeComputeCapabilityMinor',['../group__GlobalDefs.html#ggacc0acd7b9bda126c6bb3dfd6e2796d7ca38edc4fcae456e47160d349da3249b85',1,'hip_runtime_api.h']]], + ['hipdeviceattributecomputemode',['hipDeviceAttributeComputeMode',['../group__GlobalDefs.html#ggacc0acd7b9bda126c6bb3dfd6e2796d7ca4d0369a6ef7bd7890fdcabc16ed3385d',1,'hip_runtime_api.h']]], + ['hipdeviceattributeconcurrentkernels',['hipDeviceAttributeConcurrentKernels',['../group__GlobalDefs.html#ggacc0acd7b9bda126c6bb3dfd6e2796d7cad9f45254d0d048677f560032532d5504',1,'hip_runtime_api.h']]], + ['hipdeviceattributel2cachesize',['hipDeviceAttributeL2CacheSize',['../group__GlobalDefs.html#ggacc0acd7b9bda126c6bb3dfd6e2796d7ca582ae5a26a7148504878890028e4b64c',1,'hip_runtime_api.h']]], + ['hipdeviceattributemaxblockdimx',['hipDeviceAttributeMaxBlockDimX',['../group__GlobalDefs.html#ggacc0acd7b9bda126c6bb3dfd6e2796d7cac1e4ac589db0d8adbbc241e3d0fcd594',1,'hip_runtime_api.h']]], + ['hipdeviceattributemaxblockdimy',['hipDeviceAttributeMaxBlockDimY',['../group__GlobalDefs.html#ggacc0acd7b9bda126c6bb3dfd6e2796d7ca187dbffe12db09a56c0f75c340d879c9',1,'hip_runtime_api.h']]], + ['hipdeviceattributemaxblockdimz',['hipDeviceAttributeMaxBlockDimZ',['../group__GlobalDefs.html#ggacc0acd7b9bda126c6bb3dfd6e2796d7caf811f51e03d1ffb025d80ac1da088675',1,'hip_runtime_api.h']]], + 
['hipdeviceattributemaxgriddimx',['hipDeviceAttributeMaxGridDimX',['../group__GlobalDefs.html#ggacc0acd7b9bda126c6bb3dfd6e2796d7ca03db8df0e7a9fbdaae683d97e8ac9c87',1,'hip_runtime_api.h']]], + ['hipdeviceattributemaxgriddimy',['hipDeviceAttributeMaxGridDimY',['../group__GlobalDefs.html#ggacc0acd7b9bda126c6bb3dfd6e2796d7ca5b5cc49972679c5ccf62b79425ee99df',1,'hip_runtime_api.h']]], + ['hipdeviceattributemaxgriddimz',['hipDeviceAttributeMaxGridDimZ',['../group__GlobalDefs.html#ggacc0acd7b9bda126c6bb3dfd6e2796d7ca6c206ac083999caf4640e5d91dae24f7',1,'hip_runtime_api.h']]], + ['hipdeviceattributemaxregistersperblock',['hipDeviceAttributeMaxRegistersPerBlock',['../group__GlobalDefs.html#ggacc0acd7b9bda126c6bb3dfd6e2796d7ca82289b170192b6ea742be0efc6f95107',1,'hip_runtime_api.h']]], + ['hipdeviceattributemaxsharedmemoryperblock',['hipDeviceAttributeMaxSharedMemoryPerBlock',['../group__GlobalDefs.html#ggacc0acd7b9bda126c6bb3dfd6e2796d7ca7bca3aa18b26d40eba043ae93e15c7e5',1,'hip_runtime_api.h']]], + ['hipdeviceattributemaxsharedmemorypermultiprocessor',['hipDeviceAttributeMaxSharedMemoryPerMultiprocessor',['../group__GlobalDefs.html#ggacc0acd7b9bda126c6bb3dfd6e2796d7cad3e7f3d01533b32e12211172fcf410ba',1,'hip_runtime_api.h']]], + ['hipdeviceattributemaxthreadsperblock',['hipDeviceAttributeMaxThreadsPerBlock',['../group__GlobalDefs.html#ggacc0acd7b9bda126c6bb3dfd6e2796d7ca8327aa23782d9c994bdef33a6d62e02e',1,'hip_runtime_api.h']]], + ['hipdeviceattributemaxthreadspermultiprocessor',['hipDeviceAttributeMaxThreadsPerMultiProcessor',['../group__GlobalDefs.html#ggacc0acd7b9bda126c6bb3dfd6e2796d7caddc08922b491eb1f6a583833cbf4e2f0',1,'hip_runtime_api.h']]], + ['hipdeviceattributememoryclockrate',['hipDeviceAttributeMemoryClockRate',['../group__GlobalDefs.html#ggacc0acd7b9bda126c6bb3dfd6e2796d7ca6b68deafd65f036b30dc8051573eb000',1,'hip_runtime_api.h']]], + 
['hipdeviceattributemultiprocessorcount',['hipDeviceAttributeMultiprocessorCount',['../group__GlobalDefs.html#ggacc0acd7b9bda126c6bb3dfd6e2796d7ca5c1519870733ccf0b83f722678240e5f',1,'hip_runtime_api.h']]], + ['hipdeviceattributepcibusid',['hipDeviceAttributePciBusId',['../group__GlobalDefs.html#ggacc0acd7b9bda126c6bb3dfd6e2796d7ca572b29c44f1322aa7657fdd784832f88',1,'hip_runtime_api.h']]], + ['hipdeviceattributepcideviceid',['hipDeviceAttributePciDeviceId',['../group__GlobalDefs.html#ggacc0acd7b9bda126c6bb3dfd6e2796d7ca955d90286e87be9e3528f0b817ab32ff',1,'hip_runtime_api.h']]], + ['hipdeviceattributetotalconstantmemory',['hipDeviceAttributeTotalConstantMemory',['../group__GlobalDefs.html#ggacc0acd7b9bda126c6bb3dfd6e2796d7cac6089ac3a0f9c77cc382fb0eaa73ae9c',1,'hip_runtime_api.h']]], + ['hipdeviceattributewarpsize',['hipDeviceAttributeWarpSize',['../group__GlobalDefs.html#ggacc0acd7b9bda126c6bb3dfd6e2796d7caffd94133e823247a6f1215343232f6ec',1,'hip_runtime_api.h']]], ['hiperrorinvaliddevice',['hipErrorInvalidDevice',['../group__GlobalDefs.html#ggadf5010f6e140a53ecbdf949e73e87594a07ab9b704ea693c1781a52741c60cd0d',1,'hip_runtime_api.h']]], ['hiperrorinvalidresourcehandle',['hipErrorInvalidResourceHandle',['../group__GlobalDefs.html#ggadf5010f6e140a53ecbdf949e73e87594a88e525a7c8f35552dfada58e9f2f6d3a',1,'hip_runtime_api.h']]], ['hiperrorinvalidvalue',['hipErrorInvalidValue',['../group__GlobalDefs.html#ggadf5010f6e140a53ecbdf949e73e87594a1e8215fe1108a508bad3944bce7b4d83',1,'hip_runtime_api.h']]], @@ -11,6 +34,7 @@ var searchData= ['hiperrortbd',['hipErrorTbd',['../group__GlobalDefs.html#ggadf5010f6e140a53ecbdf949e73e87594ab556409e11ddb0c4cf77a2f4fc91ea9e',1,'hip_runtime_api.h']]], ['hiperrorunknown',['hipErrorUnknown',['../group__GlobalDefs.html#ggadf5010f6e140a53ecbdf949e73e87594aa74e64c5b2f5fb0d6a92681f5b234073',1,'hip_runtime_api.h']]], 
['hiperrorunknownsymbol',['hipErrorUnknownSymbol',['../group__GlobalDefs.html#ggadf5010f6e140a53ecbdf949e73e87594a45b297e6c3b2029dce1348658421481b',1,'hip_runtime_api.h']]], + ['hipfiltermodepoint',['hipFilterModePoint',['../hip__texture_8h.html#aa2f0b6002b81d0a43a808cb880bb21e6a56ede038ab7c805ec4b5b61d2b678dfc',1,'hip_texture.h']]], ['hipfunccachepreferequal',['hipFuncCachePreferEqual',['../group__GlobalDefs.html#ggac7e4bfd88340fc06642136c839a3d822a0ddab0e840107634a152033103be44d7',1,'hip_runtime_api.h']]], ['hipfunccachepreferl1',['hipFuncCachePreferL1',['../group__GlobalDefs.html#ggac7e4bfd88340fc06642136c839a3d822a636a3c140db6b9d4a8bf7d5a61c398c5',1,'hip_runtime_api.h']]], ['hipfunccacheprefernone',['hipFuncCachePreferNone',['../group__GlobalDefs.html#ggac7e4bfd88340fc06642136c839a3d822a0813fbaa008ce1231ff9fed3911eb3af',1,'hip_runtime_api.h']]], @@ -20,6 +44,7 @@ var searchData= ['hipmemcpydevicetohost',['hipMemcpyDeviceToHost',['../group__GlobalDefs.html#gga232e222db36b1fc672ba98054d036a18aba2505e9ce1e5382f17730bc670917d1',1,'hip_runtime_api.h']]], ['hipmemcpyhosttodevice',['hipMemcpyHostToDevice',['../group__GlobalDefs.html#gga232e222db36b1fc672ba98054d036a18aff32175ecb0c7113200286eff8211008',1,'hip_runtime_api.h']]], ['hipmemcpyhosttohost',['hipMemcpyHostToHost',['../group__GlobalDefs.html#gga232e222db36b1fc672ba98054d036a18a9d66b705aa85a9c83f0f533cef70d0af',1,'hip_runtime_api.h']]], + ['hipreadmodeelementtype',['hipReadModeElementType',['../hip__texture_8h.html#a442e950774f7306dc33692e358c92c94a829645801202174d052d667ffa4e1b8d',1,'hip_texture.h']]], ['hipsharedmembanksizedefault',['hipSharedMemBankSizeDefault',['../group__GlobalDefs.html#gga2e17b71d94ac350f2ccd914fd49d104eaf5b325c9b7bde878913f768eaba5014d',1,'hip_runtime_api.h']]], ['hipsharedmembanksizeeightbyte',['hipSharedMemBankSizeEightByte',['../group__GlobalDefs.html#gga2e17b71d94ac350f2ccd914fd49d104ea64518b4f5a25f536c883330167e79258',1,'hip_runtime_api.h']]], 
['hipsharedmembanksizefourbyte',['hipSharedMemBankSizeFourByte',['../group__GlobalDefs.html#gga2e17b71d94ac350f2ccd914fd49d104ea0a95a6e0c33106c42d66ab9476ff954a',1,'hip_runtime_api.h']]], diff --git a/projects/hip/docs/RuntimeAPI/html/search/files_0.js b/projects/hip/docs/RuntimeAPI/html/search/files_0.js index c6c9f7ce28..c60cd7e29f 100644 --- a/projects/hip/docs/RuntimeAPI/html/search/files_0.js +++ b/projects/hip/docs/RuntimeAPI/html/search/files_0.js @@ -1,5 +1,9 @@ var searchData= [ ['hip_5fhcc_2ecpp',['hip_hcc.cpp',['../hip__hcc_8cpp.html',1,'']]], - ['hip_5fruntime_2eh',['hip_runtime.h',['../hcc__detail_2hip__runtime_8h.html',1,'']]] + ['hip_5fruntime_2eh',['hip_runtime.h',['../hcc__detail_2hip__runtime_8h.html',1,'']]], + ['hip_5fruntime_5fapi_2eh',['hip_runtime_api.h',['../hcc__detail_2hip__runtime__api_8h.html',1,'']]], + ['hip_5ftexture_2eh',['hip_texture.h',['../hip__texture_8h.html',1,'']]], + ['hip_5fvector_5ftypes_2eh',['hip_vector_types.h',['../hcc__detail_2hip__vector__types_8h.html',1,'']]], + ['host_5fdefines_2eh',['host_defines.h',['../host__defines_8h.html',1,'']]] ]; diff --git a/projects/hip/docs/RuntimeAPI/html/search/functions_0.js b/projects/hip/docs/RuntimeAPI/html/search/functions_0.js index 4b4091e9d3..9e6f4be60d 100644 --- a/projects/hip/docs/RuntimeAPI/html/search/functions_0.js +++ b/projects/hip/docs/RuntimeAPI/html/search/functions_0.js @@ -3,6 +3,7 @@ var searchData= ['hipdevicecanaccesspeer',['hipDeviceCanAccessPeer',['../group__PeerToPeer.html#gab53a55dbc087ff659918fd04287de3d3',1,'hipDeviceCanAccessPeer(int *canAccessPeer, int device, int peerDevice): hip_hcc.cpp'],['../group__PeerToPeer.html#gab53a55dbc087ff659918fd04287de3d3',1,'hipDeviceCanAccessPeer(int *canAccessPeer, int device, int peerDevice): hip_hcc.cpp']]], ['hipdevicedisablepeeraccess',['hipDeviceDisablePeerAccess',['../group__PeerToPeer.html#ga41e60c01f63597529da1cd77bdd55379',1,'hipDeviceDisablePeerAccess(int peerDevice): 
hip_hcc.cpp'],['../group__PeerToPeer.html#ga41e60c01f63597529da1cd77bdd55379',1,'hipDeviceDisablePeerAccess(int peerDevice): hip_hcc.cpp']]], ['hipdeviceenablepeeraccess',['hipDeviceEnablePeerAccess',['../group__PeerToPeer.html#ga098e0d626edbfb69b66d141a5a8b7dc6',1,'hipDeviceEnablePeerAccess(int peerDevice, unsigned int flags): hip_hcc.cpp'],['../group__PeerToPeer.html#ga098e0d626edbfb69b66d141a5a8b7dc6',1,'hipDeviceEnablePeerAccess(int peerDevice, unsigned int flags): hip_hcc.cpp']]], + ['hipdevicegetattribute',['hipDeviceGetAttribute',['../group__Device.html#gac49518ff2b26b98ea2ec9e9268761a24',1,'hipDeviceGetAttribute(int *pi, hipDeviceAttribute_t attr, int device): hip_hcc.cpp'],['../group__Device.html#gac49518ff2b26b98ea2ec9e9268761a24',1,'hipDeviceGetAttribute(int *pi, hipDeviceAttribute_t attr, int device): hip_hcc.cpp']]], ['hipdevicegetcacheconfig',['hipDeviceGetCacheConfig',['../group__Device.html#gaeeffa2456c5430400bea75ecd6ad1e68',1,'hipDeviceGetCacheConfig(hipFuncCache *cacheConfig): hip_hcc.cpp'],['../group__Device.html#gaeeffa2456c5430400bea75ecd6ad1e68',1,'hipDeviceGetCacheConfig(hipFuncCache *cacheConfig): hip_hcc.cpp']]], ['hipdevicegetproperties',['hipDeviceGetProperties',['../group__Device.html#gad9ee6822e3e55431811fb6a00f7a1c10',1,'hipDeviceGetProperties(hipDeviceProp_t *prop, int device): hip_hcc.cpp'],['../group__Device.html#gad9ee6822e3e55431811fb6a00f7a1c10',1,'hipDeviceGetProperties(hipDeviceProp_t *props, int device): hip_hcc.cpp']]], ['hipdevicegetsharedmemconfig',['hipDeviceGetSharedMemConfig',['../group__Device.html#ga1bb08f774a34a468d969a8a04791c9bb',1,'hipDeviceGetSharedMemConfig(hipSharedMemConfig *pConfig): hip_hcc.cpp'],['../group__Device.html#ga1bb08f774a34a468d969a8a04791c9bb',1,'hipDeviceGetSharedMemConfig(hipSharedMemConfig *pConfig): hip_hcc.cpp']]], @@ -31,7 +32,11 @@ var searchData= ['hipmallochost',['hipMallocHost',['../group__Memory.html#ga66399e729223ff5b66ffc16297c0710e',1,'hipMallocHost(void **ptr, size_t size): 
hip_hcc.cpp'],['../group__Memory.html#ga66399e729223ff5b66ffc16297c0710e',1,'hipMallocHost(void **ptr, size_t sizeBytes): hip_hcc.cpp']]], ['hipmemcpy',['hipMemcpy',['../group__Memory.html#gac1a055d288302edd641c6d7416858e1e',1,'hipMemcpy(void *dst, const void *src, size_t sizeBytes, hipMemcpyKind kind): hip_hcc.cpp'],['../group__Memory.html#gac1a055d288302edd641c6d7416858e1e',1,'hipMemcpy(void *dst, const void *src, size_t sizeBytes, hipMemcpyKind kind): hip_hcc.cpp']]], ['hipmemcpyasync',['hipMemcpyAsync',['../group__Memory.html#ga8ad5a0b13458917e1b9437732b21af54',1,'hipMemcpyAsync(void *dst, const void *src, size_t sizeBytes, hipMemcpyKind kind, hipStream_t stream=0): hip_hcc.cpp'],['../group__Memory.html#ga8ad5a0b13458917e1b9437732b21af54',1,'hipMemcpyAsync(void *dst, const void *src, size_t sizeBytes, hipMemcpyKind kind, hipStream_t stream): hip_hcc.cpp']]], + ['hipmemcpypeer',['hipMemcpyPeer',['../group__PeerToPeer.html#ga72ae9e7f498ab5684580892a5d7d8e2d',1,'hipMemcpyPeer(void *dst, int dstDevice, const void *src, int srcDevice, size_t sizeBytes): hip_hcc.cpp'],['../group__PeerToPeer.html#ga72ae9e7f498ab5684580892a5d7d8e2d',1,'hipMemcpyPeer(void *dst, int dstDevice, const void *src, int srcDevice, size_t sizeBytes): hip_hcc.cpp']]], ['hipmemcpypeerasync',['hipMemcpyPeerAsync',['../group__PeerToPeer.html#gab6211c18ca1e23252ef080cd6be855ca',1,'hipMemcpyPeerAsync(void *dst, int dstDevice, const void *src, int srcDevice, size_t sizeBytes, hipStream_t stream=0): hip_hcc.cpp'],['../group__PeerToPeer.html#gab6211c18ca1e23252ef080cd6be855ca',1,'hipMemcpyPeerAsync(void *dst, int dstDevice, const void *src, int srcDevice, size_t sizeBytes, hipStream_t stream): hip_hcc.cpp']]], + ['hipmemcpytosymbol',['hipMemcpyToSymbol',['../group__Memory.html#ga131ac5c1ba04e186112491cb9bf964bc',1,'hipMemcpyToSymbol(const char *symbolName, const void *src, size_t sizeBytes, size_t offset, hipMemcpyKind kind): 
hip_hcc.cpp'],['../group__Memory.html#ga131ac5c1ba04e186112491cb9bf964bc',1,'hipMemcpyToSymbol(const char *symbolName, const void *src, size_t count, size_t offset, hipMemcpyKind kind): hip_hcc.cpp']]], + ['hipmemset',['hipMemset',['../group__Memory.html#gac7441e74affcce4b8b69dba996c5ebc4',1,'hipMemset(void *dst, int value, size_t sizeBytes): hip_hcc.cpp'],['../group__Memory.html#gac7441e74affcce4b8b69dba996c5ebc4',1,'hipMemset(void *dst, int value, size_t sizeBytes): hip_hcc.cpp']]], + ['hipmemsetasync',['hipMemsetAsync',['../group__Memory.html#gaee4ed665ce0a60c661a809c175320a0c',1,'hipMemsetAsync(void *dst, int value, size_t sizeBytes, hipStream_t=0): hip_hcc.cpp'],['../group__Memory.html#gaee4ed665ce0a60c661a809c175320a0c',1,'hipMemsetAsync(void *dst, int value, size_t sizeBytes, hipStream_t stream): hip_hcc.cpp']]], ['hippeekatlasterror',['hipPeekAtLastError',['../group__Error.html#ga1dd660bc739f7e13edd34615660f0148',1,'hip_runtime_api.h']]], ['hipsetdevice',['hipSetDevice',['../group__Device.html#ga8ec0b093af0adadc7fe98bf33fa21620',1,'hipSetDevice(int device): hip_hcc.cpp'],['../group__Device.html#ga8ec0b093af0adadc7fe98bf33fa21620',1,'hipSetDevice(int device): hip_hcc.cpp']]], ['hipstreamcreatewithflags',['hipStreamCreateWithFlags',['../group__Stream.html#gaf2382e3cc6632332a8983a0f58e43494',1,'hipStreamCreateWithFlags(hipStream_t *stream, unsigned int flags): hip_hcc.cpp'],['../group__Stream.html#gaf2382e3cc6632332a8983a0f58e43494',1,'hipStreamCreateWithFlags(hipStream_t *stream, unsigned int flags): hip_hcc.cpp']]], diff --git a/projects/hip/docs/RuntimeAPI/html/search/search.js b/projects/hip/docs/RuntimeAPI/html/search/search.js index ef361551ef..57684da009 100644 --- a/projects/hip/docs/RuntimeAPI/html/search/search.js +++ b/projects/hip/docs/RuntimeAPI/html/search/search.js @@ -7,15 +7,15 @@ var indexSectionsWithContent = { - 0: "abcdeghilmnrstwxyz", + 0: "_abcdeghilmnoprstwxyz", 1: "dhit", 2: "h", 3: "h", - 4: "achlmnrstwxyz", + 4: "achlmnprstwxyz", 5: 
"dh", 6: "h", 7: "h", - 8: "h", + 8: "_ho", 9: "cdeghmst", 10: "bh" }; diff --git a/projects/hip/docs/RuntimeAPI/html/search/variables_1.js b/projects/hip/docs/RuntimeAPI/html/search/variables_1.js index 1ba67673c4..06748f6da1 100644 --- a/projects/hip/docs/RuntimeAPI/html/search/variables_1.js +++ b/projects/hip/docs/RuntimeAPI/html/search/variables_1.js @@ -2,5 +2,6 @@ var searchData= [ ['clockinstructionrate',['clockInstructionRate',['../structhipDeviceProp__t.html#a6fbf3b08a1a08ae700f1a06265f6666b',1,'hipDeviceProp_t']]], ['clockrate',['clockRate',['../structhipDeviceProp__t.html#a1dd15bee43692b8649dfbdc1adbaaf96',1,'hipDeviceProp_t']]], - ['computemode',['computeMode',['../structhipDeviceProp__t.html#ae7d9216f8583a703359d0b9373823f5d',1,'hipDeviceProp_t']]] + ['computemode',['computeMode',['../structhipDeviceProp__t.html#ae7d9216f8583a703359d0b9373823f5d',1,'hipDeviceProp_t']]], + ['concurrentkernels',['concurrentKernels',['../structhipDeviceProp__t.html#ad8461a28caf9c38c58cf358583b5bee3',1,'hipDeviceProp_t']]] ]; diff --git a/projects/hip/docs/RuntimeAPI/html/search/variables_4.js b/projects/hip/docs/RuntimeAPI/html/search/variables_4.js index 037341d156..12f7d72a4a 100644 --- a/projects/hip/docs/RuntimeAPI/html/search/variables_4.js +++ b/projects/hip/docs/RuntimeAPI/html/search/variables_4.js @@ -2,9 +2,11 @@ var searchData= [ ['major',['major',['../structhipDeviceProp__t.html#aec9e4173c2e34cc232300c415dbd5e4f',1,'hipDeviceProp_t']]], ['maxgridsize',['maxGridSize',['../structhipDeviceProp__t.html#ae529c23929f592120081fed31d877a55',1,'hipDeviceProp_t']]], + ['maxsharedmemorypermultiprocessor',['maxSharedMemoryPerMultiProcessor',['../structhipDeviceProp__t.html#aa1a32a7f387f6da845db7b228711fce8',1,'hipDeviceProp_t']]], ['maxthreadsdim',['maxThreadsDim',['../structhipDeviceProp__t.html#a8ebba6fc12f80c9a9cf9b9193f0da465',1,'hipDeviceProp_t']]], 
['maxthreadsperblock',['maxThreadsPerBlock',['../structhipDeviceProp__t.html#af971cf1ca3ec1f68ad09036c0cc672e0',1,'hipDeviceProp_t']]], ['maxthreadspermultiprocessor',['maxThreadsPerMultiProcessor',['../structhipDeviceProp__t.html#a23a39f4fd795addb3b125e9c3f6295ea',1,'hipDeviceProp_t']]], + ['memoryclockrate',['memoryClockRate',['../structhipDeviceProp__t.html#a6db0ab8e7e8cc13c84d7bb7f70226d5e',1,'hipDeviceProp_t']]], ['minor',['minor',['../structhipDeviceProp__t.html#abb51208e2509a7a1d107f0da69108938',1,'hipDeviceProp_t']]], ['multiprocessorcount',['multiProcessorCount',['../structhipDeviceProp__t.html#add8d9d2ad52aece9fd1dbe25c18d9d57',1,'hipDeviceProp_t']]] ]; diff --git a/projects/hip/docs/RuntimeAPI/html/search/variables_6.js b/projects/hip/docs/RuntimeAPI/html/search/variables_6.js index 44ba50e0b7..71b6a5df56 100644 --- a/projects/hip/docs/RuntimeAPI/html/search/variables_6.js +++ b/projects/hip/docs/RuntimeAPI/html/search/variables_6.js @@ -1,4 +1,5 @@ var searchData= [ - ['regsperblock',['regsPerBlock',['../structhipDeviceProp__t.html#a73c1c21648a901799ff6bef83c11135b',1,'hipDeviceProp_t']]] + ['pcibusid',['pciBusID',['../structhipDeviceProp__t.html#a1350f64d49b717ed3a06458f7549ccb0',1,'hipDeviceProp_t']]], + ['pcideviceid',['pciDeviceID',['../structhipDeviceProp__t.html#ae6aa845dc2d540f85098ea30be35f4eb',1,'hipDeviceProp_t']]] ]; diff --git a/projects/hip/docs/RuntimeAPI/html/search/variables_7.js b/projects/hip/docs/RuntimeAPI/html/search/variables_7.js index a582d60c6a..44ba50e0b7 100644 --- a/projects/hip/docs/RuntimeAPI/html/search/variables_7.js +++ b/projects/hip/docs/RuntimeAPI/html/search/variables_7.js @@ -1,4 +1,4 @@ var searchData= [ - ['sharedmemperblock',['sharedMemPerBlock',['../structhipDeviceProp__t.html#a3b9138678a0795c2677eddcfb1c67156',1,'hipDeviceProp_t']]] + ['regsperblock',['regsPerBlock',['../structhipDeviceProp__t.html#a73c1c21648a901799ff6bef83c11135b',1,'hipDeviceProp_t']]] ]; diff --git 
a/projects/hip/docs/RuntimeAPI/html/search/variables_8.js b/projects/hip/docs/RuntimeAPI/html/search/variables_8.js index b356d6e142..a582d60c6a 100644 --- a/projects/hip/docs/RuntimeAPI/html/search/variables_8.js +++ b/projects/hip/docs/RuntimeAPI/html/search/variables_8.js @@ -1,5 +1,4 @@ var searchData= [ - ['totalconstmem',['totalConstMem',['../structhipDeviceProp__t.html#a29880232c56120be3455ce00d5379665',1,'hipDeviceProp_t']]], - ['totalglobalmem',['totalGlobalMem',['../structhipDeviceProp__t.html#acedd6a2d23423441e4bf51c4a1b719f9',1,'hipDeviceProp_t']]] + ['sharedmemperblock',['sharedMemPerBlock',['../structhipDeviceProp__t.html#a3b9138678a0795c2677eddcfb1c67156',1,'hipDeviceProp_t']]] ]; diff --git a/projects/hip/docs/RuntimeAPI/html/search/variables_9.js b/projects/hip/docs/RuntimeAPI/html/search/variables_9.js index 46a1400a7b..b356d6e142 100644 --- a/projects/hip/docs/RuntimeAPI/html/search/variables_9.js +++ b/projects/hip/docs/RuntimeAPI/html/search/variables_9.js @@ -1,4 +1,5 @@ var searchData= [ - ['warpsize',['warpSize',['../structhipDeviceProp__t.html#af3357d33c004608bf05bc21a352be81b',1,'hipDeviceProp_t']]] + ['totalconstmem',['totalConstMem',['../structhipDeviceProp__t.html#a29880232c56120be3455ce00d5379665',1,'hipDeviceProp_t']]], + ['totalglobalmem',['totalGlobalMem',['../structhipDeviceProp__t.html#acedd6a2d23423441e4bf51c4a1b719f9',1,'hipDeviceProp_t']]] ]; diff --git a/projects/hip/docs/RuntimeAPI/html/search/variables_a.js b/projects/hip/docs/RuntimeAPI/html/search/variables_a.js index 250c203caf..46a1400a7b 100644 --- a/projects/hip/docs/RuntimeAPI/html/search/variables_a.js +++ b/projects/hip/docs/RuntimeAPI/html/search/variables_a.js @@ -1,4 +1,4 @@ var searchData= [ - ['x',['x',['../structdim3.html#ac866c05f83a28dac20a153fc65b3b16c',1,'dim3']]] + ['warpsize',['warpSize',['../structhipDeviceProp__t.html#af3357d33c004608bf05bc21a352be81b',1,'hipDeviceProp_t']]] ]; diff --git a/projects/hip/docs/RuntimeAPI/html/search/variables_b.js 
b/projects/hip/docs/RuntimeAPI/html/search/variables_b.js index 133dd9dc6e..250c203caf 100644 --- a/projects/hip/docs/RuntimeAPI/html/search/variables_b.js +++ b/projects/hip/docs/RuntimeAPI/html/search/variables_b.js @@ -1,4 +1,4 @@ var searchData= [ - ['y',['y',['../structdim3.html#a83e60e072f7e8bdfde6ac05053cbb370',1,'dim3']]] + ['x',['x',['../structdim3.html#ac866c05f83a28dac20a153fc65b3b16c',1,'dim3']]] ]; diff --git a/projects/hip/docs/RuntimeAPI/html/search/variables_c.js b/projects/hip/docs/RuntimeAPI/html/search/variables_c.js index e8bf38b99c..133dd9dc6e 100644 --- a/projects/hip/docs/RuntimeAPI/html/search/variables_c.js +++ b/projects/hip/docs/RuntimeAPI/html/search/variables_c.js @@ -1,4 +1,4 @@ var searchData= [ - ['z',['z',['../structdim3.html#a866e38993ecc4e76fd47311236c16b04',1,'dim3']]] + ['y',['y',['../structdim3.html#a83e60e072f7e8bdfde6ac05053cbb370',1,'dim3']]] ]; diff --git a/projects/hip/docs/RuntimeAPI/html/search/variables_d.html b/projects/hip/docs/RuntimeAPI/html/search/variables_d.html new file mode 100644 index 0000000000..f47799968f --- /dev/null +++ b/projects/hip/docs/RuntimeAPI/html/search/variables_d.html @@ -0,0 +1,26 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/projects/hip/docs/RuntimeAPI/html/search/variables_d.js b/projects/hip/docs/RuntimeAPI/html/search/variables_d.js new file mode 100644 index 0000000000..e8bf38b99c --- /dev/null +++ b/projects/hip/docs/RuntimeAPI/html/search/variables_d.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['z',['z',['../structdim3.html#a866e38993ecc4e76fd47311236c16b04',1,'dim3']]] +]; diff --git a/projects/hip/docs/RuntimeAPI/html/structdim3-members.html b/projects/hip/docs/RuntimeAPI/html/structdim3-members.html index 429ba0bf78..a11da0ba47 100644 --- a/projects/hip/docs/RuntimeAPI/html/structdim3-members.html +++ b/projects/hip/docs/RuntimeAPI/html/structdim3-members.html @@ -97,7 +97,7 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');

    diff --git a/projects/hip/docs/RuntimeAPI/html/structdim3.html b/projects/hip/docs/RuntimeAPI/html/structdim3.html index e305c9dfdc..9c5a6733d9 100644 --- a/projects/hip/docs/RuntimeAPI/html/structdim3.html +++ b/projects/hip/docs/RuntimeAPI/html/structdim3.html @@ -118,12 +118,12 @@ uint32_t 

    Detailed Description

    Struct for data in 3D


    The documentation for this struct was generated from the following file: diff --git a/projects/hip/docs/RuntimeAPI/html/structhipChannelFormatDesc-members.html b/projects/hip/docs/RuntimeAPI/html/structhipChannelFormatDesc-members.html index 87eaf5aa59..2670d6f1d7 100644 --- a/projects/hip/docs/RuntimeAPI/html/structhipChannelFormatDesc-members.html +++ b/projects/hip/docs/RuntimeAPI/html/structhipChannelFormatDesc-members.html @@ -94,7 +94,7 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search'); diff --git a/projects/hip/docs/RuntimeAPI/html/structhipChannelFormatDesc.html b/projects/hip/docs/RuntimeAPI/html/structhipChannelFormatDesc.html index d292794181..1ac6f4cde8 100644 --- a/projects/hip/docs/RuntimeAPI/html/structhipChannelFormatDesc.html +++ b/projects/hip/docs/RuntimeAPI/html/structhipChannelFormatDesc.html @@ -98,12 +98,12 @@ int _dummy  
    The documentation for this struct was generated from the following file: diff --git a/projects/hip/docs/RuntimeAPI/html/structhipDeviceArch__t-members.html b/projects/hip/docs/RuntimeAPI/html/structhipDeviceArch__t-members.html index 1b606771ad..5d14abe998 100644 --- a/projects/hip/docs/RuntimeAPI/html/structhipDeviceArch__t-members.html +++ b/projects/hip/docs/RuntimeAPI/html/structhipDeviceArch__t-members.html @@ -110,7 +110,7 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search'); diff --git a/projects/hip/docs/RuntimeAPI/html/structhipDeviceArch__t.html b/projects/hip/docs/RuntimeAPI/html/structhipDeviceArch__t.html index 7e28d4ee8e..8bd9d10bc6 100644 --- a/projects/hip/docs/RuntimeAPI/html/structhipDeviceArch__t.html +++ b/projects/hip/docs/RuntimeAPI/html/structhipDeviceArch__t.html @@ -95,59 +95,59 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search'); Public Attributes unsigned hasGlobalInt32Atomics: 1 - 32-bit integer atomics for global memory
    + 32-bit integer atomics for global memory.
      unsigned hasGlobalFloatAtomicExch: 1 - 32-bit float atomic exch for global memory
    + 32-bit float atomic exch for global memory.
      unsigned hasSharedInt32Atomics: 1 - 32-bit integer atomics for shared memory
    + 32-bit integer atomics for shared memory.
      unsigned hasSharedFloatAtomicExch: 1 - 32-bit float atomic exch for shared memory
    + 32-bit float atomic exch for shared memory.
      unsigned hasFloatAtomicAdd: 1 - 32-bit float atomic add in global and shared memory
    + 32-bit float atomic add in global and shared memory.
      unsigned hasGlobalInt64Atomics: 1 - 64-bit integer atomics for global memory
    + 64-bit integer atomics for global memory.
      unsigned hasSharedInt64Atomics: 1 - 64-bit integer atomics for shared memory
    + 64-bit integer atomics for shared memory.
      unsigned hasDoubles: 1 - double-precision floating point.
    + Double-precision floating point.
      unsigned hasWarpVote: 1 - warp vote instructions (__any, __all)
    + Warp vote instructions (__any, __all).
      unsigned hasWarpBallot: 1 - warp ballot instructions (__ballot)
    + Warp ballot instructions (__ballot).
      unsigned hasWarpShuffle: 1 - warp shuffle operations. (__shfl_*)
    + Warp shuffle operations. (__shfl_*).
      unsigned hasFunnelShift: 1 - funnel two words into one, with shift&mask caps
    + Funnel two words into one with shift&mask caps.
      unsigned hasThreadFenceSystem: 1 - __threadfence_system
    + __threadfence_system.
      unsigned hasSyncThreadsExt: 1 - __syncthreads_count, syncthreads_and, syncthreads_or
    + __syncthreads_count, syncthreads_and, syncthreads_or.
      unsigned hasSurfaceFuncs: 1 @@ -155,20 +155,20 @@ unsigned   unsigned has3dGrid: 1 - Grid and group dims are 3D (rather than 2D)
    + Grid and group dims are 3D (rather than 2D).
      unsigned hasDynamicParallelism: 1 - Dynamic parallellism.
    + Dynamic parallelism.
     
    The documentation for this struct was generated from the following file: diff --git a/projects/hip/docs/RuntimeAPI/html/structhipDeviceProp__t-members.html b/projects/hip/docs/RuntimeAPI/html/structhipDeviceProp__t-members.html index d37974762e..4df537b6cc 100644 --- a/projects/hip/docs/RuntimeAPI/html/structhipDeviceProp__t-members.html +++ b/projects/hip/docs/RuntimeAPI/html/structhipDeviceProp__t-members.html @@ -94,24 +94,29 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search'); clockInstructionRatehipDeviceProp_t clockRatehipDeviceProp_t computeModehipDeviceProp_t - l2CacheSizehipDeviceProp_t - majorhipDeviceProp_t - maxGridSizehipDeviceProp_t + concurrentKernelshipDeviceProp_t + l2CacheSizehipDeviceProp_t + majorhipDeviceProp_t + maxGridSizehipDeviceProp_t + maxSharedMemoryPerMultiProcessorhipDeviceProp_t maxThreadsDimhipDeviceProp_t maxThreadsPerBlockhipDeviceProp_t maxThreadsPerMultiProcessorhipDeviceProp_t - minorhipDeviceProp_t - multiProcessorCounthipDeviceProp_t - namehipDeviceProp_t - regsPerBlockhipDeviceProp_t - sharedMemPerBlockhipDeviceProp_t - totalConstMemhipDeviceProp_t - totalGlobalMemhipDeviceProp_t - warpSizehipDeviceProp_t + memoryClockRatehipDeviceProp_t + minorhipDeviceProp_t + multiProcessorCounthipDeviceProp_t + namehipDeviceProp_t + pciBusIDhipDeviceProp_t + pciDeviceIDhipDeviceProp_t + regsPerBlockhipDeviceProp_t + sharedMemPerBlockhipDeviceProp_t + totalConstMemhipDeviceProp_t + totalGlobalMemhipDeviceProp_t + warpSizehipDeviceProp_t diff --git a/projects/hip/docs/RuntimeAPI/html/structhipDeviceProp__t.html b/projects/hip/docs/RuntimeAPI/html/structhipDeviceProp__t.html index 09483a9577..06e419e325 100644 --- a/projects/hip/docs/RuntimeAPI/html/structhipDeviceProp__t.html +++ b/projects/hip/docs/RuntimeAPI/html/structhipDeviceProp__t.html @@ -101,39 +101,43 @@ char   size_t totalGlobalMem - Size of global memory region (in bytes)
    + Size of global memory region (in bytes).
      size_t sharedMemPerBlock - Size of shared memory region (in bytes)
    + Size of shared memory region (in bytes).
      int regsPerBlock - registers per block
    + Registers per block.
      int warpSize - warp size
    + Warp size.
      int maxThreadsPerBlock - max work items per work group or workgroup max size
    + Max work items per work group or workgroup max size.
      int maxThreadsDim [3] - max number of threads in each dimension (XYZ) of a block
    + Max number of threads in each dimension (XYZ) of a block.
      int maxGridSize [3] - max grid dimensions (XYZ)
    + Max grid dimensions (XYZ).
      int clockRate - max clock frequency of the multiProcessors, in khz.
    + Max clock frequency of the multiProcessors in khz.
      + +int memoryClockRate + Max memory clock frequency in khz.
    size_t totalConstMem - Size of shared memory region (in bytes)
    + Size of constant memory region (in bytes).
      int major @@ -145,7 +149,7 @@ int   int multiProcessorCount - number of multi-processors (compute units)
    + Number of multi-processors (compute units).
      int l2CacheSize @@ -167,16 +171,32 @@ int hipDeviceArch_t arch  Architectural feature flags. New for HIP.
      + +int concurrentKernels + Device can possibly execute multiple kernels concurrently.
    +  + +int pciBusID + PCI Bus ID.
    +  + +int pciDeviceID + PCI Device ID.
    +  + +size_t maxSharedMemoryPerMultiProcessor + Maximum Shared Memory Per Multiprocessor.

    Detailed Description

    hipDeviceProp


    The documentation for this struct was generated from the following file: diff --git a/projects/hip/docs/RuntimeAPI/html/structhipEvent__t-members.html b/projects/hip/docs/RuntimeAPI/html/structhipEvent__t-members.html index 22a528a157..1f7daca251 100644 --- a/projects/hip/docs/RuntimeAPI/html/structhipEvent__t-members.html +++ b/projects/hip/docs/RuntimeAPI/html/structhipEvent__t-members.html @@ -94,7 +94,7 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search'); diff --git a/projects/hip/docs/RuntimeAPI/html/structhipEvent__t.html b/projects/hip/docs/RuntimeAPI/html/structhipEvent__t.html index e904fd699b..202c20a0bb 100644 --- a/projects/hip/docs/RuntimeAPI/html/structhipEvent__t.html +++ b/projects/hip/docs/RuntimeAPI/html/structhipEvent__t.html @@ -98,12 +98,12 @@ struct ihipEvent_t *   
    The documentation for this struct was generated from the following file: diff --git a/projects/hip/docs/RuntimeAPI/html/structihipDevice__t-members.html b/projects/hip/docs/RuntimeAPI/html/structihipDevice__t-members.html index 5018e17cdd..5bc69024fc 100644 --- a/projects/hip/docs/RuntimeAPI/html/structihipDevice__t-members.html +++ b/projects/hip/docs/RuntimeAPI/html/structihipDevice__t-members.html @@ -102,7 +102,7 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search'); diff --git a/projects/hip/docs/RuntimeAPI/html/structihipDevice__t.html b/projects/hip/docs/RuntimeAPI/html/structihipDevice__t.html index 9b98f8364e..0a91addd72 100644 --- a/projects/hip/docs/RuntimeAPI/html/structihipDevice__t.html +++ b/projects/hip/docs/RuntimeAPI/html/structihipDevice__t.html @@ -126,12 +126,12 @@ unsigned _compute_units 
    The documentation for this struct was generated from the following file: diff --git a/projects/hip/docs/RuntimeAPI/html/structihipEvent__t-members.html b/projects/hip/docs/RuntimeAPI/html/structihipEvent__t-members.html index 0776e7e875..16bad989ee 100644 --- a/projects/hip/docs/RuntimeAPI/html/structihipEvent__t-members.html +++ b/projects/hip/docs/RuntimeAPI/html/structihipEvent__t-members.html @@ -98,7 +98,7 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search'); diff --git a/projects/hip/docs/RuntimeAPI/html/structihipEvent__t.html b/projects/hip/docs/RuntimeAPI/html/structihipEvent__t.html index 9c01e12246..a378a2a2d4 100644 --- a/projects/hip/docs/RuntimeAPI/html/structihipEvent__t.html +++ b/projects/hip/docs/RuntimeAPI/html/structihipEvent__t.html @@ -110,12 +110,12 @@ uint64_t _timestamp 
    The documentation for this struct was generated from the following file: diff --git a/projects/hip/docs/RuntimeAPI/html/structihipStream__t-members.html b/projects/hip/docs/RuntimeAPI/html/structihipStream__t-members.html index d46a3b9656..57df3ae1ba 100644 --- a/projects/hip/docs/RuntimeAPI/html/structihipStream__t-members.html +++ b/projects/hip/docs/RuntimeAPI/html/structihipStream__t-members.html @@ -98,7 +98,7 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search'); diff --git a/projects/hip/docs/RuntimeAPI/html/structihipStream__t.html b/projects/hip/docs/RuntimeAPI/html/structihipStream__t.html index ba671e68af..264d5ffd81 100644 --- a/projects/hip/docs/RuntimeAPI/html/structihipStream__t.html +++ b/projects/hip/docs/RuntimeAPI/html/structihipStream__t.html @@ -114,12 +114,12 @@ ihipCommand_t _last_comman  
    The documentation for this struct was generated from the following file: diff --git a/projects/hip/docs/RuntimeAPI/html/structtexture-members.html b/projects/hip/docs/RuntimeAPI/html/structtexture-members.html index 12b5319739..42ea0039e3 100644 --- a/projects/hip/docs/RuntimeAPI/html/structtexture-members.html +++ b/projects/hip/docs/RuntimeAPI/html/structtexture-members.html @@ -97,7 +97,7 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search'); diff --git a/projects/hip/docs/RuntimeAPI/html/structtexture.html b/projects/hip/docs/RuntimeAPI/html/structtexture.html index bafbc0f8e3..4f8d592be9 100644 --- a/projects/hip/docs/RuntimeAPI/html/structtexture.html +++ b/projects/hip/docs/RuntimeAPI/html/structtexture.html @@ -107,7 +107,7 @@ const T * _dataPtr  - Public Attributes inherited from textureReference -hipTextureFilterMode filterMode +hipTextureFilterMode filterMode   bool normalized @@ -117,12 +117,12 @@ bool normalized 
    The documentation for this struct was generated from the following file: diff --git a/projects/hip/docs/RuntimeAPI/html/structtextureReference-members.html b/projects/hip/docs/RuntimeAPI/html/structtextureReference-members.html index 8b3c890f40..89df171ea7 100644 --- a/projects/hip/docs/RuntimeAPI/html/structtextureReference-members.html +++ b/projects/hip/docs/RuntimeAPI/html/structtextureReference-members.html @@ -96,7 +96,7 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search'); diff --git a/projects/hip/docs/RuntimeAPI/html/structtextureReference.html b/projects/hip/docs/RuntimeAPI/html/structtextureReference.html index d5fd3910fc..46cb4d44e7 100644 --- a/projects/hip/docs/RuntimeAPI/html/structtextureReference.html +++ b/projects/hip/docs/RuntimeAPI/html/structtextureReference.html @@ -103,7 +103,7 @@ Inheritance diagram for textureReference:

    Public Attributes

    -hipTextureFilterMode filterMode +hipTextureFilterMode filterMode   bool normalized @@ -113,12 +113,12 @@ bool normalized 
    The documentation for this struct was generated from the following file: diff --git a/projects/hip/docs/markdown/hip_kernel_language.md b/projects/hip/docs/markdown/hip_kernel_language.md index e7a6baa1a9..4d3e72ce65 100644 --- a/projects/hip/docs/markdown/hip_kernel_language.md +++ b/projects/hip/docs/markdown/hip_kernel_language.md @@ -363,28 +363,50 @@ Following is the list of supported double precision mathematical functions. [1] __RETURN_TYPE is dependent on compiler. It is usually 'int' for C compilers and 'bool' for C++ compilers. [↩](#a2) ### Integer Intrinsics -Following is the list of supported integer intrinsics. +Following is the list of supported integer intrinsics. Note that intrinsics are supported on device only. -| **Function** | **Supported on Host** | **Supported on Device** | -| --- | --- | --- | -| unsigned int __brev ( unsigned int x )
    Reverse the bit order of a 32 bit unsigned integer. | ✓ | ✓ | -| unsigned long long int __brevll ( unsigned long long int x )
    Reverse the bit order of a 64 bit unsigned integer. | ✓ | ✓ | -| int __clz ( int x )
    Return the number of consecutive high-order zero bits in a 32 bit integer. | ✓ | ✓ | -| unsigned int __clz(unsigned int x)
    Return the number of consecutive high-order zero bits in 32 bit unsigned integer. | ✓ | ✗ | -| int __clzll ( long long int x )
    Count the number of consecutive high-order zero bits in a 64 bit integer. | ✓ | ✓ | -| unsigned int __clzll(long long int x)
    Return the number of consecutive high-order zero bits in 64 bit signed integer. | ✓ | ✗ | -| unsigned int __ffs(unsigned int x)
    Find the position of least signigicant bit set to 1 in a 32 bit unsigned integer.[1](#f3) | ✓ | ✓| -| unsigned int __ffs(int x)
    Find the position of least signigicant bit set to 1 in a 32 bit signed integer. | ✗ | ✓ | -| unsigned int __ffsll(unsigned long long int x)
    Find the position of least signigicant bit set to 1 in a 64 bit unsigned integer.[1](#f3) | ✓ | ✓ | -| unsigned int __ffsll(long long int x)
    Find the position of least signigicant bit set to 1 in a 64 bit signed integer. | ✗ | ✓ | -| unsigned int __popc ( unsigned int x )
    Count the number of bits that are set to 1 in a 32 bit integer. | ✓ | ✓ | -| int __popcll ( unsigned long long int x )
    Count the number of bits that are set to 1 in a 64 bit integer. | ✓ | ✓ | +| **Function** | +| --- | +| unsigned int __brev ( unsigned int x )
    Reverse the bit order of a 32 bit unsigned integer. | +| unsigned long long int __brevll ( unsigned long long int x )
    Reverse the bit order of a 64 bit unsigned integer. | +| int __clz ( int x )
    Return the number of consecutive high-order zero bits in a 32 bit integer. | +| unsigned int __clz(unsigned int x)
    Return the number of consecutive high-order zero bits in 32 bit unsigned integer. | +| int __clzll ( long long int x )
    Count the number of consecutive high-order zero bits in a 64 bit integer. | +| unsigned int __clzll(long long int x)
    Return the number of consecutive high-order zero bits in 64 bit signed integer. | +| unsigned int __ffs(unsigned int x)
    Find the position of least significant bit set to 1 in a 32 bit unsigned integer.[1](#f3) | +| unsigned int __ffs(int x)
    Find the position of least significant bit set to 1 in a 32 bit signed integer. | +| unsigned int __ffsll(unsigned long long int x)
    Find the position of least significant bit set to 1 in a 64 bit unsigned integer.[1](#f3) | +| unsigned int __ffsll(long long int x)
    Find the position of least significant bit set to 1 in a 64 bit signed integer. | +| unsigned int __popc ( unsigned int x )
    Count the number of bits that are set to 1 in a 32 bit integer. | +| int __popcll ( unsigned long long int x )
    Count the number of bits that are set to 1 in a 64 bit integer. | [1] The hcc implementation of __ffs() and __ffsll() contains code to add a constant +1 to produce the ffs result format. For the cases where this overhead is not acceptable and programmer is willing to specialize for the platform, hcc provides hc::__lastbit_u32_u32(unsigned int input) and hc::__lastbit_u32_u64(unsigned long long int input). -The index returned by __lastbit_ instructions starts at 0, while for ffs the index starts at 1. +The index returned by __lastbit_ instructions starts at -1, while for ffs the index starts at 0. +### Floating-point Intrinsics +Following is the list of supported floating-point intrinsics. Note that intrinsics are supported on device only. + +| **Function** | +| --- | +| float __cosf ( float x )
    Calculate the fast approximate cosine of the input argument. | +| float __expf ( float x )
    Calculate the fast approximate base e exponential of the input argument. | +| float __frsqrt_rn ( float x )
    Compute `1 / √x` in round-to-nearest-even mode. | +| float __fsqrt_rd ( float x )
    Compute `√x` in round-down mode. | +| float __fsqrt_rn ( float x )
    Compute `√x` in round-to-nearest-even mode. | +| float __fsqrt_ru ( float x )
    Compute `√x` in round-up mode. | +| float __fsqrt_rz ( float x )
    Compute `√x` in round-towards-zero mode. | +| float __log10f ( float x )
    Calculate the fast approximate base 10 logarithm of the input argument. | +| float __log2f ( float x )
    Calculate the fast approximate base 2 logarithm of the input argument. | +| float __logf ( float x )
    Calculate the fast approximate base e logarithm of the input argument. | +| float __powf ( float x, float y )
    Calculate the fast approximate of x<sup>y</sup>. | +| float __sinf ( float x )
    Calculate the fast approximate sine of the input argument. | +| float __tanf ( float x )
    Calculate the fast approximate tangent of the input argument. | +| double __dsqrt_rd ( double x )
    Compute `√x` in round-down mode. | +| double __dsqrt_rn ( double x )
    Compute `√x` in round-to-nearest-even mode. | +| double __dsqrt_ru ( double x )
    Compute `√x` in round-up mode. | +| double __dsqrt_rz ( double x )
    Compute `√x` in round-towards-zero mode. | ## Texture Functions Texture functions are not supported. @@ -435,8 +457,8 @@ HIP supports the following atomic operations. | int atomicMax(int* address, int val) | ✓ | ✓ | | unsigned int atomicMax(unsigned int* address,unsigned int val) | ✓ | ✓ | | unsigned long long int atomicMax(unsigned long long int* address,unsigned long long int val) | ✓ | ✓ | -| unsigned int atomicInc(unsigned int* address)| ✓
    Takes one argument. | ✓
    Wrapping increment,takes two arguments. | -| unsigned int atomicDec(unsigned int* address)| ✓
    Takes one argument. | ✓
    Wrapping decrement,takes two arguments. | +| unsigned int atomicInc(unsigned int* address)| ✗ | ✓ | +| unsigned int atomicDec(unsigned int* address)| ✗ | ✓ | | int atomicCAS(int* address, int compare, int val) | ✓ | ✓ | | unsigned int atomicCAS(unsigned int* address,unsigned int compare,unsigned int val) | ✓ | ✓ | | unsigned long long int atomicCAS(unsigned long long int* address,unsigned long long int compare,unsigned long long int val) | ✓ | ✓ | @@ -453,7 +475,6 @@ HIP supports the following atomic operations. ### Caveats and Features Under-Development: - HIP enables atomic operations on 32-bit integers. Additionally, it supports an atomic float add. AMD hardware, however, implements the float add using a CAS loop, so this function may not perform efficiently. -- hcc currently maps `__shared__` atomics to `__device__` atomics. Optimal support is under development. - wrapping increment and decrement are under development. ## Warp Cross-Lane Functions diff --git a/projects/hip/docs/markdown/hip_porting_guide.md b/projects/hip/docs/markdown/hip_porting_guide.md index 9f1c7c67bd..cb599a5c4a 100644 --- a/projects/hip/docs/markdown/hip_porting_guide.md +++ b/projects/hip/docs/markdown/hip_porting_guide.md @@ -290,7 +290,7 @@ hipcc adds the necessary libraries for HIP as well as for the accelerator compil ### -lm Option -hcc does not add “-lm” by default. If you see errors about missing math functions at link time (e.g., "sqrt@@GLIBC_2.2.5"), ensure that “-lm” is in the link options. +hipcc adds -lm by default to the link command. ## Linking Code With Other Compilers diff --git a/projects/hip/include/hcc_detail/hip_runtime.h b/projects/hip/include/hcc_detail/hip_runtime.h index 2a81415408..a9efa1db8b 100644 --- a/projects/hip/include/hcc_detail/hip_runtime.h +++ b/projects/hip/include/hcc_detail/hip_runtime.h @@ -66,8 +66,8 @@ THE SOFTWARE. 
// 32-bit Atomics: #define __HIP_ARCH_HAS_GLOBAL_INT32_ATOMICS__ (1) #define __HIP_ARCH_HAS_GLOBAL_FLOAT_ATOMIC_EXCH__ (1) -#define __HIP_ARCH_HAS_SHARED_INT32_ATOMICS__ (0) -#define __HIP_ARCH_HAS_SHARED_FLOAT_ATOMIC_EXCH__ (0) +#define __HIP_ARCH_HAS_SHARED_INT32_ATOMICS__ (1) +#define __HIP_ARCH_HAS_SHARED_FLOAT_ATOMIC_EXCH__ (1) #define __HIP_ARCH_HAS_FLOAT_ATOMIC_ADD__ (0) // 64-bit Atomics: @@ -108,6 +108,12 @@ THE SOFTWARE. #define __HCC_C__ #endif + +// TODO - hipify-clang - change to use the function call. +//#define warpSize hc::__wavesize() +const int warpSize = 64; + + #define clock_t long long int __device__ inline long long int clock64() { return (long long int)hc::__clock_u64(); }; __device__ inline clock_t clock() { return (clock_t)hc::__clock_u64(); }; @@ -195,18 +201,6 @@ __device__ inline unsigned long long int atomicMax(unsigned long long int* addre return (long long int)hc::atomic_fetch_max((uint64_t*)address,(uint64_t)val); } -//atomicInc() -__device__ inline unsigned int atomicInc(unsigned int* address) -{ - return hc::atomic_fetch_inc(address); -} - -//atomicDec() -__device__ inline unsigned int atomicDec(unsigned int* address) -{ - return hc::atomic_fetch_dec(address); -} - //atomicCAS() __device__ inline int atomicCAS(int* address, int compare, int val) { @@ -351,49 +345,48 @@ __device__ inline int __any( int input) else return 0; } - __device__ inline unsigned long long int __ballot( int input) { return hc::__ballot( input); } // warp shuffle functions -__device__ inline int __shfl(int input, int lane, int width) +__device__ inline int __shfl(int input, int lane, int width=warpSize) { return hc::__shfl(input,lane,width); } -__device__ inline int __shfl_up(int input, unsigned int lane_delta, int width) +__device__ inline int __shfl_up(int input, unsigned int lane_delta, int width=warpSize) { return hc::__shfl_up(input,lane_delta,width); } -__device__ inline int __shfl_down(int input, unsigned int lane_delta, int width) +__device__ inline 
int __shfl_down(int input, unsigned int lane_delta, int width=warpSize) { return hc::__shfl_down(input,lane_delta,width); } -__device__ inline int __shfl_xor(int input, int lane_mask, int width) +__device__ inline int __shfl_xor(int input, int lane_mask, int width=warpSize) { return hc::__shfl_xor(input,lane_mask,width); } -__device__ inline float __shfl(float input, int lane, int width) +__device__ inline float __shfl(float input, int lane, int width=warpSize) { return hc::__shfl(input,lane,width); } -__device__ inline float __shfl_up(float input, unsigned int lane_delta, int width) +__device__ inline float __shfl_up(float input, unsigned int lane_delta, int width=warpSize) { return hc::__shfl_up(input,lane_delta,width); } -__device__ inline float __shfl_down(float input, unsigned int lane_delta, int width) +__device__ inline float __shfl_down(float input, unsigned int lane_delta, int width=warpSize) { return hc::__shfl_down(input,lane_delta,width); } -__device__ inline float __shfl_xor(float input, int lane_mask, int width) +__device__ inline float __shfl_xor(float input, int lane_mask, int width=warpSize) { return hc::__shfl_xor(input,lane_mask,width); } @@ -452,7 +445,6 @@ __device__ inline float __dsqrt_rz(double x) {return hc::fast_math::sqrt(x); }; #define hipGridDim_z (hc_get_num_groups(0)) -extern int warpSize ; #define __syncthreads() hc_barrier(CLK_LOCAL_MEM_FENCE) @@ -490,7 +482,8 @@ extern int warpSize ; #ifdef __HCC_CPP__ -hc::accelerator_view *ihipLaunchKernel(hipStream_t stream); +hipStream_t ihipPreLaunchKernel(hipStream_t stream, hc::accelerator_view **av); +void ihipPostLaunchKernel(hipStream_t stream, hc::completion_future &cf); #if not defined(DISABLE_GRID_LAUNCH) #define hipLaunchKernel(_kernelName, _numBlocks3D, _blockDim3D, _groupMemBytes, _stream, ...) 
\ @@ -505,12 +498,13 @@ do {\ lp.groupMemBytes = _groupMemBytes;\ hc::completion_future cf;\ lp.cf = &cf; \ - lp.av = (ihipLaunchKernel(_stream)); \ + hipStream_t trueStream = (ihipPreLaunchKernel(_stream, &lp.av)); \ if (HIP_TRACE_API) {\ fprintf(stderr, "hiptrace1: launch '%s' gridDim:[%d.%d.%d] groupDim:[%d.%d.%d] groupMem:+%d stream=%p\n", \ #_kernelName, lp.gridDim.z, lp.gridDim.y, lp.gridDim.x, lp.groupDim.z, lp.groupDim.y, lp.groupDim.x, lp.groupMemBytes, (void*)(_stream));\ }\ _kernelName (lp, __VA_ARGS__);\ + ihipPostLaunchKernel(trueStream, cf);\ } while(0) #else @@ -528,12 +522,13 @@ do {\ lp.groupMemBytes = _groupMemBytes;\ hc::completion_future cf;\ lp.cf = &cf; \ - lp.av = (ihipLaunchKernel(_stream)); \ + hipStream_t trueStream = (ihipPreLaunchKernel(_stream, &lp.av)); \ if (HIP_TRACE_API) {\ fprintf(stderr, "hiptrace1: launch '%s' gridDim:[%d.%d.%d] groupDim:[%d.%d.%d] groupMem:+%d stream=%p\n", \ #_kernelName, lp.gridDim.z, lp.gridDim.y, lp.gridDim.x, lp.groupDim.z, lp.groupDim.y, lp.groupDim.x, lp.groupMemBytes, (void*)(_stream));\ }\ _kernelName (lp, __VA_ARGS__);\ + ihipPostLaunchKernel(trueStream, cf);\ } while(0) /*end hipLaunchKernel */ #endif diff --git a/projects/hip/include/hcc_detail/hip_runtime_api.h b/projects/hip/include/hcc_detail/hip_runtime_api.h index 225b065654..5fe398b84c 100644 --- a/projects/hip/include/hcc_detail/hip_runtime_api.h +++ b/projects/hip/include/hcc_detail/hip_runtime_api.h @@ -105,6 +105,8 @@ enum hipMemcpyKind { } ; + + // Doxygen end group GlobalDefs /** @} */ @@ -113,7 +115,7 @@ enum hipMemcpyKind { // The handle allows the async commands to use the stream even if the parent hipStream_t goes out-of-scope. 
-typedef struct ihipStream_t * hipStream_t; +typedef class ihipStream_t * hipStream_t; /* @@ -128,6 +130,7 @@ typedef struct hipEvent_t { + #ifdef __cplusplus } /* extern "C" */ #endif @@ -634,6 +637,11 @@ hipError_t hipEventQuery(hipEvent_t event) ; */ +/** + * @brief Return attributes for the specified pointer + */ +hipError_t hipPointerGetAttributes(hipPointerAttribute_t *attributes, void* ptr) ; + /** * @brief Allocate memory on the default accelerator diff --git a/projects/hip/include/hip_runtime.h b/projects/hip/include/hip_runtime.h index 59d3d6c4c9..de2a49d975 100644 --- a/projects/hip/include/hip_runtime.h +++ b/projects/hip/include/hip_runtime.h @@ -43,6 +43,11 @@ THE SOFTWARE. #include #include +#if __cplusplus > 199711L +#include +#endif + + #include #if defined(__HIP_PLATFORM_HCC__) and not defined (__HIP_PLATFORM_NVCC__) diff --git a/projects/hip/include/hip_runtime_api.h b/projects/hip/include/hip_runtime_api.h index 75f5807b6e..61e2b17407 100644 --- a/projects/hip/include/hip_runtime_api.h +++ b/projects/hip/include/hip_runtime_api.h @@ -33,34 +33,34 @@ THE SOFTWARE. #include typedef struct { - // 32-bit Atomics: - unsigned hasGlobalInt32Atomics : 1; ///< 32-bit integer atomics for global memory - unsigned hasGlobalFloatAtomicExch : 1; ///< 32-bit float atomic exch for global memory - unsigned hasSharedInt32Atomics : 1; ///< 32-bit integer atomics for shared memory - unsigned hasSharedFloatAtomicExch : 1; ///< 32-bit float atomic exch for shared memory - unsigned hasFloatAtomicAdd : 1; ///< 32-bit float atomic add in global and shared memory + // 32-bit Atomics + unsigned hasGlobalInt32Atomics : 1; ///< 32-bit integer atomics for global memory. + unsigned hasGlobalFloatAtomicExch : 1; ///< 32-bit float atomic exch for global memory. + unsigned hasSharedInt32Atomics : 1; ///< 32-bit integer atomics for shared memory. + unsigned hasSharedFloatAtomicExch : 1; ///< 32-bit float atomic exch for shared memory. 
+ unsigned hasFloatAtomicAdd : 1; ///< 32-bit float atomic add in global and shared memory. - // 64-bit Atomics: - unsigned hasGlobalInt64Atomics : 1; ///< 64-bit integer atomics for global memory - unsigned hasSharedInt64Atomics : 1; ///< 64-bit integer atomics for shared memory + // 64-bit Atomics + unsigned hasGlobalInt64Atomics : 1; ///< 64-bit integer atomics for global memory. + unsigned hasSharedInt64Atomics : 1; ///< 64-bit integer atomics for shared memory. // Doubles - unsigned hasDoubles : 1; ///< double-precision floating point. + unsigned hasDoubles : 1; ///< Double-precision floating point. - // Warp cross-lane operations: - unsigned hasWarpVote : 1; ///< warp vote instructions (__any, __all) - unsigned hasWarpBallot : 1; ///< warp ballot instructions (__ballot) - unsigned hasWarpShuffle : 1; ///< warp shuffle operations. (__shfl_*) - unsigned hasFunnelShift : 1; ///< funnel two words into one, with shift&mask caps + // Warp cross-lane operations + unsigned hasWarpVote : 1; ///< Warp vote instructions (__any, __all). + unsigned hasWarpBallot : 1; ///< Warp ballot instructions (__ballot). + unsigned hasWarpShuffle : 1; ///< Warp shuffle operations. (__shfl_*). + unsigned hasFunnelShift : 1; ///< Funnel two words into one with shift&mask caps. // Sync - unsigned hasThreadFenceSystem : 1; ///< __threadfence_system - unsigned hasSyncThreadsExt : 1; ///< __syncthreads_count, syncthreads_and, syncthreads_or + unsigned hasThreadFenceSystem : 1; ///< __threadfence_system. + unsigned hasSyncThreadsExt : 1; ///< __syncthreads_count, syncthreads_and, syncthreads_or. // Misc - unsigned hasSurfaceFuncs : 1; ///< Surface functions - unsigned has3dGrid : 1; ///< Grid and group dims are 3D (rather than 2D) - unsigned hasDynamicParallelism : 1; ///< Dynamic parallelism + unsigned hasSurfaceFuncs : 1; ///< Surface functions. + unsigned has3dGrid : 1; ///< Grid and group dims are 3D (rather than 2D). + unsigned hasDynamicParallelism : 1; ///< Dynamic parallelism. 
} hipDeviceArch_t; @@ -72,31 +72,58 @@ typedef struct { * */ typedef struct hipDeviceProp_t { - char name[256]; ///< Device name - size_t totalGlobalMem; ///< Size of global memory region (in bytes) - size_t sharedMemPerBlock; ///< Size of shared memory region (in bytes) - int regsPerBlock ; ///< registers per block - int warpSize ; ///< warp size - int maxThreadsPerBlock; ///< max work items per work group or workgroup max size - int maxThreadsDim[3]; ///< max number of threads in each dimension (XYZ) of a block - int maxGridSize[3]; ///< max grid dimensions (XYZ) - int clockRate ; ///< max clock frequency of the multiProcessors, in khz. - - size_t totalConstMem; ///< Size of shared memory region (in bytes) - int major ; ///< Major compute capability. On HCC, this is an approximation and features may differ from CUDA CC. See the arch feature flags for portable ways to query feature caps. - int minor; ///< Minor compute capability. On HCC, this is an approximation and features may differ from CUDA CC. See the arch feature flags for portable ways to query feature caps. - int multiProcessorCount; ///< number of multi-processors (compute units) - int l2CacheSize; ///< L2 cache size - int maxThreadsPerMultiProcessor; ///< Maximum resident threads per multi-processor - int computeMode; ///< Compute mode - - int clockInstructionRate ; ///< Frequency in khz of the timer used by the device-side "clock*" instructions. New for HIP. - - hipDeviceArch_t arch; ///< Architectural feature flags. New for HIP. - int concurrentKernels; ///< Device can possibly execute multiple kernels concurrently + char name[256]; ///< Device name. + size_t totalGlobalMem; ///< Size of global memory region (in bytes). + size_t sharedMemPerBlock; ///< Size of shared memory region (in bytes). + int regsPerBlock; ///< Registers per block. + int warpSize; ///< Warp size. + int maxThreadsPerBlock; ///< Max work items per work group or workgroup max size. 
+ int maxThreadsDim[3]; ///< Max number of threads in each dimension (XYZ) of a block. + int maxGridSize[3]; ///< Max grid dimensions (XYZ). + int clockRate; ///< Max clock frequency of the multiProcessors in khz. + int memoryClockRate; ///< Max global memory clock frequency in khz. + int memoryBusWidth; ///< Global memory bus width in bits. + size_t totalConstMem; ///< Size of shared memory region (in bytes). + int major; ///< Major compute capability. On HCC, this is an approximation and features may differ from CUDA CC. See the arch feature flags for portable ways to query feature caps. + int minor; ///< Minor compute capability. On HCC, this is an approximation and features may differ from CUDA CC. See the arch feature flags for portable ways to query feature caps. + int multiProcessorCount; ///< Number of multi-processors (compute units). + int l2CacheSize; ///< L2 cache size. + int maxThreadsPerMultiProcessor; ///< Maximum resident threads per multi-processor. + int computeMode; ///< Compute mode. + int clockInstructionRate; ///< Frequency in khz of the timer used by the device-side "clock*" instructions. New for HIP. + hipDeviceArch_t arch; ///< Architectural feature flags. New for HIP. + int concurrentKernels; ///< Device can possibly execute multiple kernels concurrently. + int pciBusID; ///< PCI Bus ID. + int pciDeviceID; ///< PCI Device ID. + size_t maxSharedMemoryPerMultiProcessor; ///< Maximum Shared Memory Per Multiprocessor. + int isMultiGpuBoard; ///< 1 if device is on a multi-GPU board, 0 if not. } hipDeviceProp_t; +/** + * Memory type (for pointer attributes) + */ +enum hipMemoryType { + hipMemoryTypeHost, ///< Memory is physically located on host + hipMemoryTypeDevice ///< Memory is physically located on device. 
(see deviceId for specific device) +}; + + + +/** + * Pointer attributes + */ +typedef struct hipPointerAttribute_t { + enum hipMemoryType memoryType; + int device; + void *devicePointer; + void *hostPointer; + int isManaged; + unsigned allocationFlags; /* flags specified when memory was allocated*/ + /* peers? */ +} hipPointerAttribute_t; + + // hack to get these to show up in Doxygen: /** * @defgroup GlobalDefs Global enum and defines @@ -106,24 +133,26 @@ typedef struct hipDeviceProp_t { /* - * @brief hipError_t + asdasd* @brief hipError_t * @enum * @ingroup Enumerations */ +// Developer note - when updating these, update the hipErrorName and hipErrorString functions typedef enum hipError_t { - hipSuccess = 0 ///< Successful completion. - ,hipErrorMemoryAllocation ///< Memory allocation error. - ,hipErrorMemoryFree ///< Memory free error. - ,hipErrorUnknownSymbol ///< Unknown symbol - ,hipErrorOutOfResources ///< Out of resources error - ,hipErrorInvalidValue ///< One or more of the parameters passed to the API call is NULL or not in an acceptable range. - ,hipErrorInvalidResourceHandle ///< Resource handle (hipEvent_t or hipStream_t) invalid. - ,hipErrorInvalidDevice ///< DeviceID must be in range 0...#compute-devices. - ,hipErrorNoDevice ///< Call to hipGetDeviceCount returned 0 devices - ,hipErrorNotReady ///< indicates that asynchronous operations enqueued earlier are not ready. This is not actually an error, but is used to distinguish from hipSuccess (which indicates completion). APIs that return this error include hipEventQuery and hipStreamQuery. + hipSuccess = 0 ///< Successful completion. + ,hipErrorMemoryAllocation ///< Memory allocation error. + ,hipErrorMemoryFree ///< Memory free error. + ,hipErrorUnknownSymbol ///< Unknown symbol. + ,hipErrorOutOfResources ///< Out of resources error. + ,hipErrorInvalidValue ///< One or more of the parameters passed to the API call is NULL or not in an acceptable range. 
+ ,hipErrorInvalidResourceHandle ///< Resource handle (hipEvent_t or hipStream_t) invalid. + ,hipErrorInvalidDevice ///< DeviceID must be in range 0...#compute-devices. + ,hipErrorInvalidMemcpyDirection ///< Invalid memory copy direction - ,hipErrorUnknown ///< Unknown error - ,hipErrorTbd ///< Marker that more error codes are needed. + ,hipErrorNoDevice ///< Call to hipGetDeviceCount returned 0 devices + ,hipErrorNotReady ///< Indicates that asynchronous operations enqueued earlier are not ready. This is not actually an error, but is used to distinguish from hipSuccess (which indicates completion). APIs that return this error include hipEventQuery and hipStreamQuery. + ,hipErrorUnknown ///< Unknown error. + ,hipErrorTbd ///< Marker that more error codes are needed. } hipError_t; /* @@ -132,24 +161,31 @@ typedef enum hipError_t { * @ingroup Enumerations */ typedef enum hipDeviceAttribute_t { - hipDeviceAttributeMaxThreadsPerBlock, ///< Maximum number of threads per block. - hipDeviceAttributeMaxBlockDimX, ///< Maximum x-dimension of a block. - hipDeviceAttributeMaxBlockDimY, ///< Maximum y-dimension of a block. - hipDeviceAttributeMaxBlockDimZ, ///< Maximum z-dimension of a block. - hipDeviceAttributeMaxGridDimX, ///< Maximum x-dimension of a grid. - hipDeviceAttributeMaxGridDimY, ///< Maximum y-dimension of a grid. - hipDeviceAttributeMaxGridDimZ, ///< Maximum z-dimension of a grid. - hipDeviceAttributeMaxSharedMemoryPerBlock, ///< Maximum shared memory available per block in bytes. - hipDeviceAttributeTotalConstantMemory, ///< Constant memory size in bytes. - hipDeviceAttributeWarpSize, ///< Warp size in threads. - hipDeviceAttributeMaxRegistersPerBlock, ///< Maximum number of 32-bit registers available to a thread block. This number is shared by all thread blocks simultaneously resident on a multiprocessor. - hipDeviceAttributeClockRate, ///< Peak clock frequency in kilohertz. - hipDeviceAttributeMultiprocessorCount, ///< Number of multiprocessors on the device. 
- hipDeviceAttributeComputeMode, ///< Compute mode that device is currently in. - hipDeviceAttributeL2CacheSize, ///< Size of L2 cache in bytes. 0 if the device doesn't have L2 cache. - hipDeviceAttributeMaxThreadsPerMultiProcessor, ///< Maximum resident threads per multiprocessor. - hipDeviceAttributeComputeCapabilityMajor, ///< Major compute capability version number. - hipDeviceAttributeComputeCapabilityMinor, ///< Minor compute capability version number. + hipDeviceAttributeMaxThreadsPerBlock, ///< Maximum number of threads per block. + hipDeviceAttributeMaxBlockDimX, ///< Maximum x-dimension of a block. + hipDeviceAttributeMaxBlockDimY, ///< Maximum y-dimension of a block. + hipDeviceAttributeMaxBlockDimZ, ///< Maximum z-dimension of a block. + hipDeviceAttributeMaxGridDimX, ///< Maximum x-dimension of a grid. + hipDeviceAttributeMaxGridDimY, ///< Maximum y-dimension of a grid. + hipDeviceAttributeMaxGridDimZ, ///< Maximum z-dimension of a grid. + hipDeviceAttributeMaxSharedMemoryPerBlock, ///< Maximum shared memory available per block in bytes. + hipDeviceAttributeTotalConstantMemory, ///< Constant memory size in bytes. + hipDeviceAttributeWarpSize, ///< Warp size in threads. + hipDeviceAttributeMaxRegistersPerBlock, ///< Maximum number of 32-bit registers available to a thread block. This number is shared by all thread blocks simultaneously resident on a multiprocessor. + hipDeviceAttributeClockRate, ///< Peak clock frequency in kilohertz. + hipDeviceAttributeMemoryClockRate, ///< Peak memory clock frequency in kilohertz. + hipDeviceAttributeMemoryBusWidth, ///< Global memory bus width in bits. + hipDeviceAttributeMultiprocessorCount, ///< Number of multiprocessors on the device. + hipDeviceAttributeComputeMode, ///< Compute mode that device is currently in. + hipDeviceAttributeL2CacheSize, ///< Size of L2 cache in bytes. 0 if the device doesn't have L2 cache. + hipDeviceAttributeMaxThreadsPerMultiProcessor, ///< Maximum resident threads per multiprocessor. 
+ hipDeviceAttributeComputeCapabilityMajor, ///< Major compute capability version number. + hipDeviceAttributeComputeCapabilityMinor, ///< Minor compute capability version number. + hipDeviceAttributeConcurrentKernels, ///< Device can possibly execute multiple kernels concurrently. + hipDeviceAttributePciBusId, ///< PCI Bus ID. + hipDeviceAttributePciDeviceId, ///< PCI Device ID. + hipDeviceAttributeMaxSharedMemoryPerMultiprocessor, ///< Maximum Shared Memory Per Multiprocessor. + hipDeviceAttributeIsMultiGpuBoard, ///< Multiple GPU devices. } hipDeviceAttribute_t; /** diff --git a/projects/hip/include/nvcc_detail/hip_runtime_api.h b/projects/hip/include/nvcc_detail/hip_runtime_api.h index 4c9b35cab8..a8408211b2 100644 --- a/projects/hip/include/nvcc_detail/hip_runtime_api.h +++ b/projects/hip/include/nvcc_detail/hip_runtime_api.h @@ -242,6 +242,10 @@ inline static hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t att cdattr = cudaDevAttrMaxRegistersPerBlock; break; case hipDeviceAttributeClockRate: cdattr = cudaDevAttrClockRate; break; + case hipDeviceAttributeMemoryClockRate: + cdattr = cudaDevAttrMemoryClockRate; break; + case hipDeviceAttributeMemoryBusWidth: + cdattr = cudaDevAttrGlobalMemoryBusWidth; break; case hipDeviceAttributeMultiprocessorCount: cdattr = cudaDevAttrMultiProcessorCount; break; case hipDeviceAttributeComputeMode: @@ -252,8 +256,16 @@ inline static hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t att cdattr = cudaDevAttrMaxThreadsPerMultiProcessor; break; case hipDeviceAttributeComputeCapabilityMajor: cdattr = cudaDevAttrComputeCapabilityMajor; break; - case hipDeviceAttributeComputeCapabilityMinor: - cdattr = cudaDevAttrComputeCapabilityMinor; break; + case hipDeviceAttributeConcurrentKernels: + cdattr = cudaDevAttrConcurrentKernels; break; + case hipDeviceAttributePciBusId: + cdattr = cudaDevAttrPciBusId; break; + case hipDeviceAttributePciDeviceId: + cdattr = cudaDevAttrPciDeviceId; break; + case 
hipDeviceAttributeMaxSharedMemoryPerMultiprocessor: + cdattr = cudaDevAttrMaxSharedMemoryPerMultiprocessor; break; + case hipDeviceAttributeIsMultiGpuBoard: + cdattr = cudaDevAttrIsMultiGpuBoard; break; default: cerror = cudaErrorInvalidValue; break; } @@ -263,6 +275,29 @@ inline static hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t att return hipCUDAErrorTohipError(cerror); } + +inline static hipError_t hipPointerGetAttributes(hipPointerAttribute_t *attributes, void* ptr){ + cudaPointerAttributes cPA; + hipError_t err = hipCUDAErrorTohipError(cudaPointerGetAttributes(&cPA, ptr)); + if(err == hipSuccess){ + switch (cPA.memoryType){ + case cudaMemoryTypeDevice: + attributes->memoryType = hipMemoryTypeDevice; break; + case cudaMemoryTypeHost: + attributes->memoryType = hipMemoryTypeHost; break; + default: + return hipErrorUnknownSymbol; + } + attributes->device = cPA.device; + attributes->devicePointer = cPA.devicePointer; + attributes->hostPointer = cPA.hostPointer; + attributes->isManaged = 0; + attributes->allocationFlags = 0; + } + return err; +} + + inline static hipError_t hipMemGetInfo( size_t* free, size_t* total) { return hipCUDAErrorTohipError(cudaMemGetInfo(free,total)); diff --git a/projects/hip/samples/1_Utils/hipBusBandwidth/LICENSE.txt b/projects/hip/samples/1_Utils/hipBusBandwidth/LICENSE.txt new file mode 100644 index 0000000000..5d0d603232 --- /dev/null +++ b/projects/hip/samples/1_Utils/hipBusBandwidth/LICENSE.txt @@ -0,0 +1,27 @@ + +Copyright (c) 2011, UT-Battelle, LLC +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+* Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. +* Neither the name of Oak Ridge National Laboratory, nor UT-Battelle, LLC, nor + the names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ diff --git a/projects/hip/samples/1_Utils/hipBusBandwidth/Makefile b/projects/hip/samples/1_Utils/hipBusBandwidth/Makefile new file mode 100644 index 0000000000..77a92fb1a6 --- /dev/null +++ b/projects/hip/samples/1_Utils/hipBusBandwidth/Makefile @@ -0,0 +1,17 @@ +HIP_PATH?=$(shell hipconfig -p) +HIPCC=$(HIP_PATH)/bin/hipcc + +EXE=hipBusBandwidth +CXXFLAGS = -O3 -g + +all: install + +$(EXE): hipBusBandwidth.cpp ResultDatabase.cpp + $(HIPCC) $(CXXFLAGS) $^ -o $@ + +install: $(EXE) + cp $(EXE) $(HIP_PATH)/bin + + +clean: + rm -f *.o $(EXE) diff --git a/projects/hip/samples/1_Utils/hipBusBandwidth/ResultDatabase.cpp b/projects/hip/samples/1_Utils/hipBusBandwidth/ResultDatabase.cpp new file mode 100644 index 0000000000..7d2f3aef84 --- /dev/null +++ b/projects/hip/samples/1_Utils/hipBusBandwidth/ResultDatabase.cpp @@ -0,0 +1,523 @@ +#include "ResultDatabase.h" + +#include +#include +#include +#include + +using namespace std; + +bool ResultDatabase::Result::operator<(const Result &rhs) const +{ + if (test < rhs.test) + return true; + if (test > rhs.test) + return false; + if (atts < rhs.atts) + return true; + if (atts > rhs.atts) + return false; + return false; // less-operator returns false on equal +} + +double ResultDatabase::Result::GetMin() const +{ + double r = FLT_MAX; + for (int i=0; i= 100) + return value[n-1]; + + double index = ((n + 1.) * q / 100.) - 1; + + vector sorted = value; + sort(sorted.begin(), sorted.end()); + + if (n == 2) + return (sorted[0] * (1 - q/100.) 
+ sorted[1] * (q/100.)); + + int index_lo = int(index); + double frac = index - index_lo; + if (frac == 0) + return sorted[index_lo]; + + double lo = sorted[index_lo]; + double hi = sorted[index_lo + 1]; + return lo + (hi-lo)*frac; +} + +double ResultDatabase::Result::GetMean() const +{ + double r = 0; + for (int i=0; i &values) +{ + for (int i=0; i= results.size()) + { + Result r; + r.test = test; + r.atts = atts; + r.unit = unit; + results.push_back(r); + } + + results[index].value.push_back(value); +} + +// **************************************************************************** +// Method: ResultDatabase::DumpDetailed +// +// Purpose: +// Writes the full results, including all trials. +// +// Arguments: +// out where to print +// +// Programmer: Jeremy Meredith +// Creation: August 14, 2009 +// +// Modifications: +// Jeremy Meredith, Wed Nov 10 14:25:17 EST 2010 +// Renamed to DumpDetailed to make room for a DumpSummary. +// +// Jeremy Meredith, Thu Nov 11 11:39:57 EST 2010 +// Added note about (*) missing value tag. +// +// Jeremy Meredith, Tue Nov 23 13:57:02 EST 2010 +// Changed note about missing values to be worded a little better. +// +// **************************************************************************** +void ResultDatabase::DumpDetailed(ostream &out) +{ + vector sorted(results); + + sort(sorted.begin(), sorted.end()); + + int maxtrials = 1; + for (int i=0; i maxtrials) + maxtrials = sorted[i].value.size(); + } + + // TODO: in big parallel runs, the "trials" are the procs + // and we really don't want to print them all out.... + out << "test\t" + << "atts\t" + << "units\t" + << "median\t" + << "mean\t" + << "stddev\t" + << "min\t" + << "max\t"; + for (int i=0; i sorted(results); + + int testW = 15 ; + const int fieldW = 9; + + sort(sorted.begin(), sorted.end()); + + out << std::fixed << right << std::setprecision(4); + + // TODO: in big parallel runs, the "trials" are the procs + // and we really don't want to print them all out.... 
+ out << setw(testW) << "test\t" << setw(fieldW) + << "atts\t" + << "units\t" + << "median\t" + << "mean\t" + << "stddev\t" + << "min\t" + << "max\t"; + out << endl; + + for (int i=0; i sorted(results); + + sort(sorted.begin(), sorted.end()); + + //Check to see if the file is empty - if so, add the headers + emptyFile = this->IsFileEmpty(fileName); + + //Open file and append by default + ofstream out; + out.open(fileName.c_str(), std::ofstream::out | std::ofstream::app); + + //Add headers only for empty files + if(emptyFile) + { + // TODO: in big parallel runs, the "trials" are the procs + // and we really don't want to print them all out.... + out << "test, " + << "atts, " + << "units, " + << "median, " + << "mean, " + << "stddev, " + << "min, " + << "max, "; + out << endl; + } + + for (int i=0; i +ResultDatabase::GetResultsForTest(const string &test) +{ + // get only the given test results + vector retval; + for (int i=0; i & +ResultDatabase::GetResults() const +{ + return results; +} diff --git a/projects/hip/samples/1_Utils/hipBusBandwidth/ResultDatabase.h b/projects/hip/samples/1_Utils/hipBusBandwidth/ResultDatabase.h new file mode 100644 index 0000000000..4b63a02a1f --- /dev/null +++ b/projects/hip/samples/1_Utils/hipBusBandwidth/ResultDatabase.h @@ -0,0 +1,100 @@ +#ifndef RESULT_DATABASE_H +#define RESULT_DATABASE_H + +#include +#include +#include +#include +#include +using std::string; +using std::vector; +using std::ostream; +using std::ofstream; +using std::ifstream; + + +// **************************************************************************** +// Class: ResultDatabase +// +// Purpose: +// Track numerical results as they are generated. +// Print statistics of raw results. +// +// Programmer: Jeremy Meredith +// Creation: June 12, 2009 +// +// Modifications: +// Jeremy Meredith, Wed Nov 10 14:20:47 EST 2010 +// Split timing reports into detailed and summary. E.g. for serial code, +// we might report all trial values, but skip them in parallel. 
+// +// Jeremy Meredith, Thu Nov 11 11:40:18 EST 2010 +// Added check for missing value tag. +// +// Jeremy Meredith, Mon Nov 22 13:37:10 EST 2010 +// Added percentile statistic. +// +// Jeremy Meredith, Fri Dec 3 16:30:31 EST 2010 +// Added a method to extract a subset of results based on test name. Also, +// the Result class is now public, so that clients can use them directly. +// Added a GetResults method as well, and made several functions const. +// +// **************************************************************************** +class ResultDatabase +{ + public: + // + // A performance result for a single SHOC benchmark run. + // + struct Result + { + string test; // e.g. "readback" + string atts; // e.g. "pagelocked 4k^2" + string unit; // e.g. "MB/sec" + vector value; // e.g. "837.14" + double GetMin() const; + double GetMax() const; + double GetMedian() const; + double GetPercentile(double q) const; + double GetMean() const; + double GetStdDev() const; + + bool operator<(const Result &rhs) const; + + bool HadAnyFLTMAXValues() const + { + for (int i=0; i= FLT_MAX) + return true; + } + return false; + } + }; + + protected: + vector results; + + public: + void AddResult(const string &test, + const string &atts, + const string &unit, + double value); + void AddResults(const string &test, + const string &atts, + const string &unit, + const vector &values); + vector GetResultsForTest(const string &test); + const vector &GetResults() const; + void ClearAllResults(); + void DumpDetailed(ostream&); + void DumpSummary(ostream&); + void DumpCsv(string fileName); + + private: + bool IsFileEmpty(string fileName); + +}; + + +#endif diff --git a/projects/hip/samples/1_Utils/hipBusBandwidth/hipBusBandwidth.cpp b/projects/hip/samples/1_Utils/hipBusBandwidth/hipBusBandwidth.cpp new file mode 100644 index 0000000000..d276725921 --- /dev/null +++ b/projects/hip/samples/1_Utils/hipBusBandwidth/hipBusBandwidth.cpp @@ -0,0 +1,387 @@ +#include +#include +#include + +#include 
"ResultDatabase.h" + +// Cmdline parms: +bool p_verbose = false; +bool p_pinned = true; +int p_iterations = 10; +int p_device = 0; +int p_detailed = 0; + +bool p_h2d = true; +bool p_d2h = true; + + +#define CHECK_HIP_ERROR() \ +{ \ + hipError_t err = hipGetLastError(); \ + if (err != hipSuccess) \ + { \ + printf("error=%d name=%s at " \ + "ln: %d\n ",err,hipGetErrorString(err),__LINE__); \ + exit(EXIT_FAILURE); \ + } \ +} + + +// **************************************************************************** +// Function: runBenchmark +// +// Purpose: +// Measures the bandwidth of the bus connecting the host processor to the +// OpenCL device. This benchmark repeatedly transfers data chunks of various +// sizes across the bus to the OpenCL device, and calculates the bandwidth. +// +// +// Arguments: +// +// Returns: nothing +// +// Programmer: Jeremy Meredith +// Creation: September 08, 2009 +// +// Modifications: +// Jeremy Meredith, Wed Dec 1 17:05:27 EST 2010 +// Added calculation of latency estimate. 
+// Ben Sander - moved to standalone test +// +// **************************************************************************** +void RunBenchmark_H2D(ResultDatabase &resultDB) +{ + // Sizes are in kb + int sizes[] = {1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384, 32768,65536,131072,262144,524288}; + int nSizes = sizeof(sizes) / sizeof(int); + + long long numMaxFloats = 1024 * (sizes[nSizes-1]) / 4; + + hipSetDevice(p_device); + + // Create some host memory pattern + float *hostMem = NULL; + if (p_pinned) + { + hipMallocHost((void**)&hostMem, sizeof(float) * numMaxFloats); + while (hipGetLastError() != hipSuccess) + { + // drop the size and try again + if (p_verbose) std::cout << " - dropping size allocating pinned mem\n"; + --nSizes; + if (nSizes < 1) + { + std::cerr << "Error: Couldn't allocated any pinned buffer\n"; + return; + } + numMaxFloats = 1024 * (sizes[nSizes-1]) / 4; + hipMallocHost((void**)&hostMem, sizeof(float) * numMaxFloats); + } + } + else + { + hostMem = new float[numMaxFloats]; + } + + for (int i = 0; i < numMaxFloats; i++) + { + hostMem[i] = i % 77; + } + + float *device; + hipMalloc((void**)&device, sizeof(float) * numMaxFloats); + while (hipGetLastError() != hipSuccess) + { + // drop the size and try again + if (p_verbose) std::cout << " - dropping size allocating device mem\n"; + --nSizes; + if (nSizes < 1) + { + std::cerr << "Error: Couldn't allocated any device buffer\n"; + return; + } + numMaxFloats = 1024 * (sizes[nSizes-1]) / 4; + hipMalloc((void**)&device, sizeof(float) * numMaxFloats); + } + + + hipEvent_t start, stop; + hipEventCreate(&start); + hipEventCreate(&stop); + CHECK_HIP_ERROR(); + + // Three passes, forward and backward both + for (int pass = 0; pass < p_iterations; pass++) + { + // store the times temporarily to estimate latency + //float times[nSizes]; + // Step through sizes forward on even passes and backward on odd + for (int i = 0; i < nSizes; i++) + { + int sizeIndex; + if ((pass % 2) == 0) + sizeIndex = i; + 
else + sizeIndex = (nSizes - 1) - i; + + int nbytes = sizes[sizeIndex] * 1024; + + hipEventRecord(start, 0); + hipMemcpy(device, hostMem, nbytes, hipMemcpyHostToDevice); + hipEventRecord(stop, 0); + hipEventSynchronize(stop); + float t = 0; + hipEventElapsedTime(&t, start, stop); + //times[sizeIndex] = t; + + // Convert to GB/sec + if (p_verbose) + { + std::cerr << "size " << sizes[sizeIndex] << "k took " << t << + " ms\n"; + } + + double speed = (double(sizes[sizeIndex]) * 1024. / (1000*1000)) / t; + char sizeStr[256]; + sprintf(sizeStr, "% 7dkB", sizes[sizeIndex]); + resultDB.AddResult("H2D_Bandwidth", sizeStr, "GB/sec", speed); + resultDB.AddResult("H2D_Time", sizeStr, "ms", t); + } + } + + // Cleanup + hipFree((void*)device); + CHECK_HIP_ERROR(); + if (p_pinned) + { + hipFreeHost((void*)hostMem); + CHECK_HIP_ERROR(); + } + else + { + delete[] hostMem; + } + hipEventDestroy(start); + hipEventDestroy(stop); +} + + +void RunBenchmark_D2H(ResultDatabase &resultDB) +{ + + // Sizes are in kb + int nSizes = 20; + int sizes[20] = {1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384, + 32768,65536,131072,262144,524288}; + long long numMaxFloats = 1024 * (sizes[nSizes-1]) / 4; + + // Create some host memory pattern + float *hostMem1; + float *hostMem2; + if (p_pinned) + { + hipMallocHost((void**)&hostMem1, sizeof(float)*numMaxFloats); + hipError_t err1 = hipGetLastError(); + hipMallocHost((void**)&hostMem2, sizeof(float)*numMaxFloats); + hipError_t err2 = hipGetLastError(); + while (err1 != hipSuccess || err2 != hipSuccess) + { + // free the first buffer if only the second failed + if (err1 == hipSuccess) + hipFreeHost((void*)hostMem1); + + // drop the size and try again + if (p_verbose) std::cout << " - dropping size allocating pinned mem\n"; + --nSizes; + if (nSizes < 1) + { + std::cerr << "Error: Couldn't allocated any pinned buffer\n"; + return; + } + numMaxFloats = 1024 * (sizes[nSizes-1]) / 4; + hipMallocHost((void**)&hostMem1, sizeof(float)*numMaxFloats); + 
err1 = hipGetLastError(); + hipMallocHost((void**)&hostMem2, sizeof(float)*numMaxFloats); + err2 = hipGetLastError(); + } + } + else + { + hostMem1 = new float[numMaxFloats]; + hostMem2 = new float[numMaxFloats]; + } + for (int i=0; i= argc || !parseInt(argv[i], &p_iterations)) { + failed("Bad iterations argument"); + } + } else if (!strcmp(arg, "--device") || (!strcmp(arg, "-d"))) { + if (++i >= argc || !parseInt(argv[i], &p_device)) { + failed("Bad device argument"); + } + } else if (!strcmp(arg, "--unpinned")) { + p_pinned = 0; + } else if (!strcmp(arg, "--h2d")) { + p_h2d = true; + p_d2h = false; + + } else if (!strcmp(arg, "--d2h")) { + p_h2d = false; + p_d2h = true; + + } else if (!strcmp(arg, "--help") || (!strcmp(arg, "-h"))) { + help(); + + } else if (!strcmp(arg, "--verbose")) { + p_verbose = 1; + } else if (!strcmp(arg, "--detailed")) { + p_detailed = 1; + } else { + failed("Bad argument '%s'", arg); + } + } + + return 0; +}; + + + +int main(int argc, char *argv[]) +{ + parseStandardArguments(argc, argv); + + if (p_h2d) { + ResultDatabase resultDB; + RunBenchmark_H2D(resultDB); + + resultDB.DumpSummary(std::cout); + + if (p_detailed) { + resultDB.DumpDetailed(std::cout); + } + } + + if (p_d2h) { + ResultDatabase resultDB; + RunBenchmark_D2H(resultDB); + + resultDB.DumpSummary(std::cout); + + if (p_detailed) { + resultDB.DumpDetailed(std::cout); + } + } +} diff --git a/projects/hip/samples/1_Utils/hipInfo/hipInfo.cpp b/projects/hip/samples/1_Utils/hipInfo/hipInfo.cpp index bff2114f96..824ab17d37 100644 --- a/projects/hip/samples/1_Utils/hipInfo/hipInfo.cpp +++ b/projects/hip/samples/1_Utils/hipInfo/hipInfo.cpp @@ -41,9 +41,9 @@ THE SOFTWARE. 
#define HIPCHECK(error) \ if (error != hipSuccess) { \ - printf("%serror: '%s'(%d) at %s:%d%s\n", \ - KRED,hipGetErrorString(error), error,\ - __FILE__, __LINE__,KNRM); \ + printf("%serror: '%s'(%d) at %s:%d%s\n", \ + KRED, hipGetErrorString(error), error,\ + __FILE__, __LINE__,KNRM);\ failed("API returned error code.");\ } @@ -53,12 +53,11 @@ void printCompilerInfo () printf ("compiler: hcc version=%s, workweek (YYWWD) = %u\n", __hcc_version__, __hcc_workweek__); #endif #ifdef __NVCC__ - printf ("compiler: nvcc\n"); + printf ("compiler: nvcc\n"); #endif } - -double bytesToGB(size_t s) +double bytesToGB(size_t s) { return (double)s / (1024.0*1024.0*1024.0); } @@ -66,7 +65,6 @@ double bytesToGB(size_t s) void printDeviceProp (int deviceId) { using namespace std; - const int w1 = 30; cout << left; @@ -77,61 +75,59 @@ void printDeviceProp (int deviceId) hipDeviceProp_t props; HIPCHECK(hipDeviceGetProperties(&props, deviceId)); - cout << setw(w1) << "Name: " << props.name << endl; + cout << setw(w1) << "pciBusID: " << props.pciBusID << endl; + cout << setw(w1) << "pciDeviceID: " << props.pciDeviceID << endl; cout << setw(w1) << "multiProcessorCount: " << props.multiProcessorCount << endl; + cout << setw(w1) << "maxThreadsPerMultiProcessor: " << props.maxThreadsPerMultiProcessor << endl; + cout << setw(w1) << "isMultiGpuBoard: " << props.isMultiGpuBoard << endl; cout << setw(w1) << "clockRate: " << (float)props.clockRate / 1000.0 << " Mhz" << endl; - cout << setw(w1) << "clockInstructionRate: " << (float)props.clockInstructionRate / 1000.0<< " Mhz" << endl; - cout << setw(w1) << "totalGlobalMem" << fixed << setprecision(2) << bytesToGB(props.totalGlobalMem) << " GB" << endl; - - cout << setw(w1) << "sharedMemPerBlock" << (float)props.sharedMemPerBlock / 1024.0 << " KB" << endl; - cout << setw(w1) << "regsPerBlock" << props.regsPerBlock << endl; - cout << setw(w1) << "warpSize" << props.warpSize << endl; - cout << setw(w1) << "maxThreadsPerBlock" << 
props.maxThreadsPerBlock << endl; - cout << setw(w1) << "maxThreadsDim.x" << props.maxThreadsDim[0] << endl; - cout << setw(w1) << "maxThreadsDim.y" << props.maxThreadsDim[1] << endl; - cout << setw(w1) << "maxThreadsDim.z" << props.maxThreadsDim[2] << endl; - - cout << setw(w1) << "maxGridSize.x" << props.maxGridSize[0] << endl; - cout << setw(w1) << "maxGridSize.y" << props.maxGridSize[1] << endl; - cout << setw(w1) << "maxGridSize.z" << props.maxGridSize[2] << endl; - - - cout << setw(w1) << "totalConstMem" << props.totalConstMem << endl; - cout << setw(w1) << "major" << props.major << endl; - cout << setw(w1) << "minor" << props.minor << endl; - cout << setw(w1) << "l2CacheSize" << props.l2CacheSize << endl; - cout << setw(w1) << "maxThreadsPerMultiProcessor" << props.maxThreadsPerMultiProcessor << endl; - cout << setw(w1) << "computeMode" << props.computeMode << endl; - - cout << setw(w1) << "arch.hasGlobalInt32Atomics" << props.arch.hasGlobalInt32Atomics << endl; - cout << setw(w1) << "arch.hasGlobalFloatAtomicExch" << props.arch.hasGlobalFloatAtomicExch << endl; - cout << setw(w1) << "arch.hasSharedInt32Atomics" << props.arch.hasSharedInt32Atomics << endl; - cout << setw(w1) << "arch.hasSharedFloatAtomicExch" << props.arch.hasSharedFloatAtomicExch << endl; - cout << setw(w1) << "arch.hasFloatAtomicAdd" << props.arch.hasFloatAtomicAdd << endl; - cout << setw(w1) << "arch.hasGlobalInt64Atomics" << props.arch.hasGlobalInt64Atomics << endl; - cout << setw(w1) << "arch.hasSharedInt64Atomics" << props.arch.hasSharedInt64Atomics << endl; - cout << setw(w1) << "arch.hasDoubles" << props.arch.hasDoubles << endl; - cout << setw(w1) << "arch.hasWarpVote" << props.arch.hasWarpVote << endl; - cout << setw(w1) << "arch.hasWarpBallot" << props.arch.hasWarpBallot << endl; - cout << setw(w1) << "arch.hasWarpShuffle" << props.arch.hasWarpShuffle << endl; - cout << setw(w1) << "arch.hasFunnelShift" << props.arch.hasFunnelShift << endl; - cout << setw(w1) << 
"arch.hasThreadFenceSystem" << props.arch.hasThreadFenceSystem << endl; - cout << setw(w1) << "arch.hasSyncThreadsExt" << props.arch.hasSyncThreadsExt << endl; - cout << setw(w1) << "arch.hasSurfaceFuncs" << props.arch.hasSurfaceFuncs << endl; - cout << setw(w1) << "arch.has3dGrid" << props.arch.has3dGrid << endl; - cout << setw(w1) << "arch.hasDynamicParallelism" << props.arch.hasDynamicParallelism << endl; - + cout << setw(w1) << "memoryClockRate: " << (float)props.memoryClockRate / 1000.0 << " Mhz" << endl; + cout << setw(w1) << "memoryBusWidth: " << props.memoryBusWidth << endl; + cout << setw(w1) << "clockInstructionRate: " << (float)props.clockInstructionRate / 1000.0 << " Mhz" << endl; + cout << setw(w1) << "totalGlobalMem: " << fixed << setprecision(2) << bytesToGB(props.totalGlobalMem) << " GB" << endl; + cout << setw(w1) << "maxSharedMemoryPerMultiProcessor: " << fixed << setprecision(2) << bytesToGB(props.maxSharedMemoryPerMultiProcessor) << " GB" << endl; + cout << setw(w1) << "totalConstMem: " << props.totalConstMem << endl; + cout << setw(w1) << "sharedMemPerBlock: " << (float)props.sharedMemPerBlock / 1024.0 << " KB" << endl; + cout << setw(w1) << "regsPerBlock: " << props.regsPerBlock << endl; + cout << setw(w1) << "warpSize: " << props.warpSize << endl; + cout << setw(w1) << "l2CacheSize: " << props.l2CacheSize << endl; + cout << setw(w1) << "computeMode: " << props.computeMode << endl; + cout << setw(w1) << "maxThreadsPerBlock: " << props.maxThreadsPerBlock << endl; + cout << setw(w1) << "maxThreadsDim.x: " << props.maxThreadsDim[0] << endl; + cout << setw(w1) << "maxThreadsDim.y: " << props.maxThreadsDim[1] << endl; + cout << setw(w1) << "maxThreadsDim.z: " << props.maxThreadsDim[2] << endl; + cout << setw(w1) << "maxGridSize.x: " << props.maxGridSize[0] << endl; + cout << setw(w1) << "maxGridSize.y: " << props.maxGridSize[1] << endl; + cout << setw(w1) << "maxGridSize.z: " << props.maxGridSize[2] << endl; + cout << setw(w1) << "major: " << 
props.major << endl; + cout << setw(w1) << "minor: " << props.minor << endl; + cout << setw(w1) << "concurrentKernels: " << props.concurrentKernels << endl; + cout << setw(w1) << "arch.hasGlobalInt32Atomics: " << props.arch.hasGlobalInt32Atomics << endl; + cout << setw(w1) << "arch.hasGlobalFloatAtomicExch: " << props.arch.hasGlobalFloatAtomicExch << endl; + cout << setw(w1) << "arch.hasSharedInt32Atomics: " << props.arch.hasSharedInt32Atomics << endl; + cout << setw(w1) << "arch.hasSharedFloatAtomicExch: " << props.arch.hasSharedFloatAtomicExch << endl; + cout << setw(w1) << "arch.hasFloatAtomicAdd: " << props.arch.hasFloatAtomicAdd << endl; + cout << setw(w1) << "arch.hasGlobalInt64Atomics: " << props.arch.hasGlobalInt64Atomics << endl; + cout << setw(w1) << "arch.hasSharedInt64Atomics: " << props.arch.hasSharedInt64Atomics << endl; + cout << setw(w1) << "arch.hasDoubles: " << props.arch.hasDoubles << endl; + cout << setw(w1) << "arch.hasWarpVote: " << props.arch.hasWarpVote << endl; + cout << setw(w1) << "arch.hasWarpBallot: " << props.arch.hasWarpBallot << endl; + cout << setw(w1) << "arch.hasWarpShuffle: " << props.arch.hasWarpShuffle << endl; + cout << setw(w1) << "arch.hasFunnelShift: " << props.arch.hasFunnelShift << endl; + cout << setw(w1) << "arch.hasThreadFenceSystem: " << props.arch.hasThreadFenceSystem << endl; + cout << setw(w1) << "arch.hasSyncThreadsExt: " << props.arch.hasSyncThreadsExt << endl; + cout << setw(w1) << "arch.hasSurfaceFuncs: " << props.arch.hasSurfaceFuncs << endl; + cout << setw(w1) << "arch.has3dGrid: " << props.arch.has3dGrid << endl; + cout << setw(w1) << "arch.hasDynamicParallelism: " << props.arch.hasDynamicParallelism << endl; cout << endl; size_t free, total; - hipMemGetInfo(&free, &total); cout << fixed << setprecision(2); - cout << setw(w1) << "memInfo.total " << bytesToGB(total) << " GB" << endl; - cout << setw(w1) << "memInfo.free " << bytesToGB(free) << " GB (" << setprecision(0) << (float)free/total * 100.0 << "%)" << 
endl; - + cout << setw(w1) << "memInfo.total: " << bytesToGB(total) << " GB" << endl; + cout << setw(w1) << "memInfo.free: " << bytesToGB(free) << " GB (" << setprecision(0) << (float)free/total * 100.0 << "%)" << endl; } int main(int argc, char *argv[]) diff --git a/projects/hip/src/hip_hcc.cpp b/projects/hip/src/hip_hcc.cpp index 4b7b53550d..a96227dc7f 100644 --- a/projects/hip/src/hip_hcc.cpp +++ b/projects/hip/src/hip_hcc.cpp @@ -28,9 +28,14 @@ THE SOFTWARE. #include #include #include +#include #include #include #include +#include +#include +#include + #include #include @@ -38,32 +43,63 @@ THE SOFTWARE. #include "hsa_ext_amd.h" -#define USE_PINNED_HOST (__hcc_workweek__ >= 1601) -//#define USE_ASYNC_COPY + +#define USE_AM_TRACKER 1 /* >0 = use new AM memory tracker features. */ +#define USE_ROCR_V2 0 /* use the ROCR v2 async copy API with dst and src agents */ + +#if (USE_AM_TRACKER) and (__hcc_workweek__ < 16074) +#error (USE_AM_TRACKER requries HCC version of 16074 or newer) +#endif + + +#if (USE_ROCR_V2) and (USE_AM_TRACKER == 0) +#error (USE_ROCR_V2 requires USE_AM_TRACKER>0) +#endif + #define INLINE static inline //--- // Environment variables: -// TODO-HCC - map this to the HC instruction that uses HSAIL to get the wave size. -int warpSize = 64; // Intended to distinguish whether an environment variable should be visible only in debug mode, or in debug+release. //static const int debug = 0; static const int release = 1; -int HIP_PRINT_ENV = 0; -int HIP_TRACE_API= 0; + int HIP_LAUNCH_BLOCKING = 0; -#define TRACE_API 0x1 /* trace API calls and return values */ -#define TRACE_SYNC 0x2 /* trace synchronization pieces */ -#define TRACE_MEM 0x4 /* trace memory allocation / deallocation */ +int HIP_PRINT_ENV = 0; +int HIP_TRACE_API= 0; +int HIP_STAGING_SIZE = 64; /* size of staging buffers, in KB */ +int HIP_STAGING_BUFFERS = 2; // TODO - remove, two buffers should be enough. 
+int HIP_PININPLACE = 0; +int HIP_STREAM_SIGNALS = 2; /* number of signals to allocate at stream creation */ +int HIP_VISIBLE_DEVICES = 0; /* Contains a comma-separated sequence of GPU identifiers */ +std::vector g_hip_visible_devices; /* vector of integers that contains the visible device IDs */ + + +//--- +// Chicken bits for disabling functionality to work around potential issues: +int HIP_DISABLE_HW_KERNEL_DEP = 1; +int HIP_DISABLE_HW_COPY_DEP = 1; + +int HIP_DISABLE_BIDIR_MEMCPY = 0; +int HIP_ONESHOT_COPY_DEP = 1; // TODO - setting this =1 is a good thing, reduces input deps + + +//--- +//Debug flags: +#define TRACE_API 0x01 /* trace API calls and return values */ +#define TRACE_SYNC 0x02 /* trace synchronization pieces */ +#define TRACE_MEM 0x04 /* trace memory allocation / deallocation */ +#define TRACE_COPY2 0x08 /* trace memory copy commands. Detailed. */ +#define TRACE_SIGNAL 0x10 /* trace signal pool commands */ #define tprintf(trace_level, ...) {\ if (HIP_TRACE_API & trace_level) {\ - fprintf (stderr, "hiptrace%d: ", trace_level); \ + fprintf (stderr, "hiptrace%x: ", trace_level); \ fprintf (stderr, __VA_ARGS__);\ }\ } @@ -75,21 +111,93 @@ struct ihipDevice_t; enum ihipCommand_t { ihipCommandKernel, - ihipCommandData, + ihipCommandCopyH2D, + ihipCommandCopyD2H, }; +const char* ihipCommandName[] = { + "Kernel", "CopyH2D", "CopyD2H" +}; + + + +typedef uint64_t SIGSEQNUM; + +//--- +// Small wrapper around signals. +// Designed to be used from stream. +// TODO-someday refactor this class so it can be stored in a vector<> +// we already store the index here so we can use for garbage collection. +struct ihipSignal_t { + hsa_signal_t _hsa_signal; // hsa signal handle + int _index; // Index in pool, used for garbage collection. + SIGSEQNUM _sig_id; // unique sequentially increasing ID. + + ihipSignal_t(); + ~ihipSignal_t(); + + inline void release(); +}; + + +// Used to remove lock, for performance or stimulating bugs. 
+class FakeMutex +{ + public: + void lock() { } + bool try_lock() {return true; } + void unlock() { } +}; + + + // Internal stream structure. -struct ihipStream_t { - unsigned _device_index; +class ihipStream_t { +public: + + ihipStream_t(unsigned device_index, hc::accelerator_view av, unsigned int flags); + ~ihipStream_t(); + + inline void reclaimSignals(SIGSEQNUM sigNum); + inline void waitAndReclaimOlder(ihipSignal_t *signal); + inline void wait(); + + inline ihipDevice_t * getDevice() const; + + ihipSignal_t * getSignal() ; + + inline bool preKernelCommand(); + inline void postKernelCommand(hc::completion_future &kernel_future); + inline int copyCommand(ihipSignal_t *lastCopy, hsa_signal_t *waitSignal, ihipCommand_t copyType); + + inline void resetToEmpty(); + + inline SIGSEQNUM lastCopySeqId() { return _last_copy_signal ? _last_copy_signal->_sig_id : 0; }; + std::mutex & mutex() {return _mutex;}; + + //--- hc::accelerator_view _av; unsigned _flags; - ihipCommand_t _last_command; +private: + void enqueueBarrier(hsa_queue_t* queue, ihipSignal_t *depSignal); - //ihipStream_t() : _av(){ }; - ihipStream_t(unsigned device_index, hc::accelerator_view av, unsigned int flags) : - _device_index(device_index), _av(av), _flags(flags), _last_command(ihipCommandKernel) - {}; -} ; + unsigned _device_index; + ihipCommand_t _last_command_type; // type of the last command + + // signal of last copy command sent to the stream. + // May be NULL, indicating the previous command has completley finished and future commands don't need to create a dependency. + // Copy can be either H2D or D2H. + ihipSignal_t *_last_copy_signal; + hc::completion_future _last_kernel_future; // Completion future of last kernel command sent to GPU. + + int _signalCursor; + + SIGSEQNUM _stream_sig_id; // Monotonically increasing unique signal id. + SIGSEQNUM _oldest_live_sig_id; // oldest live seq_id, anything < this can be allocated. + std::deque _signalPool; // Pool of signals for use by this stream. 
+ + std::mutex _mutex; +}; @@ -112,9 +220,38 @@ struct ihipEvent_t { hc::completion_future _marker; uint64_t _timestamp; // store timestamp, may be set on host or by marker. + + SIGSEQNUM _copy_seq_id; } ; +//------------------------------------------------------------------------------------------------- +struct StagingBuffer { + + static const int _max_buffers = 4; + + StagingBuffer(ihipDevice_t *device, size_t bufferSize, int numBuffers) ; + ~StagingBuffer(); + + void CopyHostToDevice(void* dst, const void* src, size_t sizeBytes, hsa_signal_t *waitFor); + void CopyHostToDevicePinInPlace(void* dst, const void* src, size_t sizeBytes, hsa_signal_t *waitFor); + + void CopyDeviceToHost (void* dst, const void* src, size_t sizeBytes, hsa_signal_t *waitFor); + void CopyDeviceToHostPinInPlace(void* dst, const void* src, size_t sizeBytes, hsa_signal_t *waitFor); + + +private: + ihipDevice_t *_device; + size_t _bufferSize; // Size of the buffers. + int _numBuffers; + + char *_pinnedStagingBuffer[_max_buffers]; + hsa_signal_t _completion_signal[_max_buffers]; +}; + + + +//------------------------------------------------------------------------------------------------- struct ihipDevice_t { unsigned _device_index; // index into g_devices. @@ -131,20 +268,316 @@ struct ihipDevice_t unsigned _compute_units; + hsa_signal_t _copy_signal; // signal to use for synchronous memcopies + std::mutex _copy_lock[2]; // mutex for each direction. + StagingBuffer *_staging_buffer[2]; // one buffer for each direction. + public: - ihipDevice_t(unsigned device_index, hc::accelerator acc); + void reset(); + void init(unsigned device_index, hc::accelerator acc); hipError_t getProperties(hipDeviceProp_t* prop); - // TODO- create a copy constructor. 
- //~ihipDevice_t(); + ~ihipDevice_t(); }; //================================================================================================= -ihipDevice_t::ihipDevice_t(unsigned device_index, hc::accelerator acc) - : _device_index(device_index), - _acc(acc) +// Global Data Structures: +//================================================================================================= +//TLS - must be initialized here. +thread_local hipError_t tls_lastHipError = hipSuccess; +thread_local int tls_defaultDevice = 0; + +// Global initialization. +std::once_flag hip_initialized; +ihipDevice_t *g_devices; // Array of all non-emulated (ie GPU) accelerators in the system. +bool g_visible_device = false; // Set the flag when HIP_VISIBLE_DEVICES is set +unsigned g_deviceCnt; +//================================================================================================= + + +//================================================================================================= +//Forward Declarations: +//================================================================================================= +INLINE bool ihipIsValidDevice(unsigned deviceIndex); + +//================================================================================================= +// Implementation: +//================================================================================================= + + +//================================================================================================= +// ihipSignal_t: +//================================================================================================= +// +//--- +ihipSignal_t::ihipSignal_t() : _sig_id(0) { + if (hsa_signal_create(0/*value*/, 0, NULL, &_hsa_signal) != HSA_STATUS_SUCCESS) { + throw; + } + tprintf (TRACE_SIGNAL, " allocated hsa_signal=%lu\n", (_hsa_signal.handle)); +} + +//--- +ihipSignal_t::~ihipSignal_t() +{ + tprintf (TRACE_SIGNAL, " destroy hsa_signal #%lu (#%lu)\n", (_hsa_signal.handle), _sig_id); + if 
(hsa_signal_destroy(_hsa_signal) != HSA_STATUS_SUCCESS) { + throw; // TODO + } +}; + + + +//================================================================================================= +// ihipStream_t: +//================================================================================================= +//--- +ihipStream_t::ihipStream_t(unsigned device_index, hc::accelerator_view av, unsigned int flags) : + _av(av), + _flags(flags), + _device_index(device_index), + _last_copy_signal(0), + _signalCursor(0), + _stream_sig_id(0), + _oldest_live_sig_id(1) +{ + tprintf(TRACE_SYNC, " streamCreate: stream=%p\n", this); + _signalPool.resize(HIP_STREAM_SIGNALS > 0 ? HIP_STREAM_SIGNALS : 1); + + resetToEmpty(); +}; + + +//--- +ihipStream_t::~ihipStream_t() +{ + _signalPool.clear(); +} + + +//--- +// Reset the stream to "empty" - next command will not set up an inpute dependency on any older signal. +void ihipStream_t::resetToEmpty() +{ + _last_command_type = ihipCommandCopyH2D; + _last_copy_signal = NULL; +} + +//--- +void ihipStream_t::reclaimSignals(SIGSEQNUM sigNum) +{ + tprintf(TRACE_SIGNAL, "reclaim signal #%lu\n", sigNum); + // Mark all signals older and including this one as available for + _oldest_live_sig_id = sigNum+1; +} + + +//--- +void ihipStream_t::waitAndReclaimOlder(ihipSignal_t *signal) +{ + hsa_signal_wait_acquire(_last_copy_signal->_hsa_signal, HSA_SIGNAL_CONDITION_LT, 1, UINT64_MAX, HSA_WAIT_STATE_ACTIVE); + + reclaimSignals(_last_copy_signal->_sig_id); + +} + + +//--- +//Wait for all queues kernels in the associated accelerator_view to complete. +void ihipStream_t::wait() +{ + tprintf (TRACE_SYNC, "stream %p wait for queue-empty and lastCopy:#%lu...\n", this, _last_copy_signal ? 
_last_copy_signal->_sig_id: 0x0 ); + _av.wait(); + if (_last_copy_signal) { + this->waitAndReclaimOlder(_last_copy_signal); + } + + resetToEmpty(); +}; + + +//--- +inline ihipDevice_t * ihipStream_t::getDevice() const +{ + if (ihipIsValidDevice(_device_index)) { + return &g_devices[_device_index]; + } else { + return NULL; + } +}; + + +//--- +// Allocate a new signal from the signal pool. +// Returned signals have value of 0. +// Signals are intended for use in this stream and are always reclaimed "in-order". +ihipSignal_t *ihipStream_t::getSignal() +{ + int numToScan = _signalPool.size(); + do { + auto thisCursor = _signalCursor; + if (++_signalCursor == _signalPool.size()) { + _signalCursor = 0; + } + + if (_signalPool[thisCursor]._sig_id < _oldest_live_sig_id) { + _signalPool[thisCursor]._index = thisCursor; + _signalPool[thisCursor]._sig_id = ++_stream_sig_id; // allocate it. + + + return &_signalPool[thisCursor]; + } + + } while (--numToScan) ; + + assert(numToScan == 0); + + // Have to grow the pool: + _signalCursor = _signalPool.size(); // set to the beginning of the new entries: + _signalPool.resize(_signalPool.size() * 2); + tprintf (TRACE_SIGNAL, "grow signal pool to %zu entries, cursor=%d\n", _signalPool.size(), _signalCursor); + return getSignal(); // try again, + + // Should never reach here. 
+ assert(0); +} + + +//--- +void ihipStream_t::enqueueBarrier(hsa_queue_t* queue, ihipSignal_t *depSignal) +{ + + // Obtain the write index for the command queue + uint64_t index = hsa_queue_load_write_index_relaxed(queue); + const uint32_t queueMask = queue->size - 1; + + // Define the barrier packet to be at the calculated queue index address + hsa_barrier_and_packet_t* barrier = &(((hsa_barrier_and_packet_t*)(queue->base_address))[index&queueMask]); + memset(barrier, 0, sizeof(hsa_barrier_and_packet_t)); + + // setup header + uint16_t header = HSA_PACKET_TYPE_BARRIER_AND << HSA_PACKET_HEADER_TYPE; + header |= 1 << HSA_PACKET_HEADER_BARRIER; + //header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE; + //header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE; + barrier->header = header; + + barrier->dep_signal[0] = depSignal->_hsa_signal; + + barrier->completion_signal.handle = 0; + + // TODO - check queue overflow, return error: + // Increment write index and ring doorbell to dispatch the kernel + hsa_queue_store_write_index_relaxed(queue, index+1); + hsa_signal_store_relaxed(queue->doorbell_signal, index); +} + + +//-- +//When the commands in a stream change types (ie kernel command follows a data command, +//or data command follows a kernel command), then we need to add a barrier packet +//into the stream to mimic CUDA stream semantics. (some hardware uses separate +//queues for data commands and kernel commands, and no implicit ordering is provided). +// +inline bool ihipStream_t::preKernelCommand() +{ + _mutex.lock(); // will be unlocked in postKernelCommand + + bool addedSync = false; + // If switching command types, we need to add a barrier packet to synchronize things. + if (_last_command_type != ihipCommandKernel) { + if (_last_copy_signal) { + addedSync = true; + + hsa_queue_t * q = (hsa_queue_t*)_av.get_hsa_queue(); + if (! 
HIP_DISABLE_HW_KERNEL_DEP) { + this->enqueueBarrier(q, _last_copy_signal); + tprintf (TRACE_SYNC, "stream %p switch %s to %s (barrier pkt inserted with wait on #%lu)\n", + this, ihipCommandName[_last_command_type], ihipCommandName[ihipCommandKernel], _last_copy_signal->_sig_id) + + } else { + tprintf (TRACE_SYNC, "stream %p switch %s to %s (wait for previous...)\n", + this, ihipCommandName[_last_command_type], ihipCommandName[ihipCommandKernel]); + this->waitAndReclaimOlder(_last_copy_signal); + } + } + _last_command_type = ihipCommandKernel; + } + + return addedSync; +} + + +//--- +inline void ihipStream_t::postKernelCommand(hc::completion_future &kernelFuture) +{ + _last_kernel_future = kernelFuture; + + _mutex.unlock(); +}; + + + +//--- +// Called whenever a copy command is set to the stream. +// Examines the last command sent to this stream and returns a signal to wait on, if required. +inline int ihipStream_t::copyCommand(ihipSignal_t *lastCopy, hsa_signal_t *waitSignal, ihipCommand_t copyType) +{ + int needSync = 0; + + waitSignal->handle = 0; + // If switching command types, we need to add a barrier packet to synchronize things. + if (_last_command_type != copyType) { + + + if (_last_command_type == ihipCommandKernel) { + tprintf (TRACE_SYNC, "stream %p switch %s to %s (async copy dep on prev kernel)\n", + this, ihipCommandName[_last_command_type], ihipCommandName[copyType]); + needSync = 1; + hsa_signal_t *hsaSignal = (static_cast (_last_kernel_future.get_native_handle())); + if (hsaSignal) { + *waitSignal = * hsaSignal; + } + } else if (_last_copy_signal) { + needSync = 1; + tprintf (TRACE_SYNC, "stream %p switch %s to %s (async copy dep on other copy #%lu)\n", + this, ihipCommandName[_last_command_type], ihipCommandName[copyType], _last_copy_signal->_sig_id); + *waitSignal = _last_copy_signal->_hsa_signal; + } + + if (HIP_DISABLE_HW_COPY_DEP && needSync) { + // do the wait here on the host, and disable the device-side command resolution. 
+ hsa_signal_wait_acquire(*waitSignal, HSA_SIGNAL_CONDITION_LT, 1, UINT64_MAX, HSA_WAIT_STATE_ACTIVE); + needSync = 0; + } + + _last_command_type = copyType; + } + + _last_copy_signal = lastCopy; + + return needSync; +} + + +//================================================================================================= +// +//Reset the device - this is called from hipDeviceReset. +//Device may be reset multiple times, and may be reset after init. +void ihipDevice_t::reset() +{ + _staging_buffer[0] = new StagingBuffer(this, HIP_STAGING_SIZE*1024, HIP_STAGING_BUFFERS); + _staging_buffer[1] = new StagingBuffer(this, HIP_STAGING_SIZE*1024, HIP_STAGING_BUFFERS); +}; + + +//--- +void ihipDevice_t::init(unsigned device_index, hc::accelerator acc) +{ + _device_index = device_index; + _acc = acc; hsa_agent_t *agent = static_cast (acc.get_default_view().get_hsa_agent()); if (agent) { int err = hsa_agent_get_info(*agent, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT, &_compute_units); @@ -162,30 +595,30 @@ ihipDevice_t::ihipDevice_t(unsigned device_index, hc::accelerator acc) _null_stream = new ihipStream_t(device_index, acc.get_default_view(), hipStreamDefault); this->_streams.push_back(_null_stream); tprintf(TRACE_SYNC, "created device with null_stream=%p\n", _null_stream); + + hsa_signal_create(0, 0, NULL, &_copy_signal); + + this->reset(); }; -#if 0 + ihipDevice_t::~ihipDevice_t() { if (_null_stream) { delete _null_stream; _null_stream = NULL; } + + for (int i=0; i<2; i++) { + if (_staging_buffer[i]) { + delete _staging_buffer[i]; + } + } + hsa_signal_destroy(_copy_signal); } -#endif //---- -//================================================================================================= -//TLS - must be initialized here. -thread_local hipError_t tls_lastHipError = hipSuccess; -thread_local int tls_defaultDevice = 0; - -// Global initialization. 
-std::once_flag hip_initialized; -std::vector g_devices; // Vector of all non-emulated (ie GPU) accelerators in the system. - -//================================================================================================= @@ -207,66 +640,71 @@ void error_check(hsa_status_t hsa_error_code, int line_num, std::string str) { hsa_status_t get_region_info(hsa_region_t region, void* data) { - hsa_status_t err; hipDeviceProp_t* p_prop = reinterpret_cast(data); - uint32_t region_segment ; + uint32_t region_segment; + // Get region segment + err = hsa_region_get_info(region, HSA_REGION_INFO_SEGMENT, ®ion_segment); + ErrorCheck(err); - // Get region segment - err = hsa_region_get_info(region, HSA_REGION_INFO_SEGMENT, ®ion_segment); - ErrorCheck(err); - - switch(region_segment) - { - case HSA_REGION_SEGMENT_READONLY: - err = hsa_region_get_info(region, HSA_REGION_INFO_SIZE, &(p_prop->totalConstMem)); - - break; - - /* case HSA_REGION_SEGMENT_PRIVATE: - cout<<"PRIVATE"<sharedMemPerBlock)); - - break; - - default: - break; - } + switch(region_segment) { + case HSA_REGION_SEGMENT_READONLY: + err = hsa_region_get_info(region, HSA_REGION_INFO_SIZE, &(p_prop->totalConstMem)); break; + /* case HSA_REGION_SEGMENT_PRIVATE: + cout<<"PRIVATE"<sharedMemPerBlock)); break; + default: break; + } return HSA_STATUS_SUCCESS; - } +// Determines if the given agent is of type HSA_DEVICE_TYPE_GPU and counts it. 
+static hsa_status_t countGpuAgents(hsa_agent_t agent, void *data) { + if (data == NULL) { + return HSA_STATUS_ERROR_INVALID_ARGUMENT; + } + hsa_device_type_t device_type; + hsa_status_t status = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &device_type); + if (status != HSA_STATUS_SUCCESS) { + return status; + } + if (device_type == HSA_DEVICE_TYPE_GPU) { + (*static_cast(data))++; + } + return HSA_STATUS_SUCCESS; +} // Internal version, hipError_t ihipDevice_t::getProperties(hipDeviceProp_t* prop) { hipError_t e = hipSuccess; + hsa_status_t err; // Set some defaults in case we don't find the appropriate regions: prop->totalGlobalMem = 0; prop->totalConstMem = 0; prop->sharedMemPerBlock = 0; prop-> maxThreadsPerMultiProcessor = 0; - // - // prop->regsPerBlock = 0; - - hsa_status_t err; - if (_hsa_agent.handle == -1) { return hipErrorInvalidDevice; } + // Iterates over the agents to determine Multiple GPU devices + // using the countGpuAgents callback. + int gpuAgentsCount = 0; + err = hsa_iterate_agents(countGpuAgents, &gpuAgentsCount); + if (err == HSA_STATUS_INFO_BREAK) { err = HSA_STATUS_SUCCESS; } + DeviceErrorCheck(err); + prop->isMultiGpuBoard = 0 ? 
gpuAgentsCount < 2 : 1; // Get agent name err = hsa_agent_get_info(_hsa_agent, HSA_AGENT_INFO_NAME, &(prop->name)); DeviceErrorCheck(err); - // Get agent node uint32_t node; err = hsa_agent_get_info(_hsa_agent, HSA_AGENT_INFO_NODE, &node); @@ -276,12 +714,10 @@ hipError_t ihipDevice_t::getProperties(hipDeviceProp_t* prop) err = hsa_agent_get_info(_hsa_agent, HSA_AGENT_INFO_WAVEFRONT_SIZE,&prop->warpSize); DeviceErrorCheck(err); - // Get max total number of work-items in a workgroup err = hsa_agent_get_info(_hsa_agent, HSA_AGENT_INFO_WORKGROUP_MAX_SIZE, &prop->maxThreadsPerBlock ); DeviceErrorCheck(err); - // Get max number of work-items of each dimension of a work-group uint16_t work_group_max_dim[3]; err = hsa_agent_get_info(_hsa_agent, HSA_AGENT_INFO_WORKGROUP_MAX_DIM, work_group_max_dim); @@ -290,7 +726,6 @@ hipError_t ihipDevice_t::getProperties(hipDeviceProp_t* prop) prop->maxThreadsDim[i]= work_group_max_dim[i]; } - hsa_dim3_t grid_max_dim; err = hsa_agent_get_info(_hsa_agent, HSA_AGENT_INFO_GRID_MAX_DIM, &grid_max_dim); DeviceErrorCheck(err); @@ -298,9 +733,8 @@ hipError_t ihipDevice_t::getProperties(hipDeviceProp_t* prop) prop->maxGridSize[1]= (int) ((grid_max_dim.y == UINT32_MAX) ? (INT32_MAX) : grid_max_dim.y); prop->maxGridSize[2]= (int) ((grid_max_dim.z == UINT32_MAX) ? (INT32_MAX) : grid_max_dim.z); - // Get Max clock frequency - err = hsa_agent_get_info(_hsa_agent, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_MAX_CLOCK_FREQUENCY,&prop->clockRate); + err = hsa_agent_get_info(_hsa_agent, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_MAX_CLOCK_FREQUENCY, &prop->clockRate); prop->clockRate *= 1000.0; // convert Mhz to Khz. 
DeviceErrorCheck(err); @@ -310,52 +744,70 @@ hipError_t ihipDevice_t::getProperties(hipDeviceProp_t* prop) //prop->clockInstructionRate = counterHz / 1000; prop->clockInstructionRate = 100*1000; /* TODO-RT - hard-code until HSART has function to properly report clock */ + // Get Agent BDFID (bus/device/function ID) + uint16_t bdf_id = 1; + err = hsa_agent_get_info(_hsa_agent, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_BDFID, &bdf_id); + DeviceErrorCheck(err); + // BDFID is 16bit uint: [8bit - BusID | 5bit - Device ID | 3bit - Function/DomainID] + // TODO/Clarify: cudaDeviceProp::pciDomainID how to report? + // prop->pciDomainID = bdf_id & 0x7; + prop->pciDeviceID = (bdf_id>>3) & 0x1F; + prop->pciBusID = (bdf_id>>8) & 0xFF; // Masquerade as a 3.0-level device. This will change as more HW functions are properly supported. // Application code should use the arch.has* to do detailed feature detection. prop->major = 2; prop->minor = 0; - // Get number of Compute Unit err = hsa_agent_get_info(_hsa_agent, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT, &(prop->multiProcessorCount)); DeviceErrorCheck(err); - // TODO-hsart - this appears to return 0? 
uint32_t cache_size[4]; err = hsa_agent_get_info(_hsa_agent, HSA_AGENT_INFO_CACHE_SIZE, cache_size); DeviceErrorCheck(err); prop->l2CacheSize = cache_size[1]; - /* Computemode for HSA Devices is always : cudaComputeModeDefault :/ - Default compute mode (Multiple threads can use cudaSetDevice() with this device) */ + /* Computemode for HSA Devices is always : cudaComputeModeDefault */ prop->computeMode = 0; + // Get Max Threads Per Multiprocessor +/* + HsaSystemProperties props; + hsaKmtReleaseSystemProperties(); + if(HSAKMT_STATUS_SUCCESS == hsaKmtAcquireSystemProperties(&props)) { + HsaNodeProperties node_prop = {0}; + if(HSAKMT_STATUS_SUCCESS == hsaKmtGetNodeProperties(node, &node_prop)) { + uint32_t waves_per_cu = node_prop.MaxWavesPerSIMD; + prop-> maxThreadsPerMultiProcessor = prop->warpsize*waves_per_cu; + } + } +*/ - -/* HsaSystemProperties props; - hsaKmtReleaseSystemProperties(); - if(HSAKMT_STATUS_SUCCESS == hsaKmtAcquireSystemProperties(&props)) - { - HsaNodeProperties node_prop = {0}; - if(HSAKMT_STATUS_SUCCESS == hsaKmtGetNodeProperties(node, &node_prop)) - { - uint32_t waves_per_cu = node_prop.MaxWavesPerSIMD; - prop-> maxThreadsPerMultiProcessor = prop->warpsize*waves_per_cu; - } - } */ - - // get memory properties */ - - err = hsa_agent_iterate_regions(_hsa_agent,get_region_info,prop); + // Get memory properties + err = hsa_agent_iterate_regions(_hsa_agent, get_region_info, prop); DeviceErrorCheck(err); - // Get the size of the region we are using for Accelerator Memory allocations: - hsa_region_t *am_region = static_cast (_acc.get_hsa_am_region()); - err = hsa_region_get_info(*am_region, HSA_REGION_INFO_SIZE, &(prop->totalGlobalMem)); + hsa_region_t *am_region = static_cast(_acc.get_hsa_am_region()); + err = hsa_region_get_info(*am_region, HSA_REGION_INFO_SIZE, &prop->totalGlobalMem); + DeviceErrorCheck(err); + // maxSharedMemoryPerMultiProcessor should be as the same as group memory size. 
+ // Group memory will not be paged out, so, the physical memory size is the total shared memory size, and also equal to the group region size. + prop->maxSharedMemoryPerMultiProcessor = prop->totalGlobalMem; + +#if USE_ROCR_V2 + // Get Max memory clock frequency + //err = hsa_region_get_info(*am_region, (hsa_region_info_t)HSA_AMD_REGION_INFO_MAX_CLOCK_FREQUENCY, &prop->memoryClockRate); + DeviceErrorCheck(err); + prop->memoryClockRate *= 1000.0; // convert Mhz to Khz. + + // Get global memory bus width in bits + //err = hsa_region_get_info(*am_region, (hsa_region_info_t)HSA_AMD_REGION_INFO_BUS_WIDTH, &prop->memoryBusWidth); + DeviceErrorCheck(err); +#endif // Set feature flags - these are all mandatory for HIP on HCC path: // Some features are under-development and future revs may support flags that are currently 0. @@ -363,22 +815,18 @@ hipError_t ihipDevice_t::getProperties(hipDeviceProp_t* prop) prop->arch.hasGlobalInt32Atomics = 1; prop->arch.hasGlobalFloatAtomicExch = 1; - prop->arch.hasSharedInt32Atomics = 0; // TODO-hcc-atomics - prop->arch.hasSharedFloatAtomicExch = 0; // TODO-hcc-atomics + prop->arch.hasSharedInt32Atomics = 1; + prop->arch.hasSharedFloatAtomicExch = 1; prop->arch.hasFloatAtomicAdd = 0; prop->arch.hasGlobalInt64Atomics = 1; - prop->arch.hasSharedInt64Atomics = 0; // TODO-hcc-atomics - + prop->arch.hasSharedInt64Atomics = 1; prop->arch.hasDoubles = 1; // TODO - true for Fiji. 
- prop->arch.hasWarpVote = 1; prop->arch.hasWarpBallot = 1; prop->arch.hasWarpShuffle = 1; prop->arch.hasFunnelShift = 0; // TODO-hcc - prop->arch.hasThreadFenceSystem = 0; // TODO-hcc prop->arch.hasSyncThreadsExt = 0; // TODO-hcc - prop->arch.hasSurfaceFuncs = 0; // TODO-hcc prop->arch.has3dGrid = 1; prop->arch.hasDynamicParallelism = 0; @@ -409,17 +857,43 @@ void ihipReadEnv_I(int *var_ptr, const char *var_name1, const char *var_name2, c env = getenv(var_name2); } - // Default is set when variable is initialized (at top of this file), so only override if we find - // an environment variable. - if (env) { - long int v = strtol(env, NULL, 0); - *var_ptr = (int) (v); + // Check if the environment variable is either HIP_VISIBLE_DEVICES or CUDA_LAUNCH_BLOCKING, which + // contains a sequence of comma-separated device IDs + if (!(strcmp(var_name1,"HIP_VISIBLE_DEVICES") && strcmp(var_name2, "CUDA_VISIBLE_DEVICES")) && env){ + // Parse the string stream of env and store the device ids to g_hip_visible_devices global variable + std::string str = env; + std::istringstream ss(str); + std::string device_id; + // Clean up the defult value + g_hip_visible_devices.clear(); + g_visible_device = true; + // Read the visible device numbers + while (std::getline(ss, device_id, ',')) { + if (atoi(device_id.c_str()) >= 0) { + g_hip_visible_devices.push_back(atoi(device_id.c_str())); + }else// Any device number after invalid number will not present + break; + } + // Print out the number of ids + if (HIP_PRINT_ENV) { + printf ("%-30s = ", var_name1); + for(int i=0;i= deviceCnt){ + // Make sure any DeviceID after invalid DeviceID will be erased. + g_hip_visible_devices.resize(i); + break; } } - /* - * Environment variables - */ - READ_ENV_I(release, HIP_PRINT_ENV, 0, "Print HIP environment variables."); - READ_ENV_I(release, HIP_TRACE_API, 0, "Trace each HIP API call. 
Print function name and return code to stderr as program executes."); - READ_ENV_I(release, HIP_LAUNCH_BLOCKING, CUDA_LAUNCH_BLOCKING, "Make HIP APIs 'host-synchronous', so they block until any kernel launches or data copy commands complete. Alias: CUDA_LAUNCH_BLOCKING." ); + g_devices = new ihipDevice_t[deviceCnt]; + g_deviceCnt = 0; + for (int i=0; i"); @@ -474,16 +989,22 @@ void ihipInit() INLINE bool ihipIsValidDevice(unsigned deviceIndex) { // deviceIndex is unsigned so always > 0 - return (deviceIndex < g_devices.size()); + return (deviceIndex < g_deviceCnt); } +/*// check if the device ID is set as visible*/ +//INLINE bool ihipIsVisibleDevice(unsigned deviceIndex) +//{ + //return std::find(g_hip_visible_devices.begin(), g_hip_visible_devices.end(), + //(int)deviceIndex) != g_hip_visible_devices.end(); +/*}*/ //--- INLINE ihipDevice_t *ihipGetTlsDefaultDevice() { // If this is invalid, the TLS state is corrupt. // This can fire if called before devices are initialized. - // TODO - consider replacing assert with error code + // TODO - consider replacing assert with error code assert (ihipIsValidDevice(tls_defaultDevice)); return &g_devices[tls_defaultDevice]; @@ -493,7 +1014,7 @@ INLINE ihipDevice_t *ihipGetTlsDefaultDevice() //--- INLINE ihipDevice_t *ihipGetDevice(int deviceId) { - if ((deviceId >= 0) && (deviceId < g_devices.size())) { + if ((deviceId >= 0) && (deviceId < g_deviceCnt)) { return &g_devices[deviceId]; } else { return NULL; @@ -508,7 +1029,7 @@ static inline void ihipWaitAllStreams(ihipDevice_t *device) { tprintf(TRACE_SYNC, "waitAllStream\n"); for (auto streamI=device->_streams.begin(); streamI!=device->_streams.end(); streamI++) { - (*streamI)->_av.wait(); + (*streamI)->wait(); } } @@ -524,7 +1045,7 @@ inline void ihipWaitNullStream(ihipDevice_t *device) if (!(stream->_flags & hipStreamNonBlocking)) { // TODO-hcc - use blocking or active wait here? 
// TODO-sync - cudaDeviceBlockingSync - stream->_av.wait(); + stream->wait(); } } } @@ -547,78 +1068,35 @@ inline hipStream_t ihipSyncAndResolveStream(hipStream_t stream) } } -#if 0 -inline hsa_status_t -HSABarrier::enqueueBarrier(hsa_queue_t* queue) { - hsa_status_t status = HSA_STATUS_SUCCESS; - - hc::completion_future marker = stream->_av.create_marker(); - - // Create a signal to wait for the barrier to finish. - std::pair ret = Kalmar::ctx.getSignal(); - signal = ret.first; - signalIndex = ret.second; - - // Obtain the write index for the command queue - uint64_t index = hsa_queue_load_write_index_relaxed(queue); - const uint32_t queueMask = queue->size - 1; - - // Define the barrier packet to be at the calculated queue index address - hsa_barrier_and_packet_t* barrier = &(((hsa_barrier_and_packet_t*)(queue->base_address))[index&queueMask]); - memset(barrier, 0, sizeof(hsa_barrier_and_packet_t)); - - // setup header - uint16_t header = HSA_PACKET_TYPE_BARRIER_AND << HSA_PACKET_HEADER_TYPE; - header |= 1 << HSA_PACKET_HEADER_BARRIER; - header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE; - header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE; - barrier->header = header; - - barrier->completion_signal = signal; - - // Increment write index and ring doorbell to dispatch the kernel - hsa_queue_store_write_index_relaxed(queue, index+1); - hsa_signal_store_relaxed(queue->doorbell_signal, index); - - isDispatched = true; - - return status; -} -#endif - -//-- -//When the commands in a stream change types (ie kernel command follows a data command, -//or data command follows a kernel command), then we need to add a barrier packet -//into the stream to mimic CUDA stream semantics. (some hardware uses separate -//queues for data commands and kernel commands, and no implicit ordering is provided). 
-// -inline bool ihipCheckCommandSwitchSync(hipStream_t stream, ihipCommand_t new_command, hc::completion_future *marker) -{ - bool addedSync = false; - // If switching command types, we need to add a barrier packet to synchronize things. - if (stream->_last_command != new_command) { - addedSync = true; - *marker = stream->_av.create_marker(); - - tprintf (TRACE_SYNC, "stream %p switch to %s (barrier pkt inserted)\n", (void*)stream, new_command == ihipCommandKernel ? "Kernel" : "Data"); - stream->_last_command = new_command; - } - - return addedSync; -} + + + + +// TODO - data-up to data-down: // Called just before a kernel is launched from hipLaunchKernel. // Allows runtime to track some information about the stream. -hc::accelerator_view *ihipLaunchKernel(hipStream_t stream) +hipStream_t ihipPreLaunchKernel(hipStream_t stream, hc::accelerator_view **av) { - stream = ihipSyncAndResolveStream(stream); - hc::completion_future marker; - ihipCheckCommandSwitchSync(stream, ihipCommandKernel, &marker); + stream->preKernelCommand(); - return &(stream->_av); + *av = &stream->_av; + + return (stream); +} + + +//--- +//Called after kernel finishes execution. 
+void ihipPostLaunchKernel(hipStream_t stream, hc::completion_future &kernelFuture) +{ + stream->postKernelCommand(kernelFuture); + if (HIP_LAUNCH_BLOCKING) { + tprintf(TRACE_SYNC, " stream:%p LAUNCH_BLOCKING for kernel completion\n", stream); + } } @@ -660,7 +1138,7 @@ hipError_t hipGetDeviceCount(int *count) { std::call_once(hip_initialized, ihipInit); - *count = g_devices.size(); + *count = g_deviceCnt; if (*count > 0) { return ihipLogStatus(hipSuccess); @@ -749,7 +1227,7 @@ hipError_t hipSetDevice(int device) { std::call_once(hip_initialized, ihipInit); - if ((device < 0) || (device > g_devices.size())) { + if ((device < 0) || (device >= g_deviceCnt)) { return ihipLogStatus(hipErrorInvalidDevice); } else { tls_defaultDevice = device; @@ -775,17 +1253,38 @@ hipError_t hipDeviceSynchronize(void) //--- /** * @return @ref hipSuccess - * @bug On HCC, hipDeviceReset is a nop and does not reset the device state. */ hipError_t hipDeviceReset(void) { std::call_once(hip_initialized, ihipInit); + ihipDevice_t *device = ihipGetTlsDefaultDevice(); + // TODO-HCC - // This function needs some support from HSART and KFD. - // It should destroy and clean up all resources allocated with the default device in the current process. - // and needs to destroy all queues as well. - // + // This function currently does a user-level cleanup of known resources. + // It could benefit from KFD support to perform a more "nuclear" clean that would include any associated kernel resources and page table entries. + + + //--- + //Wait for pending activity to complete? + //TODO - check if this is required behavior: + for (auto streamI=device->_streams.begin(); streamI!=device->_streams.end(); streamI++) { + ihipStream_t *stream = *streamI; + stream->wait(); + } + + // Reset and remove streams: + device->_streams.clear(); + + +#if USE_AM_TRACKER + if (device) { + am_memtracker_reset(device->_acc); + device->reset(); // re-allocate required resources. 
+ } +#endif + + // TODO - reset all streams on the device. return ihipLogStatus(hipSuccess); } @@ -827,6 +1326,10 @@ hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device) *pi = prop->regsPerBlock; break; case hipDeviceAttributeClockRate: *pi = prop->clockRate; break; + case hipDeviceAttributeMemoryClockRate: + *pi = prop->memoryClockRate; break; + case hipDeviceAttributeMemoryBusWidth: + *pi = prop->memoryBusWidth; break; case hipDeviceAttributeMultiprocessorCount: *pi = prop->multiProcessorCount; break; case hipDeviceAttributeComputeMode: @@ -839,6 +1342,16 @@ hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device) *pi = prop->major; break; case hipDeviceAttributeComputeCapabilityMinor: *pi = prop->minor; break; + case hipDeviceAttributePciBusId: + *pi = prop->pciBusID; break; + case hipDeviceAttributeConcurrentKernels: + *pi = prop->concurrentKernels; break; + case hipDeviceAttributePciDeviceId: + *pi = prop->pciDeviceID; break; + case hipDeviceAttributeMaxSharedMemoryPerMultiprocessor: + *pi = prop->maxSharedMemoryPerMultiProcessor; break; + case hipDeviceAttributeIsMultiGpuBoard: + *pi = prop->isMultiGpuBoard; break; default: e = hipErrorInvalidValue; break; } @@ -915,6 +1428,7 @@ const char *hipGetErrorName(hipError_t hip_error) case hipErrorInvalidValue : return "hipErrorInvalidValue"; case hipErrorInvalidResourceHandle : return "hipErrorInvalidResourceHandle"; case hipErrorInvalidDevice : return "hipErrorInvalidDevice"; + case hipErrorInvalidMemcpyDirection : return "hipErrorInvalidMemcpyDirection"; case hipErrorNoDevice : return "hipErrorNoDevice"; case hipErrorNotReady : return "hipErrorNotReady"; case hipErrorUnknown : return "hipErrorUnknown"; @@ -978,7 +1492,7 @@ hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int { // Super-conservative version of this - TODO - remove me: - stream->_av.wait(); + stream->wait(); e = hipSuccess; } @@ -996,7 +1510,7 @@ hipError_t 
hipStreamSynchronize(hipStream_t stream) ihipDevice_t *device = ihipGetTlsDefaultDevice(); ihipWaitNullStream(device); } else { - stream->_av.wait(); + stream->wait(); e = hipSuccess; } @@ -1015,20 +1529,25 @@ hipError_t hipStreamDestroy(hipStream_t stream) hipError_t e = hipSuccess; - if (ihipIsValidDevice(stream->_device_index)) { - - ihipDevice_t *device = &g_devices[stream->_device_index]; - - device->_streams.remove(stream); - - delete stream; - + //--- Drain the stream: + if (stream == NULL) { + ihipDevice_t *device = ihipGetTlsDefaultDevice(); + ihipWaitNullStream(device); + } else { + stream->wait(); e = hipSuccess; + } + + ihipDevice_t *device = stream->getDevice(); + + if (device) { + device->_streams.remove(stream); + delete stream; } else { e = hipErrorInvalidResourceHandle; } - return ihipLogStatus(hipSuccess); + return ihipLogStatus(e); } @@ -1069,6 +1588,8 @@ hipError_t hipEventCreateWithFlags(hipEvent_t* event, unsigned flags) eh->_state = hipEventStatusCreated; eh->_stream = NULL; eh->_flags = flags; + eh->_timestamp = 0; + eh->_copy_seq_id = 0; } else { e = hipErrorInvalidValue; } @@ -1103,6 +1624,7 @@ hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream) // Clear timestamps eh->_timestamp = 0; eh->_marker = stream->_av.create_marker(); + eh->_copy_seq_id = stream->lastCopySeqId(); return ihipLogStatus(hipSuccess); } @@ -1150,6 +1672,8 @@ hipError_t hipEventSynchronize(hipEvent_t event) #else eh->_marker.wait(); #endif + eh->_stream->reclaimSignals(eh->_copy_seq_id); + return ihipLogStatus(hipSuccess); } } else { @@ -1165,7 +1689,7 @@ void ihipSetTs(hipEvent_t e) // already recorded, done: return; } else { - // Test this code: + // TODO - use completion-future functions to obtain ticks and timestamps: hsa_signal_t *sig = static_cast (eh->_marker.get_native_handle()); if (sig) { if (hsa_signal_load_acquire(*sig) == 0) { @@ -1246,13 +1770,100 @@ hipError_t hipEventQuery(hipEvent_t event) // Memory // // +// + +//--- +/** + * @return 
#hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidDevice + */ +hipError_t hipPointerGetAttributes(hipPointerAttribute_t *attributes, void* ptr) +{ + std::call_once(hip_initialized, ihipInit); + + hipError_t e = hipSuccess; + +#if USE_AM_TRACKER + hc::accelerator acc; + hc::AmPointerInfo amPointerInfo(NULL, NULL, 0, acc, 0, 0); + am_status_t status = hc::am_memtracker_getinfo(&amPointerInfo, ptr); + if (status == AM_SUCCESS) { + + attributes->memoryType = amPointerInfo._isInDeviceMem ? hipMemoryTypeDevice: hipMemoryTypeHost; + attributes->hostPointer = amPointerInfo._hostPointer; + attributes->devicePointer = amPointerInfo._devicePointer; + attributes->isManaged = 0; + if(attributes->memoryType == hipMemoryTypeHost){ + attributes->hostPointer = ptr; + } + if(attributes->memoryType == hipMemoryTypeDevice){ + attributes->devicePointer = ptr; + } + attributes->allocationFlags = amPointerInfo._appAllocationFlags; + attributes->device = amPointerInfo._appId; + + if (attributes->device < 0) { + e = hipErrorInvalidDevice; + } + + + } else { + attributes->memoryType = hipMemoryTypeDevice; + attributes->hostPointer = 0; + attributes->devicePointer = 0; + attributes->device = -1; + attributes->isManaged = 0; + attributes->allocationFlags = 0; + + e = hipErrorInvalidValue; + } +#else + e = hipErrorInvalidValue; +#endif + + return ihipLogStatus(e); +} + + +#if USE_AM_TRACKER +// TODO - test this function: +/** + * @returns #hipSuccess, + * @returns #hipErrorInvalidValue if flags are not 0 + * @returns #hipErrorMemoryAllocation if hostPointer is not a tracked allocation. 
+ */ +hipError_t hipHostGetDevicePointer(void **devicePointer, void *hostPointer, unsigned flags) +{ + std::call_once(hip_initialized, ihipInit); + + hipError_t e = hipSuccess; + + // Flags must be 0: + if (flags != 0) { + e = hipErrorInvalidValue; + } else { + hc::accelerator acc; + hc::AmPointerInfo amPointerInfo(NULL, NULL, 0, acc, 0, 0); + am_status_t status = hc::am_memtracker_getinfo(&amPointerInfo, hostPointer); + if (status == AM_SUCCESS) { + *devicePointer = amPointerInfo._devicePointer; + } else { + e = hipErrorMemoryAllocation; + *devicePointer = NULL; + } + } + + return ihipLogStatus(e); +} +#endif + + // kernel for launching memcpy operations: template hc::completion_future ihipMemcpyKernel(hipStream_t stream, T * c, const T * a, size_t sizeBytes) { - int wg = std::min((unsigned)8, g_devices[stream->_device_index]._compute_units); + int wg = std::min((unsigned)8, stream->getDevice()->_compute_units); const int threads_per_wg = 256; int threads = wg * threads_per_wg; @@ -1289,7 +1900,7 @@ template hc::completion_future ihipMemsetKernel(hipStream_t stream, T * ptr, T val, size_t sizeBytes) { - int wg = std::min((unsigned)8, g_devices[stream->_device_index]._compute_units); + int wg = std::min((unsigned)8, stream->getDevice()->_compute_units); const int threads_per_wg = 256; int threads = wg * threads_per_wg; @@ -1322,24 +1933,33 @@ ihipMemsetKernel(hipStream_t stream, T * ptr, T val, size_t sizeBytes) } //--- +/** + * @returns #hipSuccess #hipErrorMemoryAllocation + */ hipError_t hipMalloc(void** ptr, size_t sizeBytes) { std::call_once(hip_initialized, ihipInit); hipError_t hip_status = hipSuccess; - const unsigned am_flags = 0; - *ptr = hc::am_alloc(sizeBytes, ihipGetTlsDefaultDevice()->_acc, am_flags); + auto device = ihipGetTlsDefaultDevice(); - if (*ptr == NULL) { - hip_status = hipErrorMemoryAllocation; + if (device) { + const unsigned am_flags = 0; + *ptr = hc::am_alloc(sizeBytes, device->_acc, am_flags); + + if (sizeBytes && (*ptr == NULL)) { +
hip_status = hipErrorMemoryAllocation; + } else { +#if USE_AM_TRACKER + hc::am_memtracker_update(*ptr, device->_device_index, 0); +#endif + } } else { - hip_status = hipSuccess; + hip_status = hipErrorMemoryAllocation; } - ihipLogStatus(hip_status); - - return hip_status; + return ihipLogStatus(hip_status); } @@ -1347,42 +1967,32 @@ hipError_t hipMallocHost(void** ptr, size_t sizeBytes) { std::call_once(hip_initialized, ihipInit); -#if USE_PINNED_HOST + hipError_t hip_status = hipSuccess; const unsigned am_flags = amHostPinned; + auto device = ihipGetTlsDefaultDevice(); - *ptr = hc::am_alloc(sizeBytes, ihipGetTlsDefaultDevice()->_acc, am_flags); - hipError_t hip_status = hipSuccess; - if (*ptr == NULL) { - hip_status = hipErrorMemoryAllocation; - } else { - hip_status = hipSuccess; - } - - tprintf (TRACE_MEM, " %s: pinned ptr=%p\n", __func__, *ptr); - - ihipLogStatus(hip_status); - - return hip_status; - -#else - // TODO-hcc remove-me - - // This code only works on Kaveri: - *ptr = malloc(sizeBytes); // TODO - call am_alloc for device memory, this will only on KV HSA. - if (*ptr != NULL) { - //TODO-hsart : need memory pin APIs to implement this correctly. - // FOr now do our best to allocate the memory, but return an error since - // the returned pointer can only be used on the HOST not the GPU. 
- return ihipLogStatus(hipErrorMemoryAllocation); - } else { - return ihipLogStatus(hipErrorMemoryAllocation); - } + if (device) { + *ptr = hc::am_alloc(sizeBytes, device->_acc, am_flags); + if (sizeBytes && (*ptr == NULL)) { + hip_status = hipErrorMemoryAllocation; + } else { +#if USE_AM_TRACKER + hc::am_memtracker_update(*ptr, device->_device_index, 0); #endif + } + + tprintf (TRACE_MEM, " %s: pinned ptr=%p\n", __func__, *ptr); + } + + return ihipLogStatus(hip_status); } +//--- hipError_t hipMemcpyToSymbol(const char* symbolName, const void *src, size_t count, size_t offset, hipMemcpyKind kind) { + std::call_once(hip_initialized, ihipInit); + #ifdef USE_MEMCPYTOSYMBOL if(kind != hipMemcpyHostToDevice) { @@ -1390,15 +2000,339 @@ hipError_t hipMemcpyToSymbol(const char* symbolName, const void *src, size_t cou } auto device = ihipGetTlsDefaultDevice(); - hc::completion_future marker; - ihipCheckCommandSwitchSync(device._null_stream, ihipCommandData, &marker); + //hsa_signal_t depSignal; + //int depSignalCnt = device._null_stream->copyCommand(NULL, &depSignal, ihipCommandCopyH2D); + assert(0); // Need to properly synchronize the copy - do something with depSignal if != NULL. device->_acc.memcpy_symbol(symbolName, (void*) src,count, offset); #endif - return ihipLogStatus(hipSuccess); + return ihipLogStatus(hipSuccess); } +//------------------------------------------------------------------------------------------------- +StagingBuffer::StagingBuffer(ihipDevice_t *device, size_t bufferSize, int numBuffers) : + _device(device), + _bufferSize(bufferSize), + _numBuffers(numBuffers > _max_buffers ? _max_buffers : numBuffers) +{ + + + + for (int i=0; i<_numBuffers; i++) { + // TODO - experiment with alignment here. 
+ _pinnedStagingBuffer[i] = hc::am_alloc(_bufferSize, device->_acc, amHostPinned); + if (_pinnedStagingBuffer[i] == NULL) { + throw; + } + hsa_signal_create(0, 0, NULL, &_completion_signal[i]); + } +}; + +//--- +StagingBuffer::~StagingBuffer() +{ + for (int i=0; i<_numBuffers; i++) { + if (_pinnedStagingBuffer[i]) { + hc::am_free(_pinnedStagingBuffer[i]); + _pinnedStagingBuffer[i] = NULL; + } + hsa_signal_destroy(_completion_signal[i]); + } +} + + + +//Copies sizeBytes from src to dst, using either a copy to a staging buffer or a staged pin-in-place strategy +//IN: dst - dest pointer - must be accessible from host CPU. +//IN: src - src pointer for copy. Must be accessible from agent this buffer is associated with (via _device) +//IN: waitFor - hsaSignal to wait for - the copy will begin only when the specified dependency is resolved. May be NULL indicating no dependency. +void StagingBuffer::CopyHostToDevicePinInPlace(void* dst, const void* src, size_t sizeBytes, hsa_signal_t *waitFor) +{ + const char *srcp = static_cast (src); + char *dstp = static_cast (dst); + + for (int i=0; i<_numBuffers; i++) { + hsa_signal_store_relaxed(_completion_signal[i], 0); + } + + assert(sizeBytes < UINT64_MAX/2); // TODO + int bufferIndex = 0; + for (int64_t bytesRemaining=sizeBytes; bytesRemaining>0 ; bytesRemaining -= _bufferSize) { + + size_t theseBytes = (bytesRemaining > _bufferSize) ? _bufferSize : bytesRemaining; + + tprintf (TRACE_COPY2, "H2D: waiting... 
on completion signal handle=%lu\n", _completion_signal[bufferIndex].handle); + hsa_signal_wait_acquire(_completion_signal[bufferIndex], HSA_SIGNAL_CONDITION_LT, 1, UINT64_MAX, HSA_WAIT_STATE_ACTIVE); + + tprintf (TRACE_COPY2, "H2D: bytesRemaining=%zu: pin-in-place:%p+%zu bufferIndex[%d]\n", bytesRemaining, srcp, theseBytes, bufferIndex); + + + memcpy(_pinnedStagingBuffer[bufferIndex], srcp, theseBytes); + void *locked_srcp; + hsa_status_t hsa_status = hsa_amd_memory_lock(const_cast (srcp), theseBytes, &_device->_hsa_agent, 1, &locked_srcp); + + assert (hsa_status == HSA_STATUS_SUCCESS); + + hsa_signal_store_relaxed(_completion_signal[bufferIndex], 1); + +#if USE_ROCR_V2 + hsa_status = hsa_amd_memory_async_copy(dstp, _device->_hsa_agent, locked_srcp, _device->_hsa_agent, theseBytes, waitFor ? 1:0, waitFor, _completion_signal[bufferIndex]); +#else + assert(0); +#endif + tprintf (TRACE_COPY2, "H2D: bytesRemaining=%zu: async_copy %zu bytes %p to %p status=%x\n", bytesRemaining, theseBytes, _pinnedStagingBuffer[bufferIndex], dstp, hsa_status); + + assert(hsa_status == HSA_STATUS_SUCCESS); // TODO - throw + + srcp += theseBytes; + dstp += theseBytes; + if (++bufferIndex >= _numBuffers) { + bufferIndex = 0; + } + + if (HIP_ONESHOT_COPY_DEP) { + waitFor = NULL; // TODO - don't need dependency after first copy submitted? + } + } + + // TODO - + printf ("unpin the memory\n"); + + + for (int i=0; i<_numBuffers; i++) { + hsa_signal_wait_acquire(_completion_signal[i], HSA_SIGNAL_CONDITION_LT, 1, UINT64_MAX, HSA_WAIT_STATE_ACTIVE); + } +} + + + + +//--- +//Copies sizeBytes from src to dst, using either a copy to a staging buffer or a staged pin-in-place strategy +//IN: dst - dest pointer - must be accessible from host CPU. +//IN: src - src pointer for copy. Must be accessible from agent this buffer is associated with (via _device) +//IN: waitFor - hsaSignal to wait for - the copy will begin only when the specified dependency is resolved. May be NULL indicating no dependency. 
+void StagingBuffer::CopyHostToDevice(void* dst, const void* src, size_t sizeBytes, hsa_signal_t *waitFor) +{ + const char *srcp = static_cast (src); + char *dstp = static_cast (dst); + + for (int i=0; i<_numBuffers; i++) { + hsa_signal_store_relaxed(_completion_signal[i], 0); + } + + assert(sizeBytes < UINT64_MAX/2); // TODO + int bufferIndex = 0; + for (int64_t bytesRemaining=sizeBytes; bytesRemaining>0 ; bytesRemaining -= _bufferSize) { + + size_t theseBytes = (bytesRemaining > _bufferSize) ? _bufferSize : bytesRemaining; + + tprintf (TRACE_COPY2, "H2D: waiting... on completion signal handle=%lu\n", _completion_signal[bufferIndex].handle); + hsa_signal_wait_acquire(_completion_signal[bufferIndex], HSA_SIGNAL_CONDITION_LT, 1, UINT64_MAX, HSA_WAIT_STATE_ACTIVE); + + tprintf (TRACE_COPY2, "H2D: bytesRemaining=%zu: copy %zu bytes %p to stagingBuf[%d]:%p\n", bytesRemaining, theseBytes, srcp, bufferIndex, _pinnedStagingBuffer[bufferIndex]); + // TODO - use uncached memcpy, someday. + memcpy(_pinnedStagingBuffer[bufferIndex], srcp, theseBytes); + + + hsa_signal_store_relaxed(_completion_signal[bufferIndex], 1); + +#if USE_ROCR_V2 + hsa_status_t hsa_status = hsa_amd_memory_async_copy(dstp, _device->_hsa_agent, _pinnedStagingBuffer[bufferIndex], _device->_hsa_agent, theseBytes, waitFor ? 1:0, waitFor, _completion_signal[bufferIndex]); +#else + hsa_status_t hsa_status = hsa_amd_memory_async_copy(dstp, _pinnedStagingBuffer[bufferIndex], theseBytes, _device->_hsa_agent, 0, NULL, _completion_signal[bufferIndex]); +#endif + tprintf (TRACE_COPY2, "H2D: bytesRemaining=%zu: async_copy %zu bytes %p to %p status=%x\n", bytesRemaining, theseBytes, _pinnedStagingBuffer[bufferIndex], dstp, hsa_status); + + assert(hsa_status == HSA_STATUS_SUCCESS); // TODO - throw + + srcp += theseBytes; + dstp += theseBytes; + if (++bufferIndex >= _numBuffers) { + bufferIndex = 0; + } + + if (HIP_ONESHOT_COPY_DEP) { + waitFor = NULL; // TODO - don't need dependency after first copy submitted? 
+ } + } + + + for (int i=0; i<_numBuffers; i++) { + hsa_signal_wait_acquire(_completion_signal[i], HSA_SIGNAL_CONDITION_LT, 1, UINT64_MAX, HSA_WAIT_STATE_ACTIVE); + } +} + +//--- +//Copies sizeBytes from src to dst, using either a copy to a staging buffer or a staged pin-in-place strategy +//IN: dst - dest pointer - must be accessible from agent this buffer is assocaited with (via _device). +//IN: src - src pointer for copy. Must be accessible from host CPU. +//IN: waitFor - hsaSignal to wait for - the copy will begin only when the specified dependency is resolved. May be NULL indicating no dependency. +void StagingBuffer::CopyDeviceToHost(void* dst, const void* src, size_t sizeBytes, hsa_signal_t *waitFor) +{ + const char *srcp0 = static_cast (src); + char *dstp1 = static_cast (dst); + + for (int i=0; i<_numBuffers; i++) { + hsa_signal_store_relaxed(_completion_signal[i], 0); + } + + assert(sizeBytes < UINT64_MAX/2); // TODO + + int64_t bytesRemaining0 = sizeBytes; // bytes to copy from dest into staging buffer. + int64_t bytesRemaining1 = sizeBytes; // bytes to copy from staging buffer into final dest + + while (bytesRemaining1 > 0) { + // First launch the async copies to copy from dest to host + for (int bufferIndex = 0; (bytesRemaining0>0) && (bufferIndex < _numBuffers); bytesRemaining0 -= _bufferSize, bufferIndex++) { + + size_t theseBytes = (bytesRemaining0 > _bufferSize) ? _bufferSize : bytesRemaining0; + + tprintf (TRACE_COPY2, "D2H: bytesRemaining0=%zu async_copy %zu bytes src:%p to staging:%p\n", bytesRemaining0, theseBytes, srcp0, _pinnedStagingBuffer[bufferIndex]); + hsa_signal_store_relaxed(_completion_signal[bufferIndex], 1); +#if USE_ROCR_V2 + hsa_status_t hsa_status = hsa_amd_memory_async_copy(_pinnedStagingBuffer[bufferIndex], _device->_hsa_agent, srcp0, _device->_hsa_agent, theseBytes, waitFor ? 
1:0, waitFor, _completion_signal[bufferIndex]); +#else + hsa_status_t hsa_status = hsa_amd_memory_async_copy(_pinnedStagingBuffer[bufferIndex], srcp0, theseBytes, _device->_hsa_agent, 0, NULL, _completion_signal[bufferIndex]); +#endif + assert(hsa_status == HSA_STATUS_SUCCESS); // TODO - throw + + srcp0 += theseBytes; + + + if (HIP_ONESHOT_COPY_DEP) { + waitFor = NULL; // TODO - don't need dependency after first copy submitted? + } + } + + // Now unload the staging buffers: + for (int bufferIndex=0; (bytesRemaining1>0) && (bufferIndex < _numBuffers); bytesRemaining1 -= _bufferSize, bufferIndex++) { + + size_t theseBytes = (bytesRemaining1 > _bufferSize) ? _bufferSize : bytesRemaining1; + + tprintf (TRACE_COPY2, "D2H: wait_completion[%d] bytesRemaining=%zu\n", bufferIndex, bytesRemaining1); + hsa_signal_wait_acquire(_completion_signal[bufferIndex], HSA_SIGNAL_CONDITION_LT, 1, UINT64_MAX, HSA_WAIT_STATE_ACTIVE); + + tprintf (TRACE_COPY2, "D2H: bytesRemaining1=%zu copy %zu bytes stagingBuf[%d]:%p to dst:%p\n", bytesRemaining1, theseBytes, bufferIndex, _pinnedStagingBuffer[bufferIndex], dstp1); + memcpy(dstp1, _pinnedStagingBuffer[bufferIndex], theseBytes); + + dstp1 += theseBytes; + } + } + + + //for (int i=0; i<_numBuffers; i++) { + // hsa_signal_wait_acquire(_completion_signal[i], HSA_SIGNAL_CONDITION_LT, 1, UINT64_MAX, HSA_WAIT_STATE_ACTIVE); + //} +} + + + + +#if USE_AM_TRACKER +void ihipSyncCopy(ihipStream_t *stream, void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind) +{ + ihipDevice_t *device = stream->getDevice(); + + if (device == NULL) { + throw hipErrorInvalidResourceHandle; // a bare 'throw;' with no active exception calls std::terminate; throw a value so the caller's catch(...) can report the error + } + + hc::accelerator acc; + hc::AmPointerInfo dstPtrInfo(NULL, NULL, 0, acc, 0, 0); + hc::AmPointerInfo srcPtrInfo(NULL, NULL, 0, acc, 0, 0); + + bool dstNotTracked = (hc::am_memtracker_getinfo(&dstPtrInfo, dst) != AM_SUCCESS); + bool srcNotTracked = (hc::am_memtracker_getinfo(&srcPtrInfo, src) != AM_SUCCESS); + + + // Resolve default to a specific Kind so we know which algorithm to use: + if (kind
== hipMemcpyDefault) { + bool dstIsHost = (dstNotTracked || !dstPtrInfo._isInDeviceMem); + bool srcIsHost = (srcNotTracked || !srcPtrInfo._isInDeviceMem); + if (srcIsHost && !dstIsHost) { + kind = hipMemcpyHostToDevice; + } else if (!srcIsHost && dstIsHost) { + kind = hipMemcpyDeviceToHost; + } else if (srcIsHost && dstIsHost) { + kind = hipMemcpyHostToHost; + } else if (!srcIsHost && !dstIsHost) { + kind = hipMemcpyDeviceToDevice; + } + } + + + if ((kind == hipMemcpyHostToDevice) && (srcNotTracked)) { + if (HIP_STAGING_BUFFERS) { + std::lock_guard l (device->_copy_lock[0]); + //printf ("staged-copy- read dep signals\n"); + + hsa_signal_t depSignal; + int depSignalCnt = stream->copyCommand(NULL, &depSignal, ihipCommandCopyH2D); + + if (HIP_PININPLACE) { + device->_staging_buffer[0]->CopyHostToDevicePinInPlace(dst, src, sizeBytes, depSignalCnt ? &depSignal : NULL); + } else { + device->_staging_buffer[0]->CopyHostToDevice(dst, src, sizeBytes, depSignalCnt ? &depSignal : NULL); + } + + // The copy waits for inputs and then completes before returning. + stream->resetToEmpty(); + } else { + // TODO - remove, slow path. + hc::am_copy(dst, src, sizeBytes); + } + } else if ((kind == hipMemcpyDeviceToHost) && (dstNotTracked)) { + if (HIP_STAGING_BUFFERS) { + std::lock_guard l (device->_copy_lock[HIP_DISABLE_BIDIR_MEMCPY ? 0:1]); + //printf ("staged-copy- read dep signals\n"); + hsa_signal_t depSignal; + int depSignalCnt = stream->copyCommand(NULL, &depSignal, ihipCommandCopyD2H); + device->_staging_buffer[1]->CopyDeviceToHost(dst, src, sizeBytes, depSignalCnt ? &depSignal : NULL); + } else { + // TODO - remove, slow path. + hc::am_copy(dst, src, sizeBytes); + } + } else if (kind == hipMemcpyHostToHost) { // TODO-refactor.
+ memcpy(dst, src, sizeBytes); + + } else { + ihipCommand_t copyType; + if ((kind == hipMemcpyHostToDevice) || (kind == hipMemcpyDeviceToDevice)) { + copyType = ihipCommandCopyH2D; + } else if (kind == hipMemcpyDeviceToHost) { + copyType = ihipCommandCopyD2H; + } else { + // TODO - return error condition: + //e = hipErrorInvalidMemcpyDirection; + copyType = ihipCommandCopyD2H; + } + + device->_copy_lock[HIP_DISABLE_BIDIR_MEMCPY? 0:1].lock(); + + hsa_signal_store_relaxed(device->_copy_signal, 1); + + +#if USE_ROCR_V2 + hsa_signal_t depSignal; + int depSignalCnt = stream->copyCommand(NULL, &depSignal, copyType); + hsa_status_t hsa_status = hsa_amd_memory_async_copy(dst, device->_hsa_agent, src, device->_hsa_agent, sizeBytes, depSignalCnt, depSignalCnt ? &depSignal:0x0, device->_copy_signal); +#else + hsa_status_t hsa_status = hsa_amd_memory_async_copy(dst, src, sizeBytes, device->_hsa_agent, 0, NULL, device->_copy_signal); +#endif + + if (hsa_status == HSA_STATUS_SUCCESS) { + hsa_signal_wait_relaxed(device->_copy_signal, HSA_SIGNAL_CONDITION_LT, 1, UINT64_MAX, HSA_WAIT_STATE_ACTIVE); + } + + device->_copy_lock[HIP_DISABLE_BIDIR_MEMCPY ? 0:1].unlock(); + + } +} +#endif + + //--- hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind) { @@ -1407,83 +2341,115 @@ hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind hipStream_t stream = ihipSyncAndResolveStream(hipStreamNull); hc::completion_future marker; - ihipCheckCommandSwitchSync(stream, ihipCommandData, &marker); hipError_t e = hipSuccess; -#ifdef USE_ASYNC_COPY - if (ihipIsValidDevice(stream->_device_index)) { - - ihipDevice_t *device = &g_devices[stream->_device_index]; - - hsa_signal_t completion_signal; // init/obtain from pool. - - hsa_status_t hsa_status = hsa_amd_memory_async_copy(dst, src, size, device->_hsa_agent, 0, NULL, &completion_signal); - - e = (hsa_status == HSA_STATUS_SUCCESS) ? 
hipSuccess : hipErrorTbd; - } else { +#if USE_AM_TRACKER + try { + ihipSyncCopy(stream, dst, src, sizeBytes, kind); + } + catch (...) { e = hipErrorInvalidResourceHandle; } - + #else - // TODO-hsart - what synchronization does hsa_copy provide? hc::am_copy(dst, src, sizeBytes); e = hipSuccess; #endif - // TODO - when am_copy becomes async, and we have HIP_LAUNCH_BLOCKING set, then we would wait for copy operation to complete here. - return ihipLogStatus(e); } -//--- -/* +#if USE_AM_TRACKER==0 +/** * @warning on HCC hipMemcpyAsync uses a synchronous copy. */ +#endif +/** + * @result #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidMemcpyDirection, #hipErrorInvalidValue + * @warning on HCC hipMemcpyAsync does not support overlapped H2D and D2H copies. + */ +//--- hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind, hipStream_t stream) { std::call_once(hip_initialized, ihipInit); hipError_t e = hipSuccess; - stream = ihipSyncAndResolveStream(stream); + stream = ihipSyncAndResolveStream(stream); - hc::completion_future marker; - ihipCheckCommandSwitchSync(stream, ihipCommandData, &marker); +#if USE_AM_TRACKER + if (stream) { + ihipDevice_t *device = stream->getDevice(); - // Dispatch async memory copy to synchronize with items in the specified stream. + if (device == NULL) { + e = hipErrorInvalidDevice; - // Async - need to set up dependency on the last command queued to the device? + } else if (kind == hipMemcpyDefault) { + e = hipErrorInvalidMemcpyDirection; - // TODO-hsart This routine needs to ensure that dst and src are mapped on the GPU. - // This is a synchronous copy - remove and replace with code below when we have appropriate LOCK APIs. 
- hc::am_copy(dst, src, sizeBytes); + } else if (kind == hipMemcpyHostToHost) { + tprintf (TRACE_COPY2, "H2H copy with memcpy"); -#if 0 + memcpy(dst, src, sizeBytes); - hipStream_t s =ihipGetStream(stream); + } else { + ihipSignal_t *ihip_signal = stream->getSignal(); + hsa_signal_store_relaxed(ihip_signal->_hsa_signal, 1); + + ihipCommand_t copyType; + if ((kind == hipMemcpyHostToDevice) || (kind == hipMemcpyDeviceToDevice)) { + copyType = ihipCommandCopyH2D; + } else if (kind == hipMemcpyDeviceToHost) { + copyType = ihipCommandCopyD2H; + } else { + e = hipErrorInvalidMemcpyDirection; + copyType = ihipCommandCopyD2H; + } + +#if USE_ROCR_V2 + hsa_signal_t depSignal; + int depSignalCnt = stream->copyCommand(ihip_signal, &depSignal, copyType); + + tprintf (TRACE_SYNC, " copy-async, waitFor=%lu completion=#%lu(%lu)\n", depSignalCnt? depSignal.handle:0x0, ihip_signal->_sig_id, ihip_signal->_hsa_signal.handle); + + hsa_status_t hsa_status = hsa_amd_memory_async_copy(dst, device->_hsa_agent, src, device->_hsa_agent, sizeBytes, depSignalCnt, depSignalCnt ? &depSignal:0x0, ihip_signal->_hsa_signal); +#else + hsa_status_t hsa_status = hsa_amd_memory_async_copy(dst, src, sizeBytes, device->_hsa_agent, 0, NULL, ihip_signal->_hsa_signal); +#endif - if (s) { - hc::completion_future cf = ihipMemcpyKernel (s, static_cast (dst), static_cast (src), sizeBytes); - - //cf.wait(); - - e = hipSuccess; + if (hsa_status == HSA_STATUS_SUCCESS) { + // TODO-stream - fix release-signal calls here. + if (HIP_LAUNCH_BLOCKING) { + tprintf(TRACE_SYNC, "LAUNCH_BLOCKING for completion of hipMemcpyAsync(%zu)\n", sizeBytes); + stream->wait(); + } + } else { + // This path can be hit if src or dst point to unpinned host memory. + // TODO-stream - does async-copy fall back to sync if input pointers are not pinned? + e = hipErrorInvalidValue; + } + } } else { e = hipErrorInvalidValue; } +#else + // TODO-hsart This routine needs to ensure that dst and src are mapped on the GPU. 
+ // This is a synchronous copy - remove and replace with code below when we have appropriate LOCK APIs. + hc::am_copy(dst, src, sizeBytes); #endif - // TODO - if am_copy becomes async, and we have HIP_LAUNCH_BLOCKING set, then we would wait for copy operation to complete here. return ihipLogStatus(e); } // TODO-sync: function is async unless target is pinned host memory - then these are fully sync. +/** @return #hipErrorInvalidValue + */ hipError_t hipMemsetAsync(void* dst, int value, size_t sizeBytes, hipStream_t stream ) { std::call_once(hip_initialized, ihipInit); @@ -1491,37 +2457,42 @@ hipError_t hipMemsetAsync(void* dst, int value, size_t sizeBytes, hipStream_t s hipError_t e = hipSuccess; stream = ihipSyncAndResolveStream(stream); - hc::completion_future marker; - ihipCheckCommandSwitchSync(stream, ihipCommandData, &marker); + if (stream) { + stream->preKernelCommand(); // only touch the stream after the NULL check above - hc::completion_future cf ; + hc::completion_future cf ; - if ((sizeBytes & 0x3) == 0) { - // use a faster word-per-workitem copy: - try { - value = value & 0xff; - unsigned value32 = (value << 24) | (value << 16) | (value << 8) | (value) ; - cf = ihipMemsetKernel (stream, static_cast (dst), value32, sizeBytes/sizeof(unsigned)); + if ((sizeBytes & 0x3) == 0) { + // use a faster word-per-workitem copy: + try { + value = value & 0xff; + unsigned value32 = (value << 24) | (value << 16) | (value << 8) | (value) ; + cf = ihipMemsetKernel (stream, static_cast (dst), value32, sizeBytes/sizeof(unsigned)); + } + catch (std::exception &ex) { + e = hipErrorInvalidValue; + } + } else { + // use a slow byte-per-workitem copy: + try { + cf = ihipMemsetKernel (stream, static_cast (dst), value, sizeBytes); + } + catch (std::exception &ex) { + e = hipErrorInvalidValue; + } } - catch (std::exception &ex) { - e = hipErrorInvalidValue; + + stream->postKernelCommand(cf); + + + if (HIP_LAUNCH_BLOCKING) { + tprintf (TRACE_SYNC, "'%s' LAUNCH_BLOCKING wait for completion [stream:%p].\n", __func__, (void*)stream); +
+ cf.wait(); + tprintf (TRACE_SYNC, "'%s' LAUNCH_BLOCKING completed [stream:%p].\n", __func__, (void*)stream); } } else { - // use a slow byte-per-workitem copy: - try { - cf = ihipMemsetKernel (stream, static_cast (dst), value, sizeBytes); - } - catch (std::exception &ex) { - e = hipErrorInvalidValue; - } - } - - - if (HIP_LAUNCH_BLOCKING) { - tprintf (TRACE_SYNC, "'%s' LAUNCH_BLOCKING wait for completion [stream:%p].\n", __func__, (void*)stream); - cf.wait(); - tprintf (TRACE_SYNC, "'%s' LAUNCH_BLOCKING completed [stream:%p].\n", __func__, (void*)stream); + e = hipErrorInvalidValue; } @@ -1531,15 +2502,18 @@ hipError_t hipMemsetAsync(void* dst, int value, size_t sizeBytes, hipStream_t s hipError_t hipMemset(void* dst, int value, size_t sizeBytes ) { + std::call_once(hip_initialized, ihipInit); + + // TODO - call an ihip memset so HIP_TRACE is correct. return hipMemsetAsync(dst, value, sizeBytes, hipStreamNull); } /* - * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue (if free != NULL due to bug) - * @bug - on hcc free always returns 50% of peak regardless of current allocations. hipMemGetInfo returns hipErrorInvalidValue to indicate this. + * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue (if free != NULL due to bug) + * @warning On HCC, the free memory only accounts for memory allocated by this process and may be optimistic.
*/ -hipError_t hipMemGetInfo ( size_t * free, size_t * total ) +hipError_t hipMemGetInfo (size_t *free, size_t *total) { std::call_once(hip_initialized, ihipInit); @@ -1552,23 +2526,29 @@ hipError_t hipMemGetInfo ( size_t * free, size_t * total ) } if (free) { - *free = hipDevice->_props.totalGlobalMem * 0.5; // TODO +#if USE_AM_TRACKER + // TODO - replace with kernel-level for reporting free memory: + size_t deviceMemSize, hostMemSize, userMemSize; + hc::am_memtracker_sizeinfo(hipDevice->_acc, &deviceMemSize, &hostMemSize, &userMemSize); + *free = hipDevice->_props.totalGlobalMem - deviceMemSize; +#else + *free = hipDevice->_props.totalGlobalMem * 0.5; // TODO e=hipErrorInvalidValue; +#endif } } else { e = hipErrorInvalidDevice; } - // TODO-runtime - when we fix the 50% bug. - //return ihipLogStatus(hipErrorSuccess); - return ihipLogStatus(hipErrorInvalidValue); + return ihipLogStatus(e); } //--- hipError_t hipFree(void* ptr) { + // TODO - ensure this pointer was created by hipMalloc and not hipMallocHost std::call_once(hip_initialized, ihipInit); @@ -1585,15 +2565,12 @@ hipError_t hipFree(void* ptr) hipError_t hipFreeHost(void* ptr) { + // TODO - ensure this pointer was created by hipMallocHost and not hipMalloc std::call_once(hip_initialized, ihipInit); if (ptr) { -#if USE_PINNED_HOST tprintf (TRACE_MEM, " %s: %p\n", __func__, ptr); hc::am_free(ptr); -#else - free(ptr); -#endif } return ihipLogStatus(hipSuccess); @@ -1665,10 +2642,8 @@ hipError_t hipMemcpyPeerAsync ( void* dst, int dstDevice, const void* src, int hipError_t hipDriverGetVersion(int *driverVersion) { std::call_once(hip_initialized, ihipInit); - *driverVersion = 4; - - return ihipLogStatus(hipSuccess); + return ihipLogStatus(hipSuccess); } @@ -1715,3 +2690,5 @@ hipError_t hipHccGetAcceleratorView(hipStream_t stream, hc::accelerator_view **a hipError_t err = hipSuccess; return ihipLogStatus(err); } + +// TODO - review signal / error reporting code. 
diff --git a/projects/hip/tests/README.md b/projects/hip/tests/README.md index 48b88505ef..96de22b558 100644 --- a/projects/hip/tests/README.md +++ b/projects/hip/tests/README.md @@ -1,33 +1,39 @@ Tests uses CMAKE as teh build infrastructure. Use : - -> mkdir build -> cd build -> cmake ../src -> make test - +``` +$ mkdir build +$ cd build +$ cmake ../src +$ make +$ make test +``` #----- -# How to add a new test; +### How to add a new test -# edit src/CMakeFiles to add the test: +edit src/CMakeFiles to add the test: -# add the executable and list of required CPP files, ie: -# make_test (EXE CPP_FILES) -> make_hip_executable (hipMemset hipMemset.cpp) +### add the executable and list of required CPP files, ie: +``` +make_test (EXE CPP_FILES) +make_hip_executable (hipMemset hipMemset.cpp) +``` -# Add to automated Test framework: -# make_test (TESTNAME ARGS) -> make_test(hipMemset " ") +### Add to automated Test framework: +``` +make_test (TESTNAME ARGS) +make_test(hipMemset " ") +``` - -# Running tests: +### Running tests: +``` make test +``` # Run a specific test: +``` ./hipMemset - - +``` diff --git a/projects/hip/tests/src/CMakeLists.txt b/projects/hip/tests/src/CMakeLists.txt index ba4be66a22..0037846e03 100644 --- a/projects/hip/tests/src/CMakeLists.txt +++ b/projects/hip/tests/src/CMakeLists.txt @@ -19,8 +19,10 @@ MESSAGE ("HIP_PATH=" ${HIP_PATH}) if (${HIP_PLATFORM} STREQUAL "hcc") MESSAGE ("HIP_PLATFORM=hcc") - set (HC_PATH ${HIP_PATH}/hc) - set (HSA_PATH /opt/hsa) + set (HSA_PATH $ENV{HSA_PATH}) + if (NOT DEFINED HSA_PATH) + set (HSA_PATH /opt/hsa) + endif() #--- # Add HSA library: @@ -30,7 +32,7 @@ if (${HIP_PLATFORM} STREQUAL "hcc") #These includes are used for all files. #Include HIP and HC since the tests need both of these: #Note below HSA path is surgically included only where necessary. 
- include_directories(${HIP_PATH}/include ${HC_PATH}/include) + include_directories(${HIP_PATH}/include) # hip_hcc.o: add_library(hip_hcc OBJECT ${HIP_PATH}/src/hip_hcc.cpp) @@ -39,6 +41,10 @@ if (${HIP_PLATFORM} STREQUAL "hcc") elseif (${HIP_PLATFORM} STREQUAL "nvcc") MESSAGE ("HIP_PLATFORM=nvcc") + + #Need C++11 for threads in some of the tests. + add_definitions(-std=c++11) + # NVCC does not not support -rdynamic option set(CMAKE_SHARED_LIBRARY_LINK_CXX_FLAGS ) set(CMAKE_SHARED_LIBRARY_LINK_C_FLAGS ) @@ -104,7 +110,10 @@ make_hip_executable (hip_clz hip_clz.cpp) make_hip_executable (hip_brev hip_brev.cpp) make_hip_executable (hip_ffs hip_ffs.cpp) make_hip_executable (hipGetDeviceAttribute hipGetDeviceAttribute.cpp) +make_hip_executable (hipEnvVar hipEnvVar.cpp) +make_hip_executable (hipEnvVarDriver hipEnvVarDriver.cpp) make_hip_executable (hipMemcpy hipMemcpy.cpp) +make_hip_executable (hipMemcpyAsync hipMemcpyAsync.cpp) make_hip_executable (hipMemset hipMemset.cpp) make_hip_executable (hipEventRecord hipEventRecord.cpp) make_hip_executable (hipLanguageExtensions hipLanguageExtensions.cpp) @@ -114,6 +123,9 @@ make_hip_executable (hipSimpleAtomicsTest hipSimpleAtomicsTest.cpp) make_hip_executable (hipMathFunctionsHost hipMathFunctions.cpp hipSinglePrecisionMathHost.cpp hipDoublePrecisionMathHost.cpp) make_hip_executable (hipMathFunctionsDevice hipMathFunctions.cpp hipSinglePrecisionMathDevice.cpp hipDoublePrecisionMathDevice.cpp) make_hip_executable (hipIntrinsics hipMathFunctions.cpp hipSinglePrecisionIntrinsics.cpp hipDoublePrecisionIntrinsics.cpp hipIntegerIntrinsics.cpp) +make_hip_executable (hipPointerAttrib hipPointerAttrib.cpp) +make_hip_executable (hipMultiThreadStreams hipMultiThreadStreams.cpp) +make_hip_executable (hipStreamL5 hipStreamL5.cpp) target_link_libraries(hipMathFunctionsHost m) make_test(hip_ballot " " ) @@ -126,11 +138,17 @@ make_test(hipEventRecord --iterations 10) make_test(hipMemset " " ) make_test(hipMemset --N 10 --memsetval 0x42 ) # 
small copy, just 10 bytes. make_test(hipMemset --N 10013 --memsetval 0x5a ) # oddball size. -make_test(hipMemset --N 500M --memsetval 0xa6 ) # big copy +make_test(hipMemset --N 256M --memsetval 0xa6 ) # big copy make_test(hipGridLaunch " " ) +make_test(hipEnvVarDriver " " ) +make_test(hipPointerAttrib " " ) +make_test(hipMultiThreadStreams " " ) make_test(hipMemcpy " " ) +make_test(hipMemcpyAsync " " ) make_test(hipHcc " " ) +make_test(hipStreamL5 " ") + make_hipify_test(specialFunc.cu ) diff --git a/projects/hip/tests/src/hipDoublePrecisionMathDevice.cpp b/projects/hip/tests/src/hipDoublePrecisionMathDevice.cpp index 4f36b91eb7..7e1d862392 100644 --- a/projects/hip/tests/src/hipDoublePrecisionMathDevice.cpp +++ b/projects/hip/tests/src/hipDoublePrecisionMathDevice.cpp @@ -42,7 +42,9 @@ __device__ void double_precision_math_functions() copysign(1.0, -2.0); cos(0.0); cosh(0.0); - //cospi(0.0); +#if __hcc_workweek__ >= 16073 + cospi(0.0); +#endif //cyl_bessel_i0(0.0); //cyl_bessel_i1(0.0); erf(0.0); @@ -100,7 +102,9 @@ __device__ void double_precision_math_functions() //rnorm3d(0.0, 0.0, 1.0); //rnorm4d(0.0, 0.0, 0.0, 1.0); round(0.0); - //rsqrt(1.0); +#if __hcc_workweek__ >= 16073 + rsqrt(1.0); +#endif //scalbln(0.0, 1); scalbn(0.0, 1); signbit(1.0); @@ -108,7 +112,9 @@ __device__ void double_precision_math_functions() //sincos(0.0, &fX, &fY); //sincospi(0.0, &fX, &fY); sinh(0.0); - //sinpi(0.0); +#if __hcc_workweek__ >= 16073 + sinpi(0.0); +#endif sqrt(0.0); tan(0.0); tanh(0.0); diff --git a/projects/hip/tests/src/hipDoublePrecisionMathHost.cpp b/projects/hip/tests/src/hipDoublePrecisionMathHost.cpp index 9e4c43e2be..d45423a879 100644 --- a/projects/hip/tests/src/hipDoublePrecisionMathHost.cpp +++ b/projects/hip/tests/src/hipDoublePrecisionMathHost.cpp @@ -42,7 +42,9 @@ __host__ void double_precision_math_functions() copysign(1.0, -2.0); cos(0.0); cosh(0.0); - //cospi(0.0); +#if __hcc_workweek__ >= 16073 + cospi(0.0); +#endif //cyl_bessel_i0(0.0); 
//cyl_bessel_i1(0.0); erf(0.0); @@ -100,7 +102,9 @@ __host__ void double_precision_math_functions() //rnorm3d(0.0, 0.0, 1.0); //rnorm4d(0.0, 0.0, 0.0, 1.0); round(0.0); - //rsqrt(1.0); +#if __hcc_workweek__ >= 16073 + rsqrt(1.0); +#endif ///scalbln(0.0, 1); scalbn(0.0, 1); signbit(1.0); @@ -108,7 +112,9 @@ __host__ void double_precision_math_functions() sincos(0.0, &fX, &fY); //sincospi(0.0, &fX, &fY); sinh(0.0); - //sinpi(0.0); +#if __hcc_workweek__ >= 16073 + sinpi(0.0); +#endif sqrt(0.0); tan(0.0); tanh(0.0); diff --git a/projects/hip/tests/src/hipEnvVar.cpp b/projects/hip/tests/src/hipEnvVar.cpp new file mode 100644 index 0000000000..6f9047776c --- /dev/null +++ b/projects/hip/tests/src/hipEnvVar.cpp @@ -0,0 +1,124 @@ +/* +Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + + +#include +#include +#include +#include +#include +#include + +using namespace std; + +void usage() { + printf("hipEnvVar [otpions]\n\ + -c,\t\ttotal number ofavailable GPUs and their pciBusID\n\ + -d,\t\tselect one GPU and return its pciBusID\n\ + -v,\t\tsend the list to HIP_VISIBLE_DEVICES env var\n\ + -h,\t\tshow this help message\n\ + "); +} +int main(int argc, char **argv) +{ + //string str = getenv("HIP_VISIBLE_DEVICES"); + //std::cout << "The current env HIP_VISIBLE_DEVICES is"< devCount -1) { + printf("Selected device %d is out of bound. Devices on your system are in range %d - %d\n", + device, 0, devCount -1); + return -1; + } + + if (retDevCnt) { + //std::cout << "Total number of devices visible in system is "<< devCount << std::endl; + std::cout << devCount << std::endl; + } + if (retDevInfo) { + hipSetDevice(device); + hipDeviceProp_t devProp; + + hipDeviceGetProperties(&devProp, device); + if (devProp.major < 1) { + printf("%d does not support HIP\n", device); + return -1; + } + std::cout << devProp.pciBusID << std::endl; + } + exit(0); +} + diff --git a/projects/hip/tests/src/hipEnvVarDriver.cpp b/projects/hip/tests/src/hipEnvVarDriver.cpp new file mode 100644 index 0000000000..ebb1be956d --- /dev/null +++ b/projects/hip/tests/src/hipEnvVarDriver.cpp @@ -0,0 +1,103 @@ +/* Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and +associated documentation files (the "Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the +following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial +portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT +LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO +EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include +#include +#include +#include +#include +using namespace std; + +//./hipEnvVar -c -d 0 -h + //putenv("SomeVariable=SomeValue"); + //putenv("export HIP_VISIBLE_DEVICES=0,1,2,3"); + +int getDeviceNumber(){ + FILE *in; + char buff[512]; + string str; + if(!(in = popen("./hipEnvVar -c", "r"))){ + return 1; + } + fgets(buff, sizeof(buff), in); + pclose(in); + return atoi(buff); +} + +int getDevicePCIBusNum(int deviceID){ + FILE *in; + char buff[512]; + string str = "./hipEnvVar -d "; + str += std::to_string(deviceID); + if(!(in = popen(str.c_str(), "r"))){ + return 1; + } + fgets(buff, sizeof(buff), in); + pclose(in); + return atoi(buff); +} + +int main() { + unsetenv("HIP_VISIBLE_DEVICES"); + //collect the device pci bus ID for all devices + int totalDeviceNum = getDeviceNumber(); + std::cout << "The total number of available devices is " << totalDeviceNum<< std::endl + <<"Valid index range is 0 - "< devPCINum; + for (int i = 0; i < totalDeviceNum ; i++) { + devPCINum.push_back(getDevicePCIBusNum(i)); + std::cout <<"The collected device PCI Bus ID of Device "< 2){ + setenv("HIP_VISIBLE_DEVICES","0,1,1000,2",1); + assert(getDeviceNumber() == 2); + + setenv("HIP_VISIBLE_DEVICES","0,1,2",1); + assert(getDeviceNumber() == 3); + // test if CUDA_VISIBLE_DEVICES will be accepted by the runtime + unsetenv("HIP_VISIBLE_DEVICES"); + setenv("CUDA_VISIBLE_DEVICES","0,1,2",1); + assert(getDeviceNumber() == 3); + } + + setenv("HIP_VISIBLE_DEVICES","-100,0,1",1); + assert(getDeviceNumber() == 0); + + std::cout 
<< "Passed!" << std::endl; + return 0; +} diff --git a/projects/hip/tests/src/hipGetDeviceAttribute.cpp b/projects/hip/tests/src/hipGetDeviceAttribute.cpp index 62b6d432a0..0073dfeed7 100644 --- a/projects/hip/tests/src/hipGetDeviceAttribute.cpp +++ b/projects/hip/tests/src/hipGetDeviceAttribute.cpp @@ -67,13 +67,19 @@ int main(int argc, char *argv[]) CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeWarpSize, props.warpSize)); CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeMaxRegistersPerBlock, props.regsPerBlock)); CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeClockRate, props.clockRate)); + CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeMemoryClockRate, props.memoryClockRate)); + CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeMemoryBusWidth, props.memoryBusWidth)); CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeMultiprocessorCount, props.multiProcessorCount)); + CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeIsMultiGpuBoard, props.isMultiGpuBoard)); CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeComputeMode, props.computeMode)); CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeL2CacheSize, props.l2CacheSize)); CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeMaxThreadsPerMultiProcessor, props.maxThreadsPerMultiProcessor)); CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeComputeCapabilityMajor, props.major)); CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeComputeCapabilityMinor, props.minor)); - + CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeConcurrentKernels, props.concurrentKernels)); + CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributePciBusId, props.pciBusID)); + CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributePciDeviceId, props.pciDeviceID)); + CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeMaxSharedMemoryPerMultiprocessor, 
props.maxSharedMemoryPerMultiProcessor)); passed(); }; diff --git a/projects/hip/tests/src/hipGridLaunch.cpp b/projects/hip/tests/src/hipGridLaunch.cpp index 4502446b3b..f13781362e 100644 --- a/projects/hip/tests/src/hipGridLaunch.cpp +++ b/projects/hip/tests/src/hipGridLaunch.cpp @@ -37,7 +37,7 @@ __device__ int foo(int i) //Syntax we would like to support with GRID_LAUNCH enabled: template __global__ void -vectorADD2( grid_launch_parm lp, +vectorADD2( hipLaunchParm lp, T *A_d, T *B_d, T *C_d, diff --git a/projects/hip/tests/src/hipMemcpy.cpp b/projects/hip/tests/src/hipMemcpy.cpp index 5db2b270d6..b2bfc63fe5 100644 --- a/projects/hip/tests/src/hipMemcpy.cpp +++ b/projects/hip/tests/src/hipMemcpy.cpp @@ -23,24 +23,28 @@ THE SOFTWARE. #include "test_common.h" - -int main(int argc, char *argv[]) +void printSep() { - HipTest::parseStandardArguments(argc, argv, true); + printf ("======================================================================================\n"); +} +//--- +// Test simple H2D copies and back. 
+// Designed to stress a small number of simple smoke tests +void simpleTest1() +{ + printf ("test: %s\n", __func__); size_t Nbytes = N*sizeof(int); - - printf ("N=%zu \n", N); + printf ("N=%zu Nbytes=%6.2fMB\n", N, Nbytes/1024.0/1024.0); int *A_d, *B_d, *C_d; int *A_h, *B_h, *C_h; - HipTest::initArrays (&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N); - + HipTest::initArrays (&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false); + printf ("A_d=%p B_d=%p C_d=%p A_h=%p B_h=%p C_h=%p\n", A_d, B_d, C_d, A_h, B_d, C_h); unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N); - HIPCHECK ( hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice)); HIPCHECK ( hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice)); @@ -50,8 +54,223 @@ int main(int argc, char *argv[]) HIPCHECK (hipDeviceSynchronize()); - HipTest::checkVectorADD(A_h, B_h, C_h, N); + + HipTest::freeArrays (A_d, B_d, C_d, A_h, B_h, C_h, false); + HIPCHECK (hipDeviceReset()); + + printf (" %s success\n", __func__); +} + + + +//--- +// Test many different kinds of memory copies. +// The subroutine allocates memory , copies to device, runs a vector add kernel, copies back, and checks the result. +// +// IN: numElements controls the number of elements used for allocations. +// IN: usePinnedHost : If true, allocate host with hipMallocHost and is pinned ; else allocate host memory with malloc. +// IN: useHostToHost : If true, add an extra host-to-host copy. +// IN: useDeviceToDevice : If true, add an extra deviceto-device copy after result is produced. +// IN: useMemkindDefault : If true, use memkinddefault (runtime figures out direction). if false, use explicit memcpy direction. 
+// +template +void memcpytest2(size_t numElements, bool usePinnedHost, bool useHostToHost, bool useDeviceToDevice, bool useMemkindDefault) +{ + size_t sizeElements = numElements * sizeof(T); + printf ("test: %s<%s> size=%lu (%6.2fMB) usePinnedHost:%d, useHostToHost:%d, useDeviceToDevice:%d, useMemkindDefault:%d\n", + __func__, + TYPENAME(T), + sizeElements, sizeElements/1024.0/1024.0, + usePinnedHost, useHostToHost, useDeviceToDevice, useMemkindDefault); + + + T *A_d, *B_d, *C_d; + T *A_h, *B_h, *C_h; + + + HipTest::initArrays (&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, numElements, usePinnedHost); + unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, numElements); + + T *A_hh = NULL; + T *B_hh = NULL; + T *C_dd = NULL; + + + + if (useHostToHost) { + if (usePinnedHost) { + HIPCHECK ( hipMallocHost(&A_hh, sizeElements) ); + HIPCHECK ( hipMallocHost(&B_hh, sizeElements) ); + } else { + A_hh = (T*)malloc(sizeElements); + B_hh = (T*)malloc(sizeElements); + } + + + // Do some extra host-to-host copies here to mix things up: + HIPCHECK ( hipMemcpy(A_hh, A_h, sizeElements, useMemkindDefault? hipMemcpyDefault : hipMemcpyHostToHost)); + HIPCHECK ( hipMemcpy(B_hh, B_h, sizeElements, useMemkindDefault? hipMemcpyDefault : hipMemcpyHostToHost)); + + + HIPCHECK ( hipMemcpy(A_d, A_hh, sizeElements, useMemkindDefault ? hipMemcpyDefault : hipMemcpyHostToDevice)); + HIPCHECK ( hipMemcpy(B_d, B_hh, sizeElements, useMemkindDefault ? hipMemcpyDefault : hipMemcpyHostToDevice)); + } else { + HIPCHECK ( hipMemcpy(A_d, A_h, sizeElements, useMemkindDefault ? hipMemcpyDefault : hipMemcpyHostToDevice)); + HIPCHECK ( hipMemcpy(B_d, B_h, sizeElements, useMemkindDefault ? 
hipMemcpyDefault : hipMemcpyHostToDevice)); + } + + hipLaunchKernel(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, A_d, B_d, C_d, numElements); + + if (useDeviceToDevice) { + HIPCHECK ( hipMalloc(&C_dd, sizeElements) ); + + // Do an extra device-to-device copies here to mix things up: + HIPCHECK ( hipMemcpy(C_dd, C_d, sizeElements, useMemkindDefault? hipMemcpyDefault : hipMemcpyDeviceToDevice)); + + //Destroy the original C_d: + HIPCHECK ( hipMemset(C_d, 0x5A, sizeElements)); + + HIPCHECK ( hipMemcpy(C_h, C_dd, sizeElements, useMemkindDefault? hipMemcpyDefault:hipMemcpyDeviceToHost)); + } else { + HIPCHECK ( hipMemcpy(C_h, C_d, sizeElements, useMemkindDefault? hipMemcpyDefault:hipMemcpyDeviceToHost)); + } + + HIPCHECK ( hipDeviceSynchronize() ); + HipTest::checkVectorADD(A_h, B_h, C_h, numElements); + + HipTest::freeArrays (A_d, B_d, C_d, A_h, B_h, C_h, usePinnedHost); + + printf (" %s success\n", __func__); +} + + +//--- +//Try all the 16 possible combinations to memcpytest2 - usePinnedHost, useHostToHost, useDeviceToDevice, useMemkindDefault +template +void memcpytest2_loop(size_t numElements) +{ + printSep(); + + for (int usePinnedHost =0; usePinnedHost<=1; usePinnedHost++) { + for (int useHostToHost =0; useHostToHost<=1; useHostToHost++) { // TODO + for (int useDeviceToDevice =0; useDeviceToDevice<=1; useDeviceToDevice++) { + for (int useMemkindDefault =0; useMemkindDefault<=1; useMemkindDefault++) { + memcpytest2(numElements, usePinnedHost, useHostToHost, useDeviceToDevice, useMemkindDefault); + } + } + } + } +} + + +//--- +//Try many different sizes to memory copy. 
+template +void memcpytest2_sizes(size_t maxElem=0, size_t offset=0) +{ + printSep(); + printf ("test: %s<%s>\n", __func__, TYPENAME(T)); + + int deviceId; + HIPCHECK(hipGetDevice(&deviceId)); + + size_t free, total; + HIPCHECK(hipMemGetInfo(&free, &total)); + + if (maxElem == 0) { + maxElem = free/sizeof(T)/5; + } + + printf (" device#%d: hipMemGetInfo: free=%zu (%4.2fMB) total=%zu (%4.2fMB) maxSize=%6.1fMB offset=%lu\n", + deviceId, free, (float)(free/1024.0/1024.0), total, (float)(total/1024.0/1024.0), maxElem*sizeof(T)/1024.0/1024.0, offset); + + for (size_t elem=64; elem+offset<=maxElem; elem*=2) { + HIPCHECK ( hipDeviceReset() ); + memcpytest2(elem+offset, 0, 1, 1, 0); // unpinned host + HIPCHECK ( hipDeviceReset() ); + memcpytest2(elem+offset, 1, 1, 1, 0); // pinned host + } +} + + +//--- +//Create multiple threads to stress multi-thread locking behavior in the allocation/deallocation/tracking logic: +template +void multiThread_1(bool serialize, bool usePinnedHost) +{ + printSep(); + printf ("test: %s<%s> serialize=%d usePinnedHost=%d\n", __func__, TYPENAME(T), serialize, usePinnedHost); + std::thread t1 (memcpytest2,N, usePinnedHost,0,0,0); + if (serialize) { + t1.join(); + } + + + std::thread t2 (memcpytest2,N, usePinnedHost,0,0,0); + if (serialize) { + t2.join(); + } + + if (!serialize) { + t1.join(); + t2.join(); + } +} + + + +int main(int argc, char *argv[]) +{ + HipTest::parseStandardArguments(argc, argv, true); + + printf ("info: set device to %d\n", p_gpuDevice); + HIPCHECK(hipSetDevice(p_gpuDevice)); + + + if (p_tests & 0x1) { + HIPCHECK ( hipDeviceReset() ); + simpleTest1(); + } + + if (p_tests & 0x2) { + HIPCHECK ( hipDeviceReset() ); + memcpytest2_loop(N); + memcpytest2_loop(N); + memcpytest2_loop(N); + memcpytest2_loop(N); + } + + if (p_tests & 0x4) { + HIPCHECK ( hipDeviceReset() ); + printSep(); + memcpytest2_sizes(0,0); + printSep(); + memcpytest2_sizes(0,64); + printSep(); + memcpytest2_sizes(1024*1024, 13); + printSep(); + 
memcpytest2_sizes(1024*1024, 50); + } + + if (p_tests & 0x8) { + HIPCHECK ( hipDeviceReset() ); + printSep(); + + // Simplest cases: serialize the threads, and also used pinned memory: + // This verifies that the sub-calls to memcpytest2 are correct. + multiThread_1(true, true); + + // Serialize, but use unpinned memory to stress the unpinned memory xfer path. + multiThread_1(true, false); + + // Remove serialization, so two threads are performing memory copies in parallel. + multiThread_1(false, true); + + // Remove serialization, and use unpinned. + multiThread_1(false, false); // TODO + } + passed(); } diff --git a/projects/hip/tests/src/hipMemcpyAsync.cpp b/projects/hip/tests/src/hipMemcpyAsync.cpp new file mode 100644 index 0000000000..4b92e2fc1e --- /dev/null +++ b/projects/hip/tests/src/hipMemcpyAsync.cpp @@ -0,0 +1,349 @@ +// Test under-development. Calls async mem-copy API, experiment with functionality. + +#include "hip_runtime.h" +#include "test_common.h" + +unsigned p_streams = 2; + + +void simpleNegTest() +{ + printf ("testing: %s\n",__func__); + hipError_t e; + float *A_malloc, *A_pinned, *A_d; + + size_t Nbytes = N*sizeof(float); + A_malloc = (float*)malloc(Nbytes); + HIPCHECK(hipMallocHost(&A_pinned, Nbytes)); + HIPCHECK(hipMalloc(&A_d, Nbytes)); + + + // Can't use default with async copy + e = hipMemcpyAsync(A_pinned, A_d, Nbytes, hipMemcpyDefault, NULL); + HIPASSERT (e==hipErrorInvalidMemcpyDirection); // TODO + HIPASSERT (e!= hipSuccess); + + + // Not sure what happens here, the memory must be pinned. + e = hipMemcpyAsync(A_malloc, A_d, Nbytes, hipMemcpyHostToDevice, NULL); + + printf (" async memcpy of A_malloc to A_d. 
Result=%d\n", e); + //HIPASSERT (e==hipErrorInvalidValue); +} + +class Pinned; +class Unpinned; + +template struct HostTraits; + +template<> +struct HostTraits +{ + static const char *Name() { return "Pinned"; } ; + + static void *Alloc(size_t sizeBytes) { + void *p; + HIPCHECK(hipMallocHost(&p, sizeBytes)); + return p; + }; +}; + + +template +__global__ void +addK (hipLaunchParm lp, T *A, T K, size_t numElements) +{ + size_t offset = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x); + size_t stride = hipBlockDim_x * hipGridDim_x ; + + for (size_t i=offset; i +void test_pingpong(hipStream_t stream, size_t numElements, int numInflight, int numPongs, bool doHostSide) +{ + HIPASSERT(numElements % numInflight == 0); // Must be evenly divisible. + size_t Nbytes = numElements*sizeof(T); + size_t eachCopyElements = numElements / numInflight; + size_t eachCopyBytes = eachCopyElements * sizeof(T); + + unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, numElements); + + printf ("-----------------------------------------------------------------------------------------------\n"); + printf ("testing: %s<%s> Nbytes=%zu (%6.1f MB) numPongs=%d numInflight=%d eachCopyElements=%zu eachCopyBytes=%zu\n", + __func__, HostTraits::Name(), Nbytes, (double)(Nbytes)/1024.0/1024.0, numPongs, numInflight, eachCopyElements, eachCopyBytes); + + T *A_h; + T *A_d; + + A_h = (T*)(HostTraits::Alloc(Nbytes)); + HIPCHECK(hipMalloc(&A_d, Nbytes)); + + // Initialize the host array: + const T initValue = 13; + const T deviceConst = 2; + const T hostConst = 10000; + for (size_t i=0; i, dim3(blocks), dim3(threadsPerBlock), 0, stream, A_d, 2, numElements); + + for (int i=0; i (i); + } + + + //stream=0; // fixme TODO + + + for (int i=0; i= argc || !HipTest::parseUInt(argv[i], &p_streams)) { + failed("Bad streams argument"); + } + } else { + failed("Bad argument '%s'", arg); + } + }; +}; + + + + +int main(int argc, char *argv[]) +{ + HipTest::parseStandardArguments(argc, argv, true); + 
parseMyArguments(argc, argv); + + + printf ("info: set device to %d\n", p_gpuDevice); + HIPCHECK(hipSetDevice(p_gpuDevice)); + + if (p_tests & 0x01) { + simpleNegTest(); + } + + if (p_tests & 0x02) { + hipStream_t stream; + HIPCHECK (hipStreamCreate(&stream)); + + test_manyInflightCopies(stream, 1024, 16, true); + test_manyInflightCopies(stream, 1024, 4, true); // verify we re-use the same entries instead of growing pool. + test_manyInflightCopies(stream, 1024*8, 64, false); + + HIPCHECK(hipStreamDestroy(stream)); + } + + + if (p_tests & 0x04) { + test_chunkedAsyncExample(p_streams, true, true, true); // Easy sync version + test_chunkedAsyncExample(p_streams, false, true, true); // Easy sync version + test_chunkedAsyncExample(p_streams, false, false, true); // Some async + test_chunkedAsyncExample(p_streams, false, false, false); // All async + } + + if (p_tests & 0x08) { + hipStream_t stream; + HIPCHECK (hipStreamCreate(&stream)); + + test_pingpong(stream, 1024*1024*32, 1, 1, false); + test_pingpong(stream, 1024*1024*32, 1, 10, false); + + HIPCHECK(hipStreamDestroy(stream)); + } + + + passed(); + +} diff --git a/projects/hip/tests/src/hipMultiThreadStreams.cpp b/projects/hip/tests/src/hipMultiThreadStreams.cpp new file mode 100644 index 0000000000..a3dd94e077 --- /dev/null +++ b/projects/hip/tests/src/hipMultiThreadStreams.cpp @@ -0,0 +1,142 @@ +/* +Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +#include "hip_runtime.h" +#include "test_common.h" + + + +void printSep() +{ + printf ("======================================================================================\n"); +} + +//--- +// Test simple H2D copies and back. 
+// Designed to stress a small number of simple smoke tests + +template< + typename T=float, + class P=HipTest::Unpinned, + class C=HipTest::Memcpy +> +void simpleVectorCopy(size_t numElements, int iters, hipStream_t stream) +{ + using HipTest::MemTraits; + + std::thread::id pid = std::this_thread::get_id(); + + printf ("test: %s <%s> %s %s\n", __func__, TYPENAME(T), P::str(), C::str()); + size_t Nbytes = numElements*sizeof(T); + printf ("numElements=%zu Nbytes=%6.2fMB\n", numElements, Nbytes/1024.0/1024.0); + + T *A_d, *B_d, *C_d; + T *A_h, *B_h, *C_h; + + HipTest::initArrays (&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, P::isPinned); + + + for (int i=0; i::Copy(A_d, A_h, Nbytes, hipMemcpyHostToDevice, stream); + MemTraits::Copy(B_d, B_h, Nbytes, hipMemcpyHostToDevice, stream); + + hipLaunchKernel(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, A_d, B_d, C_d, numElements); + + MemTraits::Copy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost, stream); + + HIPCHECK (hipDeviceSynchronize()); + + HipTest::checkVectorADD(A_h, B_h, C_h, numElements); + } + + HipTest::freeArrays (A_d, B_d, C_d, A_h, B_h, C_h, P::isPinned); + HIPCHECK (hipDeviceSynchronize()); + + std::cout <<" pid" << pid << " success\n"; +} + +template +void test_multiThread_1(std::string testName, hipStream_t stream0, hipStream_t stream1, bool serialize) +{ + printSep(); + printf ("%s\n", __func__); + std::cout << testName << std::endl; + + // Test 2 threads operating on same stream: + std::thread t1 (simpleVectorCopy, 2000000/*mb*/, 1000, stream0); + if (serialize) { + t1.join(); + } + std::thread t2 (simpleVectorCopy, 2000000/*mb*/, 1000, stream1); + if (serialize) { + t2.join(); + } + + if (!serialize) { + t1.join(); + t2.join(); + } + + HIPCHECK(hipDeviceSynchronize()); +}; + + +int main(int argc, char *argv[]) +{ + HipTest::parseStandardArguments(argc, argv, true); + + printf ("info: set device to %d\n", p_gpuDevice); + HIPCHECK(hipSetDevice(p_gpuDevice)); + + + if (p_tests & 0x1) { + HIPCHECK ( 
hipDeviceReset() ); + + hipStream_t stream; + HIPCHECK (hipStreamCreate(&stream)); + + simpleVectorCopy (2000000/*mb*/, 10/*iters*/, stream); + simpleVectorCopy (2000000/*mb*/, 10/*iters*/, stream); + + //HIPCHECK(hipStreamDestroy(stream)); + } + + + if (p_tests & 0x2) { + hipStream_t stream0, stream1; + HIPCHECK (hipStreamCreate(&stream0)); + HIPCHECK (hipStreamCreate(&stream1)); + + // Easy tests to verify the test works - these don't allow overlap between the threads: + test_multiThread_1 ("Multithread NULL with serialized", NULL, NULL, true); + test_multiThread_1 ("Multithread with serialized", stream0, stream1, true); + + test_multiThread_1 ("Multithread with NULL stream", NULL, NULL, false); + test_multiThread_1 ("Multithread with two streams", stream0, stream1, false); + test_multiThread_1 ("Multithread with one stream", stream0, stream0, false); + } + + passed(); + +} diff --git a/projects/hip/tests/src/hipPointerAttrib.cpp b/projects/hip/tests/src/hipPointerAttrib.cpp new file mode 100644 index 0000000000..8bed5af869 --- /dev/null +++ b/projects/hip/tests/src/hipPointerAttrib.cpp @@ -0,0 +1,524 @@ +/* +Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + + +// Test pointer tracking logic: allocate memory and retrieve stats with hipPointerGetAttributes + +#include "hip_runtime.h" +#include "test_common.h" + +#ifdef __HIP_PLATFORM_HCC__ +//#include "hcc_detail/AM.h" +#include "hc_am.hpp" + +#endif + +size_t Nbytes = 0; + +//================================================================================================= +// Utility Functions: +//================================================================================================= + +bool operator==(const hipPointerAttribute_t &lhs, const hipPointerAttribute_t &rhs) +{ + return ((lhs.hostPointer == rhs.hostPointer) && + (lhs.devicePointer == rhs.devicePointer) && + (lhs.memoryType == rhs.memoryType) && + (lhs.device == rhs.device) && + (lhs.allocationFlags == rhs.allocationFlags) + ) ; + +}; + + +bool operator!=(const hipPointerAttribute_t &lhs, const hipPointerAttribute_t &rhs) +{ + return ! 
(lhs == rhs); +} + + +const char *memoryTypeToString(hipMemoryType memoryType) +{ + switch (memoryType) { + case hipMemoryTypeHost : return "[Host]"; + case hipMemoryTypeDevice : return "[Device]"; + default: return "[Unknown]"; + }; +} + + +void resetAttribs(hipPointerAttribute_t *attribs) +{ + attribs->hostPointer = (void*) (-1); + attribs->devicePointer = (void*) (-1); + attribs->memoryType = hipMemoryTypeHost; + attribs->device = -2; + attribs->isManaged = -1; + attribs->allocationFlags = 0xffff; +}; + + +void printAttribs(const hipPointerAttribute_t *attribs) +{ + printf ("hostPointer:%p devicePointer:%p memoryType:%s deviceId:%d isManaged:%d allocationFlags:%u\n", + attribs->hostPointer, + attribs->devicePointer, + memoryTypeToString(attribs->memoryType), + attribs->device, + attribs->isManaged, + attribs->allocationFlags + ); +}; + + +inline int zrand(int max) +{ + return rand() % max; +} + + +//================================================================================================= +// Functins to run tests +//================================================================================================= +//-- +//Run through a couple simple cases to test lookups and host pointer arithmetic: +void testSimple() +{ + printf ("\n"); + printf ("===========================================================================\n"); + printf ("Simple Tests\n"); + printf ("===========================================================================\n"); + + char *A_d; + char *A_Pinned_h; + char *A_OSAlloc_h; + hipError_t e; + + HIPCHECK ( hipMalloc(&A_d, Nbytes) ); + HIPCHECK ( hipMallocHost(&A_Pinned_h, Nbytes) ); + A_OSAlloc_h = (char*)malloc(Nbytes); + + size_t free, total; + HIPCHECK(hipMemGetInfo(&free, &total)); + printf ("hipMemGetInfo: free=%zu (%4.2f) Nbytes=%lu total=%zu (%4.2f)\n", free, (float)(free/1024.0/1024.0), Nbytes, total, (float)(total/1024.0/1024.0)); + HIPASSERT(free + Nbytes <= total); + + + hipPointerAttribute_t attribs; + 
hipPointerAttribute_t attribs2; + + // Device memory + printf ("\nDevice memory (hipMalloc)\n"); + HIPCHECK( hipPointerGetAttributes(&attribs, A_d)); + printf("getAttr:%-20s", "A_d"); printAttribs(&attribs); + + // Check pointer arithmetic cases: + resetAttribs(&attribs2); + HIPCHECK( hipPointerGetAttributes(&attribs2, A_d+100)); + printf("getAttr:%-20s", "A_d+100"); printAttribs(&attribs2); + HIPASSERT((char*)attribs.devicePointer+100 == (char*)attribs2.devicePointer); + + // Corner case at end of array: + resetAttribs(&attribs2); + HIPCHECK( hipPointerGetAttributes(&attribs2, A_d+Nbytes-1)); + printf("getAttr:%-20s", "A_d+Nbytes-1"); printAttribs(&attribs2); + HIPASSERT((char*)attribs.devicePointer+Nbytes-1 == (char*)attribs2.devicePointer); + + // Pointer just beyond array - must be invalid or at least a different pointer + resetAttribs(&attribs2); + e = hipPointerGetAttributes(&attribs2, A_d+Nbytes+1); + printf("getAttr:%-20s err=%d (%s), neg-test expected\n", "A_d+NBytes", e, hipGetErrorString(e)); + if (e != hipErrorInvalidValue) { + // We might have strayed into another pointer area. + printf("getAttr:%-20s", "A_d+NBytes"); printAttribs(&attribs2); + HIPASSERT((char*)attribs.devicePointer != (char*)attribs2.devicePointer); + } + + + resetAttribs(&attribs2); + e = hipPointerGetAttributes(&attribs2, A_d+Nbytes); + if (e != hipErrorInvalidValue) { + printf("%-20s", "A_d+Nbytes"); printAttribs(&attribs2); + HIPASSERT(attribs.devicePointer != attribs2.devicePointer); + } + + hipFree(A_d); + e = hipPointerGetAttributes(&attribs, A_d); + HIPASSERT(e == hipErrorInvalidValue); // Just freed the pointer, this should return an error. 
+ + + // Device-visible host memory + printf ("\nDevice-visible host memory (hipMallocHost)\n"); + HIPCHECK( hipPointerGetAttributes(&attribs, A_Pinned_h)); + printf("getAttr:%-20s", "A_pinned_h"); printAttribs(&attribs); + + resetAttribs(&attribs2); + HIPCHECK( hipPointerGetAttributes(&attribs2, A_Pinned_h+Nbytes/2)); + printf("getAttr:%-20s", "A_pinned_h+NBytes/2"); printAttribs(&attribs2); + HIPASSERT((char*)attribs.hostPointer+Nbytes/2 == (char*)attribs2.hostPointer); + + + hipFreeHost(A_Pinned_h); + e = hipPointerGetAttributes(&attribs, A_Pinned_h); + HIPASSERT(e == hipErrorInvalidValue); // Just freed the pointer, this should return an error. + printf("getAttr:%-20s err=%d (%s), neg-test expected\n", "A_d+NBytes", e, hipGetErrorString(e)); + + + // OS memory + printf ("\nOS-allocated memory (malloc)\n"); + e = hipPointerGetAttributes(&attribs, A_OSAlloc_h); + printf("getAttr:%-20s err=%d (%s), neg-test expected\n", "A_OSAlloc_h", e, hipGetErrorString(e)); + HIPASSERT(e == hipErrorInvalidValue); // OS-allocated pointers should return hipErrorInvalidValue. +} + +//--- +//Reset the memory tracker (remove allocations from all known devices): +//This frees any memory allocated through the runtime. +//The routine will not release any +void resetTracker () +{ + if (p_verbose & 0x1) { + printf ("info: reset tracker for all devices in platform\n"); + } + + int numDevices; + HIPCHECK(hipGetDeviceCount(&numDevices)); + + // Clean up: + for (int i=0; i reference(numAllocs); + + HIPASSERT(minSize > 0); + HIPASSERT(maxSize >= minSize); + + int numDevices; + HIPCHECK(hipGetDeviceCount(&numDevices)); + + //--- + //Populate with device and host allocations. 
+ size_t totalDeviceAllocated[numDevices]; + for (int i =0; i 1) { + checkPointer(ref, i, 2, (char *)ref._pointer + ref._sizeBytes-1); + } + + if (ref._attrib.memoryType == hipMemoryTypeDevice) { + hipFree(ref._pointer); + } else { + hipFreeHost(ref._pointer); + } + + } + +#ifdef __HIP_PLATFORM_HCC__ + if (p_verbose & 0x2) { + printf ("Tracker after cleanup:\n"); + hc::am_memtracker_print(); + } +#endif +} + + +//--- +// Multi-threaded test with many simul allocs. +// IN : serialize will force the test to run in serial fashion. +// Seems like this does not hit MT corner cases in the tracker very often - testMultiThreaded_2 below seems more effective. +void testMultiThreaded_1(bool serialize=false) +{ + printf ("\n===========================================================================\n"); + printf ("MultiThreaded_1\n"); + if (serialize) printf ("[SERIALIZE]\n"); + printf ("===========================================================================\n"); + std::thread t1(clusterAllocs, 1000, 101, 1000); + if (serialize) t1.join(); + + std::thread t2(clusterAllocs, 1000, 11, 100); + if (serialize) t2.join(); + + std::thread t3(clusterAllocs, 1000, 5, 10); + if (serialize) t3.join(); + + std::thread t4(clusterAllocs, 1000, 1, 4); + if (serialize) t4.join(); + + if (!serialize) { + t1.join(); + t2.join(); + t3.join(); + t4.join(); + } + + resetTracker(); +} + + +///================================================================================================ + +//--- +//Repeatedly query a single entry: +void thread_query(void *ptr, const hipPointerAttribute_t *refAttrib) +{ + int count = 0; + + for (int count=0; count< 1000000; count++) { + hipPointerAttribute_t a; + hipError_t e = hipPointerGetAttributes(&a, ptr); + if ((e != hipSuccess) || (a!= *refAttrib)) { + printf("Test %d (err=%d)\n", count, e); + HIPCHECK(e); + + printf(" ref :: "); printAttribs(refAttrib); + printf(" getattr:: "); printAttribs(&a); + } + } +} + + +#ifdef __HIP_PLATFORM_HCC__ +//--- +// 
Add pointers to tracker very quickly, then remove them quickly: +enum Dir {Up, Down}; +void thread_noise_generator(int iters, size_t numBuffers, Dir addDir, Dir removeDir) +{ + const size_t bufferSize = 16; + size_t maxSize = numBuffers*bufferSize; + HIPASSERT((maxSize % bufferSize) == 0); // loop logic assumes this is true + + + for (int i=0; i=0; p-=bufferSize) { + hc::am_memtracker_add(p, bufferSize, acc, false); + } + } + + if (removeDir == Up) { + for (char *p = basePtr; p=0; p-=bufferSize) { + hc::am_memtracker_remove(p); + } + } + } +} + + +//--- +//Multi-thread test that is effective at catching locking errors in the alloc/dealloc/tracker. +//The query thread repeately requests information on the same block of memory. +//Meanwhile, the thread_noise_generator registers a large number of blocks, and +//then unregisters them. This causes a large amount of rebalancing in the tree +//structure and will generate errors unless the locks in the tracker are preventing reading +//while writing. 
+void testMultiThreaded_2() +{ + std::atomic inflight(2); + + printf ("\n===========================================================================\n"); + printf ("MultiThreaded_2\n"); + printf ("===========================================================================\n"); + + hipSetDevice(0); + hipDeviceReset(); + + // Create some entries in the tracker: + for (int i=0; i<1000; i++) { + void *C_d; + HIPCHECK(hipMalloc(&C_d, 32)); + } + + + // Allocate a pointer that we will repeatedly lookup: + void *A_d; + HIPCHECK(hipMalloc(&A_d, 10000)); + hipPointerAttribute_t attrib1; + HIPCHECK(hipPointerGetAttributes(&attrib1, A_d)); + std::thread t1(thread_query, A_d, &attrib1); + + std::thread t2(thread_noise_generator, 10000, 1000, Up, Up); + + t1.join(); + t2.join(); + + hipSetDevice(0); + hipDeviceReset(); +} +#endif + + + +int main(int argc, char *argv[]) +{ + N= 1000000; + HipTest::parseStandardArguments(argc, argv, true); + + + Nbytes = N*sizeof(char); + + printf ("N=%zu (%6.2f MB) device=%d\n", N, Nbytes/(1024.0*1024.0), p_gpuDevice); + + + if (p_tests & 0x01) { + printf ("info: set device to %d\n", p_gpuDevice); + HIPCHECK(hipSetDevice(p_gpuDevice)); + testSimple(); + } + + if (p_tests & 0x02) { + srand(0x100); + printf ("\n===========================================================================\n"); + clusterAllocs(100, 1024*1, 1024*1024); + resetTracker(); + } + + if (p_tests & 0x04) { + srand(0x200); + printf ("\n===========================================================================\n"); + clusterAllocs(1000, 1, 10); // Many tiny allocations; + resetTracker(); + } + + if (p_tests & 0x08) { + srand(0x300); + testMultiThreaded_1(true); + testMultiThreaded_1(false); + } + + +#ifdef __HIP_PLATFORM_HCC__ + if (p_tests & 0x10) { + srand(0x400); + testMultiThreaded_2(); + resetTracker(); + } +#endif + + printf ("\n"); + passed(); +} diff --git a/projects/hip/tests/src/hipSimpleAtomicsTest.cpp b/projects/hip/tests/src/hipSimpleAtomicsTest.cpp index 
f492643e41..f0ae0f582f 100644 --- a/projects/hip/tests/src/hipSimpleAtomicsTest.cpp +++ b/projects/hip/tests/src/hipSimpleAtomicsTest.cpp @@ -216,11 +216,11 @@ __global__ void testKernel(hipLaunchParm lp,int *g_odata) // Atomic increment (modulo 17+1) //atomicInc((unsigned int *)&g_odata[5], 17); - atomicInc((unsigned int *)&g_odata[5]); + //atomicInc((unsigned int *)&g_odata[5]); // Atomic decrement // atomicDec((unsigned int *)&g_odata[6], 137); - atomicDec((unsigned int *)&g_odata[6]); + //atomicDec((unsigned int *)&g_odata[6]); // Atomic compare-and-swap atomicCAS(&g_odata[7], tid-1, tid); diff --git a/projects/hip/tests/src/hipSinglePrecisionMathDevice.cpp b/projects/hip/tests/src/hipSinglePrecisionMathDevice.cpp index 8413c37b77..acb74d3f2d 100644 --- a/projects/hip/tests/src/hipSinglePrecisionMathDevice.cpp +++ b/projects/hip/tests/src/hipSinglePrecisionMathDevice.cpp @@ -42,7 +42,9 @@ __device__ void single_precision_math_functions() copysignf(1.0f, -2.0f); cosf(0.0f); coshf(0.0f); - //cospif(0.0f); +#if __hcc_workweek__ >= 16073 + cospif(0.0f); +#endif //cyl_bessel_i0f(0.0f); //cyl_bessel_i1f(0.0f); erfcf(0.0f); @@ -101,7 +103,9 @@ __device__ void single_precision_math_functions() //rnorm4df(0.0f, 0.0f, 0.0f, 1.0f); //fX = 1.0f; rnormf(1, &fX); roundf(0.0f); - //rsqrtf(1.0f); +#if __hcc_workweek__ >= 16073 + rsqrtf(1.0f); +#endif //scalblnf(0.0f, 1); scalbnf(0.0f, 1); signbit(1.0f); @@ -109,7 +113,9 @@ __device__ void single_precision_math_functions() //sincospif(0.0f, &fX, &fY); sinf(0.0f); sinhf(0.0f); - //sinpif(0.0f); +#if __hcc_workweek__ >= 16073 + sinpif(0.0f); +#endif sqrtf(0.0f); tanf(0.0f); tanhf(0.0f); diff --git a/projects/hip/tests/src/hipSinglePrecisionMathHost.cpp b/projects/hip/tests/src/hipSinglePrecisionMathHost.cpp index 6dd1c07f1b..c12b553e0f 100644 --- a/projects/hip/tests/src/hipSinglePrecisionMathHost.cpp +++ b/projects/hip/tests/src/hipSinglePrecisionMathHost.cpp @@ -42,7 +42,9 @@ __host__ void single_precision_math_functions() 
copysignf(1.0f, -2.0f); cosf(0.0f); coshf(0.0f); - //cospif(0.0f); +#if __hcc_workweek__ >= 16073 + cospif(0.0f); +#endif //cyl_bessel_i0f(0.0f); //cyl_bessel_i1f(0.0f); erfcf(0.0f); @@ -101,7 +103,9 @@ __host__ void single_precision_math_functions() //rnorm4df(0.0f, 0.0f, 0.0f, 1.0f); //fX = 1.0f; rnormf(1, &fX); roundf(0.0f); - //rsqrtf(1.0f); +#if __hcc_workweek__ >= 16073 + rsqrtf(1.0f); +#endif ///scalblnf(0.0f, 1); scalbnf(0.0f, 1); signbit(1.0f); @@ -109,7 +113,9 @@ __host__ void single_precision_math_functions() //sincospif(0.0f, &fX, &fY); sinf(0.0f); sinhf(0.0f); - //sinpif(0.0f); +#if __hcc_workweek__ >= 16073 + sinpif(0.0f); +#endif sqrtf(0.0f); tanf(0.0f); tanhf(0.0f); diff --git a/projects/hip/tests/src/hipStream.h b/projects/hip/tests/src/hipStream.h new file mode 100644 index 0000000000..f9ec3472d0 --- /dev/null +++ b/projects/hip/tests/src/hipStream.h @@ -0,0 +1,102 @@ +#ifndef HIPSTREAM_H +#define HIPSTREAM_H +#include + +#define NUM_STREAMS 4 + +/* +* H2H - 1 +* H2D - 2 +* KER - 3 +* D2D - 4 +* D2H - 5 +*/ + +template +void H2HAsync(T *Dst, T *Src, size_t size, hipStream_t stream){ + HIPCHECK(hipMemcpyAsync(Dst, Src, size, hipMemcpyHostToHost, stream)); +} + +template +void H2DAsync(T *Dst, T *Src, size_t size, hipStream_t stream){ + HIPCHECK(hipMemcpyAsync(Dst, Src, size, hipMemcpyHostToDevice, stream)); +} + +template +void D2DAsync(T *Dst, T *Src, size_t size, hipStream_t stream){ + HIPCHECK(hipMemcpyAsync(Dst, Src, size, hipMemcpyDeviceToDevice, stream)); +} + +template +void D2HAsync(T *Dst, T *Src, size_t size, hipStream_t stream){ + HIPCHECK(hipMemcpyAsync(Dst, Src, size, hipMemcpyDeviceToHost, stream)); +} + +template +void H2H(T *Dst, T *Src, size_t size){ + HIPCHECK(hipMemcpy(Dst, Src, size, hipMemcpyHostToHost)); +} + +template +void H2D(T *Dst, T *Src, size_t size){ + HIPCHECK(hipMemcpy(Dst, Src, size, hipMemcpyHostToDevice)); +} + +template +void D2D(T *Dst, T *Src, size_t size){ + HIPCHECK(hipMemcpy(Dst, Src, size, 
hipMemcpyDeviceToDevice)); +} + +template +void D2H(T *Dst, T *Src, size_t size){ + HIPCHECK(hipMemcpy(Dst, Src, size, hipMemcpyDeviceToHost)); +} + +template +__global__ void Inc(hipLaunchParm lp, T *In){ +int tx = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; +In[tx] = In[tx] + 1; +} + +template +void initArrays(T **Ad, T **Ah, + size_t N, bool usePinnedHost=false){ + size_t NBytes = N * sizeof(T); + if(Ad){ + HIPCHECK( hipMalloc(Ad, NBytes)); + } + if(usePinnedHost){ + HIPCHECK( hipMallocHost(Ah, NBytes)); + } + else{ + *Ah = new T[N]; + HIPASSERT(*Ah != NULL); + } +} + +template +void initArrays(T **Ad, size_t N, + bool deviceMemory = false, + bool usePinnedHost = false){ + size_t NBytes = N * sizeof(T); + if(deviceMemory){ + HIPCHECK( hipMalloc(Ad, NBytes)); + }else{ + if(usePinnedHost){ + HIPCHECK(hipMallocHost(Ad, NBytes)); + }else{ + *Ad = new T[N]; + HIPASSERT(*Ad != NULL); + } + } +} + +template +void setArray(T* Array, int N, T val){ +for(int i=0;i(); +test32451(); +test42351(); + +For disjoint data passed: +test24513 +test25134 +test34512 +*/ + +template +void test12345(){ + hipStream_t stream; + HIPCHECK(hipStreamCreate(&stream)); + + const size_t N = 1000; + const size_t size = sizeof(T) * N; + + T *Ah, *Bh, *Ch; + T *Ad, *Bd; + initArrays(&Ad, &Ah, N, true); + initArrays(&Bd, &Bh, N, true); + initArrays(&Ch, N, false, true); + + setArray(Ah, N, T(1)); + + H2HAsync(Bh, Ah, size, stream); + H2DAsync(Ad, Bh, size, stream); + hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Ad); + D2DAsync(Bd, Ad, size, stream); + D2HAsync(Ch, Bd, size, stream); + HIPCHECK(hipDeviceSynchronize()); + + HIPASSERT(Ah[10] + T(1)== Ch[10]); + HIPCHECK(hipStreamDestroy(stream)); +} + +template +void test13452(){ + hipStream_t stream; + HIPCHECK(hipStreamCreate(&stream)); + + const size_t N = 1000; + const size_t size = sizeof(T) * N; + + T *Ah, *Bh, *Ch; + T *Dh, *Eh; + T *Ad, *Bd, *Cd; + + initArrays(&Ah, N, false, true); + initArrays(&Bh, N, false, 
true); + initArrays(&Ch, N, false, true); + initArrays(&Dh, N, false, false); + initArrays(&Eh, N, false, false); + initArrays(&Ad, N, true, false); + initArrays(&Bd, N, true, false); + initArrays(&Cd, N, true, false); + + setArray(Ah, N, T(1)); + setArray(Dh, N, T(2)); + + H2D(Ad, Dh, size); + + H2HAsync(Bh, Ah, size, stream); + hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Ad); + D2DAsync(Bd, Ad, size, stream); + D2HAsync(Ch, Bd, size, stream); + H2DAsync(Cd, Ch, size, stream); + HIPCHECK(hipDeviceSynchronize()); + + D2H(Eh,Cd,size); + + HIPASSERT(Ah[10] == Bh[10]); + HIPASSERT(Eh[10] == Dh[10] + T(1)); + +} + +template +void test14523(){ + hipStream_t stream; + HIPCHECK(hipStreamCreate(&stream)); + + const int N = 1000; + const size_t size = sizeof(T) * N; + + T *Ah, *Bh, *Ch; + T *Dh, *Eh; + T *Ad, *Bd, *Cd; + + initArrays(&Ah, N, false, true); + initArrays(&Bh, N, false, true); + initArrays(&Ch, N, false, true); + initArrays(&Dh, N, false, false); + initArrays(&Eh, N, false, false); + initArrays(&Ad, N, true, false); + initArrays(&Bd, N, true, false); + initArrays(&Cd, N, true, false); + + setArray(Ah, N, T(1)); + setArray(Dh, N, T(2)); + + H2D(Ad,Dh,size); + + H2HAsync(Bh, Ah, size, stream); + D2DAsync(Bd, Ad, size, stream); + D2HAsync(Ch, Bd, size, stream); + H2DAsync(Cd, Ch, size, stream); + hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Cd); + + HIPCHECK(hipDeviceSynchronize()); + + D2H(Eh, Cd, size); + + HIPASSERT(Ah[10] == Bh[10]); + HIPASSERT(Ch[10] + T(1) == Eh[10]); +} + +template +void test15234(){ + hipStream_t stream; + HIPCHECK(hipStreamCreate(&stream)); + + const size_t N = 1000; + const size_t size = sizeof(T) * N; + + T *Ah, *Bh, *Ch; + T *Dh, *Eh; + T *Ad, *Bd, *Cd; + + initArrays(&Ah, N, false, true); + initArrays(&Bh, N, false, true); + initArrays(&Ch, N, false, true); + initArrays(&Dh, N, false, false); + initArrays(&Eh, N, false, false); + initArrays(&Ad, N, true, false); + 
initArrays(&Bd, N, true, false); + initArrays(&Cd, N, true, false); + + setArray(Ah, N, T(1)); + setArray(Dh, N, T(2)); + + H2D(Ad, Dh, size); + + H2HAsync(Bh, Ah, size, stream); + D2HAsync(Ch, Ad, size, stream); + H2DAsync(Bd, Ch, size, stream); + hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Bd); + D2DAsync(Cd, Bd, size, stream); + + D2H(Eh, Cd, size); + + HIPASSERT(Ah[10] == Bh[10]); + HIPASSERT(Eh[10] == Dh[10] + T(1)); + +} + +template +void test23451(){ + hipStream_t stream; + HIPCHECK(hipStreamCreate(&stream)); + const size_t N = 1000; + const size_t size = sizeof(T) * N; + + T *Ah, *Bh, *Ch; + T *Ad, *Bd; + + initArrays(&Ah, N, false, true); + initArrays(&Bh, N, false, true); + initArrays(&Ch, N, false, true); + initArrays(&Ad, N, true, false); + initArrays(&Bd, N, true, false); + + setArray(Ah, N, T(1)); + + H2DAsync(Ad, Ah, size, stream); + hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Ad); + D2DAsync(Bd, Ad, size, stream); + D2HAsync(Bh, Bd, size, stream); + H2HAsync(Ch, Bh, size, stream); + HIPCHECK(hipDeviceSynchronize()); + //HIPASSERT(Ah[10] == Ch[10]); +} + +template +void test24513(){ + hipStream_t stream; + HIPCHECK(hipStreamCreate(&stream)); + + const size_t N = 1000; + const size_t size = sizeof(T) * N; + + T *Ah, *Bh, *Ch; + T *Dh, *Eh; + T *Ad, *Bd, *Cd; + + initArrays(&Ah, N, false, true); + initArrays(&Bh, N, false, true); + initArrays(&Ch, N, false, true); + initArrays(&Dh, N, false, false); + initArrays(&Eh, N, false, false); + initArrays(&Ad, N, true, false); + initArrays(&Bd, N, true, false); + initArrays(&Cd, N, true, false); + + setArray(Ah, N, T(1)); + setArray(Dh, N, T(2)); + + H2D(Cd, Dh, size); + + H2DAsync(Ad, Ah, size, stream); + D2DAsync(Bd, Ad, size, stream); + D2HAsync(Bh, Bd, size, stream); + H2HAsync(Ch, Bh, size, stream); + hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Cd); + HIPCHECK(hipDeviceSynchronize()); + + D2H(Eh, Cd, size); + + 
HIPASSERT(Eh[0] == Dh[0] + T(1)); + //HIPASSERT(Ah[0] == Ch[0]); +} + +template +void test25134(){ + hipStream_t stream; + HIPCHECK(hipStreamCreate(&stream)); + + const size_t N = 1000; + const size_t size = sizeof(T) * N; + + T *Ah, *Bh, *Ch; + T *Dh, *Eh; + T *Ad, *Bd, *Cd; + + initArrays(&Ah, N, false, true); + initArrays(&Bh, N, false, true); + initArrays(&Ch, N, false, true); + initArrays(&Dh, N, false, false); + initArrays(&Eh, N, false, false); + initArrays(&Ad, N, true, false); + initArrays(&Bd, N, true, false); + initArrays(&Cd, N, true, false); + + setArray(Ah, N, T(1)); + setArray(Dh, N, T(2)); + + H2D(Bd, Dh, size); + + H2DAsync(Ad, Ah, size, stream); + D2HAsync(Bh, Ad, size, stream); + H2HAsync(Ch, Bh, size, stream); + hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Bd); + D2DAsync(Cd, Bd, size, stream); + + D2H(Eh, Cd, size); + + HIPCHECK(hipDeviceSynchronize()); + + //HIPASSERT(Ah[10] == Ch[10]); + HIPASSERT(Dh[10] + T(1) == Eh[10]); +} + +template +void test21345(){ + hipStream_t stream; + HIPCHECK(hipStreamCreate(&stream)); + + const size_t N = 1000; + const size_t size = N * sizeof(T); + + T *Ah, *Bh, *Ch, *Dh; + T *Ad, *Bd; + + initArrays(&Ah, N, false, true); + initArrays(&Bh, N, false, true); + initArrays(&Ch, N, false, true); + initArrays(&Dh, N, false, true); + initArrays(&Ad, N, true, false); + initArrays(&Bd, N, true, false); + + setArray(Ah, N, T(1)); + setArray(Bh, N, T(2)); + + H2DAsync(Ad, Ah, size, stream); + H2HAsync(Ch, Bh, size, stream); + hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Ad); + D2DAsync(Bd, Ad, size, stream); + D2HAsync(Dh, Bd, size, stream); + + HIPCHECK(hipDeviceSynchronize()); + + HIPASSERT( Bh[10] == Ch[10] ); + HIPASSERT( Ah[10] + T(1) == Dh[10]); +} + +template +void test34512(){ + hipStream_t stream; + HIPCHECK(hipStreamCreate(&stream)); + + const size_t N = 1000; + const size_t size = N * sizeof(T); + + T *Bh, *Ch, *Dh; + T *Ah, *Eh; + T *Ad, *Bd, *Cd; + + 
initArrays(&Bh, N, false, true); + initArrays(&Ch, N, false, true); + initArrays(&Dh, N, false, true); + initArrays(&Ah, N, false, false); + initArrays(&Eh, N, false, false); + initArrays(&Ad, N, true, false); + initArrays(&Bd, N, true, false); + initArrays(&Cd, N, true, false); + + setArray(Ah, N, T(1)); + + H2D(Ad, Ah, size); + + hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Ad); + D2DAsync(Bd, Ad, size, stream); + D2HAsync(Bh, Bd, size, stream); + H2HAsync(Ch, Bh, size, stream); + H2DAsync(Cd, Ch, size, stream); + + D2H(Dh, Cd, size); + + HIPCHECK(hipDeviceSynchronize()); + //HIPASSERT( Ah[10] + T(1) == Dh[10] ); +} + +template +void test35124(){ + hipStream_t stream; + HIPCHECK(hipStreamCreate(&stream)); + + const size_t N = 1000; + const size_t size = N * sizeof(T); + + T *Ah, *Bh; + T *Ch, *Dh; + T *Ad, *Bd, *Cd; + + initArrays(&Ah, N, false, true); + initArrays(&Bh, N, false, true); + initArrays(&Ch, N, false, false); + initArrays(&Dh, N, false, false); + initArrays(&Ad, N, true, false); + initArrays(&Bd, N, true, false); + initArrays(&Cd, N, true, false); + + setArray(Dh, N, T(1)); + + H2D(Ad, Dh, size); + + hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Ad); + D2HAsync(Ah, Ad, size, stream); + H2HAsync(Bh, Ah, size, stream); + H2DAsync(Bd, Bh, size, stream); + D2DAsync(Cd, Bd, size, stream); + + D2H(Ch, Cd, size); + + HIPCHECK(hipDeviceSynchronize()); + + HIPASSERT(Dh[10] + T(1) == Ch[10]); +} + +template +void test31245(){ + hipStream_t stream; + HIPCHECK(hipStreamCreate(&stream)); + + const size_t N = 1000; + const size_t size = N * sizeof(T); + T *Ah, *Bh, *Ch; + T *Dh, *Eh; + T *Ad, *Bd, *Cd; + + initArrays(&Ah, N, false, true); + initArrays(&Bh, N, false, true); + initArrays(&Ch, N, false, true); + initArrays(&Dh, N, false, false); + initArrays(&Eh, N, false, false); + initArrays(&Ad, N, true, false); + initArrays(&Bd, N, true, false); + initArrays(&Cd, N, true, false); + + setArray(Dh, N, T(1)); + 
setArray(Ah, N, T(2)); + + H2D(Ad, Dh, size); + + hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Ad); + H2HAsync(Bh, Ah, size, stream); + H2DAsync(Bd, Bh, size, stream); + D2DAsync(Cd, Bd, size, stream); + D2HAsync(Ch, Cd, size, stream); + + D2H(Eh, Ad, size); + + HIPCHECK(hipDeviceSynchronize()); + + HIPASSERT(Dh[10] + T(1) == Eh[10]); + HIPASSERT(Bh[10] == Ch[10]); +} + + +template +void test32451(){ + hipStream_t stream; + HIPCHECK(hipStreamCreate(&stream)); + + const size_t N = 1000; + const size_t size = N * sizeof(T); + + T *Ah, *Bh, *Ch; + T *Dh, *Eh; + T *Ad, *Bd, *Cd; + + initArrays(&Ah, N, false, true); + initArrays(&Bh, N, false, true); + initArrays(&Ch, N, false, true); + initArrays(&Dh, N, false, false); + initArrays(&Eh, N, false, false); + initArrays(&Ad, N, true, false); + initArrays(&Bd, N, true, false); + initArrays(&Cd, N, true, false); + + setArray(Ah, N, T(1)); + setArray(Eh, N, T(2)); + + H2D(Ad, Eh, size); + hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Ad); + H2DAsync(Bd, Ah, size, stream); + D2DAsync(Cd, Bd, size, stream); + D2HAsync(Bh, Cd, size, stream); + H2HAsync(Ch, Bh, size, stream); + HIPCHECK(hipDeviceSynchronize()); + D2H(Dh, Ad, size); + + //HIPASSERT(Ah[10] == Ch[10]); + HIPASSERT(Eh[10] + T(1) == Dh[10]); + +} + +template +void test45123(){ + hipStream_t stream; + HIPCHECK(hipStreamCreate(&stream)); + const size_t N = 1000; + const size_t size = N * sizeof(T); + + T *Ah, *Bh; + T *Ch, *Dh; + T *Ad, *Bd, *Cd; + + initArrays(&Ah, N, false, true); + initArrays(&Bh, N, false, true); + initArrays(&Ch, N, false, false); + initArrays(&Dh, N, false, false); + initArrays(&Ad, N, true, false); + initArrays(&Bd, N, true, false); + initArrays(&Cd, N, true, false); + + setArray(Dh, N, T(1)); + + H2D(Ad, Dh, size); + + D2DAsync(Bd, Ad, size, stream); + D2HAsync(Ah, Bd, size, stream); + H2HAsync(Bh, Ah, size, stream); + H2DAsync(Cd, Bh, size, stream); + hipLaunchKernel(HIP_KERNEL_NAME(Inc), 
dim3(N/500), dim3(500), 0, stream, Cd); + D2H(Ch, Cd, size); + HIPCHECK(hipDeviceSynchronize()); + + HIPASSERT(Dh[10] + T(1) == Ch[10]); +} + + +template +void test41235(){ + hipStream_t stream; + HIPCHECK(hipStreamCreate(&stream)); + const size_t N = 1000; + const size_t size = N * sizeof(T); + + T *Ah, *Bh; + T *Ch; + T *Ad, *Bd, *Cd; + + initArrays(&Ah, N, false, true); + initArrays(&Bh, N, false, true); + initArrays(&Ch, N, false, false); + initArrays(&Ad, N, true, false); + initArrays(&Bd, N, true, false); + initArrays(&Cd, N, true, false); + + setArray(Ch, N, T(1)); + + H2D(Ad, Ch, size); + + D2DAsync(Bd, Ad, size, stream); + D2HAsync(Ah, Bd, size, stream); + H2DAsync(Cd, Ah, size, stream); + hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Cd); + D2HAsync(Bh, Cd, size, stream); + + HIPCHECK(hipDeviceSynchronize()); + + HIPASSERT(Ch[10] + T(1) == Bh[10]); +} + +template +void test42351(){ + hipStream_t stream; + HIPCHECK(hipStreamCreate(&stream)); + + const size_t N = 1000; + const size_t size = N * sizeof(T); + + T *Ah, *Bh, *Ch; + T *Dh, *Eh; + T *Ad, *Bd, *Cd; + + initArrays(&Ah, N, false, true); + initArrays(&Bh, N, false, true); + initArrays(&Ch, N, false, true); + initArrays(&Dh, N, false, false); + initArrays(&Eh, N, false, false); + initArrays(&Ad, N, true, false); + initArrays(&Bd, N, true, false); + initArrays(&Cd, N, true, false); + + setArray(Dh, N, T(2)); + setArray(Ah, N, T(1)); + + H2D(Ad, Dh, size); + + D2DAsync(Bd, Ad, size, stream); + H2DAsync(Cd, Ah, size, stream); + hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Cd); + D2HAsync(Bh, Cd, size, stream); + H2HAsync(Ch, Bh, size, stream); + + D2H(Eh, Bd, size); + + HIPCHECK(hipDeviceSynchronize()); + HIPASSERT(Dh[10] == Eh[10]); + //HIPASSERT(Ah[10] + T(1) == Ch[10]); +} + +template +void test43512(){ + hipStream_t stream; + HIPCHECK(hipStreamCreate(&stream)); + + const size_t N = 1000; + const size_t size = N * sizeof(T); + + T *Ah, *Bh; + T 
*Ch, *Dh; + T *Ad, *Bd, *Cd; + + initArrays(&Ah, N, false, true); + initArrays(&Bh, N, false, true); + initArrays(&Ch, N, false, false); + initArrays(&Dh, N, false, false); + initArrays(&Ad, N, true, false); + initArrays(&Bd, N, true, false); + initArrays(&Cd, N, true, false); + + setArray(Dh, N, T(1)); + + H2D(Ad, Dh, size); + + D2DAsync(Bd, Ad, size, stream); + hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Bd); + D2HAsync(Ah, Bd, size, stream); + H2HAsync(Bh, Ah, size, stream); + H2DAsync(Cd, Bh, size, stream); + + D2H(Ch, Cd, size); + HIPCHECK(hipDeviceSynchronize()); + //HIPASSERT( Dh[10] + T(1) == Ch[10]); +} + +template +void test51234(){ + hipStream_t stream; + HIPCHECK(hipStreamCreate(&stream)); + + const size_t N = 1000; + const size_t size = N * sizeof(T); + + T *Ah, *Bh; + T *Ch, *Dh; + T *Ad, *Bd, *Cd; + + initArrays(&Ah, N, false, true); + initArrays(&Bh, N, false, true); + initArrays(&Ch, N, false, false); + initArrays(&Dh, N, false, false); + initArrays(&Ad, N, true, false); + initArrays(&Bd, N, true, false); + initArrays(&Cd, N, true, false); + + setArray(Dh, N, T(1)); + + H2D(Ad, Dh, size); + + D2HAsync(Ah, Ad, size, stream); + H2HAsync(Bh, Ah, size, stream); + H2DAsync(Bd, Bh, size, stream); + hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Bd); + D2DAsync(Cd, Bd, size, stream); + + D2H(Ch, Cd, size); + + HIPCHECK(hipDeviceSynchronize()); + + HIPASSERT(Ch[10] == Dh[10] + T(1)); +} + +template +void test52341(){ + hipStream_t stream; + HIPCHECK(hipStreamCreate(&stream)); + const size_t N = 1000; + const size_t size = N * sizeof(T); + + T *Ah, *Bh, *Ch; + T *Dh, *Eh; + T *Ad, *Bd, *Cd; + + initArrays(&Ah, N, false, true); + initArrays(&Bh, N, false, true); + initArrays(&Ch, N, false, true); + initArrays(&Dh, N, false, false); + initArrays(&Eh, N, false, false); + initArrays(&Ad, N, true, false); + initArrays(&Bd, N, true, false); + initArrays(&Cd, N, true, false); + + setArray(Eh, N, T(1)); + 
setArray(Bh, N, T(2)); + + H2D(Ad, Eh, size); + + D2HAsync(Ah, Ad, size, stream); + H2DAsync(Bd, Ah, size, stream); + hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Bd); + D2DAsync(Cd, Bd, size, stream); + H2HAsync(Ch, Bh, size, stream); + + D2H(Dh, Cd, size); + + HIPCHECK(hipDeviceSynchronize()); + + HIPASSERT(Eh[10] + T(1) == Dh[10]); + HIPASSERT(Ch[10] == Bh[10]); +} + +template +void test53412(){ + hipStream_t stream; + HIPCHECK(hipStreamCreate(&stream)); + const size_t N = 1000; + const size_t size = sizeof(T) * N; + + T *Ah, *Bh, *Ch, *Dh; + T *Eh, *Fh, *Gh; + T *Ad, *Bd, *Cd, *Dd; + + initArrays(&Ah, N, false, true); + initArrays(&Bh, N, false, true); + initArrays(&Ch, N, false, true); + initArrays(&Dh, N, false, true); + initArrays(&Eh, N, false, false); + initArrays(&Fh, N, false, false); + initArrays(&Gh, N, false, false); + initArrays(&Ad, N, true, false); + initArrays(&Bd, N, true, false); + initArrays(&Cd, N, true, false); + initArrays(&Dd, N, true, false); + + setArray(Dh, N, T(1)); + setArray(Eh, N, T(2)); + setArray(Bh, N, T(3)); + + H2D(Ad, Dh, size); + H2D(Bd, Eh, size); + + D2HAsync(Ah, Ad, size, stream); + hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Bd); + D2DAsync(Cd, Bd, size, stream); + H2HAsync(Ch, Bh, size, stream); + H2DAsync(Dd, Ch, size, stream); + + D2H(Fh, Cd, size); + D2H(Gh, Dd, size); + + HIPASSERT(Ah[10] == Dh[10]); + HIPASSERT(Eh[10] + T(1) == Fh[10]); + HIPASSERT(Bh[10] == Gh[10]); +} + +template +void test54123(){ + hipStream_t stream; + HIPCHECK(hipStreamCreate(&stream)); + + const size_t N = 1000; + const size_t size = N * sizeof(T); + + T *Ah, *Bh, *Ch; + T *Dh, *Eh, *Fh, *Gh; + T *Ad, *Bd, *Cd, *Dd; + + initArrays(&Ah, N, false, true); + initArrays(&Bh, N, false, true); + initArrays(&Ch, N, false, true); + initArrays(&Dh, N, false, false); + initArrays(&Eh, N, false, false); + initArrays(&Fh, N, false, false); + initArrays(&Gh, N, false, false); + initArrays(&Ad, N, 
true, false); + initArrays(&Bd, N, true, false); + initArrays(&Cd, N, true, false); + initArrays(&Dd, N, true, false); + + setArray(Dh, N, T(1)); + setArray(Eh, N, T(1)); + setArray(Bh, N, T(1)); + + H2D(Ad, Dh, size); + H2D(Bd, Eh, size); + + D2HAsync(Ah, Ad, size, stream); + D2DAsync(Cd, Bd, size, stream); + H2HAsync(Ch, Bh, size, stream); + H2DAsync(Dd, Ch, size, stream); + hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Dd); + + D2H(Fh, Cd, size); + D2H(Gh, Dd, size); + + HIPCHECK(hipDeviceSynchronize()); + HIPASSERT(Dh[10] == Ah[10]); + HIPASSERT(Eh[10] == Fh[10]); + HIPASSERT(Bh[10] + T(1) == Gh[10]); +} + +int main(int argc, char *argv[]) +{ + HipTest::parseStandardArguments(argc, argv, true); + + test12345(); + test13452(); + test14523(); + test15234(); + + test23451(); + test24513(); + test25134(); + test21345(); + + test34512(); + test35124(); + test31245(); + test32451(); + + test45123(); + test41235(); + test42351(); + test43512(); + + test51234(); + test52341(); + test53412(); + test54123(); + + passed(); + +} + diff --git a/projects/hip/tests/src/hip_anyall.cpp b/projects/hip/tests/src/hip_anyall.cpp index e126541766..52a2a13db9 100644 --- a/projects/hip/tests/src/hip_anyall.cpp +++ b/projects/hip/tests/src/hip_anyall.cpp @@ -28,23 +28,31 @@ THE SOFTWARE. 
#define HIP_ASSERT(x) (assert((x)==hipSuccess)) __global__ void - warpvote(hipLaunchParm lp, int* device_any, int* device_all , int Num_Warps_per_Block) + warpvote(hipLaunchParm lp, int* device_any, int* device_all , int Num_Warps_per_Block, int pshift) { int tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; - device_any[hipThreadIdx_x>>6] = __any(tid -77); - device_all[hipThreadIdx_x>>6] = __all(tid -77); + device_any[hipThreadIdx_x>>pshift] = __any(tid -77); + device_all[hipThreadIdx_x>>pshift] = __all(tid -77); } int main(int argc, char *argv[]) -{ - +{ int warpSize, pshift; + hipDeviceProp_t devProp; + hipDeviceGetProperties(&devProp, 0); + if(strncmp(devProp.name,"Fiji",1)==0) +{ warpSize =64; + pshift =6; +} + else {warpSize =32; pshift=5;} + int anycount =0; + int allcount =0; int Num_Threads_per_Block = 1024; int Num_Blocks_per_Grid = 1; - int Num_Warps_per_Block = Num_Threads_per_Block/64; - int Num_Warps_per_Grid = (Num_Threads_per_Block*Num_Blocks_per_Grid)/64; + int Num_Warps_per_Block = Num_Threads_per_Block/warpSize; + int Num_Warps_per_Grid = (Num_Threads_per_Block*Num_Blocks_per_Grid)/warpSize; int * host_any = ( int*)malloc(Num_Warps_per_Grid*sizeof(int)); int * host_all = ( int*)malloc(Num_Warps_per_Grid*sizeof(int)); @@ -60,7 +68,7 @@ for (int i=0; i> 6; - atomicAdd(&device_ballot[warp_num+hipBlockIdx_x*Num_Warps_per_Block],__popcll(__ballot(tid - 245))); + const unsigned int warp_num = hipThreadIdx_x >> pshift; + if (pshift ==6) {atomicAdd(&device_ballot[warp_num+hipBlockIdx_x*Num_Warps_per_Block],__popcll(__ballot(tid - 245)));} + else {atomicAdd(&device_ballot[warp_num+hipBlockIdx_x*Num_Warps_per_Block],__popc(__ballot(tid - 245)));} } int main(int argc, char *argv[]) -{ - +{ int warpSize, pshift; + hipDeviceProp_t devProp; + hipDeviceGetProperties(&devProp, 0); + + if(strncmp(devProp.name,"Fiji",1)==0) + {warpSize = 64; pshift =6;} + else {warpSize =32; pshift =5;} + unsigned int Num_Threads_per_Block = 512; unsigned int 
Num_Blocks_per_Grid = 1; - unsigned int Num_Warps_per_Block = Num_Threads_per_Block/64; - unsigned int Num_Warps_per_Grid = (Num_Threads_per_Block*Num_Blocks_per_Grid)/64; + unsigned int Num_Warps_per_Block = Num_Threads_per_Block/warpSize; + unsigned int Num_Warps_per_Grid = (Num_Threads_per_Block*Num_Blocks_per_Grid)/warpSize; unsigned int* host_ballot = (unsigned int*)malloc(Num_Warps_per_Grid*sizeof(unsigned int)); unsigned int* device_ballot; HIP_ASSERT(hipMalloc((void**)&device_ballot, Num_Warps_per_Grid*sizeof(unsigned int))); - + int divergent_count =0; for (int i=0; i= argc || !HipTest::parseSize(argv[i], &N)) { failed("Bad N size argument"); } @@ -114,8 +116,16 @@ int parseStandardArguments(int argc, char *argv[], bool failOnUndefinedArg) failed("Bad gpuDevice argument"); } - } - else { + } else if (!strcmp(arg, "--verbose") || (!strcmp(arg, "-v"))) { + if (++i >= argc || !HipTest::parseUInt(argv[i], &p_verbose)) { + failed("Bad verbose argument"); + } + } else if (!strcmp(arg, "--tests") || (!strcmp(arg, "-t"))) { + if (++i >= argc || !HipTest::parseInt(argv[i], &p_tests)) { + failed("Bad tests argument"); + } + + } else { if (failOnUndefinedArg) { failed("Bad argument '%s'", arg); } else { diff --git a/projects/hip/tests/src/test_common.h b/projects/hip/tests/src/test_common.h index fee052c1ad..e37eec7e86 100644 --- a/projects/hip/tests/src/test_common.h +++ b/projects/hip/tests/src/test_common.h @@ -16,6 +16,16 @@ #define KCYN "\x1B[36m" #define KWHT "\x1B[37m" + + +#ifdef __HIP_PLATFORM_HCC +#define TYPENAME(T) typeid(T).name() +#else +#define TYPENAME(T) "?" 
+#endif + + + #define passed() \ printf ("%sPASSED!%s\n",KGRN, KNRM);\ exit(0); @@ -25,7 +35,7 @@ printf (__VA_ARGS__);\ printf ("\n");\ printf ("error: TEST FAILED\n%s", KNRM );\ - exit(EXIT_FAILURE); + abort(); #define HIPCHECK(error) \ @@ -53,6 +63,8 @@ extern int iterations; extern unsigned blocksPerCU; extern unsigned threadsPerBlock; extern int p_gpuDevice; +extern unsigned p_verbose; +extern int p_tests; namespace HipTest { @@ -80,13 +92,13 @@ vectorADD(hipLaunchParm lp, const T *A_d, const T *B_d, T *C_d, - size_t N) + size_t NELEM) { size_t offset = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x); size_t stride = hipBlockDim_x * hipGridDim_x ; - for (size_t i=offset; i void initArrays(T **A_d, T **B_d, T **C_d, T **A_h, T **B_h, T **C_h, - size_t N) + size_t N, bool usePinnedHost=false) { size_t Nbytes = N*sizeof(T); @@ -108,14 +120,32 @@ void initArrays(T **A_d, T **B_d, T **C_d, HIPCHECK ( hipMalloc(C_d, Nbytes) ); } - if (A_h) - *A_h = (T*)malloc(Nbytes); - - if (B_h) - *B_h = (T*)malloc(Nbytes); + if (usePinnedHost) { + if (A_h) { + HIPCHECK ( hipMallocHost(A_h, Nbytes) ); + } + if (B_h) { + HIPCHECK ( hipMallocHost(B_h, Nbytes) ); + } + if (C_h) { + HIPCHECK ( hipMallocHost(C_h, Nbytes) ); + } + } else { + if (A_h) { + *A_h = (T*)malloc(Nbytes); + HIPASSERT(*A_h != NULL); + } + + if (B_h) { + *B_h = (T*)malloc(Nbytes); + HIPASSERT(*B_h != NULL); + } - if (C_h) - *C_h = (T*)malloc(Nbytes); + if (C_h) { + *C_h = (T*)malloc(Nbytes); + HIPASSERT(*C_h != NULL); + } + } // Initialize the host data: @@ -128,7 +158,43 @@ void initArrays(T **A_d, T **B_d, T **C_d, } +template +void freeArrays(T *A_d, T *B_d, T *C_d, + T *A_h, T *B_h, T *C_h, bool usePinnedHost) +{ + if (A_d) { + HIPCHECK ( hipFree(A_d) ); + } + if (B_d) { + HIPCHECK ( hipFree(B_d) ); + } + if (C_d) { + HIPCHECK ( hipFree(C_d) ); + } + if (usePinnedHost) { + if (A_h) { + HIPCHECK (hipFreeHost(A_h)); + } + if (B_h) { + HIPCHECK (hipFreeHost(B_h)); + } + if (C_h) { + HIPCHECK (hipFreeHost(C_h)); 
+ } + } else { + if (A_h) { + free (A_h); + } + if (B_h) { + free (B_h); + } + if (C_h) { + free (C_h); + } + } + +} // Assumes C_h contains vector add of A_h + B_h @@ -164,4 +230,70 @@ void checkVectorADD(T* A_h, T* B_h, T* result_H, size_t N, bool expectMatch=true } + +//--- +struct Pinned { + static const bool isPinned = true; + static const char *str() { return "Pinned"; }; + + static void *Alloc(size_t sizeBytes) + { + void *p; + HIPCHECK(hipMallocHost(&p, sizeBytes)); + return p; + }; +}; + + +//--- +struct Unpinned +{ + static const bool isPinned = false; + static const char *str() { return "Unpinned"; }; + + static void *Alloc(size_t sizeBytes) + { + void *p = malloc (sizeBytes); + HIPASSERT(p); + return p; + }; +}; + + + +struct Memcpy +{ + static const char *str() { return "Memcpy"; }; +}; + +struct MemcpyAsync +{ + static const char *str() { return "MemcpyAsync"; }; +}; + + +template struct MemTraits; + + +template<> +struct MemTraits +{ + + static void Copy(void *dest, const void *src, size_t sizeBytes, hipMemcpyKind kind, hipStream_t stream) + { + HIPCHECK(hipMemcpy(dest, src, sizeBytes, kind)); + } +}; + + +template<> +struct MemTraits +{ + + static void Copy(void *dest, const void *src, size_t sizeBytes, hipMemcpyKind kind, hipStream_t stream) + { + HIPCHECK(hipMemcpyAsync(dest, src, sizeBytes, kind, stream)); + } +}; + }; // namespace HipTest diff --git a/projects/hip/util/vim/hip.vim b/projects/hip/util/vim/hip.vim index 01f3b3f2ad..b64cf53c3f 100644 --- a/projects/hip/util/vim/hip.vim +++ b/projects/hip/util/vim/hip.vim @@ -23,6 +23,7 @@ syn keyword hipKeyword hipThreadIdx_x hipThreadIdx_y hipThreadIdx_z syn keyword hipKeyword hipBlockDim_x hipBlockDim_y hipBlockDim_z syn keyword hipKeyword hipBlockIdx_x hipBlockIdx_y hipBlockIdx_z syn keyword hipKeyword hipGridIdx_x hipGridIdx_y hipGridIdx_z +syn keyword hipKeyword hipGridDim_x hipGridDim_y hipGridDim_z syn keyword hipType uint uint1 uint2 uint3 uint4 syn keyword hipType int1 int2 int3 int4 @@ 
-31,6 +32,7 @@ syn keyword hipType char1 char2 char3 char4 syn keyword hipType uchar1 uchar2 uchar3 uchar4 syn keyword hipType short1 short2 short3 short4 syn keyword hipType dim1 dim2 dim3 dim4 +syn keyword hipType hipLaunchParm " Atomic functions syn keyword hipFunctionName atomicAdd atomicAnd atomicCAS atomicDec atomicExch @@ -62,6 +64,7 @@ syn keyword hipFunctionName expf __expf exp logf __logf log " Runtime Data Types syn keyword hipType hipDeviceProp_t syn keyword hipType hipError_t +syn keyword hipType hipStream_t " Runtime functions syn keyword hipFunctionName hipBindTexture hipBindTextureToArray @@ -91,6 +94,7 @@ syn keyword hipFunctionName hipD3D9UnmapResources syn keyword hipFunctionName hipD3D9UnregisterResource syn keyword hipFunctionName hipDeviceGetProperties syn keyword hipFunctionName hipDeviceSynchronize +syn keyword hipFunctionName hipDeviceReset syn keyword hipFunctionName hipEventCreate syn keyword hipFunctionName hipEventDestroy syn keyword hipFunctionName hipEventElapsedTime @@ -115,8 +119,8 @@ syn keyword hipFunctionName hipGLRegisterBufferObject syn keyword hipFunctionName hipGLSetGLDevice syn keyword hipFunctionName hipGLUnmapBufferObject syn keyword hipFunctionName hipGLUnregisterBufferObject -syn keyword hipFunctionName hipLaunch syn keyword hipFunctionName hipLaunchKernel +syn keyword hipFunctionName hipLaunchParm syn keyword hipFunctionName hipMalloc syn keyword hipFunctionName hipMalloc3D syn keyword hipFunctionName hipMalloc3DArray @@ -124,6 +128,7 @@ syn keyword hipFunctionName hipMallocArray syn keyword hipFunctionName hipMallocHost syn keyword hipFunctionName hipMallocPitch syn keyword hipFunctionName hipMemcpy +syn keyword hipFunctionName hipMemcpyAsync syn keyword hipFunctionName hipMemcpy2D syn keyword hipFunctionName hipMemcpy2DArrayToArray syn keyword hipFunctionName hipMemcpy2DFromArray @@ -150,7 +155,12 @@ syn keyword hipFunctionName hipUnbindTexture " HIP Flags syn keyword hipFlags hipFilterModePoint syn keyword hipFlags 
hipMemcpyHostToDevice +syn keyword hipFlags hipMemcpyDeviceToDevice +syn keyword hipFlags hipMemcpyHostToHost syn keyword hipFlags hipMemcpyDeviceToHost +syn keyword hipFlags hipMemcpyHostToHost +syn keyword hipFlags hipMemcpyDeviceToDevice +syn keyword hipFlags hipMemcpyDefault syn keyword hipFlags hipReadModeElementType syn keyword hipFlags hipSuccess syn keyword hipFlags hipTextureType1D