Merge branch 'amd-develop' into amd-master

Change-Id: I7f2fba7875ed1c89dfc768f7415ed6fb0d1c6407


[ROCm/hip commit: 080eb1265b]
This commit is contained in:
Maneesh Gupta
2017-06-12 11:22:30 +05:30
melakukan c8e481bccb
13 mengubah file dengan 380 tambahan dan 41 penghapusan
+1 -1
Melihat File
@@ -134,7 +134,7 @@ The README with the procedures and tips the team used during this porting effort
* **bin**: Tools and scripts to help with hip porting
* **hipify** : Tool to convert CUDA code to portable CPP. Converts CUDA APIs and kernel builtins.
* **hipcc** : Compiler driver that can be used to replace nvcc in existing CUDA code. hipcc ill call nvcc or hcc depending on platform, and include appropriate platform-specific headers and libraries.
* **hipcc** : Compiler driver that can be used to replace nvcc in existing CUDA code. hipcc will call nvcc or hcc depending on platform, and include appropriate platform-specific headers and libraries.
* **hipconfig** : Print HIP configuration (HIP_PATH, HIP_PLATFORM, CXX config flags, etc)
* **hipexamine.sh** : Script to scan directory, find all code, and report statistics on how much can be ported with HIP (and identify likely features not yet supported)
+9
Melihat File
@@ -13,6 +13,15 @@ Upcoming:
## Revision History:
===================================================================================================
- new APIs: hipMemcpy2DAsync, hipMallocPitch, hipHostMallocCoherent, hipHostMallocNonCoherent
- added support for building hipify-clang using clang 3.9
- hipify-clang updates for CUDA 8.0 runtime+driver support
- renamed hipify to hipify-perl
- initial implementation of hipify-cmakefile
- several documentation updates & bug fixes
===================================================================================================
Release: 1.0.17102
Date: 2017.03.07
+279
Melihat File
@@ -0,0 +1,279 @@
#!/usr/bin/perl -w
##
# Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
##
#usage hipify-cmakefile [OPTIONS] INPUT_FILE
use Getopt::Long;
GetOptions(
"print-stats" => \$print_stats # print the command-line, like a header.
, "quiet-warnings" => \$quiet_warnings # don't print warnings on unknown CUDA functions.
, "no-output" => \$no_output # don't write any translated output to stdout.
, "inplace" => \$inplace # modify input file inplace, save backup in ".prehip" file.
, "n" => \$n # combination of print_stats + no-output.
);
$print_stats = 1 if $n;
$no_output = 1 if $n;
@warn_whitelist = ();
#---
#Stats tracking code:
@statNames = ( "macro", "include", "option", "other" );
#---
#Compute total of all individual counts:
sub totalStats {
my %count = %{ shift() };
my $total = 0;
foreach $key ( keys %count ) {
$total += $count{$key};
}
return $total;
}
#---
sub printStats {
my $label = shift();
my @statNames = @{ shift() };
my %counts = %{ shift() };
my $warnings = shift();
my $loc = shift();
my $total = totalStats( \%counts );
printf STDERR "%s %d CUDA->HIP refs( ", $label, $total;
foreach $stat (@statNames) {
printf STDERR "%s:%d ", $stat, $counts{$stat};
}
printf STDERR ") warn:%d LOC:%d", $warnings, $loc;
}
#---
# Add adder stats to dest. Used to add stats for current file to a running total for all files:
sub addStats {
my $dest_ref = shift();
my %adder = %{ shift() };
foreach $key ( keys %adder ) {
$dest_ref->{$key} += $adder{$key};
}
}
#---
sub clearStats {
my $dest_ref = shift();
my @statNames = @{ shift() };
foreach $stat (@statNames) {
$dest_ref->{$stat} = 0;
}
}
# count of transforms in all files:
my %tt;
clearStats( \%tt, \@statNames );
my $fileCount = @ARGV;
my $fileName = "";
while (@ARGV) {
$fileName = shift(@ARGV);
if ($inplace) {
my $file_prehip = "$fileName" . ".prehip";
my $infile;
my $outfile;
if ( -e $file_prehip ) {
$infile = $file_prehip;
$outfile = $fileName;
}
else {
system("cp $fileName $file_prehip");
$infile = $file_prehip;
$outfile = $fileName;
}
open( INFILE, "<", $infile ) or die "error: could not open $infile";
open( OUTFILE, ">", $outfile ) or die "error: could not open $outfile";
$OUTFILE = OUTFILE;
}
else {
open( INFILE, "<", $fileName ) or die "error: could not open $fileName";
$OUTFILE = STDOUT;
}
# count of transforms in this file, init to 0 here:
my %ft;
clearStats( \%ft, \@statNames );
my $lineCount = 0;
undef $/; # Read whole file at once, so we can match newlines.
while (<INFILE>) {
# Replace find_package(CUDA) with find_package(HIP)
$ft{'include'} += s/\bfind_package[ ]*\([ ]*CUDA[ ]*[0-9.]*/find_package(HIP/ig;
# Replace macros
$ft{'macro'} += s/\bCUDA_ADD_EXECUTABLE/HIP_ADD_EXECUTABLE/ig;
$ft{'macro'} += s/\bCUDA_ADD_LIBRARY/HIP_ADD_LIBRARY/ig;
$ft{'macro'} += s/\bCUDA_INCLUDE_DIRECTORIES/HIP_INCLUDE_DIRECTORIES/ig;
# Replace options
$ft{'option'} += s/\bCUDA_NVCC_FLAGS/HIP_NVCC_FLAGS/ig;
$ft{'option'} += s/\bCUDA_HOST_COMPILATION_CPP/HIP_HOST_COMPILATION_CPP/ig;
$ft{'option'} += s/\bCUDA_SOURCE_PROPERTY_FORMAT/HIP_SOURCE_PROPERTY_FORMAT/ig;
# Replace variables
$ft{'other'} += s/\bCUDA_FOUND/HIP_FOUND/ig;
$ft{'other'} += s/\bCUDA_VERSION/HIP_VERSION/ig;
$ft{'other'} += s/\bCUDA_TOOLKIT_ROOT_DIR/HIP_ROOT_DIR/ig;
unless ($quiet_warnings) {
#print STDERR "Check WARNINGs\n";
# copy into array of lines, process line-by-line to show warnings:
my @lines = split /\n/, $_;
my $tmp = $_; # copies the whole file, could be a little smarter here...
my $line_num = 0;
foreach (@lines) {
$line_num++;
# remove any whitelisted words:
foreach $w (@warn_whitelist) {
s/\b$w\b/ZAP/;
}
$s = warnUnsupportedSpecialFunctions($line_num);
$warnings += $s;
}
$_ = $tmp;
}
#--------
# Print it!
unless ($no_output) {
print $OUTFILE "$_";
}
$lineCount = $_ =~ tr/\n//;
}
my $totalConverted = totalStats( \%ft );
if ( ( $totalConverted + $warnings ) and $print_stats ) {
printStats( "info: converted", \@statNames, \%ft, $warnings, $lineCount );
print STDERR " in '$fileName'\n";
print STDERR "You may need to hand-edit '$fileName' to add steps to build correctly on HCC path\n";
}
# Update totals for all files:
addStats( \%tt, \%ft );
$Twarnings += $warnings;
$TlineCount += $lineCount;
}
#-- Print total stats for all files processed:
if ( $print_stats and ( $fileCount > 1 ) ) {
print STDERR "\n";
printStats( "info: TOTAL-converted", \@statNames, \%tt, $Twarnings, $TlineCount );
print STDERR "\n";
}
#---
sub warnUnsupportedSpecialFunctions {
my $line_num = shift;
my $m = 0;
foreach $func (
# macros:
"CUDA_ADD_CUFFT_TO_TARGET",
"CUDA_ADD_CUBLAS_TO_TARGET",
#"CUDA_ADD_EXECUTABLE",
#"CUDA_ADD_LIBRARY",
"CUDA_BUILD_CLEAN_TARGET",
"CUDA_COMPILE",
"CUDA_COMPILE_PTX",
"CUDA_COMPILE_FATBIN",
"CUDA_COMPILE_CUBIN",
"CUDA_COMPUTE_SEPARABLE_COMPILATION_OBJECT_FILE_NAME",
#"CUDA_INCLUDE_DIRECTORIES",
"CUDA_LINK_SEPARABLE_COMPILATION_OBJECTS",
"CUDA_SELECT_NVCC_ARCH_FLAGS",
"CUDA_WRAP_SRCS",
# options:
"CUDA_64_BIT_DEVICE_CODE",
"CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE",
"CUDA_BUILD_CUBIN",
"CUDA_BUILD_EMULATION",
"CUDA_LINK_LIBRARIES_KEYWORD",
"CUDA_GENERATED_OUTPUT_DIR",
#"CUDA_HOST_COMPILATION_CPP",
"CUDA_HOST_COMPILER",
#"CUDA_NVCC_FLAGS",
#"CUDA_NVCC_FLAGS_<CONFIG>",
"CUDA_PROPAGATE_HOST_FLAGS",
"CUDA_SEPARABLE_COMPILATION",
#"CUDA_SOURCE_PROPERTY_FORMAT",
"CUDA_USE_STATIC_CUDA_RUNTIME",
"CUDA_VERBOSE_BUILD",
# others:
#"CUDA_VERSION_MAJOR",
#"CUDA_VERSION_MINOR",
#"CUDA_VERSION",
#"CUDA_VERSION_STRING",
"CUDA_HAS_FP16",
#"CUDA_TOOLKIT_ROOT_DIR",
"CUDA_SDK_ROOT_DIR",
"CUDA_INCLUDE_DIRS",
"CUDA_LIBRARIES",
"CUDA_CUFFT_LIBRARIES",
"CUDA_CUBLAS_LIBRARIES",
"CUDA_cudart_static_LIBRARY",
"CUDA_cudadevrt_LIBRARY",
"CUDA_cupti_LIBRARY",
"CUDA_curand_LIBRARY",
"CUDA_cusolver_LIBRARY",
"CUDA_cusparse_LIBRARY",
"CUDA_npp_LIBRARY",
"CUDA_nppc_LIBRARY",
"CUDA_nppi_LIBRARY",
"CUDA_npps_LIBRARY",
"CUDA_nvcuvenc_LIBRARY",
"CUDA_nvcuvid_LIBRARY"
)
{
my $mt = m/\b($func)/g;
if ($mt) {
$m += $mt;
print STDERR " warning: $fileName:#$line_num : unsupported macro/option : $_\n";
}
}
return $m;
}
@@ -24,7 +24,7 @@ THE SOFTWARE.
#define HIP_INCLUDE_HIP_HCC_DETAIL_HIP_FP16_H
#include "hip/hcc_detail/hip_vector_types.h"
#if ( __clang_major__ > 3)
typedef __fp16 __half;
typedef __fp16 __half1 __attribute__((ext_vector_type(1)));
typedef __fp16 __half2 __attribute__((ext_vector_type(2)));
@@ -454,6 +454,6 @@ __device__ static inline __half2 h2trunc(const __half2 h) {
a.xy = __hip_hc_ir_h2trunc_int(h.xy);
return a;
}
#endif //clang_major > 3
#endif
@@ -305,7 +305,7 @@ __device__ int __hip_move_dpp(int src, int dpp_ctrl, int row_mask, int bank_mask
__host__ __device__ int min(int arg1, int arg2);
__host__ __device__ int max(int arg1, int arg2);
__device__ ADDRESS_SPACE_3 void* __get_dynamicgroupbaseptr();
__device__ void* __get_dynamicgroupbaseptr();
/**
@@ -464,10 +464,10 @@ do {\
// Macro to replace extern __shared__ declarations
// to local variable definitions
#define HIP_DYNAMIC_SHARED(type, var) \
ADDRESS_SPACE_3 type* var = \
(ADDRESS_SPACE_3 type*)__get_dynamicgroupbaseptr(); \
type* var = \
(type*)__get_dynamicgroupbaseptr(); \
#define HIP_DYNAMIC_SHARED_ATTRIBUTE ADDRESS_SPACE_3
#define HIP_DYNAMIC_SHARED_ATTRIBUTE
+2 -2
Melihat File
@@ -42,9 +42,9 @@ set(CPACK_RPM_POST_INSTALL_SCRIPT_FILE "${PROJECT_BINARY_DIR}/postinst")
set(CPACK_RPM_PRE_UNINSTALL_SCRIPT_FILE "${PROJECT_BINARY_DIR}/prerm")
set(CPACK_RPM_PACKAGE_AUTOREQPROV " no")
if(@COMPILE_HIP_ATP_MARKER@)
set(CPACK_RPM_PACKAGE_REQUIRES "hip_base = ${CPACK_PACKAGE_VERSION}, ${HCC_PACKAGE_NAME} = @HCC_PACKAGE_VERSION@, rocm-profiler")
set(CPACK_RPM_PACKAGE_REQUIRES "hip_base = ${CPACK_PACKAGE_VERSION}, ${HCC_PACKAGE_NAME} = @HCC_PACKAGE_VERSION@, rocm-profiler, libstdc++-static")
else()
set(CPACK_RPM_PACKAGE_REQUIRES "hip_base = ${CPACK_PACKAGE_VERSION}, ${HCC_PACKAGE_NAME} = @HCC_PACKAGE_VERSION@")
set(CPACK_RPM_PACKAGE_REQUIRES "hip_base = ${CPACK_PACKAGE_VERSION}, ${HCC_PACKAGE_NAME} = @HCC_PACKAGE_VERSION@, libstdc++-static")
endif()
set(CPACK_RPM_EXCLUDE_FROM_AUTO_FILELIST_ADDITION "/opt")
set(CPACK_SOURCE_GENERATOR "TGZ")
+4 -2
Melihat File
@@ -1101,11 +1101,13 @@ __host__ __device__ int max(int arg1, int arg2)
return (int)(hc::precise_math::fmax((float)arg1, (float)arg2));
}
__device__ ADDRESS_SPACE_3 void* __get_dynamicgroupbaseptr()
{
__device__ void* __get_dynamicgroupbaseptr() {
return hc::get_dynamic_group_segment_base_pointer();
}
__host__ void* __get_dynamicgroupbaseptr() {
return nullptr;
}
// Precise Math Functions
__device__ float __hip_precise_cosf(float x) {
+1 -1
Melihat File
@@ -98,7 +98,7 @@ int HIP_SYNC_NULL_STREAM = 0;
// HIP needs to change some behavior based on HCC_OPT_FLUSH :
// TODO - set this to 1
int HCC_OPT_FLUSH = 0;
int HCC_OPT_FLUSH = 1;
+3 -2
Melihat File
@@ -243,6 +243,7 @@ hipError_t hipMalloc(void** ptr, size_t sizeBytes)
}
hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags)
{
HIP_INIT_SPECIAL_API((TRACE_MEM), ptr, sizeBytes, flags);
@@ -289,10 +290,10 @@ hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags)
if (flags & hipHostMallocCoherent) {
amFlags = amHostCoherent;
} else if (flags & hipHostMallocNonCoherent) {
amFlags = amHostPinned;
amFlags = amHostNonCoherent;
} else {
// depends on env variables:
amFlags = HIP_COHERENT_HOST_ALLOC ? amHostCoherent : amHostPinned;
amFlags = HIP_COHERENT_HOST_ALLOC ? amHostCoherent : amHostNonCoherent;
}
+64 -22
Melihat File
@@ -1,39 +1,78 @@
# HIP testing environment.
This document explains how to use the HIP CMAKE testing environment.
This document explains how to use the HIP CMAKE testing environment.
We make use of the HIT Integrated Tester (HIT) framework to automatically find and add test cases to the CMAKE testing environment.
### Quickstart
Usage :
HIP unit tests are integrated into the top-level cmake project. The tests depend upon the installed version of HIP.
Typical usage (paths relative to top of the HIP repo):
```
$ mkdir build
$ cd build
$ cmake ../src
$ cmake .. -DCMAKE_INSTALL_PREFIX=$PWD/install
$ make
$ make install
$ make build_tests
$ make test
```
### How to add a new test
The tests/src/runtimeApi/memory/hipMemtest.cpp file contains a simple unit test and is a good starting point for other tests.
Copy this to a new test name and modify tests/src/CMakefiles.txt to add the test to the build environment.
Recent versions of the test infrastructure use a hierarchy of folders. Each folder contains src and CMakefiles.txt file.
See the CMakefiles.txt files for description of the intended purpose for each sub-directory.
The test infrastructure use a hierarchy of folders. So add the new test to the appropriate folder.
The tests/src/runtimeApi/memory/hipMemset.cpp file contains a simple unit test and is a good starting point for other tests.
Copy this to a new test name and modify it.
#### Edit CMakefiles.txt:
// Example:
### HIP Integrated Tester (HIT)
The HIT framework sutomatically finds and adds test cases to the CMAKE testing environment. It achives this by parsing all files in the tests/src folder.
The parser looks for a code block similar to the one below.
```
# Build the test executable:
build_hip_executable (hipMemset hipMemset.cpp)
# This runs the tests with the specified command-line testing.
# Multiple make_test may be specified.
make_test(hipMemset " ")
/* HIT_START
* BUILD: %t %s ../../test_common.cpp
* RUN: %t
* //Small copy
* RUN: %t -N 10 --memsetval 0x42
* // Oddball size
* RUN: %t -N 10013 --memsetval 0x5a
* // Big copy
* RUN: %t -N 256M --memsetval 0xa6
* HIT_END
*/
```
In the above, BUILD commands provide instructions on how to build the test case while RUN commands provide instructions on how to execute the test case.
It is recommended to place the build and run steps adjacent in the CMakefiles.txt.
#### BUILD command
The supported syntax for the BUILD command is:
```
BUILD: %t %s HIPCC_OPTIONS <hipcc_specific_options> HCC_OPTIONS <hcc_specific_options> NVCC_OPTIONS <nvcc_specific_options> EXCLUDE_HIP_PLATFORM <hcc|nvcc|all>
```
%s: refers to current source file name. Additional source files needed for the test can be specified by name (including relative path).
%t: refers to target executable named derived by removing the extension from the current source file. Alternatively a target executable name can be specified.
HIPCC_OPTIONS: All options specified after this delimiter are passed to hipcc on both HCC and NVCC platforms.
HCC_OPTIONS: All options specified after this delimiter are passed to hipcc on HCC platform only.
NVCC_OPTIONS: All options specified after this delimiter are passed to hipcc on NVCC platform only.
EXCLUDE_HIP_PLATFORM: This can be used to exclude a test case from HCC, NVCC or both platforms.
#### RUN command
The supported syntax for the RUN command is:
```
RUN: %t <arguments_to_test_executable> EXCLUDE_HIP_PLATFORM <hcc|nvcc|all>
```
%t: refers to target executable named derived by removing the extension from the current source file. Alternatively a target executable name can be specified.
EXCLUDE_HIP_PLATFORM: This can be used to exclude a test case from HCC, NVCC or both platforms. Note that if the test has been excluded for a specific platform in the BUILD command, it is automatically excluded from the RUN command as well for the same platform.
#### RUN_NAMED command
When using the RUN command, HIT will squash and append the arguments specified to the test executable name to generate the CMAKE test name. Sometimes we might want to specify a more descriptive name. The RUN_NAMED command is used for that. The supported syntax for the RUN_NAMED command is:
```
RUN: %t CMAKE_TEST_NAME <arguments_to_test_executable> EXCLUDE_HIP_PLATFORM <hcc|nvcc|all>
```
### Running tests:
@@ -43,11 +82,14 @@ ctest
### Run subsets of all tests:
```
# Run one test on the commandline (obtain commandline parms from CMakefiles.tst)
./hipMemset
# Run one test on the commandline
./directed_tests/runtime/memory/hipMemset
# Run all the memory tests:
# Run all the hipMemcpy tests:
ctest -R Memcpy
# Run all tests in a specific folder:
ctest -R memory
```
@@ -55,7 +97,7 @@ ctest -R Memcpy
Find the test and commandline that fail:
(From the test build directory, perhaps hip/tests/build)
(From the build directory, perhaps hip/build)
grep -IR hipMemcpy-modes -IR ../tests/
../tests/src/runtimeApi/memory/hipMemcpy.cpp: * RUN_NAMED: %t hipMemcpy-modes --tests 0x1
@@ -4,7 +4,7 @@
#include "../test_common.h"
#define LEN 1030
#define LEN 1024
#define SIZE LEN << 2
/* HIT_START
@@ -17,13 +17,13 @@
__global__ void cpy(hipLaunchParm lp, uint32_t *Out, uint32_t *In)
{
int tx = hipThreadIdx_x;
memcpy(Out + tx, In + tx, SIZE/LEN);
memcpy(Out + tx, In + tx, sizeof(uint32_t));
}
__global__ void set(hipLaunchParm lp, uint32_t *ptr, uint8_t val, size_t size)
{
int tx = hipThreadIdx_x;
memset(ptr + tx, val, size);
memset(ptr + tx, val, (sizeof(uint32_t)*(size/LEN)));
}
int main()
@@ -304,7 +304,7 @@ void memcpytest2_sizes(size_t maxElem=0)
HIPCHECK(hipMemGetInfo(&free, &total));
if (maxElem == 0) {
maxElem = free/sizeof(T)/5;
maxElem = free/sizeof(T)/8;
}
printf (" device#%d: hipMemGetInfo: free=%zu (%4.2fMB) total=%zu (%4.2fMB) maxSize=%6.1fMB\n",
@@ -156,7 +156,13 @@ int main(int argc, char *argv[])
int dev0 = 0;
int dev1 = 1;
// TODO - only works on multi-GPU system:
int numDevices;
HIPCHECK(hipGetDeviceCount(&numDevices));
if (numDevices == 1) {
printf("warning : test requires atleast two gpus\n");
passed();
}
if (enablePeers(dev0,dev1) == -1) {
printf ("warning : could not find peer gpus\n");
return -1;