From afe45964ae5944693697f60b28bef1b28fad2e57 Mon Sep 17 00:00:00 2001 From: Sam Kolton Date: Thu, 4 Feb 2016 17:39:07 +0300 Subject: [PATCH 1/8] Implementation of hipDeviceGetAttribute() --- hipamd/bin/hipify | 22 ++++++ hipamd/include/hcc_detail/hip_runtime_api.h | 7 ++ hipamd/include/hip_runtime_api.h | 26 ++++++- hipamd/include/nvcc_detail/hip_runtime_api.h | 51 +++++++++++++ hipamd/src/hip_hcc.cpp | 58 +++++++++++++- hipamd/tests/src/CMakeLists.txt | 1 + hipamd/tests/src/hipInfo.cpp | 80 ++++++++++++++++++++ 7 files changed, 243 insertions(+), 2 deletions(-) create mode 100644 hipamd/tests/src/hipInfo.cpp diff --git a/hipamd/bin/hipify b/hipamd/bin/hipify index e5f17b69d0..f4de89aab2 100755 --- a/hipamd/bin/hipify +++ b/hipamd/bin/hipify @@ -345,6 +345,28 @@ while (@ARGV) { $ft{'dev'} += s/\bcudaDeviceProp\b/hipDeviceProp_t/g; $ft{'dev'} += s/\bcudaGetDeviceProperties\b/hipDeviceGetProperties/g; + # Attribute + $ft{'err'} += s/\bcudaDevAttrMaxThreadsPerBlock\b/hipDeviceAttributeMaxThreadsPerBlock/g; + $ft{'err'} += s/\bcudaDevAttrMaxBlockDimX\b/hipDeviceAttributeMaxBlockDimX/g; + $ft{'err'} += s/\bcudaDevAttrMaxBlockDimY\b/hipDeviceAttributeMaxBlockDimY/g; + $ft{'err'} += s/\bcudaDevAttrMaxBlockDimZ\b/hipDeviceAttributeMaxBlockDimZ/g; + $ft{'err'} += s/\bcudaDevAttrMaxGridDimX\b/hipDeviceAttributeMaxGridDimX/g; + $ft{'err'} += s/\bcudaDevAttrMaxGridDimY\b/hipDeviceAttributeMaxGridDimY/g; + $ft{'err'} += s/\bcudaDevAttrMaxGridDimZ\b/hipDeviceAttributeMaxGridDimZ/g; + $ft{'err'} += s/\bcudaDevAttrMaxSharedMemoryPerBlock\b/hipDeviceAttributeMaxSharedMemoryPerBlock/g; + $ft{'err'} += s/\bcudaDevAttrTotalConstantMemory\b/hipDeviceAttributeTotalConstantMemory/g; + $ft{'err'} += s/\bcudaDevAttrWarpSize\b/hipDeviceAttributeWarpSize/g; + $ft{'err'} += s/\bcudaDevAttrMaxRegistersPerBlock\b/hipDeviceAttributeMaxRegistersPerBlock/g; + $ft{'err'} += s/\bcudaDevAttrClockRate\b/hipDeviceAttributeClockRate/g; + $ft{'err'} += s/\bcudaDevAttrMultiProcessorCount\b/hipDeviceAttributeMultiprocessorCount/g; + $ft{'err'} += s/\bcudaDevAttrComputeMode\b/hipDeviceAttributeComputeMode/g; + $ft{'err'} += s/\bcudaDevAttrL2CacheSize\b/hipDeviceAttributeL2CacheSize/g; + $ft{'err'} += s/\bcudaDevAttrMaxThreadsPerMultiProcessor\b/hipDeviceAttributeMaxThreadsPerMultiProcessor/g; + $ft{'err'} += s/\bcudaDevAttrComputeCapabilityMajor\b/hipDeviceAttributeComputeCapabilityMajor/g; + $ft{'err'} += s/\bcudaDevAttrComputeCapabilityMinor\b/hipDeviceAttributeComputeCapabilityMinor/g; + $ft{'dev'} += s/\bcudaDeviceAttr\b/hipDeviceAttribute_t/g; + $ft{'dev'} += s/\bcudaDeviceGetAttribute\b/hipDeviceGetAttribute/g; + # Cache config $ft{'dev'} += s/\bcudaDeviceSetCacheConfig\b/hipDeviceSetCacheConfig/g; $ft{'dev'} += s/\bcudaThreadSetCacheConfig\b/hipDeviceSetCacheConfig/g; # translate deprecated diff --git a/hipamd/include/hcc_detail/hip_runtime_api.h b/hipamd/include/hcc_detail/hip_runtime_api.h index 63238ff354..5f5d0b6e47 100644 --- a/hipamd/include/hcc_detail/hip_runtime_api.h +++ b/hipamd/include/hcc_detail/hip_runtime_api.h @@ -232,6 +232,13 @@ hipError_t hipGetDevice(int *device); */ hipError_t hipGetDeviceCount(int *count); +/** + * @brief Query device attribute. + * @param [out] pi pointer to value to return + * @param [in] attr attribute to query + * @param [in] device which device to query for information + */ +hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device); /** * @brief Returns device properties. diff --git a/hipamd/include/hip_runtime_api.h b/hipamd/include/hip_runtime_api.h index bd8ccfff23..c6fd2b7113 100644 --- a/hipamd/include/hip_runtime_api.h +++ b/hipamd/include/hip_runtime_api.h @@ -125,7 +125,31 @@ typedef enum hipError_t { ,hipErrorTbd ///< Marker that more error codes are needed. } hipError_t; - +/* + * @brief hipDeviceAttribute_t + * @enum + * @ingroup Enumerations + */ +typedef enum hipDeviceAttribute_t { + hipDeviceAttributeMaxThreadsPerBlock, ///< Maximum number of threads per block. + hipDeviceAttributeMaxBlockDimX, ///< Maximum x-dimension of a block. + hipDeviceAttributeMaxBlockDimY, ///< Maximum y-dimension of a block. + hipDeviceAttributeMaxBlockDimZ, ///< Maximum z-dimension of a block. + hipDeviceAttributeMaxGridDimX, ///< Maximum x-dimension of a grid. + hipDeviceAttributeMaxGridDimY, ///< Maximum y-dimension of a grid. + hipDeviceAttributeMaxGridDimZ, ///< Maximum z-dimension of a grid. + hipDeviceAttributeMaxSharedMemoryPerBlock, ///< Maximum shared memory available per block in bytes. + hipDeviceAttributeTotalConstantMemory, ///< Constant memory size in bytes. + hipDeviceAttributeWarpSize, ///< Warp size in threads. + hipDeviceAttributeMaxRegistersPerBlock, ///< Maximum number of 32-bit registers available to a thread block. This number is shared by all thread blocks simultaneously resident on a multiprocessor. + hipDeviceAttributeClockRate, ///< Peak clock frequency in kilohertz. + hipDeviceAttributeMultiprocessorCount, ///< Number of multiprocessors on the device. + hipDeviceAttributeComputeMode, ///< Compute mode that device is currently in. + hipDeviceAttributeL2CacheSize, ///< Size of L2 cache in bytes. 0 if the device doesn't have L2 cache. + hipDeviceAttributeMaxThreadsPerMultiProcessor, ///< Maximum resident threads per multiprocessor. + hipDeviceAttributeComputeCapabilityMajor, ///< Major compute capability version number. + hipDeviceAttributeComputeCapabilityMinor, ///< Minor compute capability version number. +} hipDeviceAttribute_t; /** * @} diff --git a/hipamd/include/nvcc_detail/hip_runtime_api.h b/hipamd/include/nvcc_detail/hip_runtime_api.h index 5c15c6259e..62cdc582bc 100644 --- a/hipamd/include/nvcc_detail/hip_runtime_api.h +++ b/hipamd/include/nvcc_detail/hip_runtime_api.h @@ -211,6 +211,57 @@ inline static hipError_t hipDeviceGetProperties(hipDeviceProp_t *p_prop, int dev return hipCUDAErrorTohipError(cerror); } +inline static hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device) +{ + cudaDeviceAttribute cdattr; + cudaError_t cerror; + + switch (attr) { + case hipDeviceAttributeMaxThreadsPerBlock: + cdattr = cudaDevAttrMaxThreadsPerBlock; break; + case hipDeviceAttributeMaxBlockDimX: + cdattr = cudaDevAttrMaxBlockDimX; break; + case hipDeviceAttributeMaxBlockDimY: + cdattr = cudaDevAttrMaxBlockDimY; break; + case hipDeviceAttributeMaxBlockDimZ: + cdattr = cudaDevAttrMaxBlockDimZ; break; + case hipDeviceAttributeMaxGridDimX: + cdattr = cudaDevAttrMaxGridDimX; break; + case hipDeviceAttributeMaxGridDimY: + cdattr = cudaDevAttrMaxGridDimY; break; + case hipDeviceAttributeMaxGridDimZ: + cdattr = cudaDevAttrMaxGridDimZ; break; + case hipDeviceAttributeMaxSharedMemoryPerBlock: + cdattr = cudaDevAttrMaxSharedMemoryPerBlock; break; + case hipDeviceAttributeTotalConstantMemory: + cdattr = cudaDevAttrTotalConstantMemory; break; + case hipDeviceAttributeWarpSize: + cdattr = cudaDevAttrWarpSize; break; + case hipDeviceAttributeMaxRegistersPerBlock: + cdattr = cudaDevAttrMaxRegistersPerBlock; break; + case hipDeviceAttributeClockRate: + cdattr = cudaDevAttrClockRate; break; + case hipDeviceAttributeMultiprocessorCount: + cdattr = cudaDevAttrMultiProcessorCount; break; + case hipDeviceAttributeComputeMode: + cdattr = cudaDevAttrComputeMode; break; + case hipDeviceAttributeL2CacheSize: + cdattr = cudaDevAttrL2CacheSize; break; + case hipDeviceAttributeMaxThreadsPerMultiProcessor: + cdattr = cudaDevAttrMaxThreadsPerMultiProcessor; break; + case hipDeviceAttributeComputeCapabilityMajor: + cdattr = cudaDevAttrComputeCapabilityMajor; break; + case hipDeviceAttributeComputeCapabilityMinor: + cdattr = cudaDevAttrComputeCapabilityMinor; break; + default: + e = hipErrorInvalidValue; break; + } + + cerror = cudaDeviceGetAttribute(pi, cdattr, device); + + return hipCUDAErrorTohipError(cerror); +} + inline static hipError_t hipMemGetInfo( size_t* free, size_t* total) { return hipCUDAErrorTohipError(cudaMemGetInfo(free,total)); diff --git a/hipamd/src/hip_hcc.cpp b/hipamd/src/hip_hcc.cpp index dbcba3bee4..5cb021b7e7 100644 --- a/hipamd/src/hip_hcc.cpp +++ b/hipamd/src/hip_hcc.cpp @@ -789,7 +789,63 @@ hipError_t hipDeviceReset(void) return ihipLogStatus(hipSuccess); } +/** + * + */ +hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device) +{ + std::call_once(hip_initialized, ihipInit); + hipError_t e = hipSuccess; + + ihipDevice_t * hipDevice = ihipGetDevice(device); + hipDeviceProp_t *prop = &hipDevice->_props; + if (hipDevice) { + switch (attr) { + case hipDeviceAttributeMaxThreadsPerBlock: + *pi = prop->maxThreadsPerBlock; break; + case hipDeviceAttributeMaxBlockDimX: + *pi = prop->maxThreadsDim[0]; break; + case hipDeviceAttributeMaxBlockDimY: + *pi = prop->maxThreadsDim[1]; break; + case hipDeviceAttributeMaxBlockDimZ: + *pi = prop->maxThreadsDim[2]; break; + case hipDeviceAttributeMaxGridDimX: + *pi = prop->maxGridSize[0]; break; + case hipDeviceAttributeMaxGridDimY: + *pi = prop->maxGridSize[1]; break; + case hipDeviceAttributeMaxGridDimZ: + *pi = prop->maxGridSize[2]; break; + case hipDeviceAttributeMaxSharedMemoryPerBlock: + *pi = prop->sharedMemPerBlock; break; + case hipDeviceAttributeTotalConstantMemory: + *pi = prop->totalConstMem; break; + case hipDeviceAttributeWarpSize: + *pi = prop->warpSize; break; + case hipDeviceAttributeMaxRegistersPerBlock: + *pi = prop->regsPerBlock; break; + case hipDeviceAttributeClockRate: + *pi = prop->clockRate; break; + case hipDeviceAttributeMultiprocessorCount: + *pi = prop->multiProcessorCount; break; + case hipDeviceAttributeComputeMode: + *pi = prop->computeMode; break; + case hipDeviceAttributeL2CacheSize: + *pi = prop->l2CacheSize; break; + case hipDeviceAttributeMaxThreadsPerMultiProcessor: + *pi = prop->maxThreadsPerMultiProcessor; break; + case hipDeviceAttributeComputeCapabilityMajor: + *pi = prop->major; break; + case hipDeviceAttributeComputeCapabilityMinor: + *pi = prop->minor; break; + default: + e = hipErrorInvalidValue; break; + } + } else { + e = hipErrorInvalidDevice; + } + return ihipLogStatus(e); +} /** @@ -1367,8 +1423,8 @@ hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind } else { e = hipErrorInvalidResourceHandle; } - + #else // TODO-hsart - what synchronization does hsa_copy provide? hc::am_copy(dst, src, sizeBytes); diff --git a/hipamd/tests/src/CMakeLists.txt b/hipamd/tests/src/CMakeLists.txt index 596d172fe3..a2b7056403 100644 --- a/hipamd/tests/src/CMakeLists.txt +++ b/hipamd/tests/src/CMakeLists.txt @@ -120,6 +120,7 @@ make_hip_executable (hip_popc hip_popc.cpp) make_hip_executable (hip_clz hip_clz.cpp) make_hip_executable (hip_brev hip_brev.cpp) make_hip_executable (hip_ffs hip_ffs.cpp) +make_hip_executable (hipInfo hipInfo.cpp) make_hip_executable (hipMemset hipMemset.cpp) make_hip_executable (hipMemcpy hipMemcpy.cpp) make_hip_executable (hipEventRecord hipEventRecord.cpp) diff --git a/hipamd/tests/src/hipInfo.cpp b/hipamd/tests/src/hipInfo.cpp new file mode 100644 index 0000000000..62b6d432a0 --- /dev/null +++ b/hipamd/tests/src/hipInfo.cpp @@ -0,0 +1,80 @@ +/* +Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +// Test the device info API extensions for HIP: + +#include +#include +#include + +#include "test_common.h" + +#define CHECK(error) \ + if (error != hipSuccess) { \ + fprintf(stderr, "error: '%s'(%d) at %s:%d\n", hipGetErrorString(error), error,__FILE__, __LINE__); \ + exit(EXIT_FAILURE);\ + } + +hipError_t test_hipDeviceGetAttribute(int deviceId, hipDeviceAttribute_t attr, int expectedValue = 0) +{ + int value = 0; + std::cout << "Test hipDeviceGetAttribute attribute " << attr; + if (expectedValue) { std::cout << " expected value " << expectedValue; } + hipError_t e = hipDeviceGetAttribute(&value, attr, deviceId); + std::cout << " actual value " << value << std::endl; + if (expectedValue && value != expectedValue) { + std::cout << "fail" << std::endl; + return hipErrorInvalidValue; + } + return hipSuccess; +} + +int main(int argc, char *argv[]) +{ + int deviceId; + CHECK (hipGetDevice(&deviceId)); + hipDeviceProp_t props; + CHECK(hipDeviceGetProperties(&props, deviceId)); + printf ("info: running on device #%d %s\n", deviceId, props.name); + + CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeMaxThreadsPerBlock, props.maxThreadsPerBlock)); + CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeMaxBlockDimX, props.maxThreadsDim[0])); + CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeMaxBlockDimY, props.maxThreadsDim[1])); + CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeMaxBlockDimZ, props.maxThreadsDim[2])); + CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeMaxGridDimX, props.maxGridSize[0])); + CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeMaxGridDimY, props.maxGridSize[1])); + CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeMaxGridDimZ, props.maxGridSize[2])); + CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeMaxSharedMemoryPerBlock, props.sharedMemPerBlock)); + CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeTotalConstantMemory, props.totalConstMem)); + CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeWarpSize, props.warpSize)); + CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeMaxRegistersPerBlock, props.regsPerBlock)); + CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeClockRate, props.clockRate)); + CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeMultiprocessorCount, props.multiProcessorCount)); + CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeComputeMode, props.computeMode)); + CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeL2CacheSize, props.l2CacheSize)); + CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeMaxThreadsPerMultiProcessor, props.maxThreadsPerMultiProcessor)); + CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeComputeCapabilityMajor, props.major)); + CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeComputeCapabilityMinor, props.minor)); + + passed(); + +}; + From 9aec91a3b7d0636631ad7bcb78ceeabbfc8f0ac6 Mon Sep 17 00:00:00 2001 From: Ben Sander Date: Thu, 4 Feb 2016 16:26:33 -0600 Subject: [PATCH 2/8] Fix getdeviceattr compilation for NVCC --- hipamd/include/nvcc_detail/hip_runtime_api.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hipamd/include/nvcc_detail/hip_runtime_api.h b/hipamd/include/nvcc_detail/hip_runtime_api.h index 62cdc582bc..482b1259b0 100644 --- a/hipamd/include/nvcc_detail/hip_runtime_api.h +++ b/hipamd/include/nvcc_detail/hip_runtime_api.h @@ -213,7 +213,7 @@ inline static hipError_t hipDeviceGetProperties(hipDeviceProp_t *p_prop, int dev inline static hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device) { - cudaDeviceAttribute cdattr; + cudaDeviceAttr cdattr; cudaError_t cerror; switch (attr) { @@ -254,7 +254,7 @@ inline static hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t att case hipDeviceAttributeComputeCapabilityMinor: cdattr = cudaDevAttrComputeCapabilityMinor; break; default: - e = hipErrorInvalidValue; break; + cerror = cudaErrorInvalidValue; break; } cerror = cudaDeviceGetAttribute(pi, cdattr, device); From 26854bb31c4aceba3ee4f8a540ed722aa585e46c Mon Sep 17 00:00:00 2001 From: Ben Sander Date: Fri, 5 Feb 2016 07:15:46 -0600 Subject: [PATCH 3/8] Fix HIP_PLATFORM detection --- hipamd/tests/src/CMakeLists.txt | 32 ++++++++------------------------ 1 file changed, 8 insertions(+), 24 deletions(-) diff --git a/hipamd/tests/src/CMakeLists.txt b/hipamd/tests/src/CMakeLists.txt index a2b7056403..77882428e2 100644 --- a/hipamd/tests/src/CMakeLists.txt +++ b/hipamd/tests/src/CMakeLists.txt @@ -8,33 +8,20 @@ include_directories( ${PROJECT_SOURCE_DIR}/include ) set (HIP_Unit_Test_VERSION_MAJOR 1) set (HIP_Unit_Test_VERSION_MINOR 0) -set (CUDA_PATH $ENV{CUDA_PATH}) -if (NOT DEFINED CUDA_PATH) - set( CUDA_PATH /usr/local/cuda) -endif() - set (HIP_PATH $ENV{HIP_PATH}) if (NOT DEFINED HIP_PATH) set (HIP_PATH ../..) endif() -set (HIP_PLATFORM $ENV{HIP_PLATFORM}) -if (NOT DEFINED HIP_PLATFORM) - if (EXISTS $CUDA_PATH) - set (HIP_PLATFORM nvcc) - else() - set (HIP_PLATFORM hcc) - endif() -endif() - +execute_process(COMMAND ${HIP_PATH}/bin/hipconfig --platform OUTPUT_VARIABLE HIP_PLATFORM) +MESSAGE ("HIP_PATH=" ${HIP_PATH}) if (${HIP_PLATFORM} STREQUAL "hcc") - MESSAGE ("HCC") + MESSAGE ("HIP_PLATFORM=hcc") set (HC_PATH ${HIP_PATH}/hc) set (HSA_PATH /opt/hsa) - #--- # Add HSA library: add_library(hsa-runtime64 SHARED IMPORTED) @@ -51,12 +38,13 @@ if (${HIP_PLATFORM} STREQUAL "hcc") elseif (${HIP_PLATFORM} STREQUAL "nvcc") - MESSAGE ("NVCC") + MESSAGE ("HIP_PLATFORM=nvcc") # NVCC does not not support -rdynamic option set(CMAKE_SHARED_LIBRARY_LINK_CXX_FLAGS ) set(CMAKE_SHARED_LIBRARY_LINK_C_FLAGS ) + else() - MESSAGE ("UNKNOWN HIP_PLATFORM=" ${HIP_PLATFORM}) + MESSAGE (FATAL_ERROR "UNKNOWN HIP_PLATFORM=" ${HIP_PLATFORM}) endif() set (HIPCC ${HIP_PATH}/bin/hipcc) @@ -109,11 +97,6 @@ macro (make_test_matches exe match_string) ) endmacro() - -#set(CMAKE_INSTALL_PREFIX "./install") -#install (TARGETS hipMemset DESTINATION bin) -#install (TARGETS hipEventRecord DESTINATION bin) - make_hip_executable (hip_ballot hip_ballot.cpp) make_hip_executable (hip_anyall hip_anyall.cpp) make_hip_executable (hip_popc hip_popc.cpp) @@ -121,7 +104,7 @@ make_hip_executable (hip_clz hip_clz.cpp) make_hip_executable (hip_brev hip_brev.cpp) make_hip_executable (hip_ffs hip_ffs.cpp) make_hip_executable (hipInfo hipInfo.cpp) -make_hip_executable (hipMemset hipMemset.cpp) +make_hip_executable (hipSetValidDevices hipSetValidDevices.cpp) make_hip_executable (hipMemcpy hipMemcpy.cpp) make_hip_executable (hipEventRecord hipEventRecord.cpp) make_hip_executable (hipLanguageExtensions hipLanguageExtensions.cpp) @@ -145,6 +128,7 @@ make_test(hipMemset --N 10 --memsetval 0x42 ) # small copy, just 10 bytes. make_test(hipMemset --N 10013 --memsetval 0x5a ) # oddball size. make_test(hipMemset --N 500M --memsetval 0xa6 ) # big copy make_test(hipGridLaunch " " ) +make_test(hipSetValidDevices " " ) make_test(hipMemcpy " " ) From fdeb4778226845d5d432c6c0eec38b8d5e129f50 Mon Sep 17 00:00:00 2001 From: Ben Sander Date: Thu, 28 Jan 2016 20:16:43 -0600 Subject: [PATCH 4/8] iScript cleanup, add --full --- hipamd/Makefile | 2 +- hipamd/bin/hipconfig | 45 ++++++++++++++++++++++++++----- hipamd/bin/hipconvertinplace.sh | 4 +-- hipamd/docs/markdown/hip_terms.md | 6 ++--- 4 files changed, 44 insertions(+), 13 deletions(-) diff --git a/hipamd/Makefile b/hipamd/Makefile index 6542b03acd..33b43f53e9 100644 --- a/hipamd/Makefile +++ b/hipamd/Makefile @@ -13,7 +13,7 @@ $(HIP_OBJECTS): HIPCC_FLAGS += -I$(HSA_PATH)/include $(HIP_OBJECTS): %.o:: %.cpp - $(HIPCC) $(HIPCC_FLAGS) $< -c -o $@ + $(HIPCC) $(HIPCC_FLAGS) $< -c -O3 -o $@ clean: diff --git a/hipamd/bin/hipconfig b/hipamd/bin/hipconfig index 5d982111ed..2af9ad0937 100755 --- a/hipamd/bin/hipconfig +++ b/hipamd/bin/hipconfig @@ -14,6 +14,7 @@ GetOptions( ,"compiler|c" => \$p_compiler ,"platform|P" => \$p_platform ,"cpp_config|cxx_config|C" => \$p_cpp_config + ,"full|f" => \$p_full, ,"newline|n" => \$p_newline ); @@ -23,6 +24,7 @@ if ($p_help) { print " --cpp_config, -C : print C++ compiler options\n"; print " --compiler, -c : print compiler (hcc or nvcc)\n"; print " --platform, -P : print platform (hcc or nvcc)\n"; + print " --full, -f : print full config\n"; print " --newline, -n : print newline\n"; print " --help, -h : print help message\n"; exit(); @@ -47,8 +49,13 @@ $HIP_PATH=$ENV{'HIP_PATH'}; $HIP_PATH=Cwd::realpath (dirname (dirname $0)) unless defined $HIP_PATH; # use parent directory of this tool +if ($HIP_PLATFORM eq "hcc") { + $CPP_CONFIG= " -D__HIP_PLATFORM_HCC__= -I$HIP_PATH/include -I$HCC_HOME/include"; +} +if ($HIP_PLATFORM eq "nvcc") { + $CPP_CONFIG = " -D__HIP_PLATFORM_NVCC__= -I$HIP_PATH/include -I$CUDA_PATH/include"; +}; -$printed = 0; if ($p_path) { print "$HIP_PATH"; $printed = 1; @@ -56,15 +63,39 @@ if ($p_path) { if ($p_cpp_config) { - if ($HIP_PLATFORM eq "hcc") { - print " -D__HIP_PLATFORM_HCC__= -I$HIP_PATH/include -I$HCC_HOME/include"; - } - if ($HIP_PLATFORM eq "nvcc") { - print " -D__HIP_PLATFORM_NVCC__= -I$HIP_PATH/include -I$CUDA_PATH/include"; - }; + print $CPP_CONFIG; $printed = 1; } +if ($p_full) { + print "== hipconfig\n"; + print "HIP_PATH : ", $HIP_PATH, "\n"; + print "HIP_PLATFORM : ", $HIP_PLATFORM, "\n"; + print "CPP_CONFIG : ", $CPP_CONFIG, "\n"; + if ($HIP_PLATFORM eq "hcc") + { + print "\n" ; + print "== hcc\n"; + print ("HCC_HOME : $HCC_HOME\n"); + system("$HCC_HOME/bin/hcc --version"); + } + if ($HIP_PLATFORM eq "nvcc") { + print "\n" ; + print "== nvcc\n"; + #print "CUDA_PATH :", $CUDA_PATH"; + system("nvcc --version"); + + } + print "\n" ; + + print "=== Environment Variables\n"; + system("env | egrep '^HIP|^HSA|^HCC|^CUDA'"); + + print "\n" ; + print "== Linux Kernel\n"; + system ("uname -a"); + $printed = 1; +} if (!$printed or $p_compiler or $p_platform) { diff --git a/hipamd/bin/hipconvertinplace.sh b/hipamd/bin/hipconvertinplace.sh index 8381687853..a8c8d6d9e8 100755 --- a/hipamd/bin/hipconvertinplace.sh +++ b/hipamd/bin/hipconvertinplace.sh @@ -4,10 +4,10 @@ #hipify "inplace" all code files in specified directory. # This can be quite handy when dealing with an existing CUDA code base since the script -# preseeves the existing directory structure. +# preserves the existing directory structure. # For each code file, this script will: -# - If ".prehip file does not exist, copy the original code to a new file withextension ".prehip". Then Hipify the code file. +# - If ".prehip file does not exist, copy the original code to a new file with extension ".prehip". Then Hipify the code file. # - If ".prehip" file exists, this is used as input to hipify. # (this is useful for testing improvements to the hipify toolset). diff --git a/hipamd/docs/markdown/hip_terms.md b/hipamd/docs/markdown/hip_terms.md index d55d5da7f5..4ab04f459a 100644 --- a/hipamd/docs/markdown/hip_terms.md +++ b/hipamd/docs/markdown/hip_terms.md @@ -34,9 +34,9 @@ |Vector|`float4`|`float4`|`hc::`
`short_vector::float4`|`concurrency::`
`graphics::float_4`|`float4` ###Notes -1. For HC and C++AMP, assume captured _tiled_ext_ t_ext and captured _extent_ ext. These languages use captured variables to pass information to the kernel rather than special built-in functions so variable name may vary. -2. The indexig functions (starting with thread-index) show the terminology for a 1D grid. Some APIs use reverse order of xyz / 012 indexing for 3D grids. -3. HC allos tile dimensions to be specified at runtime while C++AMP requires that tile dimensions are specified at compile-time. Thus hc syntax for tile dims is `t_ext.tile_dim[0]` while C++AMP is t_ext.tile_dim0. +1. For HC and C++AMP, assume a captured _tiled_ext_ named "t_ext" and captured _extent_ named "ext". These languages use captured variables to pass information to the kernel rather than using special built-in functions so the exact variable name may vary. +2. The indexing functions (starting with `thread-index`) show the terminology for a 1D grid. Some APIs use reverse order of xyz / 012 indexing for 3D grids. +3. HC allows tile dimensions to be specified at runtime while C++AMP requires that tile dimensions be specified at compile-time. Thus hc syntax for tile dims is `t_ext.tile_dim[0]` while C++AMP is t_ext.tile_dim0. From 39c5f0f6107158a677d01616ddacbca438abf2b6 Mon Sep 17 00:00:00 2001 From: Ben Sander Date: Wed, 3 Feb 2016 09:48:44 -0600 Subject: [PATCH 5/8] Add hcc-config info to --full --- hipamd/bin/hipconfig | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/hipamd/bin/hipconfig b/hipamd/bin/hipconfig index 2af9ad0937..f283ac05d6 100755 --- a/hipamd/bin/hipconfig +++ b/hipamd/bin/hipconfig @@ -78,6 +78,10 @@ if ($p_full) { print "== hcc\n"; print ("HCC_HOME : $HCC_HOME\n"); system("$HCC_HOME/bin/hcc --version"); + print ("HCC-cxxflags: "); + system("$HCC_HOME/bin/hcc-config --cxxflags"); + print ("HCC-ldflags : "); + system("$HCC_HOME/bin/hcc-config --ldflags"); } if ($HIP_PLATFORM eq "nvcc") { print "\n" ; From a06e0d9050e8947a1e78333ca53bc82d935d5e67 Mon Sep 17 00:00:00 2001 From: Ben Sander Date: Fri, 5 Feb 2016 09:55:57 -0600 Subject: [PATCH 6/8] Doc update --- hipamd/docs/markdown/hip_faq.md | 37 ++++++-- hipamd/docs/markdown/hip_kernel_language.md | 95 +++++++++++---------- hipamd/docs/markdown/hip_porting_guide.md | 34 +++++++- 3 files changed, 110 insertions(+), 56 deletions(-) diff --git a/hipamd/docs/markdown/hip_faq.md b/hipamd/docs/markdown/hip_faq.md index a6a8bb1661..4c321723a7 100644 --- a/hipamd/docs/markdown/hip_faq.md +++ b/hipamd/docs/markdown/hip_faq.md @@ -1,7 +1,32 @@ # FAQ + + +**Table of Contents** *generated with [DocToc](https://github.com/thlorenz/doctoc)* -### What APIs does HIP support? +- [FAQ](#faq) +- [Table of Contents](#table-of-contents) + - [What APIs does HIP support?](#what-apis-does-hip-support) + - [What is not supported?](#what-is-not-supported) + - [Run-time features:](#run-time-features) + - [How does HIP compare with OpenCL?](#how-does-hip-compare-with-opencl) + - [What hardware does HIP support?](#what-hardware-does-hip-support) + - [Does Hipify automatically convert all source code?](#does-hipify-automatically-convert-all-source-code) + - [What is NVCC?](#what-is-nvcc) + - [What is HCC?](#what-is-hcc) + - [Why use HIP rather than supporting CUDA directly?](#why-use-hip-rather-than-supporting-cuda-directly) + - [Can I develop HIP code on an Nvidia CUDA platform?](#can-i-develop-hip-code-on-an-nvidia-cuda-platform) + - [Can I develop HIP code on an AMD HCC platform?](#can-i-develop-hip-code-on-an-amd-hcc-platform) + - [Can a HIP binary run on both AMD and Nvidia platforms?](#can-a-hip-binary-run-on-both-amd-and-nvidia-platforms) + - [Hmmm](#hmmm) + - [Link2 Is it ready?](#link2-is-it-ready) + - [What's the difference between HIP and hc?](#whats-the-difference-between-hip-and-hc) + + +================= + + +### What APIs does HIP support ? HIP provides the following: - Devices (hipSetDevice(), hipGetDeviceProperties(), etc) - Memory management (hipMalloc(), hipMemcpy(), hipFree()) @@ -15,7 +40,7 @@ HIP provides the following: The HIP documentation describes each API and its limitations, if any, compared with the equivalent CUDA API. ### What is not supported? -#### Run-time features: +#### Run-time features - Textures - Dynamic parallelism - Managed memory @@ -23,7 +48,7 @@ The HIP documentation describes each API and its limitations, if any, compared w - CUDA array, mipmappedArray and pitched memory - CUDA Driver API -#### Kernel language features: +#### Kernel language features - Device-side dynamic memory allocations (malloc, free, new, delete) - Virtual functions, indirect functions and try/catch - `__prof_trigger` @@ -58,9 +83,10 @@ NVCC is Nvidia's compiler driver for compiling "CUDA C++" code into PTX or devic ### What is HCC? HCC is AMD's compiler driver which compiles "heterogenous C++" code into HSAIL or GCN device code for AMD GPUs. HCC is an open-source compiler based on recent versions of CLANG/LLVM. -### Why use HIP rather than supporting CUDA run time directly? +### Why use HIP rather than supporting CUDA directly? While HIP is a strong subset of the CUDA, it is a subset. The HIP layer allows that subset to be clearly defined and documented. -Developers who code to the HIP API can be assured there code will remain portable across Nvidia and AMD platforms. +Developers who code to the HIP API can be assured there code will remain portable across Nvidia and AMD platforms. +In addition, HIP defines portable mechanisms to query architectural features, and supports a larger 64-bit wavesize which expands the return type for cross-lane functions like ballot and shuffle from 32-bit ints to 64-bit ints. ### Can I develop HIP code on an Nvidia CUDA platform? Yes! HIP's CUDA path only exposes the APIs and functionality that work on both NVCC and HCC back-ends. @@ -75,6 +101,7 @@ Yes! HIP's HCC path only exposes the APIs and functions that work on both NVCC a ### Can a HIP binary run on both AMD and Nvidia platforms? HIP is a source-portable language that can be compiled to run on either the HCC or NVCC platform. HIP tools don't create a "fat binary" that can run on either platform, however. + ### What's the difference between HIP and hc? HIP is a portable C++ language that supports a strong subset of the CUDA run-time APIs and device-kernel language. It's designed to simplify CUDA conversion to portable C++. HIP provides a C-compatible run-time API, C-compatible kernel-launch mechanism, C++ kernel language and pointer-based memory management. diff --git a/hipamd/docs/markdown/hip_kernel_language.md b/hipamd/docs/markdown/hip_kernel_language.md index cbc25ff518..e7a6baa1a9 100644 --- a/hipamd/docs/markdown/hip_kernel_language.md +++ b/hipamd/docs/markdown/hip_kernel_language.md @@ -1,53 +1,54 @@ -# HIP Kernel Language + + +**Table of Contents** *generated with [DocToc](https://github.com/thlorenz/doctoc)* + +- [Introduction](#introduction) +- [Function-Type Qualifiers](#function-type-qualifiers) + - [`__device__`](#__device__) + - [`__global__`](#__global__) + - [`__host__`](#__host__) +- [Calling `__global__` Functions](#calling-__global__-functions) +- [Kernel-Launch Example](#kernel-launch-example) +- [Variable-Type Qualifiers](#variable-type-qualifiers) + - [`__constant__`](#__constant__) + - [`__shared__`](#__shared__) + - [`__managed__`](#__managed__) + - [`__restrict__`](#__restrict__) +- [Built-In Variables](#built-in-variables) + - [Coordinate Built-Ins](#coordinate-built-ins) + - [warpSize](#warpsize) +- [Vector Types](#vector-types) + - [Short Vector Types](#short-vector-types) + - [dim3](#dim3) +- [Memory-Fence Instructions](#memory-fence-instructions) +- [Synchronization Functions](#synchronization-functions) +- [Math Functions](#math-functions) + - [Single Precision Mathematical Functions](#single-precision-mathematical-functions) + - [Double Precision Mathematical Functions](#double-precision-mathematical-functions) + - [Integer Intrinsics](#integer-intrinsics) +- [Texture Functions](#texture-functions) +- [Surface Functions](#surface-functions) +- [Timer Functions](#timer-functions) +- [Atomic Functions](#atomic-functions) + - [Caveats and Features Under-Development:](#caveats-and-features-under-development) +- [Warp Cross-Lane Functions](#warp-cross-lane-functions) + - [Warp Vote and Ballot Functions](#warp-vote-and-ballot-functions) + - [Warp Shuffle Functions](#warp-shuffle-functions) +- [Profiler Counter Function](#profiler-counter-function) +- [Assert](#assert) +- [Printf](#printf) +- [Device-Side Dynamic Global Memory Allocation](#device-side-dynamic-global-memory-allocation) +- [`__launch_bounds__`](#__launch_bounds__) +- [Register Keyword](#register-keyword) +- [Pragma Unroll](#pragma-unroll) +- [In-Line Assembly](#in-line-assembly) +- [C++ Support](#c-support) + + -###Table of Contents -================= - - * [HIP Kernel Language](#hip-kernel-language" aria-hidden="true"> hipify --inplace +``` + +For each input file FILE, this script will: + - If "FILE.prehip file does not exist, copy the original code to a new file with extension ".prehip". Then Hipify the code file. + - If "FILE.prehip" file exists, hipify FILE.prehip and save to FILE. + +This is useful for testing improvements to the hipify toolset. + + +The "hipconvertinplace.sh" script will perform inplace conversion for all code files in the specified directory. +This can be quite handy when dealing with an existing CUDA code base since the script preserves the existing directory structure +and filenames - so includes work. After converting in-place, you can review the code to add additional parameters to +directory names. + + +```shell +> hipconverinplace.sh MY_SRC_DIR +``` + + + + ## Distinguishing Compiler Modes From c482a3f45641d36e09cc6f9c083b4cd60dc2110b Mon Sep 17 00:00:00 2001 From: Ben Sander Date: Mon, 8 Feb 2016 21:45:49 -0600 Subject: [PATCH 7/8] in HIPCHECK, only run command once even if error occurs --- hipamd/tests/src/test_common.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/hipamd/tests/src/test_common.h b/hipamd/tests/src/test_common.h index 891fe381d2..ac89459956 100644 --- a/hipamd/tests/src/test_common.h +++ b/hipamd/tests/src/test_common.h @@ -29,12 +29,15 @@ #define HIPCHECK(error) \ - if (error != hipSuccess) { \ +{\ + hipError_t localError = error; \ + if (localError != hipSuccess) { \ printf("%serror: '%s'(%d) at %s:%d%s\n", \ - KRED,hipGetErrorString(error), error,\ + KRED,hipGetErrorString(localError), localError,\ __FILE__, __LINE__,KNRM); \ failed("API returned error code.");\ - } + }\ +} #define HIPASSERT(condition) \ if (! (condition) ) { \ From ce2fc0f7fe834a123da05e62f6c8a24246d6f3b5 Mon Sep 17 00:00:00 2001 From: Ben Sander Date: Mon, 8 Feb 2016 22:55:23 -0600 Subject: [PATCH 8/8] Test fixes: - Remove reference to missing test. - Add hipMemset back. - Parse --gpu option to specify default starting GPU. --- hipamd/tests/src/CMakeLists.txt | 3 +-- hipamd/tests/src/hipMemset.cpp | 4 +++- hipamd/tests/src/test_common.cpp | 8 +++++++- hipamd/tests/src/test_common.h | 1 + 4 files changed, 12 insertions(+), 4 deletions(-) diff --git a/hipamd/tests/src/CMakeLists.txt b/hipamd/tests/src/CMakeLists.txt index 77882428e2..8d37f79208 100644 --- a/hipamd/tests/src/CMakeLists.txt +++ b/hipamd/tests/src/CMakeLists.txt @@ -104,8 +104,8 @@ make_hip_executable (hip_clz hip_clz.cpp) make_hip_executable (hip_brev hip_brev.cpp) make_hip_executable (hip_ffs hip_ffs.cpp) make_hip_executable (hipInfo hipInfo.cpp) -make_hip_executable (hipSetValidDevices hipSetValidDevices.cpp) make_hip_executable (hipMemcpy hipMemcpy.cpp) +make_hip_executable (hipMemset hipMemset.cpp) make_hip_executable (hipEventRecord hipEventRecord.cpp) make_hip_executable (hipLanguageExtensions hipLanguageExtensions.cpp) make_hip_executable (hipGridLaunch hipGridLaunch.cpp) @@ -128,7 +128,6 @@ make_test(hipMemset --N 10 --memsetval 0x42 ) # small copy, just 10 bytes. make_test(hipMemset --N 10013 --memsetval 0x5a ) # oddball size. make_test(hipMemset --N 500M --memsetval 0xa6 ) # big copy make_test(hipGridLaunch " " ) -make_test(hipSetValidDevices " " ) make_test(hipMemcpy " " ) diff --git a/hipamd/tests/src/hipMemset.cpp b/hipamd/tests/src/hipMemset.cpp index 63e14a5369..9769acaf25 100644 --- a/hipamd/tests/src/hipMemset.cpp +++ b/hipamd/tests/src/hipMemset.cpp @@ -31,9 +31,11 @@ int main(int argc, char *argv[]) HipTest::parseStandardArguments(argc, argv, true); + HIPCHECK(hipSetDevice(p_gpuDevice)); + size_t Nbytes = N*sizeof(char); - printf ("N=%zu memsetval=%2x\n", N, memsetval); + printf ("N=%zu memsetval=%2x device=%d\n", N, memsetval, p_gpuDevice); char *A_d; char *A_h; diff --git a/hipamd/tests/src/test_common.cpp b/hipamd/tests/src/test_common.cpp index 45d01d147b..d7a108a11b 100644 --- a/hipamd/tests/src/test_common.cpp +++ b/hipamd/tests/src/test_common.cpp @@ -27,6 +27,7 @@ char memsetval=0x42; int iterations = 1; unsigned blocksPerCU = 6; // to hide latency unsigned threadsPerBlock = 256; +int p_gpuDevice = 0; @@ -105,7 +106,12 @@ int parseStandardArguments(int argc, char *argv[], bool failOnUndefinedArg) memsetval = ex; } else if (!strcmp(arg, "--iterations") || (!strcmp(arg, "-i"))) { if (++i >= argc || !HipTest::parseInt(argv[i], &iterations)) { - failed("Bad itertions argument"); + failed("Bad iterations argument"); + } + + } else if (!strcmp(arg, "--gpu") || (!strcmp(arg, "-g"))) { + if (++i >= argc || !HipTest::parseInt(argv[i], &p_gpuDevice)) { + failed("Bad gpuDevice argument"); } } diff --git a/hipamd/tests/src/test_common.h b/hipamd/tests/src/test_common.h index ac89459956..fee052c1ad 100644 --- a/hipamd/tests/src/test_common.h +++ b/hipamd/tests/src/test_common.h @@ -52,6 +52,7 @@ extern char memsetval; extern int iterations; extern unsigned blocksPerCU; extern unsigned threadsPerBlock; +extern int p_gpuDevice; namespace HipTest {