diff --git a/hipamd/Makefile b/hipamd/Makefile index 6542b03acd..33b43f53e9 100644 --- a/hipamd/Makefile +++ b/hipamd/Makefile @@ -13,7 +13,7 @@ $(HIP_OBJECTS): HIPCC_FLAGS += -I$(HSA_PATH)/include $(HIP_OBJECTS): %.o:: %.cpp - $(HIPCC) $(HIPCC_FLAGS) $< -c -o $@ + $(HIPCC) $(HIPCC_FLAGS) $< -c -O3 -o $@ clean: diff --git a/hipamd/bin/hipconfig b/hipamd/bin/hipconfig index 5d982111ed..f283ac05d6 100755 --- a/hipamd/bin/hipconfig +++ b/hipamd/bin/hipconfig @@ -14,6 +14,7 @@ GetOptions( ,"compiler|c" => \$p_compiler ,"platform|P" => \$p_platform ,"cpp_config|cxx_config|C" => \$p_cpp_config + ,"full|f" => \$p_full, ,"newline|n" => \$p_newline ); @@ -23,6 +24,7 @@ if ($p_help) { print " --cpp_config, -C : print C++ compiler options\n"; print " --compiler, -c : print compiler (hcc or nvcc)\n"; print " --platform, -P : print platform (hcc or nvcc)\n"; + print " --full, -f : print full config\n"; print " --newline, -n : print newline\n"; print " --help, -h : print help message\n"; exit(); @@ -47,8 +49,13 @@ $HIP_PATH=$ENV{'HIP_PATH'}; $HIP_PATH=Cwd::realpath (dirname (dirname $0)) unless defined $HIP_PATH; # use parent directory of this tool +if ($HIP_PLATFORM eq "hcc") { + $CPP_CONFIG= " -D__HIP_PLATFORM_HCC__= -I$HIP_PATH/include -I$HCC_HOME/include"; +} +if ($HIP_PLATFORM eq "nvcc") { + $CPP_CONFIG = " -D__HIP_PLATFORM_NVCC__= -I$HIP_PATH/include -I$CUDA_PATH/include"; +}; -$printed = 0; if ($p_path) { print "$HIP_PATH"; $printed = 1; @@ -56,15 +63,43 @@ if ($p_path) { if ($p_cpp_config) { - if ($HIP_PLATFORM eq "hcc") { - print " -D__HIP_PLATFORM_HCC__= -I$HIP_PATH/include -I$HCC_HOME/include"; - } - if ($HIP_PLATFORM eq "nvcc") { - print " -D__HIP_PLATFORM_NVCC__= -I$HIP_PATH/include -I$CUDA_PATH/include"; - }; + print $CPP_CONFIG; $printed = 1; } +if ($p_full) { + print "== hipconfig\n"; + print "HIP_PATH : ", $HIP_PATH, "\n"; + print "HIP_PLATFORM : ", $HIP_PLATFORM, "\n"; + print "CPP_CONFIG : ", $CPP_CONFIG, "\n"; + if ($HIP_PLATFORM eq "hcc") + { + print "\n" ; + print "== hcc\n"; + print ("HCC_HOME : $HCC_HOME\n"); + system("$HCC_HOME/bin/hcc --version"); + print ("HCC-cxxflags: "); + system("$HCC_HOME/bin/hcc-config --cxxflags"); + print ("HCC-ldflags : "); + system("$HCC_HOME/bin/hcc-config --ldflags"); + } + if ($HIP_PLATFORM eq "nvcc") { + print "\n" ; + print "== nvcc\n"; + #print "CUDA_PATH :", $CUDA_PATH"; + system("nvcc --version"); + + } + print "\n" ; + + print "=== Environment Variables\n"; + system("env | egrep '^HIP|^HSA|^HCC|^CUDA'"); + + print "\n" ; + print "== Linux Kernel\n"; + system ("uname -a"); + $printed = 1; +} if (!$printed or $p_compiler or $p_platform) { diff --git a/hipamd/bin/hipconvertinplace.sh b/hipamd/bin/hipconvertinplace.sh index 8381687853..a8c8d6d9e8 100755 --- a/hipamd/bin/hipconvertinplace.sh +++ b/hipamd/bin/hipconvertinplace.sh @@ -4,10 +4,10 @@ #hipify "inplace" all code files in specified directory. # This can be quite handy when dealing with an existing CUDA code base since the script -# preseeves the existing directory structure. +# preserves the existing directory structure. # For each code file, this script will: -# - If ".prehip file does not exist, copy the original code to a new file withextension ".prehip". Then Hipify the code file. +# - If ".prehip file does not exist, copy the original code to a new file with extension ".prehip". Then Hipify the code file. # - If ".prehip" file exists, this is used as input to hipify. # (this is useful for testing improvements to the hipify toolset). diff --git a/hipamd/bin/hipify b/hipamd/bin/hipify index e5f17b69d0..f4de89aab2 100755 --- a/hipamd/bin/hipify +++ b/hipamd/bin/hipify @@ -345,6 +345,28 @@ while (@ARGV) { $ft{'dev'} += s/\bcudaDeviceProp\b/hipDeviceProp_t/g; $ft{'dev'} += s/\bcudaGetDeviceProperties\b/hipDeviceGetProperties/g; + # Attribute + $ft{'err'} += s/\bcudaDevAttrMaxThreadsPerBlock\b/hipDeviceAttributeMaxThreadsPerBlock/g; + $ft{'err'} += s/\bcudaDevAttrMaxBlockDimX\b/hipDeviceAttributeMaxBlockDimX/g; + $ft{'err'} += s/\bcudaDevAttrMaxBlockDimY\b/hipDeviceAttributeMaxBlockDimY/g; + $ft{'err'} += s/\bcudaDevAttrMaxBlockDimZ\b/hipDeviceAttributeMaxBlockDimZ/g; + $ft{'err'} += s/\bcudaDevAttrMaxGridDimX\b/hipDeviceAttributeMaxGridDimX/g; + $ft{'err'} += s/\bcudaDevAttrMaxGridDimY\b/hipDeviceAttributeMaxGridDimY/g; + $ft{'err'} += s/\bcudaDevAttrMaxGridDimZ\b/hipDeviceAttributeMaxGridDimZ/g; + $ft{'err'} += s/\bcudaDevAttrMaxSharedMemoryPerBlock\b/hipDeviceAttributeMaxSharedMemoryPerBlock/g; + $ft{'err'} += s/\bcudaDevAttrTotalConstantMemory\b/hipDeviceAttributeTotalConstantMemory/g; + $ft{'err'} += s/\bcudaDevAttrWarpSize\b/hipDeviceAttributeWarpSize/g; + $ft{'err'} += s/\bcudaDevAttrMaxRegistersPerBlock\b/hipDeviceAttributeMaxRegistersPerBlock/g; + $ft{'err'} += s/\bcudaDevAttrClockRate\b/hipDeviceAttributeClockRate/g; + $ft{'err'} += s/\bcudaDevAttrMultiProcessorCount\b/hipDeviceAttributeMultiprocessorCount/g; + $ft{'err'} += s/\bcudaDevAttrComputeMode\b/hipDeviceAttributeComputeMode/g; + $ft{'err'} += s/\bcudaDevAttrL2CacheSize\b/hipDeviceAttributeL2CacheSize/g; + $ft{'err'} += s/\bcudaDevAttrMaxThreadsPerMultiProcessor\b/hipDeviceAttributeMaxThreadsPerMultiProcessor/g; + $ft{'err'} += s/\bcudaDevAttrComputeCapabilityMajor\b/hipDeviceAttributeComputeCapabilityMajor/g; + $ft{'err'} += s/\bcudaDevAttrComputeCapabilityMinor\b/hipDeviceAttributeComputeCapabilityMinor/g; + $ft{'dev'} += s/\bcudaDeviceAttr\b/hipDeviceAttribute_t/g; + $ft{'dev'} += s/\bcudaDeviceGetAttribute\b/hipDeviceGetAttribute/g; + # Cache config $ft{'dev'} += s/\bcudaDeviceSetCacheConfig\b/hipDeviceSetCacheConfig/g; $ft{'dev'} += s/\bcudaThreadSetCacheConfig\b/hipDeviceSetCacheConfig/g; # translate deprecated diff --git a/hipamd/docs/markdown/hip_faq.md b/hipamd/docs/markdown/hip_faq.md index a6a8bb1661..4c321723a7 100644 --- a/hipamd/docs/markdown/hip_faq.md +++ b/hipamd/docs/markdown/hip_faq.md @@ -1,7 +1,32 @@ # FAQ + + +**Table of Contents** *generated with [DocToc](https://github.com/thlorenz/doctoc)* -### What APIs does HIP support? +- [FAQ](#faq) +- [Table of Contents](#table-of-contents) + - [What APIs does HIP support?](#what-apis-does-hip-support) + - [What is not supported?](#what-is-not-supported) + - [Run-time features:](#run-time-features) + - [How does HIP compare with OpenCL?](#how-does-hip-compare-with-opencl) + - [What hardware does HIP support?](#what-hardware-does-hip-support) + - [Does Hipify automatically convert all source code?](#does-hipify-automatically-convert-all-source-code) + - [What is NVCC?](#what-is-nvcc) + - [What is HCC?](#what-is-hcc) + - [Why use HIP rather than supporting CUDA directly?](#why-use-hip-rather-than-supporting-cuda-directly) + - [Can I develop HIP code on an Nvidia CUDA platform?](#can-i-develop-hip-code-on-an-nvidia-cuda-platform) + - [Can I develop HIP code on an AMD HCC platform?](#can-i-develop-hip-code-on-an-amd-hcc-platform) + - [Can a HIP binary run on both AMD and Nvidia platforms?](#can-a-hip-binary-run-on-both-amd-and-nvidia-platforms) + - [Hmmm](#hmmm) + - [Link2 Is it ready?](#link2-is-it-ready) + - [What's the difference between HIP and hc?](#whats-the-difference-between-hip-and-hc) + + +================= + + +### What APIs does HIP support ? HIP provides the following: - Devices (hipSetDevice(), hipGetDeviceProperties(), etc) - Memory management (hipMalloc(), hipMemcpy(), hipFree()) @@ -15,7 +40,7 @@ HIP provides the following: The HIP documentation describes each API and its limitations, if any, compared with the equivalent CUDA API. ### What is not supported? -#### Run-time features: +#### Run-time features - Textures - Dynamic parallelism - Managed memory @@ -23,7 +48,7 @@ The HIP documentation describes each API and its limitations, if any, compared w - CUDA array, mipmappedArray and pitched memory - CUDA Driver API -#### Kernel language features: +#### Kernel language features - Device-side dynamic memory allocations (malloc, free, new, delete) - Virtual functions, indirect functions and try/catch - `__prof_trigger` @@ -58,9 +83,10 @@ NVCC is Nvidia's compiler driver for compiling "CUDA C++" code into PTX or devic ### What is HCC? HCC is AMD's compiler driver which compiles "heterogenous C++" code into HSAIL or GCN device code for AMD GPUs. HCC is an open-source compiler based on recent versions of CLANG/LLVM. -### Why use HIP rather than supporting CUDA run time directly? +### Why use HIP rather than supporting CUDA directly? While HIP is a strong subset of the CUDA, it is a subset. The HIP layer allows that subset to be clearly defined and documented. -Developers who code to the HIP API can be assured there code will remain portable across Nvidia and AMD platforms. +Developers who code to the HIP API can be assured there code will remain portable across Nvidia and AMD platforms. +In addition, HIP defines portable mechanisms to query architectural features, and supports a larger 64-bit wavesize which expands the return type for cross-lane functions like ballot and shuffle from 32-bit ints to 64-bit ints. ### Can I develop HIP code on an Nvidia CUDA platform? Yes! HIP's CUDA path only exposes the APIs and functionality that work on both NVCC and HCC back-ends. @@ -75,6 +101,7 @@ Yes! HIP's HCC path only exposes the APIs and functions that work on both NVCC a ### Can a HIP binary run on both AMD and Nvidia platforms? HIP is a source-portable language that can be compiled to run on either the HCC or NVCC platform. HIP tools don't create a "fat binary" that can run on either platform, however. + ### What's the difference between HIP and hc? HIP is a portable C++ language that supports a strong subset of the CUDA run-time APIs and device-kernel language. It's designed to simplify CUDA conversion to portable C++. HIP provides a C-compatible run-time API, C-compatible kernel-launch mechanism, C++ kernel language and pointer-based memory management. diff --git a/hipamd/docs/markdown/hip_kernel_language.md b/hipamd/docs/markdown/hip_kernel_language.md index cbc25ff518..e7a6baa1a9 100644 --- a/hipamd/docs/markdown/hip_kernel_language.md +++ b/hipamd/docs/markdown/hip_kernel_language.md @@ -1,53 +1,54 @@ -# HIP Kernel Language + + +**Table of Contents** *generated with [DocToc](https://github.com/thlorenz/doctoc)* + +- [Introduction](#introduction) +- [Function-Type Qualifiers](#function-type-qualifiers) + - [`__device__`](#__device__) + - [`__global__`](#__global__) + - [`__host__`](#__host__) +- [Calling `__global__` Functions](#calling-__global__-functions) +- [Kernel-Launch Example](#kernel-launch-example) +- [Variable-Type Qualifiers](#variable-type-qualifiers) + - [`__constant__`](#__constant__) + - [`__shared__`](#__shared__) + - [`__managed__`](#__managed__) + - [`__restrict__`](#__restrict__) +- [Built-In Variables](#built-in-variables) + - [Coordinate Built-Ins](#coordinate-built-ins) + - [warpSize](#warpsize) +- [Vector Types](#vector-types) + - [Short Vector Types](#short-vector-types) + - [dim3](#dim3) +- [Memory-Fence Instructions](#memory-fence-instructions) +- [Synchronization Functions](#synchronization-functions) +- [Math Functions](#math-functions) + - [Single Precision Mathematical Functions](#single-precision-mathematical-functions) + - [Double Precision Mathematical Functions](#double-precision-mathematical-functions) + - [Integer Intrinsics](#integer-intrinsics) +- [Texture Functions](#texture-functions) +- [Surface Functions](#surface-functions) +- [Timer Functions](#timer-functions) +- [Atomic Functions](#atomic-functions) + - [Caveats and Features Under-Development:](#caveats-and-features-under-development) +- [Warp Cross-Lane Functions](#warp-cross-lane-functions) + - [Warp Vote and Ballot Functions](#warp-vote-and-ballot-functions) + - [Warp Shuffle Functions](#warp-shuffle-functions) +- [Profiler Counter Function](#profiler-counter-function) +- [Assert](#assert) +- [Printf](#printf) +- [Device-Side Dynamic Global Memory Allocation](#device-side-dynamic-global-memory-allocation) +- [`__launch_bounds__`](#__launch_bounds__) +- [Register Keyword](#register-keyword) +- [Pragma Unroll](#pragma-unroll) +- [In-Line Assembly](#in-line-assembly) +- [C++ Support](#c-support) + + -###Table of Contents -================= - - * [HIP Kernel Language](#hip-kernel-language" aria-hidden="true"> hipify --inplace +``` + +For each input file FILE, this script will: + - If "FILE.prehip file does not exist, copy the original code to a new file with extension ".prehip". Then Hipify the code file. + - If "FILE.prehip" file exists, hipify FILE.prehip and save to FILE. + +This is useful for testing improvements to the hipify toolset. + + +The "hipconvertinplace.sh" script will perform inplace conversion for all code files in the specified directory. +This can be quite handy when dealing with an existing CUDA code base since the script preserves the existing directory structure +and filenames - so includes work. After converting in-place, you can review the code to add additional parameters to +directory names. + + +```shell +> hipconverinplace.sh MY_SRC_DIR +``` + + + + ## Distinguishing Compiler Modes diff --git a/hipamd/docs/markdown/hip_terms.md b/hipamd/docs/markdown/hip_terms.md index d55d5da7f5..4ab04f459a 100644 --- a/hipamd/docs/markdown/hip_terms.md +++ b/hipamd/docs/markdown/hip_terms.md @@ -34,9 +34,9 @@ |Vector|`float4`|`float4`|`hc::`
`short_vector::float4`|`concurrency::`
`graphics::float_4`|`float4` ###Notes -1. For HC and C++AMP, assume captured _tiled_ext_ t_ext and captured _extent_ ext. These languages use captured variables to pass information to the kernel rather than special built-in functions so variable name may vary. -2. The indexig functions (starting with thread-index) show the terminology for a 1D grid. Some APIs use reverse order of xyz / 012 indexing for 3D grids. -3. HC allos tile dimensions to be specified at runtime while C++AMP requires that tile dimensions are specified at compile-time. Thus hc syntax for tile dims is `t_ext.tile_dim[0]` while C++AMP is t_ext.tile_dim0. +1. For HC and C++AMP, assume a captured _tiled_ext_ named "t_ext" and captured _extent_ named "ext". These languages use captured variables to pass information to the kernel rather than using special built-in functions so the exact variable name may vary. +2. The indexing functions (starting with `thread-index`) show the terminology for a 1D grid. Some APIs use reverse order of xyz / 012 indexing for 3D grids. +3. HC allows tile dimensions to be specified at runtime while C++AMP requires that tile dimensions be specified at compile-time. Thus hc syntax for tile dims is `t_ext.tile_dim[0]` while C++AMP is t_ext.tile_dim0. diff --git a/hipamd/include/hcc_detail/hip_runtime.h b/hipamd/include/hcc_detail/hip_runtime.h index 6d906dc5d5..958c2b0a38 100644 --- a/hipamd/include/hcc_detail/hip_runtime.h +++ b/hipamd/include/hcc_detail/hip_runtime.h @@ -325,6 +325,16 @@ __device__ inline unsigned int __ffsll(unsigned long long int input) return hc::__lastbit_u32_u64( input)+1; } +__device__ inline unsigned int __ffs(int input) +{ + return hc::__lastbit_u32_s32( input)+1; +} + +__device__ inline unsigned int __ffsll(long long int input) +{ + return hc::__lastbit_u32_s64( input)+1; +} + __device__ inline unsigned int __brev( unsigned int input) { return hc::__bitrev_b32( input); @@ -336,7 +346,6 @@ __device__ inline unsigned long long int __brevll( unsigned long long int input) } // warp vote function __all __any __ballot - __device__ inline int __all( int input) { return hc::__all( input); diff --git a/hipamd/include/hcc_detail/hip_runtime_api.h b/hipamd/include/hcc_detail/hip_runtime_api.h index 5eb1a48075..225b065654 100644 --- a/hipamd/include/hcc_detail/hip_runtime_api.h +++ b/hipamd/include/hcc_detail/hip_runtime_api.h @@ -230,6 +230,13 @@ hipError_t hipGetDevice(int *device); */ hipError_t hipGetDeviceCount(int *count); +/** + * @brief Query device attribute. + * @param [out] pi pointer to value to return + * @param [in] attr attribute to query + * @param [in] device which device to query for information + */ +hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device); /** * @brief Returns device properties. @@ -687,11 +694,11 @@ hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind /** - * @brief Copies count bytes from the memory area pointed to by src to the memory area pointed to by offset bytes from the start of symbol symbol. + * @brief Copies @p sizeBytes bytes from the memory area pointed to by @p src to the memory area pointed to by @p offset bytes from the start of symbol @p symbol. * * The memory areas may not overlap. Symbol can either be a variable that resides in global or constant memory space, or it can be a character string, * naming a variable that resides in global or constant memory space. Kind can be either hipMemcpyHostToDevice or hipMemcpyDeviceToDevice - * TODO: cudaErrorInvalidSymbol and cudaErrorInvalidMemcpyDirection is not supported, use hipErrorUnknown for now + * TODO: cudaErrorInvalidSymbol and cudaErrorInvalidMemcpyDirection is not supported, use hipErrorUnknown for now. * * @param[in] symbolName - Symbol destination on device * @param[in] src - Data being copy from @@ -706,8 +713,6 @@ hipError_t hipMemcpyToSymbol(const char* symbolName, const void *src, size_t siz /** * @brief Copy data from src to dst asynchronously. * - * It supports memory from host to device, - * device to host, device to device and host to host. * TODO: cudaErrorInvalidMemcpyDirection error code is not supported right now, use hipErrorUnknown for now * * @param[out] dst Data being copy to @@ -735,7 +740,7 @@ hipError_t hipMemset(void* dst, int value, size_t sizeBytes ); /** - * @brief Fills the first count bytes of the memory area pointed to by dev with the constant byte value value. + * @brief Fills the first sizeBytes bytes of the memory area pointed to by dev with the constant byte value value. * * hipMemsetAsync() is asynchronous with respect to the host, so the call may return before the memset is complete. * The operation can optionally be associated to a stream by passing a non-zero stream argument. @@ -743,7 +748,7 @@ hipError_t hipMemset(void* dst, int value, size_t sizeBytes ); * * @param[out] dst Pointer to device memory * @param[in] value - Value to set for each byte of specified memory - * @param[in] count - Size in bytes to set + * @param[in] sizeBytes - Size in bytes to set * @param[in] stream - Stream identifier * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree */ @@ -817,7 +822,7 @@ hipError_t hipDeviceEnablePeerAccess ( int peerDevice, unsigned int flags ); * @param [in] dstDevice - Destination device * @param [in] src - Source device pointer * @param [in] srcDevice - Source device - * @param [in] count - Size of memory copy in bytes + * @param [in] sizeBytes - Size of memory copy in bytes * * Returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidDevice */ @@ -830,7 +835,7 @@ hipError_t hipMemcpyPeer ( void* dst, int dstDevice, const void* src, int srcD * @param [in] dstDevice - Destination device * @param [in] src - Source device pointer * @param [in] srcDevice - Source device - * @param [in] count - Size of memory copy in bytes + * @param [in] sizeBytes - Size of memory copy in bytes * @param [in] stream - Stream identifier * * Returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidDevice diff --git a/hipamd/include/hip_runtime_api.h b/hipamd/include/hip_runtime_api.h index 021dc37421..64f5a523d2 100644 --- a/hipamd/include/hip_runtime_api.h +++ b/hipamd/include/hip_runtime_api.h @@ -125,7 +125,31 @@ typedef enum hipError_t { ,hipErrorTbd ///< Marker that more error codes are needed. } hipError_t; - +/* + * @brief hipDeviceAttribute_t + * @enum + * @ingroup Enumerations + */ +typedef enum hipDeviceAttribute_t { + hipDeviceAttributeMaxThreadsPerBlock, ///< Maximum number of threads per block. + hipDeviceAttributeMaxBlockDimX, ///< Maximum x-dimension of a block. + hipDeviceAttributeMaxBlockDimY, ///< Maximum y-dimension of a block. + hipDeviceAttributeMaxBlockDimZ, ///< Maximum z-dimension of a block. + hipDeviceAttributeMaxGridDimX, ///< Maximum x-dimension of a grid. + hipDeviceAttributeMaxGridDimY, ///< Maximum y-dimension of a grid. + hipDeviceAttributeMaxGridDimZ, ///< Maximum z-dimension of a grid. + hipDeviceAttributeMaxSharedMemoryPerBlock, ///< Maximum shared memory available per block in bytes. + hipDeviceAttributeTotalConstantMemory, ///< Constant memory size in bytes. + hipDeviceAttributeWarpSize, ///< Warp size in threads. + hipDeviceAttributeMaxRegistersPerBlock, ///< Maximum number of 32-bit registers available to a thread block. This number is shared by all thread blocks simultaneously resident on a multiprocessor. + hipDeviceAttributeClockRate, ///< Peak clock frequency in kilohertz. + hipDeviceAttributeMultiprocessorCount, ///< Number of multiprocessors on the device. + hipDeviceAttributeComputeMode, ///< Compute mode that device is currently in. + hipDeviceAttributeL2CacheSize, ///< Size of L2 cache in bytes. 0 if the device doesn't have L2 cache. + hipDeviceAttributeMaxThreadsPerMultiProcessor, ///< Maximum resident threads per multiprocessor. + hipDeviceAttributeComputeCapabilityMajor, ///< Major compute capability version number. + hipDeviceAttributeComputeCapabilityMinor, ///< Minor compute capability version number. +} hipDeviceAttribute_t; /** * @} diff --git a/hipamd/include/nvcc_detail/hip_runtime_api.h b/hipamd/include/nvcc_detail/hip_runtime_api.h index 5c15c6259e..482b1259b0 100644 --- a/hipamd/include/nvcc_detail/hip_runtime_api.h +++ b/hipamd/include/nvcc_detail/hip_runtime_api.h @@ -211,6 +211,57 @@ inline static hipError_t hipDeviceGetProperties(hipDeviceProp_t *p_prop, int dev return hipCUDAErrorTohipError(cerror); } +inline static hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device) +{ + cudaDeviceAttr cdattr; + cudaError_t cerror; + + switch (attr) { + case hipDeviceAttributeMaxThreadsPerBlock: + cdattr = cudaDevAttrMaxThreadsPerBlock; break; + case hipDeviceAttributeMaxBlockDimX: + cdattr = cudaDevAttrMaxBlockDimX; break; + case hipDeviceAttributeMaxBlockDimY: + cdattr = cudaDevAttrMaxBlockDimY; break; + case hipDeviceAttributeMaxBlockDimZ: + cdattr = cudaDevAttrMaxBlockDimZ; break; + case hipDeviceAttributeMaxGridDimX: + cdattr = cudaDevAttrMaxGridDimX; break; + case hipDeviceAttributeMaxGridDimY: + cdattr = cudaDevAttrMaxGridDimY; break; + case hipDeviceAttributeMaxGridDimZ: + cdattr = cudaDevAttrMaxGridDimZ; break; + case hipDeviceAttributeMaxSharedMemoryPerBlock: + cdattr = cudaDevAttrMaxSharedMemoryPerBlock; break; + case hipDeviceAttributeTotalConstantMemory: + cdattr = cudaDevAttrTotalConstantMemory; break; + case hipDeviceAttributeWarpSize: + cdattr = cudaDevAttrWarpSize; break; + case hipDeviceAttributeMaxRegistersPerBlock: + cdattr = cudaDevAttrMaxRegistersPerBlock; break; + case hipDeviceAttributeClockRate: + cdattr = cudaDevAttrClockRate; break; + case hipDeviceAttributeMultiprocessorCount: + cdattr = cudaDevAttrMultiProcessorCount; break; + case hipDeviceAttributeComputeMode: + cdattr = cudaDevAttrComputeMode; break; + case hipDeviceAttributeL2CacheSize: + cdattr = cudaDevAttrL2CacheSize; break; + case hipDeviceAttributeMaxThreadsPerMultiProcessor: + cdattr = cudaDevAttrMaxThreadsPerMultiProcessor; break; + case hipDeviceAttributeComputeCapabilityMajor: + cdattr = cudaDevAttrComputeCapabilityMajor; break; + case hipDeviceAttributeComputeCapabilityMinor: + cdattr = cudaDevAttrComputeCapabilityMinor; break; + default: + cerror = cudaErrorInvalidValue; break; + } + + cerror = cudaDeviceGetAttribute(pi, cdattr, device); + + return hipCUDAErrorTohipError(cerror); +} + inline static hipError_t hipMemGetInfo( size_t* free, size_t* total) { return hipCUDAErrorTohipError(cudaMemGetInfo(free,total)); diff --git a/hipamd/src/hip_hcc.cpp b/hipamd/src/hip_hcc.cpp index dbcba3bee4..5cb021b7e7 100644 --- a/hipamd/src/hip_hcc.cpp +++ b/hipamd/src/hip_hcc.cpp @@ -789,7 +789,63 @@ hipError_t hipDeviceReset(void) return ihipLogStatus(hipSuccess); } +/** + * + */ +hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device) +{ + std::call_once(hip_initialized, ihipInit); + hipError_t e = hipSuccess; + + ihipDevice_t * hipDevice = ihipGetDevice(device); + hipDeviceProp_t *prop = &hipDevice->_props; + if (hipDevice) { + switch (attr) { + case hipDeviceAttributeMaxThreadsPerBlock: + *pi = prop->maxThreadsPerBlock; break; + case hipDeviceAttributeMaxBlockDimX: + *pi = prop->maxThreadsDim[0]; break; + case hipDeviceAttributeMaxBlockDimY: + *pi = prop->maxThreadsDim[1]; break; + case hipDeviceAttributeMaxBlockDimZ: + *pi = prop->maxThreadsDim[2]; break; + case hipDeviceAttributeMaxGridDimX: + *pi = prop->maxGridSize[0]; break; + case hipDeviceAttributeMaxGridDimY: + *pi = prop->maxGridSize[1]; break; + case hipDeviceAttributeMaxGridDimZ: + *pi = prop->maxGridSize[2]; break; + case hipDeviceAttributeMaxSharedMemoryPerBlock: + *pi = prop->sharedMemPerBlock; break; + case hipDeviceAttributeTotalConstantMemory: + *pi = prop->totalConstMem; break; + case hipDeviceAttributeWarpSize: + *pi = prop->warpSize; break; + case hipDeviceAttributeMaxRegistersPerBlock: + *pi = prop->regsPerBlock; break; + case hipDeviceAttributeClockRate: + *pi = prop->clockRate; break; + case hipDeviceAttributeMultiprocessorCount: + *pi = prop->multiProcessorCount; break; + case hipDeviceAttributeComputeMode: + *pi = prop->computeMode; break; + case hipDeviceAttributeL2CacheSize: + *pi = prop->l2CacheSize; break; + case hipDeviceAttributeMaxThreadsPerMultiProcessor: + *pi = prop->maxThreadsPerMultiProcessor; break; + case hipDeviceAttributeComputeCapabilityMajor: + *pi = prop->major; break; + case hipDeviceAttributeComputeCapabilityMinor: + *pi = prop->minor; break; + default: + e = hipErrorInvalidValue; break; + } + } else { + e = hipErrorInvalidDevice; + } + return ihipLogStatus(e); +} /** @@ -1367,8 +1423,8 @@ hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind } else { e = hipErrorInvalidResourceHandle; } - + #else // TODO-hsart - what synchronization does hsa_copy provide? hc::am_copy(dst, src, sizeBytes); diff --git a/hipamd/tests/src/CMakeLists.txt b/hipamd/tests/src/CMakeLists.txt index 596d172fe3..8d37f79208 100644 --- a/hipamd/tests/src/CMakeLists.txt +++ b/hipamd/tests/src/CMakeLists.txt @@ -8,33 +8,20 @@ include_directories( ${PROJECT_SOURCE_DIR}/include ) set (HIP_Unit_Test_VERSION_MAJOR 1) set (HIP_Unit_Test_VERSION_MINOR 0) -set (CUDA_PATH $ENV{CUDA_PATH}) -if (NOT DEFINED CUDA_PATH) - set( CUDA_PATH /usr/local/cuda) -endif() - set (HIP_PATH $ENV{HIP_PATH}) if (NOT DEFINED HIP_PATH) set (HIP_PATH ../..) endif() -set (HIP_PLATFORM $ENV{HIP_PLATFORM}) -if (NOT DEFINED HIP_PLATFORM) - if (EXISTS $CUDA_PATH) - set (HIP_PLATFORM nvcc) - else() - set (HIP_PLATFORM hcc) - endif() -endif() - +execute_process(COMMAND ${HIP_PATH}/bin/hipconfig --platform OUTPUT_VARIABLE HIP_PLATFORM) +MESSAGE ("HIP_PATH=" ${HIP_PATH}) if (${HIP_PLATFORM} STREQUAL "hcc") - MESSAGE ("HCC") + MESSAGE ("HIP_PLATFORM=hcc") set (HC_PATH ${HIP_PATH}/hc) set (HSA_PATH /opt/hsa) - #--- # Add HSA library: add_library(hsa-runtime64 SHARED IMPORTED) @@ -51,12 +38,13 @@ if (${HIP_PLATFORM} STREQUAL "hcc") elseif (${HIP_PLATFORM} STREQUAL "nvcc") - MESSAGE ("NVCC") + MESSAGE ("HIP_PLATFORM=nvcc") # NVCC does not not support -rdynamic option set(CMAKE_SHARED_LIBRARY_LINK_CXX_FLAGS ) set(CMAKE_SHARED_LIBRARY_LINK_C_FLAGS ) + else() - MESSAGE ("UNKNOWN HIP_PLATFORM=" ${HIP_PLATFORM}) + MESSAGE (FATAL_ERROR "UNKNOWN HIP_PLATFORM=" ${HIP_PLATFORM}) endif() set (HIPCC ${HIP_PATH}/bin/hipcc) @@ -109,19 +97,15 @@ macro (make_test_matches exe match_string) ) endmacro() - -#set(CMAKE_INSTALL_PREFIX "./install") -#install (TARGETS hipMemset DESTINATION bin) -#install (TARGETS hipEventRecord DESTINATION bin) - make_hip_executable (hip_ballot hip_ballot.cpp) make_hip_executable (hip_anyall hip_anyall.cpp) make_hip_executable (hip_popc hip_popc.cpp) make_hip_executable (hip_clz hip_clz.cpp) make_hip_executable (hip_brev hip_brev.cpp) make_hip_executable (hip_ffs hip_ffs.cpp) -make_hip_executable (hipMemset hipMemset.cpp) +make_hip_executable (hipInfo hipInfo.cpp) make_hip_executable (hipMemcpy hipMemcpy.cpp) +make_hip_executable (hipMemset hipMemset.cpp) make_hip_executable (hipEventRecord hipEventRecord.cpp) make_hip_executable (hipLanguageExtensions hipLanguageExtensions.cpp) make_hip_executable (hipGridLaunch hipGridLaunch.cpp) diff --git a/hipamd/tests/src/hipInfo.cpp b/hipamd/tests/src/hipInfo.cpp new file mode 100644 index 0000000000..62b6d432a0 --- /dev/null +++ b/hipamd/tests/src/hipInfo.cpp @@ -0,0 +1,80 @@ +/* +Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +// Test the device info API extensions for HIP: + +#include +#include +#include + +#include "test_common.h" + +#define CHECK(error) \ + if (error != hipSuccess) { \ + fprintf(stderr, "error: '%s'(%d) at %s:%d\n", hipGetErrorString(error), error,__FILE__, __LINE__); \ + exit(EXIT_FAILURE);\ + } + +hipError_t test_hipDeviceGetAttribute(int deviceId, hipDeviceAttribute_t attr, int expectedValue = 0) +{ + int value = 0; + std::cout << "Test hipDeviceGetAttribute attribute " << attr; + if (expectedValue) { std::cout << " expected value " << expectedValue; } + hipError_t e = hipDeviceGetAttribute(&value, attr, deviceId); + std::cout << " actual value " << value << std::endl; + if (expectedValue && value != expectedValue) { + std::cout << "fail" << std::endl; + return hipErrorInvalidValue; + } + return hipSuccess; +} + +int main(int argc, char *argv[]) +{ + int deviceId; + CHECK (hipGetDevice(&deviceId)); + hipDeviceProp_t props; + CHECK(hipDeviceGetProperties(&props, deviceId)); + printf ("info: running on device #%d %s\n", deviceId, props.name); + + CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeMaxThreadsPerBlock, props.maxThreadsPerBlock)); + CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeMaxBlockDimX, props.maxThreadsDim[0])); + CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeMaxBlockDimY, props.maxThreadsDim[1])); + CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeMaxBlockDimZ, props.maxThreadsDim[2])); + CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeMaxGridDimX, props.maxGridSize[0])); + CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeMaxGridDimY, props.maxGridSize[1])); + CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeMaxGridDimZ, props.maxGridSize[2])); + CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeMaxSharedMemoryPerBlock, props.sharedMemPerBlock)); + CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeTotalConstantMemory, props.totalConstMem)); + CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeWarpSize, props.warpSize)); + CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeMaxRegistersPerBlock, props.regsPerBlock)); + CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeClockRate, props.clockRate)); + CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeMultiprocessorCount, props.multiProcessorCount)); + CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeComputeMode, props.computeMode)); + CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeL2CacheSize, props.l2CacheSize)); + CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeMaxThreadsPerMultiProcessor, props.maxThreadsPerMultiProcessor)); + CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeComputeCapabilityMajor, props.major)); + CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeComputeCapabilityMinor, props.minor)); + + passed(); + +}; + diff --git a/hipamd/tests/src/hipMemset.cpp b/hipamd/tests/src/hipMemset.cpp index 63e14a5369..9769acaf25 100644 --- a/hipamd/tests/src/hipMemset.cpp +++ b/hipamd/tests/src/hipMemset.cpp @@ -31,9 +31,11 @@ int main(int argc, char *argv[]) HipTest::parseStandardArguments(argc, argv, true); + HIPCHECK(hipSetDevice(p_gpuDevice)); + size_t Nbytes = N*sizeof(char); - printf ("N=%zu memsetval=%2x\n", N, memsetval); + printf ("N=%zu memsetval=%2x device=%d\n", N, memsetval, p_gpuDevice); char *A_d; char *A_h; diff --git a/hipamd/tests/src/test_common.cpp b/hipamd/tests/src/test_common.cpp index 45d01d147b..d7a108a11b 100644 --- a/hipamd/tests/src/test_common.cpp +++ b/hipamd/tests/src/test_common.cpp @@ -27,6 +27,7 @@ char memsetval=0x42; int iterations = 1; unsigned blocksPerCU = 6; // to hide latency unsigned threadsPerBlock = 256; +int p_gpuDevice = 0; @@ -105,7 +106,12 @@ int parseStandardArguments(int argc, char *argv[], bool failOnUndefinedArg) memsetval = ex; } else if (!strcmp(arg, "--iterations") || (!strcmp(arg, "-i"))) { if (++i >= argc || !HipTest::parseInt(argv[i], &iterations)) { - failed("Bad itertions argument"); + failed("Bad iterations argument"); + } + + } else if (!strcmp(arg, "--gpu") || (!strcmp(arg, "-g"))) { + if (++i >= argc || !HipTest::parseInt(argv[i], &p_gpuDevice)) { + failed("Bad gpuDevice argument"); } } diff --git a/hipamd/tests/src/test_common.h b/hipamd/tests/src/test_common.h index 891fe381d2..fee052c1ad 100644 --- a/hipamd/tests/src/test_common.h +++ b/hipamd/tests/src/test_common.h @@ -29,12 +29,15 @@ #define HIPCHECK(error) \ - if (error != hipSuccess) { \ +{\ + hipError_t localError = error; \ + if (localError != hipSuccess) { \ printf("%serror: '%s'(%d) at %s:%d%s\n", \ - KRED,hipGetErrorString(error), error,\ + KRED,hipGetErrorString(localError), localError,\ __FILE__, __LINE__,KNRM); \ failed("API returned error code.");\ - } + }\ +} #define HIPASSERT(condition) \ if (! (condition) ) { \ @@ -49,6 +52,7 @@ extern char memsetval; extern int iterations; extern unsigned blocksPerCU; extern unsigned threadsPerBlock; +extern int p_gpuDevice; namespace HipTest {