diff --git a/CMakeLists.txt b/CMakeLists.txt index 8bf1b0c3b8..b8cf550547 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -143,7 +143,7 @@ add_to_config(_buildInfo COMPILE_HIP_ATP_MARKER) ############################# # Build steps ############################# -# Rebuild cmake cache updates .buildInfo and .version +# Rebuild cmake cache updates .hipInfo and .hipVersion add_custom_target(update_build_and_version_info ALL COMMAND make rebuild_cache) # Build clang hipify if enabled @@ -198,12 +198,12 @@ if(HIP_PLATFORM STREQUAL "hcc") set_source_files_properties(${SOURCE_FILES} PROPERTIES OBJECT_DEPENDS ${PROJECT_BINARY_DIR}/hcc_version.txt) add_dependencies(hip_hcc check_hcc_version update_build_and_version_info) - # Generate .buildInfo - file(WRITE "${PROJECT_BINARY_DIR}/.buildInfo" ${_buildInfo}) + # Generate .hipInfo + file(WRITE "${PROJECT_BINARY_DIR}/.hipInfo" ${_buildInfo}) endif() -# Generate .version -file(WRITE "${PROJECT_BINARY_DIR}/.version" ${_versionInfo}) +# Generate .hipVersion +file(WRITE "${PROJECT_BINARY_DIR}/.hipVersion" ${_versionInfo}) # Build doxygen documentation add_custom_target(doc COMMAND HIP_PATH=${CMAKE_CURRENT_SOURCE_DIR} doxygen ${CMAKE_CURRENT_SOURCE_DIR}/docs/doxygen-input/doxy.cfg @@ -219,13 +219,14 @@ if(HIP_PLATFORM STREQUAL "hcc") else() install(TARGETS hip_hcc DESTINATION lib) endif() + install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/src/hip_ir.ll DESTINATION lib) - # Install .buildInfo - install(FILES ${PROJECT_BINARY_DIR}/.buildInfo DESTINATION lib) + # Install .hipInfo + install(FILES ${PROJECT_BINARY_DIR}/.hipInfo DESTINATION lib) endif() -# Install .version -install(FILES ${PROJECT_BINARY_DIR}/.version DESTINATION bin) +# Install .hipVersion +install(FILES ${PROJECT_BINARY_DIR}/.hipVersion DESTINATION bin) # Install src, bin, include & cmake if necessary execute_process(COMMAND test ${CMAKE_INSTALL_PREFIX} -ef ${CMAKE_CURRENT_SOURCE_DIR} diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 7858ef58eb..d535ccac39 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -18,9 +18,9 @@ After making HIP, don't forget the "make install" step ! ## Adding a new HIP API - - Add a translation to the bin/hipify tool ; many examples abound. + - Add a translation to the hipify-clang tool ; many examples abound. - For stat tracking purposes, place the API into an appropriate stat category ("dev", "mem", "stream", etc). - - Add a inlined NVCC implementation for the function in include/nvcc_detail/hip_runtime_api.h. + - Add a inlined NVCC implementation for the function in include/hip/nvcc_detail/hip_runtime_api.h. - These are typically headers - Add an HCC definition and Doxygen comments for the function in include/hcc_detail/hip_runtime_api.h - Source implementation typically go in src/hcc_detail/hip_hcc.cpp. The implementation may involve @@ -59,7 +59,7 @@ Selected multilib: .;@m64 The unix `date` command can print the HCC-format work-week for a specific date , ie: ``` -> date --utc +%y%W%w -d 2015-11-09 +> date --utc +%y%U%w -d 2015-11-09 15451 ``` @@ -125,60 +125,29 @@ Differences or limitations of HIP APIs as compared to CUDA APIs should be clearl is used by the GetLastError and PeekLastError functions - if a HIP API simply returns, then the error will not be logged correctly. + #### Presubmit Testing: -Before checking in or submitting a pull request, run all Rodinia tests and ensure pass results match starting point: +Before checking in or submitting a pull request, run all directed tests (see tests/README.md) and all Rodinia tests. +Ensure pass results match starting point: ```shell > cd examples/ > ./run_all.sh ``` -Recent results : -``` -hip2/examples/rodinia_3.0/hip$ make test ---TESTING: b+tree -executing: ../../test/b+tree/run0.cmd... PASSED! ---TESTING: backprop -executing: ../../test/backprop/run0.cmd... PASSED! ---TESTING: bfs -executing: ../../test/bfs/run0.cmd... PASSED! -executing: ../../test/bfs/run1.cmd... PASSED! ---TESTING: cfd -executing: ../../test/cfd/run0.cmd... PASSED! -executing: ../../test/cfd/run1.cmd... PASSED! ---TESTING: gaussian -executing: ../../test/gaussian/run0.cmd... PASSED! ---TESTING: heartwall -executing: ../../test/heartwall/run0.cmd... PASSED! ---TESTING: hotspot -executing: ../../test/hotspot/run0.cmd... PASSED! ---TESTING: kmeans -executing: ../../test/kmeans/run0.cmd... PASSED! -executing: ../../test/kmeans/run1.cmd... PASSED! -executing: ../../test/kmeans/run2.cmd... PASSED! -executing: ../../test/kmeans/run3.cmd... PASSED! ---TESTING: lavaMD -executing: ../../test/lavaMD/run0.cmd... PASSED! -executing: ../../test/lavaMD/run1.cmd... PASSED! -executing: ../../test/lavaMD/run2.cmd... PASSED! -executing: ../../test/lavaMD/run3.cmd... PASSED! -executing: ../../test/lavaMD/run4.cmd... PASSED! ---TESTING: lud -executing: ../../test/lud/run0.cmd... PASSED! ---TESTING: myocyte -executing: ../../test/myocyte/run0.cmd... PASSED! ---TESTING: nn -executing: ../../test/nn/run0.cmd... PASSED! ---TESTING: nw -executing: ../../test/nw/run0.cmd... PASSED! ---TESTING: pathfinder -executing: ../../test/pathfinder/run0.cmd... PASSED! ---TESTING: srad -executing: ../../test/srad/run0.cmd... PASSED! ---TESTING: streamcluster -executing: ../../test/streamcluster/run0.cmd... PASSED! -``` +#### Checkin messages +Follow existing best practice for writing a good Git commit message. Some tips: + http://chris.beams.io/posts/git-commit/ + https://robots.thoughtbot.com/5-useful-tips-for-a-better-commit-message + +In particular : + - Use imperative voice, ie "Fix this bug", "Refactor the XYZ routine", "Update the doc". + Not : "Fixing the bug", "Fixed the bug", "Bug fix", etc. + - Subject should summarize the commit. Do not end subject with a period. Use a blank line + after the subject. + + ## Doxygen Editing Guidelines diff --git a/bin/hipcc b/bin/hipcc index 5c991bfc25..f536937455 100755 --- a/bin/hipcc +++ b/bin/hipcc @@ -52,9 +52,9 @@ $verbose = $ENV{'HIPCC_VERBOSE'} // 0; $HIP_PATH=$ENV{'HIP_PATH'} // dirname (dirname $0); # use parent directory of hipcc #--- -# Read .buildInfo +# Read .hipInfo my %hipConfig = (); -parse_config_file("$HIP_PATH/lib/.buildInfo", \%hipConfig); +parse_config_file("$HIP_PATH/lib/.hipInfo", \%hipConfig); #--- #HIP_PLATFORM controls whether to use NVCC or HCC for compilation: @@ -119,6 +119,9 @@ if ($HIP_PLATFORM eq "hcc") { if ($ROCM_TARGET eq "hawaii") { $HIPLDFLAGS .= " -amdgpu-target=AMD:AMDGPU:7:0:1"; } + if ($ROCM_TARGET eq "polaris") { + $HIPLDFLAGS .= " -amdgpu-target=AMD:AMDGPU:8:0:3"; + } # Add trace marker library: # TODO - once we cleanly separate the HIP API headers from HIP library headers this logic should move to CMakebuild option - apps do not need to see the marker library. @@ -196,6 +199,11 @@ if($HIP_PLATFORM eq "hcc"){ } } +if(($HIP_PLATFORM eq "hcc") and defined $ENV{HIP_EXPERIMENTAL}){ + $EXPORT_LL=" "; + $ENV{HCC_EXTRA_LIBRARIES}="$HIP_PATH/lib/hip_ir.ll\n"; +} + if($HIP_PLATFORM eq "nvcc"){ $ISACMD .= "$HIP_PATH/bin/hipcc -ptx "; if($ARGV[0] eq "--genco"){ diff --git a/bin/hipconfig b/bin/hipconfig index 4fc37944e8..663a1e14cd 100755 --- a/bin/hipconfig +++ b/bin/hipconfig @@ -1,7 +1,7 @@ #!/usr/bin/perl -w -$HIP_BASE_VERSION_MAJOR = "0"; -$HIP_BASE_VERSION_MINOR = "92"; +$HIP_BASE_VERSION_MAJOR = "1"; +$HIP_BASE_VERSION_MINOR = "0"; # Need perl > 5.10 to use logic-defined or use 5.006; use v5.10.1; @@ -86,9 +86,9 @@ if ($HIP_PLATFORM eq "nvcc") { }; #--- -# Read .version +# Read .hipVersion my %hipVersion = (); -parse_config_file("$HIP_PATH/bin/.version", \%hipVersion); +parse_config_file("$HIP_PATH/bin/.hipVersion", \%hipVersion); $HIP_VERSION_MAJOR = $hipVersion{'HIP_VERSION_MAJOR'} // $HIP_BASE_VERSION_MAJOR; $HIP_VERSION_MINOR = $hipVersion{'HIP_VERSION_MINOR'} // $HIP_BASE_VERSION_MINOR; $HIP_VERSION_PATCH = $hipVersion{'HIP_VERSION_PATCH'} // "0"; @@ -129,6 +129,7 @@ if (!$printed or $p_full) { print ("HSA_PATH : $HSA_PATH\n"); print ("HCC_HOME : $HCC_HOME\n"); system("$HCC_HOME/bin/hcc --version"); + system("$HCC_HOME/compiler/bin/llc --version"); print ("HCC-cxxflags : "); system("$HCC_HOME/bin/hcc-config --cxxflags"); print ("HCC-ldflags : "); diff --git a/docs/doxygen-input/doxy.cfg b/docs/doxygen-input/doxy.cfg old mode 100755 new mode 100644 diff --git a/docs/markdown/CUDA_Runtime_API_functions_supported_by_HIP.md b/docs/markdown/CUDA_Runtime_API_functions_supported_by_HIP.md index 4c92abbba6..350ddd4e32 100644 --- a/docs/markdown/CUDA_Runtime_API_functions_supported_by_HIP.md +++ b/docs/markdown/CUDA_Runtime_API_functions_supported_by_HIP.md @@ -2,17 +2,17 @@ | **CUDA** | **HIP** | **CUDA description** | |-----------------------------------------------------------|-------------------------------|--------------------------------------------------------------------------------------------------------------------------------| -| `cudaChooseDevice` | | Select compute-device which best matches criteria. | +| `cudaChooseDevice` | `hipChooseDevice` | Select compute-device which best matches criteria. | | `cudaDeviceGetAttribute` | `hipDeviceGetAttribute` | Returns information about the device. | | `cudaDeviceGetByPCIBusId` | | Returns a handle to a compute device. | | `cudaDeviceGetCacheConfig` | `hipDeviceGetCacheConfig` | Returns the preferred cache configuration for the current device. | -| `cudaDeviceGetLimit` | | Returns resource limits. | +| `cudaDeviceGetLimit` | `hipDeviceGetLimit` | Returns resource limits. | | `cudaDeviceGetPCIBusId` | | Returns a PCI Bus Id string for the device. | | `cudaDeviceGetSharedMemConfig` | `hipDeviceGetSharedMemConfig` | Returns the shared memory configuration for the current device. | | `cudaDeviceGetStreamPriorityRange` | | Returns numerical values that correspond to the least and greatest stream priorities. | | `cudaDeviceReset` | `hipDeviceReset` | Destroy all allocations and reset all state on the current device in the current process. | | `cudaDeviceSetCacheConfig` | `hipDeviceSetCacheConfig` | Sets the preferred cache configuration for the current device. | -| `cudaDeviceSetLimit` | | Set resource limits. | +| `cudaDeviceSetLimit` | `hipDeviceSetLimit` | Set resource limits. | | `cudaDeviceSetSharedMemConfig` | `hipDeviceSetSharedMemConfig` | Sets the shared memory configuration for the current device. | | `cudaDeviceSynchronize` | `hipDeviceSynchronize` | Wait for compute device to finish. | | `cudaGetDevice` | `hipGetDevice` | Returns which device is currently being used. | diff --git a/docs/markdown/hip_kernel_language.md b/docs/markdown/hip_kernel_language.md old mode 100755 new mode 100644 diff --git a/hipify-clang/src/Cuda2Hip.cpp b/hipify-clang/src/Cuda2Hip.cpp index 789014ffc9..d2be3c242d 100644 --- a/hipify-clang/src/Cuda2Hip.cpp +++ b/hipify-clang/src/Cuda2Hip.cpp @@ -635,6 +635,17 @@ struct cuda2hipMap { cuda2hipRename["cudaSharedMemBankSizeFourByte"] = {"hipSharedMemBankSizeFourByte", CONV_DEV, API_RUNTIME}; cuda2hipRename["cudaSharedMemBankSizeEightByte"] = {"hipSharedMemBankSizeEightByte", CONV_DEV, API_RUNTIME}; + // Limits + cuda2hipRename["cudaLimit"] = {"hipLimit_t", CONV_DEV, API_RUNTIME}; + // unsupported yet + //cuda2hipRename["cudaLimitStackSize"] = {"hipLimitStackSize", CONV_DEV, API_RUNTIME}; + //cuda2hipRename["cudaLimitPrintfFifoSize"] = {"hipLimitPrintfFifoSize", CONV_DEV, API_RUNTIME}; + // unsupported yet + cuda2hipRename["cudaLimitMallocHeapSize"] = {"hipLimitMallocHeapSize", CONV_DEV, API_RUNTIME}; + //cuda2hipRename["cudaLimitDevRuntimeSyncDepth"] = {"hipLimitPrintfFifoSize", CONV_DEV, API_RUNTIME}; + //cuda2hipRename["cudaLimitDevRuntimePendingLaunchCount"] = {"hipLimitMallocHeapSize", CONV_DEV, API_RUNTIME}; + cuda2hipRename["cudaDeviceGetLimit"] = {"hipDeviceGetLimit", CONV_DEV, API_RUNTIME}; + // Profiler // unsupported yet //cuda2hipRename["cudaProfilerInitialize"] = {"hipProfilerInitialize", CONV_OTHER, API_RUNTIME}; diff --git a/include/hip/hcc.h b/include/hip/hcc.h index efb5197cca..1542d5b4f2 100644 --- a/include/hip/hcc.h +++ b/include/hip/hcc.h @@ -1,3 +1,25 @@ +/* +Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + #ifndef HCC_H #define HCC_H diff --git a/include/hip/hcc_detail/hcc_acc.h b/include/hip/hcc_detail/hcc_acc.h index 371a0a23a4..c36acc52f5 100644 --- a/include/hip/hcc_detail/hcc_acc.h +++ b/include/hip/hcc_detail/hcc_acc.h @@ -1,3 +1,25 @@ +/* +Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + #ifndef HCC_ACC_H #define HCC_ACC_H #include "hip/hip_runtime_api.h" diff --git a/include/hip/hcc_detail/hip_blas.h b/include/hip/hcc_detail/hip_blas.h deleted file mode 100644 index 07f41ec71b..0000000000 --- a/include/hip/hcc_detail/hip_blas.h +++ /dev/null @@ -1,258 +0,0 @@ -/* -Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ -#pragma once - -#include -#include - -//HGSOS for Kalmar leave it as C++, only cublas needs C linkage. - -#ifdef __cplusplus -extern "C" { -#endif - -typedef hcblasHandle_t* hipblasHandle_t; -typedef hcComplex hipComplex ; - -static hipblasHandle_t dummyGlobal; - -/* Unsupported types - "cublasFillMode_t", - "cublasDiagType_t", - "cublasSideMode_t", - "cublasPointerMode_t", - "cublasAtomicsMode_t", - "cublasDataType_t" -*/ - -inline static hcblasOperation_t hipOperationToHCCOperation( hipblasOperation_t op) -{ - switch (op) - { - case HIPBLAS_OP_N: - return HCBLAS_OP_N; - - case HIPBLAS_OP_T: - return HCBLAS_OP_T; - - case HIPBLAS_OP_C: - return HCBLAS_OP_C; - - default: - throw "Non existent OP"; - } -} - -inline static hipblasOperation_t HCCOperationToHIPOperation( hcblasOperation_t op) -{ - switch (op) - { - case HCBLAS_OP_N : - return HIPBLAS_OP_N; - - case HCBLAS_OP_T : - return HIPBLAS_OP_T; - - case HCBLAS_OP_C : - return HIPBLAS_OP_C; - - default: - throw "Non existent OP"; - } -} - - -inline static hipblasStatus_t hipHCBLASStatusToHIPStatus(hcblasStatus_t hcStatus) -{ - switch(hcStatus) - { - case HCBLAS_STATUS_SUCCESS: - return HIPBLAS_STATUS_SUCCESS; - case HCBLAS_STATUS_NOT_INITIALIZED: - return HIPBLAS_STATUS_NOT_INITIALIZED; - case HCBLAS_STATUS_ALLOC_FAILED: - return HIPBLAS_STATUS_ALLOC_FAILED; - case HCBLAS_STATUS_INVALID_VALUE: - return HIPBLAS_STATUS_INVALID_VALUE; - case HCBLAS_STATUS_MAPPING_ERROR: - return HIPBLAS_STATUS_MAPPING_ERROR; - case HCBLAS_STATUS_EXECUTION_FAILED: - return HIPBLAS_STATUS_EXECUTION_FAILED; - case HCBLAS_STATUS_INTERNAL_ERROR: - return HIPBLAS_STATUS_INTERNAL_ERROR; - default: - throw "Unimplemented status"; - } -} - - - -inline static hipblasStatus_t hipblasCreate(hipblasHandle_t* handle) { - hipblasStatus_t retVal = hipHCBLASStatusToHIPStatus(hcblasCreate(*handle)); - dummyGlobal = *handle; - return retVal; - -} - -inline static hipblasStatus_t hipblasDestroy(hipblasHandle_t& handle) { - return hipHCBLASStatusToHIPStatus(hcblasDestroy(handle)); -} - -//note: no handle -inline static hipblasStatus_t hipblasSetVector(int n, int elemSize, const void *x, int incx, void *y, int incy){ - return hipHCBLASStatusToHIPStatus(hcblasSetVector(dummyGlobal, n, elemSize, x, incx, y, incy)); //HGSOS no need for handle moving forward -} - -//note: no handle -inline static hipblasStatus_t hipblasGetVector(int n, int elemSize, const void *x, int incx, void *y, int incy){ - return hipHCBLASStatusToHIPStatus(hcblasGetVector(dummyGlobal, n, elemSize, x, incx, y, incy)); //HGSOS no need for handle -} - -//note: no handle -inline static hipblasStatus_t hipblasSetMatrix(int rows, int cols, int elemSize, const void *A, int lda, void *B, int ldb){ - return hipHCBLASStatusToHIPStatus(hcblasSetMatrix(dummyGlobal, rows, cols, elemSize, A, lda, B, ldb)); -} - -//note: no handle -inline static hipblasStatus_t hipblasGetMatrix(int rows, int cols, int elemSize, const void *A, int lda, void *B, int ldb){ - return hipHCBLASStatusToHIPStatus(hcblasGetMatrix(dummyGlobal, rows, cols, elemSize, A, lda, B, ldb)); -} - -inline static hipblasStatus_t hipblasSasum(hipblasHandle_t handle, int n, float *x, int incx, float *result){ - return hipHCBLASStatusToHIPStatus(hcblasSasum(handle, n, x, incx, result)); -} - -inline static hipblasStatus_t hipblasDasum(hipblasHandle_t handle, int n, double *x, int incx, double *result){ - return hipHCBLASStatusToHIPStatus(hcblasDasum(handle, n, x, incx, result)); -} - -inline static hipblasStatus_t hipblasSasumBatched(hipblasHandle_t handle, int n, float *x, int incx, float *result, int batchCount){ - return hipHCBLASStatusToHIPStatus(hcblasSasumBatched( handle, n, x, incx, result, batchCount)); -} - -inline static hipblasStatus_t hipblasDasumBatched(hipblasHandle_t handle, int n, double *x, int incx, double *result, int batchCount){ - return hipHCBLASStatusToHIPStatus(hcblasDasumBatched(handle, n, x, incx, result, batchCount)); -} - -inline static hipblasStatus_t hipblasSaxpy(hipblasHandle_t handle, int n, const float *alpha, const float *x, int incx, float *y, int incy) { - return hipHCBLASStatusToHIPStatus(hcblasSaxpy(handle, n, alpha, x, incx, y, incy)); -} - -inline static hipblasStatus_t hipblasSaxpyBatched(hipblasHandle_t handle, int n, const float *alpha, const float *x, int incx, float *y, int incy, int batchCount){ - return hipHCBLASStatusToHIPStatus(hcblasSaxpyBatched(handle, n, alpha, x, incx, y, incy, batchCount)); -} - -inline static hipblasStatus_t hipblasScopy(hipblasHandle_t handle, int n, const float *x, int incx, float *y, int incy){ - return hipHCBLASStatusToHIPStatus(hcblasScopy( handle, n, x, incx, y, incy)); -} - -inline static hipblasStatus_t hipblasDcopy(hipblasHandle_t handle, int n, const double *x, int incx, double *y, int incy){ - return hipHCBLASStatusToHIPStatus(hcblasDcopy( handle, n, x, incx, y, incy)); -} - -inline static hipblasStatus_t hipblasScopyBatched(hipblasHandle_t handle, int n, const float *x, int incx, float *y, int incy, int batchCount){ - return hipHCBLASStatusToHIPStatus(hcblasScopyBatched( handle, n, x, incx, y, incy, batchCount)); -} - -inline static hipblasStatus_t hipblasDcopyBatched(hipblasHandle_t handle, int n, const double *x, int incx, double *y, int incy, int batchCount){ - return hipHCBLASStatusToHIPStatus(hcblasDcopyBatched( handle, n, x, incx, y, incy, batchCount)); -} - -inline static hipblasStatus_t hipblasSdot (hipblasHandle_t handle, int n, const float *x, int incx, const float *y, int incy, float *result){ - return hipHCBLASStatusToHIPStatus(hcblasSdot(handle, n, x, incx, y, incy, result)); -} - -inline static hipblasStatus_t hipblasDdot (hipblasHandle_t handle, int n, const double *x, int incx, const double *y, int incy, double *result){ - return hipHCBLASStatusToHIPStatus(hcblasDdot(handle, n, x, incx, y, incy, result)); -} - -inline static hipblasStatus_t hipblasSdotBatched (hipblasHandle_t handle, int n, const float *x, int incx, const float *y, int incy, float *result, int batchCount){ - return hipHCBLASStatusToHIPStatus(hcblasSdotBatched(handle, n, x, incx, y, incy, result, batchCount)); -} - -inline static hipblasStatus_t hipblasDdotBatched (hipblasHandle_t handle, int n, const double *x, int incx, const double *y, int incy, double *result, int batchCount){ - return hipHCBLASStatusToHIPStatus(hcblasDdotBatched ( handle, n, x, incx, y, incy, result, batchCount)); -} - -inline static hipblasStatus_t hipblasSscal(hipblasHandle_t handle, int n, const float *alpha, float *x, int incx){ - return hipHCBLASStatusToHIPStatus(hcblasSscal(handle, n, alpha, x, incx)); -} - -inline static hipblasStatus_t hipblasDscal(hipblasHandle_t handle, int n, const double *alpha, double *x, int incx){ - return hipHCBLASStatusToHIPStatus(hcblasDscal(handle, n, alpha, x, incx)); -} - -inline static hipblasStatus_t hipblasSscalBatched(hipblasHandle_t handle, int n, const float *alpha, float *x, int incx, int batchCount){ - return hipHCBLASStatusToHIPStatus(hcblasSscalBatched(handle, n, alpha, x, incx, batchCount)); -} - -inline static hipblasStatus_t hipblasDscalBatched(hipblasHandle_t handle, int n, const double *alpha, double *x, int incx, int batchCount){ - return hipHCBLASStatusToHIPStatus(hcblasDscalBatched(handle, n, alpha, x, incx, batchCount)); -} - -inline static hipblasStatus_t hipblasSgemv(hipblasHandle_t handle, hipblasOperation_t trans, int m, int n, const float *alpha, float *A, int lda, - float *x, int incx, const float *beta, float *y, int incy){ - return hipHCBLASStatusToHIPStatus(hcblasSgemv(handle, hipOperationToHCCOperation(trans), m, n, alpha, A, lda, x, incx, beta, y, incy)); -} - -inline static hipblasStatus_t hipblasSgemvBatched(hipblasHandle_t handle, hipblasOperation_t trans, int m, int n, const float *alpha, float *A, int lda, - float *x, int incx, const float *beta, float *y, int incy, int batchCount){ - return hipHCBLASStatusToHIPStatus(hcblasSgemvBatched(handle, hipOperationToHCCOperation(trans), m, n, alpha, A, lda, x, incx, beta, y, incy, batchCount)); -} - -inline static hipblasStatus_t hipblasSger(hipblasHandle_t handle, int m, int n, const float *alpha, const float *x, int incx, const float *y, int incy, float *A, int lda){ - return hipHCBLASStatusToHIPStatus(hcblasSger(handle, m, n, alpha, x, incx, y, incy, A, lda)); -} - -inline static hipblasStatus_t hipblasSgerBatched(hipblasHandle_t handle, int m, int n, const float *alpha, const float *x, int incx, const float *y, int incy, float *A, int lda, int batchCount){ - return hipHCBLASStatusToHIPStatus(hcblasSgerBatched(handle, m, n, alpha, x, incx, y, incy, A, lda, batchCount)); -} - -inline static hipblasStatus_t hipblasSgemm(hipblasHandle_t handle, hipblasOperation_t transa, hipblasOperation_t transb, - int m, int n, int k, const float *alpha, float *A, int lda, float *B, int ldb, const float *beta, float *C, int ldc){ - return hipHCBLASStatusToHIPStatus(hcblasSgemm( handle, hipOperationToHCCOperation(transa), hipOperationToHCCOperation(transb), m, n, k, alpha, A, lda, B, ldb, beta, C, ldc)); -} - -inline static hipblasStatus_t hipblasCgemm(hipblasHandle_t handle, hipblasOperation_t transa, hipblasOperation_t transb, - int m, int n, int k, const hipComplex *alpha, hipComplex *A, int lda, hipComplex *B, int ldb, const hipComplex *beta, hipComplex *C, int ldc){ - return hipHCBLASStatusToHIPStatus(hcblasCgemm( handle, hipOperationToHCCOperation(transa), hipOperationToHCCOperation(transb), m, n, k, alpha, A, lda, B, ldb, beta, C, ldc)); -} - -inline static hipblasStatus_t hipblasSgemmBatched(hipblasHandle_t handle, hipblasOperation_t transa, hipblasOperation_t transb, - int m, int n, int k, const float *alpha, float *A, int lda, float *B, int ldb, const float *beta, float *C, int ldc, int batchCount){ - return hipHCBLASStatusToHIPStatus(hcblasSgemmBatched( handle, hipOperationToHCCOperation(transa), hipOperationToHCCOperation(transb), m, n, k, alpha, A, lda, B, ldb, beta, C, ldc, batchCount)); -} - -inline static hipblasStatus_t hipblasCgemmBatched(hipblasHandle_t handle, hipblasOperation_t transa, hipblasOperation_t transb, - int m, int n, int k, const hipComplex *alpha, hipComplex *A, int lda, hipComplex *B, int ldb, const hipComplex *beta, hipComplex *C, int ldc, int batchCount){ - return HIPBLAS_STATUS_NOT_SUPPORTED; - //return hipHCBLASStatusToHIPStatus(hcblasCgemmBatched( handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc, batchCount)); -} - - - - -#ifdef __cplusplus -} -#endif - - diff --git a/include/hip/hcc_detail/hipComplex.h b/include/hip/hcc_detail/hip_complex.h similarity index 91% rename from include/hip/hcc_detail/hipComplex.h rename to include/hip/hcc_detail/hip_complex.h index 910cee946d..21995de096 100644 --- a/include/hip/hcc_detail/hipComplex.h +++ b/include/hip/hcc_detail/hip_complex.h @@ -1,23 +1,25 @@ /* Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. + Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR -IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ - #ifndef HIPCOMPLEX_H #define HIPCOMPLEX_H diff --git a/include/hip/hcc_detail/hip_fp16.h b/include/hip/hcc_detail/hip_fp16.h index 9c7b3a6646..bcf2605f28 100644 --- a/include/hip/hcc_detail/hip_fp16.h +++ b/include/hip/hcc_detail/hip_fp16.h @@ -1,19 +1,22 @@ /* Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. + Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR -IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ @@ -34,13 +37,6 @@ typedef struct __attribute__((aligned(4))){ typedef __half half; typedef __half2 half2; -typedef struct{ - union{ - float f; - unsigned u; - }; -} struct_float; - /* Arithmetic functions */ diff --git a/include/hip/hcc_detail/hip_hcc.h b/include/hip/hcc_detail/hip_hcc.h index f3967247a6..9e7499e4ac 100644 --- a/include/hip/hcc_detail/hip_hcc.h +++ b/include/hip/hcc_detail/hip_hcc.h @@ -1,19 +1,22 @@ /* -Link errors represented as this:Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. + Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR -IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ @@ -29,7 +32,7 @@ THE SOFTWARE. #error("This version of HIP requires a newer version of HCC."); #endif -// #define USE_MEMCPYTOSYMBOL +#define USE_DISPATCH_HSA_KERNEL 0 // diff --git a/include/hip/hcc_detail/hip_ldg.h b/include/hip/hcc_detail/hip_ldg.h index 6fcb9d9df4..7dd6451749 100644 --- a/include/hip/hcc_detail/hip_ldg.h +++ b/include/hip/hcc_detail/hip_ldg.h @@ -1,13 +1,16 @@ /* Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. + Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE diff --git a/include/hip/hcc_detail/hip_runtime.h b/include/hip/hcc_detail/hip_runtime.h old mode 100755 new mode 100644 index 47b75f282d..1bda07eb7d --- a/include/hip/hcc_detail/hip_runtime.h +++ b/include/hip/hcc_detail/hip_runtime.h @@ -19,6 +19,7 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ + /** * @file hcc_detail/hip_runtime.h * @brief Contains definitions of APIs for HIP runtime. @@ -533,7 +534,8 @@ __device__ float __dsqrt_rz(double x); * * @warning __threadfence_block is a stub and map to no-op. */ -__device__ void __threadfence_block(void); +// __device__ void __threadfence_block(void); +extern "C" __device__ void __threadfence_block(void); /** * @brief threadfence makes wirtes visible to other threads running on same GPU. @@ -544,7 +546,8 @@ __device__ void __threadfence_block(void); * * @warning __threadfence is a stub and map to no-op, application should set "export HSA_DISABLE_CACHE=1" to disable both L1 and L2 caches. */ -__device__ void __threadfence(void) __attribute__((deprecated("Provided for compile-time compatibility, not yet functional"))); +// __device__ void __threadfence(void) __attribute__((deprecated("Provided for compile-time compatibility, not yet functional"))); +extern "C" __device__ void __threadfence(void); /** * @brief threadfence_system makes writes to pinned system memory visible on host CPU. @@ -583,55 +586,42 @@ __device__ void __threadfence_system(void) __attribute__((deprecated("Provided // loop unrolling __device__ static inline void* memcpy(void* dst, void* src, size_t size) { - uint64_t i = 0; - uint64_t totalLength = size/sizeof(uint32_t); - for(i=hipThreadIdx_x+hipBlockIdx_x*hipBlockDim_x; - i<(totalLength/4); - i = i + hipBlockDim_x * hipGridDim_x) - { - ((uint32_t*)dst)[4*i] = ((uint32_t*)src)[4*i]; - ((uint32_t*)dst)[4*i+1] = ((uint32_t*)src)[4*i+1]; - ((uint32_t*)dst)[4*i+2] = ((uint32_t*)src)[4*i+2]; - ((uint32_t*)dst)[4*i+3] = ((uint32_t*)src)[4*i+3]; - } - if(4*i < totalLength){ - ((uint32_t*)dst)[4*i] = ((uint32_t*)src)[4*i]; - ((uint32_t*)dst)[4*i+1] = ((uint32_t*)src)[4*i+1]; - ((uint32_t*)dst)[4*i+2] = ((uint32_t*)src)[4*i+2]; - ((uint32_t*)dst)[4*i+3] = ((uint32_t*)src)[4*i+3]; - + uint8_t *dstPtr, *srcPtr; + dstPtr = (uint8_t*)dst; + srcPtr = (uint8_t*)src; + for(uint32_t i=0;i - -using namespace hc::short_vector; - - -//-- Signed -// Define char vector types -typedef hc::short_vector::char1 char1; -typedef hc::short_vector::char2 char2; -typedef hc::short_vector::char3 char3; -typedef hc::short_vector::char4 char4; - -// Define short vector types -typedef hc::short_vector::short1 short1; -typedef hc::short_vector::short2 short2; -typedef hc::short_vector::short3 short3; -typedef hc::short_vector::short4 short4; - -// Define int vector types -typedef hc::short_vector::int1 int1; -typedef hc::short_vector::int2 int2; -typedef hc::short_vector::int3 int3; -typedef hc::short_vector::int4 int4; - -// Define long vector types -typedef hc::short_vector::long1 long1; -typedef hc::short_vector::long2 long2; -typedef hc::short_vector::long3 long3; -typedef hc::short_vector::long4 long4; - -// Define longlong vector types -typedef hc::short_vector::longlong1 longlong1; -typedef hc::short_vector::longlong2 longlong2; -typedef hc::short_vector::longlong3 longlong3; -typedef hc::short_vector::longlong4 longlong4; - - -//-- Unsigned -// Define uchar vector types -typedef hc::short_vector::uchar1 uchar1; -typedef hc::short_vector::uchar2 uchar2; -typedef hc::short_vector::uchar3 uchar3; -typedef hc::short_vector::uchar4 uchar4; - -// Define ushort vector types -typedef hc::short_vector::ushort1 ushort1; -typedef hc::short_vector::ushort2 ushort2; -typedef hc::short_vector::ushort3 ushort3; -typedef hc::short_vector::ushort4 ushort4; - -// Define uint vector types -typedef hc::short_vector::uint1 uint1; -typedef hc::short_vector::uint2 uint2; -typedef hc::short_vector::uint3 uint3; -typedef hc::short_vector::uint4 uint4; - -// Define ulong vector types -typedef hc::short_vector::ulong1 ulong1; -typedef hc::short_vector::ulong2 ulong2; -typedef hc::short_vector::ulong3 ulong3; -typedef hc::short_vector::ulong4 ulong4; - -// Define ulonglong vector types -typedef hc::short_vector::ulonglong1 ulonglong1; -typedef hc::short_vector::ulonglong2 ulonglong2; -typedef hc::short_vector::ulonglong3 ulonglong3; -typedef hc::short_vector::ulonglong4 ulonglong4; - - -//-- Floating point -// Define float vector types -typedef hc::short_vector::float1 float1; -typedef hc::short_vector::float2 float2; -typedef hc::short_vector::float3 float3; -typedef hc::short_vector::float4 float4; - -// Define double vector types -typedef hc::short_vector::double1 double1; -typedef hc::short_vector::double2 double2; -typedef hc::short_vector::double3 double3; -typedef hc::short_vector::double4 double4; - -#else - -#define __hip_align(name, val, data) \ - __attribute__((aligned(val))) name \ - { data } - -struct __hip_align(char1, 1, signed char x;); -struct __hip_align(uchar1, 1, unsigned char x;); - -struct __hip_align(char2, 2, signed char x; signed char y;); -struct __hip_align(uchar2, 2, unsigned char x; unsigned char y;); - -struct char3 -{ - signed char x, y, z; -}; - -struct uchar3 -{ - unsigned char x, y, z; -}; - -struct __hip_align(char4, 4, signed char x; signed char y; signed char z; signed char w;); -struct __hip_align(uchar4, 4, unsigned char x; unsigned char y; unsigned char z; unsigned char w;); - -struct __hip_align(short1, 2, signed short x;); -struct __hip_align(ushort1, 2, unsigned short x;); - -struct __hip_align(short2, 4, signed short x; signed short y;); -struct __hip_align(ushort2, 4, unsigned short x; unsigned short y;); - -struct short3 -{ - signed short x, y, z; -}; - -struct ushort3 -{ - unsigned short x, y, z; -}; - -struct __hip_align(short4, 8, signed short x; signed short y; signed short z; signed short w;); -struct __hip_align(ushort4, 8, unsigned short x; unsigned short y; unsigned short z; unsigned short w;); - -struct __hip_align(int1, 4, signed int x;); -struct __hip_align(uint1, 4, unsigned int x;); - -struct __hip_align(int2, 8, signed int x; signed int y;); -struct __hip_align(uint2, 8, unsigned int x; unsigned int y;); - -struct int3{ - signed int x, y, z; -}; -struct uint3{ - unsigned int x, y, z; -}; - -struct __hip_align(int4, 16, signed int x; signed int y; signed int z; signed int w;); -struct __hip_align(uint4, 16, unsigned int x; unsigned int y; unsigned int z; unsigned int w;); - -struct __hip_align(long1, 8, long int x;); -struct __hip_align(ulong1, 8, unsigned long x;); - -struct __hip_align(long2, 16, long int x; long int y;); -struct __hip_align(ulong2, 16, unsigned long x; unsigned long y;); - -struct long3{ - long int x, y, z; -}; -struct ulong3{ - unsigned long x, y, z; -}; - -struct __hip_align(long4, 32, long int x; long int y; long int z; long int w;); -struct __hip_align(ulong4, 32, unsigned long x; unsigned long y; unsigned long z; unsigned long w;); - -struct float1 -{ - float x; -}; - -struct __hip_align(float2, 8, float x; float y;); - -struct float3 -{ - float x, y, z; -}; - -struct __hip_align(float4, 16, float x; float y; float z; float w;); - -struct __hip_align(longlong1, 16, long long int x;); -struct __hip_align(ulonglong1, 16, unsigned long long int x;); - -struct __attribute__((aligned(32))) longlong2 -{ - long long int x, y; -}; - -struct __attribute__((aligned(32))) ulonglong2 -{ - unsigned long long int x, y; -}; - -struct longlong3 -{ - long long int x, y, z; -}; - -struct ulonglong3 -{ - unsigned long long int x, y, z; -}; - -struct __attribute__((aligned(64))) longlong4 -{ - long long int x, y, z, w; -}; - -struct __attribute__((aligned(64))) ulonglong4 -{ - unsigned long long int x, y, z, w; -}; - -struct double1 -{ - double x; -}; - -struct __attribute__((aligned(16))) double2 -{ - double x, y; -}; - -struct double3 -{ - double x, y, z; -}; - -struct __attribute__((aligned(32))) double4 -{ - double x, y, z, w; -}; - -#endif - -#if __HCC__ -#include"hip/hcc_detail/host_defines.h" -#define __HIP_DEVICE__ __device__ __host__ -#else -#define __HIP_DEVICE__ -#endif - -__HIP_DEVICE__ char1 make_char1(signed char ); -__HIP_DEVICE__ char2 make_char2(signed char, signed char ); -__HIP_DEVICE__ char3 make_char3(signed char, signed char, signed char ); -__HIP_DEVICE__ char4 make_char4(signed char, signed char, signed char, signed char ); - -__HIP_DEVICE__ short1 make_short1(short ); -__HIP_DEVICE__ short2 make_short2(short, short ); -__HIP_DEVICE__ short3 make_short3(short, short, short ); -__HIP_DEVICE__ short4 make_short4(short, short, short, short ); - -__HIP_DEVICE__ int1 make_int1(int ); -__HIP_DEVICE__ int2 make_int2(int, int ); -__HIP_DEVICE__ int3 make_int3(int, int, int ); -__HIP_DEVICE__ int4 make_int4(int, int, int, int ); - -__HIP_DEVICE__ long1 make_long1(long ); -__HIP_DEVICE__ long2 make_long2(long, long ); -__HIP_DEVICE__ long3 make_long3(long, long, long ); -__HIP_DEVICE__ long4 make_long4(long, long, long, long ); - -__HIP_DEVICE__ longlong1 make_longlong1(long long ); -__HIP_DEVICE__ longlong2 make_longlong2(long long, long long ); -__HIP_DEVICE__ longlong3 make_longlong3(long long, long long, long long ); -__HIP_DEVICE__ longlong4 make_longlong4(long long, long long, long long, long long ); - -__HIP_DEVICE__ uchar1 make_uchar1(unsigned char ); -__HIP_DEVICE__ uchar2 make_uchar2(unsigned char, unsigned char ); -__HIP_DEVICE__ uchar3 make_uchar3(unsigned char, unsigned char, unsigned char ); -__HIP_DEVICE__ uchar4 make_uchar4(unsigned char, unsigned char, unsigned char, unsigned char ); - -__HIP_DEVICE__ ushort1 make_ushort1(unsigned short ); -__HIP_DEVICE__ ushort2 make_ushort2(unsigned short, unsigned short ); -__HIP_DEVICE__ ushort3 make_ushort3(unsigned short, unsigned short, unsigned short ); -__HIP_DEVICE__ ushort4 make_ushort4(unsigned short, unsigned short, unsigned short, unsigned short ); - -__HIP_DEVICE__ uint1 make_uint1(unsigned int ); -__HIP_DEVICE__ uint2 make_uint2(unsigned int, unsigned int ); -__HIP_DEVICE__ uint3 make_uint3(unsigned int, unsigned int, unsigned int ); -__HIP_DEVICE__ uint4 make_uint4(unsigned int, unsigned int, unsigned int, unsigned int ); - -__HIP_DEVICE__ ulong1 make_ulong1(unsigned long ); -__HIP_DEVICE__ ulong2 make_ulong2(unsigned long, unsigned long ); -__HIP_DEVICE__ ulong3 make_ulong3(unsigned long, unsigned long, unsigned long ); -__HIP_DEVICE__ ulong4 make_ulong4(unsigned long, unsigned long, unsigned long, unsigned long ); - -__HIP_DEVICE__ ulonglong1 make_ulonglong1(unsigned long long ); -__HIP_DEVICE__ ulonglong2 make_ulonglong2(unsigned long long, unsigned long long); -__HIP_DEVICE__ ulonglong3 make_ulonglong3(unsigned long long, unsigned long long, unsigned long long); -__HIP_DEVICE__ ulonglong4 make_ulonglong4(unsigned long long, unsigned long long, unsigned long long, unsigned long long ); - -__HIP_DEVICE__ float1 make_float1(float ); -__HIP_DEVICE__ float2 make_float2(float, float ); -__HIP_DEVICE__ float3 make_float3(float, float, float ); -__HIP_DEVICE__ float4 make_float4(float, float, float, float ); - -__HIP_DEVICE__ double1 make_double1(double ); -__HIP_DEVICE__ double2 make_double2(double, double ); -__HIP_DEVICE__ double3 make_double3(double, double, double ); -__HIP_DEVICE__ double4 make_double4(double, double, double, double ); - -/* -///--- -// Inline functions for creating vector types from basic types -#define ONE_COMPONENT_ACCESS(T, VT) inline VT make_ ##VT [[hc]] [[cpu]] (T x) { VT t; t.x = x; return t; }; -#define TWO_COMPONENT_ACCESS(T, VT) inline VT make_ ##VT [[hc]] [[cpu]] (T x, T y) { VT t; t.x=x; t.y=y; return t; }; -#define THREE_COMPONENT_ACCESS(T, VT) inline VT make_ ##VT [[hc]] [[cpu]] (T x, T y, T z) { VT t; t.x=x; t.y=y; t.z=z; return t; }; -#define FOUR_COMPONENT_ACCESS(T, VT) inline VT make_ ##VT [[hc]] [[cpu]] (T x, T y, T z, T w) { VT t; t.x=x; t.y=y; t.z=z; t.w=w; return t; }; - - -//signed: -ONE_COMPONENT_ACCESS (signed char, char1); -TWO_COMPONENT_ACCESS (signed char, char2); -THREE_COMPONENT_ACCESS(signed char, char3); -FOUR_COMPONENT_ACCESS (signed char, char4); - -ONE_COMPONENT_ACCESS (short, short1); -TWO_COMPONENT_ACCESS (short, short2); -THREE_COMPONENT_ACCESS(short, short3); -FOUR_COMPONENT_ACCESS (short, short4); - -ONE_COMPONENT_ACCESS (int, int1); -TWO_COMPONENT_ACCESS (int, int2); -THREE_COMPONENT_ACCESS(int, int3); -FOUR_COMPONENT_ACCESS (int, int4); - -ONE_COMPONENT_ACCESS (long int, long1); -TWO_COMPONENT_ACCESS (long int, long2); -THREE_COMPONENT_ACCESS(long int, long3); -FOUR_COMPONENT_ACCESS (long int, long4); - -ONE_COMPONENT_ACCESS (long long int, ulong1); -TWO_COMPONENT_ACCESS (long long int, ulong2); -THREE_COMPONENT_ACCESS(long long int, ulong3); -FOUR_COMPONENT_ACCESS (long long int, ulong4); - -ONE_COMPONENT_ACCESS (long long int, longlong1); -TWO_COMPONENT_ACCESS (long long int, longlong2); -THREE_COMPONENT_ACCESS(long long int, longlong3); -FOUR_COMPONENT_ACCESS (long long int, longlong4); - - -// unsigned: -ONE_COMPONENT_ACCESS (unsigned char, uchar1); -TWO_COMPONENT_ACCESS (unsigned char, uchar2); -THREE_COMPONENT_ACCESS(unsigned char, uchar3); -FOUR_COMPONENT_ACCESS (unsigned char, uchar4); - -ONE_COMPONENT_ACCESS (unsigned short, ushort1); -TWO_COMPONENT_ACCESS (unsigned short, ushort2); -THREE_COMPONENT_ACCESS(unsigned short, ushort3); -FOUR_COMPONENT_ACCESS (unsigned short, ushort4); - -ONE_COMPONENT_ACCESS (unsigned int, uint1); -TWO_COMPONENT_ACCESS (unsigned int, uint2); -THREE_COMPONENT_ACCESS(unsigned int, uint3); -FOUR_COMPONENT_ACCESS (unsigned int, uint4); - -ONE_COMPONENT_ACCESS (unsigned long int, ulong1); -TWO_COMPONENT_ACCESS (unsigned long int, ulong2); -THREE_COMPONENT_ACCESS(unsigned long int, ulong3); -FOUR_COMPONENT_ACCESS (unsigned long int, ulong4); - -ONE_COMPONENT_ACCESS (unsigned long long int, ulong1); -TWO_COMPONENT_ACCESS (unsigned long long int, ulong2); -THREE_COMPONENT_ACCESS(unsigned long long int, ulong3); -FOUR_COMPONENT_ACCESS (unsigned long long int, ulong4); - -ONE_COMPONENT_ACCESS (unsigned long long int, ulonglong1); -TWO_COMPONENT_ACCESS (unsigned long long int, ulonglong2); -THREE_COMPONENT_ACCESS(unsigned long long int, ulonglong3); -FOUR_COMPONENT_ACCESS (unsigned long long int, ulonglong4); - - -//Floating point -ONE_COMPONENT_ACCESS (float, float1); -TWO_COMPONENT_ACCESS (float, float2); -THREE_COMPONENT_ACCESS(float, float3); -FOUR_COMPONENT_ACCESS (float, float4); - -ONE_COMPONENT_ACCESS (double, double1); -TWO_COMPONENT_ACCESS (double, double2); -THREE_COMPONENT_ACCESS(double, double3); -FOUR_COMPONENT_ACCESS (double, double4); -*/ - -#endif - +/* +Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/** + * @file hcc_detail/hip_vector_types.h + * @brief Defines the different newt vector types for HIP runtime. + */ + +#ifndef HIP_VECTOR_TYPES_H +#define HIP_VECTOR_TYPES_H + +#if defined (__HCC__) && (__hcc_workweek__ < 16032) +#error("This version of HIP requires a newer version of HCC."); +#endif + +#if __HCC__ +#include + +using namespace hc::short_vector; + + +//-- Signed +// Define char vector types +typedef hc::short_vector::char1 char1; +typedef hc::short_vector::char2 char2; +typedef hc::short_vector::char3 char3; +typedef hc::short_vector::char4 char4; + +// Define short vector types +typedef hc::short_vector::short1 short1; +typedef hc::short_vector::short2 short2; +typedef hc::short_vector::short3 short3; +typedef hc::short_vector::short4 short4; + +// Define int vector types +typedef hc::short_vector::int1 int1; +typedef hc::short_vector::int2 int2; +typedef hc::short_vector::int3 int3; +typedef hc::short_vector::int4 int4; + +// Define long vector types +typedef hc::short_vector::long1 long1; +typedef hc::short_vector::long2 long2; +typedef hc::short_vector::long3 long3; +typedef hc::short_vector::long4 long4; + +// Define longlong vector types +typedef hc::short_vector::longlong1 longlong1; +typedef hc::short_vector::longlong2 longlong2; +typedef hc::short_vector::longlong3 longlong3; +typedef hc::short_vector::longlong4 longlong4; + + +//-- Unsigned +// Define uchar vector types +typedef hc::short_vector::uchar1 uchar1; +typedef hc::short_vector::uchar2 uchar2; +typedef hc::short_vector::uchar3 uchar3; +typedef hc::short_vector::uchar4 uchar4; + +// Define ushort vector types +typedef hc::short_vector::ushort1 ushort1; +typedef hc::short_vector::ushort2 ushort2; +typedef hc::short_vector::ushort3 ushort3; +typedef hc::short_vector::ushort4 ushort4; + +// Define uint vector types +typedef hc::short_vector::uint1 uint1; +typedef hc::short_vector::uint2 uint2; +typedef hc::short_vector::uint3 uint3; +typedef hc::short_vector::uint4 uint4; + +// Define ulong vector types +typedef hc::short_vector::ulong1 ulong1; +typedef hc::short_vector::ulong2 ulong2; +typedef hc::short_vector::ulong3 ulong3; +typedef hc::short_vector::ulong4 ulong4; + +// Define ulonglong vector types +typedef hc::short_vector::ulonglong1 ulonglong1; +typedef hc::short_vector::ulonglong2 ulonglong2; +typedef hc::short_vector::ulonglong3 ulonglong3; +typedef hc::short_vector::ulonglong4 ulonglong4; + + +//-- Floating point +// Define float vector types +typedef hc::short_vector::float1 float1; +typedef hc::short_vector::float2 float2; +typedef hc::short_vector::float3 float3; +typedef hc::short_vector::float4 float4; + +// Define double vector types +typedef hc::short_vector::double1 double1; +typedef hc::short_vector::double2 double2; +typedef hc::short_vector::double3 double3; +typedef hc::short_vector::double4 double4; + +#else + +#define __hip_align(name, val, data) \ + __attribute__((aligned(val))) name \ + { data } + +struct __hip_align(char1, 1, signed char x;); +struct __hip_align(uchar1, 1, unsigned char x;); + +struct __hip_align(char2, 2, signed char x; signed char y;); +struct __hip_align(uchar2, 2, unsigned char x; unsigned char y;); + +struct char3 +{ + signed char x, y, z; +}; + +struct uchar3 +{ + unsigned char x, y, z; +}; + +struct __hip_align(char4, 4, signed char x; signed char y; signed char z; signed char w;); +struct __hip_align(uchar4, 4, unsigned char x; unsigned char y; unsigned char z; unsigned char w;); + +struct __hip_align(short1, 2, signed short x;); +struct __hip_align(ushort1, 2, unsigned short x;); + +struct __hip_align(short2, 4, signed short x; signed short y;); +struct __hip_align(ushort2, 4, unsigned short x; unsigned short y;); + +struct short3 +{ + signed short x, y, z; +}; + +struct ushort3 +{ + unsigned short x, y, z; +}; + +struct __hip_align(short4, 8, signed short x; signed short y; signed short z; signed short w;); +struct __hip_align(ushort4, 8, unsigned short x; unsigned short y; unsigned short z; unsigned short w;); + +struct __hip_align(int1, 4, signed int x;); +struct __hip_align(uint1, 4, unsigned int x;); + +struct __hip_align(int2, 8, signed int x; signed int y;); +struct __hip_align(uint2, 8, unsigned int x; unsigned int y;); + +struct int3{ + signed int x, y, z; +}; +struct uint3{ + unsigned int x, y, z; +}; + +struct __hip_align(int4, 16, signed int x; signed int y; signed int z; signed int w;); +struct __hip_align(uint4, 16, unsigned int x; unsigned int y; unsigned int z; unsigned int w;); + +struct __hip_align(long1, 8, long int x;); +struct __hip_align(ulong1, 8, unsigned long x;); + +struct __hip_align(long2, 16, long int x; long int y;); +struct __hip_align(ulong2, 16, unsigned long x; unsigned long y;); + +struct long3{ + long int x, y, z; +}; +struct ulong3{ + unsigned long x, y, z; +}; + +struct __hip_align(long4, 32, long int x; long int y; long int z; long int w;); +struct __hip_align(ulong4, 32, unsigned long x; unsigned long y; unsigned long z; unsigned long w;); + +struct float1 +{ + float x; +}; + +struct __hip_align(float2, 8, float x; float y;); + +struct float3 +{ + float x, y, z; +}; + +struct __hip_align(float4, 16, float x; float y; float z; float w;); + +struct __hip_align(longlong1, 16, long long int x;); +struct __hip_align(ulonglong1, 16, unsigned long long int x;); + +struct __attribute__((aligned(32))) longlong2 +{ + long long int x, y; +}; + +struct __attribute__((aligned(32))) ulonglong2 +{ + unsigned long long int x, y; +}; + +struct longlong3 +{ + long long int x, y, z; +}; + +struct ulonglong3 +{ + unsigned long long int x, y, z; +}; + +struct __attribute__((aligned(64))) longlong4 +{ + long long int x, y, z, w; +}; + +struct __attribute__((aligned(64))) ulonglong4 +{ + unsigned long long int x, y, z, w; +}; + +struct double1 +{ + double x; +}; + +struct __attribute__((aligned(16))) double2 +{ + double x, y; +}; + +struct double3 +{ + double x, y, z; +}; + +struct __attribute__((aligned(32))) double4 +{ + double x, y, z, w; +}; + +#endif + +#if __HCC__ +#include"hip/hcc_detail/host_defines.h" +#define __HIP_DEVICE__ __device__ __host__ +#else +#define __HIP_DEVICE__ +#endif + +__HIP_DEVICE__ char1 make_char1(signed char ); +__HIP_DEVICE__ char2 make_char2(signed char, signed char ); +__HIP_DEVICE__ char3 make_char3(signed char, signed char, signed char ); +__HIP_DEVICE__ char4 make_char4(signed char, signed char, signed char, signed char ); + +__HIP_DEVICE__ short1 make_short1(short ); +__HIP_DEVICE__ short2 make_short2(short, short ); +__HIP_DEVICE__ short3 make_short3(short, short, short ); +__HIP_DEVICE__ short4 make_short4(short, short, short, short ); + +__HIP_DEVICE__ int1 make_int1(int ); +__HIP_DEVICE__ int2 make_int2(int, int ); +__HIP_DEVICE__ int3 make_int3(int, int, int ); +__HIP_DEVICE__ int4 make_int4(int, int, int, int ); + +__HIP_DEVICE__ long1 make_long1(long ); +__HIP_DEVICE__ long2 make_long2(long, long ); +__HIP_DEVICE__ long3 make_long3(long, long, long ); +__HIP_DEVICE__ long4 make_long4(long, long, long, long ); + +__HIP_DEVICE__ longlong1 make_longlong1(long long ); +__HIP_DEVICE__ longlong2 make_longlong2(long long, long long ); +__HIP_DEVICE__ longlong3 make_longlong3(long long, long long, long long ); +__HIP_DEVICE__ longlong4 make_longlong4(long long, long long, long long, long long ); + +__HIP_DEVICE__ uchar1 make_uchar1(unsigned char ); +__HIP_DEVICE__ uchar2 make_uchar2(unsigned char, unsigned char ); +__HIP_DEVICE__ uchar3 make_uchar3(unsigned char, unsigned char, unsigned char ); +__HIP_DEVICE__ uchar4 make_uchar4(unsigned char, unsigned char, unsigned char, unsigned char ); + +__HIP_DEVICE__ ushort1 make_ushort1(unsigned short ); +__HIP_DEVICE__ ushort2 make_ushort2(unsigned short, unsigned short ); +__HIP_DEVICE__ ushort3 make_ushort3(unsigned short, unsigned short, unsigned short ); +__HIP_DEVICE__ ushort4 make_ushort4(unsigned short, unsigned short, unsigned short, unsigned short ); + +__HIP_DEVICE__ uint1 make_uint1(unsigned int ); +__HIP_DEVICE__ uint2 make_uint2(unsigned int, unsigned int ); +__HIP_DEVICE__ uint3 make_uint3(unsigned int, unsigned int, unsigned int ); +__HIP_DEVICE__ uint4 make_uint4(unsigned int, unsigned int, unsigned int, unsigned int ); + +__HIP_DEVICE__ ulong1 make_ulong1(unsigned long ); +__HIP_DEVICE__ ulong2 make_ulong2(unsigned long, unsigned long ); +__HIP_DEVICE__ ulong3 make_ulong3(unsigned long, unsigned long, unsigned long ); +__HIP_DEVICE__ ulong4 make_ulong4(unsigned long, unsigned long, unsigned long, unsigned long ); + +__HIP_DEVICE__ ulonglong1 make_ulonglong1(unsigned long long ); +__HIP_DEVICE__ ulonglong2 make_ulonglong2(unsigned long long, unsigned long long); +__HIP_DEVICE__ ulonglong3 make_ulonglong3(unsigned long long, unsigned long long, unsigned long long); +__HIP_DEVICE__ ulonglong4 make_ulonglong4(unsigned long long, unsigned long long, unsigned long long, unsigned long long ); + +__HIP_DEVICE__ float1 make_float1(float ); +__HIP_DEVICE__ float2 make_float2(float, float ); +__HIP_DEVICE__ float3 make_float3(float, float, float ); +__HIP_DEVICE__ float4 make_float4(float, float, float, float ); + +__HIP_DEVICE__ double1 make_double1(double ); +__HIP_DEVICE__ double2 make_double2(double, double ); +__HIP_DEVICE__ double3 make_double3(double, double, double ); +__HIP_DEVICE__ double4 make_double4(double, double, double, double ); + +/* +///--- +// Inline functions for creating vector types from basic types +#define ONE_COMPONENT_ACCESS(T, VT) inline VT make_ ##VT [[hc]] [[cpu]] (T x) { VT t; t.x = x; return t; }; +#define TWO_COMPONENT_ACCESS(T, VT) inline VT make_ ##VT [[hc]] [[cpu]] (T x, T y) { VT t; t.x=x; t.y=y; return t; }; +#define THREE_COMPONENT_ACCESS(T, VT) inline VT make_ ##VT [[hc]] [[cpu]] (T x, T y, T z) { VT t; t.x=x; t.y=y; t.z=z; return t; }; +#define FOUR_COMPONENT_ACCESS(T, VT) inline VT make_ ##VT [[hc]] [[cpu]] (T x, T y, T z, T w) { VT t; t.x=x; t.y=y; t.z=z; t.w=w; return t; }; + + +//signed: +ONE_COMPONENT_ACCESS (signed char, char1); +TWO_COMPONENT_ACCESS (signed char, char2); +THREE_COMPONENT_ACCESS(signed char, char3); +FOUR_COMPONENT_ACCESS (signed char, char4); + +ONE_COMPONENT_ACCESS (short, short1); +TWO_COMPONENT_ACCESS (short, short2); +THREE_COMPONENT_ACCESS(short, short3); +FOUR_COMPONENT_ACCESS (short, short4); + +ONE_COMPONENT_ACCESS (int, int1); +TWO_COMPONENT_ACCESS (int, int2); +THREE_COMPONENT_ACCESS(int, int3); +FOUR_COMPONENT_ACCESS (int, int4); + +ONE_COMPONENT_ACCESS (long int, long1); +TWO_COMPONENT_ACCESS (long int, long2); +THREE_COMPONENT_ACCESS(long int, long3); +FOUR_COMPONENT_ACCESS (long int, long4); + +ONE_COMPONENT_ACCESS (long long int, ulong1); +TWO_COMPONENT_ACCESS (long long int, ulong2); +THREE_COMPONENT_ACCESS(long long int, ulong3); +FOUR_COMPONENT_ACCESS (long long int, ulong4); + +ONE_COMPONENT_ACCESS (long long int, longlong1); +TWO_COMPONENT_ACCESS (long long int, longlong2); +THREE_COMPONENT_ACCESS(long long int, longlong3); +FOUR_COMPONENT_ACCESS (long long int, longlong4); + + +// unsigned: +ONE_COMPONENT_ACCESS (unsigned char, uchar1); +TWO_COMPONENT_ACCESS (unsigned char, uchar2); +THREE_COMPONENT_ACCESS(unsigned char, uchar3); +FOUR_COMPONENT_ACCESS (unsigned char, uchar4); + +ONE_COMPONENT_ACCESS (unsigned short, ushort1); +TWO_COMPONENT_ACCESS (unsigned short, ushort2); +THREE_COMPONENT_ACCESS(unsigned short, ushort3); +FOUR_COMPONENT_ACCESS (unsigned short, ushort4); + +ONE_COMPONENT_ACCESS (unsigned int, uint1); +TWO_COMPONENT_ACCESS (unsigned int, uint2); +THREE_COMPONENT_ACCESS(unsigned int, uint3); +FOUR_COMPONENT_ACCESS (unsigned int, uint4); + +ONE_COMPONENT_ACCESS (unsigned long int, ulong1); +TWO_COMPONENT_ACCESS (unsigned long int, ulong2); +THREE_COMPONENT_ACCESS(unsigned long int, ulong3); +FOUR_COMPONENT_ACCESS (unsigned long int, ulong4); + +ONE_COMPONENT_ACCESS (unsigned long long int, ulong1); +TWO_COMPONENT_ACCESS (unsigned long long int, ulong2); +THREE_COMPONENT_ACCESS(unsigned long long int, ulong3); +FOUR_COMPONENT_ACCESS (unsigned long long int, ulong4); + +ONE_COMPONENT_ACCESS (unsigned long long int, ulonglong1); +TWO_COMPONENT_ACCESS (unsigned long long int, ulonglong2); +THREE_COMPONENT_ACCESS(unsigned long long int, ulonglong3); +FOUR_COMPONENT_ACCESS (unsigned long long int, ulonglong4); + + +//Floating point +ONE_COMPONENT_ACCESS (float, float1); +TWO_COMPONENT_ACCESS (float, float2); +THREE_COMPONENT_ACCESS(float, float3); +FOUR_COMPONENT_ACCESS (float, float4); + +ONE_COMPONENT_ACCESS (double, double1); +TWO_COMPONENT_ACCESS (double, double2); +THREE_COMPONENT_ACCESS(double, double3); +FOUR_COMPONENT_ACCESS (double, double4); +*/ + +#endif + diff --git a/include/hip/hcc_detail/trace_helper.h b/include/hip/hcc_detail/trace_helper.h index 7a8f1106f0..e75b492e0c 100644 --- a/include/hip/hcc_detail/trace_helper.h +++ b/include/hip/hcc_detail/trace_helper.h @@ -1,21 +1,25 @@ /* Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. + Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR -IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ + //#pragma once #ifndef TRACE_HELPER_H diff --git a/include/hip/hipComplex.h b/include/hip/hip_complex.h similarity index 63% rename from include/hip/hipComplex.h rename to include/hip/hip_complex.h index 27281a2df4..0f4fb0b3d8 100644 --- a/include/hip/hipComplex.h +++ b/include/hip/hip_complex.h @@ -1,19 +1,22 @@ /* Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. + Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR -IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ @@ -22,9 +25,9 @@ THE SOFTWARE. #include #if defined(__HIP_PLATFORM_HCC__) && !defined (__HIP_PLATFORM_NVCC__) -#include +#include #elif defined(__HIP_PLATFORM_NVCC__) && !defined (__HIP_PLATFORM_HCC__) -#include +#include #else #error("Must define exactly one of __HIP_PLATFORM_HCC__ or __HIP_PLATFORM_NVCC__"); #endif diff --git a/include/hip/hip_fp16.h b/include/hip/hip_fp16.h index 6df481270b..b91063998a 100644 --- a/include/hip/hip_fp16.h +++ b/include/hip/hip_fp16.h @@ -1,19 +1,22 @@ /* Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. + Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR -IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ diff --git a/include/hip/hip_runtime_api.h b/include/hip/hip_runtime_api.h index 3406bcbbc9..884cb0c649 100644 --- a/include/hip/hip_runtime_api.h +++ b/include/hip/hip_runtime_api.h @@ -19,6 +19,7 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ + /** * @file hip_runtime_api.h * @@ -183,7 +184,7 @@ typedef enum hipError_t { hipErrorInvalidHandle = 400, hipErrorNotFound = 500, hipErrorIllegalAddress = 700, - + hipErrorInvalidSymbol = 701, // Runtime Error Codes start here. hipErrorMissingConfiguration = 1001, hipErrorMemoryAllocation = 1002, ///< Memory allocation error. diff --git a/include/hip/hip_vector_types.h b/include/hip/hip_vector_types.h index 16b64e40bf..7733d92bda 100644 --- a/include/hip/hip_vector_types.h +++ b/include/hip/hip_vector_types.h @@ -19,6 +19,7 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ + //! hip_vector_types.h : Defines the HIP vector types. #pragma once diff --git a/include/hip/hipblas.h b/include/hip/hipblas.h deleted file mode 100644 index 0e9b493a41..0000000000 --- a/include/hip/hipblas.h +++ /dev/null @@ -1,66 +0,0 @@ -/* -Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -//! HIP = Heterogeneous-compute Interface for Portability -//! -//! Define a extremely thin runtime layer that allows source code to be compiled unmodified -//! through either AMD HCC or NVCC. Key features tend to be in the spirit -//! and terminology of CUDA, but with a portable path to other accelerators as well. -//! -//! This is the master include file for hipblas, wrapping around hcblas and cublas "version 1" -// - -#pragma once - -enum hipblasStatus_t { - HIPBLAS_STATUS_SUCCESS, // Function succeeds - HIPBLAS_STATUS_NOT_INITIALIZED, // HIPBLAS library not initialized - HIPBLAS_STATUS_ALLOC_FAILED, // resource allocation failed - HIPBLAS_STATUS_INVALID_VALUE, // unsupported numerical value was passed to function - HIPBLAS_STATUS_MAPPING_ERROR, // access to GPU memory space failed - HIPBLAS_STATUS_EXECUTION_FAILED, // GPU program failed to execute - HIPBLAS_STATUS_INTERNAL_ERROR, // an internal HIPBLAS operation failed - HIPBLAS_STATUS_NOT_SUPPORTED // cublas supports this, but not hcblas -}; - -enum hipblasOperation_t { - HIPBLAS_OP_N, - HIPBLAS_OP_T, - HIPBLAS_OP_C -}; - -// Some standard header files, these are included by hc.hpp and so want to make them avail on both -// paths to provide a consistent include env and avoid "missing symbol" errors that only appears -// on NVCC path: - -#if defined(__HIP_PLATFORM_HCC__) and not defined (__HIP_PLATFORM_NVCC__) -#include -#elif defined(__HIP_PLATFORM_NVCC__) and not defined (__HIP_PLATFORM_HCC__) -#include -#else -#error("Must define exactly one of __HIP_PLATFORM_HCC__ or __HIP_PLATFORM_NVCC__"); -#endif - - - - - diff --git a/include/hip/nvcc_detail/hip_blas.h b/include/hip/nvcc_detail/hip_blas.h deleted file mode 100644 index f01fb171c7..0000000000 --- a/include/hip/nvcc_detail/hip_blas.h +++ /dev/null @@ -1,268 +0,0 @@ -/* -Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ -#pragma once - -#include -#include -#include - -//HGSOS for Kalmar leave it as C++, only cublas needs C linkage. - -#ifdef __cplusplus -extern "C" { -#endif - - -typedef cublasHandle_t hipblasHandle_t ; -typedef cuComplex hipComplex; - -/* Unsupported types - "cublasFillMode_t", - "cublasDiagType_t", - "cublasSideMode_t", - "cublasPointerMode_t", - "cublasAtomicsMode_t", - "cublasDataType_t" -*/ - - -inline static cublasOperation_t hipOperationToCudaOperation( hipblasOperation_t op) -{ - switch (op) - { - case HIPBLAS_OP_N: - return CUBLAS_OP_N; - - case HIPBLAS_OP_T: - return CUBLAS_OP_T; - - case HIPBLAS_OP_C: - return CUBLAS_OP_C; - - default: - throw "Non existent OP"; - } -} - -inline static hipblasOperation_t CudaOperationToHIPOperation( cublasOperation_t op) -{ - switch (op) - { - case CUBLAS_OP_N : - return HIPBLAS_OP_N; - - case CUBLAS_OP_T : - return HIPBLAS_OP_T; - - case CUBLAS_OP_C : - return HIPBLAS_OP_C; - - default: - throw "Non existent OP"; - } -} - - -inline static hipblasStatus_t hipCUBLASStatusToHIPStatus(cublasStatus_t cuStatus) -{ - switch(cuStatus) - { - case CUBLAS_STATUS_SUCCESS: - return HIPBLAS_STATUS_SUCCESS; - case CUBLAS_STATUS_NOT_INITIALIZED: - return HIPBLAS_STATUS_NOT_INITIALIZED; - case CUBLAS_STATUS_ALLOC_FAILED: - return HIPBLAS_STATUS_ALLOC_FAILED; - case CUBLAS_STATUS_INVALID_VALUE: - return HIPBLAS_STATUS_INVALID_VALUE; - case CUBLAS_STATUS_MAPPING_ERROR: - return HIPBLAS_STATUS_MAPPING_ERROR; - case CUBLAS_STATUS_EXECUTION_FAILED: - return HIPBLAS_STATUS_EXECUTION_FAILED; - case CUBLAS_STATUS_INTERNAL_ERROR: - return HIPBLAS_STATUS_INTERNAL_ERROR; - case CUBLAS_STATUS_NOT_SUPPORTED: - return HIPBLAS_STATUS_NOT_SUPPORTED; - default: - throw "Unimplemented status"; - } -} - - -inline static hipblasStatus_t hipblasCreate(hipblasHandle_t* handle) { - return hipCUBLASStatusToHIPStatus(cublasCreate(&*handle)); -} - -//TODO broke common API semantics, think about this again. -inline static hipblasStatus_t hipblasDestroy(hipblasHandle_t handle) { - return hipCUBLASStatusToHIPStatus(cublasDestroy(handle)); -} - -//note: no handle -inline static hipblasStatus_t hipblasSetVector(int n, int elemSize, const void *x, int incx, void *y, int incy){ - return hipCUBLASStatusToHIPStatus(cublasSetVector(n, elemSize, x, incx, y, incy)); //HGSOS no need for handle -} - -//note: no handle -inline static hipblasStatus_t hipblasGetVector(int n, int elemSize, const void *x, int incx, void *y, int incy){ - return hipCUBLASStatusToHIPStatus(cublasGetVector(n, elemSize, x, incx, y, incy)); //HGSOS no need for handle -} - -//note: no handle -inline static hipblasStatus_t hipblasSetMatrix(int rows, int cols, int elemSize, const void *A, int lda, void *B, int ldb){ - return hipCUBLASStatusToHIPStatus(cublasSetMatrix(rows, cols, elemSize, A, lda, B, ldb)); -} - -//note: no handle -inline static hipblasStatus_t hipblasGetMatrix(int rows, int cols, int elemSize, const void *A, int lda, void *B, int ldb){ - return hipCUBLASStatusToHIPStatus(cublasGetMatrix(rows, cols, elemSize, A, lda, B, ldb)); -} - -inline static hipblasStatus_t hipblasSasum(hipblasHandle_t handle, int n, float *x, int incx, float *result){ - return hipCUBLASStatusToHIPStatus(cublasSasum(handle, n, x, incx, result)); -} - -inline static hipblasStatus_t hipblasDasum(hipblasHandle_t handle, int n, double *x, int incx, double *result){ - return hipCUBLASStatusToHIPStatus(cublasDasum( handle, n, x, incx, result)); -} - -inline static hipblasStatus_t hipblasSasumBatched(hipblasHandle_t handle, int n, float *x, int incx, float *result, int batchCount){ - //TODO warn user that function was demoted to ignore batch - return hipCUBLASStatusToHIPStatus(cublasSasum( handle, n, x, incx, result)); -} - -inline static hipblasStatus_t hipblasDasumBatched(hipblasHandle_t handle, int n, double *x, int incx, double *result, int batchCount){ - //TODO warn user that function was demoted to ignore batch - return hipCUBLASStatusToHIPStatus(cublasDasum(handle, n, x, incx, result)); -} - -inline static hipblasStatus_t hipblasSaxpy(hipblasHandle_t handle, int n, const float *alpha, const float *x, int incx, float *y, int incy) { - return hipCUBLASStatusToHIPStatus(cublasSaxpy(handle, n, alpha, x, incx, y, incy)); -} - -inline static hipblasStatus_t hipblasSaxpyBatched(hipblasHandle_t handle, int n, const float *alpha, const float *x, int incx, float *y, int incy, int batchCount){ - //TODO warn user that function was demoted to ignore batch - return hipCUBLASStatusToHIPStatus(cublasSaxpy(handle, n, alpha, x, incx, y, incy)); -} - -inline static hipblasStatus_t hipblasScopy(hipblasHandle_t handle, int n, const float *x, int incx, float *y, int incy){ - return hipCUBLASStatusToHIPStatus(cublasScopy( handle, n, x, incx, y, incy)); -} - -inline static hipblasStatus_t hipblasDcopy(hipblasHandle_t handle, int n, const double *x, int incx, double *y, int incy){ - return hipCUBLASStatusToHIPStatus(cublasDcopy( handle, n, x, incx, y, incy)); -} - -inline static hipblasStatus_t hipblasScopyBatched(hipblasHandle_t handle, int n, const float *x, int incx, float *y, int incy, int batchCount){ - //TODO warn user that function was demoted to ignore batch - return hipCUBLASStatusToHIPStatus(cublasScopy( handle, n, x, incx, y, incy)); -} - -inline static hipblasStatus_t hipblasDcopyBatched(hipblasHandle_t handle, int n, const double *x, int incx, double *y, int incy, int batchCount){ - //TODO warn user that function was demoted to ignore batch - return hipCUBLASStatusToHIPStatus(cublasDcopy( handle, n, x, incx, y, incy)); -} - -inline static hipblasStatus_t hipblasSdot (hipblasHandle_t handle, int n, const float *x, int incx, const float *y, int incy, float *result){ - return hipCUBLASStatusToHIPStatus(cublasSdot ( handle, n, x, incx, y, incy, result)); -} - -inline static hipblasStatus_t hipblasDdot (hipblasHandle_t handle, int n, const double *x, int incx, const double *y, int incy, double *result){ - return hipCUBLASStatusToHIPStatus(cublasDdot ( handle, n, x, incx, y, incy, result)); -} - -inline static hipblasStatus_t hipblasSdotBatched (hipblasHandle_t handle, int n, const float *x, int incx, const float *y, int incy, float *result, int batchCount){ - //TODO warn user that function was demoted to ignore batch - return hipCUBLASStatusToHIPStatus(cublasSdot ( handle, n, x, incx, y, incy, result)); -} - -inline static hipblasStatus_t hipblasDdotBatched (hipblasHandle_t handle, int n, const double *x, int incx, const double *y, int incy, double *result, int batchCount){ - //TODO warn user that function was demoted to ignore batch - return hipCUBLASStatusToHIPStatus(cublasDdot ( handle, n, x, incx, y, incy, result)); -} - -inline static hipblasStatus_t hipblasSscal(hipblasHandle_t handle, int n, const float *alpha, float *x, int incx){ - return hipCUBLASStatusToHIPStatus(cublasSscal(handle, n, alpha, x, incx)); -} -inline static hipblasStatus_t hipblasDscal(hipblasHandle_t handle, int n, const double *alpha, double *x, int incx){ - return hipCUBLASStatusToHIPStatus(cublasDscal(handle, n, alpha, x, incx)); -} -inline static hipblasStatus_t hipblasSscalBatched(hipblasHandle_t handle, int n, const float *alpha, float *x, int incx, int batchCount){ - //TODO warn user that function was demoted to ignore batch - return hipCUBLASStatusToHIPStatus(cublasSscal(handle, n, alpha, x, incx)); -} -inline static hipblasStatus_t hipblasDscalBatched(hipblasHandle_t handle, int n, const double *alpha, double *x, int incx, int batchCount){ - //TODO warn user that function was demoted to ignore batch - return hipCUBLASStatusToHIPStatus(cublasDscal(handle, n, alpha, x, incx)); -} - -inline static hipblasStatus_t hipblasSgemv(hipblasHandle_t handle, hipblasOperation_t trans, int m, int n, const float *alpha, float *A, int lda, - float *x, int incx, const float *beta, float *y, int incy){ - return hipCUBLASStatusToHIPStatus(cublasSgemv(handle, hipOperationToCudaOperation(trans), m, n, alpha, A, lda, x, incx, beta, y, incy)); -} - -inline static hipblasStatus_t hipblasSgemvBatched(hipblasHandle_t handle, hipblasOperation_t trans, int m, int n, const float *alpha, float *A, int lda, - float *x, int incx, const float *beta, float *y, int incy, int batchCount){ - //TODO warn user that function was demoted to ignore batch - return hipCUBLASStatusToHIPStatus(cublasSgemv(handle, hipOperationToCudaOperation(trans), m, n, alpha, A, lda, x, incx, beta, y, incy)); -} - -inline static hipblasStatus_t hipblasSger(hipblasHandle_t handle, int m, int n, const float *alpha, const float *x, int incx, const float *y, int incy, float *A, int lda){ - return hipCUBLASStatusToHIPStatus(cublasSger(handle, m, n, alpha, x, incx, y, incy, A, lda)); -} - -inline static hipblasStatus_t hipblasSgerBatched(hipblasHandle_t handle, int m, int n, const float *alpha, const float *x, int incx, const float *y, int incy, float *A, int lda, int batchCount){ - //TODO warn user that function was demoted to ignore batch - return hipCUBLASStatusToHIPStatus(cublasSger(handle, m, n, alpha, x, incx, y, incy, A, lda)); -} - -inline static hipblasStatus_t hipblasSgemm(hipblasHandle_t handle, hipblasOperation_t transa, hipblasOperation_t transb, - int m, int n, int k, const float *alpha, float *A, int lda, float *B, int ldb, const float *beta, float *C, int ldc){ - return hipCUBLASStatusToHIPStatus(cublasSgemm( handle, hipOperationToCudaOperation(transa), hipOperationToCudaOperation(transb), m, n, k, alpha, A, lda, B, ldb, beta, C, ldc)); -} - -inline static hipblasStatus_t hipblasCgemm(hipblasHandle_t handle, hipblasOperation_t transa, hipblasOperation_t transb, - int m, int n, int k, const hipComplex *alpha, hipComplex *A, int lda, hipComplex *B, int ldb, const hipComplex *beta, hipComplex *C, int ldc){ - return hipCUBLASStatusToHIPStatus(cublasCgemm( handle, hipOperationToCudaOperation(transa), hipOperationToCudaOperation(transb), m, n, k, alpha, A, lda, B, ldb, beta, C, ldc)); -} - -inline static hipblasStatus_t hipblasSgemmBatched(hipblasHandle_t handle, hipblasOperation_t transa, hipblasOperation_t transb, - int m, int n, int k, const float *alpha, float *A, int lda, float *B, int ldb, const float *beta, float *C, int ldc, int batchCount){ - //TODO incompatible API - return HIPBLAS_STATUS_NOT_SUPPORTED; - //return hipCUBLASStatusToHIPStatus(cublasSgemmBatched( handle, hipOperationToCudaOperation(transa), hipOperationToCudaOperation(transb), m, n, k, alpha, A, lda, B, ldb, beta, C, ldc, batchCount)); -} - - -inline static hipblasStatus_t hipblasCgemmBatched(hipblasHandle_t handle, hipblasOperation_t transa, hipblasOperation_t transb, - int m, int n, int k, const hipComplex *alpha, hipComplex *A, int lda, hipComplex *B, int ldb, const hipComplex *beta, hipComplex *C, int ldc, int batchCount){ - - //TODO incompatible API - return HIPBLAS_STATUS_NOT_SUPPORTED; - //return hipCUBLASStatusToHIPStatus(cublasCgemmBatched( handle, hipOperationToCudaOperation(transa), hipOperationToCudaOperation(transb), m, n, k, alpha, A, lda, B, ldb, beta, C, ldc, batchCount)); -} - -#ifdef __cplusplus -} -#endif - - diff --git a/include/hip/nvcc_detail/hipComplex.h b/include/hip/nvcc_detail/hip_complex.h similarity index 73% rename from include/hip/nvcc_detail/hipComplex.h rename to include/hip/nvcc_detail/hip_complex.h index b5c182bd4d..174cabc12c 100644 --- a/include/hip/nvcc_detail/hipComplex.h +++ b/include/hip/nvcc_detail/hip_complex.h @@ -1,3 +1,25 @@ +/* +Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + #ifndef HIPCOMPLEX_H #define HIPCOMPLEX_H diff --git a/include/hip/nvcc_detail/hip_runtime.h b/include/hip/nvcc_detail/hip_runtime.h index 569d6297bf..2c774bfb7d 100644 --- a/include/hip/nvcc_detail/hip_runtime.h +++ b/include/hip/nvcc_detail/hip_runtime.h @@ -19,6 +19,7 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ + #pragma once #include @@ -45,7 +46,7 @@ kernelName<<>>(0, ##__VA_ARGS__);\ #define __HIP_ARCH_HAS_GLOBAL_FLOAT_ATOMIC_EXCH__ (__CUDA_ARCH__ >= 110) #define __HIP_ARCH_HAS_SHARED_INT32_ATOMICS__ (__CUDA_ARCH__ >= 120) #define __HIP_ARCH_HAS_SHARED_FLOAT_ATOMIC_EXCH__ (__CUDA_ARCH__ >= 120) -#define __HIP_ARCH_HAS_FLOAT_ATOMIC_ADD__ +#define __HIP_ARCH_HAS_FLOAT_ATOMIC_ADD__ (__CUDA_ARCH__ >= 200) // 64-bit Atomics: #define __HIP_ARCH_HAS_GLOBAL_INT64_ATOMICS__ (__CUDA_ARCH__ >= 200) @@ -92,6 +93,8 @@ kernelName<<>>(0, ##__VA_ARGS__);\ #define hipGridDim_y gridDim.y #define hipGridDim_z gridDim.z +#define HIP_SYMBOL(X) X + /** * extern __shared__ */ @@ -102,5 +105,3 @@ kernelName<<>>(0, ##__VA_ARGS__);\ #define HIP_DYNAMIC_SHARED_ATTRIBUTE #endif - - diff --git a/include/hip/nvcc_detail/hip_runtime_api.h b/include/hip/nvcc_detail/hip_runtime_api.h index 4088064f87..f7d67e6662 100644 --- a/include/hip/nvcc_detail/hip_runtime_api.h +++ b/include/hip/nvcc_detail/hip_runtime_api.h @@ -19,6 +19,7 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ + #pragma once #include @@ -67,11 +68,13 @@ hipMemcpyHostToHost #define HIP_LAUNCH_PARAM_BUFFER_POINTER CU_LAUNCH_PARAM_BUFFER_POINTER #define HIP_LAUNCH_PARAM_BUFFER_SIZE CU_LAUNCH_PARAM_BUFFER_SIZE #define HIP_LAUNCH_PARAM_END CU_LAUNCH_PARAM_END +#define hipLimitMallocHeapSize cudaLimitMallocHeapSize typedef cudaEvent_t hipEvent_t; typedef cudaStream_t hipStream_t; typedef cudaIpcEventHandle_t hipIpcEventHandle_t; typedef cudaIpcMemHandle_t hipIpcMemHandle_t; +typedef cudaLimit hipLimit_t; typedef CUcontext hipCtx_t; typedef CUsharedconfig hipSharedMemConfig; typedef CUfunc_cache hipFuncCache; @@ -101,6 +104,7 @@ switch(cuError) { case cudaErrorPeerAccessAlreadyEnabled : return hipErrorPeerAccessAlreadyEnabled ; case cudaErrorHostMemoryAlreadyRegistered : return hipErrorHostMemoryAlreadyRegistered ; case cudaErrorHostMemoryNotRegistered : return hipErrorHostMemoryNotRegistered ; + case cudaErrorUnsupportedLimit : return hipErrorUnsupportedLimit ; default : return hipErrorUnknown; // Note - translated error. }; } @@ -185,6 +189,16 @@ inline static hipError_t hipFree(void* ptr) { return hipCUDAErrorTohipError(cudaFree(ptr)); } +inline static hipError_t hipMallocHost(void** ptr, size_t size) __attribute__((deprecated("use hipHostMalloc instead"))); +inline static hipError_t hipMallocHost(void** ptr, size_t size) { + return hipCUDAErrorTohipError(cudaMallocHost(ptr, size)); +} + +inline static hipError_t hipHostAlloc(void** ptr, size_t size, unsigned int flags) __attribute__((deprecated("use hipHostMalloc instead"))); +inline static hipError_t hipHostAlloc(void** ptr, size_t size, unsigned int flags){ + return hipCUDAErrorTohipError(cudaHostAlloc(ptr, size, flags)); +} + inline static hipError_t hipHostMalloc(void** ptr, size_t size, unsigned int flags){ return hipCUDAErrorTohipError(cudaHostAlloc(ptr, size, flags)); } @@ -205,6 +219,11 @@ inline static hipError_t hipHostUnregister(void* ptr){ return hipCUDAErrorTohipError(cudaHostUnregister(ptr)); } +inline static hipError_t hipFreeHost(void* ptr) __attribute__((deprecated("use hipHostFree instead"))); +inline static hipError_t hipFreeHost(void* ptr) { + return hipCUDAErrorTohipError(cudaFreeHost(ptr)); +} + inline static hipError_t hipHostFree(void* ptr) { return hipCUDAErrorTohipError(cudaFreeHost(ptr)); } @@ -282,9 +301,14 @@ inline static hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeB } -inline static hipError_t hipMemcpyToSymbol(const char * symbolName, const void* src, size_t sizeBytes, size_t offset = 0, hipMemcpyKind copyType = hipMemcpyHostToDevice) { - return hipCUDAErrorTohipError(cudaMemcpyToSymbol(symbolName, src, sizeBytes, offset, hipMemcpyKindToCudaMemcpyKind(copyType))); +inline static hipError_t hipMemcpyToSymbol(const void* symbol, const void* src, size_t sizeBytes, size_t offset = 0, hipMemcpyKind copyType = hipMemcpyHostToDevice) { + return hipCUDAErrorTohipError(cudaMemcpyToSymbol(symbol, src, sizeBytes, offset, hipMemcpyKindToCudaMemcpyKind(copyType))); } + +inline static hipError_t hipMemcpyToSymbolAsync(const void* symbol, const void* src, size_t sizeBytes, size_t offset, hipMemcpyKind copyType, hipStream_t stream) { + return hipCUDAErrorTohipError(cudaMemcpyToSymbolAsync(symbol, src, sizeBytes, offset, hipMemcpyKindToCudaMemcpyKind(copyType))); +} + inline static hipError_t hipDeviceSynchronize() { return hipCUDAErrorTohipError(cudaDeviceSynchronize()); } @@ -710,12 +734,17 @@ inline static hipError_t hipDeviceGetName(char *name,int len,hipDevice_t device) return hipCUResultTohipError(cuDeviceGetName(name,len,device)); } -inline static hipError_t hipDeviceGetPCIBusId (int *pciBusId,int len,hipDevice_t device) +inline static hipError_t hipDeviceGetPCIBusId(int *pciBusId,int len,hipDevice_t device) { return hipCUResultTohipError(cuDeviceGetPCIBusId((char*)pciBusId,len,device)); } -inline static hipError_t hipDeviceTotalMem (size_t *bytes,hipDevice_t device) +inline static hipError_t hipDeviceGetLimit(size_t *pValue, hipLimit_t limit) +{ + return hipCUDAErrorTohipError(cudaDeviceGetLimit(pValue, limit); +} + +inline static hipError_t hipDeviceTotalMem(size_t *bytes,hipDevice_t device) { return hipCUResultTohipError(cuDeviceTotalMem(bytes,device)); } diff --git a/packaging/hip_base.postinst b/packaging/hip_base.postinst index daf0591081..5348406489 100755 --- a/packaging/hip_base.postinst +++ b/packaging/hip_base.postinst @@ -19,7 +19,8 @@ for f in $HIPBINFILES do ln -s $f $(basename $f) done -popd >/dev/null + ln -s $HIPDIR/bin/.hipVersion .hipVersion +popd # Soft-link to headers HIPINCDIR=$HIPDIR/include/hip diff --git a/packaging/hip_base.prerm b/packaging/hip_base.prerm index 2953e16f2f..5985cc30ec 100755 --- a/packaging/hip_base.prerm +++ b/packaging/hip_base.prerm @@ -19,6 +19,7 @@ for f in $HIPBINFILES do rm $(basename $f) done +rm .hipVersion popd rmdir --ignore-fail-on-non-empty $ROCMBINDIR diff --git a/packaging/hip_base.txt b/packaging/hip_base.txt index 8e02baca9b..a208bc3463 100644 --- a/packaging/hip_base.txt +++ b/packaging/hip_base.txt @@ -6,7 +6,7 @@ if(@BUILD_HIPIFY_CLANG@) install(PROGRAMS @PROJECT_BINARY_DIR@/hipify-clang/hipify-clang DESTINATION bin) endif() install(DIRECTORY @hip_SOURCE_DIR@/include DESTINATION .) -install(FILES @PROJECT_BINARY_DIR@/.version DESTINATION bin) +install(FILES @PROJECT_BINARY_DIR@/.hipVersion DESTINATION bin) install(DIRECTORY @hip_SOURCE_DIR@/cmake DESTINATION .) ############################# diff --git a/packaging/hip_hcc.postinst b/packaging/hip_hcc.postinst index 0a3b31e0e0..14179db767 100755 --- a/packaging/hip_hcc.postinst +++ b/packaging/hip_hcc.postinst @@ -19,5 +19,6 @@ for f in $HIPLIBFILES do ln -s $f $(basename $f) done + ln -s $HIPDIR/lib/.hipInfo .hipInfo popd diff --git a/packaging/hip_hcc.prerm b/packaging/hip_hcc.prerm index 42f864f323..dda313a3a4 100755 --- a/packaging/hip_hcc.prerm +++ b/packaging/hip_hcc.prerm @@ -19,6 +19,7 @@ for f in $HIPLIBFILES do rm $(basename $f) done +rm .hipInfo popd rmdir --ignore-fail-on-non-empty $ROCMLIBDIR diff --git a/packaging/hip_hcc.txt b/packaging/hip_hcc.txt index 662a026b87..459ecd449a 100644 --- a/packaging/hip_hcc.txt +++ b/packaging/hip_hcc.txt @@ -8,7 +8,7 @@ elseif(@HIP_LIB_TYPE@ EQUAL 1) else() install(FILES @PROJECT_BINARY_DIR@/libhip_hcc.so DESTINATION lib) endif() -install(FILES @PROJECT_BINARY_DIR@/.buildInfo DESTINATION lib) +install(FILES @PROJECT_BINARY_DIR@/.hipInfo DESTINATION lib) ############################# # Packaging steps @@ -27,12 +27,20 @@ set(CPACK_PACKAGE_FILE_NAME ${CPACK_PACKAGE_NAME}-${CPACK_PACKAGE_VERSION_MAJOR} set(CPACK_GENERATOR "TGZ;DEB;RPM") set(CPACK_BINARY_DEB "ON") set(CPACK_DEBIAN_PACKAGE_CONTROL_EXTRA "${PROJECT_BINARY_DIR}/postinst;${PROJECT_BINARY_DIR}/prerm") -set(CPACK_DEBIAN_PACKAGE_DEPENDS "hip_base (= ${CPACK_PACKAGE_VERSION}), hcc_lc (= @HCC_VERSION@), rocm-profiler") +if(@COMPILE_HIP_ATP_MARKER@) + set(CPACK_DEBIAN_PACKAGE_DEPENDS "hip_base (= ${CPACK_PACKAGE_VERSION}), hcc_lc (= @HCC_VERSION@), rocm-profiler") +else() + set(CPACK_DEBIAN_PACKAGE_DEPENDS "hip_base (= ${CPACK_PACKAGE_VERSION}), hcc_lc (= @HCC_VERSION@)") +endif() set(CPACK_BINARY_RPM "ON") set(CPACK_RPM_PACKAGE_ARCHITECTURE "x86_64") set(CPACK_RPM_POST_INSTALL_SCRIPT_FILE "${PROJECT_BINARY_DIR}/postinst") set(CPACK_RPM_PRE_UNINSTALL_SCRIPT_FILE "${PROJECT_BINARY_DIR}/prerm") set(CPACK_RPM_PACKAGE_AUTOREQPROV " no") -set(CPACK_RPM_PACKAGE_REQUIRES "hip_base = ${CPACK_PACKAGE_VERSION}, hcc_lc = @HCC_VERSION@, rocm-profiler") +if(@COMPILE_HIP_ATP_MARKER@) + set(CPACK_RPM_PACKAGE_REQUIRES "hip_base = ${CPACK_PACKAGE_VERSION}, hcc_lc = @HCC_VERSION@, rocm-profiler") +else() + set(CPACK_RPM_PACKAGE_REQUIRES "hip_base = ${CPACK_PACKAGE_VERSION}, hcc_lc = @HCC_VERSION@") +endif() set(CPACK_SOURCE_GENERATOR "TGZ") include(CPack) diff --git a/samples/0_Intro/bit_extract/bit_extract.cpp b/samples/0_Intro/bit_extract/bit_extract.cpp index 06ca349960..1535d2bd98 100644 --- a/samples/0_Intro/bit_extract/bit_extract.cpp +++ b/samples/0_Intro/bit_extract/bit_extract.cpp @@ -19,6 +19,7 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ + #include #include #include "hip/hip_runtime.h" diff --git a/samples/0_Intro/hcc_dialects/vadd_amp_arrayview.cpp b/samples/0_Intro/hcc_dialects/vadd_amp_arrayview.cpp index 485b64f68d..a3162bccb9 100644 --- a/samples/0_Intro/hcc_dialects/vadd_amp_arrayview.cpp +++ b/samples/0_Intro/hcc_dialects/vadd_amp_arrayview.cpp @@ -1,3 +1,25 @@ +/* +Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + // Simple test showing how to use C++AMP syntax with array_view. // The code uses AMP's array_view class, which provides automatic data synchronization // of data between the host and the accelerator. As noted below, the HCC runtime diff --git a/samples/0_Intro/hcc_dialects/vadd_hc_am.cpp b/samples/0_Intro/hcc_dialects/vadd_hc_am.cpp index 53a137f74c..c83051da29 100644 --- a/samples/0_Intro/hcc_dialects/vadd_hc_am.cpp +++ b/samples/0_Intro/hcc_dialects/vadd_hc_am.cpp @@ -1,3 +1,25 @@ +/* +Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + // Simple test showing how to use HC syntax with AM (accelerator memory). // AM provides a set of c-style memory management routines for allocating, // freeing, and copying memory. am_alloc returns a device pointer diff --git a/samples/0_Intro/hcc_dialects/vadd_hc_array.cpp b/samples/0_Intro/hcc_dialects/vadd_hc_array.cpp index bda3adf376..b076b926e1 100644 --- a/samples/0_Intro/hcc_dialects/vadd_hc_array.cpp +++ b/samples/0_Intro/hcc_dialects/vadd_hc_array.cpp @@ -1,3 +1,25 @@ +/* +Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + // Simple test showing how to use HC syntax with array. // Array provides a type-safe C++ mechanism to allocate accelerator memory. // Like array_view, hc::array provides multi-dimensional indexing capability, diff --git a/samples/0_Intro/hcc_dialects/vadd_hc_array.hc b/samples/0_Intro/hcc_dialects/vadd_hc_array.hc index d57b9a7e14..9ed016c7ad 100644 --- a/samples/0_Intro/hcc_dialects/vadd_hc_array.hc +++ b/samples/0_Intro/hcc_dialects/vadd_hc_array.hc @@ -1,3 +1,25 @@ +/* +Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + #include int main(int argc, char *argv[]) diff --git a/samples/0_Intro/hcc_dialects/vadd_hc_arrayview.cpp b/samples/0_Intro/hcc_dialects/vadd_hc_arrayview.cpp index 2585f47001..15f5de4abb 100644 --- a/samples/0_Intro/hcc_dialects/vadd_hc_arrayview.cpp +++ b/samples/0_Intro/hcc_dialects/vadd_hc_arrayview.cpp @@ -1,3 +1,25 @@ +/* +Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + // Simple test showing how to use HC syntax with array_view. // The code uses AMP's array_view class, which provides automatic data synchronization // of data between the host and the accelerator. As noted below, the HCC runtime diff --git a/samples/0_Intro/hcc_dialects/vadd_hip.cpp b/samples/0_Intro/hcc_dialects/vadd_hip.cpp index c8f425ff90..f2afa378e0 100644 --- a/samples/0_Intro/hcc_dialects/vadd_hip.cpp +++ b/samples/0_Intro/hcc_dialects/vadd_hip.cpp @@ -1,3 +1,25 @@ +/* +Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + #include "hip/hip_runtime.h" __global__ void vadd_hip(hipLaunchParm lp, const float *a, const float *b, float *c, int N) diff --git a/samples/0_Intro/module_api/runKernel.cpp b/samples/0_Intro/module_api/runKernel.cpp index 5f16677fc2..b91507aaa4 100644 --- a/samples/0_Intro/module_api/runKernel.cpp +++ b/samples/0_Intro/module_api/runKernel.cpp @@ -1,19 +1,22 @@ /* Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. + Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR -IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ @@ -60,32 +63,37 @@ int main(){ uint32_t len = LEN; uint32_t one = 1; - std::vectorargBuffer(5); - uint32_t *ptr32_t = (uint32_t*)&argBuffer[0]; - memcpy(ptr32_t + 0, &one, sizeof(uint32_t)); - memcpy(ptr32_t + 1, &one, sizeof(uint32_t)); - memcpy(ptr32_t + 2, &one, sizeof(uint32_t)); - memcpy(ptr32_t + 3, &len, sizeof(uint32_t)); - memcpy(ptr32_t + 4, &one, sizeof(uint32_t)); - memcpy(ptr32_t + 5, &one, sizeof(uint32_t)); - memcpy(&argBuffer[3], &Ad, sizeof(void*)); - memcpy(&argBuffer[4], &Bd, sizeof(void*)); + struct { + uint32_t _hidden[6]; + void * _Ad; + void * _Bd; + } args; + + for (int i=0; i<6; i++) { + args._hidden[i] = 0; + } + args._Ad = Ad; + args._Bd = Bd; + #endif #ifdef __HIP_PLATFORM_NVCC__ - uint32_t one = 1; - std::vectorargBuffer(3); - uint32_t *ptr32_t = (uint32_t*)&argBuffer[0]; - memcpy(ptr32_t + 0, &one, sizeof(uint32_t)); - memcpy(&argBuffer[1], &Ad, sizeof(void*)); - memcpy(&argBuffer[2], &Bd, sizeof(void*)); + struct { + uint32_t _hidden[1]; + void * _Ad; + void * _Bd; + } args; + + args._hidden[0] = 0; + args._Ad = Ad; + args._Bd = Bd; #endif - size_t size = argBuffer.size()*sizeof(void*); + size_t size = sizeof(args); void *config[] = { - HIP_LAUNCH_PARAM_BUFFER_POINTER, &argBuffer[0], + HIP_LAUNCH_PARAM_BUFFER_POINTER, &args, HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, HIP_LAUNCH_PARAM_END }; @@ -93,10 +101,20 @@ int main(){ hipModuleLaunchKernel(Function, 1, 1, 1, LEN, 1, 1, 0, 0, NULL, (void**)&config); hipMemcpyDtoH(B, Bd, SIZE); - for(uint32_t i=LEN-4;i #include @@ -26,9 +27,9 @@ THE SOFTWARE. {\ cudaError_t error = cmd;\ if (error != cudaSuccess) { \ - fprintf(stderr, "error: '%s'(%d) at %s:%d\n", cudaGetErrorString(error), error,__FILE__, __LINE__); \ - exit(EXIT_FAILURE);\ - }\ + fprintf(stderr, "error: '%s'(%d) at %s:%d\n", cudaGetErrorString(error), error,__FILE__, __LINE__); \ + exit(EXIT_FAILURE);\ + }\ } @@ -43,55 +44,55 @@ vector_square(T *C_d, const T *A_d, size_t N) size_t stride = blockDim.x * gridDim.x ; for (size_t i=offset; i>> (C_d, A_d, N); + printf ("info: launch 'vector_square' kernel\n"); + vector_square <<>> (C_d, A_d, N); - printf ("info: copy Device2Host\n"); + printf ("info: copy Device2Host\n"); CHECK ( cudaMemcpy(C_h, C_d, Nbytes, cudaMemcpyDeviceToHost)); - printf ("info: check result\n"); + printf ("info: check result\n"); for (size_t i=0; i #include "hip/hip_runtime.h" diff --git a/samples/1_Utils/hipDispatchLatency/hipDispatchLatency.cpp b/samples/1_Utils/hipDispatchLatency/hipDispatchLatency.cpp index e686c07683..b343386b5c 100644 --- a/samples/1_Utils/hipDispatchLatency/hipDispatchLatency.cpp +++ b/samples/1_Utils/hipDispatchLatency/hipDispatchLatency.cpp @@ -1,13 +1,16 @@ /* Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. + Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE diff --git a/samples/1_Utils/hipInfo/hipInfo.cpp b/samples/1_Utils/hipInfo/hipInfo.cpp index 46741a9c91..0403162bd1 100644 --- a/samples/1_Utils/hipInfo/hipInfo.cpp +++ b/samples/1_Utils/hipInfo/hipInfo.cpp @@ -19,6 +19,7 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ + #include #include #include "hip/hip_runtime.h" diff --git a/samples/2_Cookbook/0_MatrixTranspose/MatrixTranspose.cpp b/samples/2_Cookbook/0_MatrixTranspose/MatrixTranspose.cpp index 42445374b0..91733c025a 100644 --- a/samples/2_Cookbook/0_MatrixTranspose/MatrixTranspose.cpp +++ b/samples/2_Cookbook/0_MatrixTranspose/MatrixTranspose.cpp @@ -19,6 +19,7 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ + #include // hip header file diff --git a/samples/2_Cookbook/1_hipEvent/hipEvent.cpp b/samples/2_Cookbook/1_hipEvent/hipEvent.cpp index 76688a7b05..1abe1180da 100644 --- a/samples/2_Cookbook/1_hipEvent/hipEvent.cpp +++ b/samples/2_Cookbook/1_hipEvent/hipEvent.cpp @@ -19,6 +19,7 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ + #include // hip header file diff --git a/samples/2_Cookbook/2_HIP_ATP_MARKER/MatrixTranspose.cpp b/samples/2_Cookbook/2_HIP_ATP_MARKER/MatrixTranspose.cpp index 76688a7b05..1abe1180da 100644 --- a/samples/2_Cookbook/2_HIP_ATP_MARKER/MatrixTranspose.cpp +++ b/samples/2_Cookbook/2_HIP_ATP_MARKER/MatrixTranspose.cpp @@ -19,6 +19,7 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ + #include // hip header file diff --git a/samples/2_Cookbook/3_shared_memory/sharedMemory.cpp b/samples/2_Cookbook/3_shared_memory/sharedMemory.cpp index 433fada9d2..9950b8d020 100644 --- a/samples/2_Cookbook/3_shared_memory/sharedMemory.cpp +++ b/samples/2_Cookbook/3_shared_memory/sharedMemory.cpp @@ -19,6 +19,7 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ + #include // hip header file diff --git a/samples/2_Cookbook/4_shfl/shfl.cpp b/samples/2_Cookbook/4_shfl/shfl.cpp index 2819b1f042..07d5cd42d2 100644 --- a/samples/2_Cookbook/4_shfl/shfl.cpp +++ b/samples/2_Cookbook/4_shfl/shfl.cpp @@ -19,6 +19,7 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ + #include // hip header file diff --git a/samples/2_Cookbook/5_2dshfl/2dshfl.cpp b/samples/2_Cookbook/5_2dshfl/2dshfl.cpp index 783879b054..16e5c74892 100644 --- a/samples/2_Cookbook/5_2dshfl/2dshfl.cpp +++ b/samples/2_Cookbook/5_2dshfl/2dshfl.cpp @@ -19,6 +19,7 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ + #include // hip header file diff --git a/src/device_util.cpp b/src/device_util.cpp old mode 100755 new mode 100644 index 6c608d891e..b267c8cbd7 --- a/src/device_util.cpp +++ b/src/device_util.cpp @@ -1,19 +1,22 @@ /* Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. + Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR -IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ diff --git a/src/hip_context.cpp b/src/hip_context.cpp index d2199ec11f..4f45f049f6 100644 --- a/src/hip_context.cpp +++ b/src/hip_context.cpp @@ -1,19 +1,22 @@ /* Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. + Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR -IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ diff --git a/src/hip_device.cpp b/src/hip_device.cpp index a677402b69..6e36b935c8 100644 --- a/src/hip_device.cpp +++ b/src/hip_device.cpp @@ -1,19 +1,22 @@ /* Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. + Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR -IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ @@ -68,7 +71,7 @@ hipError_t hipGetDeviceCount(int *count) return e; } -hipError_t hipDeviceSetCacheConfig ( hipFuncCache cacheConfig ) +hipError_t hipDeviceSetCacheConfig(hipFuncCache cacheConfig) { HIP_INIT_API(cacheConfig); @@ -77,7 +80,7 @@ hipError_t hipDeviceSetCacheConfig ( hipFuncCache cacheConfig ) return ihipLogStatus(hipSuccess); } -hipError_t hipDeviceGetCacheConfig ( hipFuncCache *cacheConfig ) +hipError_t hipDeviceGetCacheConfig(hipFuncCache *cacheConfig) { HIP_INIT_API(cacheConfig); @@ -86,7 +89,23 @@ hipError_t hipDeviceGetCacheConfig ( hipFuncCache *cacheConfig ) return ihipLogStatus(hipSuccess); } -hipError_t hipFuncSetCacheConfig ( hipFuncCache cacheConfig ) +extern "C" size_t g_malloc_heap_size; + +hipError_t hipDeviceGetLimit (size_t *pValue, hipLimit_t limit) +{ + HIP_INIT_API(pValue, limit); + if(pValue == nullptr) { + return ihipLogStatus(hipErrorInvalidValue); + } + if(limit == hipLimitMallocHeapSize) { + *pValue = g_malloc_heap_size; + return ihipLogStatus(hipSuccess); + }else{ + return ihipLogStatus(hipErrorUnsupportedLimit); + } +} + +hipError_t hipFuncSetCacheConfig (hipFuncCache cacheConfig) { HIP_INIT_API(cacheConfig); @@ -95,7 +114,7 @@ hipError_t hipFuncSetCacheConfig ( hipFuncCache cacheConfig ) return ihipLogStatus(hipSuccess); } -hipError_t hipDeviceSetSharedMemConfig ( hipSharedMemConfig config ) +hipError_t hipDeviceSetSharedMemConfig (hipSharedMemConfig config) { HIP_INIT_API(config); @@ -104,7 +123,7 @@ hipError_t hipDeviceSetSharedMemConfig ( hipSharedMemConfig config ) return ihipLogStatus(hipSuccess); } -hipError_t hipDeviceGetSharedMemConfig ( hipSharedMemConfig * pConfig ) +hipError_t hipDeviceGetSharedMemConfig (hipSharedMemConfig *pConfig) { HIP_INIT_API(pConfig); diff --git a/src/hip_error.cpp b/src/hip_error.cpp index 840362f314..72e7ab0084 100644 --- a/src/hip_error.cpp +++ b/src/hip_error.cpp @@ -1,19 +1,22 @@ /* Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. + Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR -IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ diff --git a/src/hip_event.cpp b/src/hip_event.cpp index 084625b41d..441918d6c4 100644 --- a/src/hip_event.cpp +++ b/src/hip_event.cpp @@ -1,19 +1,22 @@ /* Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. + Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR -IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ diff --git a/src/hip_fp16.cpp b/src/hip_fp16.cpp index 5b179b9ba5..1a9d04474f 100644 --- a/src/hip_fp16.cpp +++ b/src/hip_fp16.cpp @@ -1,19 +1,22 @@ /* Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. + Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR -IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ @@ -25,6 +28,13 @@ static const __half __half_value_zero_float = {0x0}; static const unsigned __half_pos_inf = 0x7C00; static const unsigned __half_neg_inf = 0xFC00; +typedef struct{ + union{ + float f; + unsigned u; + }; +} struct_float; + static __device__ float cvt_half_to_float(__half a){ struct_float ret = {0}; if(a.x == 0){ @@ -362,4 +372,3 @@ __device__ __half2 __lowhigh2highlow(const __half2 a){ __device__ __half2 __low2half2(const __half2 a, const __half2 b){ return {a.q, b.q}; } - diff --git a/src/hip_hcc.cpp b/src/hip_hcc.cpp index 1fc0ced6bf..8bf7f1091c 100644 --- a/src/hip_hcc.cpp +++ b/src/hip_hcc.cpp @@ -19,6 +19,7 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ + /** * @file hip_hcc.cpp * @@ -67,6 +68,7 @@ int HIP_VISIBLE_DEVICES = 0; /* Contains a comma-separated sequence of GPU ident int HIP_NUM_KERNELS_INFLIGHT = 128; int HIP_BLOCKING_SYNC = 0; +#define HIP_USE_PRODUCT_NAME 0 //#define DISABLE_COPY_EXT 1 @@ -81,6 +83,84 @@ unsigned g_deviceCnt; std::vector g_hip_visible_devices; hsa_agent_t g_cpu_agent; +/* + Implementation of malloc and free device functions. + + This is the best place to put them because the device + global variables need to be initialized at the start. +*/ + +#define NUM_PAGES_PER_THREAD 16 +#define SIZE_OF_PAGE 64 +#define NUM_THREADS_PER_CU 64 +#define NUM_CUS_PER_GPU 64 +#define NUM_PAGES NUM_PAGES_PER_THREAD * NUM_THREADS_PER_CU * NUM_CUS_PER_GPU +#define SIZE_MALLOC NUM_PAGES * SIZE_OF_PAGE +#define SIZE_OF_HEAP SIZE_MALLOC + +size_t g_malloc_heap_size = SIZE_OF_HEAP; + +__attribute__((address_space(1))) char gpuHeap[SIZE_OF_HEAP]; +__attribute__((address_space(1))) uint32_t gpuFlags[NUM_PAGES]; + +__device__ void *__hip_hc_malloc(size_t size) +{ + char *heap = (char*)gpuHeap; + if(size > SIZE_OF_HEAP) + { + return (void*)nullptr; + } + uint32_t totalThreads = hipBlockDim_x * hipGridDim_x * hipBlockDim_y * hipGridDim_y * hipBlockDim_z * hipGridDim_z; + uint32_t currentWorkItem = hipThreadIdx_x + hipBlockDim_x * hipBlockIdx_x; + + uint32_t numHeapsPerWorkItem = NUM_PAGES / totalThreads; + uint32_t heapSizePerWorkItem = SIZE_OF_HEAP / totalThreads; + + uint32_t stride = size / SIZE_OF_PAGE; + uint32_t start = numHeapsPerWorkItem * currentWorkItem; + + uint32_t k=0; + + while(gpuFlags[k] > 0) + { + k++; + } + + for(uint32_t i=0;iwait(hc::hcWaitModeActive); + // TODO - fix this so it goes through proper stream::wait() call.// direct wait OK since we know the stream is locked. + av->wait(hc::hcWaitModeActive); tprintf(DB_SYNC, " %s LAUNCH_BLOCKING for kernel completion\n", ToString(this).c_str()); } @@ -267,6 +347,7 @@ void ihipStream_t::lockclose_postKernelCommand(hc::accelerator_view *av) +#if USE_DISPATCH_HSA_KERNEL==0 // Precursor: the stream is already locked,specifically so this routine can enqueue work into the specified av. void ihipStream_t::launchModuleKernel( hc::accelerator_view av, @@ -318,6 +399,7 @@ void ihipStream_t::launchModuleKernel( hsa_queue_store_write_index_relaxed(Queue, packet_index + 1); hsa_signal_store_relaxed(Queue->doorbell_signal, packet_index); } +#endif //============================================================================= @@ -385,7 +467,7 @@ template<> void ihipCtxCriticalBase_t::printPeers(FILE *f) const { for (auto iter = _peers.begin(); iter!=_peers.end(); iter++) { - fprintf (f, "%s ", (*iter)->toString().c_str()); + fprintf (f, "%s ", (*iter)->toString().c_str()); }; } @@ -567,7 +649,6 @@ hipError_t ihipDevice_t::initProperties(hipDeviceProp_t* prop) // Set some defaults in case we don't find the appropriate regions: prop->totalGlobalMem = 0; prop->totalConstMem = 0; - prop->sharedMemPerBlock = 0; prop-> maxThreadsPerMultiProcessor = 0; prop->regsPerBlock = 0; @@ -585,7 +666,11 @@ hipError_t ihipDevice_t::initProperties(hipDeviceProp_t* prop) prop->isMultiGpuBoard = 0 ? gpuAgentsCount < 2 : 1; // Get agent name +#if HIP_USE_PRODUCT_NAME + err = hsa_agent_get_info(_hsaAgent, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_PRODUCT_NAME, &(prop->name)); +#else err = hsa_agent_get_info(_hsaAgent, HSA_AGENT_INFO_NAME, &(prop->name)); +#endif DeviceErrorCheck(err); // Get agent node @@ -718,6 +803,7 @@ hipError_t ihipDevice_t::initProperties(hipDeviceProp_t* prop) prop->concurrentKernels = 1; // All ROCm hardware supports executing multiple kernels concurrently prop->canMapHostMemory = 1; // All ROCm devices can map host memory + prop->totalConstMem = 16384; #if 0 // TODO - code broken below since it always returns 1. // Are the flags part of the context or part of the device? @@ -1097,7 +1183,6 @@ void ihipInit() assert(deviceCnt == g_deviceCnt); } - tprintf(DB_SYNC, "pid=%u %-30s\n", getpid(), ""); } @@ -1351,10 +1436,10 @@ void ihipSetTs(hipEvent_t e) // Returns true if thisCtx can see the memory allocated on dstCtx and srcCtx. // The peer-list for a context controls which contexts have access to the memory allocated on that context. -// So we check dstCtx's and srcCtx's peerList to see if the booth include thisCtx. +// So we check dstCtx's and srcCtx's peerList to see if the both include thisCtx. bool ihipStream_t::canSeePeerMemory(const ihipCtx_t *thisCtx, ihipCtx_t *dstCtx, ihipCtx_t *srcCtx) { - tprintf (DB_COPY1, "Checking if direct copy can be used. thisCtx:%s; dstCtx:%s ; srcCtx:%s\n", + tprintf (DB_COPY1, "Checking if direct copy can be used. thisCtx:%s; dstCtx:%s ; srcCtx:%s\n", thisCtx->toString().c_str(), dstCtx->toString().c_str(), srcCtx->toString().c_str()); // Use blocks to control scope of critical sections. @@ -1437,8 +1522,8 @@ void ihipStream_t::locked_copySync(void* dst, const void* src, size_t sizeBytes, }; - // If this is P2P access, we need to check to see if the copy agent (specified by the stream where the copy is enqueued) - // has peer access enabled to both the source and dest. If this is true, then the copy agent can see both pointers + // If this is P2P access, we need to check to see if the copy agent (specified by the stream where the copy is enqueued) + // has peer access enabled to both the source and dest. If this is true, then the copy agent can see both pointers // and we can perform the access with the copy engine from the current stream. If not true, then we will copy through the host. (forceHostCopyEngine=true). bool forceHostCopyEngine = false; if (hcCopyDir == hc::hcMemcpyDeviceToDevice) { @@ -1509,13 +1594,13 @@ void ihipStream_t::locked_copyAsync(void* dst, const void* src, size_t sizeBytes crit->_av.copy_async(src, dst, sizeBytes); } catch (Kalmar::runtime_exception) { throw ihipException(hipErrorRuntimeOther); - }; + }; if (HIP_LAUNCH_BLOCKING) { tprintf(DB_SYNC, "LAUNCH_BLOCKING for completion of hipMemcpyAsync(%zu)\n", sizeBytes); this->wait(crit); - } + } } else { locked_copySync(dst, src, sizeBytes, kind); diff --git a/src/hip_ir.ll b/src/hip_ir.ll new file mode 100644 index 0000000000..6850293778 --- /dev/null +++ b/src/hip_ir.ll @@ -0,0 +1,15 @@ +target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" +target triple = "amdgcn--amdhsa" + + +define void @__threadfence() #1 { + fence syncscope(2) seq_cst + ret void +} + +define void @__threadfence_block() #1 { + fence syncscope(3) seq_cst + ret void +} + +attributes #1 = { alwaysinline nounwind } diff --git a/src/hip_ldg.cpp b/src/hip_ldg.cpp index c59bd6e66b..f3e593355a 100644 --- a/src/hip_ldg.cpp +++ b/src/hip_ldg.cpp @@ -1,13 +1,16 @@ /* Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. + Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE diff --git a/src/hip_memory.cpp b/src/hip_memory.cpp index 6a869269d3..8b030799fb 100644 --- a/src/hip_memory.cpp +++ b/src/hip_memory.cpp @@ -1,21 +1,24 @@ /* - Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR - IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. - */ +Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ #include #include "hsa/hsa.h" @@ -102,13 +105,13 @@ hipError_t hipMalloc(void** ptr, size_t sizeBytes) HIP_INIT_API(ptr, sizeBytes); hipError_t hip_status = hipSuccess; - // return NULL pointer when malloc size is 0 + // return NULL pointer when malloc size is 0 if (sizeBytes == 0) { *ptr = NULL; return ihipLogStatus(hipSuccess); } - + auto ctx = ihipGetTlsDefaultCtx(); if (ctx) { @@ -174,8 +177,18 @@ hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags) return ihipLogStatus(hip_status); } +hipError_t hipMallocHost(void** ptr, size_t sizeBytes) +{ + return hipHostMalloc(ptr, sizeBytes, 0); +} + +hipError_t hipHostAlloc(void** ptr, size_t sizeBytes, unsigned int flags) +{ + return hipHostMalloc(ptr, sizeBytes, flags); +}; + // width in bytes -hipError_t hipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t height) +hipError_t hipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t height) { HIP_INIT_API(ptr, pitch, width, height); @@ -218,7 +231,7 @@ hipError_t hipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t height return ihipLogStatus(hip_status); } -hipChannelFormatDesc hipCreateChannelDesc(int x, int y, int z, int w, hipChannelFormatKind f) +hipChannelFormatDesc hipCreateChannelDesc(int x, int y, int z, int w, hipChannelFormatKind f) { hipChannelFormatDesc cd; cd.x = x; cd.y = y; cd.z = z; cd.w = w; @@ -227,7 +240,7 @@ hipChannelFormatDesc hipCreateChannelDesc(int x, int y, int z, int w, hipChannel } hipError_t hipMallocArray(hipArray** array, const hipChannelFormatDesc* desc, - size_t width, size_t height, unsigned int flags) + size_t width, size_t height, unsigned int flags) { HIP_INIT_API(array, desc, width, height, flags); @@ -375,22 +388,67 @@ hipError_t hipMemcpyToSymbol(const char* symbolName, const void *src, size_t cou { HIP_INIT_API(symbolName, src, count, offset, kind); -#ifdef USE_MEMCPYTOSYMBOL - if(kind != hipMemcpyHostToDevice) + if(symbolName == nullptr) { - return ihipLogStatus(hipErrorInvalidValue); + return ihipLogStatus(hipErrorInvalidSymbol); } + auto ctx = ihipGetTlsDefaultCtx(); - //hsa_signal_t depSignal; - //int depSignalCnt = ctx._default_stream->preCopyCommand(NULL, &depSignal, ihipCommandCopyH2D); - assert(0); // Need to properly synchronize the copy - do something with depSignal if != NULL. + hc::accelerator acc = ctx->getDevice()->_acc; + + void *ptr = acc.get_symbol_address(symbolName); + + if(ptr == nullptr) + { + return ihipLogStatus(hipErrorInvalidSymbol); + } + + hipStream_t stream = ihipSyncAndResolveStream(hipStreamNull); + + stream->locked_copySync(ptr, src, count + offset, kind); - ctx->_acc.memcpy_symbol(symbolName, (void*) src,count, offset); -#endif return ihipLogStatus(hipSuccess); } +hipError_t hipMemcpyToSymbolAsync(const char* symbolName, const void *src, size_t count, size_t offset, hipMemcpyKind kind, hipStream_t stream) +{ + HIP_INIT_API(symbolName, src, count, offset, kind, stream); + + if(symbolName == nullptr) + { + return ihipLogStatus(hipErrorInvalidSymbol); + } + + hipError_t e = hipSuccess; + + auto ctx = ihipGetTlsDefaultCtx(); + + hc::accelerator acc = ctx->getDevice()->_acc; + + void *ptr = acc.get_symbol_address(symbolName); + + if(ptr == nullptr) + { + return ihipLogStatus(hipErrorInvalidSymbol); + } + + if (stream) { + try { + stream->locked_copyAsync(ptr, src, count + offset, kind); + } + catch (ihipException ex) { + e = ex._code; + } + } else { + e = hipErrorInvalidValue; + } + + return ihipLogStatus(e); + + +} + //--- hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind) { @@ -705,7 +763,7 @@ hipError_t hipMemcpyToArray(hipArray* dst, size_t wOffset, size_t hOffset, // TODO - make member function of stream? template hc::completion_future -ihipMemsetKernel(hipStream_t stream, +ihipMemsetKernel(hipStream_t stream, LockedAccessor_StreamCrit_t &crit, T * ptr, T val, size_t sizeBytes) { @@ -930,6 +988,11 @@ hipError_t hipHostFree(void* ptr) return ihipLogStatus(hipStatus); }; +hipError_t hipFreeHost(void* ptr) +{ + return hipHostFree(ptr); +} + hipError_t hipFreeArray(hipArray* array) { HIP_INIT_API(array); @@ -954,16 +1017,7 @@ hipError_t hipFreeArray(hipArray* array) return ihipLogStatus(hipStatus); } -// Stubs of threadfence operations -__device__ void __threadfence_block(void){ - // no-op -} - -__device__ void __threadfence(void){ - // no-op -} __device__ void __threadfence_system(void){ // no-op } - diff --git a/src/hip_module.cpp b/src/hip_module.cpp index f556c85456..c8368abeec 100644 --- a/src/hip_module.cpp +++ b/src/hip_module.cpp @@ -1,19 +1,22 @@ /* Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. + Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR -IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ @@ -118,10 +121,10 @@ hipError_t hipModuleLoad(hipModule_t *module, const char *fname){ ihipDevice_t *currentDevice = ihipGetDevice(deviceId); std::ifstream in(fname, std::ios::binary | std::ios::ate); - if(!in){ + if(!in.is_open() ){ return ihipLogStatus(hipErrorFileNotFound); - }else{ + } else { *module = new ihipModule_t; size_t size = std::string::size_type(in.tellg()); @@ -248,12 +251,12 @@ hipError_t hipModuleLaunchKernel(hipFunction_t f, hsa_agent_t gpuAgent = (hsa_agent_t)currentDevice->_hsaAgent; void *config[5] = {0}; - size_t kernSize; + size_t kernArgSize; if(extra != NULL){ memcpy(config, extra, sizeof(size_t)*5); if(config[0] == HIP_LAUNCH_PARAM_BUFFER_POINTER && config[2] == HIP_LAUNCH_PARAM_BUFFER_SIZE && config[4] == HIP_LAUNCH_PARAM_END){ - kernSize = *(size_t*)(config[3]); + kernArgSize = *(size_t*)(config[3]); } else { return ihipLogStatus(hipErrorNotInitialized); } @@ -279,6 +282,33 @@ hipError_t hipModuleLaunchKernel(hipFunction_t f, grid_launch_parm lp; hStream = ihipPreLaunchKernel(hStream, 0, 0, &lp, f->_kernelName); +#if USE_DISPATCH_HSA_KERNEL + + hsa_kernel_dispatch_packet_t aql; + + memset(&aql, 0, sizeof(aql)); + + //aql.completion_signal._handle = 0; + //aql.kernarg_address = 0; + + aql.workgroup_size_x = blockDimX; + aql.workgroup_size_y = blockDimY; + aql.workgroup_size_z = blockDimZ; + aql.grid_size_x = blockDimX * gridDimX; + aql.grid_size_y = blockDimY * gridDimY; + aql.grid_size_z = blockDimZ * gridDimZ; + aql.group_segment_size = groupSegmentSize; + aql.private_segment_size = privateSegmentSize; + aql.kernel_object = f->_kernel; + aql.setup = 1 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS; + aql.header = (HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE) | + (1 << HSA_PACKET_HEADER_BARRIER) | + (HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE) | + (HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE); + + lp.av->dispatch_hsa_kernel(&aql, config[1] /* kernarg*/, kernArgSize); +#else + /* Create signal */ @@ -286,11 +316,13 @@ hipError_t hipModuleLaunchKernel(hipFunction_t f, hsa_signal_t signal; status = hsa_signal_create(1, 0, NULL, &signal); + /* Launch AQL packet */ hStream->launchModuleKernel(*lp.av, signal, blockDimX, blockDimY, blockDimZ, - gridDimX, gridDimY, gridDimZ, groupSegmentSize, privateSegmentSize, config[1], kernSize, f->_kernel); + gridDimX, gridDimY, gridDimZ, groupSegmentSize, privateSegmentSize, config[1], kernArgSize, f->_kernel); + /* Wait for signal @@ -298,6 +330,8 @@ hipError_t hipModuleLaunchKernel(hipFunction_t f, hsa_signal_value_t value = hsa_signal_wait_acquire(signal, HSA_SIGNAL_CONDITION_LT, 1, UINT64_MAX, HSA_WAIT_STATE_BLOCKED); +#endif // USE_DISPATCH_HSA_KERNEL + ihipPostLaunchKernel(hStream, lp); diff --git a/src/hip_peer.cpp b/src/hip_peer.cpp index c0ebda311d..4f3227de82 100644 --- a/src/hip_peer.cpp +++ b/src/hip_peer.cpp @@ -1,19 +1,22 @@ /* Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. + Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR -IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ diff --git a/src/hip_stream.cpp b/src/hip_stream.cpp index 751ebea12f..9bb615ebf7 100644 --- a/src/hip_stream.cpp +++ b/src/hip_stream.cpp @@ -1,19 +1,22 @@ /* Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. + Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR -IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ diff --git a/tests/src/deviceLib/hipTestDeviceLimit.cpp b/tests/src/deviceLib/hipTestDeviceLimit.cpp new file mode 100644 index 0000000000..2cf5c3a703 --- /dev/null +++ b/tests/src/deviceLib/hipTestDeviceLimit.cpp @@ -0,0 +1,29 @@ +/* +Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include +#include +#include + +int main() +{ + size_t heap; + assert(hipSuccess == hipDeviceGetLimit(&heap, hipLimitMallocHeapSize)); + assert(heap == 4194304); +} diff --git a/tests/src/deviceLib/hipTestDeviceSymbol.cpp b/tests/src/deviceLib/hipTestDeviceSymbol.cpp new file mode 100644 index 0000000000..1158bf3f9d --- /dev/null +++ b/tests/src/deviceLib/hipTestDeviceSymbol.cpp @@ -0,0 +1,75 @@ +/* +Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include +#include +#include + +#define NUM 1024 +#define SIZE 1024*4 + +#ifdef __HIP_PLATFORM_HCC__ +__attribute__((address_space(1))) int global[NUM]; +#endif + +#ifdef __HIP_PLATFORM_NVCC__ +__device__ int global[NUM]; +#endif + +__global__ void Assign(hipLaunchParm lp, int* Out) +{ + int tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; + Out[tid] = global[tid]; +} + +int main() +{ + int *A, *B, *Ad; + A = new int[NUM]; + B = new int[NUM]; + for(unsigned i=0;i +#include +#include + +#define NUM 1024 +#define SIZE NUM*sizeof(float) + +__global__ void vAdd(hipLaunchParm lp, float *In1, float *In2, float *In3, float *In4, float *Out) +{ + int tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; + In4[tid] = In1[tid] + In2[tid]; + __threadfence(); + In3[tid] = In3[tid] + In4[tid]; + __threadfence_block(); + Out[tid] = In4[tid] + In3[tid]; + +} + +int main(){ + float *In1 = new float[1024]; + float *In2 = new float[1024]; + float *In3 = new float[1024]; + float *In4 = new float[1024]; + float *Out = new float[1024]; + + for(uint32_t i=0;i<1024;i++) + { + In1[i] = 1.0f; + In2[i] = 1.0f; + In3[i] = 1.0f; + In4[i] = 1.0f; + } + + float *In1d, *In2d, *In3d, *In4d, *Outd; + hipMalloc((void**)&In1d, SIZE); + hipMalloc((void**)&In2d, SIZE); + hipMalloc((void**)&In3d, SIZE); + hipMalloc((void**)&In4d, SIZE); + hipMalloc((void**)&Outd, SIZE); + + hipMemcpy(In1d, In1, SIZE, hipMemcpyHostToDevice); + hipMemcpy(In2d, In2, SIZE, hipMemcpyHostToDevice); + hipMemcpy(In3d, In3, SIZE, hipMemcpyHostToDevice); + hipMemcpy(In4d, In4, SIZE, hipMemcpyHostToDevice); + + hipLaunchKernel(vAdd, dim3(32,1,1), dim3(32,1,1), 0, 0, In1d, In2d, In3d, In4d, Outd); + hipMemcpy(Out, Outd, SIZE, hipMemcpyDeviceToHost); + assert(Out[10] == 2*In1[10] + 2*In2[10] + In3[10]); + +} diff --git a/tests/src/hipComplex.cpp b/tests/src/hipComplex.cpp index ab1e67036a..8a153b6bf0 100644 --- a/tests/src/hipComplex.cpp +++ b/tests/src/hipComplex.cpp @@ -18,10 +18,10 @@ THE SOFTWARE. */ -#include +#include #include "hip/hip_runtime.h" #include "hip/hip_runtime_api.h" -#include +#include "hip/hcc_detail/hip_complex.h" #define LEN 64 #define SIZE 64<<2 diff --git a/tests/src/kernel/hipTestMallocKernel.cpp b/tests/src/kernel/hipTestMallocKernel.cpp new file mode 100644 index 0000000000..efd38b5ad2 --- /dev/null +++ b/tests/src/kernel/hipTestMallocKernel.cpp @@ -0,0 +1,60 @@ +/* +Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include +#include +#include + +#define HIP_ASSERT(status) assert(hipSuccess == status); + +#define NUM 1024 +#define SIZE NUM * 8 + +__global__ void Alloc(hipLaunchParm lp, uint64_t *Ptr) { + int tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; + Ptr[tid] = (uint64_t)malloc(128); +} + +__global__ void Free(hipLaunchParm lp, uint64_t *Ptr) { + int tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; + free((void*)Ptr[tid]); +} + +int main() +{ + uint64_t *hPtr, *dPtr; + hPtr = new uint64_t[NUM]; + for(uint32_t i=0;i +#include +#include + +#define LEN8 8 * 4 +#define LEN9 9 * 4 +#define LEN10 10 * 4 +#define LEN11 11 * 4 +#define LEN12 12 * 4 + +__global__ void MemCpy8(hipLaunchParm lp, uint8_t *In, uint8_t *Out) { + int tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; + memcpy(Out + tid*8, In + tid*8, 8); +} + +__global__ void MemCpy9(hipLaunchParm lp, uint8_t *In, uint8_t *Out) { + int tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; + memcpy(Out + tid*9, In + tid*9, 9); +} + +__global__ void MemCpy10(hipLaunchParm lp, uint8_t *In, uint8_t *Out) { + int tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; + memcpy(Out + tid*10, In + tid*10, 10); +} + +__global__ void MemCpy11(hipLaunchParm lp, uint8_t *In, uint8_t *Out) { + int tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; + memcpy(Out + tid*11, In + tid*11, 11); +} + +__global__ void MemCpy12(hipLaunchParm lp, uint8_t *In, uint8_t *Out) { + int tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; + memcpy(Out + tid*12, In + tid*12, 12); +} + +__global__ void MemSet8(hipLaunchParm lp, uint8_t *In) { + int tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; + memset(In + tid*8, 1, 8); +} + +__global__ void MemSet9(hipLaunchParm lp, uint8_t *In) { + int tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; + memset(In + tid*9, 1, 9); +} + +__global__ void MemSet10(hipLaunchParm lp, uint8_t *In) { + int tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; + memset(In + tid*10, 1, 10); +} + +__global__ void MemSet11(hipLaunchParm lp, uint8_t *In) { + int tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; + memset(In + tid*11, 1, 11); +} + +__global__ void MemSet12(hipLaunchParm lp, uint8_t *In) { + int tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; + memset(In + tid*12, 1, 12); +} + +int main(){ + uint8_t *A, *Ad, *B, *Bd, *C, *Cd; + A = new uint8_t[LEN8]; + B = new uint8_t[LEN8]; + C = new uint8_t[LEN8]; + for(uint32_t i=0;i