Merge branch 'amd-develop' into amd-master
Change-Id: I0e3bbaf5e872fe5c064f0d792c074d13be50e289
This commit is contained in:
+10
-9
@@ -143,7 +143,7 @@ add_to_config(_buildInfo COMPILE_HIP_ATP_MARKER)
|
||||
#############################
|
||||
# Build steps
|
||||
#############################
|
||||
# Rebuild cmake cache updates .buildInfo and .version
|
||||
# Rebuild cmake cache updates .hipInfo and .hipVersion
|
||||
add_custom_target(update_build_and_version_info ALL COMMAND make rebuild_cache)
|
||||
|
||||
# Build clang hipify if enabled
|
||||
@@ -198,12 +198,12 @@ if(HIP_PLATFORM STREQUAL "hcc")
|
||||
set_source_files_properties(${SOURCE_FILES} PROPERTIES OBJECT_DEPENDS ${PROJECT_BINARY_DIR}/hcc_version.txt)
|
||||
add_dependencies(hip_hcc check_hcc_version update_build_and_version_info)
|
||||
|
||||
# Generate .buildInfo
|
||||
file(WRITE "${PROJECT_BINARY_DIR}/.buildInfo" ${_buildInfo})
|
||||
# Generate .hipInfo
|
||||
file(WRITE "${PROJECT_BINARY_DIR}/.hipInfo" ${_buildInfo})
|
||||
endif()
|
||||
|
||||
# Generate .version
|
||||
file(WRITE "${PROJECT_BINARY_DIR}/.version" ${_versionInfo})
|
||||
# Generate .hipVersion
|
||||
file(WRITE "${PROJECT_BINARY_DIR}/.hipVersion" ${_versionInfo})
|
||||
|
||||
# Build doxygen documentation
|
||||
add_custom_target(doc COMMAND HIP_PATH=${CMAKE_CURRENT_SOURCE_DIR} doxygen ${CMAKE_CURRENT_SOURCE_DIR}/docs/doxygen-input/doxy.cfg
|
||||
@@ -219,13 +219,14 @@ if(HIP_PLATFORM STREQUAL "hcc")
|
||||
else()
|
||||
install(TARGETS hip_hcc DESTINATION lib)
|
||||
endif()
|
||||
install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/src/hip_ir.ll DESTINATION lib)
|
||||
|
||||
# Install .buildInfo
|
||||
install(FILES ${PROJECT_BINARY_DIR}/.buildInfo DESTINATION lib)
|
||||
# Install .hipInfo
|
||||
install(FILES ${PROJECT_BINARY_DIR}/.hipInfo DESTINATION lib)
|
||||
endif()
|
||||
|
||||
# Install .version
|
||||
install(FILES ${PROJECT_BINARY_DIR}/.version DESTINATION bin)
|
||||
# Install .hipVersion
|
||||
install(FILES ${PROJECT_BINARY_DIR}/.hipVersion DESTINATION bin)
|
||||
|
||||
# Install src, bin, include & cmake if necessary
|
||||
execute_process(COMMAND test ${CMAKE_INSTALL_PREFIX} -ef ${CMAKE_CURRENT_SOURCE_DIR}
|
||||
|
||||
+18
-49
@@ -18,9 +18,9 @@ After making HIP, don't forget the "make install" step !
|
||||
|
||||
## Adding a new HIP API
|
||||
|
||||
- Add a translation to the bin/hipify tool ; many examples abound.
|
||||
- Add a translation to the hipify-clang tool ; many examples abound.
|
||||
- For stat tracking purposes, place the API into an appropriate stat category ("dev", "mem", "stream", etc).
|
||||
- Add a inlined NVCC implementation for the function in include/nvcc_detail/hip_runtime_api.h.
|
||||
- Add a inlined NVCC implementation for the function in include/hip/nvcc_detail/hip_runtime_api.h.
|
||||
- These are typically headers
|
||||
- Add an HCC definition and Doxygen comments for the function in include/hcc_detail/hip_runtime_api.h
|
||||
- Source implementation typically go in src/hcc_detail/hip_hcc.cpp. The implementation may involve
|
||||
@@ -59,7 +59,7 @@ Selected multilib: .;@m64
|
||||
|
||||
The unix `date` command can print the HCC-format work-week for a specific date , ie:
|
||||
```
|
||||
> date --utc +%y%W%w -d 2015-11-09
|
||||
> date --utc +%y%U%w -d 2015-11-09
|
||||
15451
|
||||
```
|
||||
|
||||
@@ -125,60 +125,29 @@ Differences or limitations of HIP APIs as compared to CUDA APIs should be clearl
|
||||
is used by the GetLastError and PeekLastError functions - if a HIP API simply returns, then the error will not be logged correctly.
|
||||
|
||||
|
||||
|
||||
#### Presubmit Testing:
|
||||
Before checking in or submitting a pull request, run all Rodinia tests and ensure pass results match starting point:
|
||||
Before checking in or submitting a pull request, run all directed tests (see tests/README.md) and all Rodinia tests.
|
||||
Ensure pass results match starting point:
|
||||
|
||||
```shell
|
||||
> cd examples/
|
||||
> ./run_all.sh
|
||||
```
|
||||
|
||||
Recent results :
|
||||
|
||||
```
|
||||
hip2/examples/rodinia_3.0/hip$ make test
|
||||
--TESTING: b+tree
|
||||
executing: ../../test/b+tree/run0.cmd... PASSED!
|
||||
--TESTING: backprop
|
||||
executing: ../../test/backprop/run0.cmd... PASSED!
|
||||
--TESTING: bfs
|
||||
executing: ../../test/bfs/run0.cmd... PASSED!
|
||||
executing: ../../test/bfs/run1.cmd... PASSED!
|
||||
--TESTING: cfd
|
||||
executing: ../../test/cfd/run0.cmd... PASSED!
|
||||
executing: ../../test/cfd/run1.cmd... PASSED!
|
||||
--TESTING: gaussian
|
||||
executing: ../../test/gaussian/run0.cmd... PASSED!
|
||||
--TESTING: heartwall
|
||||
executing: ../../test/heartwall/run0.cmd... PASSED!
|
||||
--TESTING: hotspot
|
||||
executing: ../../test/hotspot/run0.cmd... PASSED!
|
||||
--TESTING: kmeans
|
||||
executing: ../../test/kmeans/run0.cmd... PASSED!
|
||||
executing: ../../test/kmeans/run1.cmd... PASSED!
|
||||
executing: ../../test/kmeans/run2.cmd... PASSED!
|
||||
executing: ../../test/kmeans/run3.cmd... PASSED!
|
||||
--TESTING: lavaMD
|
||||
executing: ../../test/lavaMD/run0.cmd... PASSED!
|
||||
executing: ../../test/lavaMD/run1.cmd... PASSED!
|
||||
executing: ../../test/lavaMD/run2.cmd... PASSED!
|
||||
executing: ../../test/lavaMD/run3.cmd... PASSED!
|
||||
executing: ../../test/lavaMD/run4.cmd... PASSED!
|
||||
--TESTING: lud
|
||||
executing: ../../test/lud/run0.cmd... PASSED!
|
||||
--TESTING: myocyte
|
||||
executing: ../../test/myocyte/run0.cmd... PASSED!
|
||||
--TESTING: nn
|
||||
executing: ../../test/nn/run0.cmd... PASSED!
|
||||
--TESTING: nw
|
||||
executing: ../../test/nw/run0.cmd... PASSED!
|
||||
--TESTING: pathfinder
|
||||
executing: ../../test/pathfinder/run0.cmd... PASSED!
|
||||
--TESTING: srad
|
||||
executing: ../../test/srad/run0.cmd... PASSED!
|
||||
--TESTING: streamcluster
|
||||
executing: ../../test/streamcluster/run0.cmd... PASSED!
|
||||
```
|
||||
#### Checkin messages
|
||||
Follow existing best practice for writing a good Git commit message. Some tips:
|
||||
http://chris.beams.io/posts/git-commit/
|
||||
https://robots.thoughtbot.com/5-useful-tips-for-a-better-commit-message
|
||||
|
||||
In particular :
|
||||
- Use imperative voice, ie "Fix this bug", "Refactor the XYZ routine", "Update the doc".
|
||||
Not : "Fixing the bug", "Fixed the bug", "Bug fix", etc.
|
||||
- Subject should summarize the commit. Do not end subject with a period. Use a blank line
|
||||
after the subject.
|
||||
|
||||
|
||||
|
||||
## Doxygen Editing Guidelines
|
||||
|
||||
|
||||
+10
-2
@@ -52,9 +52,9 @@ $verbose = $ENV{'HIPCC_VERBOSE'} // 0;
|
||||
$HIP_PATH=$ENV{'HIP_PATH'} // dirname (dirname $0); # use parent directory of hipcc
|
||||
|
||||
#---
|
||||
# Read .buildInfo
|
||||
# Read .hipInfo
|
||||
my %hipConfig = ();
|
||||
parse_config_file("$HIP_PATH/lib/.buildInfo", \%hipConfig);
|
||||
parse_config_file("$HIP_PATH/lib/.hipInfo", \%hipConfig);
|
||||
|
||||
#---
|
||||
#HIP_PLATFORM controls whether to use NVCC or HCC for compilation:
|
||||
@@ -119,6 +119,9 @@ if ($HIP_PLATFORM eq "hcc") {
|
||||
if ($ROCM_TARGET eq "hawaii") {
|
||||
$HIPLDFLAGS .= " -amdgpu-target=AMD:AMDGPU:7:0:1";
|
||||
}
|
||||
if ($ROCM_TARGET eq "polaris") {
|
||||
$HIPLDFLAGS .= " -amdgpu-target=AMD:AMDGPU:8:0:3";
|
||||
}
|
||||
|
||||
# Add trace marker library:
|
||||
# TODO - once we cleanly separate the HIP API headers from HIP library headers this logic should move to CMakebuild option - apps do not need to see the marker library.
|
||||
@@ -196,6 +199,11 @@ if($HIP_PLATFORM eq "hcc"){
|
||||
}
|
||||
}
|
||||
|
||||
if(($HIP_PLATFORM eq "hcc") and defined $ENV{HIP_EXPERIMENTAL}){
|
||||
$EXPORT_LL=" ";
|
||||
$ENV{HCC_EXTRA_LIBRARIES}="$HIP_PATH/lib/hip_ir.ll\n";
|
||||
}
|
||||
|
||||
if($HIP_PLATFORM eq "nvcc"){
|
||||
$ISACMD .= "$HIP_PATH/bin/hipcc -ptx ";
|
||||
if($ARGV[0] eq "--genco"){
|
||||
|
||||
+5
-4
@@ -1,7 +1,7 @@
|
||||
#!/usr/bin/perl -w
|
||||
|
||||
$HIP_BASE_VERSION_MAJOR = "0";
|
||||
$HIP_BASE_VERSION_MINOR = "92";
|
||||
$HIP_BASE_VERSION_MAJOR = "1";
|
||||
$HIP_BASE_VERSION_MINOR = "0";
|
||||
|
||||
# Need perl > 5.10 to use logic-defined or
|
||||
use 5.006; use v5.10.1;
|
||||
@@ -86,9 +86,9 @@ if ($HIP_PLATFORM eq "nvcc") {
|
||||
};
|
||||
|
||||
#---
|
||||
# Read .version
|
||||
# Read .hipVersion
|
||||
my %hipVersion = ();
|
||||
parse_config_file("$HIP_PATH/bin/.version", \%hipVersion);
|
||||
parse_config_file("$HIP_PATH/bin/.hipVersion", \%hipVersion);
|
||||
$HIP_VERSION_MAJOR = $hipVersion{'HIP_VERSION_MAJOR'} // $HIP_BASE_VERSION_MAJOR;
|
||||
$HIP_VERSION_MINOR = $hipVersion{'HIP_VERSION_MINOR'} // $HIP_BASE_VERSION_MINOR;
|
||||
$HIP_VERSION_PATCH = $hipVersion{'HIP_VERSION_PATCH'} // "0";
|
||||
@@ -129,6 +129,7 @@ if (!$printed or $p_full) {
|
||||
print ("HSA_PATH : $HSA_PATH\n");
|
||||
print ("HCC_HOME : $HCC_HOME\n");
|
||||
system("$HCC_HOME/bin/hcc --version");
|
||||
system("$HCC_HOME/compiler/bin/llc --version");
|
||||
print ("HCC-cxxflags : ");
|
||||
system("$HCC_HOME/bin/hcc-config --cxxflags");
|
||||
print ("HCC-ldflags : ");
|
||||
|
||||
Executable → Regular
@@ -2,17 +2,17 @@
|
||||
|
||||
| **CUDA** | **HIP** | **CUDA description** |
|
||||
|-----------------------------------------------------------|-------------------------------|--------------------------------------------------------------------------------------------------------------------------------|
|
||||
| `cudaChooseDevice` | | Select compute-device which best matches criteria. |
|
||||
| `cudaChooseDevice` | `hipChooseDevice` | Select compute-device which best matches criteria. |
|
||||
| `cudaDeviceGetAttribute` | `hipDeviceGetAttribute` | Returns information about the device. |
|
||||
| `cudaDeviceGetByPCIBusId` | | Returns a handle to a compute device. |
|
||||
| `cudaDeviceGetCacheConfig` | `hipDeviceGetCacheConfig` | Returns the preferred cache configuration for the current device. |
|
||||
| `cudaDeviceGetLimit` | | Returns resource limits. |
|
||||
| `cudaDeviceGetLimit` | `hipDeviceGetLimit` | Returns resource limits. |
|
||||
| `cudaDeviceGetPCIBusId` | | Returns a PCI Bus Id string for the device. |
|
||||
| `cudaDeviceGetSharedMemConfig` | `hipDeviceGetSharedMemConfig` | Returns the shared memory configuration for the current device. |
|
||||
| `cudaDeviceGetStreamPriorityRange` | | Returns numerical values that correspond to the least and greatest stream priorities. |
|
||||
| `cudaDeviceReset` | `hipDeviceReset` | Destroy all allocations and reset all state on the current device in the current process. |
|
||||
| `cudaDeviceSetCacheConfig` | `hipDeviceSetCacheConfig` | Sets the preferred cache configuration for the current device. |
|
||||
| `cudaDeviceSetLimit` | | Set resource limits. |
|
||||
| `cudaDeviceSetLimit` | `hipDeviceSetLimit` | Set resource limits. |
|
||||
| `cudaDeviceSetSharedMemConfig` | `hipDeviceSetSharedMemConfig` | Sets the shared memory configuration for the current device. |
|
||||
| `cudaDeviceSynchronize` | `hipDeviceSynchronize` | Wait for compute device to finish. |
|
||||
| `cudaGetDevice` | `hipGetDevice` | Returns which device is currently being used. |
|
||||
|
||||
Executable → Regular
@@ -635,6 +635,17 @@ struct cuda2hipMap {
|
||||
cuda2hipRename["cudaSharedMemBankSizeFourByte"] = {"hipSharedMemBankSizeFourByte", CONV_DEV, API_RUNTIME};
|
||||
cuda2hipRename["cudaSharedMemBankSizeEightByte"] = {"hipSharedMemBankSizeEightByte", CONV_DEV, API_RUNTIME};
|
||||
|
||||
// Limits
|
||||
cuda2hipRename["cudaLimit"] = {"hipLimit_t", CONV_DEV, API_RUNTIME};
|
||||
// unsupported yet
|
||||
//cuda2hipRename["cudaLimitStackSize"] = {"hipLimitStackSize", CONV_DEV, API_RUNTIME};
|
||||
//cuda2hipRename["cudaLimitPrintfFifoSize"] = {"hipLimitPrintfFifoSize", CONV_DEV, API_RUNTIME};
|
||||
// unsupported yet
|
||||
cuda2hipRename["cudaLimitMallocHeapSize"] = {"hipLimitMallocHeapSize", CONV_DEV, API_RUNTIME};
|
||||
//cuda2hipRename["cudaLimitDevRuntimeSyncDepth"] = {"hipLimitPrintfFifoSize", CONV_DEV, API_RUNTIME};
|
||||
//cuda2hipRename["cudaLimitDevRuntimePendingLaunchCount"] = {"hipLimitMallocHeapSize", CONV_DEV, API_RUNTIME};
|
||||
cuda2hipRename["cudaDeviceGetLimit"] = {"hipDeviceGetLimit", CONV_DEV, API_RUNTIME};
|
||||
|
||||
// Profiler
|
||||
// unsupported yet
|
||||
//cuda2hipRename["cudaProfilerInitialize"] = {"hipProfilerInitialize", CONV_OTHER, API_RUNTIME};
|
||||
|
||||
@@ -1,3 +1,25 @@
|
||||
/*
|
||||
Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef HCC_H
|
||||
#define HCC_H
|
||||
|
||||
|
||||
@@ -1,3 +1,25 @@
|
||||
/*
|
||||
Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef HCC_ACC_H
|
||||
#define HCC_ACC_H
|
||||
#include "hip/hip_runtime_api.h"
|
||||
|
||||
@@ -1,258 +0,0 @@
|
||||
/*
|
||||
Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#include <hip_runtime_api.h>
|
||||
#include <hcblas.h>
|
||||
|
||||
//HGSOS for Kalmar leave it as C++, only cublas needs C linkage.
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef hcblasHandle_t* hipblasHandle_t;
|
||||
typedef hcComplex hipComplex ;
|
||||
|
||||
static hipblasHandle_t dummyGlobal;
|
||||
|
||||
/* Unsupported types
|
||||
"cublasFillMode_t",
|
||||
"cublasDiagType_t",
|
||||
"cublasSideMode_t",
|
||||
"cublasPointerMode_t",
|
||||
"cublasAtomicsMode_t",
|
||||
"cublasDataType_t"
|
||||
*/
|
||||
|
||||
inline static hcblasOperation_t hipOperationToHCCOperation( hipblasOperation_t op)
|
||||
{
|
||||
switch (op)
|
||||
{
|
||||
case HIPBLAS_OP_N:
|
||||
return HCBLAS_OP_N;
|
||||
|
||||
case HIPBLAS_OP_T:
|
||||
return HCBLAS_OP_T;
|
||||
|
||||
case HIPBLAS_OP_C:
|
||||
return HCBLAS_OP_C;
|
||||
|
||||
default:
|
||||
throw "Non existent OP";
|
||||
}
|
||||
}
|
||||
|
||||
inline static hipblasOperation_t HCCOperationToHIPOperation( hcblasOperation_t op)
|
||||
{
|
||||
switch (op)
|
||||
{
|
||||
case HCBLAS_OP_N :
|
||||
return HIPBLAS_OP_N;
|
||||
|
||||
case HCBLAS_OP_T :
|
||||
return HIPBLAS_OP_T;
|
||||
|
||||
case HCBLAS_OP_C :
|
||||
return HIPBLAS_OP_C;
|
||||
|
||||
default:
|
||||
throw "Non existent OP";
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
inline static hipblasStatus_t hipHCBLASStatusToHIPStatus(hcblasStatus_t hcStatus)
|
||||
{
|
||||
switch(hcStatus)
|
||||
{
|
||||
case HCBLAS_STATUS_SUCCESS:
|
||||
return HIPBLAS_STATUS_SUCCESS;
|
||||
case HCBLAS_STATUS_NOT_INITIALIZED:
|
||||
return HIPBLAS_STATUS_NOT_INITIALIZED;
|
||||
case HCBLAS_STATUS_ALLOC_FAILED:
|
||||
return HIPBLAS_STATUS_ALLOC_FAILED;
|
||||
case HCBLAS_STATUS_INVALID_VALUE:
|
||||
return HIPBLAS_STATUS_INVALID_VALUE;
|
||||
case HCBLAS_STATUS_MAPPING_ERROR:
|
||||
return HIPBLAS_STATUS_MAPPING_ERROR;
|
||||
case HCBLAS_STATUS_EXECUTION_FAILED:
|
||||
return HIPBLAS_STATUS_EXECUTION_FAILED;
|
||||
case HCBLAS_STATUS_INTERNAL_ERROR:
|
||||
return HIPBLAS_STATUS_INTERNAL_ERROR;
|
||||
default:
|
||||
throw "Unimplemented status";
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
inline static hipblasStatus_t hipblasCreate(hipblasHandle_t* handle) {
|
||||
hipblasStatus_t retVal = hipHCBLASStatusToHIPStatus(hcblasCreate(*handle));
|
||||
dummyGlobal = *handle;
|
||||
return retVal;
|
||||
|
||||
}
|
||||
|
||||
inline static hipblasStatus_t hipblasDestroy(hipblasHandle_t& handle) {
|
||||
return hipHCBLASStatusToHIPStatus(hcblasDestroy(handle));
|
||||
}
|
||||
|
||||
//note: no handle
|
||||
inline static hipblasStatus_t hipblasSetVector(int n, int elemSize, const void *x, int incx, void *y, int incy){
|
||||
return hipHCBLASStatusToHIPStatus(hcblasSetVector(dummyGlobal, n, elemSize, x, incx, y, incy)); //HGSOS no need for handle moving forward
|
||||
}
|
||||
|
||||
//note: no handle
|
||||
inline static hipblasStatus_t hipblasGetVector(int n, int elemSize, const void *x, int incx, void *y, int incy){
|
||||
return hipHCBLASStatusToHIPStatus(hcblasGetVector(dummyGlobal, n, elemSize, x, incx, y, incy)); //HGSOS no need for handle
|
||||
}
|
||||
|
||||
//note: no handle
|
||||
inline static hipblasStatus_t hipblasSetMatrix(int rows, int cols, int elemSize, const void *A, int lda, void *B, int ldb){
|
||||
return hipHCBLASStatusToHIPStatus(hcblasSetMatrix(dummyGlobal, rows, cols, elemSize, A, lda, B, ldb));
|
||||
}
|
||||
|
||||
//note: no handle
|
||||
inline static hipblasStatus_t hipblasGetMatrix(int rows, int cols, int elemSize, const void *A, int lda, void *B, int ldb){
|
||||
return hipHCBLASStatusToHIPStatus(hcblasGetMatrix(dummyGlobal, rows, cols, elemSize, A, lda, B, ldb));
|
||||
}
|
||||
|
||||
inline static hipblasStatus_t hipblasSasum(hipblasHandle_t handle, int n, float *x, int incx, float *result){
|
||||
return hipHCBLASStatusToHIPStatus(hcblasSasum(handle, n, x, incx, result));
|
||||
}
|
||||
|
||||
inline static hipblasStatus_t hipblasDasum(hipblasHandle_t handle, int n, double *x, int incx, double *result){
|
||||
return hipHCBLASStatusToHIPStatus(hcblasDasum(handle, n, x, incx, result));
|
||||
}
|
||||
|
||||
inline static hipblasStatus_t hipblasSasumBatched(hipblasHandle_t handle, int n, float *x, int incx, float *result, int batchCount){
|
||||
return hipHCBLASStatusToHIPStatus(hcblasSasumBatched( handle, n, x, incx, result, batchCount));
|
||||
}
|
||||
|
||||
inline static hipblasStatus_t hipblasDasumBatched(hipblasHandle_t handle, int n, double *x, int incx, double *result, int batchCount){
|
||||
return hipHCBLASStatusToHIPStatus(hcblasDasumBatched(handle, n, x, incx, result, batchCount));
|
||||
}
|
||||
|
||||
inline static hipblasStatus_t hipblasSaxpy(hipblasHandle_t handle, int n, const float *alpha, const float *x, int incx, float *y, int incy) {
|
||||
return hipHCBLASStatusToHIPStatus(hcblasSaxpy(handle, n, alpha, x, incx, y, incy));
|
||||
}
|
||||
|
||||
inline static hipblasStatus_t hipblasSaxpyBatched(hipblasHandle_t handle, int n, const float *alpha, const float *x, int incx, float *y, int incy, int batchCount){
|
||||
return hipHCBLASStatusToHIPStatus(hcblasSaxpyBatched(handle, n, alpha, x, incx, y, incy, batchCount));
|
||||
}
|
||||
|
||||
inline static hipblasStatus_t hipblasScopy(hipblasHandle_t handle, int n, const float *x, int incx, float *y, int incy){
|
||||
return hipHCBLASStatusToHIPStatus(hcblasScopy( handle, n, x, incx, y, incy));
|
||||
}
|
||||
|
||||
inline static hipblasStatus_t hipblasDcopy(hipblasHandle_t handle, int n, const double *x, int incx, double *y, int incy){
|
||||
return hipHCBLASStatusToHIPStatus(hcblasDcopy( handle, n, x, incx, y, incy));
|
||||
}
|
||||
|
||||
inline static hipblasStatus_t hipblasScopyBatched(hipblasHandle_t handle, int n, const float *x, int incx, float *y, int incy, int batchCount){
|
||||
return hipHCBLASStatusToHIPStatus(hcblasScopyBatched( handle, n, x, incx, y, incy, batchCount));
|
||||
}
|
||||
|
||||
inline static hipblasStatus_t hipblasDcopyBatched(hipblasHandle_t handle, int n, const double *x, int incx, double *y, int incy, int batchCount){
|
||||
return hipHCBLASStatusToHIPStatus(hcblasDcopyBatched( handle, n, x, incx, y, incy, batchCount));
|
||||
}
|
||||
|
||||
inline static hipblasStatus_t hipblasSdot (hipblasHandle_t handle, int n, const float *x, int incx, const float *y, int incy, float *result){
|
||||
return hipHCBLASStatusToHIPStatus(hcblasSdot(handle, n, x, incx, y, incy, result));
|
||||
}
|
||||
|
||||
inline static hipblasStatus_t hipblasDdot (hipblasHandle_t handle, int n, const double *x, int incx, const double *y, int incy, double *result){
|
||||
return hipHCBLASStatusToHIPStatus(hcblasDdot(handle, n, x, incx, y, incy, result));
|
||||
}
|
||||
|
||||
inline static hipblasStatus_t hipblasSdotBatched (hipblasHandle_t handle, int n, const float *x, int incx, const float *y, int incy, float *result, int batchCount){
|
||||
return hipHCBLASStatusToHIPStatus(hcblasSdotBatched(handle, n, x, incx, y, incy, result, batchCount));
|
||||
}
|
||||
|
||||
inline static hipblasStatus_t hipblasDdotBatched (hipblasHandle_t handle, int n, const double *x, int incx, const double *y, int incy, double *result, int batchCount){
|
||||
return hipHCBLASStatusToHIPStatus(hcblasDdotBatched ( handle, n, x, incx, y, incy, result, batchCount));
|
||||
}
|
||||
|
||||
inline static hipblasStatus_t hipblasSscal(hipblasHandle_t handle, int n, const float *alpha, float *x, int incx){
|
||||
return hipHCBLASStatusToHIPStatus(hcblasSscal(handle, n, alpha, x, incx));
|
||||
}
|
||||
|
||||
inline static hipblasStatus_t hipblasDscal(hipblasHandle_t handle, int n, const double *alpha, double *x, int incx){
|
||||
return hipHCBLASStatusToHIPStatus(hcblasDscal(handle, n, alpha, x, incx));
|
||||
}
|
||||
|
||||
inline static hipblasStatus_t hipblasSscalBatched(hipblasHandle_t handle, int n, const float *alpha, float *x, int incx, int batchCount){
|
||||
return hipHCBLASStatusToHIPStatus(hcblasSscalBatched(handle, n, alpha, x, incx, batchCount));
|
||||
}
|
||||
|
||||
inline static hipblasStatus_t hipblasDscalBatched(hipblasHandle_t handle, int n, const double *alpha, double *x, int incx, int batchCount){
|
||||
return hipHCBLASStatusToHIPStatus(hcblasDscalBatched(handle, n, alpha, x, incx, batchCount));
|
||||
}
|
||||
|
||||
inline static hipblasStatus_t hipblasSgemv(hipblasHandle_t handle, hipblasOperation_t trans, int m, int n, const float *alpha, float *A, int lda,
|
||||
float *x, int incx, const float *beta, float *y, int incy){
|
||||
return hipHCBLASStatusToHIPStatus(hcblasSgemv(handle, hipOperationToHCCOperation(trans), m, n, alpha, A, lda, x, incx, beta, y, incy));
|
||||
}
|
||||
|
||||
inline static hipblasStatus_t hipblasSgemvBatched(hipblasHandle_t handle, hipblasOperation_t trans, int m, int n, const float *alpha, float *A, int lda,
|
||||
float *x, int incx, const float *beta, float *y, int incy, int batchCount){
|
||||
return hipHCBLASStatusToHIPStatus(hcblasSgemvBatched(handle, hipOperationToHCCOperation(trans), m, n, alpha, A, lda, x, incx, beta, y, incy, batchCount));
|
||||
}
|
||||
|
||||
inline static hipblasStatus_t hipblasSger(hipblasHandle_t handle, int m, int n, const float *alpha, const float *x, int incx, const float *y, int incy, float *A, int lda){
|
||||
return hipHCBLASStatusToHIPStatus(hcblasSger(handle, m, n, alpha, x, incx, y, incy, A, lda));
|
||||
}
|
||||
|
||||
inline static hipblasStatus_t hipblasSgerBatched(hipblasHandle_t handle, int m, int n, const float *alpha, const float *x, int incx, const float *y, int incy, float *A, int lda, int batchCount){
|
||||
return hipHCBLASStatusToHIPStatus(hcblasSgerBatched(handle, m, n, alpha, x, incx, y, incy, A, lda, batchCount));
|
||||
}
|
||||
|
||||
inline static hipblasStatus_t hipblasSgemm(hipblasHandle_t handle, hipblasOperation_t transa, hipblasOperation_t transb,
|
||||
int m, int n, int k, const float *alpha, float *A, int lda, float *B, int ldb, const float *beta, float *C, int ldc){
|
||||
return hipHCBLASStatusToHIPStatus(hcblasSgemm( handle, hipOperationToHCCOperation(transa), hipOperationToHCCOperation(transb), m, n, k, alpha, A, lda, B, ldb, beta, C, ldc));
|
||||
}
|
||||
|
||||
inline static hipblasStatus_t hipblasCgemm(hipblasHandle_t handle, hipblasOperation_t transa, hipblasOperation_t transb,
|
||||
int m, int n, int k, const hipComplex *alpha, hipComplex *A, int lda, hipComplex *B, int ldb, const hipComplex *beta, hipComplex *C, int ldc){
|
||||
return hipHCBLASStatusToHIPStatus(hcblasCgemm( handle, hipOperationToHCCOperation(transa), hipOperationToHCCOperation(transb), m, n, k, alpha, A, lda, B, ldb, beta, C, ldc));
|
||||
}
|
||||
|
||||
inline static hipblasStatus_t hipblasSgemmBatched(hipblasHandle_t handle, hipblasOperation_t transa, hipblasOperation_t transb,
|
||||
int m, int n, int k, const float *alpha, float *A, int lda, float *B, int ldb, const float *beta, float *C, int ldc, int batchCount){
|
||||
return hipHCBLASStatusToHIPStatus(hcblasSgemmBatched( handle, hipOperationToHCCOperation(transa), hipOperationToHCCOperation(transb), m, n, k, alpha, A, lda, B, ldb, beta, C, ldc, batchCount));
|
||||
}
|
||||
|
||||
inline static hipblasStatus_t hipblasCgemmBatched(hipblasHandle_t handle, hipblasOperation_t transa, hipblasOperation_t transb,
|
||||
int m, int n, int k, const hipComplex *alpha, hipComplex *A, int lda, hipComplex *B, int ldb, const hipComplex *beta, hipComplex *C, int ldc, int batchCount){
|
||||
return HIPBLAS_STATUS_NOT_SUPPORTED;
|
||||
//return hipHCBLASStatusToHIPStatus(hcblasCgemmBatched( handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc, batchCount));
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
@@ -1,23 +1,25 @@
|
||||
/*
|
||||
Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef HIPCOMPLEX_H
|
||||
#define HIPCOMPLEX_H
|
||||
|
||||
@@ -1,19 +1,22 @@
|
||||
/*
|
||||
Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
@@ -34,13 +37,6 @@ typedef struct __attribute__((aligned(4))){
|
||||
typedef __half half;
|
||||
typedef __half2 half2;
|
||||
|
||||
typedef struct{
|
||||
union{
|
||||
float f;
|
||||
unsigned u;
|
||||
};
|
||||
} struct_float;
|
||||
|
||||
/*
|
||||
Arithmetic functions
|
||||
*/
|
||||
|
||||
@@ -1,19 +1,22 @@
|
||||
/*
|
||||
Link errors represented as this:Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
@@ -29,7 +32,7 @@ THE SOFTWARE.
|
||||
#error("This version of HIP requires a newer version of HCC.");
|
||||
#endif
|
||||
|
||||
// #define USE_MEMCPYTOSYMBOL
|
||||
#define USE_DISPATCH_HSA_KERNEL 0
|
||||
//
|
||||
|
||||
|
||||
|
||||
@@ -1,13 +1,16 @@
|
||||
/*
|
||||
Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
|
||||
Executable → Regular
+28
-38
@@ -19,6 +19,7 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @file hcc_detail/hip_runtime.h
|
||||
* @brief Contains definitions of APIs for HIP runtime.
|
||||
@@ -533,7 +534,8 @@ __device__ float __dsqrt_rz(double x);
|
||||
*
|
||||
* @warning __threadfence_block is a stub and map to no-op.
|
||||
*/
|
||||
__device__ void __threadfence_block(void);
|
||||
// __device__ void __threadfence_block(void);
|
||||
extern "C" __device__ void __threadfence_block(void);
|
||||
|
||||
/**
|
||||
* @brief threadfence makes wirtes visible to other threads running on same GPU.
|
||||
@@ -544,7 +546,8 @@ __device__ void __threadfence_block(void);
|
||||
*
|
||||
* @warning __threadfence is a stub and map to no-op, application should set "export HSA_DISABLE_CACHE=1" to disable both L1 and L2 caches.
|
||||
*/
|
||||
__device__ void __threadfence(void) __attribute__((deprecated("Provided for compile-time compatibility, not yet functional")));
|
||||
// __device__ void __threadfence(void) __attribute__((deprecated("Provided for compile-time compatibility, not yet functional")));
|
||||
extern "C" __device__ void __threadfence(void);
|
||||
|
||||
/**
|
||||
* @brief threadfence_system makes writes to pinned system memory visible on host CPU.
|
||||
@@ -583,55 +586,42 @@ __device__ void __threadfence_system(void) __attribute__((deprecated("Provided
|
||||
// loop unrolling
|
||||
__device__ static inline void* memcpy(void* dst, void* src, size_t size)
|
||||
{
|
||||
uint64_t i = 0;
|
||||
uint64_t totalLength = size/sizeof(uint32_t);
|
||||
for(i=hipThreadIdx_x+hipBlockIdx_x*hipBlockDim_x;
|
||||
i<(totalLength/4);
|
||||
i = i + hipBlockDim_x * hipGridDim_x)
|
||||
{
|
||||
((uint32_t*)dst)[4*i] = ((uint32_t*)src)[4*i];
|
||||
((uint32_t*)dst)[4*i+1] = ((uint32_t*)src)[4*i+1];
|
||||
((uint32_t*)dst)[4*i+2] = ((uint32_t*)src)[4*i+2];
|
||||
((uint32_t*)dst)[4*i+3] = ((uint32_t*)src)[4*i+3];
|
||||
}
|
||||
if(4*i < totalLength){
|
||||
((uint32_t*)dst)[4*i] = ((uint32_t*)src)[4*i];
|
||||
((uint32_t*)dst)[4*i+1] = ((uint32_t*)src)[4*i+1];
|
||||
((uint32_t*)dst)[4*i+2] = ((uint32_t*)src)[4*i+2];
|
||||
((uint32_t*)dst)[4*i+3] = ((uint32_t*)src)[4*i+3];
|
||||
|
||||
uint8_t *dstPtr, *srcPtr;
|
||||
dstPtr = (uint8_t*)dst;
|
||||
srcPtr = (uint8_t*)src;
|
||||
for(uint32_t i=0;i<size;i++) {
|
||||
dstPtr[i] = srcPtr[i];
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
__device__ static inline void* memset(void* ptr, uint8_t val, size_t size)
|
||||
{
|
||||
uint32_t _val = 0;
|
||||
_val = (val | val << 8 | val << 16 | val << 24);
|
||||
uint64_t totalLength = size/sizeof(uint32_t);
|
||||
uint64_t i = 0;
|
||||
for(i=hipThreadIdx_x+hipBlockIdx_x*hipBlockDim_x;
|
||||
i<(totalLength/4);
|
||||
i = i + hipBlockDim_x * hipGridDim_x)
|
||||
{
|
||||
((uint32_t*)ptr)[4*i] = _val;
|
||||
((uint32_t*)ptr)[4*i+1] = _val;
|
||||
((uint32_t*)ptr)[4*i+2] = _val;
|
||||
((uint32_t*)ptr)[4*i+3] = _val;
|
||||
}
|
||||
if(4*i < totalLength){
|
||||
((uint32_t*)ptr)[4*i] = _val;
|
||||
((uint32_t*)ptr)[4*i+1] = _val;
|
||||
((uint32_t*)ptr)[4*i+2] = _val;
|
||||
((uint32_t*)ptr)[4*i+3] = _val;
|
||||
|
||||
uint8_t *dstPtr;
|
||||
dstPtr = (uint8_t*)ptr;
|
||||
for(uint32_t i=0;i<size;i++) {
|
||||
dstPtr[i] = val;
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
extern "C" __device__ void* __hip_hc_malloc(size_t);
|
||||
extern "C" __device__ void* __hip_hc_free(void *ptr);
|
||||
|
||||
__device__ static inline void* malloc(size_t size)
|
||||
{
|
||||
return __hip_hc_malloc(size);
|
||||
}
|
||||
|
||||
__device__ static inline void* free(void *ptr)
|
||||
{
|
||||
return __hip_hc_free(ptr);
|
||||
}
|
||||
|
||||
#define __syncthreads() hc_barrier(CLK_LOCAL_MEM_FENCE)
|
||||
|
||||
#define HIP_KERNEL_NAME(...) __VA_ARGS__
|
||||
#define HIP_SYMBOL(X) #X
|
||||
|
||||
#ifdef __HCC_CPP__
|
||||
extern hipStream_t ihipPreLaunchKernel(hipStream_t stream, dim3 grid, dim3 block, grid_launch_parm *lp, const char *kernelNameStr);
|
||||
|
||||
@@ -19,6 +19,7 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
//#pragma once
|
||||
#ifndef HIP_RUNTIME_API_H
|
||||
#define HIP_RUNTIME_API_H
|
||||
@@ -60,6 +61,10 @@ typedef void* hipDeviceptr_t;
|
||||
|
||||
typedef struct ihipEvent_t *hipEvent_t;
|
||||
|
||||
enum hipLimit_t
|
||||
{
|
||||
hipLimitMallocHeapSize = 0x02,
|
||||
};
|
||||
|
||||
/**
|
||||
* @addtogroup GlobalDefs More
|
||||
@@ -97,6 +102,7 @@ typedef struct ihipEvent_t *hipEvent_t;
|
||||
#define hipDeviceMapHost 0x8
|
||||
#define hipDeviceLmemResizeToMax 0x16
|
||||
|
||||
|
||||
/**
|
||||
* @warning On AMD devices and recent Nvidia devices, these hints and controls are ignored.
|
||||
*/
|
||||
@@ -323,6 +329,18 @@ hipError_t hipDeviceSetCacheConfig ( hipFuncCache cacheConfig );
|
||||
*/
|
||||
hipError_t hipDeviceGetCacheConfig ( hipFuncCache *cacheConfig );
|
||||
|
||||
/**
|
||||
* @brief Get Resource limits of current device
|
||||
*
|
||||
* @param [out] pValue
|
||||
* @param [in] limit
|
||||
*
|
||||
* @returns #hipSuccess, #hipErrorUnsupportedLimit, #hipErrorInvalidValue
|
||||
* Note: Currently, only hipLimitMallocHeapSize is available
|
||||
*
|
||||
*/
|
||||
hipError_t hipDeviceGetLimit(size_t *pValue, hipLimit_t limit);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Set Cache configuration for a specific function
|
||||
@@ -781,6 +799,18 @@ hipError_t hipPointerGetAttributes(hipPointerAttribute_t *attributes, void* ptr)
|
||||
*/
|
||||
hipError_t hipMalloc(void** ptr, size_t size) ;
|
||||
|
||||
/**
|
||||
* @brief Allocate pinned host memory [Deprecated]
|
||||
*
|
||||
* @param[out] ptr Pointer to the allocated host pinned memory
|
||||
* @param[in] size Requested memory size
|
||||
*
|
||||
* @return #hipSuccess, #hipErrorMemoryAllocation
|
||||
*
|
||||
* @deprecated use hipHostMalloc() instead
|
||||
*/
|
||||
hipError_t hipMallocHost(void** ptr, size_t size) __attribute__((deprecated("use hipHostMalloc instead"))) ;
|
||||
|
||||
/**
|
||||
* @brief Allocate device accessible page locked host memory
|
||||
*
|
||||
@@ -794,6 +824,19 @@ hipError_t hipMalloc(void** ptr, size_t size) ;
|
||||
*/
|
||||
hipError_t hipHostMalloc(void** ptr, size_t size, unsigned int flags) ;
|
||||
|
||||
/**
|
||||
* @brief Allocate device accessible page locked host memory [Deprecated]
|
||||
*
|
||||
* @param[out] ptr Pointer to the allocated host pinned memory
|
||||
* @param[in] size Requested memory size
|
||||
* @param[in] flags Type of host memory allocation
|
||||
*
|
||||
* @return #hipSuccess, #hipErrorMemoryAllocation
|
||||
*
|
||||
* @deprecated use hipHostMalloc() instead
|
||||
*/
|
||||
hipError_t hipHostAlloc(void** ptr, size_t size, unsigned int flags) __attribute__((deprecated("use hipHostMalloc instead"))) ;
|
||||
|
||||
/**
|
||||
* @brief Get Device pointer from Host Pointer allocated through hipHostMalloc
|
||||
*
|
||||
@@ -892,6 +935,17 @@ hipError_t hipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t height
|
||||
*/
|
||||
hipError_t hipFree(void* ptr);
|
||||
|
||||
/**
|
||||
* @brief Free memory allocated by the hcc hip host memory allocation API. [Deprecated]
|
||||
*
|
||||
* @param[in] ptr Pointer to memory to be freed
|
||||
* @return #hipSuccess,
|
||||
* #hipErrorInvalidValue (if pointer is invalid, including device pointers allocated with hipMalloc)
|
||||
|
||||
* @deprecated use hipHostFree() instead
|
||||
*/
|
||||
hipError_t hipFreeHost(void* ptr) __attribute__((deprecated("use hipHostFree instead")));
|
||||
|
||||
/**
|
||||
* @brief Free memory allocated by the hcc hip host memory allocation API
|
||||
* This API performs an implicit hipDeviceSynchronize() call.
|
||||
@@ -1026,6 +1080,27 @@ hipError_t hipMemcpyDtoDAsync(hipDeviceptr_t dst, hipDeviceptr_t src, size_t siz
|
||||
hipError_t hipMemcpyToSymbol(const char* symbolName, const void *src, size_t sizeBytes, size_t offset, hipMemcpyKind kind);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Copies @p sizeBytes bytes from the memory area pointed to by @p src to the memory area pointed to by @p offset bytes from the start of symbol @p symbol
|
||||
*
|
||||
* The memory areas may not overlap. Symbol can either be a variable that resides in global or constant memory space, or it can be a character string,
|
||||
* naming a variable that resides in global or constant memory space. Kind can be either hipMemcpyHostToDevice or hipMemcpyDeviceToDevice
|
||||
* hipMemcpyToSymbolAsync() is asynchronous with respect to the host, so the call may return before copy is complete.
|
||||
* TODO: cudaErrorInvalidSymbol and cudaErrorInvalidMemcpyDirection is not supported, use hipErrorUnknown for now.
|
||||
*
|
||||
* @param[in] symbolName - Symbol destination on device
|
||||
* @param[in] src - Data being copy from
|
||||
* @param[in] sizeBytes - Data size in bytes
|
||||
* @param[in] offset - Offset from start of symbol in bytes
|
||||
* @param[in] kind - Type of transfer
|
||||
* @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree, #hipErrorUnknown
|
||||
*
|
||||
* @see hipMemcpy, hipMemcpy2D, hipMemcpyToArray, hipMemcpy2DToArray, hipMemcpyFromArray, hipMemcpy2DFromArray, hipMemcpyArrayToArray, hipMemcpy2DArrayToArray, hipMemcpyFromSymbol, hipMemcpyAsync, hipMemcpy2DAsync, hipMemcpyToArrayAsync, hipMemcpy2DToArrayAsync, hipMemcpyFromArrayAsync, hipMemcpy2DFromArrayAsync, hipMemcpyToSymbolAsync, hipMemcpyFromSymbolAsync
|
||||
*/
|
||||
hipError_t hipMemcpyToSymbolAsync(const char* symbolName, const void *src, size_t sizeBytes, size_t offset, hipMemcpyKind kind, hipStream_t stream);
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* @brief Copy data from src to dst asynchronously.
|
||||
*
|
||||
|
||||
@@ -1,23 +1,25 @@
|
||||
/*
|
||||
Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef HIP_UTIL_H
|
||||
#define HIP_UTIL_H
|
||||
|
||||
|
||||
@@ -1,416 +1,416 @@
|
||||
/*
|
||||
Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @file hcc_detail/hip_vector_types.h
|
||||
* @brief Defines the different newt vector types for HIP runtime.
|
||||
*/
|
||||
|
||||
#ifndef HIP_VECTOR_TYPES_H
|
||||
#define HIP_VECTOR_TYPES_H
|
||||
|
||||
#if defined (__HCC__) && (__hcc_workweek__ < 16032)
|
||||
#error("This version of HIP requires a newer version of HCC.");
|
||||
#endif
|
||||
|
||||
#if __HCC__
|
||||
#include <hc_short_vector.hpp>
|
||||
|
||||
using namespace hc::short_vector;
|
||||
|
||||
|
||||
//-- Signed
|
||||
// Define char vector types
|
||||
typedef hc::short_vector::char1 char1;
|
||||
typedef hc::short_vector::char2 char2;
|
||||
typedef hc::short_vector::char3 char3;
|
||||
typedef hc::short_vector::char4 char4;
|
||||
|
||||
// Define short vector types
|
||||
typedef hc::short_vector::short1 short1;
|
||||
typedef hc::short_vector::short2 short2;
|
||||
typedef hc::short_vector::short3 short3;
|
||||
typedef hc::short_vector::short4 short4;
|
||||
|
||||
// Define int vector types
|
||||
typedef hc::short_vector::int1 int1;
|
||||
typedef hc::short_vector::int2 int2;
|
||||
typedef hc::short_vector::int3 int3;
|
||||
typedef hc::short_vector::int4 int4;
|
||||
|
||||
// Define long vector types
|
||||
typedef hc::short_vector::long1 long1;
|
||||
typedef hc::short_vector::long2 long2;
|
||||
typedef hc::short_vector::long3 long3;
|
||||
typedef hc::short_vector::long4 long4;
|
||||
|
||||
// Define longlong vector types
|
||||
typedef hc::short_vector::longlong1 longlong1;
|
||||
typedef hc::short_vector::longlong2 longlong2;
|
||||
typedef hc::short_vector::longlong3 longlong3;
|
||||
typedef hc::short_vector::longlong4 longlong4;
|
||||
|
||||
|
||||
//-- Unsigned
|
||||
// Define uchar vector types
|
||||
typedef hc::short_vector::uchar1 uchar1;
|
||||
typedef hc::short_vector::uchar2 uchar2;
|
||||
typedef hc::short_vector::uchar3 uchar3;
|
||||
typedef hc::short_vector::uchar4 uchar4;
|
||||
|
||||
// Define ushort vector types
|
||||
typedef hc::short_vector::ushort1 ushort1;
|
||||
typedef hc::short_vector::ushort2 ushort2;
|
||||
typedef hc::short_vector::ushort3 ushort3;
|
||||
typedef hc::short_vector::ushort4 ushort4;
|
||||
|
||||
// Define uint vector types
|
||||
typedef hc::short_vector::uint1 uint1;
|
||||
typedef hc::short_vector::uint2 uint2;
|
||||
typedef hc::short_vector::uint3 uint3;
|
||||
typedef hc::short_vector::uint4 uint4;
|
||||
|
||||
// Define ulong vector types
|
||||
typedef hc::short_vector::ulong1 ulong1;
|
||||
typedef hc::short_vector::ulong2 ulong2;
|
||||
typedef hc::short_vector::ulong3 ulong3;
|
||||
typedef hc::short_vector::ulong4 ulong4;
|
||||
|
||||
// Define ulonglong vector types
|
||||
typedef hc::short_vector::ulonglong1 ulonglong1;
|
||||
typedef hc::short_vector::ulonglong2 ulonglong2;
|
||||
typedef hc::short_vector::ulonglong3 ulonglong3;
|
||||
typedef hc::short_vector::ulonglong4 ulonglong4;
|
||||
|
||||
|
||||
//-- Floating point
|
||||
// Define float vector types
|
||||
typedef hc::short_vector::float1 float1;
|
||||
typedef hc::short_vector::float2 float2;
|
||||
typedef hc::short_vector::float3 float3;
|
||||
typedef hc::short_vector::float4 float4;
|
||||
|
||||
// Define double vector types
|
||||
typedef hc::short_vector::double1 double1;
|
||||
typedef hc::short_vector::double2 double2;
|
||||
typedef hc::short_vector::double3 double3;
|
||||
typedef hc::short_vector::double4 double4;
|
||||
|
||||
#else
|
||||
|
||||
#define __hip_align(name, val, data) \
|
||||
__attribute__((aligned(val))) name \
|
||||
{ data }
|
||||
|
||||
struct __hip_align(char1, 1, signed char x;);
|
||||
struct __hip_align(uchar1, 1, unsigned char x;);
|
||||
|
||||
struct __hip_align(char2, 2, signed char x; signed char y;);
|
||||
struct __hip_align(uchar2, 2, unsigned char x; unsigned char y;);
|
||||
|
||||
struct char3
|
||||
{
|
||||
signed char x, y, z;
|
||||
};
|
||||
|
||||
struct uchar3
|
||||
{
|
||||
unsigned char x, y, z;
|
||||
};
|
||||
|
||||
struct __hip_align(char4, 4, signed char x; signed char y; signed char z; signed char w;);
|
||||
struct __hip_align(uchar4, 4, unsigned char x; unsigned char y; unsigned char z; unsigned char w;);
|
||||
|
||||
struct __hip_align(short1, 2, signed short x;);
|
||||
struct __hip_align(ushort1, 2, unsigned short x;);
|
||||
|
||||
struct __hip_align(short2, 4, signed short x; signed short y;);
|
||||
struct __hip_align(ushort2, 4, unsigned short x; unsigned short y;);
|
||||
|
||||
struct short3
|
||||
{
|
||||
signed short x, y, z;
|
||||
};
|
||||
|
||||
struct ushort3
|
||||
{
|
||||
unsigned short x, y, z;
|
||||
};
|
||||
|
||||
struct __hip_align(short4, 8, signed short x; signed short y; signed short z; signed short w;);
|
||||
struct __hip_align(ushort4, 8, unsigned short x; unsigned short y; unsigned short z; unsigned short w;);
|
||||
|
||||
struct __hip_align(int1, 4, signed int x;);
|
||||
struct __hip_align(uint1, 4, unsigned int x;);
|
||||
|
||||
struct __hip_align(int2, 8, signed int x; signed int y;);
|
||||
struct __hip_align(uint2, 8, unsigned int x; unsigned int y;);
|
||||
|
||||
struct int3{
|
||||
signed int x, y, z;
|
||||
};
|
||||
struct uint3{
|
||||
unsigned int x, y, z;
|
||||
};
|
||||
|
||||
struct __hip_align(int4, 16, signed int x; signed int y; signed int z; signed int w;);
|
||||
struct __hip_align(uint4, 16, unsigned int x; unsigned int y; unsigned int z; unsigned int w;);
|
||||
|
||||
struct __hip_align(long1, 8, long int x;);
|
||||
struct __hip_align(ulong1, 8, unsigned long x;);
|
||||
|
||||
struct __hip_align(long2, 16, long int x; long int y;);
|
||||
struct __hip_align(ulong2, 16, unsigned long x; unsigned long y;);
|
||||
|
||||
struct long3{
|
||||
long int x, y, z;
|
||||
};
|
||||
struct ulong3{
|
||||
unsigned long x, y, z;
|
||||
};
|
||||
|
||||
struct __hip_align(long4, 32, long int x; long int y; long int z; long int w;);
|
||||
struct __hip_align(ulong4, 32, unsigned long x; unsigned long y; unsigned long z; unsigned long w;);
|
||||
|
||||
struct float1
|
||||
{
|
||||
float x;
|
||||
};
|
||||
|
||||
struct __hip_align(float2, 8, float x; float y;);
|
||||
|
||||
struct float3
|
||||
{
|
||||
float x, y, z;
|
||||
};
|
||||
|
||||
struct __hip_align(float4, 16, float x; float y; float z; float w;);
|
||||
|
||||
struct __hip_align(longlong1, 16, long long int x;);
|
||||
struct __hip_align(ulonglong1, 16, unsigned long long int x;);
|
||||
|
||||
struct __attribute__((aligned(32))) longlong2
|
||||
{
|
||||
long long int x, y;
|
||||
};
|
||||
|
||||
struct __attribute__((aligned(32))) ulonglong2
|
||||
{
|
||||
unsigned long long int x, y;
|
||||
};
|
||||
|
||||
struct longlong3
|
||||
{
|
||||
long long int x, y, z;
|
||||
};
|
||||
|
||||
struct ulonglong3
|
||||
{
|
||||
unsigned long long int x, y, z;
|
||||
};
|
||||
|
||||
struct __attribute__((aligned(64))) longlong4
|
||||
{
|
||||
long long int x, y, z, w;
|
||||
};
|
||||
|
||||
struct __attribute__((aligned(64))) ulonglong4
|
||||
{
|
||||
unsigned long long int x, y, z, w;
|
||||
};
|
||||
|
||||
struct double1
|
||||
{
|
||||
double x;
|
||||
};
|
||||
|
||||
struct __attribute__((aligned(16))) double2
|
||||
{
|
||||
double x, y;
|
||||
};
|
||||
|
||||
struct double3
|
||||
{
|
||||
double x, y, z;
|
||||
};
|
||||
|
||||
struct __attribute__((aligned(32))) double4
|
||||
{
|
||||
double x, y, z, w;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
#if __HCC__
|
||||
#include"hip/hcc_detail/host_defines.h"
|
||||
#define __HIP_DEVICE__ __device__ __host__
|
||||
#else
|
||||
#define __HIP_DEVICE__
|
||||
#endif
|
||||
|
||||
__HIP_DEVICE__ char1 make_char1(signed char );
|
||||
__HIP_DEVICE__ char2 make_char2(signed char, signed char );
|
||||
__HIP_DEVICE__ char3 make_char3(signed char, signed char, signed char );
|
||||
__HIP_DEVICE__ char4 make_char4(signed char, signed char, signed char, signed char );
|
||||
|
||||
__HIP_DEVICE__ short1 make_short1(short );
|
||||
__HIP_DEVICE__ short2 make_short2(short, short );
|
||||
__HIP_DEVICE__ short3 make_short3(short, short, short );
|
||||
__HIP_DEVICE__ short4 make_short4(short, short, short, short );
|
||||
|
||||
__HIP_DEVICE__ int1 make_int1(int );
|
||||
__HIP_DEVICE__ int2 make_int2(int, int );
|
||||
__HIP_DEVICE__ int3 make_int3(int, int, int );
|
||||
__HIP_DEVICE__ int4 make_int4(int, int, int, int );
|
||||
|
||||
__HIP_DEVICE__ long1 make_long1(long );
|
||||
__HIP_DEVICE__ long2 make_long2(long, long );
|
||||
__HIP_DEVICE__ long3 make_long3(long, long, long );
|
||||
__HIP_DEVICE__ long4 make_long4(long, long, long, long );
|
||||
|
||||
__HIP_DEVICE__ longlong1 make_longlong1(long long );
|
||||
__HIP_DEVICE__ longlong2 make_longlong2(long long, long long );
|
||||
__HIP_DEVICE__ longlong3 make_longlong3(long long, long long, long long );
|
||||
__HIP_DEVICE__ longlong4 make_longlong4(long long, long long, long long, long long );
|
||||
|
||||
__HIP_DEVICE__ uchar1 make_uchar1(unsigned char );
|
||||
__HIP_DEVICE__ uchar2 make_uchar2(unsigned char, unsigned char );
|
||||
__HIP_DEVICE__ uchar3 make_uchar3(unsigned char, unsigned char, unsigned char );
|
||||
__HIP_DEVICE__ uchar4 make_uchar4(unsigned char, unsigned char, unsigned char, unsigned char );
|
||||
|
||||
__HIP_DEVICE__ ushort1 make_ushort1(unsigned short );
|
||||
__HIP_DEVICE__ ushort2 make_ushort2(unsigned short, unsigned short );
|
||||
__HIP_DEVICE__ ushort3 make_ushort3(unsigned short, unsigned short, unsigned short );
|
||||
__HIP_DEVICE__ ushort4 make_ushort4(unsigned short, unsigned short, unsigned short, unsigned short );
|
||||
|
||||
__HIP_DEVICE__ uint1 make_uint1(unsigned int );
|
||||
__HIP_DEVICE__ uint2 make_uint2(unsigned int, unsigned int );
|
||||
__HIP_DEVICE__ uint3 make_uint3(unsigned int, unsigned int, unsigned int );
|
||||
__HIP_DEVICE__ uint4 make_uint4(unsigned int, unsigned int, unsigned int, unsigned int );
|
||||
|
||||
__HIP_DEVICE__ ulong1 make_ulong1(unsigned long );
|
||||
__HIP_DEVICE__ ulong2 make_ulong2(unsigned long, unsigned long );
|
||||
__HIP_DEVICE__ ulong3 make_ulong3(unsigned long, unsigned long, unsigned long );
|
||||
__HIP_DEVICE__ ulong4 make_ulong4(unsigned long, unsigned long, unsigned long, unsigned long );
|
||||
|
||||
__HIP_DEVICE__ ulonglong1 make_ulonglong1(unsigned long long );
|
||||
__HIP_DEVICE__ ulonglong2 make_ulonglong2(unsigned long long, unsigned long long);
|
||||
__HIP_DEVICE__ ulonglong3 make_ulonglong3(unsigned long long, unsigned long long, unsigned long long);
|
||||
__HIP_DEVICE__ ulonglong4 make_ulonglong4(unsigned long long, unsigned long long, unsigned long long, unsigned long long );
|
||||
|
||||
__HIP_DEVICE__ float1 make_float1(float );
|
||||
__HIP_DEVICE__ float2 make_float2(float, float );
|
||||
__HIP_DEVICE__ float3 make_float3(float, float, float );
|
||||
__HIP_DEVICE__ float4 make_float4(float, float, float, float );
|
||||
|
||||
__HIP_DEVICE__ double1 make_double1(double );
|
||||
__HIP_DEVICE__ double2 make_double2(double, double );
|
||||
__HIP_DEVICE__ double3 make_double3(double, double, double );
|
||||
__HIP_DEVICE__ double4 make_double4(double, double, double, double );
|
||||
|
||||
/*
|
||||
///---
|
||||
// Inline functions for creating vector types from basic types
|
||||
#define ONE_COMPONENT_ACCESS(T, VT) inline VT make_ ##VT [[hc]] [[cpu]] (T x) { VT t; t.x = x; return t; };
|
||||
#define TWO_COMPONENT_ACCESS(T, VT) inline VT make_ ##VT [[hc]] [[cpu]] (T x, T y) { VT t; t.x=x; t.y=y; return t; };
|
||||
#define THREE_COMPONENT_ACCESS(T, VT) inline VT make_ ##VT [[hc]] [[cpu]] (T x, T y, T z) { VT t; t.x=x; t.y=y; t.z=z; return t; };
|
||||
#define FOUR_COMPONENT_ACCESS(T, VT) inline VT make_ ##VT [[hc]] [[cpu]] (T x, T y, T z, T w) { VT t; t.x=x; t.y=y; t.z=z; t.w=w; return t; };
|
||||
|
||||
|
||||
//signed:
|
||||
ONE_COMPONENT_ACCESS (signed char, char1);
|
||||
TWO_COMPONENT_ACCESS (signed char, char2);
|
||||
THREE_COMPONENT_ACCESS(signed char, char3);
|
||||
FOUR_COMPONENT_ACCESS (signed char, char4);
|
||||
|
||||
ONE_COMPONENT_ACCESS (short, short1);
|
||||
TWO_COMPONENT_ACCESS (short, short2);
|
||||
THREE_COMPONENT_ACCESS(short, short3);
|
||||
FOUR_COMPONENT_ACCESS (short, short4);
|
||||
|
||||
ONE_COMPONENT_ACCESS (int, int1);
|
||||
TWO_COMPONENT_ACCESS (int, int2);
|
||||
THREE_COMPONENT_ACCESS(int, int3);
|
||||
FOUR_COMPONENT_ACCESS (int, int4);
|
||||
|
||||
ONE_COMPONENT_ACCESS (long int, long1);
|
||||
TWO_COMPONENT_ACCESS (long int, long2);
|
||||
THREE_COMPONENT_ACCESS(long int, long3);
|
||||
FOUR_COMPONENT_ACCESS (long int, long4);
|
||||
|
||||
ONE_COMPONENT_ACCESS (long long int, ulong1);
|
||||
TWO_COMPONENT_ACCESS (long long int, ulong2);
|
||||
THREE_COMPONENT_ACCESS(long long int, ulong3);
|
||||
FOUR_COMPONENT_ACCESS (long long int, ulong4);
|
||||
|
||||
ONE_COMPONENT_ACCESS (long long int, longlong1);
|
||||
TWO_COMPONENT_ACCESS (long long int, longlong2);
|
||||
THREE_COMPONENT_ACCESS(long long int, longlong3);
|
||||
FOUR_COMPONENT_ACCESS (long long int, longlong4);
|
||||
|
||||
|
||||
// unsigned:
|
||||
ONE_COMPONENT_ACCESS (unsigned char, uchar1);
|
||||
TWO_COMPONENT_ACCESS (unsigned char, uchar2);
|
||||
THREE_COMPONENT_ACCESS(unsigned char, uchar3);
|
||||
FOUR_COMPONENT_ACCESS (unsigned char, uchar4);
|
||||
|
||||
ONE_COMPONENT_ACCESS (unsigned short, ushort1);
|
||||
TWO_COMPONENT_ACCESS (unsigned short, ushort2);
|
||||
THREE_COMPONENT_ACCESS(unsigned short, ushort3);
|
||||
FOUR_COMPONENT_ACCESS (unsigned short, ushort4);
|
||||
|
||||
ONE_COMPONENT_ACCESS (unsigned int, uint1);
|
||||
TWO_COMPONENT_ACCESS (unsigned int, uint2);
|
||||
THREE_COMPONENT_ACCESS(unsigned int, uint3);
|
||||
FOUR_COMPONENT_ACCESS (unsigned int, uint4);
|
||||
|
||||
ONE_COMPONENT_ACCESS (unsigned long int, ulong1);
|
||||
TWO_COMPONENT_ACCESS (unsigned long int, ulong2);
|
||||
THREE_COMPONENT_ACCESS(unsigned long int, ulong3);
|
||||
FOUR_COMPONENT_ACCESS (unsigned long int, ulong4);
|
||||
|
||||
ONE_COMPONENT_ACCESS (unsigned long long int, ulong1);
|
||||
TWO_COMPONENT_ACCESS (unsigned long long int, ulong2);
|
||||
THREE_COMPONENT_ACCESS(unsigned long long int, ulong3);
|
||||
FOUR_COMPONENT_ACCESS (unsigned long long int, ulong4);
|
||||
|
||||
ONE_COMPONENT_ACCESS (unsigned long long int, ulonglong1);
|
||||
TWO_COMPONENT_ACCESS (unsigned long long int, ulonglong2);
|
||||
THREE_COMPONENT_ACCESS(unsigned long long int, ulonglong3);
|
||||
FOUR_COMPONENT_ACCESS (unsigned long long int, ulonglong4);
|
||||
|
||||
|
||||
//Floating point
|
||||
ONE_COMPONENT_ACCESS (float, float1);
|
||||
TWO_COMPONENT_ACCESS (float, float2);
|
||||
THREE_COMPONENT_ACCESS(float, float3);
|
||||
FOUR_COMPONENT_ACCESS (float, float4);
|
||||
|
||||
ONE_COMPONENT_ACCESS (double, double1);
|
||||
TWO_COMPONENT_ACCESS (double, double2);
|
||||
THREE_COMPONENT_ACCESS(double, double3);
|
||||
FOUR_COMPONENT_ACCESS (double, double4);
|
||||
*/
|
||||
|
||||
#endif
|
||||
|
||||
/*
|
||||
Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @file hcc_detail/hip_vector_types.h
|
||||
* @brief Defines the different newt vector types for HIP runtime.
|
||||
*/
|
||||
|
||||
#ifndef HIP_VECTOR_TYPES_H
|
||||
#define HIP_VECTOR_TYPES_H
|
||||
|
||||
#if defined (__HCC__) && (__hcc_workweek__ < 16032)
|
||||
#error("This version of HIP requires a newer version of HCC.");
|
||||
#endif
|
||||
|
||||
#if __HCC__
|
||||
#include <hc_short_vector.hpp>
|
||||
|
||||
using namespace hc::short_vector;
|
||||
|
||||
|
||||
//-- Signed
|
||||
// Define char vector types
|
||||
typedef hc::short_vector::char1 char1;
|
||||
typedef hc::short_vector::char2 char2;
|
||||
typedef hc::short_vector::char3 char3;
|
||||
typedef hc::short_vector::char4 char4;
|
||||
|
||||
// Define short vector types
|
||||
typedef hc::short_vector::short1 short1;
|
||||
typedef hc::short_vector::short2 short2;
|
||||
typedef hc::short_vector::short3 short3;
|
||||
typedef hc::short_vector::short4 short4;
|
||||
|
||||
// Define int vector types
|
||||
typedef hc::short_vector::int1 int1;
|
||||
typedef hc::short_vector::int2 int2;
|
||||
typedef hc::short_vector::int3 int3;
|
||||
typedef hc::short_vector::int4 int4;
|
||||
|
||||
// Define long vector types
|
||||
typedef hc::short_vector::long1 long1;
|
||||
typedef hc::short_vector::long2 long2;
|
||||
typedef hc::short_vector::long3 long3;
|
||||
typedef hc::short_vector::long4 long4;
|
||||
|
||||
// Define longlong vector types
|
||||
typedef hc::short_vector::longlong1 longlong1;
|
||||
typedef hc::short_vector::longlong2 longlong2;
|
||||
typedef hc::short_vector::longlong3 longlong3;
|
||||
typedef hc::short_vector::longlong4 longlong4;
|
||||
|
||||
|
||||
//-- Unsigned
|
||||
// Define uchar vector types
|
||||
typedef hc::short_vector::uchar1 uchar1;
|
||||
typedef hc::short_vector::uchar2 uchar2;
|
||||
typedef hc::short_vector::uchar3 uchar3;
|
||||
typedef hc::short_vector::uchar4 uchar4;
|
||||
|
||||
// Define ushort vector types
|
||||
typedef hc::short_vector::ushort1 ushort1;
|
||||
typedef hc::short_vector::ushort2 ushort2;
|
||||
typedef hc::short_vector::ushort3 ushort3;
|
||||
typedef hc::short_vector::ushort4 ushort4;
|
||||
|
||||
// Define uint vector types
|
||||
typedef hc::short_vector::uint1 uint1;
|
||||
typedef hc::short_vector::uint2 uint2;
|
||||
typedef hc::short_vector::uint3 uint3;
|
||||
typedef hc::short_vector::uint4 uint4;
|
||||
|
||||
// Define ulong vector types
|
||||
typedef hc::short_vector::ulong1 ulong1;
|
||||
typedef hc::short_vector::ulong2 ulong2;
|
||||
typedef hc::short_vector::ulong3 ulong3;
|
||||
typedef hc::short_vector::ulong4 ulong4;
|
||||
|
||||
// Define ulonglong vector types
|
||||
typedef hc::short_vector::ulonglong1 ulonglong1;
|
||||
typedef hc::short_vector::ulonglong2 ulonglong2;
|
||||
typedef hc::short_vector::ulonglong3 ulonglong3;
|
||||
typedef hc::short_vector::ulonglong4 ulonglong4;
|
||||
|
||||
|
||||
//-- Floating point
|
||||
// Define float vector types
|
||||
typedef hc::short_vector::float1 float1;
|
||||
typedef hc::short_vector::float2 float2;
|
||||
typedef hc::short_vector::float3 float3;
|
||||
typedef hc::short_vector::float4 float4;
|
||||
|
||||
// Define double vector types
|
||||
typedef hc::short_vector::double1 double1;
|
||||
typedef hc::short_vector::double2 double2;
|
||||
typedef hc::short_vector::double3 double3;
|
||||
typedef hc::short_vector::double4 double4;
|
||||
|
||||
#else
|
||||
|
||||
#define __hip_align(name, val, data) \
|
||||
__attribute__((aligned(val))) name \
|
||||
{ data }
|
||||
|
||||
struct __hip_align(char1, 1, signed char x;);
|
||||
struct __hip_align(uchar1, 1, unsigned char x;);
|
||||
|
||||
struct __hip_align(char2, 2, signed char x; signed char y;);
|
||||
struct __hip_align(uchar2, 2, unsigned char x; unsigned char y;);
|
||||
|
||||
struct char3
|
||||
{
|
||||
signed char x, y, z;
|
||||
};
|
||||
|
||||
struct uchar3
|
||||
{
|
||||
unsigned char x, y, z;
|
||||
};
|
||||
|
||||
struct __hip_align(char4, 4, signed char x; signed char y; signed char z; signed char w;);
|
||||
struct __hip_align(uchar4, 4, unsigned char x; unsigned char y; unsigned char z; unsigned char w;);
|
||||
|
||||
struct __hip_align(short1, 2, signed short x;);
|
||||
struct __hip_align(ushort1, 2, unsigned short x;);
|
||||
|
||||
struct __hip_align(short2, 4, signed short x; signed short y;);
|
||||
struct __hip_align(ushort2, 4, unsigned short x; unsigned short y;);
|
||||
|
||||
struct short3
|
||||
{
|
||||
signed short x, y, z;
|
||||
};
|
||||
|
||||
struct ushort3
|
||||
{
|
||||
unsigned short x, y, z;
|
||||
};
|
||||
|
||||
struct __hip_align(short4, 8, signed short x; signed short y; signed short z; signed short w;);
|
||||
struct __hip_align(ushort4, 8, unsigned short x; unsigned short y; unsigned short z; unsigned short w;);
|
||||
|
||||
struct __hip_align(int1, 4, signed int x;);
|
||||
struct __hip_align(uint1, 4, unsigned int x;);
|
||||
|
||||
struct __hip_align(int2, 8, signed int x; signed int y;);
|
||||
struct __hip_align(uint2, 8, unsigned int x; unsigned int y;);
|
||||
|
||||
struct int3{
|
||||
signed int x, y, z;
|
||||
};
|
||||
struct uint3{
|
||||
unsigned int x, y, z;
|
||||
};
|
||||
|
||||
struct __hip_align(int4, 16, signed int x; signed int y; signed int z; signed int w;);
|
||||
struct __hip_align(uint4, 16, unsigned int x; unsigned int y; unsigned int z; unsigned int w;);
|
||||
|
||||
struct __hip_align(long1, 8, long int x;);
|
||||
struct __hip_align(ulong1, 8, unsigned long x;);
|
||||
|
||||
struct __hip_align(long2, 16, long int x; long int y;);
|
||||
struct __hip_align(ulong2, 16, unsigned long x; unsigned long y;);
|
||||
|
||||
struct long3{
|
||||
long int x, y, z;
|
||||
};
|
||||
struct ulong3{
|
||||
unsigned long x, y, z;
|
||||
};
|
||||
|
||||
struct __hip_align(long4, 32, long int x; long int y; long int z; long int w;);
|
||||
struct __hip_align(ulong4, 32, unsigned long x; unsigned long y; unsigned long z; unsigned long w;);
|
||||
|
||||
struct float1
|
||||
{
|
||||
float x;
|
||||
};
|
||||
|
||||
struct __hip_align(float2, 8, float x; float y;);
|
||||
|
||||
struct float3
|
||||
{
|
||||
float x, y, z;
|
||||
};
|
||||
|
||||
struct __hip_align(float4, 16, float x; float y; float z; float w;);
|
||||
|
||||
struct __hip_align(longlong1, 16, long long int x;);
|
||||
struct __hip_align(ulonglong1, 16, unsigned long long int x;);
|
||||
|
||||
struct __attribute__((aligned(32))) longlong2
|
||||
{
|
||||
long long int x, y;
|
||||
};
|
||||
|
||||
struct __attribute__((aligned(32))) ulonglong2
|
||||
{
|
||||
unsigned long long int x, y;
|
||||
};
|
||||
|
||||
struct longlong3
|
||||
{
|
||||
long long int x, y, z;
|
||||
};
|
||||
|
||||
struct ulonglong3
|
||||
{
|
||||
unsigned long long int x, y, z;
|
||||
};
|
||||
|
||||
struct __attribute__((aligned(64))) longlong4
|
||||
{
|
||||
long long int x, y, z, w;
|
||||
};
|
||||
|
||||
struct __attribute__((aligned(64))) ulonglong4
|
||||
{
|
||||
unsigned long long int x, y, z, w;
|
||||
};
|
||||
|
||||
struct double1
|
||||
{
|
||||
double x;
|
||||
};
|
||||
|
||||
struct __attribute__((aligned(16))) double2
|
||||
{
|
||||
double x, y;
|
||||
};
|
||||
|
||||
struct double3
|
||||
{
|
||||
double x, y, z;
|
||||
};
|
||||
|
||||
struct __attribute__((aligned(32))) double4
|
||||
{
|
||||
double x, y, z, w;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
#if __HCC__
|
||||
#include"hip/hcc_detail/host_defines.h"
|
||||
#define __HIP_DEVICE__ __device__ __host__
|
||||
#else
|
||||
#define __HIP_DEVICE__
|
||||
#endif
|
||||
|
||||
__HIP_DEVICE__ char1 make_char1(signed char );
|
||||
__HIP_DEVICE__ char2 make_char2(signed char, signed char );
|
||||
__HIP_DEVICE__ char3 make_char3(signed char, signed char, signed char );
|
||||
__HIP_DEVICE__ char4 make_char4(signed char, signed char, signed char, signed char );
|
||||
|
||||
__HIP_DEVICE__ short1 make_short1(short );
|
||||
__HIP_DEVICE__ short2 make_short2(short, short );
|
||||
__HIP_DEVICE__ short3 make_short3(short, short, short );
|
||||
__HIP_DEVICE__ short4 make_short4(short, short, short, short );
|
||||
|
||||
__HIP_DEVICE__ int1 make_int1(int );
|
||||
__HIP_DEVICE__ int2 make_int2(int, int );
|
||||
__HIP_DEVICE__ int3 make_int3(int, int, int );
|
||||
__HIP_DEVICE__ int4 make_int4(int, int, int, int );
|
||||
|
||||
__HIP_DEVICE__ long1 make_long1(long );
|
||||
__HIP_DEVICE__ long2 make_long2(long, long );
|
||||
__HIP_DEVICE__ long3 make_long3(long, long, long );
|
||||
__HIP_DEVICE__ long4 make_long4(long, long, long, long );
|
||||
|
||||
__HIP_DEVICE__ longlong1 make_longlong1(long long );
|
||||
__HIP_DEVICE__ longlong2 make_longlong2(long long, long long );
|
||||
__HIP_DEVICE__ longlong3 make_longlong3(long long, long long, long long );
|
||||
__HIP_DEVICE__ longlong4 make_longlong4(long long, long long, long long, long long );
|
||||
|
||||
__HIP_DEVICE__ uchar1 make_uchar1(unsigned char );
|
||||
__HIP_DEVICE__ uchar2 make_uchar2(unsigned char, unsigned char );
|
||||
__HIP_DEVICE__ uchar3 make_uchar3(unsigned char, unsigned char, unsigned char );
|
||||
__HIP_DEVICE__ uchar4 make_uchar4(unsigned char, unsigned char, unsigned char, unsigned char );
|
||||
|
||||
__HIP_DEVICE__ ushort1 make_ushort1(unsigned short );
|
||||
__HIP_DEVICE__ ushort2 make_ushort2(unsigned short, unsigned short );
|
||||
__HIP_DEVICE__ ushort3 make_ushort3(unsigned short, unsigned short, unsigned short );
|
||||
__HIP_DEVICE__ ushort4 make_ushort4(unsigned short, unsigned short, unsigned short, unsigned short );
|
||||
|
||||
__HIP_DEVICE__ uint1 make_uint1(unsigned int );
|
||||
__HIP_DEVICE__ uint2 make_uint2(unsigned int, unsigned int );
|
||||
__HIP_DEVICE__ uint3 make_uint3(unsigned int, unsigned int, unsigned int );
|
||||
__HIP_DEVICE__ uint4 make_uint4(unsigned int, unsigned int, unsigned int, unsigned int );
|
||||
|
||||
__HIP_DEVICE__ ulong1 make_ulong1(unsigned long );
|
||||
__HIP_DEVICE__ ulong2 make_ulong2(unsigned long, unsigned long );
|
||||
__HIP_DEVICE__ ulong3 make_ulong3(unsigned long, unsigned long, unsigned long );
|
||||
__HIP_DEVICE__ ulong4 make_ulong4(unsigned long, unsigned long, unsigned long, unsigned long );
|
||||
|
||||
__HIP_DEVICE__ ulonglong1 make_ulonglong1(unsigned long long );
|
||||
__HIP_DEVICE__ ulonglong2 make_ulonglong2(unsigned long long, unsigned long long);
|
||||
__HIP_DEVICE__ ulonglong3 make_ulonglong3(unsigned long long, unsigned long long, unsigned long long);
|
||||
__HIP_DEVICE__ ulonglong4 make_ulonglong4(unsigned long long, unsigned long long, unsigned long long, unsigned long long );
|
||||
|
||||
__HIP_DEVICE__ float1 make_float1(float );
|
||||
__HIP_DEVICE__ float2 make_float2(float, float );
|
||||
__HIP_DEVICE__ float3 make_float3(float, float, float );
|
||||
__HIP_DEVICE__ float4 make_float4(float, float, float, float );
|
||||
|
||||
__HIP_DEVICE__ double1 make_double1(double );
|
||||
__HIP_DEVICE__ double2 make_double2(double, double );
|
||||
__HIP_DEVICE__ double3 make_double3(double, double, double );
|
||||
__HIP_DEVICE__ double4 make_double4(double, double, double, double );
|
||||
|
||||
/*
|
||||
///---
|
||||
// Inline functions for creating vector types from basic types
|
||||
#define ONE_COMPONENT_ACCESS(T, VT) inline VT make_ ##VT [[hc]] [[cpu]] (T x) { VT t; t.x = x; return t; };
|
||||
#define TWO_COMPONENT_ACCESS(T, VT) inline VT make_ ##VT [[hc]] [[cpu]] (T x, T y) { VT t; t.x=x; t.y=y; return t; };
|
||||
#define THREE_COMPONENT_ACCESS(T, VT) inline VT make_ ##VT [[hc]] [[cpu]] (T x, T y, T z) { VT t; t.x=x; t.y=y; t.z=z; return t; };
|
||||
#define FOUR_COMPONENT_ACCESS(T, VT) inline VT make_ ##VT [[hc]] [[cpu]] (T x, T y, T z, T w) { VT t; t.x=x; t.y=y; t.z=z; t.w=w; return t; };
|
||||
|
||||
|
||||
//signed:
|
||||
ONE_COMPONENT_ACCESS (signed char, char1);
|
||||
TWO_COMPONENT_ACCESS (signed char, char2);
|
||||
THREE_COMPONENT_ACCESS(signed char, char3);
|
||||
FOUR_COMPONENT_ACCESS (signed char, char4);
|
||||
|
||||
ONE_COMPONENT_ACCESS (short, short1);
|
||||
TWO_COMPONENT_ACCESS (short, short2);
|
||||
THREE_COMPONENT_ACCESS(short, short3);
|
||||
FOUR_COMPONENT_ACCESS (short, short4);
|
||||
|
||||
ONE_COMPONENT_ACCESS (int, int1);
|
||||
TWO_COMPONENT_ACCESS (int, int2);
|
||||
THREE_COMPONENT_ACCESS(int, int3);
|
||||
FOUR_COMPONENT_ACCESS (int, int4);
|
||||
|
||||
ONE_COMPONENT_ACCESS (long int, long1);
|
||||
TWO_COMPONENT_ACCESS (long int, long2);
|
||||
THREE_COMPONENT_ACCESS(long int, long3);
|
||||
FOUR_COMPONENT_ACCESS (long int, long4);
|
||||
|
||||
ONE_COMPONENT_ACCESS (long long int, ulong1);
|
||||
TWO_COMPONENT_ACCESS (long long int, ulong2);
|
||||
THREE_COMPONENT_ACCESS(long long int, ulong3);
|
||||
FOUR_COMPONENT_ACCESS (long long int, ulong4);
|
||||
|
||||
ONE_COMPONENT_ACCESS (long long int, longlong1);
|
||||
TWO_COMPONENT_ACCESS (long long int, longlong2);
|
||||
THREE_COMPONENT_ACCESS(long long int, longlong3);
|
||||
FOUR_COMPONENT_ACCESS (long long int, longlong4);
|
||||
|
||||
|
||||
// unsigned:
|
||||
ONE_COMPONENT_ACCESS (unsigned char, uchar1);
|
||||
TWO_COMPONENT_ACCESS (unsigned char, uchar2);
|
||||
THREE_COMPONENT_ACCESS(unsigned char, uchar3);
|
||||
FOUR_COMPONENT_ACCESS (unsigned char, uchar4);
|
||||
|
||||
ONE_COMPONENT_ACCESS (unsigned short, ushort1);
|
||||
TWO_COMPONENT_ACCESS (unsigned short, ushort2);
|
||||
THREE_COMPONENT_ACCESS(unsigned short, ushort3);
|
||||
FOUR_COMPONENT_ACCESS (unsigned short, ushort4);
|
||||
|
||||
ONE_COMPONENT_ACCESS (unsigned int, uint1);
|
||||
TWO_COMPONENT_ACCESS (unsigned int, uint2);
|
||||
THREE_COMPONENT_ACCESS(unsigned int, uint3);
|
||||
FOUR_COMPONENT_ACCESS (unsigned int, uint4);
|
||||
|
||||
ONE_COMPONENT_ACCESS (unsigned long int, ulong1);
|
||||
TWO_COMPONENT_ACCESS (unsigned long int, ulong2);
|
||||
THREE_COMPONENT_ACCESS(unsigned long int, ulong3);
|
||||
FOUR_COMPONENT_ACCESS (unsigned long int, ulong4);
|
||||
|
||||
ONE_COMPONENT_ACCESS (unsigned long long int, ulong1);
|
||||
TWO_COMPONENT_ACCESS (unsigned long long int, ulong2);
|
||||
THREE_COMPONENT_ACCESS(unsigned long long int, ulong3);
|
||||
FOUR_COMPONENT_ACCESS (unsigned long long int, ulong4);
|
||||
|
||||
ONE_COMPONENT_ACCESS (unsigned long long int, ulonglong1);
|
||||
TWO_COMPONENT_ACCESS (unsigned long long int, ulonglong2);
|
||||
THREE_COMPONENT_ACCESS(unsigned long long int, ulonglong3);
|
||||
FOUR_COMPONENT_ACCESS (unsigned long long int, ulonglong4);
|
||||
|
||||
|
||||
//Floating point
|
||||
ONE_COMPONENT_ACCESS (float, float1);
|
||||
TWO_COMPONENT_ACCESS (float, float2);
|
||||
THREE_COMPONENT_ACCESS(float, float3);
|
||||
FOUR_COMPONENT_ACCESS (float, float4);
|
||||
|
||||
ONE_COMPONENT_ACCESS (double, double1);
|
||||
TWO_COMPONENT_ACCESS (double, double2);
|
||||
THREE_COMPONENT_ACCESS(double, double3);
|
||||
FOUR_COMPONENT_ACCESS (double, double4);
|
||||
*/
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
@@ -1,21 +1,25 @@
|
||||
/*
|
||||
Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
//#pragma once
|
||||
|
||||
#ifndef TRACE_HELPER_H
|
||||
|
||||
@@ -1,19 +1,22 @@
|
||||
/*
|
||||
Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
@@ -22,9 +25,9 @@ THE SOFTWARE.
|
||||
#include <hip/hip_common.h>
|
||||
|
||||
#if defined(__HIP_PLATFORM_HCC__) && !defined (__HIP_PLATFORM_NVCC__)
|
||||
#include <hip/hcc_detail/hipComplex.h>
|
||||
#include <hip/hcc_detail/hip_complex.h>
|
||||
#elif defined(__HIP_PLATFORM_NVCC__) && !defined (__HIP_PLATFORM_HCC__)
|
||||
#include <hip/nvcc_detail/hipComplex.h>
|
||||
#include <hip/nvcc_detail/hip_complex.h>
|
||||
#else
|
||||
#error("Must define exactly one of __HIP_PLATFORM_HCC__ or __HIP_PLATFORM_NVCC__");
|
||||
#endif
|
||||
@@ -1,19 +1,22 @@
|
||||
/*
|
||||
Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
@@ -19,6 +19,7 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @file hip_runtime_api.h
|
||||
*
|
||||
@@ -183,7 +184,7 @@ typedef enum hipError_t {
|
||||
hipErrorInvalidHandle = 400,
|
||||
hipErrorNotFound = 500,
|
||||
hipErrorIllegalAddress = 700,
|
||||
|
||||
hipErrorInvalidSymbol = 701,
|
||||
// Runtime Error Codes start here.
|
||||
hipErrorMissingConfiguration = 1001,
|
||||
hipErrorMemoryAllocation = 1002, ///< Memory allocation error.
|
||||
|
||||
@@ -19,6 +19,7 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
//! hip_vector_types.h : Defines the HIP vector types.
|
||||
|
||||
#pragma once
|
||||
|
||||
@@ -1,66 +0,0 @@
|
||||
/*
|
||||
Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
//! HIP = Heterogeneous-compute Interface for Portability
|
||||
//!
|
||||
//! Define a extremely thin runtime layer that allows source code to be compiled unmodified
|
||||
//! through either AMD HCC or NVCC. Key features tend to be in the spirit
|
||||
//! and terminology of CUDA, but with a portable path to other accelerators as well.
|
||||
//!
|
||||
//! This is the master include file for hipblas, wrapping around hcblas and cublas "version 1"
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
enum hipblasStatus_t {
|
||||
HIPBLAS_STATUS_SUCCESS, // Function succeeds
|
||||
HIPBLAS_STATUS_NOT_INITIALIZED, // HIPBLAS library not initialized
|
||||
HIPBLAS_STATUS_ALLOC_FAILED, // resource allocation failed
|
||||
HIPBLAS_STATUS_INVALID_VALUE, // unsupported numerical value was passed to function
|
||||
HIPBLAS_STATUS_MAPPING_ERROR, // access to GPU memory space failed
|
||||
HIPBLAS_STATUS_EXECUTION_FAILED, // GPU program failed to execute
|
||||
HIPBLAS_STATUS_INTERNAL_ERROR, // an internal HIPBLAS operation failed
|
||||
HIPBLAS_STATUS_NOT_SUPPORTED // cublas supports this, but not hcblas
|
||||
};
|
||||
|
||||
enum hipblasOperation_t {
|
||||
HIPBLAS_OP_N,
|
||||
HIPBLAS_OP_T,
|
||||
HIPBLAS_OP_C
|
||||
};
|
||||
|
||||
// Some standard header files, these are included by hc.hpp and so want to make them avail on both
|
||||
// paths to provide a consistent include env and avoid "missing symbol" errors that only appears
|
||||
// on NVCC path:
|
||||
|
||||
#if defined(__HIP_PLATFORM_HCC__) and not defined (__HIP_PLATFORM_NVCC__)
|
||||
#include <hcc_detail/hip_blas.h>
|
||||
#elif defined(__HIP_PLATFORM_NVCC__) and not defined (__HIP_PLATFORM_HCC__)
|
||||
#include <nvcc_detail/hip_blas.h>
|
||||
#else
|
||||
#error("Must define exactly one of __HIP_PLATFORM_HCC__ or __HIP_PLATFORM_NVCC__");
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -1,268 +0,0 @@
|
||||
/*
|
||||
Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#include <cuda_runtime_api.h>
|
||||
#include <cublas.h>
|
||||
#include <cublas_v2.h>
|
||||
|
||||
//HGSOS for Kalmar leave it as C++, only cublas needs C linkage.
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
typedef cublasHandle_t hipblasHandle_t ;
|
||||
typedef cuComplex hipComplex;
|
||||
|
||||
/* Unsupported types
|
||||
"cublasFillMode_t",
|
||||
"cublasDiagType_t",
|
||||
"cublasSideMode_t",
|
||||
"cublasPointerMode_t",
|
||||
"cublasAtomicsMode_t",
|
||||
"cublasDataType_t"
|
||||
*/
|
||||
|
||||
|
||||
inline static cublasOperation_t hipOperationToCudaOperation( hipblasOperation_t op)
|
||||
{
|
||||
switch (op)
|
||||
{
|
||||
case HIPBLAS_OP_N:
|
||||
return CUBLAS_OP_N;
|
||||
|
||||
case HIPBLAS_OP_T:
|
||||
return CUBLAS_OP_T;
|
||||
|
||||
case HIPBLAS_OP_C:
|
||||
return CUBLAS_OP_C;
|
||||
|
||||
default:
|
||||
throw "Non existent OP";
|
||||
}
|
||||
}
|
||||
|
||||
inline static hipblasOperation_t CudaOperationToHIPOperation( cublasOperation_t op)
|
||||
{
|
||||
switch (op)
|
||||
{
|
||||
case CUBLAS_OP_N :
|
||||
return HIPBLAS_OP_N;
|
||||
|
||||
case CUBLAS_OP_T :
|
||||
return HIPBLAS_OP_T;
|
||||
|
||||
case CUBLAS_OP_C :
|
||||
return HIPBLAS_OP_C;
|
||||
|
||||
default:
|
||||
throw "Non existent OP";
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
inline static hipblasStatus_t hipCUBLASStatusToHIPStatus(cublasStatus_t cuStatus)
|
||||
{
|
||||
switch(cuStatus)
|
||||
{
|
||||
case CUBLAS_STATUS_SUCCESS:
|
||||
return HIPBLAS_STATUS_SUCCESS;
|
||||
case CUBLAS_STATUS_NOT_INITIALIZED:
|
||||
return HIPBLAS_STATUS_NOT_INITIALIZED;
|
||||
case CUBLAS_STATUS_ALLOC_FAILED:
|
||||
return HIPBLAS_STATUS_ALLOC_FAILED;
|
||||
case CUBLAS_STATUS_INVALID_VALUE:
|
||||
return HIPBLAS_STATUS_INVALID_VALUE;
|
||||
case CUBLAS_STATUS_MAPPING_ERROR:
|
||||
return HIPBLAS_STATUS_MAPPING_ERROR;
|
||||
case CUBLAS_STATUS_EXECUTION_FAILED:
|
||||
return HIPBLAS_STATUS_EXECUTION_FAILED;
|
||||
case CUBLAS_STATUS_INTERNAL_ERROR:
|
||||
return HIPBLAS_STATUS_INTERNAL_ERROR;
|
||||
case CUBLAS_STATUS_NOT_SUPPORTED:
|
||||
return HIPBLAS_STATUS_NOT_SUPPORTED;
|
||||
default:
|
||||
throw "Unimplemented status";
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
inline static hipblasStatus_t hipblasCreate(hipblasHandle_t* handle) {
|
||||
return hipCUBLASStatusToHIPStatus(cublasCreate(&*handle));
|
||||
}
|
||||
|
||||
//TODO broke common API semantics, think about this again.
|
||||
inline static hipblasStatus_t hipblasDestroy(hipblasHandle_t handle) {
|
||||
return hipCUBLASStatusToHIPStatus(cublasDestroy(handle));
|
||||
}
|
||||
|
||||
//note: no handle
|
||||
inline static hipblasStatus_t hipblasSetVector(int n, int elemSize, const void *x, int incx, void *y, int incy){
|
||||
return hipCUBLASStatusToHIPStatus(cublasSetVector(n, elemSize, x, incx, y, incy)); //HGSOS no need for handle
|
||||
}
|
||||
|
||||
//note: no handle
|
||||
inline static hipblasStatus_t hipblasGetVector(int n, int elemSize, const void *x, int incx, void *y, int incy){
|
||||
return hipCUBLASStatusToHIPStatus(cublasGetVector(n, elemSize, x, incx, y, incy)); //HGSOS no need for handle
|
||||
}
|
||||
|
||||
//note: no handle
|
||||
inline static hipblasStatus_t hipblasSetMatrix(int rows, int cols, int elemSize, const void *A, int lda, void *B, int ldb){
|
||||
return hipCUBLASStatusToHIPStatus(cublasSetMatrix(rows, cols, elemSize, A, lda, B, ldb));
|
||||
}
|
||||
|
||||
//note: no handle
|
||||
inline static hipblasStatus_t hipblasGetMatrix(int rows, int cols, int elemSize, const void *A, int lda, void *B, int ldb){
|
||||
return hipCUBLASStatusToHIPStatus(cublasGetMatrix(rows, cols, elemSize, A, lda, B, ldb));
|
||||
}
|
||||
|
||||
inline static hipblasStatus_t hipblasSasum(hipblasHandle_t handle, int n, float *x, int incx, float *result){
|
||||
return hipCUBLASStatusToHIPStatus(cublasSasum(handle, n, x, incx, result));
|
||||
}
|
||||
|
||||
inline static hipblasStatus_t hipblasDasum(hipblasHandle_t handle, int n, double *x, int incx, double *result){
|
||||
return hipCUBLASStatusToHIPStatus(cublasDasum( handle, n, x, incx, result));
|
||||
}
|
||||
|
||||
inline static hipblasStatus_t hipblasSasumBatched(hipblasHandle_t handle, int n, float *x, int incx, float *result, int batchCount){
|
||||
//TODO warn user that function was demoted to ignore batch
|
||||
return hipCUBLASStatusToHIPStatus(cublasSasum( handle, n, x, incx, result));
|
||||
}
|
||||
|
||||
inline static hipblasStatus_t hipblasDasumBatched(hipblasHandle_t handle, int n, double *x, int incx, double *result, int batchCount){
|
||||
//TODO warn user that function was demoted to ignore batch
|
||||
return hipCUBLASStatusToHIPStatus(cublasDasum(handle, n, x, incx, result));
|
||||
}
|
||||
|
||||
inline static hipblasStatus_t hipblasSaxpy(hipblasHandle_t handle, int n, const float *alpha, const float *x, int incx, float *y, int incy) {
|
||||
return hipCUBLASStatusToHIPStatus(cublasSaxpy(handle, n, alpha, x, incx, y, incy));
|
||||
}
|
||||
|
||||
inline static hipblasStatus_t hipblasSaxpyBatched(hipblasHandle_t handle, int n, const float *alpha, const float *x, int incx, float *y, int incy, int batchCount){
|
||||
//TODO warn user that function was demoted to ignore batch
|
||||
return hipCUBLASStatusToHIPStatus(cublasSaxpy(handle, n, alpha, x, incx, y, incy));
|
||||
}
|
||||
|
||||
inline static hipblasStatus_t hipblasScopy(hipblasHandle_t handle, int n, const float *x, int incx, float *y, int incy){
|
||||
return hipCUBLASStatusToHIPStatus(cublasScopy( handle, n, x, incx, y, incy));
|
||||
}
|
||||
|
||||
inline static hipblasStatus_t hipblasDcopy(hipblasHandle_t handle, int n, const double *x, int incx, double *y, int incy){
|
||||
return hipCUBLASStatusToHIPStatus(cublasDcopy( handle, n, x, incx, y, incy));
|
||||
}
|
||||
|
||||
inline static hipblasStatus_t hipblasScopyBatched(hipblasHandle_t handle, int n, const float *x, int incx, float *y, int incy, int batchCount){
|
||||
//TODO warn user that function was demoted to ignore batch
|
||||
return hipCUBLASStatusToHIPStatus(cublasScopy( handle, n, x, incx, y, incy));
|
||||
}
|
||||
|
||||
inline static hipblasStatus_t hipblasDcopyBatched(hipblasHandle_t handle, int n, const double *x, int incx, double *y, int incy, int batchCount){
|
||||
//TODO warn user that function was demoted to ignore batch
|
||||
return hipCUBLASStatusToHIPStatus(cublasDcopy( handle, n, x, incx, y, incy));
|
||||
}
|
||||
|
||||
inline static hipblasStatus_t hipblasSdot (hipblasHandle_t handle, int n, const float *x, int incx, const float *y, int incy, float *result){
|
||||
return hipCUBLASStatusToHIPStatus(cublasSdot ( handle, n, x, incx, y, incy, result));
|
||||
}
|
||||
|
||||
inline static hipblasStatus_t hipblasDdot (hipblasHandle_t handle, int n, const double *x, int incx, const double *y, int incy, double *result){
|
||||
return hipCUBLASStatusToHIPStatus(cublasDdot ( handle, n, x, incx, y, incy, result));
|
||||
}
|
||||
|
||||
inline static hipblasStatus_t hipblasSdotBatched (hipblasHandle_t handle, int n, const float *x, int incx, const float *y, int incy, float *result, int batchCount){
|
||||
//TODO warn user that function was demoted to ignore batch
|
||||
return hipCUBLASStatusToHIPStatus(cublasSdot ( handle, n, x, incx, y, incy, result));
|
||||
}
|
||||
|
||||
inline static hipblasStatus_t hipblasDdotBatched (hipblasHandle_t handle, int n, const double *x, int incx, const double *y, int incy, double *result, int batchCount){
|
||||
//TODO warn user that function was demoted to ignore batch
|
||||
return hipCUBLASStatusToHIPStatus(cublasDdot ( handle, n, x, incx, y, incy, result));
|
||||
}
|
||||
|
||||
inline static hipblasStatus_t hipblasSscal(hipblasHandle_t handle, int n, const float *alpha, float *x, int incx){
|
||||
return hipCUBLASStatusToHIPStatus(cublasSscal(handle, n, alpha, x, incx));
|
||||
}
|
||||
inline static hipblasStatus_t hipblasDscal(hipblasHandle_t handle, int n, const double *alpha, double *x, int incx){
|
||||
return hipCUBLASStatusToHIPStatus(cublasDscal(handle, n, alpha, x, incx));
|
||||
}
|
||||
inline static hipblasStatus_t hipblasSscalBatched(hipblasHandle_t handle, int n, const float *alpha, float *x, int incx, int batchCount){
|
||||
//TODO warn user that function was demoted to ignore batch
|
||||
return hipCUBLASStatusToHIPStatus(cublasSscal(handle, n, alpha, x, incx));
|
||||
}
|
||||
inline static hipblasStatus_t hipblasDscalBatched(hipblasHandle_t handle, int n, const double *alpha, double *x, int incx, int batchCount){
|
||||
//TODO warn user that function was demoted to ignore batch
|
||||
return hipCUBLASStatusToHIPStatus(cublasDscal(handle, n, alpha, x, incx));
|
||||
}
|
||||
|
||||
inline static hipblasStatus_t hipblasSgemv(hipblasHandle_t handle, hipblasOperation_t trans, int m, int n, const float *alpha, float *A, int lda,
|
||||
float *x, int incx, const float *beta, float *y, int incy){
|
||||
return hipCUBLASStatusToHIPStatus(cublasSgemv(handle, hipOperationToCudaOperation(trans), m, n, alpha, A, lda, x, incx, beta, y, incy));
|
||||
}
|
||||
|
||||
inline static hipblasStatus_t hipblasSgemvBatched(hipblasHandle_t handle, hipblasOperation_t trans, int m, int n, const float *alpha, float *A, int lda,
|
||||
float *x, int incx, const float *beta, float *y, int incy, int batchCount){
|
||||
//TODO warn user that function was demoted to ignore batch
|
||||
return hipCUBLASStatusToHIPStatus(cublasSgemv(handle, hipOperationToCudaOperation(trans), m, n, alpha, A, lda, x, incx, beta, y, incy));
|
||||
}
|
||||
|
||||
inline static hipblasStatus_t hipblasSger(hipblasHandle_t handle, int m, int n, const float *alpha, const float *x, int incx, const float *y, int incy, float *A, int lda){
|
||||
return hipCUBLASStatusToHIPStatus(cublasSger(handle, m, n, alpha, x, incx, y, incy, A, lda));
|
||||
}
|
||||
|
||||
inline static hipblasStatus_t hipblasSgerBatched(hipblasHandle_t handle, int m, int n, const float *alpha, const float *x, int incx, const float *y, int incy, float *A, int lda, int batchCount){
|
||||
//TODO warn user that function was demoted to ignore batch
|
||||
return hipCUBLASStatusToHIPStatus(cublasSger(handle, m, n, alpha, x, incx, y, incy, A, lda));
|
||||
}
|
||||
|
||||
inline static hipblasStatus_t hipblasSgemm(hipblasHandle_t handle, hipblasOperation_t transa, hipblasOperation_t transb,
|
||||
int m, int n, int k, const float *alpha, float *A, int lda, float *B, int ldb, const float *beta, float *C, int ldc){
|
||||
return hipCUBLASStatusToHIPStatus(cublasSgemm( handle, hipOperationToCudaOperation(transa), hipOperationToCudaOperation(transb), m, n, k, alpha, A, lda, B, ldb, beta, C, ldc));
|
||||
}
|
||||
|
||||
inline static hipblasStatus_t hipblasCgemm(hipblasHandle_t handle, hipblasOperation_t transa, hipblasOperation_t transb,
|
||||
int m, int n, int k, const hipComplex *alpha, hipComplex *A, int lda, hipComplex *B, int ldb, const hipComplex *beta, hipComplex *C, int ldc){
|
||||
return hipCUBLASStatusToHIPStatus(cublasCgemm( handle, hipOperationToCudaOperation(transa), hipOperationToCudaOperation(transb), m, n, k, alpha, A, lda, B, ldb, beta, C, ldc));
|
||||
}
|
||||
|
||||
inline static hipblasStatus_t hipblasSgemmBatched(hipblasHandle_t handle, hipblasOperation_t transa, hipblasOperation_t transb,
|
||||
int m, int n, int k, const float *alpha, float *A, int lda, float *B, int ldb, const float *beta, float *C, int ldc, int batchCount){
|
||||
//TODO incompatible API
|
||||
return HIPBLAS_STATUS_NOT_SUPPORTED;
|
||||
//return hipCUBLASStatusToHIPStatus(cublasSgemmBatched( handle, hipOperationToCudaOperation(transa), hipOperationToCudaOperation(transb), m, n, k, alpha, A, lda, B, ldb, beta, C, ldc, batchCount));
|
||||
}
|
||||
|
||||
|
||||
inline static hipblasStatus_t hipblasCgemmBatched(hipblasHandle_t handle, hipblasOperation_t transa, hipblasOperation_t transb,
|
||||
int m, int n, int k, const hipComplex *alpha, hipComplex *A, int lda, hipComplex *B, int ldb, const hipComplex *beta, hipComplex *C, int ldc, int batchCount){
|
||||
|
||||
//TODO incompatible API
|
||||
return HIPBLAS_STATUS_NOT_SUPPORTED;
|
||||
//return hipCUBLASStatusToHIPStatus(cublasCgemmBatched( handle, hipOperationToCudaOperation(transa), hipOperationToCudaOperation(transb), m, n, k, alpha, A, lda, B, ldb, beta, C, ldc, batchCount));
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
@@ -1,3 +1,25 @@
|
||||
/*
|
||||
Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef HIPCOMPLEX_H
|
||||
#define HIPCOMPLEX_H
|
||||
|
||||
@@ -19,6 +19,7 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
@@ -45,7 +46,7 @@ kernelName<<<numblocks,numthreads,memperblock,streamId>>>(0, ##__VA_ARGS__);\
|
||||
#define __HIP_ARCH_HAS_GLOBAL_FLOAT_ATOMIC_EXCH__ (__CUDA_ARCH__ >= 110)
|
||||
#define __HIP_ARCH_HAS_SHARED_INT32_ATOMICS__ (__CUDA_ARCH__ >= 120)
|
||||
#define __HIP_ARCH_HAS_SHARED_FLOAT_ATOMIC_EXCH__ (__CUDA_ARCH__ >= 120)
|
||||
#define __HIP_ARCH_HAS_FLOAT_ATOMIC_ADD__
|
||||
#define __HIP_ARCH_HAS_FLOAT_ATOMIC_ADD__ (__CUDA_ARCH__ >= 200)
|
||||
|
||||
// 64-bit Atomics:
|
||||
#define __HIP_ARCH_HAS_GLOBAL_INT64_ATOMICS__ (__CUDA_ARCH__ >= 200)
|
||||
@@ -92,6 +93,8 @@ kernelName<<<numblocks,numthreads,memperblock,streamId>>>(0, ##__VA_ARGS__);\
|
||||
#define hipGridDim_y gridDim.y
|
||||
#define hipGridDim_z gridDim.z
|
||||
|
||||
#define HIP_SYMBOL(X) X
|
||||
|
||||
/**
|
||||
* extern __shared__
|
||||
*/
|
||||
@@ -102,5 +105,3 @@ kernelName<<<numblocks,numthreads,memperblock,streamId>>>(0, ##__VA_ARGS__);\
|
||||
#define HIP_DYNAMIC_SHARED_ATTRIBUTE
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
@@ -19,6 +19,7 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cuda_runtime_api.h>
|
||||
@@ -67,11 +68,13 @@ hipMemcpyHostToHost
|
||||
#define HIP_LAUNCH_PARAM_BUFFER_POINTER CU_LAUNCH_PARAM_BUFFER_POINTER
|
||||
#define HIP_LAUNCH_PARAM_BUFFER_SIZE CU_LAUNCH_PARAM_BUFFER_SIZE
|
||||
#define HIP_LAUNCH_PARAM_END CU_LAUNCH_PARAM_END
|
||||
#define hipLimitMallocHeapSize cudaLimitMallocHeapSize
|
||||
|
||||
typedef cudaEvent_t hipEvent_t;
|
||||
typedef cudaStream_t hipStream_t;
|
||||
typedef cudaIpcEventHandle_t hipIpcEventHandle_t;
|
||||
typedef cudaIpcMemHandle_t hipIpcMemHandle_t;
|
||||
typedef cudaLimit hipLimit_t;
|
||||
typedef CUcontext hipCtx_t;
|
||||
typedef CUsharedconfig hipSharedMemConfig;
|
||||
typedef CUfunc_cache hipFuncCache;
|
||||
@@ -101,6 +104,7 @@ switch(cuError) {
|
||||
case cudaErrorPeerAccessAlreadyEnabled : return hipErrorPeerAccessAlreadyEnabled ;
|
||||
case cudaErrorHostMemoryAlreadyRegistered : return hipErrorHostMemoryAlreadyRegistered ;
|
||||
case cudaErrorHostMemoryNotRegistered : return hipErrorHostMemoryNotRegistered ;
|
||||
case cudaErrorUnsupportedLimit : return hipErrorUnsupportedLimit ;
|
||||
default : return hipErrorUnknown; // Note - translated error.
|
||||
};
|
||||
}
|
||||
@@ -185,6 +189,16 @@ inline static hipError_t hipFree(void* ptr) {
|
||||
return hipCUDAErrorTohipError(cudaFree(ptr));
|
||||
}
|
||||
|
||||
inline static hipError_t hipMallocHost(void** ptr, size_t size) __attribute__((deprecated("use hipHostMalloc instead")));
|
||||
inline static hipError_t hipMallocHost(void** ptr, size_t size) {
|
||||
return hipCUDAErrorTohipError(cudaMallocHost(ptr, size));
|
||||
}
|
||||
|
||||
inline static hipError_t hipHostAlloc(void** ptr, size_t size, unsigned int flags) __attribute__((deprecated("use hipHostMalloc instead")));
|
||||
inline static hipError_t hipHostAlloc(void** ptr, size_t size, unsigned int flags){
|
||||
return hipCUDAErrorTohipError(cudaHostAlloc(ptr, size, flags));
|
||||
}
|
||||
|
||||
inline static hipError_t hipHostMalloc(void** ptr, size_t size, unsigned int flags){
|
||||
return hipCUDAErrorTohipError(cudaHostAlloc(ptr, size, flags));
|
||||
}
|
||||
@@ -205,6 +219,11 @@ inline static hipError_t hipHostUnregister(void* ptr){
|
||||
return hipCUDAErrorTohipError(cudaHostUnregister(ptr));
|
||||
}
|
||||
|
||||
inline static hipError_t hipFreeHost(void* ptr) __attribute__((deprecated("use hipHostFree instead")));
|
||||
inline static hipError_t hipFreeHost(void* ptr) {
|
||||
return hipCUDAErrorTohipError(cudaFreeHost(ptr));
|
||||
}
|
||||
|
||||
inline static hipError_t hipHostFree(void* ptr) {
|
||||
return hipCUDAErrorTohipError(cudaFreeHost(ptr));
|
||||
}
|
||||
@@ -282,9 +301,14 @@ inline static hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeB
|
||||
}
|
||||
|
||||
|
||||
inline static hipError_t hipMemcpyToSymbol(const char * symbolName, const void* src, size_t sizeBytes, size_t offset = 0, hipMemcpyKind copyType = hipMemcpyHostToDevice) {
|
||||
return hipCUDAErrorTohipError(cudaMemcpyToSymbol(symbolName, src, sizeBytes, offset, hipMemcpyKindToCudaMemcpyKind(copyType)));
|
||||
inline static hipError_t hipMemcpyToSymbol(const void* symbol, const void* src, size_t sizeBytes, size_t offset = 0, hipMemcpyKind copyType = hipMemcpyHostToDevice) {
|
||||
return hipCUDAErrorTohipError(cudaMemcpyToSymbol(symbol, src, sizeBytes, offset, hipMemcpyKindToCudaMemcpyKind(copyType)));
|
||||
}
|
||||
|
||||
inline static hipError_t hipMemcpyToSymbolAsync(const void* symbol, const void* src, size_t sizeBytes, size_t offset, hipMemcpyKind copyType, hipStream_t stream) {
|
||||
return hipCUDAErrorTohipError(cudaMemcpyToSymbolAsync(symbol, src, sizeBytes, offset, hipMemcpyKindToCudaMemcpyKind(copyType)));
|
||||
}
|
||||
|
||||
inline static hipError_t hipDeviceSynchronize() {
|
||||
return hipCUDAErrorTohipError(cudaDeviceSynchronize());
|
||||
}
|
||||
@@ -710,12 +734,17 @@ inline static hipError_t hipDeviceGetName(char *name,int len,hipDevice_t device)
|
||||
return hipCUResultTohipError(cuDeviceGetName(name,len,device));
|
||||
}
|
||||
|
||||
inline static hipError_t hipDeviceGetPCIBusId (int *pciBusId,int len,hipDevice_t device)
|
||||
inline static hipError_t hipDeviceGetPCIBusId(int *pciBusId,int len,hipDevice_t device)
|
||||
{
|
||||
return hipCUResultTohipError(cuDeviceGetPCIBusId((char*)pciBusId,len,device));
|
||||
}
|
||||
|
||||
inline static hipError_t hipDeviceTotalMem (size_t *bytes,hipDevice_t device)
|
||||
inline static hipError_t hipDeviceGetLimit(size_t *pValue, hipLimit_t limit)
|
||||
{
|
||||
return hipCUDAErrorTohipError(cudaDeviceGetLimit(pValue, limit);
|
||||
}
|
||||
|
||||
inline static hipError_t hipDeviceTotalMem(size_t *bytes,hipDevice_t device)
|
||||
{
|
||||
return hipCUResultTohipError(cuDeviceTotalMem(bytes,device));
|
||||
}
|
||||
|
||||
@@ -19,7 +19,8 @@ for f in $HIPBINFILES
|
||||
do
|
||||
ln -s $f $(basename $f)
|
||||
done
|
||||
popd >/dev/null
|
||||
ln -s $HIPDIR/bin/.hipVersion .hipVersion
|
||||
popd
|
||||
|
||||
# Soft-link to headers
|
||||
HIPINCDIR=$HIPDIR/include/hip
|
||||
|
||||
@@ -19,6 +19,7 @@ for f in $HIPBINFILES
|
||||
do
|
||||
rm $(basename $f)
|
||||
done
|
||||
rm .hipVersion
|
||||
popd
|
||||
rmdir --ignore-fail-on-non-empty $ROCMBINDIR
|
||||
|
||||
|
||||
@@ -6,7 +6,7 @@ if(@BUILD_HIPIFY_CLANG@)
|
||||
install(PROGRAMS @PROJECT_BINARY_DIR@/hipify-clang/hipify-clang DESTINATION bin)
|
||||
endif()
|
||||
install(DIRECTORY @hip_SOURCE_DIR@/include DESTINATION .)
|
||||
install(FILES @PROJECT_BINARY_DIR@/.version DESTINATION bin)
|
||||
install(FILES @PROJECT_BINARY_DIR@/.hipVersion DESTINATION bin)
|
||||
install(DIRECTORY @hip_SOURCE_DIR@/cmake DESTINATION .)
|
||||
|
||||
#############################
|
||||
|
||||
@@ -19,5 +19,6 @@ for f in $HIPLIBFILES
|
||||
do
|
||||
ln -s $f $(basename $f)
|
||||
done
|
||||
ln -s $HIPDIR/lib/.hipInfo .hipInfo
|
||||
popd
|
||||
|
||||
|
||||
@@ -19,6 +19,7 @@ for f in $HIPLIBFILES
|
||||
do
|
||||
rm $(basename $f)
|
||||
done
|
||||
rm .hipInfo
|
||||
popd
|
||||
rmdir --ignore-fail-on-non-empty $ROCMLIBDIR
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@ elseif(@HIP_LIB_TYPE@ EQUAL 1)
|
||||
else()
|
||||
install(FILES @PROJECT_BINARY_DIR@/libhip_hcc.so DESTINATION lib)
|
||||
endif()
|
||||
install(FILES @PROJECT_BINARY_DIR@/.buildInfo DESTINATION lib)
|
||||
install(FILES @PROJECT_BINARY_DIR@/.hipInfo DESTINATION lib)
|
||||
|
||||
#############################
|
||||
# Packaging steps
|
||||
@@ -27,12 +27,20 @@ set(CPACK_PACKAGE_FILE_NAME ${CPACK_PACKAGE_NAME}-${CPACK_PACKAGE_VERSION_MAJOR}
|
||||
set(CPACK_GENERATOR "TGZ;DEB;RPM")
|
||||
set(CPACK_BINARY_DEB "ON")
|
||||
set(CPACK_DEBIAN_PACKAGE_CONTROL_EXTRA "${PROJECT_BINARY_DIR}/postinst;${PROJECT_BINARY_DIR}/prerm")
|
||||
set(CPACK_DEBIAN_PACKAGE_DEPENDS "hip_base (= ${CPACK_PACKAGE_VERSION}), hcc_lc (= @HCC_VERSION@), rocm-profiler")
|
||||
if(@COMPILE_HIP_ATP_MARKER@)
|
||||
set(CPACK_DEBIAN_PACKAGE_DEPENDS "hip_base (= ${CPACK_PACKAGE_VERSION}), hcc_lc (= @HCC_VERSION@), rocm-profiler")
|
||||
else()
|
||||
set(CPACK_DEBIAN_PACKAGE_DEPENDS "hip_base (= ${CPACK_PACKAGE_VERSION}), hcc_lc (= @HCC_VERSION@)")
|
||||
endif()
|
||||
set(CPACK_BINARY_RPM "ON")
|
||||
set(CPACK_RPM_PACKAGE_ARCHITECTURE "x86_64")
|
||||
set(CPACK_RPM_POST_INSTALL_SCRIPT_FILE "${PROJECT_BINARY_DIR}/postinst")
|
||||
set(CPACK_RPM_PRE_UNINSTALL_SCRIPT_FILE "${PROJECT_BINARY_DIR}/prerm")
|
||||
set(CPACK_RPM_PACKAGE_AUTOREQPROV " no")
|
||||
set(CPACK_RPM_PACKAGE_REQUIRES "hip_base = ${CPACK_PACKAGE_VERSION}, hcc_lc = @HCC_VERSION@, rocm-profiler")
|
||||
if(@COMPILE_HIP_ATP_MARKER@)
|
||||
set(CPACK_RPM_PACKAGE_REQUIRES "hip_base = ${CPACK_PACKAGE_VERSION}, hcc_lc = @HCC_VERSION@, rocm-profiler")
|
||||
else()
|
||||
set(CPACK_RPM_PACKAGE_REQUIRES "hip_base = ${CPACK_PACKAGE_VERSION}, hcc_lc = @HCC_VERSION@")
|
||||
endif()
|
||||
set(CPACK_SOURCE_GENERATOR "TGZ")
|
||||
include(CPack)
|
||||
|
||||
@@ -19,6 +19,7 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <iostream>
|
||||
#include "hip/hip_runtime.h"
|
||||
|
||||
@@ -1,3 +1,25 @@
|
||||
/*
|
||||
Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
// Simple test showing how to use C++AMP syntax with array_view.
|
||||
// The code uses AMP's array_view class, which provides automatic data synchronization
|
||||
// of data between the host and the accelerator. As noted below, the HCC runtime
|
||||
|
||||
@@ -1,3 +1,25 @@
|
||||
/*
|
||||
Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
// Simple test showing how to use HC syntax with AM (accelerator memory).
|
||||
// AM provides a set of c-style memory management routines for allocating,
|
||||
// freeing, and copying memory. am_alloc returns a device pointer
|
||||
|
||||
@@ -1,3 +1,25 @@
|
||||
/*
|
||||
Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
// Simple test showing how to use HC syntax with array.
|
||||
// Array provides a type-safe C++ mechanism to allocate accelerator memory.
|
||||
// Like array_view, hc::array provides multi-dimensional indexing capability,
|
||||
|
||||
@@ -1,3 +1,25 @@
|
||||
/*
|
||||
Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hc.hpp>
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
|
||||
@@ -1,3 +1,25 @@
|
||||
/*
|
||||
Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
// Simple test showing how to use HC syntax with array_view.
|
||||
// The code uses AMP's array_view class, which provides automatic data synchronization
|
||||
// of data between the host and the accelerator. As noted below, the HCC runtime
|
||||
|
||||
@@ -1,3 +1,25 @@
|
||||
/*
|
||||
Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "hip/hip_runtime.h"
|
||||
|
||||
__global__ void vadd_hip(hipLaunchParm lp, const float *a, const float *b, float *c, int N)
|
||||
|
||||
@@ -1,19 +1,22 @@
|
||||
/*
|
||||
Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
@@ -60,32 +63,37 @@ int main(){
|
||||
uint32_t len = LEN;
|
||||
uint32_t one = 1;
|
||||
|
||||
std::vector<void*>argBuffer(5);
|
||||
uint32_t *ptr32_t = (uint32_t*)&argBuffer[0];
|
||||
memcpy(ptr32_t + 0, &one, sizeof(uint32_t));
|
||||
memcpy(ptr32_t + 1, &one, sizeof(uint32_t));
|
||||
memcpy(ptr32_t + 2, &one, sizeof(uint32_t));
|
||||
memcpy(ptr32_t + 3, &len, sizeof(uint32_t));
|
||||
memcpy(ptr32_t + 4, &one, sizeof(uint32_t));
|
||||
memcpy(ptr32_t + 5, &one, sizeof(uint32_t));
|
||||
memcpy(&argBuffer[3], &Ad, sizeof(void*));
|
||||
memcpy(&argBuffer[4], &Bd, sizeof(void*));
|
||||
struct {
|
||||
uint32_t _hidden[6];
|
||||
void * _Ad;
|
||||
void * _Bd;
|
||||
} args;
|
||||
|
||||
for (int i=0; i<6; i++) {
|
||||
args._hidden[i] = 0;
|
||||
}
|
||||
args._Ad = Ad;
|
||||
args._Bd = Bd;
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef __HIP_PLATFORM_NVCC__
|
||||
uint32_t one = 1;
|
||||
std::vector<void*>argBuffer(3);
|
||||
uint32_t *ptr32_t = (uint32_t*)&argBuffer[0];
|
||||
memcpy(ptr32_t + 0, &one, sizeof(uint32_t));
|
||||
memcpy(&argBuffer[1], &Ad, sizeof(void*));
|
||||
memcpy(&argBuffer[2], &Bd, sizeof(void*));
|
||||
struct {
|
||||
uint32_t _hidden[1];
|
||||
void * _Ad;
|
||||
void * _Bd;
|
||||
} args;
|
||||
|
||||
args._hidden[0] = 0;
|
||||
args._Ad = Ad;
|
||||
args._Bd = Bd;
|
||||
#endif
|
||||
|
||||
|
||||
size_t size = argBuffer.size()*sizeof(void*);
|
||||
size_t size = sizeof(args);
|
||||
|
||||
void *config[] = {
|
||||
HIP_LAUNCH_PARAM_BUFFER_POINTER, &argBuffer[0],
|
||||
HIP_LAUNCH_PARAM_BUFFER_POINTER, &args,
|
||||
HIP_LAUNCH_PARAM_BUFFER_SIZE, &size,
|
||||
HIP_LAUNCH_PARAM_END
|
||||
};
|
||||
@@ -93,10 +101,20 @@ int main(){
|
||||
hipModuleLaunchKernel(Function, 1, 1, 1, LEN, 1, 1, 0, 0, NULL, (void**)&config);
|
||||
|
||||
hipMemcpyDtoH(B, Bd, SIZE);
|
||||
for(uint32_t i=LEN-4;i<LEN;i++){
|
||||
std::cout<<A[i]<<" - "<<B[i]<<std::endl;
|
||||
int mismatchCount = 0;
|
||||
for(uint32_t i=0;i<LEN;i++){
|
||||
if (A[i] != B[i]) {
|
||||
mismatchCount++;
|
||||
std::cout<<"error: mismatch " << A[i]<<" != "<<B[i]<<std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
if (mismatchCount == 0) {
|
||||
std::cout << "PASSED!\n";
|
||||
} else {
|
||||
std::cout << "FAILED!\n";
|
||||
};
|
||||
|
||||
hipCtxDestroy(context);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -1,3 +1,25 @@
|
||||
/*
|
||||
Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "hip/hip_runtime.h"
|
||||
|
||||
extern "C" __global__ void hello_world(hipLaunchParm lp, float *a, float *b)
|
||||
|
||||
@@ -19,6 +19,7 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
@@ -26,9 +27,9 @@ THE SOFTWARE.
|
||||
{\
|
||||
cudaError_t error = cmd;\
|
||||
if (error != cudaSuccess) { \
|
||||
fprintf(stderr, "error: '%s'(%d) at %s:%d\n", cudaGetErrorString(error), error,__FILE__, __LINE__); \
|
||||
exit(EXIT_FAILURE);\
|
||||
}\
|
||||
fprintf(stderr, "error: '%s'(%d) at %s:%d\n", cudaGetErrorString(error), error,__FILE__, __LINE__); \
|
||||
exit(EXIT_FAILURE);\
|
||||
}\
|
||||
}
|
||||
|
||||
|
||||
@@ -43,55 +44,55 @@ vector_square(T *C_d, const T *A_d, size_t N)
|
||||
size_t stride = blockDim.x * gridDim.x ;
|
||||
|
||||
for (size_t i=offset; i<N; i+=stride) {
|
||||
C_d[i] = A_d[i] * A_d[i];
|
||||
}
|
||||
C_d[i] = A_d[i] * A_d[i];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
float *A_d, *C_d;
|
||||
float *A_h, *C_h;
|
||||
size_t N = 1000000;
|
||||
size_t Nbytes = N * sizeof(float);
|
||||
float *A_d, *C_d;
|
||||
float *A_h, *C_h;
|
||||
size_t N = 1000000;
|
||||
size_t Nbytes = N * sizeof(float);
|
||||
|
||||
cudaDeviceProp props;
|
||||
CHECK(cudaGetDeviceProperties(&props, 0/*deviceID*/));
|
||||
printf ("info: running on device %s\n", props.name);
|
||||
cudaDeviceProp props;
|
||||
CHECK(cudaGetDeviceProperties(&props, 0/*deviceID*/));
|
||||
printf ("info: running on device %s\n", props.name);
|
||||
|
||||
printf ("info: allocate host mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0);
|
||||
A_h = (float*)malloc(Nbytes);
|
||||
CHECK(A_h == 0 ? cudaErrorMemoryAllocation : cudaSuccess );
|
||||
C_h = (float*)malloc(Nbytes);
|
||||
CHECK(C_h == 0 ? cudaErrorMemoryAllocation : cudaSuccess );
|
||||
// Fill with Phi + i
|
||||
printf ("info: allocate host mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0);
|
||||
A_h = (float*)malloc(Nbytes);
|
||||
CHECK(A_h == 0 ? cudaErrorMemoryAllocation : cudaSuccess );
|
||||
C_h = (float*)malloc(Nbytes);
|
||||
CHECK(C_h == 0 ? cudaErrorMemoryAllocation : cudaSuccess );
|
||||
// Fill with Phi + i
|
||||
for (size_t i=0; i<N; i++)
|
||||
{
|
||||
A_h[i] = 1.618f + i;
|
||||
}
|
||||
{
|
||||
A_h[i] = 1.618f + i;
|
||||
}
|
||||
|
||||
printf ("info: allocate device mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0);
|
||||
CHECK(cudaMalloc(&A_d, Nbytes));
|
||||
CHECK(cudaMalloc(&C_d, Nbytes));
|
||||
printf ("info: allocate device mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0);
|
||||
CHECK(cudaMalloc(&A_d, Nbytes));
|
||||
CHECK(cudaMalloc(&C_d, Nbytes));
|
||||
|
||||
|
||||
printf ("info: copy Host2Device\n");
|
||||
printf ("info: copy Host2Device\n");
|
||||
CHECK ( cudaMemcpy(A_d, A_h, Nbytes, cudaMemcpyHostToDevice));
|
||||
|
||||
const unsigned blocks = 512;
|
||||
const unsigned threadsPerBlock = 256;
|
||||
const unsigned blocks = 512;
|
||||
const unsigned threadsPerBlock = 256;
|
||||
|
||||
printf ("info: launch 'vector_square' kernel\n");
|
||||
vector_square <<<blocks, threadsPerBlock>>> (C_d, A_d, N);
|
||||
printf ("info: launch 'vector_square' kernel\n");
|
||||
vector_square <<<blocks, threadsPerBlock>>> (C_d, A_d, N);
|
||||
|
||||
printf ("info: copy Device2Host\n");
|
||||
printf ("info: copy Device2Host\n");
|
||||
CHECK ( cudaMemcpy(C_h, C_d, Nbytes, cudaMemcpyDeviceToHost));
|
||||
|
||||
printf ("info: check result\n");
|
||||
printf ("info: check result\n");
|
||||
for (size_t i=0; i<N; i++) {
|
||||
if (C_h[i] != A_h[i] * A_h[i]) {
|
||||
CHECK(cudaErrorUnknown);
|
||||
}
|
||||
}
|
||||
printf ("PASSED!\n");
|
||||
if (C_h[i] != A_h[i] * A_h[i]) {
|
||||
CHECK(cudaErrorUnknown);
|
||||
}
|
||||
}
|
||||
printf ("PASSED!\n");
|
||||
}
|
||||
|
||||
@@ -19,6 +19,7 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include "hip/hip_runtime.h"
|
||||
|
||||
|
||||
@@ -1,13 +1,16 @@
|
||||
/*
|
||||
Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
|
||||
@@ -19,6 +19,7 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include "hip/hip_runtime.h"
|
||||
|
||||
@@ -19,6 +19,7 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include<iostream>
|
||||
|
||||
// hip header file
|
||||
|
||||
@@ -19,6 +19,7 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include<iostream>
|
||||
|
||||
// hip header file
|
||||
|
||||
@@ -19,6 +19,7 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include<iostream>
|
||||
|
||||
// hip header file
|
||||
|
||||
@@ -19,6 +19,7 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include<iostream>
|
||||
|
||||
// hip header file
|
||||
|
||||
@@ -19,6 +19,7 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include<iostream>
|
||||
|
||||
// hip header file
|
||||
|
||||
@@ -19,6 +19,7 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include<iostream>
|
||||
|
||||
// hip header file
|
||||
|
||||
Executable → Regular
+9
-6
@@ -1,19 +1,22 @@
|
||||
/*
|
||||
Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
@@ -1,19 +1,22 @@
|
||||
/*
|
||||
Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
+30
-11
@@ -1,19 +1,22 @@
|
||||
/*
|
||||
Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
@@ -68,7 +71,7 @@ hipError_t hipGetDeviceCount(int *count)
|
||||
return e;
|
||||
}
|
||||
|
||||
hipError_t hipDeviceSetCacheConfig ( hipFuncCache cacheConfig )
|
||||
hipError_t hipDeviceSetCacheConfig(hipFuncCache cacheConfig)
|
||||
{
|
||||
HIP_INIT_API(cacheConfig);
|
||||
|
||||
@@ -77,7 +80,7 @@ hipError_t hipDeviceSetCacheConfig ( hipFuncCache cacheConfig )
|
||||
return ihipLogStatus(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipDeviceGetCacheConfig ( hipFuncCache *cacheConfig )
|
||||
hipError_t hipDeviceGetCacheConfig(hipFuncCache *cacheConfig)
|
||||
{
|
||||
HIP_INIT_API(cacheConfig);
|
||||
|
||||
@@ -86,7 +89,23 @@ hipError_t hipDeviceGetCacheConfig ( hipFuncCache *cacheConfig )
|
||||
return ihipLogStatus(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipFuncSetCacheConfig ( hipFuncCache cacheConfig )
|
||||
extern "C" size_t g_malloc_heap_size;
|
||||
|
||||
hipError_t hipDeviceGetLimit (size_t *pValue, hipLimit_t limit)
|
||||
{
|
||||
HIP_INIT_API(pValue, limit);
|
||||
if(pValue == nullptr) {
|
||||
return ihipLogStatus(hipErrorInvalidValue);
|
||||
}
|
||||
if(limit == hipLimitMallocHeapSize) {
|
||||
*pValue = g_malloc_heap_size;
|
||||
return ihipLogStatus(hipSuccess);
|
||||
}else{
|
||||
return ihipLogStatus(hipErrorUnsupportedLimit);
|
||||
}
|
||||
}
|
||||
|
||||
hipError_t hipFuncSetCacheConfig (hipFuncCache cacheConfig)
|
||||
{
|
||||
HIP_INIT_API(cacheConfig);
|
||||
|
||||
@@ -95,7 +114,7 @@ hipError_t hipFuncSetCacheConfig ( hipFuncCache cacheConfig )
|
||||
return ihipLogStatus(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipDeviceSetSharedMemConfig ( hipSharedMemConfig config )
|
||||
hipError_t hipDeviceSetSharedMemConfig (hipSharedMemConfig config)
|
||||
{
|
||||
HIP_INIT_API(config);
|
||||
|
||||
@@ -104,7 +123,7 @@ hipError_t hipDeviceSetSharedMemConfig ( hipSharedMemConfig config )
|
||||
return ihipLogStatus(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipDeviceGetSharedMemConfig ( hipSharedMemConfig * pConfig )
|
||||
hipError_t hipDeviceGetSharedMemConfig (hipSharedMemConfig *pConfig)
|
||||
{
|
||||
HIP_INIT_API(pConfig);
|
||||
|
||||
|
||||
@@ -1,19 +1,22 @@
|
||||
/*
|
||||
Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
@@ -1,19 +1,22 @@
|
||||
/*
|
||||
Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
+16
-7
@@ -1,19 +1,22 @@
|
||||
/*
|
||||
Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
@@ -25,6 +28,13 @@ static const __half __half_value_zero_float = {0x0};
|
||||
static const unsigned __half_pos_inf = 0x7C00;
|
||||
static const unsigned __half_neg_inf = 0xFC00;
|
||||
|
||||
typedef struct{
|
||||
union{
|
||||
float f;
|
||||
unsigned u;
|
||||
};
|
||||
} struct_float;
|
||||
|
||||
static __device__ float cvt_half_to_float(__half a){
|
||||
struct_float ret = {0};
|
||||
if(a.x == 0){
|
||||
@@ -362,4 +372,3 @@ __device__ __half2 __lowhigh2highlow(const __half2 a){
|
||||
__device__ __half2 __low2half2(const __half2 a, const __half2 b){
|
||||
return {a.q, b.q};
|
||||
}
|
||||
|
||||
|
||||
+96
-11
@@ -19,6 +19,7 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @file hip_hcc.cpp
|
||||
*
|
||||
@@ -67,6 +68,7 @@ int HIP_VISIBLE_DEVICES = 0; /* Contains a comma-separated sequence of GPU ident
|
||||
int HIP_NUM_KERNELS_INFLIGHT = 128;
|
||||
int HIP_BLOCKING_SYNC = 0;
|
||||
|
||||
#define HIP_USE_PRODUCT_NAME 0
|
||||
//#define DISABLE_COPY_EXT 1
|
||||
|
||||
|
||||
@@ -81,6 +83,84 @@ unsigned g_deviceCnt;
|
||||
std::vector<int> g_hip_visible_devices;
|
||||
hsa_agent_t g_cpu_agent;
|
||||
|
||||
/*
|
||||
Implementation of malloc and free device functions.
|
||||
|
||||
This is the best place to put them because the device
|
||||
global variables need to be initialized at the start.
|
||||
*/
|
||||
|
||||
#define NUM_PAGES_PER_THREAD 16
|
||||
#define SIZE_OF_PAGE 64
|
||||
#define NUM_THREADS_PER_CU 64
|
||||
#define NUM_CUS_PER_GPU 64
|
||||
#define NUM_PAGES NUM_PAGES_PER_THREAD * NUM_THREADS_PER_CU * NUM_CUS_PER_GPU
|
||||
#define SIZE_MALLOC NUM_PAGES * SIZE_OF_PAGE
|
||||
#define SIZE_OF_HEAP SIZE_MALLOC
|
||||
|
||||
size_t g_malloc_heap_size = SIZE_OF_HEAP;
|
||||
|
||||
__attribute__((address_space(1))) char gpuHeap[SIZE_OF_HEAP];
|
||||
__attribute__((address_space(1))) uint32_t gpuFlags[NUM_PAGES];
|
||||
|
||||
__device__ void *__hip_hc_malloc(size_t size)
|
||||
{
|
||||
char *heap = (char*)gpuHeap;
|
||||
if(size > SIZE_OF_HEAP)
|
||||
{
|
||||
return (void*)nullptr;
|
||||
}
|
||||
uint32_t totalThreads = hipBlockDim_x * hipGridDim_x * hipBlockDim_y * hipGridDim_y * hipBlockDim_z * hipGridDim_z;
|
||||
uint32_t currentWorkItem = hipThreadIdx_x + hipBlockDim_x * hipBlockIdx_x;
|
||||
|
||||
uint32_t numHeapsPerWorkItem = NUM_PAGES / totalThreads;
|
||||
uint32_t heapSizePerWorkItem = SIZE_OF_HEAP / totalThreads;
|
||||
|
||||
uint32_t stride = size / SIZE_OF_PAGE;
|
||||
uint32_t start = numHeapsPerWorkItem * currentWorkItem;
|
||||
|
||||
uint32_t k=0;
|
||||
|
||||
while(gpuFlags[k] > 0)
|
||||
{
|
||||
k++;
|
||||
}
|
||||
|
||||
for(uint32_t i=0;i<stride-1;i++)
|
||||
{
|
||||
gpuFlags[i+start+k] = 1;
|
||||
}
|
||||
|
||||
gpuFlags[start+stride-1+k] = 2;
|
||||
|
||||
void* ptr = (void*)(heap + heapSizePerWorkItem * currentWorkItem + k*SIZE_OF_PAGE);
|
||||
|
||||
return ptr;
|
||||
}
|
||||
|
||||
__device__ void* __hip_hc_free(void *ptr)
|
||||
{
|
||||
if(ptr == nullptr)
|
||||
{
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
uint32_t offsetByte = (uint64_t)ptr - (uint64_t)gpuHeap;
|
||||
uint32_t offsetPage = offsetByte / SIZE_OF_PAGE;
|
||||
|
||||
while(gpuFlags[offsetPage] != 0) {
|
||||
if(gpuFlags[offsetPage] == 2) {
|
||||
gpuFlags[offsetPage] = 0;
|
||||
offsetPage++;
|
||||
break;
|
||||
} else {
|
||||
gpuFlags[offsetPage] = 0;
|
||||
offsetPage++;
|
||||
}
|
||||
}
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
|
||||
//=================================================================================================
|
||||
@@ -256,8 +336,8 @@ void ihipStream_t::lockclose_postKernelCommand(hc::accelerator_view *av)
|
||||
{
|
||||
|
||||
if (HIP_LAUNCH_BLOCKING) {
|
||||
// TODO - fix this so it goes through proper stream::wait() call.// direct wait OK since we know the stream is locked.
|
||||
av->wait(hc::hcWaitModeActive);
|
||||
// TODO - fix this so it goes through proper stream::wait() call.// direct wait OK since we know the stream is locked.
|
||||
av->wait(hc::hcWaitModeActive);
|
||||
tprintf(DB_SYNC, " %s LAUNCH_BLOCKING for kernel completion\n", ToString(this).c_str());
|
||||
}
|
||||
|
||||
@@ -267,6 +347,7 @@ void ihipStream_t::lockclose_postKernelCommand(hc::accelerator_view *av)
|
||||
|
||||
|
||||
|
||||
#if USE_DISPATCH_HSA_KERNEL==0
|
||||
// Precursor: the stream is already locked,specifically so this routine can enqueue work into the specified av.
|
||||
void ihipStream_t::launchModuleKernel(
|
||||
hc::accelerator_view av,
|
||||
@@ -318,6 +399,7 @@ void ihipStream_t::launchModuleKernel(
|
||||
hsa_queue_store_write_index_relaxed(Queue, packet_index + 1);
|
||||
hsa_signal_store_relaxed(Queue->doorbell_signal, packet_index);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
//=============================================================================
|
||||
@@ -385,7 +467,7 @@ template<>
|
||||
void ihipCtxCriticalBase_t<CtxMutex>::printPeers(FILE *f) const
|
||||
{
|
||||
for (auto iter = _peers.begin(); iter!=_peers.end(); iter++) {
|
||||
fprintf (f, "%s ", (*iter)->toString().c_str());
|
||||
fprintf (f, "%s ", (*iter)->toString().c_str());
|
||||
};
|
||||
}
|
||||
|
||||
@@ -567,7 +649,6 @@ hipError_t ihipDevice_t::initProperties(hipDeviceProp_t* prop)
|
||||
// Set some defaults in case we don't find the appropriate regions:
|
||||
prop->totalGlobalMem = 0;
|
||||
prop->totalConstMem = 0;
|
||||
prop->sharedMemPerBlock = 0;
|
||||
prop-> maxThreadsPerMultiProcessor = 0;
|
||||
prop->regsPerBlock = 0;
|
||||
|
||||
@@ -585,7 +666,11 @@ hipError_t ihipDevice_t::initProperties(hipDeviceProp_t* prop)
|
||||
prop->isMultiGpuBoard = 0 ? gpuAgentsCount < 2 : 1;
|
||||
|
||||
// Get agent name
|
||||
#if HIP_USE_PRODUCT_NAME
|
||||
err = hsa_agent_get_info(_hsaAgent, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_PRODUCT_NAME, &(prop->name));
|
||||
#else
|
||||
err = hsa_agent_get_info(_hsaAgent, HSA_AGENT_INFO_NAME, &(prop->name));
|
||||
#endif
|
||||
DeviceErrorCheck(err);
|
||||
|
||||
// Get agent node
|
||||
@@ -718,6 +803,7 @@ hipError_t ihipDevice_t::initProperties(hipDeviceProp_t* prop)
|
||||
prop->concurrentKernels = 1; // All ROCm hardware supports executing multiple kernels concurrently
|
||||
|
||||
prop->canMapHostMemory = 1; // All ROCm devices can map host memory
|
||||
prop->totalConstMem = 16384;
|
||||
#if 0
|
||||
// TODO - code broken below since it always returns 1.
|
||||
// Are the flags part of the context or part of the device?
|
||||
@@ -1097,7 +1183,6 @@ void ihipInit()
|
||||
assert(deviceCnt == g_deviceCnt);
|
||||
}
|
||||
|
||||
|
||||
tprintf(DB_SYNC, "pid=%u %-30s\n", getpid(), "<ihipInit>");
|
||||
}
|
||||
|
||||
@@ -1351,10 +1436,10 @@ void ihipSetTs(hipEvent_t e)
|
||||
|
||||
// Returns true if thisCtx can see the memory allocated on dstCtx and srcCtx.
|
||||
// The peer-list for a context controls which contexts have access to the memory allocated on that context.
|
||||
// So we check dstCtx's and srcCtx's peerList to see if the booth include thisCtx.
|
||||
// So we check dstCtx's and srcCtx's peerList to see if the both include thisCtx.
|
||||
bool ihipStream_t::canSeePeerMemory(const ihipCtx_t *thisCtx, ihipCtx_t *dstCtx, ihipCtx_t *srcCtx)
|
||||
{
|
||||
tprintf (DB_COPY1, "Checking if direct copy can be used. thisCtx:%s; dstCtx:%s ; srcCtx:%s\n",
|
||||
tprintf (DB_COPY1, "Checking if direct copy can be used. thisCtx:%s; dstCtx:%s ; srcCtx:%s\n",
|
||||
thisCtx->toString().c_str(), dstCtx->toString().c_str(), srcCtx->toString().c_str());
|
||||
|
||||
// Use blocks to control scope of critical sections.
|
||||
@@ -1437,8 +1522,8 @@ void ihipStream_t::locked_copySync(void* dst, const void* src, size_t sizeBytes,
|
||||
};
|
||||
|
||||
|
||||
// If this is P2P access, we need to check to see if the copy agent (specified by the stream where the copy is enqueued)
|
||||
// has peer access enabled to both the source and dest. If this is true, then the copy agent can see both pointers
|
||||
// If this is P2P access, we need to check to see if the copy agent (specified by the stream where the copy is enqueued)
|
||||
// has peer access enabled to both the source and dest. If this is true, then the copy agent can see both pointers
|
||||
// and we can perform the access with the copy engine from the current stream. If not true, then we will copy through the host. (forceHostCopyEngine=true).
|
||||
bool forceHostCopyEngine = false;
|
||||
if (hcCopyDir == hc::hcMemcpyDeviceToDevice) {
|
||||
@@ -1509,13 +1594,13 @@ void ihipStream_t::locked_copyAsync(void* dst, const void* src, size_t sizeBytes
|
||||
crit->_av.copy_async(src, dst, sizeBytes);
|
||||
} catch (Kalmar::runtime_exception) {
|
||||
throw ihipException(hipErrorRuntimeOther);
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
if (HIP_LAUNCH_BLOCKING) {
|
||||
tprintf(DB_SYNC, "LAUNCH_BLOCKING for completion of hipMemcpyAsync(%zu)\n", sizeBytes);
|
||||
this->wait(crit);
|
||||
}
|
||||
}
|
||||
|
||||
} else {
|
||||
locked_copySync(dst, src, sizeBytes, kind);
|
||||
|
||||
@@ -0,0 +1,15 @@
|
||||
target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
|
||||
target triple = "amdgcn--amdhsa"
|
||||
|
||||
|
||||
define void @__threadfence() #1 {
|
||||
fence syncscope(2) seq_cst
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @__threadfence_block() #1 {
|
||||
fence syncscope(3) seq_cst
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #1 = { alwaysinline nounwind }
|
||||
@@ -1,13 +1,16 @@
|
||||
/*
|
||||
Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
|
||||
+94
-40
@@ -1,21 +1,24 @@
|
||||
/*
|
||||
Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hc_am.hpp>
|
||||
#include "hsa/hsa.h"
|
||||
@@ -102,13 +105,13 @@ hipError_t hipMalloc(void** ptr, size_t sizeBytes)
|
||||
HIP_INIT_API(ptr, sizeBytes);
|
||||
|
||||
hipError_t hip_status = hipSuccess;
|
||||
// return NULL pointer when malloc size is 0
|
||||
// return NULL pointer when malloc size is 0
|
||||
if (sizeBytes == 0)
|
||||
{
|
||||
*ptr = NULL;
|
||||
return ihipLogStatus(hipSuccess);
|
||||
}
|
||||
|
||||
|
||||
auto ctx = ihipGetTlsDefaultCtx();
|
||||
|
||||
if (ctx) {
|
||||
@@ -174,8 +177,18 @@ hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags)
|
||||
return ihipLogStatus(hip_status);
|
||||
}
|
||||
|
||||
hipError_t hipMallocHost(void** ptr, size_t sizeBytes)
|
||||
{
|
||||
return hipHostMalloc(ptr, sizeBytes, 0);
|
||||
}
|
||||
|
||||
hipError_t hipHostAlloc(void** ptr, size_t sizeBytes, unsigned int flags)
|
||||
{
|
||||
return hipHostMalloc(ptr, sizeBytes, flags);
|
||||
};
|
||||
|
||||
// width in bytes
|
||||
hipError_t hipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t height)
|
||||
hipError_t hipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t height)
|
||||
{
|
||||
HIP_INIT_API(ptr, pitch, width, height);
|
||||
|
||||
@@ -218,7 +231,7 @@ hipError_t hipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t height
|
||||
return ihipLogStatus(hip_status);
|
||||
}
|
||||
|
||||
hipChannelFormatDesc hipCreateChannelDesc(int x, int y, int z, int w, hipChannelFormatKind f)
|
||||
hipChannelFormatDesc hipCreateChannelDesc(int x, int y, int z, int w, hipChannelFormatKind f)
|
||||
{
|
||||
hipChannelFormatDesc cd;
|
||||
cd.x = x; cd.y = y; cd.z = z; cd.w = w;
|
||||
@@ -227,7 +240,7 @@ hipChannelFormatDesc hipCreateChannelDesc(int x, int y, int z, int w, hipChannel
|
||||
}
|
||||
|
||||
hipError_t hipMallocArray(hipArray** array, const hipChannelFormatDesc* desc,
|
||||
size_t width, size_t height, unsigned int flags)
|
||||
size_t width, size_t height, unsigned int flags)
|
||||
{
|
||||
HIP_INIT_API(array, desc, width, height, flags);
|
||||
|
||||
@@ -375,22 +388,67 @@ hipError_t hipMemcpyToSymbol(const char* symbolName, const void *src, size_t cou
|
||||
{
|
||||
HIP_INIT_API(symbolName, src, count, offset, kind);
|
||||
|
||||
#ifdef USE_MEMCPYTOSYMBOL
|
||||
if(kind != hipMemcpyHostToDevice)
|
||||
if(symbolName == nullptr)
|
||||
{
|
||||
return ihipLogStatus(hipErrorInvalidValue);
|
||||
return ihipLogStatus(hipErrorInvalidSymbol);
|
||||
}
|
||||
|
||||
auto ctx = ihipGetTlsDefaultCtx();
|
||||
|
||||
//hsa_signal_t depSignal;
|
||||
//int depSignalCnt = ctx._default_stream->preCopyCommand(NULL, &depSignal, ihipCommandCopyH2D);
|
||||
assert(0); // Need to properly synchronize the copy - do something with depSignal if != NULL.
|
||||
hc::accelerator acc = ctx->getDevice()->_acc;
|
||||
|
||||
void *ptr = acc.get_symbol_address(symbolName);
|
||||
|
||||
if(ptr == nullptr)
|
||||
{
|
||||
return ihipLogStatus(hipErrorInvalidSymbol);
|
||||
}
|
||||
|
||||
hipStream_t stream = ihipSyncAndResolveStream(hipStreamNull);
|
||||
|
||||
stream->locked_copySync(ptr, src, count + offset, kind);
|
||||
|
||||
ctx->_acc.memcpy_symbol(symbolName, (void*) src,count, offset);
|
||||
#endif
|
||||
return ihipLogStatus(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipMemcpyToSymbolAsync(const char* symbolName, const void *src, size_t count, size_t offset, hipMemcpyKind kind, hipStream_t stream)
|
||||
{
|
||||
HIP_INIT_API(symbolName, src, count, offset, kind, stream);
|
||||
|
||||
if(symbolName == nullptr)
|
||||
{
|
||||
return ihipLogStatus(hipErrorInvalidSymbol);
|
||||
}
|
||||
|
||||
hipError_t e = hipSuccess;
|
||||
|
||||
auto ctx = ihipGetTlsDefaultCtx();
|
||||
|
||||
hc::accelerator acc = ctx->getDevice()->_acc;
|
||||
|
||||
void *ptr = acc.get_symbol_address(symbolName);
|
||||
|
||||
if(ptr == nullptr)
|
||||
{
|
||||
return ihipLogStatus(hipErrorInvalidSymbol);
|
||||
}
|
||||
|
||||
if (stream) {
|
||||
try {
|
||||
stream->locked_copyAsync(ptr, src, count + offset, kind);
|
||||
}
|
||||
catch (ihipException ex) {
|
||||
e = ex._code;
|
||||
}
|
||||
} else {
|
||||
e = hipErrorInvalidValue;
|
||||
}
|
||||
|
||||
return ihipLogStatus(e);
|
||||
|
||||
|
||||
}
|
||||
|
||||
//---
|
||||
hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind)
|
||||
{
|
||||
@@ -705,7 +763,7 @@ hipError_t hipMemcpyToArray(hipArray* dst, size_t wOffset, size_t hOffset,
|
||||
// TODO - make member function of stream?
|
||||
template <typename T>
|
||||
hc::completion_future
|
||||
ihipMemsetKernel(hipStream_t stream,
|
||||
ihipMemsetKernel(hipStream_t stream,
|
||||
LockedAccessor_StreamCrit_t &crit,
|
||||
T * ptr, T val, size_t sizeBytes)
|
||||
{
|
||||
@@ -930,6 +988,11 @@ hipError_t hipHostFree(void* ptr)
|
||||
return ihipLogStatus(hipStatus);
|
||||
};
|
||||
|
||||
hipError_t hipFreeHost(void* ptr)
|
||||
{
|
||||
return hipHostFree(ptr);
|
||||
}
|
||||
|
||||
hipError_t hipFreeArray(hipArray* array)
|
||||
{
|
||||
HIP_INIT_API(array);
|
||||
@@ -954,16 +1017,7 @@ hipError_t hipFreeArray(hipArray* array)
|
||||
return ihipLogStatus(hipStatus);
|
||||
}
|
||||
|
||||
// Stubs of threadfence operations
|
||||
__device__ void __threadfence_block(void){
|
||||
// no-op
|
||||
}
|
||||
|
||||
__device__ void __threadfence(void){
|
||||
// no-op
|
||||
}
|
||||
|
||||
__device__ void __threadfence_system(void){
|
||||
// no-op
|
||||
}
|
||||
|
||||
|
||||
+45
-11
@@ -1,19 +1,22 @@
|
||||
/*
|
||||
Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
@@ -118,10 +121,10 @@ hipError_t hipModuleLoad(hipModule_t *module, const char *fname){
|
||||
ihipDevice_t *currentDevice = ihipGetDevice(deviceId);
|
||||
std::ifstream in(fname, std::ios::binary | std::ios::ate);
|
||||
|
||||
if(!in){
|
||||
if(!in.is_open() ){
|
||||
return ihipLogStatus(hipErrorFileNotFound);
|
||||
|
||||
}else{
|
||||
} else {
|
||||
|
||||
*module = new ihipModule_t;
|
||||
size_t size = std::string::size_type(in.tellg());
|
||||
@@ -248,12 +251,12 @@ hipError_t hipModuleLaunchKernel(hipFunction_t f,
|
||||
hsa_agent_t gpuAgent = (hsa_agent_t)currentDevice->_hsaAgent;
|
||||
|
||||
void *config[5] = {0};
|
||||
size_t kernSize;
|
||||
size_t kernArgSize;
|
||||
|
||||
if(extra != NULL){
|
||||
memcpy(config, extra, sizeof(size_t)*5);
|
||||
if(config[0] == HIP_LAUNCH_PARAM_BUFFER_POINTER && config[2] == HIP_LAUNCH_PARAM_BUFFER_SIZE && config[4] == HIP_LAUNCH_PARAM_END){
|
||||
kernSize = *(size_t*)(config[3]);
|
||||
kernArgSize = *(size_t*)(config[3]);
|
||||
} else {
|
||||
return ihipLogStatus(hipErrorNotInitialized);
|
||||
}
|
||||
@@ -279,6 +282,33 @@ hipError_t hipModuleLaunchKernel(hipFunction_t f,
|
||||
grid_launch_parm lp;
|
||||
hStream = ihipPreLaunchKernel(hStream, 0, 0, &lp, f->_kernelName);
|
||||
|
||||
#if USE_DISPATCH_HSA_KERNEL
|
||||
|
||||
hsa_kernel_dispatch_packet_t aql;
|
||||
|
||||
memset(&aql, 0, sizeof(aql));
|
||||
|
||||
//aql.completion_signal._handle = 0;
|
||||
//aql.kernarg_address = 0;
|
||||
|
||||
aql.workgroup_size_x = blockDimX;
|
||||
aql.workgroup_size_y = blockDimY;
|
||||
aql.workgroup_size_z = blockDimZ;
|
||||
aql.grid_size_x = blockDimX * gridDimX;
|
||||
aql.grid_size_y = blockDimY * gridDimY;
|
||||
aql.grid_size_z = blockDimZ * gridDimZ;
|
||||
aql.group_segment_size = groupSegmentSize;
|
||||
aql.private_segment_size = privateSegmentSize;
|
||||
aql.kernel_object = f->_kernel;
|
||||
aql.setup = 1 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS;
|
||||
aql.header = (HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE) |
|
||||
(1 << HSA_PACKET_HEADER_BARRIER) |
|
||||
(HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE) |
|
||||
(HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE);
|
||||
|
||||
lp.av->dispatch_hsa_kernel(&aql, config[1] /* kernarg*/, kernArgSize);
|
||||
#else
|
||||
|
||||
/*
|
||||
Create signal
|
||||
*/
|
||||
@@ -286,11 +316,13 @@ hipError_t hipModuleLaunchKernel(hipFunction_t f,
|
||||
hsa_signal_t signal;
|
||||
status = hsa_signal_create(1, 0, NULL, &signal);
|
||||
|
||||
|
||||
/*
|
||||
Launch AQL packet
|
||||
*/
|
||||
hStream->launchModuleKernel(*lp.av, signal, blockDimX, blockDimY, blockDimZ,
|
||||
gridDimX, gridDimY, gridDimZ, groupSegmentSize, privateSegmentSize, config[1], kernSize, f->_kernel);
|
||||
gridDimX, gridDimY, gridDimZ, groupSegmentSize, privateSegmentSize, config[1], kernArgSize, f->_kernel);
|
||||
|
||||
|
||||
/*
|
||||
Wait for signal
|
||||
@@ -298,6 +330,8 @@ hipError_t hipModuleLaunchKernel(hipFunction_t f,
|
||||
|
||||
hsa_signal_value_t value = hsa_signal_wait_acquire(signal, HSA_SIGNAL_CONDITION_LT, 1, UINT64_MAX, HSA_WAIT_STATE_BLOCKED);
|
||||
|
||||
#endif // USE_DISPATCH_HSA_KERNEL
|
||||
|
||||
|
||||
ihipPostLaunchKernel(hStream, lp);
|
||||
|
||||
|
||||
@@ -1,19 +1,22 @@
|
||||
/*
|
||||
Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
@@ -1,19 +1,22 @@
|
||||
/*
|
||||
Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
@@ -0,0 +1,29 @@
|
||||
/*
|
||||
Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include<hip/hip_runtime_api.h>
|
||||
#include<iostream>
|
||||
#include<assert.h>
|
||||
|
||||
int main()
|
||||
{
|
||||
size_t heap;
|
||||
assert(hipSuccess == hipDeviceGetLimit(&heap, hipLimitMallocHeapSize));
|
||||
assert(heap == 4194304);
|
||||
}
|
||||
@@ -0,0 +1,75 @@
|
||||
/*
|
||||
Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include<hip/hip_runtime.h>
|
||||
#include<hip/hip_runtime_api.h>
|
||||
#include<iostream>
|
||||
|
||||
#define NUM 1024
|
||||
#define SIZE 1024*4
|
||||
|
||||
#ifdef __HIP_PLATFORM_HCC__
|
||||
__attribute__((address_space(1))) int global[NUM];
|
||||
#endif
|
||||
|
||||
#ifdef __HIP_PLATFORM_NVCC__
|
||||
__device__ int global[NUM];
|
||||
#endif
|
||||
|
||||
__global__ void Assign(hipLaunchParm lp, int* Out)
|
||||
{
|
||||
int tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
|
||||
Out[tid] = global[tid];
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
int *A, *B, *Ad;
|
||||
A = new int[NUM];
|
||||
B = new int[NUM];
|
||||
for(unsigned i=0;i<NUM;i++) {
|
||||
A[i] = -1*i;
|
||||
B[i] = 0;
|
||||
}
|
||||
|
||||
hipMalloc((void**)&Ad, SIZE);
|
||||
|
||||
hipStream_t stream;
|
||||
hipStreamCreate(&stream);
|
||||
hipMemcpyToSymbolAsync(HIP_SYMBOL(global), A, SIZE, 0, hipMemcpyHostToDevice, stream);
|
||||
hipStreamSynchronize(stream);
|
||||
hipLaunchKernel(Assign, dim3(1,1,1), dim3(NUM,1,1), 0, 0, Ad);
|
||||
hipMemcpy(B, Ad, SIZE, hipMemcpyDeviceToHost);
|
||||
|
||||
for(unsigned i=0;i<NUM;i++) {
|
||||
assert(A[i] == B[i]);
|
||||
}
|
||||
|
||||
for(unsigned i=0;i<NUM;i++) {
|
||||
A[i] = -2*i;
|
||||
B[i] = 0;
|
||||
}
|
||||
|
||||
hipMemcpyToSymbol(HIP_SYMBOL(global), A, SIZE, 0, hipMemcpyHostToDevice);
|
||||
hipLaunchKernel(Assign, dim3(1,1,1), dim3(NUM,1,1), 0, 0, Ad);
|
||||
hipMemcpy(B, Ad, SIZE, hipMemcpyDeviceToHost);
|
||||
for(unsigned i=0;i<NUM;i++) {
|
||||
assert(A[i] == B[i]);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,69 @@
|
||||
/*
|
||||
Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include<iostream>
|
||||
#include<hip/hip_runtime_api.h>
|
||||
#include<hip/hip_runtime.h>
|
||||
|
||||
#define NUM 1024
|
||||
#define SIZE NUM*sizeof(float)
|
||||
|
||||
__global__ void vAdd(hipLaunchParm lp, float *In1, float *In2, float *In3, float *In4, float *Out)
|
||||
{
|
||||
int tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
|
||||
In4[tid] = In1[tid] + In2[tid];
|
||||
__threadfence();
|
||||
In3[tid] = In3[tid] + In4[tid];
|
||||
__threadfence_block();
|
||||
Out[tid] = In4[tid] + In3[tid];
|
||||
|
||||
}
|
||||
|
||||
int main(){
|
||||
float *In1 = new float[1024];
|
||||
float *In2 = new float[1024];
|
||||
float *In3 = new float[1024];
|
||||
float *In4 = new float[1024];
|
||||
float *Out = new float[1024];
|
||||
|
||||
for(uint32_t i=0;i<1024;i++)
|
||||
{
|
||||
In1[i] = 1.0f;
|
||||
In2[i] = 1.0f;
|
||||
In3[i] = 1.0f;
|
||||
In4[i] = 1.0f;
|
||||
}
|
||||
|
||||
float *In1d, *In2d, *In3d, *In4d, *Outd;
|
||||
hipMalloc((void**)&In1d, SIZE);
|
||||
hipMalloc((void**)&In2d, SIZE);
|
||||
hipMalloc((void**)&In3d, SIZE);
|
||||
hipMalloc((void**)&In4d, SIZE);
|
||||
hipMalloc((void**)&Outd, SIZE);
|
||||
|
||||
hipMemcpy(In1d, In1, SIZE, hipMemcpyHostToDevice);
|
||||
hipMemcpy(In2d, In2, SIZE, hipMemcpyHostToDevice);
|
||||
hipMemcpy(In3d, In3, SIZE, hipMemcpyHostToDevice);
|
||||
hipMemcpy(In4d, In4, SIZE, hipMemcpyHostToDevice);
|
||||
|
||||
hipLaunchKernel(vAdd, dim3(32,1,1), dim3(32,1,1), 0, 0, In1d, In2d, In3d, In4d, Outd);
|
||||
hipMemcpy(Out, Outd, SIZE, hipMemcpyDeviceToHost);
|
||||
assert(Out[10] == 2*In1[10] + 2*In2[10] + In3[10]);
|
||||
|
||||
}
|
||||
@@ -18,10 +18,10 @@ THE SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
#include<iostream>
|
||||
#include <iostream>
|
||||
#include "hip/hip_runtime.h"
|
||||
#include "hip/hip_runtime_api.h"
|
||||
#include<hip/hcc_detail/hipComplex.h>
|
||||
#include "hip/hcc_detail/hip_complex.h"
|
||||
|
||||
#define LEN 64
|
||||
#define SIZE 64<<2
|
||||
|
||||
@@ -0,0 +1,60 @@
|
||||
/*
|
||||
Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include<hip/hip_runtime.h>
|
||||
#include<hip/hip_runtime_api.h>
|
||||
#include<iostream>
|
||||
|
||||
#define HIP_ASSERT(status) assert(hipSuccess == status);
|
||||
|
||||
#define NUM 1024
|
||||
#define SIZE NUM * 8
|
||||
|
||||
__global__ void Alloc(hipLaunchParm lp, uint64_t *Ptr) {
|
||||
int tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
|
||||
Ptr[tid] = (uint64_t)malloc(128);
|
||||
}
|
||||
|
||||
__global__ void Free(hipLaunchParm lp, uint64_t *Ptr) {
|
||||
int tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
|
||||
free((void*)Ptr[tid]);
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
uint64_t *hPtr, *dPtr;
|
||||
hPtr = new uint64_t[NUM];
|
||||
for(uint32_t i=0;i<NUM;i++) {
|
||||
hPtr[i] = 1;
|
||||
}
|
||||
int devCnt;
|
||||
hipGetDeviceCount(&devCnt);
|
||||
for(uint32_t i=0;i<devCnt;i++){
|
||||
HIP_ASSERT(hipSetDevice(i));
|
||||
HIP_ASSERT(hipMalloc((void**)&dPtr, SIZE));
|
||||
HIP_ASSERT(hipMemcpy(dPtr, hPtr, SIZE, hipMemcpyHostToDevice));
|
||||
hipLaunchKernel(Alloc, dim3(1,1,1), dim3(NUM,1,1), 0, 0, dPtr);
|
||||
HIP_ASSERT(hipMemcpy(hPtr, dPtr, SIZE, hipMemcpyDeviceToHost));
|
||||
hipLaunchKernel(Free, dim3(1,1,1), dim3(NUM,1,1), 0, 0, dPtr);
|
||||
HIP_ASSERT(hipFree(dPtr));
|
||||
for(uint32_t i=1;i<NUM;i++) {
|
||||
assert(hPtr[i] == hPtr[i-1] + 4096);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,187 @@
|
||||
#include<hip/hip_runtime_api.h>
|
||||
#include<hip/hip_runtime.h>
|
||||
#include<iostream>
|
||||
|
||||
#define LEN8 8 * 4
|
||||
#define LEN9 9 * 4
|
||||
#define LEN10 10 * 4
|
||||
#define LEN11 11 * 4
|
||||
#define LEN12 12 * 4
|
||||
|
||||
__global__ void MemCpy8(hipLaunchParm lp, uint8_t *In, uint8_t *Out) {
|
||||
int tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
|
||||
memcpy(Out + tid*8, In + tid*8, 8);
|
||||
}
|
||||
|
||||
__global__ void MemCpy9(hipLaunchParm lp, uint8_t *In, uint8_t *Out) {
|
||||
int tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
|
||||
memcpy(Out + tid*9, In + tid*9, 9);
|
||||
}
|
||||
|
||||
__global__ void MemCpy10(hipLaunchParm lp, uint8_t *In, uint8_t *Out) {
|
||||
int tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
|
||||
memcpy(Out + tid*10, In + tid*10, 10);
|
||||
}
|
||||
|
||||
__global__ void MemCpy11(hipLaunchParm lp, uint8_t *In, uint8_t *Out) {
|
||||
int tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
|
||||
memcpy(Out + tid*11, In + tid*11, 11);
|
||||
}
|
||||
|
||||
__global__ void MemCpy12(hipLaunchParm lp, uint8_t *In, uint8_t *Out) {
|
||||
int tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
|
||||
memcpy(Out + tid*12, In + tid*12, 12);
|
||||
}
|
||||
|
||||
__global__ void MemSet8(hipLaunchParm lp, uint8_t *In) {
|
||||
int tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
|
||||
memset(In + tid*8, 1, 8);
|
||||
}
|
||||
|
||||
__global__ void MemSet9(hipLaunchParm lp, uint8_t *In) {
|
||||
int tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
|
||||
memset(In + tid*9, 1, 9);
|
||||
}
|
||||
|
||||
__global__ void MemSet10(hipLaunchParm lp, uint8_t *In) {
|
||||
int tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
|
||||
memset(In + tid*10, 1, 10);
|
||||
}
|
||||
|
||||
__global__ void MemSet11(hipLaunchParm lp, uint8_t *In) {
|
||||
int tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
|
||||
memset(In + tid*11, 1, 11);
|
||||
}
|
||||
|
||||
__global__ void MemSet12(hipLaunchParm lp, uint8_t *In) {
|
||||
int tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
|
||||
memset(In + tid*12, 1, 12);
|
||||
}
|
||||
|
||||
int main(){
|
||||
uint8_t *A, *Ad, *B, *Bd, *C, *Cd;
|
||||
A = new uint8_t[LEN8];
|
||||
B = new uint8_t[LEN8];
|
||||
C = new uint8_t[LEN8];
|
||||
for(uint32_t i=0;i<LEN8;i++) {
|
||||
A[i] = i;
|
||||
B[i] = 0;
|
||||
C[i] = 0;
|
||||
}
|
||||
hipMalloc((void**)&Ad, LEN8);
|
||||
hipMalloc((void**)&Bd, LEN8);
|
||||
hipMalloc((void**)&Cd, LEN8);
|
||||
hipMemcpy(Ad, A, LEN8, hipMemcpyHostToDevice);
|
||||
hipLaunchKernel(MemCpy8, dim3(2,1,1), dim3(2,1,1), 0, 0, Ad, Bd);
|
||||
hipLaunchKernel(MemSet8, dim3(2,1,1), dim3(2,1,1), 0, 0, Cd);
|
||||
hipMemcpy(B, Bd, LEN8, hipMemcpyDeviceToHost);
|
||||
hipMemcpy(C, Cd, LEN8, hipMemcpyDeviceToHost);
|
||||
for(uint32_t i=0;i<LEN8;i++) {
|
||||
assert(A[i] == B[i]);
|
||||
assert(C[i] == 1);
|
||||
}
|
||||
|
||||
delete A;
|
||||
delete B;
|
||||
delete C;
|
||||
|
||||
A = new uint8_t[LEN9];
|
||||
B = new uint8_t[LEN9];
|
||||
C = new uint8_t[LEN9];
|
||||
for(uint32_t i=0;i<LEN9;i++) {
|
||||
A[i] = i;
|
||||
B[i] = 0;
|
||||
C[i] = 0;
|
||||
}
|
||||
hipMalloc((void**)&Ad, LEN9);
|
||||
hipMalloc((void**)&Bd, LEN9);
|
||||
hipMalloc((void**)&Cd, LEN9);
|
||||
hipMemcpy(Ad, A, LEN9, hipMemcpyHostToDevice);
|
||||
hipLaunchKernel(MemCpy9, dim3(2,1,1), dim3(2,1,1), 0, 0, Ad, Bd);
|
||||
hipLaunchKernel(MemSet9, dim3(2,1,1), dim3(2,1,1), 0, 0, Cd);
|
||||
hipMemcpy(B, Bd, LEN9, hipMemcpyDeviceToHost);
|
||||
hipMemcpy(C, Cd, LEN9, hipMemcpyDeviceToHost);
|
||||
for(uint32_t i=0;i<LEN9;i++) {
|
||||
assert(A[i] == B[i]);
|
||||
assert(C[i] == 1);
|
||||
}
|
||||
|
||||
delete A;
|
||||
delete B;
|
||||
delete C;
|
||||
|
||||
A = new uint8_t[LEN10];
|
||||
B = new uint8_t[LEN10];
|
||||
C = new uint8_t[LEN10];
|
||||
for(uint32_t i=0;i<LEN10;i++) {
|
||||
A[i] = i;
|
||||
B[i] = 0;
|
||||
C[i] = 0;
|
||||
}
|
||||
hipMalloc((void**)&Ad, LEN10);
|
||||
hipMalloc((void**)&Bd, LEN10);
|
||||
hipMalloc((void**)&Cd, LEN10);
|
||||
hipMemcpy(Ad, A, LEN10, hipMemcpyHostToDevice);
|
||||
hipLaunchKernel(MemCpy10, dim3(2,1,1), dim3(2,1,1), 0, 0, Ad, Bd);
|
||||
hipLaunchKernel(MemSet10, dim3(2,1,1), dim3(2,1,1), 0, 0, Cd);
|
||||
hipMemcpy(B, Bd, LEN10, hipMemcpyDeviceToHost);
|
||||
hipMemcpy(C, Cd, LEN10, hipMemcpyDeviceToHost);
|
||||
for(uint32_t i=0;i<LEN10;i++) {
|
||||
assert(A[i] == B[i]);
|
||||
assert(C[i] == 1);
|
||||
}
|
||||
|
||||
delete A;
|
||||
delete B;
|
||||
delete C;
|
||||
|
||||
A = new uint8_t[LEN11];
|
||||
B = new uint8_t[LEN11];
|
||||
C = new uint8_t[LEN11];
|
||||
for(uint32_t i=0;i<LEN11;i++) {
|
||||
A[i] = i;
|
||||
B[i] = 0;
|
||||
C[i] = 0;
|
||||
}
|
||||
hipMalloc((void**)&Ad, LEN11);
|
||||
hipMalloc((void**)&Bd, LEN11);
|
||||
hipMalloc((void**)&Cd, LEN11);
|
||||
hipMemcpy(Ad, A, LEN11, hipMemcpyHostToDevice);
|
||||
hipLaunchKernel(MemCpy11, dim3(2,1,1), dim3(2,1,1), 0, 0, Ad, Bd);
|
||||
hipLaunchKernel(MemSet11, dim3(2,1,1), dim3(2,1,1), 0, 0, Cd);
|
||||
hipMemcpy(B, Bd, LEN11, hipMemcpyDeviceToHost);
|
||||
hipMemcpy(C, Cd, LEN11, hipMemcpyDeviceToHost);
|
||||
for(uint32_t i=0;i<LEN11;i++) {
|
||||
assert(A[i] == B[i]);
|
||||
assert(C[i] == 1);
|
||||
}
|
||||
|
||||
delete A;
|
||||
delete B;
|
||||
delete C;
|
||||
|
||||
A = new uint8_t[LEN12];
|
||||
B = new uint8_t[LEN12];
|
||||
C = new uint8_t[LEN12];
|
||||
for(uint32_t i=0;i<LEN12;i++) {
|
||||
A[i] = i;
|
||||
B[i] = 0;
|
||||
C[i] = 0;
|
||||
}
|
||||
hipMalloc((void**)&Ad, LEN12);
|
||||
hipMalloc((void**)&Bd, LEN12);
|
||||
hipMalloc((void**)&Cd, LEN12);
|
||||
hipMemcpy(Ad, A, LEN12, hipMemcpyHostToDevice);
|
||||
hipLaunchKernel(MemCpy12, dim3(2,1,1), dim3(2,1,1), 0, 0, Ad, Bd);
|
||||
hipLaunchKernel(MemSet12, dim3(2,1,1), dim3(2,1,1), 0, 0, Cd);
|
||||
hipMemcpy(B, Bd, LEN12, hipMemcpyDeviceToHost);
|
||||
hipMemcpy(C, Cd, LEN12, hipMemcpyDeviceToHost);
|
||||
for(uint32_t i=0;i<LEN12;i++) {
|
||||
assert(A[i] == B[i]);
|
||||
assert(C[i] == 1);
|
||||
}
|
||||
|
||||
delete A;
|
||||
delete B;
|
||||
delete C;
|
||||
}
|
||||
Reference in New Issue
Block a user