Merge branch 'master' into roc-1.6.x
Change-Id: I8c5861c83032c6006731595ec40e09fdc9102749
Este commit está contenido en:
+1
-1
@@ -8,7 +8,7 @@ hip-amdinternal
|
||||
HIP-Examples
|
||||
lib
|
||||
packages
|
||||
|
||||
build
|
||||
bin/hipInfo
|
||||
bin/hipBusBandwidth
|
||||
bin/hipDispatchLatency
|
||||
|
||||
+2
-10
@@ -141,9 +141,6 @@ add_to_config(_buildInfo COMPILE_HIP_ATP_MARKER)
|
||||
#############################
|
||||
# Build steps
|
||||
#############################
|
||||
# Rebuild cmake cache updates .hipInfo and .hipVersion
|
||||
add_custom_target(update_build_and_version_info COMMAND make rebuild_cache)
|
||||
|
||||
# Build clang hipify if enabled
|
||||
add_subdirectory(hipify-clang)
|
||||
|
||||
@@ -179,7 +176,9 @@ if(HIP_PLATFORM STREQUAL "hcc")
|
||||
src/hip_peer.cpp
|
||||
src/hip_stream.cpp
|
||||
src/hip_module.cpp
|
||||
src/hip_db.cpp
|
||||
src/grid_launch.cpp
|
||||
src/hip_texture.cpp
|
||||
src/env.cpp)
|
||||
|
||||
set(SOURCE_FILES_DEVICE
|
||||
@@ -211,13 +210,6 @@ if(HIP_PLATFORM STREQUAL "hcc")
|
||||
endforeach()
|
||||
target_link_libraries(hip_hcc INTERFACE hcc::hccrt;hcc::hc_am)
|
||||
|
||||
# Generate hcc_version.txt
|
||||
add_custom_target(query_hcc_version COMMAND ${HCC_HOME}/bin/hcc --version > ${PROJECT_BINARY_DIR}/hcc_version.tmp)
|
||||
add_custom_target(check_hcc_version COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PROJECT_BINARY_DIR}/hcc_version.tmp ${PROJECT_BINARY_DIR}/hcc_version.txt DEPENDS query_hcc_version)
|
||||
set_source_files_properties(${PROJECT_BINARY_DIR}/hcc_version.txt PROPERTIES GENERATED TRUE)
|
||||
set_source_files_properties(${SOURCE_FILES_RUNTIME} ${SOURCE_FILES_DEVICE} PROPERTIES OBJECT_DEPENDS ${PROJECT_BINARY_DIR}/hcc_version.txt)
|
||||
add_dependencies(hip_hcc check_hcc_version update_build_and_version_info)
|
||||
|
||||
# Generate .hipInfo
|
||||
file(WRITE "${PROJECT_BINARY_DIR}/.hipInfo" ${_buildInfo})
|
||||
endif()
|
||||
|
||||
+4
-3
@@ -51,15 +51,16 @@ Run hipconfig (instructions below assume default installation path) :
|
||||
/opt/rocm/bin/hipconfig --full
|
||||
```
|
||||
|
||||
Compile and run the [square sample](https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP/tree/master/samples/0_Intro/square).
|
||||
Compile and run the [square sample](https://github.com/ROCm-Developer-Tools/HIP/tree/master/samples/0_Intro/square).
|
||||
|
||||
|
||||
# Building HIP from source
|
||||
HIP source code is available and the project can be built from source on the HCC platform.
|
||||
|
||||
1. Follow the above steps to install and validate the binary packages.
|
||||
2. Download HIP source code (from the [GitHub repot](https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP).)
|
||||
3. Build and install HIP (This is the simple version assuming default paths ; see below for additional options.)
|
||||
2. Download HIP source code (from the [GitHub repo](https://github.com/ROCm-Developer-Tools/HIP)).
|
||||
3. Install HIP build-time dependencies using ```sudo apt-get install libelf-dev```.
|
||||
4. Build and install HIP (This is the simple version assuming default paths ; see below for additional options.)
|
||||
```
|
||||
cd HIP
|
||||
mkdir build
|
||||
|
||||
vendido
+466
@@ -0,0 +1,466 @@
|
||||
#!/usr/bin/env groovy
|
||||
|
||||
// Job-level configuration (originally produced by the snippet generator
// 'properties; set job properties'):
//   - keep only the 10 most recent builds; the other retention limits are left unset
//   - disable concurrent builds of this job
//   - expose a boolean build parameter that controls pushing images to docker-hub
//   - grant every project ('*') permission to copy artifacts from this job
properties([
    buildDiscarder(
        logRotator(
            artifactDaysToKeepStr: '',
            artifactNumToKeepStr: '',
            daysToKeepStr: '',
            numToKeepStr: '10'
        )
    ),
    disableConcurrentBuilds(),
    parameters([
        booleanParam(
            name: 'push_image_to_docker_hub',
            defaultValue: false,
            description: 'Push hip & hcc image to rocm docker-hub'
        )
    ]),
    [$class: 'CopyArtifactPermissionProperty', projectNames: '*']
])
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// -- AUXILIARY HELPER FUNCTIONS
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// Build number of the upstream job that triggered this build.
// Returns 0 when the current build has no upstream cause.
@NonCPS
int get_upstream_build_num( )
{
    def trigger = currentBuild.rawBuild.getCause( hudson.model.Cause$UpstreamCause )

    return ( trigger != null ) ? trigger.getUpstreamBuild( ) : 0
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// Project name of the upstream job that triggered this build.
// Returns null when the current build has no upstream cause.
@NonCPS
String get_upstream_build_project( )
{
    def trigger = currentBuild.rawBuild.getCause( hudson.model.Cause$UpstreamCause )

    return ( trigger != null ) ? trigger.getUpstreamProject( ) : null
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// Map a build configuration name to the relative path of its build directory.
// 'release' (compared case-insensitively) maps to build/release; every other
// value maps to build/debug.
String build_directory_rel( String build_config )
{
    String leaf = build_config.equalsIgnoreCase( 'release' ) ? 'release' : 'debug'

    return 'build/' + leaf
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// Remove every local docker image whose 'docker images' listing line matches
// "<org>/<image_name>" (the pair is handed to grep as a pattern).
// Builds create many image:tag pairs and the docker global variable offers no
// apparent way to delete them, so this shells out instead.
def docker_clean_images( String org, String image_name )
{
    String list_matching = "docker images | grep \"${org}/${image_name}\""

    // grep exits with 0 only when at least one listing line matched
    int grep_status = sh( script: list_matching, returnStatus: true )
    if( grep_status != 0 )
    {
        // Nothing matched, so there is nothing to remove
        return
    }

    // Turn each matching line into repository:tag and hand the list to 'docker rmi'
    sh "${list_matching} | awk '{print \$1 \":\" \$2}' | xargs docker rmi"
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// -- BUILD RELATED FUNCTIONS
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// Check out the HIP sources into the workspace inside a "<platform> clone" stage.
// The checkout reuses the job's own SCM configuration (branches, remotes,
// extensions) and adds a CleanCheckout extension.
// Returns the workspace-relative path of the directory holding the sources.
String checkout_and_version( String platform )
{
    String source_hip_rel = 'src/hip'

    stage( "${platform} clone" )
    {
        dir( source_hip_rel )
        {
            def checkout_extensions = scm.extensions + [[$class: 'CleanCheckout']]

            checkout([
                $class: 'GitSCM',
                branches: scm.branches,
                doGenerateSubmoduleConfigurations: scm.doGenerateSubmoduleConfigurations,
                extensions: checkout_extensions,
                userRemoteConfigs: scm.userRemoteConfigs
            ])
        }
    }

    return source_hip_rel
}
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// Build the docker image the project is compiled in, inside a
// "<platform> build image" stage, and return a handle to it.
//   platform            - label used in the stage name
//   org                 - docker organization the image is tagged under
//   optional_build_parm - extra arguments spliced into 'docker build' (may be empty)
//   source_hip_rel      - workspace-relative source directory; docker/<dockerfile> is read from it
//   from_image          - value passed as the base_image build argument
def docker_build_image( String platform, String org, String optional_build_parm, String source_hip_rel, String from_image )
{
    String image_tag = "${org}/build-ubuntu-16.04:latest"
    String dockerfile_path = 'docker/dockerfile-build-ubuntu-16.04'
    def build_image = null

    stage( "${platform} build image" )
    {
        dir( source_hip_rel )
        {
            // The uid of the invoking user is forwarded to the dockerfile as a build argument
            def user_uid = sh( script: 'id -u', returnStdout: true ).trim( )

            // docker.build() is avoided on purpose: Docker 17.05 allows ARG values in FROM
            // statements, but the docker inspect done by docker.build() fails on them
            // (https://issues.jenkins-ci.org/browse/JENKINS-44836). Workaround: run
            // 'docker build' from a shell step and wrap the result with docker.image().
            sh "docker build -t ${image_tag} -f ${dockerfile_path} ${optional_build_parm} --build-arg user_uid=${user_uid} --build-arg base_image=${from_image} ."
            build_image = docker.image( image_tag )
        }
    }

    return build_image
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// Run the cmake configure, build, test and package commands inside a docker
// container so the build happens in a fixed environment.
//   build_image        - docker image handle to run the steps in
//   inside_args        - arguments passed to build_image.inside()
//   platform           - label used in stage names; packaging only runs when it starts with 'hcc-'
//   optional_configure - extra arguments spliced into the cmake command line (may be empty)
//   build_config       - value for CMAKE_BUILD_TYPE, also used in the stage name
//   source_hip_rel     - workspace-relative source directory
//   build_dir_rel      - workspace-relative build directory (deleted and recreated)
// Returns null; callers do not use the result.
def docker_build_inside_image( def build_image, String inside_args, String platform, String optional_configure, String build_config, String source_hip_rel, String build_dir_rel )
{
    // cmake is invoked after cd'ing into the build directory, so it is given an absolute source path
    String source_hip_abs = pwd() + "/" + source_hip_rel

    build_image.inside( inside_args )
    {
        stage("${platform} make ${build_config}")
        {
            // The rm command needs to run as sudo because the test steps below create files owned by root
            sh """#!/usr/bin/env bash
                set -x
                sudo rm -rf ${build_dir_rel}
                mkdir -p ${build_dir_rel}
                cd ${build_dir_rel}
                cmake -DCMAKE_BUILD_TYPE=${build_config} -DCMAKE_INSTALL_PREFIX=staging ${optional_configure} ${source_hip_abs}
                make -j\$(nproc)
                """
        }

        // Cap the maximum amount of testing, in case of hangs
        timeout(time: 1, unit: 'HOURS')
        {
            stage("${platform} unit testing")
            {
                // 'make build_tests' runs with -i, so errors while building tests are ignored
                sh """#!/usr/bin/env bash
                    set -x
                    cd ${build_dir_rel}
                    make install -j\$(nproc)
                    make build_tests -i -j\$(nproc)
                    make test
                    """
                // If unit tests output a junit or xunit file in the future, jenkins can parse that
                // file to display test results on the dashboard, e.g. junit "${build_dir_rel}/*.xml"
            }
        }

        // Only create packages from hcc based builds
        if( platform.toLowerCase( ).startsWith( 'hcc-' ) )
        {
            stage("${platform} packaging")
            {
                sh """#!/usr/bin/env bash
                    set -x
                    cd ${build_dir_rel}
                    make package
                    """

                // No matter the base platform, all packages have the same name
                // Only upload 1 set of packages, so we don't have a race condition uploading packages
                if( platform.toLowerCase( ).startsWith( 'hcc-ctu' ) )
                {
                    archiveArtifacts artifacts: "${build_dir_rel}/*.deb", fingerprint: true
                    archiveArtifacts artifacts: "${build_dir_rel}/*.rpm", fingerprint: true
                }
            }
        }
    }

    // The original ended with 'return void', which hands back the 'void' class
    // object; an explicit null states the intent (no result) unambiguously.
    return null
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// Build a fresh docker image FROM a clean base image (no build dependencies
// included) and upload it to the internal artifactory.
//   hcc_ver         - compiler label embedded in the image name
//   artifactory_org - docker organization the image is tagged under
//   from_image      - value passed as the base_image build argument
//   source_hip_rel  - workspace-relative source directory providing docker/*
//   build_dir_rel   - workspace-relative build directory, used as the docker build context
// Returns the image name (without organization or tag).
// A failed push is logged but never fails the build.
String docker_upload_artifactory( String hcc_ver, String artifactory_org, String from_image, String source_hip_rel, String build_dir_rel )
{
    def hip_install_image = null
    String image_name = "hip-${hcc_ver}-ubuntu-16.04"

    stage( 'artifactory' )
    {
        println "artifactory_org: ${artifactory_org}"

        // We copy the docker files into the bin directory where the .deb lives so that it's a clean build every time
        sh "cp -r ${source_hip_rel}/docker/* ${build_dir_rel}"

        // docker.build() is avoided on purpose: Docker 17.05 allows ARG values in FROM
        // statements, but the docker inspect done by docker.build() fails on them
        // (https://issues.jenkins-ci.org/browse/JENKINS-44836). Workaround: run
        // 'docker build' from a shell step and wrap the result with docker.image().
        sh "docker build -t ${artifactory_org}/${image_name} --pull -f ${build_dir_rel}/dockerfile-hip-ubuntu-16.04 --build-arg base_image=${from_image} ${build_dir_rel}"
        hip_install_image = docker.image( "${artifactory_org}/${image_name}" )

        // The connection to artifactory can fail sometimes, but this should not be treated as a build fail
        try
        {
            // Don't push pull requests to artifactory, these tend to accumulate over time
            if( env.BRANCH_NAME.toLowerCase( ).startsWith( 'pr-' ) )
            {
                println 'Pull Request (PR-xxx) detected; NOT pushing to artifactory'
            }
            else
            {
                docker.withRegistry('http://compute-artifactory:5001', 'artifactory-cred' )
                {
                    hip_install_image.push( "${env.BUILD_NUMBER}" )
                    hip_install_image.push( 'latest' )
                }
            }
        }
        catch( err )
        {
            // Still best-effort, but leave a trace in the log so that a broken
            // upload (or an unset BRANCH_NAME) does not go unnoticed
            println "Push to artifactory failed; ignoring: ${err}"
            currentBuild.result = 'SUCCESS'
        }
    }

    return image_name
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// Re-tag a locally built image under remote_org and upload it to the public
// docker-hub, tagged with both the build number and 'latest'.
//   local_org  - organization the image is currently tagged under
//   image_name - image name (without organization)
//   remote_org - docker-hub organization to publish under
// A failed tag or push is logged but never fails the build.
def docker_upload_dockerhub( String local_org, String image_name, String remote_org )
{
    stage( 'docker-hub' )
    {
        // Do not treat failures to push to docker-hub as a build fail
        try
        {
            sh """#!/usr/bin/env bash
                set -x
                echo inside sh
                docker tag ${local_org}/${image_name} ${remote_org}/${image_name}
                """

            // Declared with 'def' so the handle stays local to this call instead of
            // leaking into the script-wide binding
            def docker_hub_image = docker.image( "${remote_org}/${image_name}" )

            docker.withRegistry('https://registry.hub.docker.com', 'docker-hub-cred' )
            {
                docker_hub_image.push( "${env.BUILD_NUMBER}" )
                docker_hub_image.push( 'latest' )
            }
        }
        catch( err )
        {
            // Still best-effort, but leave a trace in the log so that a broken
            // upload does not go unnoticed
            println "Push to docker-hub failed; ignoring: ${err}"
            currentBuild.result = 'SUCCESS'
        }
    }
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// hcc_integration_testing
// Sets up compilation and testing of HIP against a compiler downloaded from an upstream build.
// Integration testing is centered around docker and constructing clean test environments every time.
//   inside_args  - arguments passed to the docker container the build runs in
//   job          - label used for stage names and passed on as the platform
//   build_config - build configuration name (selects the build directory and CMAKE_BUILD_TYPE)
// Always returns null. The test images created here are removed even when the build or tests fail.
//
// NOTES: I have implemented integration testing 3 different ways, and I've come to the conclusion nothing is perfect
// 1.  I've tried having HCC push the test compiler to artifactory, and having HIP download the test docker image from artifactory
//     a.  The act of uploading and downloading images from artifactory takes minutes
//     b.  There is no good way of deleting images from a repository.  You have to use an arcane CURL command and I don't know how
//         to keep the password secret.  These test integration images are meant to be ephemeral.
// 2.  I tried 'docker save' to export a docker image into a tarball, and transferring the image through 'copy artifacts plugin'
//     a.  The HCC docker image uncompressed is over 1GB
//     b.  Compressing the docker image takes even longer than uploading the image to artifactory
// 3.  Download the HCC .deb and dockerfile through 'copy artifacts plugin'.  Create a new HCC image on the fly
//     a.  There is inefficiency in building a new ubuntu image and installing HCC twice (once in HCC build, once here)
//     b.  This solution doesn't scale when we start testing downstream libraries
//
// I've implemented solution #3 above, probably transitioning to #2 down the line (probably without compression)
String hcc_integration_testing( String inside_args, String job, String build_config )
{
    // Attempt to make unique docker image names for each build, to support concurrent builds
    // Mangle docker org name with upstream build info
    String testing_org_name = 'hcc-test-' + get_upstream_build_project( ).replaceAll('/','-') + '-' + get_upstream_build_num( )

    // Tag image name with this build number
    String hcc_test_image_name = "hcc:${env.BUILD_NUMBER}"

    def hip_integration_image = null

    try
    {
        dir( 'integration-testing' )
        {
            deleteDir( )

            // This invokes 'copy artifact plugin' to copy archived files from upstream build
            // (Copying straight from the upstream workspace with a WorkspaceSelector was tried,
            // but it doesn't seem to work across machines)
            step([$class: 'CopyArtifact', filter: 'archive/**/*.deb, docker/dockerfile-*',
                fingerprintArtifacts: true, projectName: get_upstream_build_project( ), flatten: true,
                selector: [$class: 'TriggeredBuildSelector', allowUpstreamDependencies: false, fallbackToLastSuccessful: false, upstreamFilterStrategy: 'UseGlobalSetting'],
                target: '.' ])

            docker.build( "${testing_org_name}/${hcc_test_image_name}", "-f dockerfile-hcc-lc-ubuntu-16.04 ." )
        }

        // Checkout source code, dependencies and version files
        String source_hip_rel = checkout_and_version( job )

        // Construct a binary directory path based on build config
        String build_hip_rel = build_directory_rel( build_config )

        // Build hip inside of the build environment
        hip_integration_image = docker_build_image( job, testing_org_name, '', source_hip_rel, "${testing_org_name}/${hcc_test_image_name}" )

        docker_build_inside_image( hip_integration_image, inside_args, job, '', build_config, source_hip_rel, build_hip_rel )
    }
    finally
    {
        // The test images are meant to be ephemeral: remove them on failure too,
        // otherwise every failed integration run leaves its images behind on the node
        docker_clean_images( testing_org_name, '*' )
    }

    return null
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// -- MAIN
|
||||
// Following this line is the start of MAIN of this Jenkinsfile
|
||||
// Script-wide settings shared by every build path below
String job_name = env.JOB_NAME.toLowerCase( )
String build_config = 'Release'

// Integration testing is a special path: it tests an upstream build of hcc and
// does not need coverage across older builds of hcc or cuda. It is closer to a
// compiler (hcc) unit test. params.hcc_integration_test is set in the HCC build.
if( params.hcc_integration_test )
{
    println 'HCC integration testing'

    node( 'docker && rocm' )
    {
        hcc_integration_testing( '--device=/dev/kfd', 'hcc-ctu', build_config )
    }

    // Leave the script here so the parallel builds below are not launched
    return
}
|
||||
|
||||
// Launch 3 builds in parallel: hcc-ctu, hcc-1.6 and cuda
parallel hcc_ctu:
{
    node( 'docker && rocm' )
    {
        String compiler_label = 'hcc-ctu'
        String base_image = 'compute-artifactory:5001/radeonopencompute/hcc/clang_tot_upgrade/hcc-lc-ubuntu-16.04:latest'
        String container_args = '--device=/dev/kfd'

        // Fetch the sources; the result is the workspace-relative source directory
        String hip_source_dir = checkout_and_version( compiler_label )

        // Binary directory path derived from the build config
        String hip_build_dir = build_directory_rel( build_config )

        // Create/reuse a docker image that represents the hip build environment
        def build_env_image = docker_build_image( compiler_label, 'hip', ' --pull', hip_source_dir, base_image )

        // Record system information in the log
        build_env_image.inside( container_args )
        {
            sh """#!/usr/bin/env bash
                set -x
                /opt/rocm/bin/rocm_agent_enumerator -t ALL
                /opt/rocm/bin/hcc --version
                """
        }

        // Build hip inside of the build environment
        docker_build_inside_image( build_env_image, container_args, compiler_label, '', build_config, hip_source_dir, hip_build_dir )

        // After a successful build, upload a docker image of the results
        String published_image = docker_upload_artifactory( compiler_label, job_name, base_image, hip_source_dir, hip_build_dir )

        // The docker-hub push is opt-in through the job parameter
        if( params.push_image_to_docker_hub )
        {
            docker_upload_dockerhub( job_name, published_image, 'rocm' )
            docker_clean_images( 'rocm', published_image )
        }

        docker_clean_images( job_name, published_image )
    }
},
|
||||
hcc_1_6:
{
    node( 'docker && rocm' )
    {
        String compiler_label = 'hcc-1.6'
        String base_image = 'compute-artifactory:5001/radeonopencompute/hcc/roc-1.6.x/hcc-lc-ubuntu-16.04:latest'
        String container_args = '--device=/dev/kfd'

        // Fetch the sources; the result is the workspace-relative source directory
        String hip_source_dir = checkout_and_version( compiler_label )

        // Binary directory path derived from the build config
        String hip_build_dir = build_directory_rel( build_config )

        // Create/reuse a docker image that represents the hip build environment
        def build_env_image = docker_build_image( compiler_label, 'hip', ' --pull', hip_source_dir, base_image )

        // Record system information in the log
        build_env_image.inside( container_args )
        {
            sh """#!/usr/bin/env bash
                set -x
                /opt/rocm/bin/rocm_agent_enumerator -t ALL
                /opt/rocm/bin/hcc --version
                """
        }

        // Build hip inside of the build environment
        docker_build_inside_image( build_env_image, container_args, compiler_label, '', build_config, hip_source_dir, hip_build_dir )

        // hip-hcc-1.6 images are intentionally not uploaded to artifactory at this time:
        // skipping the upload (and the image cleanup that would follow it) saves about a
        // minute and nobody appears to need those images.
    }
},
|
||||
nvcc:
{
    node( 'docker && cuda' )
    {
        // String constants customizing behavior for cuda
        String compiler_label = 'nvcc-8.0'
        String base_image = 'nvidia/cuda:8.0-devel'

        // This unfortunately hardcodes the driver version nvidia_driver_375.74 in the volume mount.
        // Research whether the volume driver can name volumes without the driver version.
        String container_args = '''--device=/dev/nvidiactl --device=/dev/nvidia0 --device=/dev/nvidia-uvm --device=/dev/nvidia-uvm-tools
--volume-driver=nvidia-docker --volume=nvidia_driver_375.74:/usr/local/nvidia:ro'''

        // Fetch the sources; the result is the workspace-relative source directory
        String hip_source_dir = checkout_and_version( compiler_label )

        // Binary directory path derived from the build config
        String hip_build_dir = build_directory_rel( build_config )

        // The base image is a public nvidia image, pulled on every build
        def build_env_image = docker_build_image( compiler_label, 'hip', ' --pull', hip_source_dir, base_image )

        // Record system information in the log
        build_env_image.inside( container_args )
        {
            sh """#!/usr/bin/env bash
                set -x
                nvidia-smi
                nvcc --version
                """
        }

        // Build hip inside of the build environment
        docker_build_inside_image( build_env_image, container_args, compiler_label, "-DHIP_NVCC_FLAGS=--Wno-deprecated-gpu-targets", build_config, hip_source_dir, hip_build_dir )
    }
}
|
||||
+5
-4
@@ -29,9 +29,10 @@ HIP releases are typically of two types. The tag naming convention is different
|
||||
- [Installation](INSTALL.md)
|
||||
- [HIP FAQ](docs/markdown/hip_faq.md)
|
||||
- [HIP Kernel Language](docs/markdown/hip_kernel_language.md)
|
||||
- [HIP Runtime API (Doxygen)](http://gpuopen-professionalcompute-tools.github.io/HIP)
|
||||
- [HIP Runtime API (Doxygen)](http://rocm-developer-tools.github.io/HIP)
|
||||
- [HIP Porting Guide](docs/markdown/hip_porting_guide.md)
|
||||
- [HIP Porting Driver Guide](docs/markdown/hip_porting_driver_api.md)
|
||||
- [HIP Programming Guide](docs/markdown/hip_programming_guide.md)
|
||||
- [HIP Profiling ](docs/markdown/hip_profiling.md)
|
||||
- [HIP Debugging](docs/markdown/hip_debugging.md)
|
||||
- [HIP Terminology](docs/markdown/hip_terms.md) (including Rosetta Stone of GPU computing terms across CUDA/HIP/HC/AMP/OpenCL)
|
||||
@@ -122,8 +123,8 @@ make
|
||||
|
||||
|
||||
## More Examples
|
||||
The GitHub repository [HIP-Examples](https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP-Examples.git) contains a hipified version of the popular Rodinia benchmark suite.
|
||||
The README with the procedures and tips the team used during this porting effort is here: [Rodinia Porting Guide](https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP-Examples/blob/master/rodinia_3.0/hip/README.hip_porting)
|
||||
The GitHub repository [HIP-Examples](https://github.com/ROCm-Developer-Tools/HIP-Examples.git) contains a hipified version of the popular Rodinia benchmark suite.
|
||||
The README with the procedures and tips the team used during this porting effort is here: [Rodinia Porting Guide](https://github.com/ROCm-Developer-Tools/HIP-Examples/blob/master/rodinia_3.0/hip/README.hip_porting)
|
||||
|
||||
## Tour of the HIP Directories
|
||||
* **include**:
|
||||
@@ -141,6 +142,6 @@ The README with the procedures and tips the team used during this porting effort
|
||||
* **doc**: Documentation - markdown and doxygen info
|
||||
|
||||
## Reporting an issue
|
||||
Use the [GitHub issue tracker] (https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP/issues).
|
||||
Use the [GitHub issue tracker](https://github.com/ROCm-Developer-Tools/HIP/issues).
|
||||
If reporting a bug, include the output of "hipconfig --full" and samples/1_hipInfo/hipInfo (if possible).
|
||||
|
||||
|
||||
+15
-2
@@ -1,12 +1,25 @@
|
||||
# Release notes
|
||||
|
||||
We have attempted to document known bugs and limitations - in particular the [HIP Kernel Language](docs/markdown/hip_kernel_language.md) document uses the phrase "Under Development", and the [HIP Runtime API bug list](http://gpuopen-professionalcompute-tools.github.io/HIP/bug.html) lists known bugs.
|
||||
We have attempted to document known bugs and limitations - in particular the [HIP Kernel Language](docs/markdown/hip_kernel_language.md) document uses the phrase "Under Development", and the [HIP Runtime API bug list](http://rocm-developer-tools.github.io/HIP/bug.html) lists known bugs.
|
||||
|
||||
|
||||
===================================================================================================
|
||||
|
||||
|
||||
## Revision History:
|
||||
|
||||
===================================================================================================
|
||||
Release: 1.3
|
||||
Date: 2017.08.16
|
||||
- hipcc now auto-detects amdgcn arch. No need to specify the arch when building for same system.
|
||||
- HIP texture support
|
||||
- Implemented __threadfence_support
|
||||
- Improvements in HIP context management logic
|
||||
- Bug fixes in several APIs including hipDeviceGetPCIBusId, hipEventDestroy, hipMemcpy2DAsync
|
||||
- Updates to hipify-clang and documentation
|
||||
- HIP development now fully open and on GitHub. Developers should submit pull requests.
|
||||
|
||||
|
||||
===================================================================================================
|
||||
Release: 1.2
|
||||
Date: 2017.06.29
|
||||
@@ -113,7 +126,7 @@ Date: 2016.06.06
|
||||
- Add cross-linking support between G++ and HCC, in particular for interfaces that use
|
||||
standard C++ libraries (ie std::vectors, std::strings). HIPCC now uses libstdc++ by default on the HCC
|
||||
compilation path.
|
||||
- More samples including gpu-burn, SHOC, nbody, rtm. See [HIP-Examples](https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP-Examples)
|
||||
- More samples including gpu-burn, SHOC, nbody, rtm. See [HIP-Examples](https://github.com/ROCm-Developer-Tools/HIP-Examples)
|
||||
|
||||
|
||||
===================================================================================================
|
||||
|
||||
+80
-56
@@ -47,7 +47,7 @@ sub parse_config_file {
|
||||
}
|
||||
|
||||
$verbose = $ENV{'HIPCC_VERBOSE'} // 0;
|
||||
# Verbose: 0x1=commands, 0x2=paths, 0x4=hippc args
|
||||
# Verbose: 0x1=commands, 0x2=paths, 0x4=hipcc args
|
||||
|
||||
$HIP_PATH=$ENV{'HIP_PATH'} // dirname (dirname $0); # use parent directory of hipcc
|
||||
|
||||
@@ -75,6 +75,7 @@ $target_gfx801 = 0;
|
||||
$target_gfx802 = 0;
|
||||
$target_gfx803 = 0;
|
||||
$target_gfx900 = 0;
|
||||
$default_amdgpu_target = 1;
|
||||
|
||||
if ($HIP_PLATFORM eq "hcc") {
|
||||
$HSA_PATH=$ENV{'HSA_PATH'} // "/opt/rocm/hsa";
|
||||
@@ -103,31 +104,9 @@ if ($HIP_PLATFORM eq "hcc") {
|
||||
|
||||
$HIPLDFLAGS = `${HCC_HOME}/bin/hcc-config --ldflags`;
|
||||
|
||||
$ROCM_AGENT_ENUM = "${ROCM_PATH}/bin/rocm_agent_enumerator";
|
||||
|
||||
my $myAgents = `${ROCM_AGENT_ENUM} -t GPU`;
|
||||
my @agentsLine = split('\n', $myAgents);
|
||||
|
||||
foreach my $val (@agentsLine) {
|
||||
if($val eq "gfx701") {
|
||||
$target_gfx701 = 1;
|
||||
}
|
||||
if($val eq "gfx801") {
|
||||
$target_gfx801 = 1;
|
||||
}
|
||||
if($val eq "gfx802") {
|
||||
$target_gfx802 = 1;
|
||||
}
|
||||
if($val eq "gfx803") {
|
||||
$target_gfx803 = 1;
|
||||
}
|
||||
if($val eq "gfx900") {
|
||||
$target_gfx900 = 1;
|
||||
}
|
||||
}
|
||||
|
||||
#### GCC system includes workaround ####
|
||||
$HCC_WA_FLAGS = " ";
|
||||
$HOST_OSNAME= `cat /etc/os-release | grep "^ID\=" | cut -d= -f2 | tr -d '\n'`;
|
||||
if ($HCC_VERSION_MAJOR eq 1) {
|
||||
my $GCC_CUR_VER = `gcc -dumpversion`;
|
||||
my $GPP_CUR_VER = `g++ -dumpversion`;
|
||||
@@ -138,7 +117,8 @@ if ($HIP_PLATFORM eq "hcc") {
|
||||
|
||||
# Only include the libstdc++ headers and libraries flags explicitly if the g++ is older than version 5.
|
||||
# That's because HCC already uses libstdc++ by default if a newer g++/libstdc++ is available
|
||||
if (${GCC_CUR_VER} eq ${GPP_CUR_VER} and $GPP_VER_FIELDS[0] < 5) {
|
||||
# Cent OS 7 and RHEL 7.4 cannot use libstdc++ for compilation, default to libc++
|
||||
if (${GCC_CUR_VER} eq ${GPP_CUR_VER} and $GPP_VER_FIELDS[0] < 5 and ($HOST_OSNAME ne "\"centos\"") and ($HOST_OSNAME ne "\"rhel\"")) {
|
||||
$HCC_WA_FLAGS .= " -stdlib=libstdc++ -I/usr/include/x86_64-linux-gnu -I/usr/include/x86_64-linux-gnu/c++/${GCC_CUR_VER} -I/usr/include/c++/${GCC_CUR_VER} ";
|
||||
# Add C++ libs for GCC.
|
||||
$HIPLDFLAGS .= " -lstdc++";
|
||||
@@ -146,7 +126,6 @@ if ($HIP_PLATFORM eq "hcc") {
|
||||
}
|
||||
|
||||
# Force -stdlib=libc++ on UB14.04
|
||||
$HOST_OSNAME= `cat /etc/os-release | grep "^ID\=" | cut -d= -f2 | tr -d '\n'`;
|
||||
$HOST_OSVER= `cat /etc/os-release | grep "^VERSION_ID\=" | cut -d= -f2 | tr -d '\n'`;
|
||||
if ($HOST_OSNAME eq "ubuntu" and $HOST_OSVER eq "\"14.04\"") {
|
||||
$HIPCXXFLAGS .= " -stdlib=libc++";
|
||||
@@ -278,25 +257,32 @@ foreach $arg (@ARGV)
|
||||
$HIPCXXFLAGS .= " -stdlib=libc++";
|
||||
$setStdLib = 1;
|
||||
}
|
||||
|
||||
# TODO: Add support for comma separated list like HCC_AMDGPU_TARGET
|
||||
if($arg eq '--amdgpu-target=gfx701')
|
||||
{
|
||||
$target_gfx701 = 1;
|
||||
$default_amdgpu_target = 0;
|
||||
}
|
||||
if($arg eq '--amdgpu-target=gfx801')
|
||||
{
|
||||
$target_gfx801 = 1;
|
||||
$default_amdgpu_target = 0;
|
||||
}
|
||||
if($arg eq '--amdgpu-target=gfx802')
|
||||
{
|
||||
$target_gfx802 = 1;
|
||||
$default_amdgpu_target = 0;
|
||||
}
|
||||
if($arg eq '--amdgpu-target=gfx803')
|
||||
{
|
||||
$target_gfx803 = 1;
|
||||
$default_amdgpu_target = 0;
|
||||
}
|
||||
if($arg eq '--amdgpu-target=gfx900')
|
||||
{
|
||||
$target_gfx900 = 1;
|
||||
$default_amdgpu_target = 0;
|
||||
}
|
||||
|
||||
if(($trimarg eq '-stdlib=libstdc++') and ($setStdLib eq 0))
|
||||
@@ -358,38 +344,76 @@ foreach $arg (@ARGV)
|
||||
}
|
||||
$toolArgs .= " $arg" unless $swallowArg;
|
||||
}
|
||||
if(defined $ENV{HCC_AMDGPU_TARGET})
|
||||
{
|
||||
foreach my $target (split(/,/, $ENV{HCC_AMDGPU_TARGET}))
|
||||
{
|
||||
if($target eq 'gfx701')
|
||||
{
|
||||
$target_gfx701 = 1;
|
||||
}
|
||||
if($target eq 'gfx801')
|
||||
{
|
||||
$target_gfx801 = 1;
|
||||
}
|
||||
if($target eq 'gfx802')
|
||||
{
|
||||
$target_gfx802 = 1;
|
||||
}
|
||||
if($target eq 'gfx803')
|
||||
{
|
||||
$target_gfx803 = 1;
|
||||
}
|
||||
if($target eq 'gfx900')
|
||||
{
|
||||
$target_gfx900 = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
if ($target_gfx701 eq 0 and $target_gfx801 eq 0 and $target_gfx802 eq 0 and $target_gfx803 eq 0 and $target_gfx900 eq 0)
|
||||
{
|
||||
$target_gfx803 = 1;
|
||||
}
|
||||
|
||||
if($HIP_PLATFORM eq "hcc"){
|
||||
# No AMDGPU target specified at commandline. So look for HCC_AMDGPU_TARGET
|
||||
if($default_amdgpu_target eq 1 and defined $ENV{HCC_AMDGPU_TARGET})
|
||||
{
|
||||
foreach my $target (split(/,/, $ENV{HCC_AMDGPU_TARGET}))
|
||||
{
|
||||
if($target eq 'gfx701')
|
||||
{
|
||||
$target_gfx701 = 1;
|
||||
$default_amdgpu_target = 0;
|
||||
}
|
||||
if($target eq 'gfx801')
|
||||
{
|
||||
$target_gfx801 = 1;
|
||||
$default_amdgpu_target = 0;
|
||||
}
|
||||
if($target eq 'gfx802')
|
||||
{
|
||||
$target_gfx802 = 1;
|
||||
$default_amdgpu_target = 0;
|
||||
}
|
||||
if($target eq 'gfx803')
|
||||
{
|
||||
$target_gfx803 = 1;
|
||||
$default_amdgpu_target = 0;
|
||||
}
|
||||
if($target eq 'gfx900')
|
||||
{
|
||||
$target_gfx900 = 1;
|
||||
$default_amdgpu_target = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
# Else try using rocm_agent_enumerator
|
||||
if($default_amdgpu_target eq 1)
|
||||
{
|
||||
$ROCM_AGENT_ENUM = "${ROCM_PATH}/bin/rocm_agent_enumerator";
|
||||
|
||||
my $myAgents = `${ROCM_AGENT_ENUM} -t GPU`;
|
||||
my @agentsLine = split('\n', $myAgents);
|
||||
|
||||
foreach my $val (@agentsLine) {
|
||||
if($val eq "gfx701") {
|
||||
$target_gfx701 = 1;
|
||||
$default_amdgpu_target = 0;
|
||||
}
|
||||
if($val eq "gfx801") {
|
||||
$target_gfx801 = 1;
|
||||
$default_amdgpu_target = 0;
|
||||
}
|
||||
if($val eq "gfx802") {
|
||||
$target_gfx802 = 1;
|
||||
$default_amdgpu_target = 0;
|
||||
}
|
||||
if($val eq "gfx803") {
|
||||
$target_gfx803 = 1;
|
||||
$default_amdgpu_target = 0;
|
||||
}
|
||||
if($val eq "gfx900") {
|
||||
$target_gfx900 = 1;
|
||||
$default_amdgpu_target = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
# rocm_agent_enumerator failed! Throw an error and die if linking is required
|
||||
if ($default_amdgpu_target eq 1 and $compileOnly eq 0)
|
||||
{
|
||||
print "No valid AMD GPU target was either specified or found. Please specify a valid target using --amdgpu-target=" and die();
|
||||
}
|
||||
|
||||
$ENV{HCC_EXTRA_LIBRARIES}="$HIP_PATH/lib/hip_hc.ll\n";
|
||||
|
||||
|
||||
+1
-1
@@ -1,7 +1,7 @@
|
||||
#!/usr/bin/perl -w
|
||||
|
||||
$HIP_BASE_VERSION_MAJOR = "1";
|
||||
$HIP_BASE_VERSION_MINOR = "2";
|
||||
$HIP_BASE_VERSION_MINOR = "3";
|
||||
|
||||
# Need perl > 5.10 to use logic-defined or
|
||||
use 5.006; use v5.10.1;
|
||||
|
||||
+12
-7
@@ -247,6 +247,7 @@ while (@ARGV) {
|
||||
$ft{'err'} += s/\bcudaErrorInvalidValue\b/hipErrorInvalidValue/g;
|
||||
$ft{'err'} += s/\bcudaErrorInvalidResourceHandle\b/hipErrorInvalidResourceHandle/g;
|
||||
$ft{'err'} += s/\bcudaErrorInvalidDevice\b/hipErrorInvalidDevice/g;
|
||||
$ft{'err'} += s/\bcudaErrorInvalidDevicePointer\b/hipErrorInvalidDevicePointer/g;
|
||||
$ft{'err'} += s/\bcudaErrorNoDevice\b/hipErrorNoDevice/g;
|
||||
$ft{'err'} += s/\bcudaErrorNotReady\b/hipErrorNotReady/g;
|
||||
$ft{'err'} += s/\bcudaErrorUnknown\b/hipErrorUnknown/g;
|
||||
@@ -280,9 +281,11 @@ while (@ARGV) {
|
||||
$ft{'mem'} += s/\bcudaMemcpyKind\b/hipMemcpyKind/g;
|
||||
|
||||
$ft{'mem'} += s/\bcudaPointerAttributes\b/hipPointerAttribute_t/g;
|
||||
$ft{'mem'} += s/\bcudaPointerGetAttributes\b/hipPointerGetAttribute_t/g;
|
||||
|
||||
$ft{'mem'} += s/\bcudaMemcpy2D\b/hipMemcpy2D/g;
|
||||
$ft{'mem'} += s/\bcudaMemcpy2DToArray\b/hipMemcpy2DToArray/g;
|
||||
$ft{'mem'} += s/\bcudaMemcpyToArray\b/hipMemcpyToArray/g;
|
||||
|
||||
#--------
|
||||
# Memory management:
|
||||
@@ -302,6 +305,7 @@ while (@ARGV) {
|
||||
$ft{'mem'} += s/\bcudaHostGetDevicePointer\b/hipHostGetDevicePointer/g;
|
||||
|
||||
$ft{'mem'} += s/\bcudaMallocArray\b/hipMallocArray/g;
|
||||
$ft{'mem'} += s/\bcudaFreeArray\b/hipFreeArray/g;
|
||||
$ft{'mem'} += s/\bcudaMallocPitch\b/hipMallocPitch/g;
|
||||
|
||||
|
||||
@@ -323,9 +327,6 @@ while (@ARGV) {
|
||||
$ft{'coord_func'} += s/\bgridDim\.y\b/hipGridDim_y/g;
|
||||
$ft{'coord_func'} += s/\bgridDim\.z\b/hipGridDim_z/g;
|
||||
|
||||
# hack to avoid replacing hipDeviceProp.warpSize call
|
||||
$ft{'special_func'} += s/([^.])\bwarpSize\b/$1hipWarpSize/g;
|
||||
|
||||
|
||||
#--------
|
||||
# Events
|
||||
@@ -337,6 +338,7 @@ while (@ARGV) {
|
||||
$ft{'event'} += s/\bcudaEventElapsedTime\b/hipEventElapsedTime/g;
|
||||
$ft{'event'} += s/\bcudaEventSynchronize\b/hipEventSynchronize/g;
|
||||
$ft{'event'} += s/\bcudaEventDisableTiming\b/hipEventDisableTiming/g;
|
||||
$ft{'event'} += s/\bcudaEventQuery\b/hipEventQuery/g;
|
||||
|
||||
#--------
|
||||
# Streams
|
||||
@@ -487,15 +489,15 @@ while (@ARGV) {
|
||||
my $kernelName;
|
||||
|
||||
# Handle the <<numBlocks, blockDim, sharedSize, stream>>> syntax:
|
||||
$k += s/(\w+)\s*(<.*>)?\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>([\s*\\]*)\(/hipLaunchKernel(HIP_KERNEL_NAME($1$2), dim3($3), dim3($4), $5, $6, /g;
|
||||
$k += s/(\w+)\s*(<.*>)?\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>([\s*\\]*)\(/hipLaunchKernelGGL(($1$2), dim3($3), dim3($4), $5, $6, /g;
|
||||
$kernelName = $1 if $k;
|
||||
|
||||
# Handle the <<numBlocks, blockDim, sharedSize>>> syntax:
|
||||
$k += s/(\w+)\s*(<.*>)?\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>([\s*\\]*)\(/hipLaunchKernel(HIP_KERNEL_NAME($1$2), dim3($3), dim3($4), $5, 0, /g;
|
||||
$k += s/(\w+)\s*(<.*>)?\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>([\s*\\]*)\(/hipLaunchKernelGGL(($1$2), dim3($3), dim3($4), $5, 0, /g;
|
||||
$kernelName = $1 if $k;
|
||||
|
||||
# Handle the <<numBlocks, blockDim>>> syntax:
|
||||
$k += s/(\w+)\s*(<.*>)?\s*<<<\s*(.+)\s*,\s*(.+)\s*>>>([\s\\]*)\(/hipLaunchKernel(HIP_KERNEL_NAME($1$2), dim3($3), dim3($4), 0, 0, /g;
|
||||
$k += s/(\w+)\s*(<.*>)?\s*<<<\s*(.+)\s*,\s*(.+)\s*>>>([\s\\]*)\(/hipLaunchKernelGGL(($1$2), dim3($3), dim3($4), 0, 0, /g;
|
||||
$kernelName = $1 if $k;
|
||||
|
||||
$ft{'kern'} += $k;
|
||||
@@ -511,11 +513,14 @@ while (@ARGV) {
|
||||
$ft{'tex'} += s/\bcudaFilterModePoint\b/hipFilterModePoint/g;
|
||||
$ft{'tex'} += s/\bcudaReadModeElementType\b/hipReadModeElementType/g;
|
||||
|
||||
$ft{'tex'} += s/\bcudaArray\b/hipArrary/g;
|
||||
$ft{'tex'} += s/\bcudaArray\b/hipArray/g;
|
||||
$ft{'tex'} += s/\bcudaCreateChannelDesc\b/hipCreateChannelDesc/g;
|
||||
$ft{'tex'} += s/\bcudaBindTexture\b/hipBindTexture/g;
|
||||
$ft{'tex'} += s/\bcudaBindTextureToArray\b/hipBindTextureToArray/g;
|
||||
$ft{'tex'} += s/\bcudaUnbindTexture\b/hipUnbindTexture/g;
|
||||
$ft{'tex'} += s/\bcudaChannelFormatKindFloat\b/hipChannelFormatKindFloat/g;
|
||||
$ft{'tex'} += s/\bcudaAddressMode/hipAddressMode/g;
|
||||
$ft{'tex'} += s/\bcudaFilterMode/hipFilterMode/g;
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -520,7 +520,7 @@ macro(HIP_ADD_EXECUTABLE hip_target)
|
||||
if("x${HCC_HOME}" STREQUAL "x")
|
||||
set(HCC_HOME "/opt/rocm/hcc")
|
||||
endif()
|
||||
set(CMAKE_HIP_LINK_EXECUTABLE "${HIP_HIPCC_CMAKE_LINKER_HELPER} ${HCC_HOME} <FLAGS> <CMAKE_CXX_LINK_FLAGS> <LINK_FLAGS> <OBJECTS> -o <TARGET>")
|
||||
set(CMAKE_HIP_LINK_EXECUTABLE "${HIP_HIPCC_CMAKE_LINKER_HELPER} ${HCC_HOME} <FLAGS> <CMAKE_CXX_LINK_FLAGS> <LINK_FLAGS> <OBJECTS> -o <TARGET> <LINK_LIBRARIES>")
|
||||
add_executable(${hip_target} ${_cmake_options} ${_generated_files} ${_sources})
|
||||
set_target_properties(${hip_target} PROPERTIES LINKER_LANGUAGE HIP)
|
||||
endmacro()
|
||||
|
||||
@@ -0,0 +1,26 @@
|
||||
# Parameters related to building hip
|
||||
ARG base_image
|
||||
|
||||
FROM ${base_image}
|
||||
MAINTAINER Kent Knox <kent.knox@amd>
|
||||
|
||||
ARG user_uid
|
||||
|
||||
# Install Packages
|
||||
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
|
||||
sudo \
|
||||
build-essential \
|
||||
cmake \
|
||||
git \
|
||||
libelf-dev \
|
||||
rpm \
|
||||
&& \
|
||||
apt-get clean && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# docker pipeline runs containers with particular uid
|
||||
# create a jenkins user with this specific uid so it can use sudo privileges
|
||||
# Grant any member of sudo group password-less sudo privileges
|
||||
RUN useradd --create-home -u ${user_uid} -G sudo --shell /bin/bash jenkins && \
|
||||
mkdir -p /etc/sudoers.d/ && \
|
||||
echo '%sudo ALL=(ALL) NOPASSWD:ALL' | tee /etc/sudoers.d/sudo-nopasswd
|
||||
@@ -0,0 +1,19 @@
|
||||
# Parameters related to building hip
|
||||
ARG base_image
|
||||
|
||||
FROM ${base_image}
|
||||
MAINTAINER Kent Knox <kent.knox@amd>
|
||||
|
||||
# Copy the debian package of hip into the container from host
|
||||
COPY *.deb /tmp/
|
||||
|
||||
# Install the debian package
|
||||
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y curl \
|
||||
&& apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends --allow-unauthenticated -y \
|
||||
/tmp/hip_base-*.deb \
|
||||
/tmp/hip_hcc-*.deb \
|
||||
/tmp/hip_doc-*.deb \
|
||||
/tmp/hip_samples-* \
|
||||
&& rm -f /tmp/*.deb \
|
||||
&& apt-get clean \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
La diferencia del archivo ha sido suprimido porque es demasiado grande
Cargar Diff
La diferencia del archivo ha sido suprimido porque es demasiado grande
Cargar Diff
+14
-19
@@ -48,31 +48,24 @@ The HIP API documentation describes each API and its limitations, if any, compar
|
||||
|
||||
### What is not supported?
|
||||
#### Runtime/Driver API features
|
||||
At a high-level, the following features are not supported:
|
||||
- Textures
|
||||
At a high-level, the following features are not supported:
|
||||
- Textures (partial support available)
|
||||
- Dynamic parallelism (CUDA 5.0)
|
||||
- Managed memory (CUDA 6.5)
|
||||
- Graphics interoperability with OpenGL or Direct3D
|
||||
- CUDA Driver API
|
||||
- CUDA IPC Functions (Under Development)
|
||||
- CUDA array, mipmappedArray and pitched memory
|
||||
- MemcpyToSymbol functions
|
||||
- Queue priority controls
|
||||
|
||||
See the [API Support Table](CUDA_Runtime_API_functions_supported_by_HIP.md) for more detailed information.
|
||||
|
||||
#### Kernel language features
|
||||
- Device-side dynamic memory allocations (malloc, free, new, delete) (CUDA 4.0)
|
||||
- C++-style device-side dynamic memory allocations (free, new, delete) (CUDA 4.0)
|
||||
- Virtual functions, indirect functions and try/catch (CUDA 4.0)
|
||||
- `__prof_trigger`
|
||||
- PTX assembly (CUDA 4.0). HCC supports inline GCN assembly.
|
||||
- Several kernel features are under development. See the [HIP Kernel Language](hip_kernel_language.md) for more information. These include:
|
||||
- printf
|
||||
- assert
|
||||
- `__restrict__`
|
||||
- `__threadfence*_`, `__syncthreads*`
|
||||
- Unbounded loop unroll
|
||||
|
||||
|
||||
|
||||
### Is HIP a drop-in replacement for CUDA?
|
||||
@@ -101,18 +94,20 @@ However, we can provide a rough summary of the features included in each CUDA SD
|
||||
- Per-thread-streams (under development)
|
||||
- C++11 (HCC supports all of C++11, all of C++14 and some C++17 features)
|
||||
- CUDA 7.5
|
||||
- float16
|
||||
- float16 (supported)
|
||||
- CUDA 8.0
|
||||
- TBD.
|
||||
- Page Migration including cudaMemAdvise, cudaMemPrefetch, other cudaMem* APIs (not supported)
|
||||
|
||||
|
||||
### What libraries does HIP support?
|
||||
HIP includes growing support for the 4 key math libraries using hcBlas, hcFft, hcrng and hcsparse.
|
||||
These offer pointer-based memory interfaces (as opposed to opaque buffers) and can be easily interfaced with other HCC applications. Developers should use conditional compilation if portability to nvcc systems is desired - using calls to cu* routines on one path and hc* routines on the other.
|
||||
HIP includes growing support for the 4 key math libraries using hcBlas, hcFft, hcrng and hcsparse, as well as MIOpen for machine intelligence applications.
|
||||
These offer pointer-based memory interfaces (as opposed to opaque buffers) and can be easily interfaced with other HIP applications.
|
||||
The hip interfaces support both ROCm and CUDA paths, with familiar library interfaces.
|
||||
|
||||
- [hcblas](https://bitbucket.org/multicoreware/hcblas)
|
||||
- [hcfft](https://bitbucket.org/multicoreware/hcfft)
|
||||
- [hcsparse](https://bitbucket.org/multicoreware/hcsparse)
|
||||
- [hcrng](https://bitbucket.org/multicoreware/hcrng)
|
||||
- [hipBlas](https://github.com/ROCmSoftwarePlatform/hipBLAS), which utilizes [rocBlas](https://github.com/ROCmSoftwarePlatform/rocBLAS).
|
||||
- [hipfft](https://github.com/ROCmSoftwarePlatform/hcFFT)
|
||||
- [hipsparse](https://github.com/ROCmSoftwarePlatform/hcSPARSE)
|
||||
- [hiprng](https://github.com/ROCmSoftwarePlatform/hcrng)
|
||||
|
||||
Additionally, some of the cublas routines are automatically converted to hipblas equivalents by the hipify-clang tool. These APIs use cublas or hcblas depending on the platform, and replace the need
|
||||
to use conditional compilation.
|
||||
@@ -219,7 +214,7 @@ If platform portability is important, use #ifdef __HIP_PLATFORM_NVCC__ to guard
|
||||
### On HCC, can I use HC functionality with HIP?
|
||||
Yes.
|
||||
The code can include hc.hpp and use HC functions inside the kernel. A typical use-case is to use AMD-specific hardware features such as the permute, swizzle, or DPP operations.
|
||||
The "-stdlib=libc++" must be passed to hipcc in order to compile hc.hpp. See the 'bit_extract' sample for an example.
|
||||
See the 'bit_extract' sample for an example.
|
||||
|
||||
Also these functions can be used to extract HCC accelerator and accelerator_view structures from the HIP deviceId and hipStream_t:
|
||||
hipHccGetAccelerator(int deviceId, hc::accelerator *acc);
|
||||
|
||||
@@ -167,7 +167,7 @@ The `__shared__` keyword is supported.
|
||||
Managed memory, including the `__managed__` keyword, is not supported in HIP.
|
||||
|
||||
### `__restrict__`
|
||||
The `__restrict__` keyword tells the compiler that the associated memory pointer will not alias with any other pointer in the kernel or function. This feature can help the compiler generate better code. In most cases, all pointer arguments must use this keyword to realize the benefit. hcc support for the `__restrict__` qualifier on kernel arguments is under development.
|
||||
The `__restrict__` keyword tells the compiler that the associated memory pointer will not alias with any other pointer in the kernel or function. This feature can help the compiler generate better code. In most cases, all pointer arguments must use this keyword to realize the benefit.
|
||||
|
||||
|
||||
## Built-In Variables
|
||||
@@ -603,6 +603,7 @@ The Cuda `__prof_trigger()` instruction is not supported.
|
||||
## Assert
|
||||
|
||||
The assert function is under development.
|
||||
HIP does support an "abort" call which will terminate the process execution from inside the kernel.
|
||||
|
||||
## Printf
|
||||
|
||||
@@ -690,7 +691,6 @@ for (int i=0; i<16; i++) ...
|
||||
```
|
||||
|
||||
|
||||
Unbounded loop unroll is under development on HCC compiler.
|
||||
```
|
||||
#pragma unroll /* hint to compiler to completely unroll next loop. */
|
||||
for (int i=0; i<16; i++) ...
|
||||
@@ -699,8 +699,18 @@ for (int i=0; i<16; i++) ...
|
||||
|
||||
## In-Line Assembly
|
||||
|
||||
In-line assembly, including in-line PTX, in-line HSAIL and in-line GCN ISA, is not supported. Users who need these features should employ conditional compilation to provide different functionally equivalent implementations on each target platform.
|
||||
GCN ISA in-line assembly is supported. For example:
|
||||
|
||||
```
|
||||
asm volatile ("v_mac_f32_e32 %0, %2, %3" : "=v" (out[i]) : "0"(out[i]), "v" (a), "v" (in[i]));
|
||||
```
|
||||
|
||||
We insert the GCN ISA into the kernel using the `asm()` assembler statement.
|
||||
The `volatile` keyword is used so that the optimizers do not change the number of volatile operations or change their order of execution relative to other volatile operations.
|
||||
`v_mac_f32_e32` is the GCN instruction; for more information, please refer to the [AMD GCN3 ISA architecture manual](http://gpuopen.com/compute-product/amd-gcn3-isa-architecture-manual/).
|
||||
Each operand is referenced by its index: `%` followed by the operand's position in the ordered list of operands.
|
||||
`"v"` is the constraint code (for target-specific AMDGPU) for a 32-bit VGPR register; for more information, please refer to the [Supported Constraint Code List for AMDGPU](https://llvm.org/docs/LangRef.html#supported-constraint-code-list).
|
||||
Output constraints are specified by an `"="` prefix as shown above ("=v"). This indicates that the assembly will write to this operand, and the operand will then be made available as a return value of the asm expression. Input constraints do not have a prefix - just the constraint code. The constraint string of `"0"` says to use the assigned register for output as an input as well (it being the 0'th constraint).
|
||||
|
||||
## C++ Support
|
||||
The following C++ features are not supported:
|
||||
|
||||
@@ -1,39 +0,0 @@
|
||||
# HIP Performance Optimizations
|
||||
|
||||
Please note that this document lists possible ways for experimenting with HIP stack to gain performance. Performance may vary from platform to platform.
|
||||
|
||||
### Unpinned Memory Transfer Optimizations
|
||||
|
||||
#### On Small BAR Setup
|
||||
|
||||
There are two possible ways to transfer data from host-to-device (H2D) and device-to-host(D2H)
|
||||
* Using Staging Buffers
|
||||
* Using PinInPlace
|
||||
|
||||
#### On Large BAR Setup
|
||||
|
||||
There are three possible ways to transfer data from host-to-device (H2D)
|
||||
* Using Staging Buffers
|
||||
* Using PinInPlace
|
||||
* Direct Memcpy
|
||||
|
||||
And there are two possible ways to transfer data from device-to-host (D2H)
|
||||
* Using Staging Buffers
|
||||
* Using PinInPlace
|
||||
|
||||
Some GPUs may not be able to directly access host memory, and in these cases we need to
|
||||
stage the copy through an optimized pinned staging buffer, to implement H2D and D2H copies.The copy is broken into buffer-sized chunks to limit the size of the buffer and also to provide better performance by overlapping the CPU copies with the DMA copies.
|
||||
|
||||
PinInPlace is another algorithm which pins the host memory "in-place", and copies it with the DMA engine.
|
||||
|
||||
By default staging buffers are used for unpinned memory transfers. Environment variables allow control over the unpinned copy algorithm and parameters:
|
||||
|
||||
- HIP_PININPLACE - This environment variable forces the use of PinInPlace logic for all unpinned memory copies
|
||||
|
||||
- HIP_OPTIMAL_MEM_TRANSFER- This environment variable enables a hybrid memory copy logic based on thresholds. These thresholds can be managed with following environment variables:
|
||||
- HIP_H2D_MEM_TRANSFER_THRESHOLD_STAGING_OR_PININPLACE - Threshold in bytes for H2D copy. For sizes smaller than threshold staging buffers logic would be used else PinInPlace logic.
|
||||
- HIP_H2D_MEM_TRANSFER_THRESHOLD_DIRECT_OR_STAGING - Threshold in bytes for H2D copy. For sizes smaller than threshold direct copy logic would be used else staging buffers logic.
|
||||
- HIP_D2H_MEM_TRANSFER_THRESHOLD - Threshold in bytes for D2H copy. For sizes smaller than threshold staging buffer logic would be used else PinInPlace logic.
|
||||
|
||||
|
||||
|
||||
@@ -405,7 +405,7 @@ Code should not assume a warp size of 32 or 64. See [Warp Cross-Lane Functions]
|
||||
|
||||
## memcpyToSymbol
|
||||
|
||||
HIP support for hipMemCpyToSymbol is complete. This feature allows a kernel
|
||||
HIP support for hipMemcpyToSymbol is complete. This feature allows a kernel
|
||||
to define a device-side data symbol which can be accessed on the host side. The symbol
|
||||
can be in __constant or device space.
|
||||
|
||||
|
||||
@@ -0,0 +1,89 @@
|
||||
# HIP Programming Guide
|
||||
|
||||
## Host Memory
|
||||
|
||||
### Introduction
|
||||
hipHostMalloc allocates pinned host memory which is mapped into the address space of all GPUs in the system.
|
||||
There are two use cases for this host memory:
|
||||
- Faster HostToDevice and DeviceToHost Data Transfers:
|
||||
The runtime tracks the hipHostMalloc allocations and can avoid some of the setup required for regular unpinned memory. For exact measurements on a specific system, experiment with --unpinned and --pinned switches for the hipBusBandwidth tool.
|
||||
- Zero-Copy GPU Access:
|
||||
GPU can directly access the host memory over the CPU/GPU interconnect, without need to copy the data. This avoids the need for the copy, but during the kernel access each memory access must traverse the interconnect, which can be tens of times slower than accessing the GPU's local device memory. Zero-copy memory can be a good choice when the memory accesses are infrequent (perhaps only once). Zero-copy memory is typically "Coherent" and thus not cached by the GPU but this can be overridden if desired and is explained in more detail below.
|
||||
|
||||
### Memory allocation flags
|
||||
hipHostMalloc always sets the hipHostMallocPortable and hipHostMallocMapped flags. Both usage models described above use the same allocation flags, and the difference is in how the surrounding code uses the host memory.
|
||||
See the hipHostMalloc API for more information.
|
||||
|
||||
|
||||
### Coherency Controls
|
||||
ROCm defines two coherency options for host memory:
|
||||
- Coherent memory : Supports fine-grain synchronization while the kernel is running. For example, a kernel can perform atomic operations that are visible to the host CPU or to other (peer) GPUs. Synchronization instructions include threadfence_system and C++11-style atomic operations. However, coherent memory cannot be cached by the GPU and thus may have lower performance.
|
||||
- Non-coherent memory : Can be cached by GPU, but cannot support synchronization while the kernel is running. Non-coherent memory can be optionally synchronized only at command (end-of-kernel or copy command) boundaries. This memory is appropriate for high-performance access when fine-grain synchronization is not required.
|
||||
|
||||
HIP provides the developer with controls to select which type of memory is used via allocation flags passed to hipHostMalloc and the HIP_HOST_COHERENT environment variable:
|
||||
- hipHostMallocCoherent=0, hipHostMallocNonCoherent=0: Use HIP_HOST_COHERENT environment variable:
|
||||
- If HIP_HOST_COHERENT is 1 or undefined, the host memory allocation is coherent.
|
||||
- If HIP_HOST_COHERENT is defined and 0, the host memory allocation is non-coherent.
|
||||
- hipHostMallocCoherent=1, hipHostMallocNonCoherent=0: The host memory allocation will be coherent. HIP_HOST_COHERENT env variable is ignored.
|
||||
- hipHostMallocCoherent=0, hipHostMallocNonCoherent=1: The host memory allocation will be non-coherent. HIP_HOST_COHERENT env variable is ignored.
|
||||
- hipHostMallocCoherent=1, hipHostMallocNonCoherent=1: Illegal.
|
||||
|
||||
|
||||
### Visibility of Zero-Copy Host Memory
|
||||
Coherent host memory is automatically visible at synchronization points.
|
||||
Non-coherent host memory visibility depends on the synchronization API used, as summarized in the table below.
|
||||
|
||||
| HIP API | Synchronization Effect | Fence | Coherent Host Memory Visibility | Non-Coherent Host Memory Visibility|
|
||||
| --- | --- | --- | --- | --- |
|
||||
| hipStreamSynchronize | host waits for all commands in the specified stream to complete | system-scope release | yes | yes |
|
||||
| hipDeviceSynchronize | host waits for all commands in all streams on the specified device to complete | system-scope release | yes | yes |
|
||||
| hipEventSynchronize | host waits for the specified event to complete | device-scope release | yes | depends - see below|
|
||||
| hipStreamWaitEvent | stream waits for the specified event to complete | none | yes | no |
|
||||
|
||||
|
||||
### hipEventSynchronize
|
||||
Developers can control the release scope for hipEvents:
|
||||
- By default, the GPU performs a device-scope acquire and release operation with each recorded event. This will make host and device memory visible to other commands executing on the same device.
|
||||
|
||||
A stronger system-level fence can be specified when the event is created with hipEventCreateWithFlags:
|
||||
- hipEventReleaseToSystem : Perform a system-scope release operation when the event is recorded. This will make both Coherent and Non-Coherent host memory visible to other agents in the system, but may involve heavyweight operations such as cache flushing. Coherent memory will typically use lighter-weight in-kernel synchronization mechanisms such as an atomic operation and thus does not need to use hipEventReleaseToSystem.
|
||||
|
||||
### Summary and Recommendations:
|
||||
|
||||
- Coherent host memory is the default and is the easiest to use since the memory is visible to the CPU at typical synchronization points. This memory allows in-kernel synchronization commands such as threadfence_system to work transparently.
|
||||
- HIP/ROCm also supports the ability to cache host memory in the GPU using the "Non-Coherent" host memory allocations. This can provide performance benefit, but care must be taken to use the correct synchronization.
|
||||
|
||||
|
||||
## Unpinned Memory Transfer Optimizations
|
||||
Please note that this document lists possible ways for experimenting with HIP stack to gain performance. Performance may vary from platform to platform.
|
||||
|
||||
### On Small BAR Setup
|
||||
|
||||
There are two possible ways to transfer data from host-to-device (H2D) and device-to-host(D2H)
|
||||
* Using Staging Buffers
|
||||
* Using PinInPlace
|
||||
|
||||
### On Large BAR Setup
|
||||
|
||||
There are three possible ways to transfer data from host-to-device (H2D)
|
||||
* Using Staging Buffers
|
||||
* Using PinInPlace
|
||||
* Direct Memcpy
|
||||
|
||||
And there are two possible ways to transfer data from device-to-host (D2H)
|
||||
* Using Staging Buffers
|
||||
* Using PinInPlace
|
||||
|
||||
Some GPUs may not be able to directly access host memory, and in these cases we need to
|
||||
stage the copy through an optimized pinned staging buffer, to implement H2D and D2H copies. The copy is broken into buffer-sized chunks to limit the size of the buffer and also to provide better performance by overlapping the CPU copies with the DMA copies.
|
||||
|
||||
PinInPlace is another algorithm which pins the host memory "in-place", and copies it with the DMA engine.
|
||||
|
||||
By default staging buffers are used for unpinned memory transfers. Environment variables allow control over the unpinned copy algorithm and parameters:
|
||||
|
||||
- HIP_PININPLACE - This environment variable forces the use of PinInPlace logic for all unpinned memory copies
|
||||
|
||||
- HIP_OPTIMAL_MEM_TRANSFER- This environment variable enables a hybrid memory copy logic based on thresholds. These thresholds can be managed with following environment variables:
|
||||
- HIP_H2D_MEM_TRANSFER_THRESHOLD_STAGING_OR_PININPLACE - Threshold in bytes for H2D copy. For sizes smaller than threshold staging buffers logic would be used else PinInPlace logic.
|
||||
- HIP_H2D_MEM_TRANSFER_THRESHOLD_DIRECT_OR_STAGING - Threshold in bytes for H2D copy. For sizes smaller than threshold direct copy logic would be used else staging buffers logic.
|
||||
- HIP_D2H_MEM_TRANSFER_THRESHOLD - Threshold in bytes for D2H copy. For sizes smaller than threshold staging buffer logic would be used else PinInPlace logic.
|
||||
+990
-636
La diferencia del archivo ha sido suprimido porque es demasiado grande
Cargar Diff
@@ -25,20 +25,220 @@ THE SOFTWARE.
|
||||
|
||||
enum hipChannelFormatKind
|
||||
{
|
||||
hipChannelFormatKindSigned = 0,
|
||||
hipChannelFormatKindUnsigned = 1,
|
||||
hipChannelFormatKindFloat = 2,
|
||||
hipChannelFormatKindNone = 3
|
||||
hipChannelFormatKindSigned = 0,
|
||||
hipChannelFormatKindUnsigned = 1,
|
||||
hipChannelFormatKindFloat = 2,
|
||||
hipChannelFormatKindNone = 3
|
||||
};
|
||||
|
||||
struct hipChannelFormatDesc
|
||||
{
|
||||
int x;
|
||||
int y;
|
||||
int z;
|
||||
int w;
|
||||
enum hipChannelFormatKind f;
|
||||
int x;
|
||||
int y;
|
||||
int z;
|
||||
int w;
|
||||
enum hipChannelFormatKind f;
|
||||
};
|
||||
|
||||
struct hipArray {
|
||||
void* data; //FIXME: generalize this
|
||||
struct hipChannelFormatDesc desc;
|
||||
unsigned int type;
|
||||
unsigned int width;
|
||||
unsigned int height;
|
||||
unsigned int depth;
|
||||
};
|
||||
|
||||
typedef struct hipArray* hipArray_t;
|
||||
|
||||
typedef const struct hipArray* hipArray_const_t;
|
||||
|
||||
// TODO: It needs to be modified since it was just copied from hipArray.
|
||||
struct hipMipmappedArray {
|
||||
void* data; //FIXME: generalize this
|
||||
struct hipChannelFormatDesc desc;
|
||||
unsigned int width;
|
||||
unsigned int height;
|
||||
unsigned int depth;
|
||||
};
|
||||
|
||||
typedef struct hipMipmappedArray *hipMipmappedArray_t;
|
||||
|
||||
typedef const struct hipMipmappedArray *hipMipmappedArray_const_t;
|
||||
|
||||
/**
|
||||
* hip resource types
|
||||
*/
|
||||
enum hipResourceType
|
||||
{
|
||||
hipResourceTypeArray = 0x00,
|
||||
hipResourceTypeMipmappedArray = 0x01,
|
||||
hipResourceTypeLinear = 0x02,
|
||||
hipResourceTypePitch2D = 0x03
|
||||
};
|
||||
|
||||
/**
|
||||
* hip texture resource view formats
|
||||
*/
|
||||
enum hipResourceViewFormat
|
||||
{
|
||||
hipResViewFormatNone = 0x00,
|
||||
hipResViewFormatUnsignedChar1 = 0x01,
|
||||
hipResViewFormatUnsignedChar2 = 0x02,
|
||||
hipResViewFormatUnsignedChar4 = 0x03,
|
||||
hipResViewFormatSignedChar1 = 0x04,
|
||||
hipResViewFormatSignedChar2 = 0x05,
|
||||
hipResViewFormatSignedChar4 = 0x06,
|
||||
hipResViewFormatUnsignedShort1 = 0x07,
|
||||
hipResViewFormatUnsignedShort2 = 0x08,
|
||||
hipResViewFormatUnsignedShort4 = 0x09,
|
||||
hipResViewFormatSignedShort1 = 0x0a,
|
||||
hipResViewFormatSignedShort2 = 0x0b,
|
||||
hipResViewFormatSignedShort4 = 0x0c,
|
||||
hipResViewFormatUnsignedInt1 = 0x0d,
|
||||
hipResViewFormatUnsignedInt2 = 0x0e,
|
||||
hipResViewFormatUnsignedInt4 = 0x0f,
|
||||
hipResViewFormatSignedInt1 = 0x10,
|
||||
hipResViewFormatSignedInt2 = 0x11,
|
||||
hipResViewFormatSignedInt4 = 0x12,
|
||||
hipResViewFormatHalf1 = 0x13,
|
||||
hipResViewFormatHalf2 = 0x14,
|
||||
hipResViewFormatHalf4 = 0x15,
|
||||
hipResViewFormatFloat1 = 0x16,
|
||||
hipResViewFormatFloat2 = 0x17,
|
||||
hipResViewFormatFloat4 = 0x18,
|
||||
hipResViewFormatUnsignedBlockCompressed1 = 0x19,
|
||||
hipResViewFormatUnsignedBlockCompressed2 = 0x1a,
|
||||
hipResViewFormatUnsignedBlockCompressed3 = 0x1b,
|
||||
hipResViewFormatUnsignedBlockCompressed4 = 0x1c,
|
||||
hipResViewFormatSignedBlockCompressed4 = 0x1d,
|
||||
hipResViewFormatUnsignedBlockCompressed5 = 0x1e,
|
||||
hipResViewFormatSignedBlockCompressed5 = 0x1f,
|
||||
hipResViewFormatUnsignedBlockCompressed6H = 0x20,
|
||||
hipResViewFormatSignedBlockCompressed6H = 0x21,
|
||||
hipResViewFormatUnsignedBlockCompressed7 = 0x22
|
||||
};
|
||||
|
||||
/**
|
||||
* HIP resource descriptor
|
||||
*/
|
||||
struct hipResourceDesc {
|
||||
enum hipResourceType resType;
|
||||
|
||||
union {
|
||||
struct {
|
||||
hipArray_t array;
|
||||
} array;
|
||||
struct {
|
||||
hipMipmappedArray_t mipmap;
|
||||
} mipmap;
|
||||
struct {
|
||||
void *devPtr;
|
||||
struct hipChannelFormatDesc desc;
|
||||
size_t sizeInBytes;
|
||||
} linear;
|
||||
struct {
|
||||
void *devPtr;
|
||||
struct hipChannelFormatDesc desc;
|
||||
size_t width;
|
||||
size_t height;
|
||||
size_t pitchInBytes;
|
||||
} pitch2D;
|
||||
} res;
|
||||
};
|
||||
|
||||
/**
|
||||
* hip resource view descriptor
|
||||
*/
|
||||
struct hipResourceViewDesc
|
||||
{
|
||||
enum hipResourceViewFormat format;
|
||||
size_t width;
|
||||
size_t height;
|
||||
size_t depth;
|
||||
unsigned int firstMipmapLevel;
|
||||
unsigned int lastMipmapLevel;
|
||||
unsigned int firstLayer;
|
||||
unsigned int lastLayer;
|
||||
};
|
||||
|
||||
/**
|
||||
* Memory copy types
|
||||
*
|
||||
*/
|
||||
typedef enum hipMemcpyKind {
|
||||
hipMemcpyHostToHost = 0, ///< Host-to-Host Copy
|
||||
hipMemcpyHostToDevice = 1, ///< Host-to-Device Copy
|
||||
hipMemcpyDeviceToHost = 2, ///< Device-to-Host Copy
|
||||
hipMemcpyDeviceToDevice =3, ///< Device-to-Device Copy
|
||||
hipMemcpyDefault = 4 ///< Runtime will automatically determine copy-kind based on virtual addresses.
|
||||
} hipMemcpyKind;
|
||||
|
||||
struct hipPitchedPtr
|
||||
{
|
||||
void *ptr;
|
||||
size_t pitch;
|
||||
size_t xsize;
|
||||
size_t ysize;
|
||||
};
|
||||
|
||||
struct hipExtent {
|
||||
size_t width; // Width in elements when referring to array memory, in bytes when referring to linear memory
|
||||
size_t height;
|
||||
size_t depth;
|
||||
};
|
||||
|
||||
struct hipPos {
|
||||
size_t x;
|
||||
size_t y;
|
||||
size_t z;
|
||||
};
|
||||
|
||||
struct hipMemcpy3DParms {
|
||||
hipArray_t srcArray;
|
||||
struct hipPos srcPos;
|
||||
struct hipPitchedPtr srcPtr;
|
||||
|
||||
hipArray_t dstArray;
|
||||
struct hipPos dstPos;
|
||||
struct hipPitchedPtr dstPtr;
|
||||
|
||||
struct hipExtent extent;
|
||||
enum hipMemcpyKind kind;
|
||||
};
|
||||
|
||||
static __inline__ struct hipPitchedPtr make_hipPitchedPtr(void *d, size_t p, size_t xsz, size_t ysz)
|
||||
{
|
||||
struct hipPitchedPtr s;
|
||||
|
||||
s.ptr = d;
|
||||
s.pitch = p;
|
||||
s.xsize = xsz;
|
||||
s.ysize = ysz;
|
||||
|
||||
return s;
|
||||
}
|
||||
|
||||
static __inline__ struct hipPos make_hipPos(size_t x, size_t y, size_t z)
|
||||
{
|
||||
struct hipPos p;
|
||||
|
||||
p.x = x;
|
||||
p.y = y;
|
||||
p.z = z;
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
static __inline__ struct hipExtent make_hipExtent(size_t w, size_t h, size_t d)
|
||||
{
|
||||
struct hipExtent e;
|
||||
|
||||
e.width = w;
|
||||
e.height = h;
|
||||
e.depth = d;
|
||||
|
||||
return e;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
@@ -28,7 +28,7 @@ THE SOFTWARE.
|
||||
#include "helpers.hpp"
|
||||
|
||||
#include "hc.hpp"
|
||||
#include "hip_hcc.h"
|
||||
#include "hip/hip_hcc.h"
|
||||
#include "hip_runtime.h"
|
||||
|
||||
#include <functional>
|
||||
|
||||
@@ -0,0 +1,22 @@
|
||||
/**
|
||||
* @defgroup HipDb HCC-specific debug facilities
|
||||
* @{
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @brief Print memory tracker information for this pointer.
|
||||
*
|
||||
* HIP maintains a table for all memory allocations performed by the application.
|
||||
* If targetAddress is 0, the entire table is printed to stderr.
|
||||
* If targetAddress is non-null, this routine will perform some forensic analysis
|
||||
* to find the pointer
|
||||
*/
|
||||
void hipdbPrintMem(void *targetAddress);
|
||||
|
||||
|
||||
|
||||
// doxygen end HipDb
|
||||
/**
|
||||
* @}
|
||||
*/
|
||||
@@ -1,103 +0,0 @@
|
||||
/*
|
||||
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_HIP_HCC_H
|
||||
#define HIP_INCLUDE_HIP_HCC_DETAIL_HIP_HCC_H
|
||||
|
||||
#include "hip/hip_runtime_api.h"
|
||||
|
||||
#if __cplusplus
|
||||
#ifdef __HCC__
|
||||
#include <hc.hpp>
|
||||
|
||||
|
||||
/**
|
||||
*-------------------------------------------------------------------------------------------------
|
||||
*-------------------------------------------------------------------------------------------------
|
||||
* @defgroup HCC-specific features
|
||||
* @warning These APIs provide access to special features of HCC compiler and are not available through the CUDA path.
|
||||
* @{
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @brief Return hc::accelerator associated with the specified deviceId
|
||||
* @return #hipSuccess, #hipErrorInvalidDevice
|
||||
*/
|
||||
hipError_t hipHccGetAccelerator(int deviceId, hc::accelerator *acc);
|
||||
|
||||
/**
|
||||
* @brief Return hc::accelerator_view associated with the specified stream
|
||||
*
|
||||
* If stream is 0, the accelerator_view for the default stream is returned.
|
||||
* @return #hipSuccess
|
||||
*/
|
||||
hipError_t hipHccGetAcceleratorView(hipStream_t stream, hc::accelerator_view **av);
|
||||
|
||||
|
||||
#endif // #ifdef __HCC__
|
||||
|
||||
/**
|
||||
* @brief launches kernel f with launch parameters and shared memory on stream with arguments passed to kernelparams or extra
|
||||
*
|
||||
* @param [in] f Kernel to launch.
|
||||
* @param [in] gridDimX X grid dimension specified in work-items
|
||||
* @param [in] gridDimY Y grid dimension specified in work-items
|
||||
* @param [in] gridDimZ Z grid dimension specified in work-items
|
||||
* @param [in] blockDimX X block dimensions specified in work-items
|
||||
* @param [in] blockDimY Y block dimension specified in work-items
|
||||
* @param [in] blockDimZ Z block dimension specified in work-items
|
||||
* @param [in] sharedMemBytes Amount of dynamic shared memory to allocate for this kernel. The kernel can access this with HIP_DYNAMIC_SHARED.
|
||||
* @param [in] stream Stream where the kernel should be dispatched. May be 0, in which case the default stream is used with associated synchronization rules.
|
||||
* @param [in] kernelParams
|
||||
* @param [in] extra Pointer to kernel arguments. These are passed directly to the kernel and must be in the memory layout and alignment expected by the kernel.
|
||||
* @param [in] startEvent If non-null, specified event will be updated to track the start time of the kernel launch. The event must be created before calling this API.
|
||||
* @param [in] stopEvent If non-null, specified event will be updated to track the stop time of the kernel launch. The event must be created before calling this API.
|
||||
*
|
||||
* @returns hipSuccess, hipErrorInvalidDevice, hipErrorNotInitialized, hipErrorInvalidValue
|
||||
*
|
||||
* @warning kernelParams argument is not yet implemented in HIP. Please use extra instead. Please refer to hip_porting_driver_api.md for sample usage.
|
||||
|
||||
* HIP/ROCm actually updates the start event when the associated kernel completes.
|
||||
*/
|
||||
hipError_t hipHccModuleLaunchKernel(hipFunction_t f,
|
||||
uint32_t globalWorkSizeX,
|
||||
uint32_t globalWorkSizeY,
|
||||
uint32_t globalWorkSizeZ,
|
||||
uint32_t localWorkSizeX,
|
||||
uint32_t localWorkSizeY,
|
||||
uint32_t localWorkSizeZ,
|
||||
size_t sharedMemBytes,
|
||||
hipStream_t hStream,
|
||||
void **kernelParams,
|
||||
void **extra,
|
||||
hipEvent_t startEvent=nullptr,
|
||||
hipEvent_t stopEvent=nullptr
|
||||
);
|
||||
|
||||
// doxygen end HCC-specific features
|
||||
/**
|
||||
* @}
|
||||
*/
|
||||
#endif // #if __cplusplus
|
||||
|
||||
#endif //
|
||||
@@ -50,14 +50,6 @@ THE SOFTWARE.
|
||||
#include <hip/hip_runtime_api.h>
|
||||
|
||||
|
||||
#if USE_PROMOTE_FREE_HCC == 1
|
||||
#define ADDRESS_SPACE_1
|
||||
#define ADDRESS_SPACE_3
|
||||
#else
|
||||
#define ADDRESS_SPACE_1 __attribute__((address_space(1)))
|
||||
#define ADDRESS_SPACE_3 __attribute__((address_space(3)))
|
||||
#endif
|
||||
|
||||
//---
|
||||
// Remainder of this file only compiles with HCC
|
||||
#if defined __HCC__
|
||||
@@ -89,12 +81,12 @@ namespace hip_impl
|
||||
extern int HIP_TRACE_API;
|
||||
|
||||
#ifdef __cplusplus
|
||||
//#include <hip/hcc_detail/hip_texture.h>
|
||||
#include <hip/hcc_detail/hip_ldg.h>
|
||||
#endif
|
||||
#include <hip/hcc_detail/host_defines.h>
|
||||
#include <hip/hcc_detail/math_functions.h>
|
||||
#include <hip/hcc_detail/device_functions.h>
|
||||
#include <hip/hcc_detail/texture_functions.h>
|
||||
|
||||
// TODO-HCC remove old definitions ; ~1602 hcc supports __HCC_ACCELERATOR__ define.
|
||||
#if defined (__KALMAR_ACCELERATOR__) && !defined (__HCC_ACCELERATOR__)
|
||||
@@ -107,7 +99,7 @@ extern int HIP_TRACE_API;
|
||||
// TODO-HCC add a dummy implementation of assert, need to replace with a proper kernel exit call.
|
||||
#if __HIP_DEVICE_COMPILE__ == 1
|
||||
#undef assert
|
||||
#define assert(COND) { if (COND) {} }
|
||||
#define assert(COND) { if (!(COND)) {abort();} }
|
||||
#endif
|
||||
|
||||
|
||||
@@ -138,7 +130,7 @@ extern int HIP_TRACE_API;
|
||||
#define __HIP_ARCH_HAS_WARP_FUNNEL_SHIFT__ (0)
|
||||
|
||||
//sync
|
||||
#define __HIP_ARCH_HAS_THREAD_FENCE_SYSTEM__ (0)
|
||||
#define __HIP_ARCH_HAS_THREAD_FENCE_SYSTEM__ (1)
|
||||
#define __HIP_ARCH_HAS_SYNC_THREAD_EXT__ (0)
|
||||
|
||||
// misc
|
||||
|
||||
@@ -37,7 +37,8 @@ THE SOFTWARE.
|
||||
|
||||
#include <hip/hcc_detail/host_defines.h>
|
||||
#include <hip/hip_runtime_api.h>
|
||||
#include <hip/hip_texture.h>
|
||||
#include <hip/hcc_detail/driver_types.h>
|
||||
#include <hip/hcc_detail/hip_texture_types.h>
|
||||
|
||||
#if defined (__HCC__) && (__hcc_workweek__ < 16155)
|
||||
#error("This version of HIP requires a newer version of HCC.");
|
||||
@@ -136,6 +137,11 @@ enum hipLimit_t
|
||||
#define hipDeviceMapHost 0x8
|
||||
#define hipDeviceLmemResizeToMax 0x16
|
||||
|
||||
#define hipArrayDefault 0x00 ///< Default HIP array allocation flag
|
||||
#define hipArrayLayered 0x01
|
||||
#define hipArraySurfaceLoadStore 0x02
|
||||
#define hipArrayCubemap 0x04
|
||||
#define hipArrayTextureGather 0x08
|
||||
|
||||
/*
|
||||
* @brief hipJitOption
|
||||
@@ -165,7 +171,7 @@ typedef enum hipJitOption {
|
||||
|
||||
|
||||
/**
|
||||
* @warning On AMD devices and recent Nvidia devices, these hints and controls are ignored.
|
||||
* @warning On AMD devices and some Nvidia devices, these hints and controls are ignored.
|
||||
*/
|
||||
typedef enum hipFuncCache_t {
|
||||
hipFuncCachePreferNone, ///< no preference for shared memory or L1 (default)
|
||||
@@ -176,7 +182,7 @@ typedef enum hipFuncCache_t {
|
||||
|
||||
|
||||
/**
|
||||
* @warning On AMD devices and recent Nvidia devices, these hints and controls are ignored.
|
||||
* @warning On AMD devices and some Nvidia devices, these hints and controls are ignored.
|
||||
*/
|
||||
typedef enum hipSharedMemConfig {
|
||||
hipSharedMemBankSizeDefault, ///< The compiler selects a device-specific value for the banking.
|
||||
@@ -200,27 +206,6 @@ typedef struct dim3 {
|
||||
} dim3;
|
||||
|
||||
|
||||
/**
|
||||
* Memory copy types
|
||||
*
|
||||
*/
|
||||
typedef enum hipMemcpyKind {
|
||||
hipMemcpyHostToHost = 0 ///< Host-to-Host Copy
|
||||
,hipMemcpyHostToDevice = 1 ///< Host-to-Device Copy
|
||||
,hipMemcpyDeviceToHost = 2 ///< Device-to-Host Copy
|
||||
,hipMemcpyDeviceToDevice =3 ///< Device-to-Device Copy
|
||||
,hipMemcpyDefault = 4, ///< Runtime will automatically determine copy-kind based on virtual addresses.
|
||||
} hipMemcpyKind;
|
||||
|
||||
typedef struct {
|
||||
unsigned int width;
|
||||
unsigned int height;
|
||||
enum hipChannelFormatKind f;
|
||||
void* data; //FIXME: generalize this
|
||||
} hipArray;
|
||||
|
||||
|
||||
|
||||
// Doxygen end group GlobalDefs
|
||||
/** @} */
|
||||
|
||||
@@ -379,7 +364,7 @@ hipError_t hipGetDeviceProperties(hipDeviceProp_t* prop, int deviceId);
|
||||
* @param [in] cacheConfig
|
||||
*
|
||||
* @returns #hipSuccess, #hipErrorInitializationError
|
||||
* Note: AMD devices and recent Nvidia GPUS do not support reconfigurable cache. This hint is ignored on those architectures.
|
||||
* Note: AMD devices and some Nvidia GPUS do not support reconfigurable cache. This hint is ignored on those architectures.
|
||||
*
|
||||
*/
|
||||
hipError_t hipDeviceSetCacheConfig ( hipFuncCache_t cacheConfig );
|
||||
@@ -391,7 +376,7 @@ hipError_t hipDeviceSetCacheConfig ( hipFuncCache_t cacheConfig );
|
||||
* @param [in] cacheConfig
|
||||
*
|
||||
* @returns #hipSuccess, #hipErrorInitializationError
|
||||
* Note: AMD devices and recent Nvidia GPUS do not support reconfigurable cache. This hint is ignored on those architectures.
|
||||
* Note: AMD devices and some Nvidia GPUS do not support reconfigurable cache. This hint is ignored on those architectures.
|
||||
*
|
||||
*/
|
||||
hipError_t hipDeviceGetCacheConfig ( hipFuncCache_t *cacheConfig );
|
||||
@@ -415,7 +400,7 @@ hipError_t hipDeviceGetLimit(size_t *pValue, enum hipLimit_t limit);
|
||||
* @param [in] config;
|
||||
*
|
||||
* @returns #hipSuccess, #hipErrorInitializationError
|
||||
* Note: AMD devices and recent Nvidia GPUS do not support reconfigurable cache. This hint is ignored on those architectures.
|
||||
* Note: AMD devices and some Nvidia GPUS do not support reconfigurable cache. This hint is ignored on those architectures.
|
||||
*
|
||||
*/
|
||||
hipError_t hipFuncSetCacheConfig (const void* func, hipFuncCache_t config );
|
||||
@@ -427,7 +412,7 @@ hipError_t hipFuncSetCacheConfig (const void* func, hipFuncCache_t config );
|
||||
*
|
||||
* @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInitializationError
|
||||
*
|
||||
* Note: AMD devices and recent Nvidia GPUS do not support shared cache banking, and the hint is ignored on those architectures.
|
||||
* Note: AMD devices and some Nvidia GPUS do not support shared cache banking, and the hint is ignored on those architectures.
|
||||
*
|
||||
*/
|
||||
hipError_t hipDeviceGetSharedMemConfig ( hipSharedMemConfig * pConfig );
|
||||
@@ -440,7 +425,7 @@ hipError_t hipDeviceGetSharedMemConfig ( hipSharedMemConfig * pConfig );
|
||||
*
|
||||
* @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInitializationError
|
||||
*
|
||||
* Note: AMD devices and recent Nvidia GPUS do not support shared cache banking, and the hint is ignored on those architectures.
|
||||
* Note: AMD devices and some Nvidia GPUS do not support shared cache banking, and the hint is ignored on those architectures.
|
||||
*
|
||||
*/
|
||||
hipError_t hipDeviceSetSharedMemConfig ( hipSharedMemConfig config );
|
||||
@@ -1287,6 +1272,19 @@ hipError_t hipMemsetAsync(void* dst, int value, size_t sizeBytes, hipStream_t s
|
||||
hipError_t hipMemsetAsync(void* dst, int value, size_t sizeBytes, hipStream_t stream);
|
||||
#endif
|
||||
|
||||
/**
|
||||
* @brief Fills the memory area pointed to by dst with the constant value.
|
||||
*
|
||||
* @param[out] dst Pointer to device memory
|
||||
* @param[in] pitch - data size in bytes
|
||||
* @param[in] value - constant value to be set
|
||||
* @param[in] width
|
||||
* @param[in] height
|
||||
* @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree
|
||||
*/
|
||||
|
||||
hipError_t hipMemset2D(void* dst, size_t pitch, int value, size_t width, size_t height);
|
||||
|
||||
/**
|
||||
* @brief Query memory info.
|
||||
* Return snapshot of free memory, and total allocatable memory on the device.
|
||||
@@ -1315,7 +1313,7 @@ hipError_t hipMemPtrGetInfo(void *ptr, size_t *size);
|
||||
*/
|
||||
#if __cplusplus
|
||||
hipError_t hipMallocArray(hipArray** array, const hipChannelFormatDesc* desc,
|
||||
size_t width, size_t height = 0, unsigned int flags = 0);
|
||||
size_t width, size_t height = 0, unsigned int flags = hipArrayDefault);
|
||||
#else
|
||||
hipError_t hipMallocArray(hipArray** array, const struct hipChannelFormatDesc* desc,
|
||||
size_t width, size_t height, unsigned int flags);
|
||||
@@ -1330,6 +1328,22 @@ hipError_t hipMallocArray(hipArray** array, const struct hipChannelFormatDesc* d
|
||||
*/
|
||||
hipError_t hipFreeArray(hipArray* array);
|
||||
|
||||
/**
|
||||
* @brief Allocate an array on the device.
|
||||
*
|
||||
* @param[out] array Pointer to allocated array in device memory
|
||||
* @param[in] desc Requested channel format
|
||||
* @param[in] extent Requested array allocation width, height and depth
|
||||
* @param[in] flags Requested properties of allocated array
|
||||
* @return #hipSuccess, #hipErrorMemoryAllocation
|
||||
*
|
||||
* @see hipMalloc, hipMallocPitch, hipFree, hipFreeArray, hipHostMalloc, hipHostFree
|
||||
*/
|
||||
|
||||
hipError_t hipMalloc3DArray(hipArray_t *array,
|
||||
const struct hipChannelFormatDesc* desc,
|
||||
struct hipExtent extent,
|
||||
unsigned int flags);
|
||||
/**
|
||||
* @brief Copies data between host and device.
|
||||
*
|
||||
@@ -1402,6 +1416,7 @@ hipError_t hipMemcpyToArray(hipArray* dst, size_t wOffset, size_t hOffset,
|
||||
const void* src, size_t count, hipMemcpyKind kind);
|
||||
|
||||
|
||||
hipError_t hipMemcpy3D(const struct hipMemcpy3DParms *p);
|
||||
|
||||
// doxygen end Memory
|
||||
/**
|
||||
@@ -1434,7 +1449,6 @@ hipError_t hipMemcpyToArray(hipArray* dst, size_t wOffset, size_t hOffset,
|
||||
*
|
||||
* @returns #hipSuccess,
|
||||
* @returns #hipErrorInvalidDevice if deviceId or peerDeviceId are not valid devices
|
||||
* @warning PeerToPeer support is experimental.
|
||||
*/
|
||||
hipError_t hipDeviceCanAccessPeer (int* canAccessPeer, int deviceId, int peerDeviceId);
|
||||
|
||||
@@ -1452,7 +1466,6 @@ hipError_t hipDeviceCanAccessPeer (int* canAccessPeer, int deviceId, int peerDev
|
||||
*
|
||||
* Returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue,
|
||||
* @returns #hipErrorPeerAccessAlreadyEnabled if peer access is already enabled for this device.
|
||||
* @warning PeerToPeer support is experimental.
|
||||
*/
|
||||
hipError_t hipDeviceEnablePeerAccess (int peerDeviceId, unsigned int flags);
|
||||
|
||||
@@ -1465,7 +1478,6 @@ hipError_t hipDeviceEnablePeerAccess (int peerDeviceId, unsigned int flags);
|
||||
* @param [in] peerDeviceId
|
||||
*
|
||||
* @returns #hipSuccess, #hipErrorPeerAccessNotEnabled
|
||||
* @warning PeerToPeer support is experimental.
|
||||
*/
|
||||
hipError_t hipDeviceDisablePeerAccess (int peerDeviceId);
|
||||
|
||||
@@ -1497,7 +1509,6 @@ hipError_t hipMemGetAddressRange ( hipDeviceptr_t* pbase, size_t* psize, hipDevi
|
||||
* @param [in] sizeBytes - Size of memory copy in bytes
|
||||
*
|
||||
* @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidDevice
|
||||
* @warning PeerToPeer support is experimental.
|
||||
*/
|
||||
hipError_t hipMemcpyPeer (void* dst, int dstDeviceId, const void* src, int srcDeviceId, size_t sizeBytes);
|
||||
|
||||
@@ -1656,7 +1667,7 @@ hipError_t hipCtxGetApiVersion (hipCtx_t ctx,int *apiVersion);
|
||||
*
|
||||
* @return #hipSuccess
|
||||
*
|
||||
* @warning AMD devices and recent Nvidia GPUS do not support reconfigurable cache. This hint is ignored on those architectures.
|
||||
* @warning AMD devices and some Nvidia GPUS do not support reconfigurable cache. This hint is ignored on those architectures.
|
||||
*
|
||||
* @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice
|
||||
*/
|
||||
@@ -1669,7 +1680,7 @@ hipError_t hipCtxGetCacheConfig ( hipFuncCache_t *cacheConfig );
|
||||
*
|
||||
* @return #hipSuccess
|
||||
*
|
||||
* @warning AMD devices and recent Nvidia GPUS do not support reconfigurable cache. This hint is ignored on those architectures.
|
||||
* @warning AMD devices and some Nvidia GPUS do not support reconfigurable cache. This hint is ignored on those architectures.
|
||||
*
|
||||
* @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice
|
||||
*/
|
||||
@@ -1682,7 +1693,7 @@ hipError_t hipCtxSetCacheConfig ( hipFuncCache_t cacheConfig );
|
||||
*
|
||||
* @return #hipSuccess
|
||||
*
|
||||
* @warning AMD devices and recent Nvidia GPUS do not support shared cache banking, and the hint is ignored on those architectures.
|
||||
* @warning AMD devices and some Nvidia GPUS do not support shared cache banking, and the hint is ignored on those architectures.
|
||||
*
|
||||
* @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice
|
||||
*/
|
||||
@@ -1695,7 +1706,7 @@ hipError_t hipCtxSetSharedMemConfig ( hipSharedMemConfig config );
|
||||
*
|
||||
* @return #hipSuccess
|
||||
*
|
||||
* @warning AMD devices and recent Nvidia GPUS do not support shared cache banking, and the hint is ignored on those architectures.
|
||||
* @warning AMD devices and some Nvidia GPUS do not support shared cache banking, and the hint is ignored on those architectures.
|
||||
*
|
||||
* @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice
|
||||
*/
|
||||
@@ -1867,7 +1878,7 @@ hipError_t hipDeviceGetPCIBusId (char *pciBusId,int len,int device);
|
||||
*
|
||||
* @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue
|
||||
*/
|
||||
hipError_t hipDeviceGetByPCIBusId ( int* device,const int* pciBusId );
|
||||
hipError_t hipDeviceGetByPCIBusId ( int* device,const char* pciBusId );
|
||||
|
||||
|
||||
/**
|
||||
@@ -2150,6 +2161,24 @@ hipError_t hipIpcCloseMemHandle(void *devPtr);
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
||||
hipError_t hipBindTexture(size_t* offset,
|
||||
textureReference* tex,
|
||||
const void* devPtr,
|
||||
const hipChannelFormatDesc* desc,
|
||||
size_t size = UINT_MAX);
|
||||
|
||||
hipError_t ihipBindTextureImpl(int dim,
|
||||
enum hipTextureReadMode readMode,
|
||||
size_t *offset,
|
||||
const void *devPtr,
|
||||
const struct hipChannelFormatDesc& desc,
|
||||
size_t size,
|
||||
enum hipTextureAddressMode addressMode,
|
||||
enum hipTextureFilterMode filterMode,
|
||||
int normalizedCoords,
|
||||
hipTextureObject_t& textureObject);
|
||||
|
||||
/*
|
||||
* @brief hipBindTexture Binds size bytes of the memory area pointed to by @p devPtr to the texture reference tex.
|
||||
*
|
||||
@@ -2164,15 +2193,15 @@ hipError_t hipIpcCloseMemHandle(void *devPtr);
|
||||
* @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree, #hipErrorUnknown
|
||||
**/
|
||||
template <class T, int dim, enum hipTextureReadMode readMode>
|
||||
hipError_t hipBindTexture(size_t *offset,
|
||||
struct texture<T, dim, readMode> &tex,
|
||||
const void *devPtr,
|
||||
const struct hipChannelFormatDesc *desc,
|
||||
size_t size=UINT_MAX)
|
||||
hipError_t hipBindTexture(size_t *offset,
|
||||
struct texture<T, dim, readMode>& tex,
|
||||
const void *devPtr,
|
||||
const struct hipChannelFormatDesc& desc,
|
||||
size_t size = UINT_MAX)
|
||||
{
|
||||
tex._dataPtr = static_cast<const T*>(devPtr);
|
||||
|
||||
return hipSuccess;
|
||||
return ihipBindTextureImpl(dim, readMode, offset, devPtr, desc, size,
|
||||
tex.addressMode[0], tex.filterMode, tex.normalized,
|
||||
tex.textureObject);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -2188,20 +2217,115 @@ hipError_t hipBindTexture(size_t *offset,
|
||||
* @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree, #hipErrorUnknown
|
||||
**/
|
||||
template <class T, int dim, enum hipTextureReadMode readMode>
|
||||
hipError_t hipBindTexture(size_t *offset,
|
||||
struct texture<T, dim, readMode> &tex,
|
||||
const void *devPtr,
|
||||
size_t size=UINT_MAX)
|
||||
hipError_t hipBindTexture(size_t *offset,
|
||||
struct texture<T, dim, readMode>& tex,
|
||||
const void *devPtr,
|
||||
size_t size = UINT_MAX)
|
||||
{
|
||||
return hipBindTexture(offset, tex, devPtr, &tex.channelDesc, size);
|
||||
return ihipBindTextureImpl(dim, readMode, offset, devPtr, tex.channelDesc, size,
|
||||
tex.addressMode[0], tex.filterMode, tex.normalized,
|
||||
tex.textureObject);
|
||||
}
|
||||
|
||||
// C API
|
||||
hipError_t hipBindTexture2D(size_t* offset,
|
||||
textureReference* tex,
|
||||
const void* devPtr,
|
||||
const hipChannelFormatDesc* desc,
|
||||
size_t width,
|
||||
size_t height,
|
||||
size_t pitch);
|
||||
|
||||
hipError_t ihipBindTexture2DImpl(int dim,
|
||||
enum hipTextureReadMode readMode,
|
||||
size_t *offset,
|
||||
const void *devPtr,
|
||||
const struct hipChannelFormatDesc& desc,
|
||||
size_t width,
|
||||
size_t height,
|
||||
enum hipTextureAddressMode addressMode,
|
||||
enum hipTextureFilterMode filterMode,
|
||||
int normalizedCoords,
|
||||
hipTextureObject_t& textureObject);
|
||||
|
||||
template <class T, int dim, enum hipTextureReadMode readMode>
|
||||
hipError_t hipBindTexture2D(size_t *offset,
|
||||
struct texture<T, dim, readMode>& tex,
|
||||
const void *devPtr,
|
||||
size_t width,
|
||||
size_t height,
|
||||
size_t pitch)
|
||||
{
|
||||
return ihipBindTexture2DImpl(dim, readMode, offset, devPtr, tex.channelDesc, width, height,
|
||||
tex.addressMode[0], tex.filterMode, tex.normalized,
|
||||
tex.textureObject);
|
||||
}
|
||||
|
||||
template <class T, int dim, enum hipTextureReadMode readMode>
|
||||
hipError_t hipBindTextureToArray(struct texture<T, dim, readMode> &tex, hipArray* array) {
|
||||
tex.width = array->width;
|
||||
tex.height = array->height;
|
||||
tex._dataPtr = static_cast<const T*>(array->data);
|
||||
return hipSuccess;
|
||||
hipError_t hipBindTexture2D(size_t *offset,
|
||||
struct texture<T, dim, readMode>& tex,
|
||||
const void *devPtr,
|
||||
const struct hipChannelFormatDesc &desc,
|
||||
size_t width,
|
||||
size_t height,
|
||||
size_t pitch)
|
||||
{
|
||||
return ihipBindTexture2DImpl(dim, readMode, offset, devPtr, desc, width, height,
|
||||
tex.addressMode[0], tex.filterMode, tex.normalized,
|
||||
tex.textureObject);
|
||||
}
|
||||
|
||||
//C API
|
||||
hipError_t hipBindTextureToArray(textureReference* tex,
|
||||
hipArray_const_t array,
|
||||
const hipChannelFormatDesc* desc);
|
||||
|
||||
hipError_t ihipBindTextureToArrayImpl(int dim,
|
||||
enum hipTextureReadMode readMode,
|
||||
hipArray_const_t array,
|
||||
const struct hipChannelFormatDesc& desc,
|
||||
enum hipTextureAddressMode addressMode,
|
||||
enum hipTextureFilterMode filterMode,
|
||||
int normalizedCoords,
|
||||
hipTextureObject_t& textureObject);
|
||||
|
||||
template <class T, int dim, enum hipTextureReadMode readMode>
|
||||
hipError_t hipBindTextureToArray(struct texture<T, dim, readMode>& tex,
|
||||
hipArray_const_t array)
|
||||
{
|
||||
return ihipBindTextureToArrayImpl(dim, readMode, array, tex.channelDesc,
|
||||
tex.addressMode[0], tex.filterMode, tex.normalized,
|
||||
tex.textureObject);
|
||||
}
|
||||
|
||||
template <class T, int dim, enum hipTextureReadMode readMode>
|
||||
hipError_t hipBindTextureToArray(struct texture<T, dim, readMode>& tex,
|
||||
hipArray_const_t array,
|
||||
const struct hipChannelFormatDesc& desc)
|
||||
{
|
||||
return ihipBindTextureToArrayImpl(dim, readMode, array, desc,
|
||||
tex.addressMode[0], tex.filterMode, tex.normalized,
|
||||
tex.textureObject);
|
||||
}
|
||||
|
||||
//C API
|
||||
hipError_t hipBindTextureToMipmappedArray(const textureReference* tex,
|
||||
hipMipmappedArray_const_t mipmappedArray,
|
||||
const hipChannelFormatDesc* desc);
|
||||
|
||||
template <class T, int dim, enum hipTextureReadMode readMode>
|
||||
hipError_t hipBindTextureToMipmappedArray(const texture<T, dim, readMode>& tex,
|
||||
hipMipmappedArray_const_t mipmappedArray)
|
||||
{
|
||||
return hipSuccess;
|
||||
}
|
||||
|
||||
template <class T, int dim, enum hipTextureReadMode readMode>
|
||||
hipError_t hipBindTextureToMipmappedArray(const texture<T, dim, readMode>& tex,
|
||||
hipMipmappedArray_const_t mipmappedArray,
|
||||
const hipChannelFormatDesc& desc)
|
||||
{
|
||||
return hipSuccess;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -2211,15 +2335,30 @@ hipError_t hipBindTextureToArray(struct texture<T, dim, readMode> &tex, hipArray
|
||||
*
|
||||
* @return #hipSuccess
|
||||
**/
|
||||
template <class T, int dim, enum hipTextureReadMode readMode>
|
||||
hipError_t hipUnbindTexture(struct texture<T, dim, readMode> &tex)
|
||||
{
|
||||
tex._dataPtr = NULL;
|
||||
hipError_t hipUnbindTexture(const textureReference* tex);
|
||||
|
||||
return hipSuccess;
|
||||
extern hipError_t ihipUnbindTextureImpl(const hipTextureObject_t& textureObject);
|
||||
|
||||
template <class T, int dim, enum hipTextureReadMode readMode>
|
||||
hipError_t hipUnbindTexture(struct texture<T, dim, readMode> &tex)
|
||||
{
|
||||
return ihipUnbindTextureImpl(tex.textureObject);
|
||||
}
|
||||
|
||||
hipError_t hipGetChannelDesc(hipChannelFormatDesc* desc, hipArray_const_t array);
|
||||
hipError_t hipGetTextureAlignmentOffset (size_t* offset, const textureReference* texref);
|
||||
hipError_t hipGetTextureReference(const textureReference** texref, const void* symbol);
|
||||
|
||||
hipError_t hipCreateTextureObject(hipTextureObject_t* pTexObject,
|
||||
const hipResourceDesc* pResDesc,
|
||||
const hipTextureDesc* pTexDesc,
|
||||
const hipResourceViewDesc* pResViewDesc);
|
||||
|
||||
hipError_t hipDestroyTextureObject(hipTextureObject_t textureObject);
|
||||
|
||||
hipError_t hipGetTextureObjectResourceDesc(hipResourceDesc* pResDesc, hipTextureObject_t textureObject);
|
||||
hipError_t hipGetTextureObjectResourceViewDesc(hipResourceViewDesc* pResViewDesc, hipTextureObject_t textureObject);
|
||||
hipError_t hipGetTextureObjectTextureDesc(hipTextureDesc* pTexDesc, hipTextureObject_t textureObject);
|
||||
|
||||
// doxygen end Texture
|
||||
/**
|
||||
|
||||
@@ -1,107 +0,0 @@
|
||||
/*
|
||||
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
//#pragma once
|
||||
|
||||
#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_HIP_TEXTURE_H
|
||||
#define HIP_INCLUDE_HIP_HCC_DETAIL_HIP_TEXTURE_H
|
||||
|
||||
/**
|
||||
* @file hcc_detail/hip_texture.h
|
||||
* @brief HIP C++ Texture API for hcc compiler
|
||||
*/
|
||||
|
||||
#include <limits.h>
|
||||
#include <hip/hcc_detail/driver_types.h>
|
||||
#include <hip/hcc_detail/channel_descriptor.h>
|
||||
#include <hip/hcc_detail/texture_types.h>
|
||||
//#include <hip/hcc_detail/hip_runtime.h>
|
||||
|
||||
//----
|
||||
//Texture - TODO - likely need to move this to a separate file only included with kernel compilation.
|
||||
#define hipTextureType1D 1
|
||||
|
||||
#if __cplusplus
|
||||
template <class T, int texType=hipTextureType1D, hipTextureReadMode readMode=hipReadModeElementType>
|
||||
struct texture : public textureReference {
|
||||
|
||||
const T * _dataPtr; // pointer to underlying data.
|
||||
|
||||
//texture() : filterMode(hipFilterModePoint), normalized(false), _dataPtr(NULL) {};
|
||||
unsigned int width;
|
||||
unsigned int height;
|
||||
|
||||
};
|
||||
#endif
|
||||
|
||||
|
||||
#define tex1Dfetch(_tex, _addr) (_tex._dataPtr[_addr])
|
||||
|
||||
#define tex2D(_tex, _dx, _dy) \
|
||||
_tex._dataPtr[(unsigned int)_dx + (unsigned int)_dy*(_tex.width)]
|
||||
|
||||
/**
|
||||
* @addtogroup API HIP API
|
||||
* @{
|
||||
*
|
||||
* Defines the HIP API. See the individual sections for more information.
|
||||
*/
|
||||
|
||||
// These are C++ APIs - maybe belong in separate file.
|
||||
/**
|
||||
*-------------------------------------------------------------------------------------------------
|
||||
*-------------------------------------------------------------------------------------------------
|
||||
* @defgroup Texture Texture Reference Management
|
||||
* @{
|
||||
*
|
||||
*
|
||||
* @warning The HIP texture API implements a small subset of full texture API. Known limitations include:
|
||||
* - Only point sampling is supported.
|
||||
* - Only C++ APIs are provided.
|
||||
* - Many APIs and modes are not implemented.
|
||||
*
|
||||
* The HIP texture support is intended to allow use of texture cache on hardware where this is beneficial.
|
||||
*
|
||||
* The following CUDA APIs are not currently supported:
|
||||
* - cudaBindTexture2D
|
||||
* - cudaBindTextureToArray
|
||||
* - cudaBindTextureToMipmappedArray
|
||||
* - cudaGetChannelDesc
|
||||
* - cudaGetTextureReference
|
||||
*
|
||||
*/
|
||||
|
||||
// C API:
|
||||
#if 0
|
||||
hipChannelFormatDesc hipBindTexture(size_t *offset, struct textureReference *tex, const void *devPtr, const struct hipChannelFormatDesc *desc, size_t size=UINT_MAX)
|
||||
{
|
||||
tex->_dataPtr = devPtr;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
// End doxygen API:
|
||||
/**
|
||||
* @}
|
||||
*/
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,83 @@
|
||||
/*
|
||||
Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @file hcc_detail/hip_texture_types.h
|
||||
* @brief Defines the templated texture reference type for the HIP runtime.
|
||||
*/
|
||||
|
||||
#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_HIP_TEXTURE_TYPES_H
|
||||
#define HIP_INCLUDE_HIP_HCC_DETAIL_HIP_TEXTURE_TYPES_H
|
||||
|
||||
/*******************************************************************************
|
||||
* *
|
||||
* *
|
||||
* *
|
||||
*******************************************************************************/
|
||||
#include <limits.h>
|
||||
//#include <hip/hcc_detail/driver_types.h>
|
||||
#include <hip/hcc_detail/channel_descriptor.h>
|
||||
#include <hip/hcc_detail/texture_types.h>
|
||||
|
||||
#if __cplusplus
|
||||
|
||||
/*******************************************************************************
|
||||
* *
|
||||
* *
|
||||
* *
|
||||
*******************************************************************************/
|
||||
|
||||
template<class T, int texType = hipTextureType1D, enum hipTextureReadMode mode = hipReadModeElementType>
|
||||
struct texture : public textureReference
|
||||
{
|
||||
texture(int norm = 0,
|
||||
enum hipTextureFilterMode fMode = hipFilterModePoint,
|
||||
enum hipTextureAddressMode aMode = hipAddressModeClamp)
|
||||
{
|
||||
normalized = norm;
|
||||
filterMode = fMode;
|
||||
addressMode[0] = aMode;
|
||||
addressMode[1] = aMode;
|
||||
addressMode[2] = aMode;
|
||||
channelDesc = hipCreateChannelDesc<T>();
|
||||
sRGB = 0;
|
||||
}
|
||||
|
||||
texture(int norm,
|
||||
enum hipTextureFilterMode fMode,
|
||||
enum hipTextureAddressMode aMode,
|
||||
struct hipChannelFormatDesc desc)
|
||||
{
|
||||
normalized = norm;
|
||||
filterMode = fMode;
|
||||
addressMode[0] = aMode;
|
||||
addressMode[1] = aMode;
|
||||
addressMode[2] = aMode;
|
||||
channelDesc = desc;
|
||||
sRGB = 0;
|
||||
}
|
||||
};
|
||||
|
||||
#endif /* __cplusplus */
|
||||
|
||||
#endif /* !HIP_INCLUDE_HIP_HCC_DETAIL_HIP_TEXTURE_TYPES_H */
|
||||
|
||||
La diferencia del archivo ha sido suprimido porque es demasiado grande
Cargar Diff
@@ -26,20 +26,91 @@ THE SOFTWARE.
|
||||
|
||||
#include<hip/hcc_detail/driver_types.h>
|
||||
|
||||
enum hipTextureReadMode
|
||||
#define hipTextureType1D 0x01
|
||||
#define hipTextureType2D 0x02
|
||||
#define hipTextureType3D 0x03
|
||||
#define hipTextureTypeCubemap 0x0C
|
||||
#define hipTextureType1DLayered 0xF1
|
||||
#define hipTextureType2DLayered 0xF2
|
||||
#define hipTextureTypeCubemapLayered 0xFC
|
||||
|
||||
/**
|
||||
* Should be same as HSA_IMAGE_OBJECT_SIZE_DWORD/HSA_SAMPLER_OBJECT_SIZE_DWORD
|
||||
*/
|
||||
#define HIP_IMAGE_OBJECT_SIZE_DWORD 12
|
||||
#define HIP_SAMPLER_OBJECT_SIZE_DWORD 8
|
||||
#define HIP_SAMPLER_OBJECT_OFFSET_DWORD HIP_IMAGE_OBJECT_SIZE_DWORD
|
||||
#define HIP_TEXTURE_OBJECT_SIZE_DWORD (HIP_IMAGE_OBJECT_SIZE_DWORD + HIP_SAMPLER_OBJECT_SIZE_DWORD)
|
||||
|
||||
/**
|
||||
* An opaque value that represents a hip texture object
|
||||
*/
|
||||
typedef unsigned long long hipTextureObject_t;
|
||||
|
||||
/**
|
||||
* hip texture address modes
|
||||
*/
|
||||
enum hipTextureAddressMode
|
||||
{
|
||||
hipReadModeElementType = 0
|
||||
hipAddressModeWrap = 0,
|
||||
hipAddressModeClamp = 1,
|
||||
hipAddressModeMirror = 2,
|
||||
hipAddressModeBorder = 3
|
||||
};
|
||||
|
||||
/**
|
||||
* hip texture filter modes
|
||||
*/
|
||||
enum hipTextureFilterMode
|
||||
{
|
||||
hipFilterModePoint = 0
|
||||
hipFilterModePoint = 0,
|
||||
hipFilterModeLinear = 1
|
||||
};
|
||||
|
||||
struct textureReference {
|
||||
enum hipTextureFilterMode filterMode;
|
||||
unsigned normalized;
|
||||
struct hipChannelFormatDesc channelDesc;
|
||||
/**
|
||||
* hip texture read modes
|
||||
*/
|
||||
enum hipTextureReadMode
|
||||
{
|
||||
hipReadModeElementType = 0,
|
||||
hipReadModeNormalizedFloat = 1
|
||||
};
|
||||
|
||||
/**
|
||||
* hip texture reference
|
||||
*/
|
||||
struct textureReference
|
||||
{
|
||||
int normalized;
|
||||
enum hipTextureFilterMode filterMode;
|
||||
enum hipTextureAddressMode addressMode[3]; //Texture address mode for up to 3 dimensions
|
||||
struct hipChannelFormatDesc channelDesc;
|
||||
int sRGB; // Perform sRGB->linear conversion during texture read
|
||||
unsigned int maxAnisotropy; // Limit to the anisotropy ratio
|
||||
enum hipTextureFilterMode mipmapFilterMode;
|
||||
float mipmapLevelBias;
|
||||
float minMipmapLevelClamp;
|
||||
float maxMipmapLevelClamp;
|
||||
|
||||
hipTextureObject_t textureObject;
|
||||
};
|
||||
|
||||
/**
|
||||
* hip texture descriptor
|
||||
*/
|
||||
struct hipTextureDesc
|
||||
{
|
||||
enum hipTextureAddressMode addressMode[3]; //Texture address mode for up to 3 dimensions
|
||||
enum hipTextureFilterMode filterMode;
|
||||
enum hipTextureReadMode readMode;
|
||||
int sRGB; // Perform sRGB->linear conversion during texture read
|
||||
float borderColor[4];
|
||||
int normalizedCoords;
|
||||
unsigned int maxAnisotropy;
|
||||
enum hipTextureFilterMode mipmapFilterMode;
|
||||
float mipmapLevelBias;
|
||||
float minMipmapLevelClamp;
|
||||
float maxMipmapLevelClamp;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
+79
-4
@@ -23,8 +23,83 @@ THE SOFTWARE.
|
||||
#ifndef HIP_INCLUDE_HIP_HIP_HCC_H
|
||||
#define HIP_INCLUDE_HIP_HIP_HCC_H
|
||||
|
||||
#if defined(__HIP_PLATFORM_HCC__) && !defined (__HIP_PLATFORM_NVCC__)
|
||||
#include "hip/hcc_detail/hip_hcc.h"
|
||||
#endif
|
||||
#ifdef __HCC__
|
||||
|
||||
#endif
|
||||
#include "hip/hip_runtime_api.h"
|
||||
|
||||
// Forward declarations:
|
||||
namespace hc {
|
||||
class accelerator;
|
||||
class accelerator_view;
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
*-------------------------------------------------------------------------------------------------
|
||||
*-------------------------------------------------------------------------------------------------
|
||||
* @defgroup HCC-specific features
|
||||
* @warning These APIs provide access to special features of HCC compiler and are not available through the CUDA path.
|
||||
* @{
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @brief Return hc::accelerator associated with the specified deviceId
|
||||
* @return #hipSuccess, #hipErrorInvalidDevice
|
||||
*/
|
||||
hipError_t hipHccGetAccelerator(int deviceId, hc::accelerator *acc);
|
||||
|
||||
/**
|
||||
* @brief Return hc::accelerator_view associated with the specified stream
|
||||
*
|
||||
* If stream is 0, the accelerator_view for the default stream is returned.
|
||||
* @return #hipSuccess
|
||||
*/
|
||||
hipError_t hipHccGetAcceleratorView(hipStream_t stream, hc::accelerator_view **av);
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* @brief launches kernel f with launch parameters and shared memory on stream with arguments passed to kernelparams or extra
|
||||
*
|
||||
* @param [in] f Kernel to launch.
|
||||
* @param [in] globalWorkSizeX X grid dimension specified in work-items
|
||||
* @param [in] globalWorkSizeY Y grid dimension specified in work-items
|
||||
* @param [in] globalWorkSizeZ Z grid dimension specified in work-items
|
||||
* @param [in] localWorkSizeX X block dimension specified in work-items
|
||||
* @param [in] localWorkSizeY Y block dimension specified in work-items
|
||||
* @param [in] localWorkSizeZ Z block dimension specified in work-items
|
||||
* @param [in] sharedMemBytes Amount of dynamic shared memory to allocate for this kernel. The kernel can access this with HIP_DYNAMIC_SHARED.
|
||||
* @param [in] hStream Stream where the kernel should be dispatched. May be 0, in which case the default stream is used with associated synchronization rules.
|
||||
* @param [in] kernelParams
|
||||
* @param [in] extra Pointer to kernel arguments. These are passed directly to the kernel and must be in the memory layout and alignment expected by the kernel.
|
||||
* @param [in] startEvent If non-null, specified event will be updated to track the start time of the kernel launch. The event must be created before calling this API.
|
||||
* @param [in] stopEvent If non-null, specified event will be updated to track the stop time of the kernel launch. The event must be created before calling this API.
|
||||
*
|
||||
* @returns hipSuccess, hipErrorInvalidDevice, hipErrorNotInitialized, hipErrorInvalidValue
|
||||
*
|
||||
* @warning kernelParams argument is not yet implemented in HIP. Please use extra instead. Please refer to hip_porting_driver_api.md for sample usage.
|
||||
|
||||
* HIP/ROCm actually updates the start event when the associated kernel completes.
|
||||
*/
|
||||
hipError_t hipHccModuleLaunchKernel(hipFunction_t f,
|
||||
uint32_t globalWorkSizeX,
|
||||
uint32_t globalWorkSizeY,
|
||||
uint32_t globalWorkSizeZ,
|
||||
uint32_t localWorkSizeX,
|
||||
uint32_t localWorkSizeY,
|
||||
uint32_t localWorkSizeZ,
|
||||
size_t sharedMemBytes,
|
||||
hipStream_t hStream,
|
||||
void **kernelParams,
|
||||
void **extra,
|
||||
hipEvent_t startEvent=nullptr,
|
||||
hipEvent_t stopEvent=nullptr
|
||||
);
|
||||
|
||||
// doxygen end HCC-specific features
|
||||
/**
|
||||
* @}
|
||||
*/
|
||||
#endif // #ifdef __HCC__
|
||||
#endif // #ifdef HIP_INCLUDE_HIP_HIP_HCC_H
|
||||
|
||||
@@ -102,6 +102,7 @@ typedef struct hipDeviceProp_t {
|
||||
int clockInstructionRate; ///< Frequency in khz of the timer used by the device-side "clock*" instructions. New for HIP.
|
||||
hipDeviceArch_t arch; ///< Architectural feature flags. New for HIP.
|
||||
int concurrentKernels; ///< Device can possibly execute multiple kernels concurrently.
|
||||
int pciDomainID; ///< PCI Domain ID
|
||||
int pciBusID; ///< PCI Bus ID.
|
||||
int pciDeviceID; ///< PCI Device ID.
|
||||
size_t maxSharedMemoryPerMultiProcessor; ///< Maximum Shared Memory Per Multiprocessor.
|
||||
@@ -160,6 +161,7 @@ typedef enum hipError_t {
|
||||
hipErrorProfilerNotInitialized = 6,
|
||||
hipErrorProfilerAlreadyStarted = 7,
|
||||
hipErrorProfilerAlreadyStopped = 8,
|
||||
hipErrorInsufficientDriver = 35,
|
||||
hipErrorInvalidImage = 200,
|
||||
hipErrorInvalidContext = 201, ///< Produced when input context is invalid.
|
||||
hipErrorContextAlreadyCurrent = 202,
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
@@ -20,13 +20,15 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef HIP_INCLUDE_HIP_HIP_TEXTURE_H
|
||||
#define HIP_INCLUDE_HIP_HIP_TEXTURE_H
|
||||
|
||||
|
||||
#ifndef HIP_INCLUDE_HIP_HIP_TEXTURE_TYPES_H
|
||||
#define HIP_INCLUDE_HIP_HIP_TEXTURE_TYPES_H
|
||||
|
||||
#if defined(__HIP_PLATFORM_HCC__) && !defined (__HIP_PLATFORM_NVCC__)
|
||||
#include <hip/hcc_detail/hip_texture.h>
|
||||
#include <hip/hcc_detail/hip_texture_types.h>
|
||||
#elif defined(__HIP_PLATFORM_NVCC__) && !defined (__HIP_PLATFORM_HCC__)
|
||||
#include <hip/nvcc_detail/hip_texture.h>
|
||||
#include <hip/nvcc_detail/hip_texture_types.h>
|
||||
#else
|
||||
#error("Must define exactly one of __HIP_PLATFORM_HCC__ or __HIP_PLATFORM_NVCC__");
|
||||
#endif
|
||||
@@ -111,6 +111,8 @@ kernelName<<<numblocks,numthreads,memperblock,streamId>>>(__VA_ARGS__);\
|
||||
|
||||
#ifdef __HIP_DEVICE_COMPILE__
|
||||
#define abort() {asm("trap;");}
|
||||
#undef assert
|
||||
// Device-side assert: traps via abort() when COND is false.
// COND is parenthesized so that compound expressions (e.g. `a || b`, `x == y`)
// are negated as a whole rather than having `!` bind to their first operand.
#define assert(COND) { if (!(COND)) {abort();} }
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
@@ -31,6 +31,13 @@ THE SOFTWARE.
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
#define __dparm(x) \
|
||||
= x
|
||||
#else
|
||||
#define __dparm(x)
|
||||
#endif
|
||||
|
||||
//TODO -move to include/hip_runtime_api.h as a common implementation.
|
||||
/**
|
||||
* Memory copy types
|
||||
@@ -102,20 +109,20 @@ typedef cudaEvent_t hipEvent_t;
|
||||
typedef cudaStream_t hipStream_t;
|
||||
typedef cudaIpcEventHandle_t hipIpcEventHandle_t;
|
||||
typedef cudaIpcMemHandle_t hipIpcMemHandle_t;
|
||||
typedef cudaLimit hipLimit_t;
|
||||
typedef cudaFuncCache hipFuncCache_t;
|
||||
typedef enum cudaLimit hipLimit_t;
|
||||
typedef enum cudaFuncCache hipFuncCache_t;
|
||||
typedef CUcontext hipCtx_t;
|
||||
typedef CUsharedconfig hipSharedMemConfig;
|
||||
typedef cudaSharedMemConfig hipSharedMemConfig;
|
||||
typedef CUfunc_cache hipFuncCache;
|
||||
typedef CUjit_option hipJitOption;
|
||||
typedef CUdevice hipDevice_t;
|
||||
typedef CUmodule hipModule_t;
|
||||
typedef CUfunction hipFunction_t;
|
||||
typedef CUdeviceptr hipDeviceptr_t;
|
||||
typedef cudaChannelFormatKind hipChannelFormatKind;
|
||||
typedef cudaChannelFormatDesc hipChannelFormatDesc;
|
||||
typedef cudaTextureReadMode hipTextureReadMode;
|
||||
typedef cudaArray hipArray;
|
||||
typedef enum cudaChannelFormatKind hipChannelFormatKind;
|
||||
typedef struct cudaChannelFormatDesc hipChannelFormatDesc;
|
||||
typedef enum cudaTextureReadMode hipTextureReadMode;
|
||||
typedef struct cudaArray hipArray;
|
||||
|
||||
// Flags that can be used with hipStreamCreateWithFlags
|
||||
#define hipStreamDefault cudaStreamDefault
|
||||
@@ -124,6 +131,11 @@ typedef cudaArray hipArray;
|
||||
//typedef cudaChannelFormatDesc hipChannelFormatDesc;
|
||||
#define hipChannelFormatDesc cudaChannelFormatDesc
|
||||
|
||||
//adding code for hipmemSharedConfig
|
||||
#define hipSharedMemBankSizeDefault cudaSharedMemBankSizeDefault
|
||||
#define hipSharedMemBankSizeFourByte cudaSharedMemBankSizeFourByte
|
||||
#define hipSharedMemBankSizeEightByte cudaSharedMemBankSizeEightByte
|
||||
|
||||
inline static hipError_t hipCUDAErrorTohipError(cudaError_t cuError) {
|
||||
switch(cuError) {
|
||||
case cudaSuccess : return hipSuccess;
|
||||
@@ -187,7 +199,7 @@ switch(hError) {
|
||||
}
|
||||
}
|
||||
|
||||
inline static cudaMemcpyKind hipMemcpyKindToCudaMemcpyKind(hipMemcpyKind kind) {
|
||||
inline static enum cudaMemcpyKind hipMemcpyKindToCudaMemcpyKind(hipMemcpyKind kind) {
|
||||
switch(kind) {
|
||||
case hipMemcpyHostToHost:
|
||||
return cudaMemcpyHostToHost;
|
||||
@@ -250,7 +262,7 @@ inline static hipError_t hipHostMalloc(void** ptr, size_t size, unsigned int fla
|
||||
return hipCUDAErrorTohipError(cudaHostAlloc(ptr, size, flags));
|
||||
}
|
||||
|
||||
inline static hipError_t hipMallocArray(hipArray** array, const hipChannelFormatDesc* desc, size_t width, size_t height, unsigned int flags) {
|
||||
inline static hipError_t hipMallocArray(hipArray** array, const struct hipChannelFormatDesc* desc, size_t width, size_t height, unsigned int flags) {
|
||||
return hipCUDAErrorTohipError(cudaMallocArray(array, desc, width, height, flags));
|
||||
}
|
||||
|
||||
@@ -289,8 +301,8 @@ inline static hipError_t hipSetDevice(int device) {
|
||||
|
||||
inline static hipError_t hipChooseDevice( int* device, const hipDeviceProp_t* prop )
|
||||
{
|
||||
cudaDeviceProp cdprop;
|
||||
memset(&cdprop,0x0,sizeof(cudaDeviceProp));
|
||||
struct cudaDeviceProp cdprop;
|
||||
memset(&cdprop,0x0,sizeof(struct cudaDeviceProp));
|
||||
cdprop.major= prop->major;
|
||||
cdprop.minor = prop->minor;
|
||||
cdprop.totalGlobalMem = prop->totalGlobalMem ;
|
||||
@@ -351,25 +363,24 @@ inline static hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes,
|
||||
}
|
||||
|
||||
|
||||
inline static hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind copyKind, hipStream_t stream=0) {
|
||||
inline static hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind copyKind, hipStream_t stream __dparm(0)) {
|
||||
return hipCUDAErrorTohipError(cudaMemcpyAsync(dst, src, sizeBytes, hipMemcpyKindToCudaMemcpyKind(copyKind), stream));
|
||||
}
|
||||
|
||||
|
||||
inline static hipError_t hipMemcpyToSymbol(const void* symbol, const void* src, size_t sizeBytes, size_t offset = 0, hipMemcpyKind copyType = hipMemcpyHostToDevice) {
|
||||
inline static hipError_t hipMemcpyToSymbol(const void* symbol, const void* src, size_t sizeBytes, size_t offset __dparm(0), hipMemcpyKind copyType __dparm(hipMemcpyHostToDevice)) {
|
||||
return hipCUDAErrorTohipError(cudaMemcpyToSymbol(symbol, src, sizeBytes, offset, hipMemcpyKindToCudaMemcpyKind(copyType)));
|
||||
}
|
||||
|
||||
inline static hipError_t hipMemcpyToSymbolAsync(const void* symbol, const void* src, size_t sizeBytes, size_t offset, hipMemcpyKind copyType, hipStream_t stream = 0) {
|
||||
inline static hipError_t hipMemcpyToSymbolAsync(const void* symbol, const void* src, size_t sizeBytes, size_t offset, hipMemcpyKind copyType, hipStream_t stream __dparm(0)) {
|
||||
return hipCUDAErrorTohipError(cudaMemcpyToSymbolAsync(symbol, src, sizeBytes, offset, hipMemcpyKindToCudaMemcpyKind(copyType), stream));
|
||||
}
|
||||
|
||||
inline static hipError_t hipMemcpyFromSymbol(void *dst, const void* symbolName, size_t sizeBytes, size_t offset = 0, hipMemcpyKind kind = hipMemcpyDeviceToHost)
|
||||
inline static hipError_t hipMemcpyFromSymbol(void *dst, const void* symbolName, size_t sizeBytes, size_t offset __dparm(0), hipMemcpyKind kind __dparm(hipMemcpyDeviceToHost))
|
||||
{
|
||||
return hipCUDAErrorTohipError(cudaMemcpyFromSymbol(dst, symbolName, sizeBytes, offset, hipMemcpyKindToCudaMemcpyKind(kind)));
|
||||
}
|
||||
|
||||
inline static hipError_t hipMemcpyFromSymbolAsync(void *dst, const void* symbolName, size_t sizeBytes, size_t offset, hipMemcpyKind kind, hipStream_t stream = 0)
|
||||
inline static hipError_t hipMemcpyFromSymbolAsync(void *dst, const void* symbolName, size_t sizeBytes, size_t offset, hipMemcpyKind kind, hipStream_t stream __dparm(0))
|
||||
{
|
||||
return hipCUDAErrorTohipError(cudaMemcpyFromSymbolAsync(dst, symbolName, sizeBytes, offset, hipMemcpyKindToCudaMemcpyKind(kind), stream));
|
||||
}
|
||||
@@ -438,7 +449,7 @@ inline static hipError_t hipMemset(void* devPtr,int value, size_t count) {
|
||||
return hipCUDAErrorTohipError(cudaMemset(devPtr, value, count));
|
||||
}
|
||||
|
||||
inline static hipError_t hipMemsetAsync(void* devPtr,int value, size_t count, hipStream_t stream = 0) {
|
||||
inline static hipError_t hipMemsetAsync(void* devPtr,int value, size_t count, hipStream_t stream __dparm(0)) {
|
||||
return hipCUDAErrorTohipError(cudaMemsetAsync(devPtr, value, count, stream));
|
||||
}
|
||||
|
||||
@@ -449,7 +460,7 @@ inline static hipError_t hipMemsetD8(hipDeviceptr_t dest, unsigned char value,
|
||||
|
||||
inline static hipError_t hipGetDeviceProperties(hipDeviceProp_t *p_prop, int device)
|
||||
{
|
||||
cudaDeviceProp cdprop;
|
||||
struct cudaDeviceProp cdprop;
|
||||
cudaError_t cerror;
|
||||
cerror = cudaGetDeviceProperties(&cdprop,device);
|
||||
strncpy(p_prop->name,cdprop.name, 256);
|
||||
@@ -510,7 +521,7 @@ inline static hipError_t hipGetDeviceProperties(hipDeviceProp_t *p_prop, int dev
|
||||
|
||||
inline static hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device)
|
||||
{
|
||||
cudaDeviceAttr cdattr;
|
||||
enum cudaDeviceAttr cdattr;
|
||||
cudaError_t cerror;
|
||||
|
||||
switch (attr) {
|
||||
@@ -586,7 +597,7 @@ inline static hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor(
|
||||
}
|
||||
|
||||
inline static hipError_t hipPointerGetAttributes(hipPointerAttribute_t *attributes, void* ptr){
|
||||
cudaPointerAttributes cPA;
|
||||
struct cudaPointerAttributes cPA;
|
||||
hipError_t err = hipCUDAErrorTohipError(cudaPointerGetAttributes(&cPA, ptr));
|
||||
if(err == hipSuccess){
|
||||
switch (cPA.memoryType){
|
||||
@@ -617,7 +628,7 @@ inline static hipError_t hipEventCreate( hipEvent_t* event)
|
||||
return hipCUDAErrorTohipError(cudaEventCreate(event));
|
||||
}
|
||||
|
||||
inline static hipError_t hipEventRecord( hipEvent_t event, hipStream_t stream = NULL)
|
||||
inline static hipError_t hipEventRecord( hipEvent_t event, hipStream_t stream __dparm(NULL))
|
||||
{
|
||||
return hipCUDAErrorTohipError(cudaEventRecord(event,stream));
|
||||
}
|
||||
@@ -750,18 +761,18 @@ inline static hipError_t hipMemcpyPeer ( void* dst, int dstDevice, const void*
|
||||
return hipCUDAErrorTohipError(cudaMemcpyPeer(dst, dstDevice, src, srcDevice, count));
|
||||
}
|
||||
|
||||
inline static hipError_t hipMemcpyPeerAsync ( void* dst, int dstDevice, const void* src, int srcDevice, size_t count, hipStream_t stream=0 )
|
||||
inline static hipError_t hipMemcpyPeerAsync ( void* dst, int dstDevice, const void* src, int srcDevice, size_t count, hipStream_t stream __dparm(0))
|
||||
{
|
||||
return hipCUDAErrorTohipError(cudaMemcpyPeerAsync(dst, dstDevice, src, srcDevice, count, stream));
|
||||
}
|
||||
|
||||
// Profile APIs:
|
||||
inline hipError_t hipProfilerStart()
|
||||
inline static hipError_t hipProfilerStart()
|
||||
{
|
||||
return hipCUDAErrorTohipError(cudaProfilerStart());
|
||||
}
|
||||
|
||||
inline hipError_t hipProfilerStop()
|
||||
inline static hipError_t hipProfilerStop()
|
||||
{
|
||||
return hipCUDAErrorTohipError(cudaProfilerStop());
|
||||
}
|
||||
@@ -833,12 +844,12 @@ inline static hipError_t hipCtxSetCacheConfig (hipFuncCache cacheConfig)
|
||||
|
||||
inline static hipError_t hipCtxSetSharedMemConfig (hipSharedMemConfig config)
|
||||
{
|
||||
return hipCUResultTohipError(cuCtxSetSharedMemConfig(config));
|
||||
return hipCUResultTohipError(cuCtxSetSharedMemConfig((CUsharedconfig)config));
|
||||
}
|
||||
|
||||
inline static hipError_t hipCtxGetSharedMemConfig ( hipSharedMemConfig * pConfig )
|
||||
{
|
||||
return hipCUResultTohipError(cuCtxGetSharedMemConfig(pConfig));
|
||||
return hipCUResultTohipError(cuCtxGetSharedMemConfig((CUsharedconfig *)pConfig));
|
||||
}
|
||||
|
||||
inline static hipError_t hipCtxSynchronize ( void )
|
||||
@@ -873,12 +884,22 @@ inline static hipError_t hipDeviceGetName(char *name,int len,hipDevice_t device)
|
||||
|
||||
inline static hipError_t hipDeviceGetPCIBusId(char* pciBusId,int len,hipDevice_t device)
|
||||
{
|
||||
return hipCUResultTohipError(cuDeviceGetPCIBusId(pciBusId,len,device));
|
||||
return hipCUDAErrorTohipError(cudaDeviceGetPCIBusId(pciBusId,len,device));
|
||||
}
|
||||
|
||||
inline static hipError_t hipDeviceGetByPCIBusId(int* device, const int *pciBusId)
|
||||
inline static hipError_t hipDeviceGetByPCIBusId(int* device, const char *pciBusId)
|
||||
{
|
||||
return hipCUDAErrorTohipError(cudaDeviceGetByPCIBusId(device,(char*)pciBusId));
|
||||
return hipCUDAErrorTohipError(cudaDeviceGetByPCIBusId(device, pciBusId));
|
||||
}
|
||||
|
||||
inline static hipError_t hipDeviceGetSharedMemConfig(hipSharedMemConfig *config)
|
||||
{
|
||||
return hipCUDAErrorTohipError(cudaDeviceGetSharedMemConfig(config));
|
||||
}
|
||||
|
||||
inline static hipError_t hipDeviceSetSharedMemConfig(hipSharedMemConfig config)
|
||||
{
|
||||
return hipCUDAErrorTohipError(cudaDeviceSetSharedMemConfig(config));
|
||||
}
|
||||
|
||||
inline static hipError_t hipDeviceGetLimit(size_t *pValue, hipLimit_t limit)
|
||||
|
||||
@@ -0,0 +1,6 @@
|
||||
#ifndef HIP_INCLUDE_HIP_NVCC_DETAIL_HIP_TEXTURE_TYPES_H
|
||||
#define HIP_INCLUDE_HIP_NVCC_DETAIL_HIP_TEXTURE_TYPES_H
|
||||
|
||||
#include <texture_types.h>
|
||||
|
||||
#endif
|
||||
Archivo ejecutable
+48
@@ -0,0 +1,48 @@
|
||||
#!/bin/bash
|
||||
|
||||
BUILD_ROOT="$( mktemp -d )"
|
||||
SRC_ROOT="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
|
||||
WORKING_DIR=$PWD
|
||||
DASH_JAY="-j $(getconf _NPROCESSORS_ONLN)"
|
||||
|
||||
err() {
|
||||
echo "${1-Died}." >&2
|
||||
}
|
||||
|
||||
die() {
|
||||
err "$1"
|
||||
exit 1
|
||||
}
|
||||
|
||||
pushd () {
|
||||
command pushd "$@" > /dev/null
|
||||
}
|
||||
|
||||
popd () {
|
||||
command popd "$@" > /dev/null
|
||||
}
|
||||
|
||||
function setupENV()
|
||||
{
|
||||
sudo apt-get update
|
||||
sudo apt-get install dpkg-dev rpm doxygen libelf-dev
|
||||
}
|
||||
|
||||
# Configure, build, package and install HIP from the temporary build tree.
#
# Returns non-zero when entering the build directory, cmake, make,
# `make package`, the package copy or `dpkg -i` fails, so that the caller's
# `buildHIP || die ...` actually triggers. (Previously the function's status
# was always that of the trailing `rm -rf`.) The temporary build tree is
# removed on both the success and the failure path.
function buildHIP()
{
    local status=0

    # Never build in (or later delete relative to) an unintended directory.
    pushd "$BUILD_ROOT" || return 1

    # $DASH_JAY is intentionally unquoted: it must split into "-j" and "<N>".
    if cmake "$SRC_ROOT" -DCMAKE_BUILD_TYPE=Release -DCOMPILE_HIP_ATP_MARKER=1 \
        && make $DASH_JAY \
        && make package; then
        # Tag the generated packages with their architecture (best effort, as before).
        rename -v 's/([a-z0-9_.\-]).deb/$1-amd64.deb/' *.deb
        rename -v 's/([a-z0-9_.\-]).rpm/$1.x86_64.rpm/' *.rpm
        cp hip_*.deb "$WORKING_DIR" \
            && sudo dpkg -i hip_base*.deb hip_hcc*.deb hip_sample*.deb hip_doc*.deb \
            || status=$?
    else
        status=$?
    fi

    popd
    rm -rf "$BUILD_ROOT"
    return $status
}
|
||||
|
||||
echo "Preparing build environment"
|
||||
setupENV || die "setupENV failed"
|
||||
echo "Building and installing HIP packages"
|
||||
buildHIP || die "buildHIP failed"
|
||||
echo "Finished building HIP packages"
|
||||
@@ -54,8 +54,8 @@ popd
|
||||
|
||||
# replace github.io links
|
||||
pushd $html_destdir
|
||||
sed -i "s?http://gpuopen-professionalcompute-tools.github.io/HIP?docs/RuntimeAPI/html/index.html?g" README.html
|
||||
sed -i "s?http://gpuopen-professionalcompute-tools.github.io/HIP?docs/RuntimeAPI/html/?g" RELEASE.html
|
||||
sed -i "s?http://rocm-developer-tools.github.io/HIP?docs/RuntimeAPI/html/index.html?g" README.html
|
||||
sed -i "s?http://rocm-developer-tools.github.io/HIP?docs/RuntimeAPI/html/?g" RELEASE.html
|
||||
popd
|
||||
|
||||
exit 0
|
||||
|
||||
@@ -7,16 +7,23 @@
|
||||
|
||||
using namespace std;
|
||||
|
||||
#define SORT_BY_NAME 0
|
||||
#define SORT_RETAIN_ATTS_ORDER 1
|
||||
|
||||
|
||||
bool ResultDatabase::Result::operator<(const Result &rhs) const
|
||||
{
|
||||
if (test < rhs.test)
|
||||
return true;
|
||||
if (test > rhs.test)
|
||||
return false;
|
||||
#if (SORT_RETAIN_ATTS_ORDER == 0)
|
||||
// For ties, sort by the value of the attribute:
|
||||
if (atts < rhs.atts)
|
||||
return true;
|
||||
if (atts > rhs.atts)
|
||||
return false;
|
||||
#endif
|
||||
return false; // less-operator returns false on equal
|
||||
}
|
||||
|
||||
@@ -189,7 +196,10 @@ void ResultDatabase::AddResult(const string &test_orig,
|
||||
void ResultDatabase::DumpDetailed(ostream &out)
|
||||
{
|
||||
vector<Result> sorted(results);
|
||||
sort(sorted.begin(), sorted.end());
|
||||
|
||||
#if SORT_BY_NAME
|
||||
stable_sort(sorted.begin(), sorted.end());
|
||||
#endif
|
||||
|
||||
const int testNameW = 24 ;
|
||||
const int attW = 12;
|
||||
@@ -283,12 +293,15 @@ void ResultDatabase::DumpDetailed(ostream &out)
|
||||
void ResultDatabase::DumpSummary(ostream &out)
|
||||
{
|
||||
vector<Result> sorted(results);
|
||||
sort(sorted.begin(), sorted.end());
|
||||
|
||||
const int testNameW = 24 ;
|
||||
#if SORT_BY_NAME
|
||||
stable_sort(sorted.begin(), sorted.end());
|
||||
#endif
|
||||
|
||||
const int testNameW = 32 ;
|
||||
const int attW = 12;
|
||||
const int fieldW = 9;
|
||||
out << std::fixed << right << std::setprecision(4);
|
||||
out << std::fixed << right << std::setprecision(2);
|
||||
|
||||
// TODO: in big parallel runs, the "trials" are the procs
|
||||
// and we really don't want to print them all out....
|
||||
@@ -334,8 +347,8 @@ void ResultDatabase::DumpSummary(ostream &out)
|
||||
}
|
||||
if (0) {
|
||||
out << endl
|
||||
<< "Note: results marked with (*) had missing values such as" << endl
|
||||
<< "might occur with a mixture of architectural capabilities." << endl;
|
||||
<< "Note: results marked with (*) had missing values such as" << endl
|
||||
<< "might occur with a mixture of architectural capabilities." << endl;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -381,7 +394,9 @@ void ResultDatabase::DumpCsv(string fileName)
|
||||
bool emptyFile;
|
||||
vector<Result> sorted(results);
|
||||
|
||||
sort(sorted.begin(), sorted.end());
|
||||
#if SORT_BY_NAME
|
||||
stable_sort(sorted.begin(), sorted.end());
|
||||
#endif
|
||||
|
||||
//Check to see if the file is empty - if so, add the headers
|
||||
emptyFile = this->IsFileEmpty(fileName);
|
||||
|
||||
@@ -25,15 +25,27 @@ THE SOFTWARE.
|
||||
#include<time.h>
|
||||
#include"ResultDatabase.h"
|
||||
|
||||
#define check(msg, status) \
|
||||
if(status != hipSuccess){ \
|
||||
printf("%s failed.\n",#msg); \
|
||||
exit(1); \
|
||||
#define PRINT_PROGRESS 0
|
||||
|
||||
#define check(cmd) \
|
||||
{\
|
||||
hipError_t status = cmd;\
|
||||
if(status != hipSuccess){ \
|
||||
printf("error: '%s'(%d) from %s at %s:%d\n", \
|
||||
hipGetErrorString(status), status, #cmd,\
|
||||
__FILE__, __LINE__); \
|
||||
abort(); \
|
||||
}\
|
||||
}
|
||||
|
||||
#define LEN 1024*1024
|
||||
#define SIZE LEN * sizeof(float)
|
||||
#define ITER 10120
|
||||
|
||||
#define NUM_GROUPS 1
|
||||
#define GROUP_SIZE 64
|
||||
#define TEST_ITERS 20
|
||||
#define DISPATCHES_PER_TEST 100
|
||||
|
||||
const unsigned p_tests = 0xfffffff;
|
||||
|
||||
|
||||
// HCC optimizes away fully NULL kernel calls, so run one that is nearly null:
|
||||
@@ -44,115 +56,112 @@ __global__ void NearlyNull(hipLaunchParm lp, float* Ad){
|
||||
}
|
||||
|
||||
|
||||
ResultDatabase resultDB;
|
||||
|
||||
|
||||
// Close one timed measurement and store it in the file-scope resultDB.
//   start, stop : events bracketing the measured work; `stop` is recorded here,
//                 `start` must already have been recorded by the caller.
//   msg         : test name stored with the result (and printed when progress
//                 output is enabled).
//   iters       : number of dispatches covered by the interval; the elapsed
//                 time is divided by it to get a per-dispatch figure.
// Every HIP call is wrapped in check().
void stopTest(hipEvent_t start, hipEvent_t stop, const char *msg, int iters)
|
||||
{
|
||||
float mS = 0;
|
||||
check(hipEventRecord(stop));
|
||||
// Wait for the device before reading the elapsed time between the two events.
check(hipDeviceSynchronize());
|
||||
check(hipEventElapsedTime(&mS, start, stop));
|
||||
// mS*1000/iters is stored under the unit label "uS" (per-dispatch value).
resultDB.AddResult(std::string(msg), "", "uS", mS*1000/iters);
|
||||
// PRINT_PROGRESS bit 0x1: print this single measurement as it completes.
if (PRINT_PROGRESS & 0x1 ) {
|
||||
std::cout<< msg <<"\t\t"<<mS*1000/iters<<" uS"<<std::endl;
|
||||
}
|
||||
// PRINT_PROGRESS bit 0x2: dump the whole summary accumulated so far.
if (PRINT_PROGRESS & 0x2 ) {
|
||||
resultDB.DumpSummary(std::cout);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
int main(){
|
||||
|
||||
hipError_t err;
|
||||
float *A;
|
||||
float *Ad = NULL;
|
||||
float *Ad;
|
||||
check(hipMalloc(&Ad, 4));
|
||||
|
||||
A = new float[LEN];
|
||||
|
||||
for(int i=0;i<LEN;i++){
|
||||
A[i] = 1.0f;
|
||||
}
|
||||
|
||||
hipStream_t stream;
|
||||
err = hipStreamCreate(&stream);
|
||||
check("Creating stream",err);
|
||||
|
||||
//err = hipMalloc(&Ad, SIZE);
|
||||
//check("Allocating Ad memory on device", err);
|
||||
//err = hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice);
|
||||
//check("Doing memory copy from A to Ad", err);
|
||||
|
||||
float mS = 0;
|
||||
hipEvent_t start, stop;
|
||||
hipEventCreate(&start);
|
||||
hipEventCreate(&stop);
|
||||
|
||||
ResultDatabase resultDB[8];
|
||||
check(hipStreamCreate(&stream));
|
||||
|
||||
|
||||
hipEventRecord(start);
|
||||
hipLaunchKernel(NearlyNull, dim3(LEN/512), dim3(512), 0, 0, Ad);
|
||||
hipEventRecord(stop);
|
||||
hipEventElapsedTime(&mS, start, stop);
|
||||
resultDB[0].AddResult(std::string("First Kernel Launch"), "", "uS", mS*1000);
|
||||
// std::cout<<"First Kernel Launch: \t\t"<<mS*1000<<" uS"<<std::endl;
|
||||
resultDB[0].DumpSummary(std::cout);
|
||||
hipEventRecord(start);
|
||||
hipLaunchKernel(NearlyNull, dim3(LEN/512), dim3(512), 0, 0, Ad);
|
||||
hipEventRecord(stop);
|
||||
hipEventElapsedTime(&mS, start, stop);
|
||||
resultDB[1].AddResult(std::string("Second Kernel Launch"), "", "uS", mS*1000);
|
||||
// std::cout<<"Second Kernel Launch: \t\t"<<mS*1000<<" uS"<<std::endl;
|
||||
resultDB[1].DumpSummary(std::cout);
|
||||
hipEventRecord(start);
|
||||
for(int i=0;i<ITER;i++){
|
||||
hipLaunchKernel(NearlyNull, dim3(LEN/512), dim3(512), 0, 0, Ad);
|
||||
}
|
||||
hipDeviceSynchronize();
|
||||
hipEventRecord(stop);
|
||||
hipEventElapsedTime(&mS, start, stop);
|
||||
resultDB[2].AddResult(std::string("NULL Stream Sync dispatch wait"), "", "uS", mS*1000/ITER);
|
||||
resultDB[2].DumpSummary(std::cout);
|
||||
// std::cout<<"NULL Stream Sync dispatch wait: \t"<<mS*1000/ITER<<" uS"<<std::endl;
|
||||
hipDeviceSynchronize();
|
||||
hipEvent_t start, sync, stop;
|
||||
check(hipEventCreate(&start));
|
||||
check(hipEventCreateWithFlags(&sync, hipEventBlockingSync));
|
||||
check(hipEventCreate(&stop));
|
||||
|
||||
hipEventRecord(start);
|
||||
for(int i=0;i<ITER;i++){
|
||||
hipLaunchKernel(NearlyNull, dim3(LEN/512), dim3(512), 0, 0, Ad);
|
||||
}
|
||||
hipEventRecord(stop);
|
||||
hipDeviceSynchronize();
|
||||
hipEventElapsedTime(&mS, start, stop);
|
||||
resultDB[3].AddResult(std::string("NULL Stream Async dispatch wait"), "", "uS", mS*1000/ITER);
|
||||
resultDB[3].DumpSummary(std::cout);
|
||||
// std::cout<<"NULL Stream Async dispatch wait: \t"<<mS*1000/ITER<<" uS"<<std::endl;
|
||||
hipDeviceSynchronize();
|
||||
|
||||
hipEventRecord(start);
|
||||
for(int i=0;i<ITER;i++){
|
||||
hipLaunchKernel(NearlyNull, dim3(LEN/512), dim3(512), 0, stream, Ad);
|
||||
hipDeviceSynchronize();
|
||||
}
|
||||
hipEventRecord(stop);
|
||||
hipEventElapsedTime(&mS, start, stop);
|
||||
resultDB[4].AddResult(std::string("Stream Sync dispatch wait"), "", "uS", mS*1000/ITER);
|
||||
resultDB[4].DumpSummary(std::cout);
|
||||
// std::cout<<"Stream Sync dispatch wait: \t\t"<<mS*1000/ITER<<" uS"<<std::endl;
|
||||
hipDeviceSynchronize();
|
||||
hipEventRecord(start);
|
||||
for(int i=0;i<ITER;i++){
|
||||
hipLaunchKernel(NearlyNull, dim3(LEN/512), dim3(512), 0, stream, Ad);
|
||||
}
|
||||
hipDeviceSynchronize();
|
||||
hipEventRecord(stop);
|
||||
hipEventElapsedTime(&mS, start, stop);
|
||||
resultDB[5].AddResult(std::string("Stream Async dispatch wait"), "", "uS", mS*1000/ITER);
|
||||
// std::cout<<"Stream Async dispatch wait: \t\t"<<mS*1000/ITER<<" uS"<<std::endl;
|
||||
resultDB[5].DumpSummary(std::cout);
|
||||
hipDeviceSynchronize();
|
||||
|
||||
hipEventRecord(start);
|
||||
for(int i=0;i<ITER;i++){
|
||||
hipLaunchKernel(NearlyNull, dim3(LEN/512), dim3(512), 0, 0, Ad);
|
||||
}
|
||||
hipEventRecord(stop);
|
||||
hipEventElapsedTime(&mS, start, stop);
|
||||
resultDB[6].AddResult(std::string("NULL Stream No Wait"), "", "uS", mS*1000/ITER);
|
||||
resultDB[6].DumpSummary(std::cout);
|
||||
// std::cout<<"NULL Stream Dispatch No Wait: \t\t"<<mS*1000/ITER<<" uS"<<std::endl;
|
||||
hipDeviceSynchronize();
|
||||
hipStream_t stream0 = 0;
|
||||
|
||||
hipEventRecord(start);
|
||||
for(int i=0;i<ITER;i++){
|
||||
hipLaunchKernel(NearlyNull, dim3(LEN/512), dim3(512), 0, stream, Ad);
|
||||
}
|
||||
hipEventRecord(stop);
|
||||
hipEventElapsedTime(&mS, start, stop);
|
||||
resultDB[7].AddResult(std::string("Stream Dispatch No Wait"), "", "uS", mS*1000/ITER);
|
||||
resultDB[7].DumpSummary(std::cout);
|
||||
// std::cout<<"Stream Dispatch No Wait: \t\t"<<mS*1000/ITER<<" uS"<<std::endl;
|
||||
hipDeviceSynchronize();
|
||||
|
||||
if (p_tests & 0x1) {
|
||||
hipEventRecord(start);
|
||||
hipLaunchKernel(NearlyNull, dim3(NUM_GROUPS), dim3(GROUP_SIZE), 0, stream0, Ad);
|
||||
stopTest(start, stop, "FirstKernelLaunch", 1);
|
||||
}
|
||||
|
||||
|
||||
|
||||
if (p_tests & 0x2) {
|
||||
hipEventRecord(start);
|
||||
hipLaunchKernel(NearlyNull, dim3(NUM_GROUPS), dim3(GROUP_SIZE), 0, stream0, Ad);
|
||||
stopTest(start, stop, "SecondKernelLaunch", 1);
|
||||
}
|
||||
|
||||
|
||||
if (p_tests & 0x4) {
|
||||
for (int t=0; t<TEST_ITERS; t++) {
|
||||
hipEventRecord(start);
|
||||
for(int i=0;i<DISPATCHES_PER_TEST;i++){
|
||||
hipLaunchKernel(NearlyNull, dim3(NUM_GROUPS), dim3(GROUP_SIZE), 0, stream0, Ad);
|
||||
hipEventRecord(sync);
|
||||
hipEventSynchronize(sync);
|
||||
}
|
||||
stopTest(start, stop, "NullStreamASyncDispatchWait", DISPATCHES_PER_TEST);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (p_tests & 0x10) {
|
||||
for (int t=0; t<TEST_ITERS; t++) {
|
||||
hipEventRecord(start);
|
||||
for(int i=0;i<DISPATCHES_PER_TEST;i++){
|
||||
hipLaunchKernel(NearlyNull, dim3(NUM_GROUPS), dim3(GROUP_SIZE), 0, stream, Ad);
|
||||
hipEventRecord(sync);
|
||||
hipEventSynchronize(sync);
|
||||
}
|
||||
stopTest(start, stop, "StreamASyncDispatchWait", DISPATCHES_PER_TEST);
|
||||
}
|
||||
}
|
||||
|
||||
#if 1
|
||||
|
||||
if (p_tests & 0x40) {
|
||||
for (int t=0; t<TEST_ITERS; t++) {
|
||||
hipEventRecord(start);
|
||||
for(int i=0;i<DISPATCHES_PER_TEST;i++){
|
||||
hipLaunchKernel(NearlyNull, dim3(NUM_GROUPS), dim3(GROUP_SIZE), 0, stream0, Ad);
|
||||
}
|
||||
stopTest(start, stop, "NullStreamASyncDispatchNoWait", DISPATCHES_PER_TEST);
|
||||
}
|
||||
}
|
||||
|
||||
if (p_tests & 0x80) {
|
||||
for (int t=0; t<TEST_ITERS; t++) {
|
||||
hipEventRecord(start);
|
||||
for(int i=0;i<DISPATCHES_PER_TEST;i++){
|
||||
hipLaunchKernel(NearlyNull, dim3(NUM_GROUPS), dim3(GROUP_SIZE), 0, stream, Ad);
|
||||
}
|
||||
stopTest(start, stop, "StreamASyncDispatchNoWait", DISPATCHES_PER_TEST);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
resultDB.DumpSummary(std::cout);
|
||||
|
||||
|
||||
check(hipEventDestroy(start));
|
||||
check(hipEventDestroy(sync));
|
||||
check(hipEventDestroy(stop));
|
||||
}
|
||||
|
||||
@@ -129,6 +129,7 @@ void printDeviceProp (int deviceId)
|
||||
cout << setw(w1) << "arch.hasSurfaceFuncs: " << props.arch.hasSurfaceFuncs << endl;
|
||||
cout << setw(w1) << "arch.has3dGrid: " << props.arch.has3dGrid << endl;
|
||||
cout << setw(w1) << "arch.hasDynamicParallelism: " << props.arch.hasDynamicParallelism << endl;
|
||||
cout << setw(w1) << "gcnArch: " << props.gcnArch << endl;
|
||||
|
||||
int deviceCnt;
|
||||
hipGetDeviceCount(&deviceCnt);
|
||||
|
||||
@@ -7,7 +7,7 @@ This tutorial shows how to get write simple HIP application. We will write the s
|
||||
HIP is a C++ runtime API and kernel language that allows developers to create portable applications that can run on AMD and other GPUs. Our goal was to rise above the lowest-common-denominator paths and deliver a solution that allows you, the developer, to use essential hardware features and maximize your application’s performance on GPU hardware.
|
||||
|
||||
## Requirement:
|
||||
For hardware requirement and software installation [Installation](https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP/INSTALL.md)
|
||||
For hardware requirement and software installation [Installation](https://github.com/ROCm-Developer-Tools/HIP/INSTALL.md)
|
||||
|
||||
## prerequisite knowledge:
|
||||
|
||||
@@ -90,11 +90,11 @@ Use the make command and execute it using ./exe
|
||||
Use hipcc to build the application, which is using hcc on AMD and nvcc on nvidia.
|
||||
|
||||
## More Info:
|
||||
- [HIP FAQ](https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP/docs/markdown/hip_faq.md)
|
||||
- [HIP Kernel Language](https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP/docs/markdown/hip_kernel_language.md)
|
||||
- [HIP Runtime API (Doxygen)](http://gpuopen-professionalcompute-tools.github.io/HIP)
|
||||
- [HIP Porting Guide](https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP/docs/markdown/hip_porting_guide.md)
|
||||
- [HIP Terminology](https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP/docs/markdown/hip_terms.md) (including Rosetta Stone of GPU computing terms across CUDA/HIP/HC/AMP/OpenL)
|
||||
- [hipify-clang](https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP/hipify-clang/README.md)
|
||||
- [Developer/CONTRIBUTING Info](https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP/CONTRIBUTING.md)
|
||||
- [Release Notes](https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP/RELEASE.md)
|
||||
- [HIP FAQ](https://github.com/ROCm-Developer-Tools/HIP/docs/markdown/hip_faq.md)
|
||||
- [HIP Kernel Language](https://github.com/ROCm-Developer-Tools/HIP/docs/markdown/hip_kernel_language.md)
|
||||
- [HIP Runtime API (Doxygen)](http://rocm-developer-tools.github.io/HIP)
|
||||
- [HIP Porting Guide](https://github.com/ROCm-Developer-Tools/HIP/docs/markdown/hip_porting_guide.md)
|
||||
- [HIP Terminology](https://github.com/ROCm-Developer-Tools/HIP/docs/markdown/hip_terms.md) (including Rosetta Stone of GPU computing terms across CUDA/HIP/HC/AMP/OpenCL)
|
||||
- [hipify-clang](https://github.com/ROCm-Developer-Tools/HIP/hipify-clang/README.md)
|
||||
- [Developer/CONTRIBUTING Info](https://github.com/ROCm-Developer-Tools/HIP/CONTRIBUTING.md)
|
||||
- [Release Notes](https://github.com/ROCm-Developer-Tools/HIP/RELEASE.md)
|
||||
|
||||
@@ -15,7 +15,7 @@ For more information:
|
||||
[User Guide for AMDGPU Back-end](https://llvm.org/docs/AMDGPUUsage.html)
|
||||
|
||||
## Requirement:
|
||||
For hardware requirement and software installation [Installation](https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP/INSTALL.md)
|
||||
For hardware requirement and software installation [Installation](https://github.com/ROCm-Developer-Tools/HIP/INSTALL.md)
|
||||
|
||||
## prerequisite knowledge:
|
||||
|
||||
@@ -27,21 +27,34 @@ We will be using the Simple Matrix Transpose application from the our very first
|
||||
|
||||
## asm() Assembler statement
|
||||
|
||||
We insert the GCN isa into the kernel using asm() Assembler statement. In the same sourcecode, we used for MatrixTranspose. We'll add the following:
|
||||
In the same source code that we used for MatrixTranspose, we'll add the following:
|
||||
|
||||
` asm volatile ("v_mov_b32_e32 %0, %1" : "=v" (out[x*width + y]) : "v" (in[y*width + x])); `
|
||||
|
||||
GCN ISA inline assembly is supported. For example:
|
||||
|
||||
```
|
||||
asm volatile ("v_mac_f32_e32 %0, %2, %3" : "=v" (out[i]) : "0"(out[i]), "v" (a), "v" (in[i]));
|
||||
```
|
||||
|
||||
We insert the GCN ISA into the kernel using the `asm()` assembler statement.
|
||||
The `volatile` keyword is used so that the optimizers do not change the number of volatile operations or change their order of execution relative to other volatile operations.
|
||||
`v_mac_f32_e32` is the GCN instruction; for more information, please refer to the [AMD GCN3 ISA architecture manual](http://gpuopen.com/compute-product/amd-gcn3-isa-architecture-manual/).
|
||||
Each operand is referenced by `%` followed by its position in the ordered list of operands.
|
||||
`"v"` is the constraint code (target-specific, for AMDGPU) for a 32-bit VGPR register; for more information, please refer to the [Supported Constraint Code List for AMDGPU](https://llvm.org/docs/LangRef.html#supported-constraint-code-list).
|
||||
Output constraints are specified by an `"="` prefix as shown above (`"=v"`). This indicates that the assembly will write to this operand, and the operand will then be made available as a return value of the asm expression. Input constraints do not have a prefix - just the constraint code. The constraint string `"0"` says to use the register assigned to the output as an input as well (it being the 0th constraint).
|
||||
|
||||
## How to build and run:
|
||||
Use the make command and execute it using ./exe
|
||||
Use hipcc to build the application, which is using hcc on AMD and nvcc on nvidia.
|
||||
|
||||
|
||||
## More Info:
|
||||
- [HIP FAQ](https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP/docs/markdown/hip_faq.md)
|
||||
- [HIP Kernel Language](https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP/docs/markdown/hip_kernel_language.md)
|
||||
- [HIP Runtime API (Doxygen)](http://gpuopen-professionalcompute-tools.github.io/HIP)
|
||||
- [HIP Porting Guide](https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP/docs/markdown/hip_porting_guide.md)
|
||||
- [HIP Terminology](https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP/docs/markdown/hip_terms.md) (including Rosetta Stone of GPU computing terms across CUDA/HIP/HC/AMP/OpenL)
|
||||
- [clang-hipify](https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP/clang-hipify/README.md)
|
||||
- [Developer/CONTRIBUTING Info](https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP/CONTRIBUTING.md)
|
||||
- [Release Notes](https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP/RELEASE.md)
|
||||
- [HIP FAQ](https://github.com/ROCm-Developer-Tools/HIP/docs/markdown/hip_faq.md)
|
||||
- [HIP Kernel Language](https://github.com/ROCm-Developer-Tools/HIP/docs/markdown/hip_kernel_language.md)
|
||||
- [HIP Runtime API (Doxygen)](http://rocm-developer-tools.github.io/HIP)
|
||||
- [HIP Porting Guide](https://github.com/ROCm-Developer-Tools/HIP/docs/markdown/hip_porting_guide.md)
|
||||
- [HIP Terminology](https://github.com/ROCm-Developer-Tools/HIP/docs/markdown/hip_terms.md) (including Rosetta Stone of GPU computing terms across CUDA/HIP/HC/AMP/OpenCL)
|
||||
- [clang-hipify](https://github.com/ROCm-Developer-Tools/HIP/clang-hipify/README.md)
|
||||
- [Developer/CONTRIBUTING Info](https://github.com/ROCm-Developer-Tools/HIP/CONTRIBUTING.md)
|
||||
- [Release Notes](https://github.com/ROCm-Developer-Tools/HIP/RELEASE.md)
|
||||
|
||||
@@ -7,7 +7,7 @@ This tutorial is follow-up of the previous one where we learn how to write our f
|
||||
Memory transfer and kernel execution are the most important parameters in parallel computing (especially HPC and machine learning). Memory bottlenecks are the main reason we are not able to get the highest performance; therefore, obtaining the memory transfer timing and kernel execution timing plays a key role in application optimization.
|
||||
|
||||
## Requirement:
|
||||
For hardware requirement and software installation [Installation](https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP/INSTALL.md)
|
||||
For hardware requirement and software installation [Installation](https://github.com/ROCm-Developer-Tools/HIP/INSTALL.md)
|
||||
|
||||
## prerequisite knowledge:
|
||||
|
||||
@@ -64,11 +64,11 @@ Use the make command and execute it using ./exe
|
||||
Use hipcc to build the application, which is using hcc on AMD and nvcc on nvidia.
|
||||
|
||||
## More Info:
|
||||
- [HIP FAQ](https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP/docs/markdown/hip_faq.md)
|
||||
- [HIP Kernel Language](https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP/docs/markdown/hip_kernel_language.md)
|
||||
- [HIP Runtime API (Doxygen)](http://gpuopen-professionalcompute-tools.github.io/HIP)
|
||||
- [HIP Porting Guide](https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP/docs/markdown/hip_porting_guide.md)
|
||||
- [HIP Terminology](https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP/docs/markdown/hip_terms.md) (including Rosetta Stone of GPU computing terms across CUDA/HIP/HC/AMP/OpenL)
|
||||
- [hipify-clang](https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP/hipify-clang/README.md)
|
||||
- [Developer/CONTRIBUTING Info](https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP/CONTRIBUTING.md)
|
||||
- [Release Notes](https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP/RELEASE.md)
|
||||
- [HIP FAQ](https://github.com/ROCm-Developer-Tools/HIP/docs/markdown/hip_faq.md)
|
||||
- [HIP Kernel Language](https://github.com/ROCm-Developer-Tools/HIP/docs/markdown/hip_kernel_language.md)
|
||||
- [HIP Runtime API (Doxygen)](http://rocm-developer-tools.github.io/HIP)
|
||||
- [HIP Porting Guide](https://github.com/ROCm-Developer-Tools/HIP/docs/markdown/hip_porting_guide.md)
|
||||
- [HIP Terminology](https://github.com/ROCm-Developer-Tools/HIP/docs/markdown/hip_terms.md) (including Rosetta Stone of GPU computing terms across CUDA/HIP/HC/AMP/OpenCL)
|
||||
- [hipify-clang](https://github.com/ROCm-Developer-Tools/HIP/hipify-clang/README.md)
|
||||
- [Developer/CONTRIBUTING Info](https://github.com/ROCm-Developer-Tools/HIP/CONTRIBUTING.md)
|
||||
- [Release Notes](https://github.com/ROCm-Developer-Tools/HIP/RELEASE.md)
|
||||
|
||||
@@ -37,11 +37,11 @@ You can also print the HIP function strings to stderr using HIP_TRACE_API enviro
|
||||
Note this trace mode uses colors. "less -r" can handle raw control characters and will display the debug output in proper colors.
|
||||
|
||||
## More Info:
|
||||
- [HIP FAQ](https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP/docs/markdown/hip_faq.md)
|
||||
- [HIP Kernel Language](https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP/docs/markdown/hip_kernel_language.md)
|
||||
- [HIP Runtime API (Doxygen)](http://gpuopen-professionalcompute-tools.github.io/HIP)
|
||||
- [HIP Porting Guide](https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP/docs/markdown/hip_porting_guide.md)
|
||||
- [HIP Terminology](https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP/docs/markdown/hip_terms.md) (including Rosetta Stone of GPU computing terms across CUDA/HIP/HC/AMP/OpenL)
|
||||
- [hipify-clang](https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP/hipify-clang/README.md)
|
||||
- [Developer/CONTRIBUTING Info](https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP/CONTRIBUTING.md)
|
||||
- [Release Notes](https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP/RELEASE.md)
|
||||
- [HIP FAQ](https://github.com/ROCm-Developer-Tools/HIP/docs/markdown/hip_faq.md)
|
||||
- [HIP Kernel Language](https://github.com/ROCm-Developer-Tools/HIP/docs/markdown/hip_kernel_language.md)
|
||||
- [HIP Runtime API (Doxygen)](http://rocm-developer-tools.github.io/HIP)
|
||||
- [HIP Porting Guide](https://github.com/ROCm-Developer-Tools/HIP/docs/markdown/hip_porting_guide.md)
|
||||
- [HIP Terminology](https://github.com/ROCm-Developer-Tools/HIP/docs/markdown/hip_terms.md) (including Rosetta Stone of GPU computing terms across CUDA/HIP/HC/AMP/OpenCL)
|
||||
- [hipify-clang](https://github.com/ROCm-Developer-Tools/HIP/hipify-clang/README.md)
|
||||
- [Developer/CONTRIBUTING Info](https://github.com/ROCm-Developer-Tools/HIP/CONTRIBUTING.md)
|
||||
- [Release Notes](https://github.com/ROCm-Developer-Tools/HIP/RELEASE.md)
|
||||
|
||||
@@ -7,7 +7,7 @@ Earlier we learned how to write our first hip program, in which we compute Matri
|
||||
As we mentioned earlier, memory bottlenecks are the main reason we are not able to get the highest performance; therefore, minimizing the latency of memory access plays a prominent role in application optimization. In this tutorial, we'll learn how to use static shared memory and will explain the dynamic one later.
|
||||
|
||||
## Requirement:
|
||||
For hardware requirement and software installation [Installation](https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP/INSTALL.md)
|
||||
For hardware requirement and software installation [Installation](https://github.com/ROCm-Developer-Tools/HIP/INSTALL.md)
|
||||
|
||||
## prerequisite knowledge:
|
||||
|
||||
@@ -32,11 +32,11 @@ Use the make command and execute it using ./exe
|
||||
Use hipcc to build the application, which is using hcc on AMD and nvcc on nvidia.
|
||||
|
||||
## More Info:
|
||||
- [HIP FAQ](https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP/docs/markdown/hip_faq.md)
|
||||
- [HIP Kernel Language](https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP/docs/markdown/hip_kernel_language.md)
|
||||
- [HIP Runtime API (Doxygen)](http://gpuopen-professionalcompute-tools.github.io/HIP)
|
||||
- [HIP Porting Guide](https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP/docs/markdown/hip_porting_guide.md)
|
||||
- [HIP Terminology](https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP/docs/markdown/hip_terms.md) (including Rosetta Stone of GPU computing terms across CUDA/HIP/HC/AMP/OpenL)
|
||||
- [clang-hipify](https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP/clang-hipify/README.md)
|
||||
- [Developer/CONTRIBUTING Info](https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP/CONTRIBUTING.md)
|
||||
- [Release Notes](https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP/RELEASE.md)
|
||||
- [HIP FAQ](https://github.com/ROCm-Developer-Tools/HIP/docs/markdown/hip_faq.md)
|
||||
- [HIP Kernel Language](https://github.com/ROCm-Developer-Tools/HIP/docs/markdown/hip_kernel_language.md)
|
||||
- [HIP Runtime API (Doxygen)](http://rocm-developer-tools.github.io/HIP)
|
||||
- [HIP Porting Guide](https://github.com/ROCm-Developer-Tools/HIP/docs/markdown/hip_porting_guide.md)
|
||||
- [HIP Terminology](https://github.com/ROCm-Developer-Tools/HIP/docs/markdown/hip_terms.md) (including Rosetta Stone of GPU computing terms across CUDA/HIP/HC/AMP/OpenCL)
|
||||
- [clang-hipify](https://github.com/ROCm-Developer-Tools/HIP/clang-hipify/README.md)
|
||||
- [Developer/CONTRIBUTING Info](https://github.com/ROCm-Developer-Tools/HIP/CONTRIBUTING.md)
|
||||
- [Release Notes](https://github.com/ROCm-Developer-Tools/HIP/RELEASE.md)
|
||||
|
||||
@@ -15,7 +15,7 @@ Let's talk about Warp first. The kernel code is executed in groups of fixed numb
|
||||
` float __shfl_xor (float var, int laneMask, int width=warpSize); `
|
||||
|
||||
## Requirement:
|
||||
For hardware requirement and software installation [Installation](https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP/INSTALL.md)
|
||||
For hardware requirement and software installation [Installation](https://github.com/ROCm-Developer-Tools/HIP/INSTALL.md)
|
||||
|
||||
## prerequisite knowledge:
|
||||
|
||||
@@ -41,11 +41,11 @@ Use hipcc to build the application, which is using hcc on AMD and nvcc on nvidia
|
||||
Please make sure you have a device with compute capability 3.0 or higher in order to use warp shfl operations, and add the `-gencode arch=compute_30,code=sm_30` nvcc flag in the Makefile while using this application.
|
||||
|
||||
## More Info:
|
||||
- [HIP FAQ](https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP/docs/markdown/hip_faq.md)
|
||||
- [HIP Kernel Language](https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP/docs/markdown/hip_kernel_language.md)
|
||||
- [HIP Runtime API (Doxygen)](http://gpuopen-professionalcompute-tools.github.io/HIP)
|
||||
- [HIP Porting Guide](https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP/docs/markdown/hip_porting_guide.md)
|
||||
- [HIP Terminology](https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP/docs/markdown/hip_terms.md) (including Rosetta Stone of GPU computing terms across CUDA/HIP/HC/AMP/OpenL)
|
||||
- [clang-hipify](https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP/clang-hipify/README.md)
|
||||
- [Developer/CONTRIBUTING Info](https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP/CONTRIBUTING.md)
|
||||
- [Release Notes](https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP/RELEASE.md)
|
||||
- [HIP FAQ](https://github.com/ROCm-Developer-Tools/HIP/docs/markdown/hip_faq.md)
|
||||
- [HIP Kernel Language](https://github.com/ROCm-Developer-Tools/HIP/docs/markdown/hip_kernel_language.md)
|
||||
- [HIP Runtime API (Doxygen)](http://rocm-developer-tools.github.io/HIP)
|
||||
- [HIP Porting Guide](https://github.com/ROCm-Developer-Tools/HIP/docs/markdown/hip_porting_guide.md)
|
||||
- [HIP Terminology](https://github.com/ROCm-Developer-Tools/HIP/docs/markdown/hip_terms.md) (including Rosetta Stone of GPU computing terms across CUDA/HIP/HC/AMP/OpenCL)
|
||||
- [clang-hipify](https://github.com/ROCm-Developer-Tools/HIP/clang-hipify/README.md)
|
||||
- [Developer/CONTRIBUTING Info](https://github.com/ROCm-Developer-Tools/HIP/CONTRIBUTING.md)
|
||||
- [Release Notes](https://github.com/ROCm-Developer-Tools/HIP/RELEASE.md)
|
||||
|
||||
@@ -15,7 +15,7 @@ Let's talk about Warp first. The kernel code is executed in groups of fixed numb
|
||||
` float __shfl_xor (float var, int laneMask, int width=warpSize); `
|
||||
|
||||
## Requirement:
|
||||
For hardware requirement and software installation [Installation](https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP/INSTALL.md)
|
||||
For hardware requirement and software installation [Installation](https://github.com/ROCm-Developer-Tools/HIP/INSTALL.md)
|
||||
|
||||
## prerequisite knowledge:
|
||||
|
||||
@@ -41,11 +41,11 @@ Use hipcc to build the application, which is using hcc on AMD and nvcc on nvidia
|
||||
Please make sure you have a device with compute capability 3.0 or higher in order to use warp shfl operations, and add the `-gencode arch=compute_30,code=sm_30` nvcc flag in the Makefile while using this application.
|
||||
|
||||
## More Info:
|
||||
- [HIP FAQ](https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP/docs/markdown/hip_faq.md)
|
||||
- [HIP Kernel Language](https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP/docs/markdown/hip_kernel_language.md)
|
||||
- [HIP Runtime API (Doxygen)](http://gpuopen-professionalcompute-tools.github.io/HIP)
|
||||
- [HIP Porting Guide](https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP/docs/markdown/hip_porting_guide.md)
|
||||
- [HIP Terminology](https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP/docs/markdown/hip_terms.md) (including Rosetta Stone of GPU computing terms across CUDA/HIP/HC/AMP/OpenL)
|
||||
- [clang-hipify](https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP/clang-hipify/README.md)
|
||||
- [Developer/CONTRIBUTING Info](https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP/CONTRIBUTING.md)
|
||||
- [Release Notes](https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP/RELEASE.md)
|
||||
- [HIP FAQ](https://github.com/ROCm-Developer-Tools/HIP/docs/markdown/hip_faq.md)
|
||||
- [HIP Kernel Language](https://github.com/ROCm-Developer-Tools/HIP/docs/markdown/hip_kernel_language.md)
|
||||
- [HIP Runtime API (Doxygen)](http://rocm-developer-tools.github.io/HIP)
|
||||
- [HIP Porting Guide](https://github.com/ROCm-Developer-Tools/HIP/docs/markdown/hip_porting_guide.md)
|
||||
- [HIP Terminology](https://github.com/ROCm-Developer-Tools/HIP/docs/markdown/hip_terms.md) (including Rosetta Stone of GPU computing terms across CUDA/HIP/HC/AMP/OpenCL)
|
||||
- [clang-hipify](https://github.com/ROCm-Developer-Tools/HIP/clang-hipify/README.md)
|
||||
- [Developer/CONTRIBUTING Info](https://github.com/ROCm-Developer-Tools/HIP/CONTRIBUTING.md)
|
||||
- [Release Notes](https://github.com/ROCm-Developer-Tools/HIP/RELEASE.md)
|
||||
|
||||
@@ -7,7 +7,7 @@ Earlier we learned how to use static shared memory. In this tutorial, we'll expl
|
||||
As we mentioned earlier, memory bottlenecks are the main reason we are not able to get the highest performance; therefore, minimizing the latency of memory access plays a prominent role in application optimization. In this tutorial, we'll learn how to use dynamic shared memory.
|
||||
|
||||
## Requirement:
|
||||
For hardware requirement and software installation [Installation](https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP/INSTALL.md)
|
||||
For hardware requirement and software installation [Installation](https://github.com/ROCm-Developer-Tools/HIP/INSTALL.md)
|
||||
|
||||
## prerequisite knowledge:
|
||||
|
||||
@@ -37,11 +37,11 @@ Use the make command and execute it using ./exe
|
||||
Use hipcc to build the application, which is using hcc on AMD and nvcc on nvidia.
|
||||
|
||||
## More Info:
|
||||
- [HIP FAQ](https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP/docs/markdown/hip_faq.md)
|
||||
- [HIP Kernel Language](https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP/docs/markdown/hip_kernel_language.md)
|
||||
- [HIP Runtime API (Doxygen)](http://gpuopen-professionalcompute-tools.github.io/HIP)
|
||||
- [HIP Porting Guide](https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP/docs/markdown/hip_porting_guide.md)
|
||||
- [HIP Terminology](https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP/docs/markdown/hip_terms.md) (including Rosetta Stone of GPU computing terms across CUDA/HIP/HC/AMP/OpenL)
|
||||
- [clang-hipify](https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP/clang-hipify/README.md)
|
||||
- [Developer/CONTRIBUTING Info](https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP/CONTRIBUTING.md)
|
||||
- [Release Notes](https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP/RELEASE.md)
|
||||
- [HIP FAQ](https://github.com/ROCm-Developer-Tools/HIP/docs/markdown/hip_faq.md)
|
||||
- [HIP Kernel Language](https://github.com/ROCm-Developer-Tools/HIP/docs/markdown/hip_kernel_language.md)
|
||||
- [HIP Runtime API (Doxygen)](http://rocm-developer-tools.github.io/HIP)
|
||||
- [HIP Porting Guide](https://github.com/ROCm-Developer-Tools/HIP/docs/markdown/hip_porting_guide.md)
|
||||
- [HIP Terminology](https://github.com/ROCm-Developer-Tools/HIP/docs/markdown/hip_terms.md) (including Rosetta Stone of GPU computing terms across CUDA/HIP/HC/AMP/OpenCL)
|
||||
- [clang-hipify](https://github.com/ROCm-Developer-Tools/HIP/clang-hipify/README.md)
|
||||
- [Developer/CONTRIBUTING Info](https://github.com/ROCm-Developer-Tools/HIP/CONTRIBUTING.md)
|
||||
- [Release Notes](https://github.com/ROCm-Developer-Tools/HIP/RELEASE.md)
|
||||
|
||||
@@ -7,7 +7,7 @@ In all Earlier tutorial we used single stream, In this tutorial, we'll explain h
|
||||
The various instances of a kernel to be executed on the device in the exact launch order defined by the host are called streams. We can launch multiple streams on a single device. We will learn how to launch two streams, which can be scaled with ease.
|
||||
|
||||
## Requirement:
|
||||
For hardware requirement and software installation [Installation](https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP/INSTALL.md)
|
||||
For hardware requirement and software installation [Installation](https://github.com/ROCm-Developer-Tools/HIP/INSTALL.md)
|
||||
|
||||
## Prerequisite knowledge:
|
||||
|
||||
@@ -47,11 +47,11 @@ Use the make command and execute it using ./exe
|
||||
Use hipcc to build the application, which is using hcc on AMD and nvcc on nvidia.
|
||||
|
||||
## More Info:
|
||||
- [HIP FAQ](https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP/docs/markdown/hip_faq.md)
|
||||
- [HIP Kernel Language](https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP/docs/markdown/hip_kernel_language.md)
|
||||
- [HIP Runtime API (Doxygen)](http://gpuopen-professionalcompute-tools.github.io/HIP)
|
||||
- [HIP Porting Guide](https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP/docs/markdown/hip_porting_guide.md)
|
||||
- [HIP Terminology](https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP/docs/markdown/hip_terms.md) (including Rosetta Stone of GPU computing terms across CUDA/HIP/HC/AMP/OpenCL)
|
||||
- [clang-hipify](https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP/clang-hipify/README.md)
|
||||
- [Developer/CONTRIBUTING Info](https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP/CONTRIBUTING.md)
|
||||
- [Release Notes](https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP/RELEASE.md)
|
||||
- [HIP FAQ](https://github.com/ROCm-Developer-Tools/HIP/docs/markdown/hip_faq.md)
|
||||
- [HIP Kernel Language](https://github.com/ROCm-Developer-Tools/HIP/docs/markdown/hip_kernel_language.md)
|
||||
- [HIP Runtime API (Doxygen)](http://rocm-developer-tools.github.io/HIP)
|
||||
- [HIP Porting Guide](https://github.com/ROCm-Developer-Tools/HIP/docs/markdown/hip_porting_guide.md)
|
||||
- [HIP Terminology](https://github.com/ROCm-Developer-Tools/HIP/docs/markdown/hip_terms.md) (including Rosetta Stone of GPU computing terms across CUDA/HIP/HC/AMP/OpenCL)
|
||||
- [clang-hipify](https://github.com/ROCm-Developer-Tools/HIP/clang-hipify/README.md)
|
||||
- [Developer/CONTRIBUTING Info](https://github.com/ROCm-Developer-Tools/HIP/CONTRIBUTING.md)
|
||||
- [Release Notes](https://github.com/ROCm-Developer-Tools/HIP/RELEASE.md)
|
||||
|
||||
@@ -8,7 +8,7 @@ Loop unrolling optimization hints can be specified with #pragma unroll and #prag
|
||||
Specifying #pragma unroll without a parameter directs the loop unroller to attempt to fully unroll the loop if the trip count is known at compile time and attempt to partially unroll the loop if the trip count is not known at compile time.
|
||||
|
||||
## Requirement:
|
||||
For hardware requirement and software installation [Installation](https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP/INSTALL.md)
|
||||
For hardware requirement and software installation [Installation](https://github.com/ROCm-Developer-Tools/HIP/INSTALL.md)
|
||||
|
||||
## Prerequisite knowledge:
|
||||
|
||||
@@ -38,11 +38,11 @@ Use hipcc to build the application, which is using hcc on AMD and nvcc on nvidia
|
||||
Please make sure you have a device with compute capability 3.0 or higher in order to use warp shfl operations, and add the `-gencode arch=compute_30,code=sm_30` nvcc flag to the Makefile when building this application.
|
||||
|
||||
## More Info:
|
||||
- [HIP FAQ](https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP/docs/markdown/hip_faq.md)
|
||||
- [HIP Kernel Language](https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP/docs/markdown/hip_kernel_language.md)
|
||||
- [HIP Runtime API (Doxygen)](http://gpuopen-professionalcompute-tools.github.io/HIP)
|
||||
- [HIP Porting Guide](https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP/docs/markdown/hip_porting_guide.md)
|
||||
- [HIP Terminology](https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP/docs/markdown/hip_terms.md) (including Rosetta Stone of GPU computing terms across CUDA/HIP/HC/AMP/OpenCL)
|
||||
- [clang-hipify](https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP/clang-hipify/README.md)
|
||||
- [Developer/CONTRIBUTING Info](https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP/CONTRIBUTING.md)
|
||||
- [Release Notes](https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP/RELEASE.md)
|
||||
- [HIP FAQ](https://github.com/ROCm-Developer-Tools/HIP/docs/markdown/hip_faq.md)
|
||||
- [HIP Kernel Language](https://github.com/ROCm-Developer-Tools/HIP/docs/markdown/hip_kernel_language.md)
|
||||
- [HIP Runtime API (Doxygen)](http://rocm-developer-tools.github.io/HIP)
|
||||
- [HIP Porting Guide](https://github.com/ROCm-Developer-Tools/HIP/docs/markdown/hip_porting_guide.md)
|
||||
- [HIP Terminology](https://github.com/ROCm-Developer-Tools/HIP/docs/markdown/hip_terms.md) (including Rosetta Stone of GPU computing terms across CUDA/HIP/HC/AMP/OpenCL)
|
||||
- [clang-hipify](https://github.com/ROCm-Developer-Tools/HIP/clang-hipify/README.md)
|
||||
- [Developer/CONTRIBUTING Info](https://github.com/ROCm-Developer-Tools/HIP/CONTRIBUTING.md)
|
||||
- [Release Notes](https://github.com/ROCm-Developer-Tools/HIP/RELEASE.md)
|
||||
|
||||
+3
-105
@@ -35,8 +35,8 @@ THE SOFTWARE.
|
||||
This is the best place to put them because the device
|
||||
global variables need to be initialized at the start.
|
||||
*/
|
||||
__device__ ADDRESS_SPACE_1 char gpuHeap[SIZE_OF_HEAP];
|
||||
__device__ ADDRESS_SPACE_1 uint32_t gpuFlags[NUM_PAGES];
|
||||
__device__ char gpuHeap[SIZE_OF_HEAP];
|
||||
__device__ uint32_t gpuFlags[NUM_PAGES];
|
||||
|
||||
__device__ void *__hip_hc_malloc(size_t size)
|
||||
{
|
||||
@@ -1264,109 +1264,7 @@ __device__ double __hip_fast_dsqrt_rz(double x) {
|
||||
}
|
||||
|
||||
__device__ void __threadfence_system(void){
|
||||
// no-op
|
||||
}
|
||||
|
||||
float __hip_host_erfinvf(float x)
|
||||
{
|
||||
float ret;
|
||||
int sign;
|
||||
if (x < -1 || x > 1){
|
||||
return NAN;
|
||||
}
|
||||
if (x == 0){
|
||||
return 0;
|
||||
}
|
||||
if (x > 0){
|
||||
sign = 1;
|
||||
} else {
|
||||
sign = -1;
|
||||
x = -x;
|
||||
}
|
||||
if (x <= 0.7) {
|
||||
float x1 = x * x;
|
||||
float x2 = std::fma(__hip_erfinva3, x1, __hip_erfinva2);
|
||||
float x3 = std::fma(x2, x1, __hip_erfinva1);
|
||||
float x4 = x * std::fma(x3, x1, __hip_erfinva0);
|
||||
|
||||
float r1 = std::fma(__hip_erfinvb4, x1, __hip_erfinvb3);
|
||||
float r2 = std::fma(r1, x1, __hip_erfinvb2);
|
||||
float r3 = std::fma(r2, x1, __hip_erfinvb1);
|
||||
ret = x4 / std::fma(r3, x1, __hip_erfinvb0);
|
||||
} else {
|
||||
float x1 = std::sqrt(-std::log((1 - x) / 2));
|
||||
float x2 = std::fma(__hip_erfinvc3, x1, __hip_erfinvc2);
|
||||
float x3 = std::fma(x2, x1, __hip_erfinvc1);
|
||||
float x4 = std::fma(x3, x1, __hip_erfinvc0);
|
||||
|
||||
float r1 = std::fma(__hip_erfinvd2, x1, __hip_erfinvd1);
|
||||
ret = x4 / std::fma(r1, x1, __hip_erfinvd0);
|
||||
}
|
||||
|
||||
ret = ret * sign;
|
||||
x = x * sign;
|
||||
|
||||
ret -= (std::erf(ret) - x) / (2 / std::sqrt(HIP_PI) * std::exp(-ret * ret));
|
||||
ret -= (std::erf(ret) - x) / (2 / std::sqrt(HIP_PI) * std::exp(-ret * ret));
|
||||
|
||||
return ret;
|
||||
|
||||
}
|
||||
|
||||
double __hip_host_erfinv(double x)
|
||||
{
|
||||
double ret;
|
||||
int sign;
|
||||
if (x < -1 || x > 1){
|
||||
return NAN;
|
||||
}
|
||||
if (x == 0){
|
||||
return 0;
|
||||
}
|
||||
if (x > 0){
|
||||
sign = 1;
|
||||
} else {
|
||||
sign = -1;
|
||||
x = -x;
|
||||
}
|
||||
if (x <= 0.7) {
|
||||
double x1 = x * x;
|
||||
double x2 = std::fma(__hip_erfinva3, x1, __hip_erfinva2);
|
||||
double x3 = std::fma(x2, x1, __hip_erfinva1);
|
||||
double x4 = x * std::fma(x3, x1, __hip_erfinva0);
|
||||
|
||||
double r1 = std::fma(__hip_erfinvb4, x1, __hip_erfinvb3);
|
||||
double r2 = std::fma(r1, x1, __hip_erfinvb2);
|
||||
double r3 = std::fma(r2, x1, __hip_erfinvb1);
|
||||
ret = x4 / std::fma(r3, x1, __hip_erfinvb0);
|
||||
} else {
|
||||
double x1 = std::sqrt(-std::log((1 - x) / 2));
|
||||
double x2 = std::fma(__hip_erfinvc3, x1, __hip_erfinvc2);
|
||||
double x3 = std::fma(x2, x1, __hip_erfinvc1);
|
||||
double x4 = std::fma(x3, x1, __hip_erfinvc0);
|
||||
|
||||
double r1 = std::fma(__hip_erfinvd2, x1, __hip_erfinvd1);
|
||||
ret = x4 / std::fma(r1, x1, __hip_erfinvd0);
|
||||
}
|
||||
|
||||
ret = ret * sign;
|
||||
x = x * sign;
|
||||
|
||||
ret -= (std::erf(ret) - x) / (2 / std::sqrt(HIP_PI) * std::exp(-ret * ret));
|
||||
ret -= (std::erf(ret) - x) / (2 / std::sqrt(HIP_PI) * std::exp(-ret * ret));
|
||||
|
||||
return ret;
|
||||
|
||||
}
|
||||
|
||||
float __hip_host_erfcinvf(float y)
|
||||
{
|
||||
return __hip_host_erfinvf(1 - y);
|
||||
}
|
||||
|
||||
double __hip_host_erfcinv(double y)
|
||||
{
|
||||
return __hip_host_erfinv(1 - y);
|
||||
std::atomic_thread_fence(std::memory_order_seq_cst);
|
||||
}
|
||||
|
||||
double __hip_host_j0(double x)
|
||||
|
||||
@@ -128,12 +128,6 @@ __device__ double __hip_fast_dsqrt_ru(double x);
|
||||
__device__ double __hip_fast_dsqrt_rz(double x);
|
||||
__device__ void __threadfence_system(void);
|
||||
|
||||
float __hip_host_erfinvf(float x);
|
||||
double __hip_host_erfinv(double x);
|
||||
|
||||
float __hip_host_erfcinvf(float y);
|
||||
double __hip_host_erfcinv(double y);
|
||||
|
||||
float __hip_host_j0f(float x);
|
||||
double __hip_host_j0(double x);
|
||||
|
||||
|
||||
@@ -72,8 +72,8 @@ namespace hip_impl
|
||||
}
|
||||
|
||||
if (COMPILE_HIP_DB && HIP_TRACE_API) {
|
||||
std::cerr << API_COLOR << os.str() << API_COLOR_END
|
||||
<< std::endl;
|
||||
std::string fullStr;
|
||||
recordApiTrace(&fullStr, os.str());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
+22
-24
@@ -31,6 +31,7 @@ THE SOFTWARE.
|
||||
|
||||
// Stack of contexts
|
||||
thread_local std::stack<ihipCtx_t *> tls_ctxStack;
|
||||
thread_local bool tls_getPrimaryCtx = true;
|
||||
|
||||
void ihipCtxStackUpdate()
|
||||
{
|
||||
@@ -65,6 +66,7 @@ hipError_t hipCtxCreate(hipCtx_t *ctx, unsigned int flags, hipDevice_t device)
|
||||
*ctx = ictx;
|
||||
ihipSetTlsDefaultCtx(*ctx);
|
||||
tls_ctxStack.push(*ctx);
|
||||
tls_getPrimaryCtx = false;
|
||||
deviceCrit->addContext(ictx);
|
||||
}
|
||||
|
||||
@@ -93,8 +95,7 @@ hipError_t hipDriverGetVersion(int *driverVersion)
|
||||
hipError_t e = hipSuccess;
|
||||
if (driverVersion) {
|
||||
*driverVersion = 4;
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
e = hipErrorInvalidValue;
|
||||
}
|
||||
|
||||
@@ -107,8 +108,7 @@ hipError_t hipRuntimeGetVersion(int *runtimeVersion)
|
||||
hipError_t e = hipSuccess;
|
||||
if (runtimeVersion) {
|
||||
*runtimeVersion = HIP_VERSION_PATCH;
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
e = hipErrorInvalidValue;
|
||||
}
|
||||
|
||||
@@ -124,9 +124,7 @@ hipError_t hipCtxDestroy(hipCtx_t ctx)
|
||||
if(primaryCtx== ctx)
|
||||
{
|
||||
e = hipErrorInvalidValue;
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
if(currentCtx == ctx) {
|
||||
//need to destroy the ctx associated with calling thread
|
||||
tls_ctxStack.pop();
|
||||
@@ -146,19 +144,21 @@ hipError_t hipCtxPopCurrent(hipCtx_t* ctx)
|
||||
{
|
||||
HIP_INIT_API(ctx);
|
||||
hipError_t e = hipSuccess;
|
||||
ihipCtx_t* tempCtx;
|
||||
*ctx = ihipGetTlsDefaultCtx();
|
||||
ihipCtx_t* currentCtx = ihipGetTlsDefaultCtx();
|
||||
auto deviceHandle = currentCtx->getDevice();
|
||||
*ctx = currentCtx;
|
||||
|
||||
if(!tls_ctxStack.empty()) {
|
||||
tls_ctxStack.pop();
|
||||
}
|
||||
|
||||
if(!tls_ctxStack.empty()) {
|
||||
tempCtx= tls_ctxStack.top();
|
||||
}
|
||||
else {
|
||||
tempCtx = nullptr;
|
||||
currentCtx= tls_ctxStack.top();
|
||||
} else {
|
||||
currentCtx = deviceHandle->_primaryCtx;
|
||||
}
|
||||
|
||||
ihipSetTlsDefaultCtx(tempCtx); //TOD0 - Shall check for NULL?
|
||||
ihipSetTlsDefaultCtx(currentCtx); //TOD0 - Shall check for NULL?
|
||||
return ihipLogStatus(e);
|
||||
}
|
||||
|
||||
@@ -169,8 +169,8 @@ hipError_t hipCtxPushCurrent(hipCtx_t ctx)
|
||||
if(ctx != NULL) { //TODO- is this check needed?
|
||||
ihipSetTlsDefaultCtx(ctx);
|
||||
tls_ctxStack.push(ctx);
|
||||
}
|
||||
else {
|
||||
tls_getPrimaryCtx = false;
|
||||
} else {
|
||||
e = hipErrorInvalidContext;
|
||||
}
|
||||
return ihipLogStatus(e);
|
||||
@@ -180,12 +180,11 @@ hipError_t hipCtxGetCurrent(hipCtx_t* ctx)
|
||||
{
|
||||
HIP_INIT_API(ctx);
|
||||
hipError_t e = hipSuccess;
|
||||
if(!tls_ctxStack.empty()) {
|
||||
if((tls_getPrimaryCtx) || tls_ctxStack.empty()) {
|
||||
*ctx = ihipGetTlsDefaultCtx();
|
||||
} else {
|
||||
*ctx= tls_ctxStack.top();
|
||||
}
|
||||
else {
|
||||
*ctx = NULL;
|
||||
}
|
||||
return ihipLogStatus(e);
|
||||
}
|
||||
|
||||
@@ -195,10 +194,10 @@ hipError_t hipCtxSetCurrent(hipCtx_t ctx)
|
||||
hipError_t e = hipSuccess;
|
||||
if(ctx == NULL) {
|
||||
tls_ctxStack.pop();
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
ihipSetTlsDefaultCtx(ctx);
|
||||
tls_ctxStack.push(ctx);
|
||||
tls_getPrimaryCtx = false;
|
||||
}
|
||||
return ihipLogStatus(e);
|
||||
}
|
||||
@@ -213,8 +212,7 @@ hipError_t hipCtxGetDevice(hipDevice_t *device)
|
||||
if(ctx == nullptr) {
|
||||
e = hipErrorInvalidContext;
|
||||
// TODO *device = nullptr;
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
auto deviceHandle = ctx->getDevice();
|
||||
*device = deviceHandle->_deviceId;
|
||||
}
|
||||
|
||||
@@ -0,0 +1,12 @@
|
||||
|
||||
#include <hc_am.hpp>
|
||||
|
||||
|
||||
|
||||
|
||||
void hipdbPrintMem(void *targetAddress)
|
||||
{
|
||||
hc::am_memtracker_print(targetAddress);
|
||||
};
|
||||
|
||||
|
||||
+57
-34
@@ -146,13 +146,14 @@ hipError_t hipSetDevice(int deviceId)
|
||||
return ihipLogStatus(hipErrorInvalidDevice);
|
||||
} else {
|
||||
ihipSetTlsDefaultCtx(ihipGetPrimaryCtx(deviceId));
|
||||
tls_getPrimaryCtx = true;
|
||||
return ihipLogStatus(hipSuccess);
|
||||
}
|
||||
}
|
||||
|
||||
hipError_t hipDeviceSynchronize(void)
|
||||
{
|
||||
HIP_INIT_API();
|
||||
HIP_INIT_SPECIAL_API(TRACE_SYNC);
|
||||
return ihipLogStatus(ihipSynchronize());
|
||||
}
|
||||
|
||||
@@ -180,6 +181,7 @@ hipError_t hipDeviceReset(void)
|
||||
return ihipLogStatus(hipSuccess);
|
||||
}
|
||||
|
||||
|
||||
hipError_t ihipDeviceSetState(void)
|
||||
{
|
||||
hipError_t e = hipErrorInvalidContext;
|
||||
@@ -272,6 +274,9 @@ hipError_t ihipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device
|
||||
hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device)
|
||||
{
|
||||
HIP_INIT_API(pi, attr, device);
|
||||
if ((device < 0) || (device >= g_deviceCnt)) {
|
||||
return ihipLogStatus(hipErrorInvalidDevice);
|
||||
}
|
||||
return ihipLogStatus(ihipDeviceGetAttribute(pi,attr,device));
|
||||
}
|
||||
|
||||
@@ -298,6 +303,9 @@ hipError_t ihipGetDeviceProperties(hipDeviceProp_t* props, int device)
|
||||
hipError_t hipGetDeviceProperties(hipDeviceProp_t* props, int device)
|
||||
{
|
||||
HIP_INIT_API(props, device);
|
||||
if ((device < 0) || (device >= g_deviceCnt)) {
|
||||
return ihipLogStatus(hipErrorInvalidDevice);
|
||||
}
|
||||
return ihipLogStatus(ihipGetDeviceProperties(props, device));
|
||||
}
|
||||
|
||||
@@ -350,41 +358,44 @@ hipError_t hipDeviceComputeCapability(int *major, int *minor, hipDevice_t device
|
||||
{
|
||||
HIP_INIT_API(major,minor, device);
|
||||
hipError_t e = hipSuccess;
|
||||
e = ihipDeviceGetAttribute(major, hipDeviceAttributeComputeCapabilityMajor, device);
|
||||
e = ihipDeviceGetAttribute(minor, hipDeviceAttributeComputeCapabilityMinor, device);
|
||||
if ((device < 0) || (device >= g_deviceCnt)) {
|
||||
e = hipErrorInvalidDevice;
|
||||
} else {
|
||||
e = ihipDeviceGetAttribute(major, hipDeviceAttributeComputeCapabilityMajor, device);
|
||||
e = ihipDeviceGetAttribute(minor, hipDeviceAttributeComputeCapabilityMinor, device);
|
||||
}
|
||||
return ihipLogStatus(e);
|
||||
}
|
||||
|
||||
hipError_t hipDeviceGetName(char *name,int len,hipDevice_t device)
|
||||
{
|
||||
HIP_INIT_API(name,len, device);
|
||||
// Cast to void* here to avoid printing garbage in debug modes.
|
||||
HIP_INIT_API((void*)name,len, device);
|
||||
hipError_t e = hipSuccess;
|
||||
auto deviceHandle = ihipGetDevice(device);
|
||||
int nameLen = strlen(deviceHandle->_props.name);
|
||||
if(nameLen <= len)
|
||||
memcpy(name,deviceHandle->_props.name,nameLen);
|
||||
if ((device < 0) || (device >= g_deviceCnt)) {
|
||||
e = hipErrorInvalidDevice;
|
||||
} else {
|
||||
auto deviceHandle = ihipGetDevice(device);
|
||||
int nameLen = strlen(deviceHandle->_props.name);
|
||||
if(nameLen <= len)
|
||||
memcpy(name,deviceHandle->_props.name,nameLen);
|
||||
}
|
||||
return ihipLogStatus(e);
|
||||
}
|
||||
|
||||
hipError_t hipDeviceGetPCIBusId (char *pciBusId,int len, int device)
|
||||
{
|
||||
HIP_INIT_API(pciBusId, len, device);
|
||||
// Cast to void* here to avoid printing garbage in debug modes.
|
||||
HIP_INIT_API((void*)pciBusId, len, device);
|
||||
hipError_t e = hipErrorInvalidValue;
|
||||
int deviceCount = 0;
|
||||
ihipGetDeviceCount( &deviceCount );
|
||||
if((device > deviceCount) || (device < 0)) {
|
||||
if ((device < 0) || (device >= g_deviceCnt)) {
|
||||
e = hipErrorInvalidDevice;
|
||||
} else {
|
||||
if((pciBusId != nullptr) && (len > 0)) {
|
||||
int tempPciBusId = 0;
|
||||
e = ihipDeviceGetAttribute( &tempPciBusId, hipDeviceAttributePciBusId, device);
|
||||
if( e == hipSuccess) {
|
||||
std::string tempPciStr = std::to_string(tempPciBusId);
|
||||
if( len < tempPciStr.length()){
|
||||
e = hipErrorInvalidValue;
|
||||
} else {
|
||||
memcpy( pciBusId , tempPciStr.c_str() , tempPciStr.length() );
|
||||
}
|
||||
auto deviceHandle = ihipGetDevice(device);
|
||||
int retVal = snprintf(pciBusId,len, "%04x:%02x:%02x.0",deviceHandle->_props.pciDomainID,deviceHandle->_props.pciBusID,deviceHandle->_props.pciDeviceID);
|
||||
if( retVal > 0 && retVal < len) {
|
||||
e = hipSuccess;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -395,26 +406,38 @@ hipError_t hipDeviceTotalMem (size_t *bytes,hipDevice_t device)
|
||||
{
|
||||
HIP_INIT_API(bytes, device);
|
||||
hipError_t e = hipSuccess;
|
||||
auto deviceHandle = ihipGetDevice(device);
|
||||
*bytes= deviceHandle->_props.totalGlobalMem;
|
||||
if ((device < 0) || (device >= g_deviceCnt)) {
|
||||
e = hipErrorInvalidDevice;
|
||||
} else {
|
||||
auto deviceHandle = ihipGetDevice(device);
|
||||
*bytes= deviceHandle->_props.totalGlobalMem;
|
||||
}
|
||||
return ihipLogStatus(e);
|
||||
}
|
||||
|
||||
hipError_t hipDeviceGetByPCIBusId (int* device, const int* pciBusId )
|
||||
hipError_t hipDeviceGetByPCIBusId (int* device, const char* pciBusId )
|
||||
{
|
||||
HIP_INIT_API(device,pciBusId);
|
||||
hipDeviceProp_t tempProp;
|
||||
int deviceCount;
|
||||
int deviceCount = 0 ;
|
||||
hipError_t e = hipErrorInvalidValue;
|
||||
ihipGetDeviceCount( &deviceCount );
|
||||
*device = 0;
|
||||
for (int i = 0; i< deviceCount; i++) {
|
||||
ihipGetDeviceProperties( &tempProp, i );
|
||||
if(tempProp.pciBusID == *pciBusId) {
|
||||
*device =i;
|
||||
e = hipSuccess;
|
||||
break;
|
||||
}
|
||||
if((device != nullptr) && (pciBusId != nullptr)) {
|
||||
int pciBusID = -1;
|
||||
int pciDeviceID = -1;
|
||||
int pciDomainID = -1;
|
||||
int len = 0;
|
||||
len = sscanf (pciBusId,"%04x:%02x:%02x",&pciDomainID,&pciBusID,&pciDeviceID);
|
||||
if(len == 3) {
|
||||
ihipGetDeviceCount( &deviceCount );
|
||||
for (int i = 0; i< deviceCount; i++) {
|
||||
ihipGetDeviceProperties( &tempProp, i );
|
||||
if(tempProp.pciBusID == pciBusID) {
|
||||
*device = i;
|
||||
e = hipSuccess;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return ihipLogStatus(e);
|
||||
}
|
||||
|
||||
+26
-14
@@ -55,13 +55,13 @@ void ihipEvent_t::attachToCompletionFuture(const hc::completion_future *cf,
|
||||
|
||||
void ihipEvent_t::refereshEventStatus()
|
||||
{
|
||||
bool isReady0 = _marker.is_ready();
|
||||
bool isReady0 = locked_isReady();
|
||||
bool isReady1;
|
||||
int val = 0;
|
||||
if (_state == hipEventStatusRecording) {
|
||||
// TODO - use completion-future functions to obtain ticks and timestamps:
|
||||
hsa_signal_t *sig = static_cast<hsa_signal_t*> (_marker.get_native_handle());
|
||||
isReady1 = _marker.is_ready();
|
||||
isReady1 = locked_isReady();
|
||||
if (sig) {
|
||||
val = hsa_signal_load_acquire(*sig);
|
||||
if (val == 0) {
|
||||
@@ -86,6 +86,17 @@ void ihipEvent_t::refereshEventStatus()
|
||||
}
|
||||
|
||||
|
||||
bool ihipEvent_t::locked_isReady()
|
||||
{
|
||||
return _stream->locked_eventIsReady(this);
|
||||
}
|
||||
|
||||
void ihipEvent_t::locked_waitComplete(hc::hcWaitMode waitMode)
|
||||
{
|
||||
return _stream->locked_eventWaitComplete(this, waitMode);
|
||||
}
|
||||
|
||||
|
||||
hipError_t ihipEventCreate(hipEvent_t* event, unsigned flags)
|
||||
{
|
||||
hipError_t e = hipSuccess;
|
||||
@@ -127,7 +138,7 @@ hipError_t hipEventCreate(hipEvent_t* event)
|
||||
|
||||
hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream)
|
||||
{
|
||||
HIP_INIT_API(event, stream);
|
||||
HIP_INIT_SPECIAL_API(TRACE_SYNC, event, stream);
|
||||
|
||||
if (event && event->_state != hipEventStatusUnitialized) {
|
||||
stream = ihipSyncAndResolveStream(stream);
|
||||
@@ -164,18 +175,21 @@ hipError_t hipEventDestroy(hipEvent_t event)
|
||||
{
|
||||
HIP_INIT_API(event);
|
||||
|
||||
event->_state = hipEventStatusUnitialized;
|
||||
if (event) {
|
||||
event->_state = hipEventStatusUnitialized;
|
||||
|
||||
delete event;
|
||||
event = NULL;
|
||||
delete event;
|
||||
event = NULL;
|
||||
|
||||
// TODO - examine return additional error codes
|
||||
return ihipLogStatus(hipSuccess);
|
||||
return ihipLogStatus(hipSuccess);
|
||||
} else {
|
||||
return ihipLogStatus(hipErrorInvalidResourceHandle);
|
||||
}
|
||||
}
|
||||
|
||||
hipError_t hipEventSynchronize(hipEvent_t event)
|
||||
{
|
||||
HIP_INIT_API(event);
|
||||
HIP_INIT_SPECIAL_API(TRACE_SYNC, event);
|
||||
|
||||
if (event) {
|
||||
if (event->_state == hipEventStatusUnitialized) {
|
||||
@@ -189,9 +203,7 @@ hipError_t hipEventSynchronize(hipEvent_t event)
|
||||
ctx->locked_syncDefaultStream(true, true);
|
||||
return ihipLogStatus(hipSuccess);
|
||||
} else {
|
||||
event->_marker.wait((event->_flags & hipEventBlockingSync) ? hc::hcWaitModeBlocked : hc::hcWaitModeActive);
|
||||
|
||||
assert (event->_marker.is_ready());
|
||||
event->locked_waitComplete((event->_flags & hipEventBlockingSync) ? hc::hcWaitModeBlocked : hc::hcWaitModeActive);
|
||||
|
||||
return ihipLogStatus(hipSuccess);
|
||||
}
|
||||
@@ -254,9 +266,9 @@ hipError_t hipEventElapsedTime(float *ms, hipEvent_t start, hipEvent_t stop)
|
||||
|
||||
hipError_t hipEventQuery(hipEvent_t event)
|
||||
{
|
||||
HIP_INIT_API(event);
|
||||
HIP_INIT_SPECIAL_API(TRACE_QUERY, event);
|
||||
|
||||
if ((event->_state == hipEventStatusRecording) && (!event->_marker.is_ready())) {
|
||||
if ((event->_state == hipEventStatusRecording) && !event->locked_isReady()) {
|
||||
return ihipLogStatus(hipErrorNotReady);
|
||||
} else {
|
||||
return ihipLogStatus(hipSuccess);
|
||||
|
||||
+78
-34
@@ -83,20 +83,35 @@ int HIP_HIDDEN_FREE_MEM = 256;
|
||||
int HIP_FORCE_SYNC_COPY = 0;
|
||||
|
||||
// TODO - set these to 0 and 1
|
||||
int HIP_EVENT_SYS_RELEASE=1;
|
||||
int HIP_COHERENT_HOST_ALLOC = 0;
|
||||
int HIP_EVENT_SYS_RELEASE=0;
|
||||
int HIP_HOST_COHERENT = 1;
|
||||
|
||||
// TODO - set to 0 once we resolve stability.
|
||||
// USE_ HIP_SYNC_HOST_ALLOC
|
||||
int HIP_SYNC_HOST_ALLOC = 1;
|
||||
|
||||
|
||||
int HIP_INIT_ALLOC=-1;
|
||||
int HIP_SYNC_STREAM_WAIT = 0;
|
||||
int HIP_FORCE_NULL_STREAM=0;
|
||||
|
||||
|
||||
|
||||
#if (__hcc_workweek__ >= 17300)
|
||||
// Make sure we have required bug fix in HCC
|
||||
// Perform resolution on the GPU:
|
||||
// Chicken bit to sync on host to implement null stream.
|
||||
// If 0, null stream synchronization is performed on the GPU
|
||||
int HIP_SYNC_NULL_STREAM = 0;
|
||||
#else
|
||||
int HIP_SYNC_NULL_STREAM = 1;
|
||||
#endif
|
||||
|
||||
// HIP needs to change some behavior based on HCC_OPT_FLUSH :
|
||||
// TODO - set this to 1
|
||||
#if (__hcc_workweek__ >= 17296)
|
||||
int HCC_OPT_FLUSH = 1;
|
||||
#else
|
||||
#warning "HIP disabled HCC_OPT_FLUSH since HCC version does not yet support"
|
||||
int HCC_OPT_FLUSH = 0;
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
@@ -258,8 +273,6 @@ ihipStream_t::ihipStream_t(ihipCtx_t *ctx, hc::accelerator_view av, unsigned int
|
||||
case hipDeviceScheduleBlockingSync : _scheduleMode = Yield; break;
|
||||
default:_scheduleMode = Auto;
|
||||
};
|
||||
|
||||
|
||||
};
|
||||
|
||||
|
||||
@@ -319,14 +332,34 @@ void ihipStream_t::locked_wait()
|
||||
|
||||
// Causes current stream to wait for specified event to complete:
|
||||
// Note this does not provide any kind of host serialization.
|
||||
void ihipStream_t::locked_waitEvent(hipEvent_t event)
|
||||
void ihipStream_t::locked_streamWaitEvent(hipEvent_t event)
|
||||
{
|
||||
LockedAccessor_StreamCrit_t crit(_criticalData);
|
||||
|
||||
|
||||
crit->_av.create_blocking_marker(event->_marker, hc::accelerator_scope);
|
||||
crit->_av.create_blocking_marker(event->marker(), hc::accelerator_scope);
|
||||
}
|
||||
|
||||
|
||||
// Returns true if the marker associated with the specified event is ready:
|
||||
// Note this only queries the marker; it does not wait for the event.
|
||||
bool ihipStream_t::locked_eventIsReady(hipEvent_t event)
|
||||
{
|
||||
// Event query that returns "Complete" may cause HCC to manipulate
|
||||
// internal queue state so lock the stream's queue here.
|
||||
LockedAccessor_StreamCrit_t crit(_criticalData);
|
||||
|
||||
return (event->marker().is_ready());
|
||||
}
|
||||
|
||||
void ihipStream_t::locked_eventWaitComplete(hipEvent_t event, hc::hcWaitMode waitMode)
|
||||
{
|
||||
LockedAccessor_StreamCrit_t crit(_criticalData);
|
||||
|
||||
event->marker().wait(waitMode);
|
||||
}
|
||||
|
||||
|
||||
// Create a marker in this stream.
|
||||
// Save state in the event so it can track the status of the event.
|
||||
void ihipStream_t::locked_recordEvent(hipEvent_t event)
|
||||
@@ -345,7 +378,7 @@ void ihipStream_t::locked_recordEvent(hipEvent_t event)
|
||||
scopeFlag = HIP_EVENT_SYS_RELEASE ? hc::system_scope : hc::accelerator_scope;
|
||||
}
|
||||
|
||||
event->_marker = crit->_av.create_marker(scopeFlag);
|
||||
event->marker(crit->_av.create_marker(scopeFlag));
|
||||
};
|
||||
|
||||
//=============================================================================
|
||||
@@ -737,21 +770,7 @@ hipError_t ihipDevice_t::initProperties(hipDeviceProp_t* prop)
|
||||
char archName[256];
|
||||
err = hsa_agent_get_info(_hsaAgent, HSA_AGENT_INFO_NAME, &archName);
|
||||
|
||||
if(strcmp(archName,"gfx701")==0){
|
||||
prop->gcnArch = 701;
|
||||
}
|
||||
if(strcmp(archName,"gfx801")==0){
|
||||
prop->gcnArch = 801;
|
||||
}
|
||||
if(strcmp(archName,"gfx802")==0){
|
||||
prop->gcnArch = 802;
|
||||
}
|
||||
if(strcmp(archName,"gfx803")==0){
|
||||
prop->gcnArch = 803;
|
||||
}
|
||||
if(strcmp(archName,"gfx900")==0){
|
||||
prop->gcnArch = 900;
|
||||
}
|
||||
prop->gcnArch = atoi(archName+3);
|
||||
|
||||
DeviceErrorCheck(err);
|
||||
|
||||
@@ -799,13 +818,13 @@ hipError_t ihipDevice_t::initProperties(hipDeviceProp_t* prop)
|
||||
DeviceErrorCheck(err);
|
||||
|
||||
// BDFID is 16bit uint: [8bit - BusID | 5bit - Device ID | 3bit - Function/DomainID]
|
||||
// prop->pciDomainID = bdf_id & 0x7;
|
||||
prop->pciDomainID = bdf_id & 0x7;
|
||||
prop->pciDeviceID = (bdf_id>>3) & 0x1F;
|
||||
prop->pciBusID = (bdf_id>>8) & 0xFF;
|
||||
|
||||
// Masquerade as a 3.0-level device. This will change as more HW functions are properly supported.
|
||||
// Application code should use the arch.has* to do detailed feature detection.
|
||||
prop->major = 2;
|
||||
prop->major = 3;
|
||||
prop->minor = 0;
|
||||
|
||||
// Get number of Compute Unit
|
||||
@@ -1233,10 +1252,14 @@ void HipReadEnv()
|
||||
READ_ENV_I(release, HIP_FAIL_SOC, 0, "Fault on Sub-Optimal-Copy, rather than use a slower but functional implementation. Bit 0x1=Fail on async copy with unpinned memory. Bit 0x2=Fail peer copy rather than use staging buffer copy");
|
||||
|
||||
READ_ENV_I(release, HIP_SYNC_HOST_ALLOC, 0, "Sync before and after all host memory allocations. May help stability");
|
||||
READ_ENV_I(release, HIP_INIT_ALLOC, 0, "If not -1, initialize allocated memory to specified byte");
|
||||
READ_ENV_I(release, HIP_SYNC_NULL_STREAM, 0, "Synchronize on host for null stream submissions");
|
||||
READ_ENV_I(release, HIP_FORCE_NULL_STREAM, 0, "Force all stream allocations to secretly return the null stream");
|
||||
|
||||
READ_ENV_I(release, HIP_SYNC_STREAM_WAIT, 0, "hipStreamWaitEvent will synchronize to host");
|
||||
|
||||
|
||||
READ_ENV_I(release, HIP_COHERENT_HOST_ALLOC, 0, "If set, all host memory will be allocated as fine-grained system memory. This allows threadfence_system to work but prevents host memory from being cached on GPU which may have performance impact.");
|
||||
READ_ENV_I(release, HIP_HOST_COHERENT, 0, "If set, all host memory will be allocated as fine-grained system memory. This allows threadfence_system to work but prevents host memory from being cached on GPU which may have performance impact.");
|
||||
|
||||
|
||||
READ_ENV_I(release, HCC_OPT_FLUSH, 0, "When set, use agent-scope fence operations rather than system-scope fence operationsflush when possible. This flag controls both HIP and HCC behavior.");
|
||||
@@ -1434,9 +1457,7 @@ void ihipPrintKernelLaunch(const char *kernelName, const grid_launch_parm *lp, c
|
||||
{
|
||||
|
||||
if ((HIP_TRACE_API & (1<<TRACE_KCMD)) || HIP_PROFILE_API || (COMPILE_HIP_DB & HIP_TRACE_API)) {
|
||||
std::stringstream os_pre;
|
||||
std::stringstream os;
|
||||
os_pre << "<<hip-api tid:";
|
||||
os << tls_tidInfo.tid() << "." << tls_tidInfo.apiSeqNum()
|
||||
<< " hipLaunchKernel '" << kernelName << "'"
|
||||
<< " gridDim:" << lp->grid_dim
|
||||
@@ -1444,6 +1465,11 @@ void ihipPrintKernelLaunch(const char *kernelName, const grid_launch_parm *lp, c
|
||||
<< " sharedMem:+" << lp->dynamic_group_mem_bytes
|
||||
<< " " << *stream;
|
||||
|
||||
if (COMPILE_HIP_DB && HIP_TRACE_API) {
|
||||
std::string fullStr;
|
||||
recordApiTrace(&fullStr, os.str());
|
||||
}
|
||||
|
||||
if (HIP_PROFILE_API == 0x1) {
|
||||
std::string shortAtpString("hipLaunchKernel:");
|
||||
shortAtpString += kernelName;
|
||||
@@ -1451,10 +1477,6 @@ void ihipPrintKernelLaunch(const char *kernelName, const grid_launch_parm *lp, c
|
||||
} else if (HIP_PROFILE_API == 0x2) {
|
||||
MARKER_BEGIN(os.str().c_str(), "HIP");
|
||||
}
|
||||
|
||||
if (COMPILE_HIP_DB && HIP_TRACE_API) {
|
||||
std::cerr << API_COLOR << os.str() << API_COLOR_END << std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1872,8 +1894,13 @@ void ihipStream_t::locked_copySync(void* dst, const void* src, size_t sizeBytes,
|
||||
}
|
||||
|
||||
hc::accelerator acc;
|
||||
#if (__hcc_workweek__ >= 17332)
|
||||
hc::AmPointerInfo dstPtrInfo(NULL, NULL, NULL, 0, acc, 0, 0);
|
||||
hc::AmPointerInfo srcPtrInfo(NULL, NULL, NULL, 0, acc, 0, 0);
|
||||
#else
|
||||
hc::AmPointerInfo dstPtrInfo(NULL, NULL, 0, acc, 0, 0);
|
||||
hc::AmPointerInfo srcPtrInfo(NULL, NULL, 0, acc, 0, 0);
|
||||
#endif
|
||||
bool dstTracked = getTailoredPtrInfo(&dstPtrInfo, dst, sizeBytes);
|
||||
bool srcTracked = getTailoredPtrInfo(&srcPtrInfo, src, sizeBytes);
|
||||
|
||||
@@ -1908,7 +1935,11 @@ void ihipStream_t::locked_copySync(void* dst, const void* src, size_t sizeBytes,
|
||||
}
|
||||
|
||||
// Registers `ptr` (covering `sizeBytes` bytes, associated with accelerator
// `acc`) with the HCC memory tracker by building an AmPointerInfo record and
// handing it to hc::am_memtracker_add.
//
// The AmPointerInfo constructor gained an extra pointer argument in HCC
// builds with __hcc_workweek__ >= 17332; older builds use the shorter form.
void ihipStream_t::addSymbolPtrToTracker(hc::accelerator& acc, void* ptr, size_t sizeBytes) {
#if (__hcc_workweek__ < 17332)
    // Older HCC: six-argument constructor.
    hc::AmPointerInfo symbolInfo(NULL, ptr, sizeBytes, acc, true, false);
#else
    // Newer HCC: seven-argument constructor; `ptr` is supplied for both
    // pointer slots that follow the leading NULL.
    hc::AmPointerInfo symbolInfo(NULL, ptr, ptr, sizeBytes, acc, true, false);
#endif

    hc::am_memtracker_add(ptr, symbolInfo);
}
|
||||
|
||||
@@ -1932,7 +1963,11 @@ void ihipStream_t::lockedSymbolCopyAsync(hc::accelerator &acc, void* dst, void*
|
||||
{
|
||||
// TODO - review - this looks broken , should not be adding pointers to tracker dynamically:
|
||||
if(kind == hipMemcpyHostToDevice) {
|
||||
#if (__hcc_workweek__ >= 17332)
|
||||
hc::AmPointerInfo srcPtrInfo(NULL, NULL, NULL, 0, acc, 0, 0);
|
||||
#else
|
||||
hc::AmPointerInfo srcPtrInfo(NULL, NULL, 0, acc, 0, 0);
|
||||
#endif
|
||||
bool srcTracked = (hc::am_memtracker_getinfo(&srcPtrInfo, src) == AM_SUCCESS);
|
||||
if(srcTracked) {
|
||||
addSymbolPtrToTracker(acc, dst, sizeBytes);
|
||||
@@ -1944,7 +1979,11 @@ void ihipStream_t::lockedSymbolCopyAsync(hc::accelerator &acc, void* dst, void*
|
||||
}
|
||||
}
|
||||
if(kind == hipMemcpyDeviceToHost) {
|
||||
#if (__hcc_workweek__ >= 17332)
|
||||
hc::AmPointerInfo dstPtrInfo(NULL, NULL, NULL, 0, acc, 0, 0);
|
||||
#else
|
||||
hc::AmPointerInfo dstPtrInfo(NULL, NULL, 0, acc, 0, 0);
|
||||
#endif
|
||||
bool dstTracked = (hc::am_memtracker_getinfo(&dstPtrInfo, dst) == AM_SUCCESS);
|
||||
if(dstTracked) {
|
||||
addSymbolPtrToTracker(acc, src, sizeBytes);
|
||||
@@ -1983,8 +2022,13 @@ void ihipStream_t::locked_copyAsync(void* dst, const void* src, size_t sizeBytes
|
||||
} else {
|
||||
|
||||
hc::accelerator acc;
|
||||
#if (__hcc_workweek__ >= 17332)
|
||||
hc::AmPointerInfo dstPtrInfo(NULL, NULL, NULL, 0, acc, 0, 0);
|
||||
hc::AmPointerInfo srcPtrInfo(NULL, NULL, NULL, 0, acc, 0, 0);
|
||||
#else
|
||||
hc::AmPointerInfo dstPtrInfo(NULL, NULL, 0, acc, 0, 0);
|
||||
hc::AmPointerInfo srcPtrInfo(NULL, NULL, 0, acc, 0, 0);
|
||||
#endif
|
||||
bool dstTracked = getTailoredPtrInfo(&dstPtrInfo, dst, sizeBytes);
|
||||
bool srcTracked = getTailoredPtrInfo(&srcPtrInfo, src, sizeBytes);
|
||||
|
||||
|
||||
@@ -59,14 +59,18 @@ extern int HIP_STREAM_SIGNALS; /* number of signals to allocate at stream creat
|
||||
extern int HIP_VISIBLE_DEVICES; /* Contains a comma-separated sequence of GPU identifiers */
|
||||
extern int HIP_FORCE_P2P_HOST;
|
||||
|
||||
extern int HIP_COHERENT_HOST_ALLOC;
|
||||
extern int HIP_HOST_COHERENT;
|
||||
|
||||
extern int HIP_HIDDEN_FREE_MEM;
|
||||
//---
|
||||
// Chicken bits for disabling functionality to work around potential issues:
|
||||
extern int HIP_SYNC_HOST_ALLOC;
|
||||
extern int HIP_SYNC_STREAM_WAIT;
|
||||
|
||||
extern int HIP_SYNC_NULL_STREAM;
|
||||
extern int HIP_INIT_ALLOC;
|
||||
extern int HIP_FORCE_NULL_STREAM;
|
||||
|
||||
|
||||
// TODO - remove when this is standard behavior.
|
||||
extern int HCC_OPT_FLUSH;
|
||||
@@ -114,6 +118,7 @@ private:
|
||||
//Extern tls
|
||||
extern thread_local hipError_t tls_lastHipError;
|
||||
extern thread_local TidInfo tls_tidInfo;
|
||||
extern thread_local bool tls_getPrimaryCtx;
|
||||
|
||||
extern std::vector<ProfTrigger> g_dbStartTriggers;
|
||||
extern std::vector<ProfTrigger> g_dbStopTriggers;
|
||||
@@ -190,10 +195,12 @@ extern const char *API_COLOR_END;
|
||||
|
||||
//---
|
||||
//HIP Trace modes - use with HIP_TRACE_API=...
|
||||
#define TRACE_ALL 0 // 0x1
|
||||
#define TRACE_KCMD 1 // 0x2, kernel command
|
||||
#define TRACE_MCMD 2 // 0x4, memory command
|
||||
#define TRACE_MEM 3 // 0x8, memory allocation or deallocation.
|
||||
#define TRACE_ALL 0 // 0x01
|
||||
#define TRACE_KCMD 1 // 0x02, kernel command
|
||||
#define TRACE_MCMD 2 // 0x04, memory command
|
||||
#define TRACE_MEM 3 // 0x08, memory allocation or deallocation.
|
||||
#define TRACE_SYNC 4 // 0x10, synchronization (host or hipStreamWaitEvent)
|
||||
#define TRACE_QUERY 5 // 0x20, hipEventRecord, hipEventQuery, hipStreamQuery
|
||||
|
||||
|
||||
//---
|
||||
@@ -514,9 +521,12 @@ public:
|
||||
|
||||
hc::accelerator_view* locked_getAv() { LockedAccessor_StreamCrit_t crit(_criticalData); return &(crit->_av); };
|
||||
|
||||
void locked_waitEvent(hipEvent_t event);
|
||||
void locked_streamWaitEvent(hipEvent_t event);
|
||||
void locked_recordEvent(hipEvent_t event);
|
||||
|
||||
bool locked_eventIsReady(hipEvent_t event);
|
||||
void locked_eventWaitComplete(hipEvent_t event, hc::hcWaitMode waitMode);
|
||||
|
||||
ihipStreamCritical_t &criticalData() { return _criticalData; };
|
||||
|
||||
//---
|
||||
@@ -605,18 +615,24 @@ public:
|
||||
ihipEvent_t(unsigned flags);
|
||||
void attachToCompletionFuture(const hc::completion_future *cf, hipStream_t stream, ihipEventType_t eventType);
|
||||
void refereshEventStatus();
|
||||
hc::completion_future & marker() { return _marker; }
|
||||
void marker(hc::completion_future cf) { _marker = cf; };
|
||||
|
||||
bool locked_isReady();
|
||||
void locked_waitComplete(hc::hcWaitMode waitMode);
|
||||
|
||||
uint64_t timestamp() const { return _timestamp; } ;
|
||||
ihipEventType_t type() const { return _type; };
|
||||
|
||||
public:
|
||||
hipEventStatus_t _state;
|
||||
|
||||
hipStream_t _stream; // Stream where the event is recorded, or NULL if all streams.
|
||||
hipStream_t _stream; // Stream where the event is recorded. Null stream is resolved to actual stream when recorded
|
||||
unsigned _flags;
|
||||
|
||||
hc::completion_future _marker;
|
||||
|
||||
private:
|
||||
hc::completion_future _marker;
|
||||
ihipEventType_t _type;
|
||||
uint64_t _timestamp; // store timestamp, may be set on host or by marker.
|
||||
friend hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream);
|
||||
|
||||
+370
-58
@@ -27,10 +27,6 @@ THE SOFTWARE.
|
||||
#include "hip/hip_runtime.h"
|
||||
#include "hip_hcc_internal.h"
|
||||
#include "trace_helper.h"
|
||||
#include "hip/hcc_detail/hip_texture.h"
|
||||
#include <hc_am.hpp>
|
||||
|
||||
|
||||
|
||||
// Internal HIP APIS:
|
||||
namespace hip_internal {
|
||||
@@ -105,17 +101,29 @@ int sharePtr(void *ptr, ihipCtx_t *ctx, bool shareWithAll, unsigned hipFlags)
|
||||
|
||||
// Allocate a new pointer with am_alloc and share with all valid peers.
|
||||
// Returns null-ptr if a memory error occurs (either allocation or sharing)
|
||||
void * allocAndSharePtr(const char *msg, size_t sizeBytes, ihipCtx_t *ctx, bool shareWithAll, unsigned amFlags, unsigned hipFlags)
|
||||
void * allocAndSharePtr(const char *msg, size_t sizeBytes, ihipCtx_t *ctx, bool shareWithAll, unsigned amFlags, unsigned hipFlags, size_t alignment)
|
||||
{
|
||||
|
||||
void *ptr = nullptr;
|
||||
|
||||
auto device = ctx->getWriteableDevice();
|
||||
|
||||
ptr = hc::am_alloc(sizeBytes, device->_acc, amFlags);
|
||||
#if (__hcc_workweek__ >= 17332)
|
||||
if (alignment != 0) {
|
||||
ptr = hc::am_aligned_alloc(sizeBytes, device->_acc, amFlags, alignment);
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
ptr = hc::am_alloc(sizeBytes, device->_acc, amFlags);
|
||||
}
|
||||
tprintf(DB_MEM, " alloc %s ptr:%p-%p size:%zu on dev:%d\n",
|
||||
msg, ptr, static_cast<char*>(ptr)+sizeBytes, sizeBytes, device->_deviceId);
|
||||
|
||||
if (HIP_INIT_ALLOC != -1) {
|
||||
// TODO , dont' call HIP API directly here:
|
||||
hipMemset(ptr, HIP_INIT_ALLOC, sizeBytes);
|
||||
}
|
||||
|
||||
if (ptr != nullptr) {
|
||||
int r = sharePtr(ptr, ctx, shareWithAll, hipFlags);
|
||||
if (r != 0) {
|
||||
@@ -147,41 +155,45 @@ hipError_t hipPointerGetAttributes(hipPointerAttribute_t *attributes, const void
|
||||
HIP_INIT_API(attributes, ptr);
|
||||
|
||||
hipError_t e = hipSuccess;
|
||||
|
||||
hc::accelerator acc;
|
||||
hc::AmPointerInfo amPointerInfo(NULL, NULL, 0, acc, 0, 0);
|
||||
am_status_t status = hc::am_memtracker_getinfo(&amPointerInfo, ptr);
|
||||
if (status == AM_SUCCESS) {
|
||||
|
||||
attributes->memoryType = amPointerInfo._isInDeviceMem ? hipMemoryTypeDevice: hipMemoryTypeHost;
|
||||
attributes->hostPointer = amPointerInfo._hostPointer;
|
||||
attributes->devicePointer = amPointerInfo._devicePointer;
|
||||
attributes->isManaged = 0;
|
||||
if(attributes->memoryType == hipMemoryTypeHost){
|
||||
attributes->hostPointer = (void*)ptr;
|
||||
}
|
||||
if(attributes->memoryType == hipMemoryTypeDevice){
|
||||
attributes->devicePointer = (void*)ptr;
|
||||
}
|
||||
attributes->allocationFlags = amPointerInfo._appAllocationFlags;
|
||||
attributes->device = amPointerInfo._appId;
|
||||
|
||||
if (attributes->device < 0) {
|
||||
e = hipErrorInvalidDevice;
|
||||
}
|
||||
|
||||
|
||||
if((attributes == nullptr) || (ptr == nullptr)) {
|
||||
e = hipErrorInvalidValue;
|
||||
} else {
|
||||
attributes->memoryType = hipMemoryTypeDevice;
|
||||
attributes->hostPointer = 0;
|
||||
attributes->devicePointer = 0;
|
||||
attributes->device = -1;
|
||||
attributes->isManaged = 0;
|
||||
attributes->allocationFlags = 0;
|
||||
hc::accelerator acc;
|
||||
#if (__hcc_workweek__ >= 17332)
|
||||
hc::AmPointerInfo amPointerInfo(NULL, NULL, NULL, 0, acc, 0, 0);
|
||||
#else
|
||||
hc::AmPointerInfo amPointerInfo(NULL, NULL, 0, acc, 0, 0);
|
||||
#endif
|
||||
am_status_t status = hc::am_memtracker_getinfo(&amPointerInfo, ptr);
|
||||
if (status == AM_SUCCESS) {
|
||||
|
||||
e = hipErrorUnknown; // TODO - should be hipErrorInvalidValue ?
|
||||
attributes->memoryType = amPointerInfo._isInDeviceMem ? hipMemoryTypeDevice: hipMemoryTypeHost;
|
||||
attributes->hostPointer = amPointerInfo._hostPointer;
|
||||
attributes->devicePointer = amPointerInfo._devicePointer;
|
||||
attributes->isManaged = 0;
|
||||
if(attributes->memoryType == hipMemoryTypeHost){
|
||||
attributes->hostPointer = (void*)ptr;
|
||||
}
|
||||
if(attributes->memoryType == hipMemoryTypeDevice){
|
||||
attributes->devicePointer = (void*)ptr;
|
||||
}
|
||||
attributes->allocationFlags = amPointerInfo._appAllocationFlags;
|
||||
attributes->device = amPointerInfo._appId;
|
||||
|
||||
if (attributes->device < 0) {
|
||||
e = hipErrorInvalidDevice;
|
||||
}
|
||||
} else {
|
||||
attributes->memoryType = hipMemoryTypeDevice;
|
||||
attributes->hostPointer = 0;
|
||||
attributes->devicePointer = 0;
|
||||
attributes->device = -1;
|
||||
attributes->isManaged = 0;
|
||||
attributes->allocationFlags = 0;
|
||||
|
||||
e = hipErrorUnknown; // TODO - should be hipErrorInvalidValue ?
|
||||
}
|
||||
}
|
||||
|
||||
return ihipLogStatus(e);
|
||||
}
|
||||
|
||||
@@ -192,14 +204,17 @@ hipError_t hipHostGetDevicePointer(void **devicePointer, void *hostPointer, unsi
|
||||
|
||||
hipError_t e = hipSuccess;
|
||||
|
||||
*devicePointer = NULL;
|
||||
|
||||
// Flags must be 0:
|
||||
if (flags != 0) {
|
||||
if ((flags != 0) || (devicePointer == nullptr) || (hostPointer == nullptr)){
|
||||
e = hipErrorInvalidValue;
|
||||
} else {
|
||||
hc::accelerator acc;
|
||||
*devicePointer = NULL;
|
||||
#if (__hcc_workweek__ >= 17332)
|
||||
hc::AmPointerInfo amPointerInfo(NULL, NULL, NULL, 0, acc, 0, 0);
|
||||
#else
|
||||
hc::AmPointerInfo amPointerInfo(NULL, NULL, 0, acc, 0, 0);
|
||||
#endif
|
||||
am_status_t status = hc::am_memtracker_getinfo(&amPointerInfo, hostPointer);
|
||||
if (status == AM_SUCCESS) {
|
||||
*devicePointer = static_cast<char*>(amPointerInfo._devicePointer) + (static_cast<char*>(hostPointer) - static_cast<char*>(amPointerInfo._hostPointer)) ;
|
||||
@@ -230,7 +245,7 @@ hipError_t hipMalloc(void** ptr, size_t sizeBytes)
|
||||
|
||||
} else {
|
||||
auto device = ctx->getWriteableDevice();
|
||||
*ptr = hip_internal::allocAndSharePtr("device_mem", sizeBytes, ctx, false/*shareWithAll*/, 0/*amFlags*/, 0/*hipFlags*/);
|
||||
*ptr = hip_internal::allocAndSharePtr("device_mem", sizeBytes, ctx, false/*shareWithAll*/, 0/*amFlags*/, 0/*hipFlags*/, 0);
|
||||
|
||||
if(sizeBytes && (*ptr == NULL)){
|
||||
hip_status = hipErrorMemoryAllocation;
|
||||
@@ -293,12 +308,12 @@ hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags)
|
||||
amFlags = amHostNonCoherent;
|
||||
} else {
|
||||
// depends on env variables:
|
||||
amFlags = HIP_COHERENT_HOST_ALLOC ? amHostCoherent : amHostNonCoherent;
|
||||
amFlags = HIP_HOST_COHERENT ? amHostCoherent : amHostNonCoherent;
|
||||
}
|
||||
|
||||
|
||||
*ptr = hip_internal::allocAndSharePtr((amFlags & amHostCoherent) ? "finegrained_host":"pinned_host",
|
||||
sizeBytes, ctx, (trueFlags & hipHostMallocPortable) /*shareWithAll*/, amFlags, flags);
|
||||
sizeBytes, ctx, (trueFlags & hipHostMallocPortable) /*shareWithAll*/, amFlags, flags, 0);
|
||||
|
||||
if(sizeBytes && (*ptr == NULL)){
|
||||
hip_status = hipErrorMemoryAllocation;
|
||||
@@ -344,10 +359,29 @@ hipError_t hipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t height
|
||||
|
||||
//err = hipMalloc(ptr, (*pitch)*height);
|
||||
if (ctx) {
|
||||
auto device = ctx->getWriteableDevice();
|
||||
hc::accelerator acc = ctx->getDevice()->_acc;
|
||||
hsa_agent_t* agent =static_cast<hsa_agent_t*>(acc.get_hsa_agent());
|
||||
|
||||
size_t allocGranularity = 0;
|
||||
hsa_amd_memory_pool_t *allocRegion = static_cast<hsa_amd_memory_pool_t*>(acc.get_hsa_am_region());
|
||||
hsa_amd_memory_pool_get_info(*allocRegion, HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE, &allocGranularity);
|
||||
|
||||
hsa_ext_image_descriptor_t imageDescriptor;
|
||||
imageDescriptor.width = *pitch;
|
||||
imageDescriptor.height = height;
|
||||
imageDescriptor.depth = 0;
|
||||
imageDescriptor.array_size = 0;
|
||||
imageDescriptor.geometry = HSA_EXT_IMAGE_GEOMETRY_2D;
|
||||
imageDescriptor.format.channel_order = HSA_EXT_IMAGE_CHANNEL_ORDER_R;
|
||||
imageDescriptor.format.channel_type = HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32;
|
||||
|
||||
hsa_access_permission_t permission = HSA_ACCESS_PERMISSION_RW;
|
||||
hsa_ext_image_data_info_t imageInfo;
|
||||
hsa_status_t status = hsa_ext_image_data_get_info(*agent, &imageDescriptor, permission, &imageInfo);
|
||||
size_t alignment = imageInfo.alignment <= allocGranularity ? 0 : imageInfo.alignment;
|
||||
|
||||
const unsigned am_flags = 0;
|
||||
*ptr = hip_internal::allocAndSharePtr("device_pitch", sizeBytes, ctx, false/*shareWithAll*/, am_flags, 0);
|
||||
*ptr = hip_internal::allocAndSharePtr("device_pitch", sizeBytes, ctx, false/*shareWithAll*/, am_flags, 0, alignment);
|
||||
|
||||
if (sizeBytes && (*ptr == NULL)) {
|
||||
hip_status = hipErrorMemoryAllocation;
|
||||
@@ -367,26 +401,128 @@ hipChannelFormatDesc hipCreateChannelDesc(int x, int y, int z, int w, hipChannel
|
||||
return cd;
|
||||
}
|
||||
|
||||
extern void getChannelOrderAndType(const hipChannelFormatDesc& desc,
|
||||
enum hipTextureReadMode readMode,
|
||||
hsa_ext_image_channel_order_t& channelOrder,
|
||||
hsa_ext_image_channel_type_t& channelType);
|
||||
|
||||
// Allocates a hipArray descriptor on the host heap and backs it with device
// memory sized for a width x height image (height == 0 is treated as a 1-row
// array for sizing purposes).
//
// Parameters:
//   array  - out: receives the newly malloc'ed hipArray descriptor.
//   desc   - channel format; desc->f selects the per-element size.
//   width  - number of elements per row; must be > 0.
//   height - number of rows; 0 means "size = width".
//   flags  - stored in array->type; only the default (2D) layout is supported,
//            layered / cubemap / surface / gather flags are rejected.
//
// Returns:
//   hipSuccess on success,
//   hipErrorInvalidValue     if array or desc is null, width is 0, or flags
//                            request an unsupported layout,
//   hipErrorUnknown          if desc->f is not a recognised channel kind,
//   hipErrorMemoryAllocation if the host descriptor or device buffer cannot
//                            be allocated, or no default context is available.
//
// Note: on failures after the descriptor has been allocated, *array is left
// pointing at the descriptor with data == nullptr (the descriptor is not
// freed here, matching the pre-existing behaviour).
hipError_t hipMallocArray(hipArray** array, const hipChannelFormatDesc* desc,
                          size_t width, size_t height, unsigned int flags)
{
    HIP_INIT_SPECIAL_API((TRACE_MEM), array, desc, width, height, flags);
    HIP_SET_DEVICE();

    // Reject arguments that would otherwise be dereferenced blindly below.
    if ((array == nullptr) || (desc == nullptr) || (width == 0)) {
        return ihipLogStatus(hipErrorInvalidValue);
    }

    auto ctx = ihipGetTlsDefaultCtx();

    *array = (hipArray*)malloc(sizeof(hipArray));
    if (*array == nullptr) {
        return ihipLogStatus(hipErrorMemoryAllocation);
    }

    array[0]->type   = flags;
    array[0]->width  = width;
    array[0]->height = height;
    array[0]->depth  = 1;
    array[0]->desc   = *desc;
    // Never leave the data pointer indeterminate on the error paths below.
    array[0]->data   = nullptr;

    if (!ctx) {
        return ihipLogStatus(hipErrorMemoryAllocation);
    }

    // Element count: width, or width*height when a height is given.
    size_t size = width;
    if (height > 0) {
        size = size * height;
    }

    // Per-element size is chosen from the channel format kind.
    size_t elementBytes = 0;
    switch (desc->f) {
        case hipChannelFormatKindSigned:
            elementBytes = sizeof(int);
            break;
        case hipChannelFormatKindUnsigned:
            elementBytes = sizeof(unsigned int);
            break;
        case hipChannelFormatKindFloat:
            elementBytes = sizeof(float);
            break;
        case hipChannelFormatKindNone:
            elementBytes = sizeof(size_t);
            break;
        default:
            // Unknown format: do not go on to attempt a zero-sized allocation.
            return ihipLogStatus(hipErrorUnknown);
    }
    const size_t allocSize = size * elementBytes;

    hc::accelerator acc = ctx->getDevice()->_acc;
    hsa_agent_t* agent = static_cast<hsa_agent_t*>(acc.get_hsa_agent());

    // Query the pool's allocation granule; stays 0 if the query fails.
    size_t allocGranularity = 0;
    hsa_amd_memory_pool_t* allocRegion = static_cast<hsa_amd_memory_pool_t*>(acc.get_hsa_am_region());
    hsa_amd_memory_pool_get_info(*allocRegion, HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE, &allocGranularity);

    hsa_ext_image_descriptor_t imageDescriptor;
    imageDescriptor.width      = width;
    imageDescriptor.height     = height;
    imageDescriptor.depth      = 0;
    imageDescriptor.array_size = 0;
    switch (flags) {
        case hipArrayLayered:
        case hipArrayCubemap:
        case hipArraySurfaceLoadStore:
        case hipArrayTextureGather:
            // Unsupported layouts: trap in debug builds, and fail cleanly when
            // assert() is compiled out instead of using an unset geometry.
            assert(0);
            return ihipLogStatus(hipErrorInvalidValue);
        case hipArrayDefault:
        default:
            imageDescriptor.geometry = HSA_EXT_IMAGE_GEOMETRY_2D;
            break;
    }

    hsa_ext_image_channel_order_t channelOrder;
    hsa_ext_image_channel_type_t channelType;
    getChannelOrderAndType(*desc, hipReadModeElementType, channelOrder, channelType);
    imageDescriptor.format.channel_order = channelOrder;
    imageDescriptor.format.channel_type  = channelType;

    // Ask HSA what alignment an image of this shape needs. An explicit
    // alignment is requested only when it exceeds the pool granule; if the
    // query fails, fall back to the default allocation (alignment 0).
    hsa_access_permission_t permission = HSA_ACCESS_PERMISSION_RW;
    hsa_ext_image_data_info_t imageInfo = {};
    hsa_status_t status = hsa_ext_image_data_get_info(*agent, &imageDescriptor, permission, &imageInfo);
    size_t alignment = 0;
    if ((status == HSA_STATUS_SUCCESS) && (imageInfo.alignment > allocGranularity)) {
        alignment = imageInfo.alignment;
    }

    const unsigned am_flags = 0;
    array[0]->data = hip_internal::allocAndSharePtr("device_array", allocSize, ctx, false/*shareWithAll*/, am_flags, 0, alignment);

    hipError_t hip_status = hipSuccess;
    if (size && (array[0]->data == NULL)) {
        hip_status = hipErrorMemoryAllocation;
    }

    return ihipLogStatus(hip_status);
}
|
||||
|
||||
hipError_t hipMalloc3DArray(hipArray_t *array,
|
||||
const struct hipChannelFormatDesc* desc,
|
||||
struct hipExtent extent,
|
||||
unsigned int flags)
|
||||
{
|
||||
HIP_INIT();
|
||||
HIP_SET_DEVICE();
|
||||
hipError_t hip_status = hipSuccess;
|
||||
|
||||
auto ctx = ihipGetTlsDefaultCtx();
|
||||
|
||||
*array = (hipArray*)malloc(sizeof(hipArray));
|
||||
array[0]->width = width;
|
||||
array[0]->height = height;
|
||||
|
||||
array[0]->f = desc->f;
|
||||
array[0]->type = flags;
|
||||
array[0]->width = extent.width;
|
||||
array[0]->height = extent.height;
|
||||
array[0]->depth = extent.depth;
|
||||
array[0]->desc = *desc;
|
||||
|
||||
void ** ptr = &array[0]->data;
|
||||
|
||||
if (ctx) {
|
||||
const unsigned am_flags = 0;
|
||||
const size_t size = width*height;
|
||||
const size_t size = extent.width*extent.height*extent.depth;
|
||||
|
||||
size_t allocSize = 0;
|
||||
switch(desc->f) {
|
||||
@@ -406,7 +542,48 @@ hipError_t hipMallocArray(hipArray** array, const hipChannelFormatDesc* desc,
|
||||
hip_status = hipErrorUnknown;
|
||||
break;
|
||||
}
|
||||
*ptr = hip_internal::allocAndSharePtr("device_array", allocSize, ctx, false/*shareWithAll*/, am_flags, 0);
|
||||
|
||||
hc::accelerator acc = ctx->getDevice()->_acc;
|
||||
hsa_agent_t* agent =static_cast<hsa_agent_t*>(acc.get_hsa_agent());
|
||||
|
||||
size_t allocGranularity = 0;
|
||||
hsa_amd_memory_pool_t *allocRegion = static_cast<hsa_amd_memory_pool_t*>(acc.get_hsa_am_region());
|
||||
hsa_amd_memory_pool_get_info(*allocRegion, HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE, &allocGranularity);
|
||||
|
||||
hsa_ext_image_descriptor_t imageDescriptor;
|
||||
imageDescriptor.width = extent.width;
|
||||
imageDescriptor.height = extent.height;
|
||||
imageDescriptor.depth = 0;
|
||||
imageDescriptor.array_size = 0;
|
||||
switch (flags) {
|
||||
case hipArrayLayered:
|
||||
imageDescriptor.geometry = HSA_EXT_IMAGE_GEOMETRY_2DA;
|
||||
imageDescriptor.array_size = extent.depth;
|
||||
break;
|
||||
case hipArraySurfaceLoadStore:
|
||||
case hipArrayTextureGather:
|
||||
case hipArrayDefault:
|
||||
assert(0);
|
||||
break;
|
||||
case hipArrayCubemap:
|
||||
default:
|
||||
imageDescriptor.geometry = HSA_EXT_IMAGE_GEOMETRY_3D;
|
||||
imageDescriptor.depth = extent.depth;
|
||||
break;
|
||||
}
|
||||
hsa_ext_image_channel_order_t channelOrder;
|
||||
hsa_ext_image_channel_type_t channelType;
|
||||
getChannelOrderAndType(*desc, hipReadModeElementType, channelOrder, channelType);
|
||||
imageDescriptor.format.channel_order = channelOrder;
|
||||
imageDescriptor.format.channel_type = channelType;
|
||||
|
||||
hsa_access_permission_t permission = HSA_ACCESS_PERMISSION_RW;
|
||||
hsa_ext_image_data_info_t imageInfo;
|
||||
hsa_status_t status = hsa_ext_image_data_get_info(*agent, &imageDescriptor, permission, &imageInfo);
|
||||
size_t alignment = imageInfo.alignment <= allocGranularity ? 0 : imageInfo.alignment;
|
||||
|
||||
*ptr = hip_internal::allocAndSharePtr("device_array", allocSize, ctx, false, am_flags, 0, alignment);
|
||||
|
||||
if (size && (*ptr == NULL)) {
|
||||
hip_status = hipErrorMemoryAllocation;
|
||||
}
|
||||
@@ -415,7 +592,7 @@ hipError_t hipMallocArray(hipArray** array, const hipChannelFormatDesc* desc,
|
||||
hip_status = hipErrorMemoryAllocation;
|
||||
}
|
||||
|
||||
return ihipLogStatus(hip_status);
|
||||
return hip_status;
|
||||
}
|
||||
|
||||
hipError_t hipHostGetFlags(unsigned int* flagsPtr, void* hostPtr)
|
||||
@@ -425,7 +602,11 @@ hipError_t hipHostGetFlags(unsigned int* flagsPtr, void* hostPtr)
|
||||
hipError_t hip_status = hipSuccess;
|
||||
|
||||
hc::accelerator acc;
|
||||
#if (__hcc_workweek__ >= 17332)
|
||||
hc::AmPointerInfo amPointerInfo(NULL, NULL, NULL, 0, acc, 0, 0);
|
||||
#else
|
||||
hc::AmPointerInfo amPointerInfo(NULL, NULL, 0, acc, 0, 0);
|
||||
#endif
|
||||
am_status_t status = hc::am_memtracker_getinfo(&amPointerInfo, hostPtr);
|
||||
if(status == AM_SUCCESS){
|
||||
*flagsPtr = amPointerInfo._appAllocationFlags;
|
||||
@@ -456,7 +637,11 @@ hipError_t hipHostRegister(void *hostPtr, size_t sizeBytes, unsigned int flags)
|
||||
}
|
||||
|
||||
hc::accelerator acc;
|
||||
#if (__hcc_workweek__ >= 17332)
|
||||
hc::AmPointerInfo amPointerInfo(NULL, NULL, NULL, 0, acc, 0, 0);
|
||||
#else
|
||||
hc::AmPointerInfo amPointerInfo(NULL, NULL, 0, acc, 0, 0);
|
||||
#endif
|
||||
am_status_t am_status = hc::am_memtracker_getinfo(&amPointerInfo, hostPtr);
|
||||
|
||||
if(am_status == AM_SUCCESS){
|
||||
@@ -826,7 +1011,7 @@ hipError_t hipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch,
|
||||
return ihipLogStatus(e);
|
||||
}
|
||||
|
||||
hipError_t hipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch,
|
||||
hipError_t hipMemcpy2DAsync(void* dst, size_t dpitch, const void* src, size_t spitch,
|
||||
size_t width, size_t height, hipMemcpyKind kind, hipStream_t stream) {
|
||||
HIP_INIT_SPECIAL_API((TRACE_MCMD), dst, dpitch, src, spitch, width, height, kind, stream);
|
||||
if(width > dpitch || width > spitch)
|
||||
@@ -857,7 +1042,7 @@ hipError_t hipMemcpy2DToArray(hipArray* dst, size_t wOffset, size_t hOffset, con
|
||||
|
||||
size_t byteSize;
|
||||
if(dst) {
|
||||
switch(dst[0].f) {
|
||||
switch(dst[0].desc.f) {
|
||||
case hipChannelFormatKindSigned:
|
||||
byteSize = sizeof(int);
|
||||
break;
|
||||
@@ -918,6 +1103,56 @@ hipError_t hipMemcpyToArray(hipArray* dst, size_t wOffset, size_t hOffset,
|
||||
return ihipLogStatus(e);
|
||||
}
|
||||
|
||||
// Synchronous 3D copy from a pitched source pointer into a hipArray.
//
// Only the (srcPtr -> dstArray) combination of hipMemcpy3DParms is handled:
// the copy reads from p->srcPtr and writes to p->dstArray->data, one row at a
// time, for p->extent.depth slices of p->extent.height rows each. Each row
// copies p->extent.width elements, where the element size is derived from the
// destination array's channel format kind.
//
// Limitations visible in this implementation:
//   - p->srcPos and p->dstPos are ignored (treated as 0).
//   - An unrecognised channel format yields an element size of 0, so rows of
//     zero bytes are copied.
//
// Returns:
//   hipSuccess on success,
//   hipErrorUnknown      if p is null (pre-existing contract),
//   hipErrorInvalidValue if the destination array, its data pointer, the
//                        source pointer, or the resolved stream is null,
//   the code carried by any ihipException raised by the copy.
hipError_t hipMemcpy3D(const struct hipMemcpy3DParms *p)
{
    HIP_INIT_SPECIAL_API((TRACE_MCMD), p);

    hipStream_t stream = ihipSyncAndResolveStream(hipStreamNull);

    if (p == nullptr) {
        return ihipLogStatus(hipErrorUnknown);
    }

    // Everything below dereferences these; fail early instead of faulting.
    if ((stream == nullptr) || (p->dstArray == nullptr) ||
        (p->dstArray->data == nullptr) || (p->srcPtr.ptr == nullptr)) {
        return ihipLogStatus(hipErrorInvalidValue);
    }

    // Element size is dictated by the destination array's channel format.
    size_t byteSize;
    switch (p->dstArray->desc.f) {
        case hipChannelFormatKindSigned:
            byteSize = sizeof(int);
            break;
        case hipChannelFormatKindUnsigned:
            byteSize = sizeof(unsigned int);
            break;
        case hipChannelFormatKindFloat:
            byteSize = sizeof(float);
            break;
        case hipChannelFormatKindNone:
            byteSize = sizeof(size_t);
            break;
        default:
            byteSize = 0;
            break;
    }

    hipError_t e = hipSuccess;

    try {
        const unsigned char* srcBase = (const unsigned char*)p->srcPtr.ptr;
        unsigned char*       dstBase = (unsigned char*)p->dstArray->data;

        // Source layout: rows are srcPtr.pitch bytes apart, slices are
        // srcPtr.ysize rows apart. Destination layout: tightly packed
        // width*byteSize rows, height rows per slice.
        const size_t srcRowBytes   = p->srcPtr.pitch;
        const size_t srcSliceBytes = p->srcPtr.ysize * p->srcPtr.pitch;
        const size_t dstRowBytes   = p->dstArray->width * byteSize;
        const size_t dstSliceBytes = p->dstArray->height * dstRowBytes;
        const size_t copyBytes     = p->extent.width * byteSize;

        // size_t indices: the extents are unsigned, so signed counters would
        // mix signedness in the comparison and truncate large extents.
        for (size_t i = 0; i < p->extent.depth; i++) {
            for (size_t j = 0; j < p->extent.height; j++) {
                // TODO: p->srcPos or p->dstPos are not 0.
                const unsigned char* src = srcBase + i*srcSliceBytes + j*srcRowBytes;
                unsigned char*       dst = dstBase + i*dstSliceBytes + j*dstRowBytes;
                stream->locked_copySync(dst, src, copyBytes, p->kind);
            }
        }
    }
    catch (ihipException &ex) {
        e = ex._code;
    }

    return ihipLogStatus(e);
}
|
||||
|
||||
// TODO - make member function of stream?
|
||||
template <typename T>
|
||||
void
|
||||
@@ -956,6 +1191,7 @@ ihipMemsetKernel(hipStream_t stream,
|
||||
|
||||
}
|
||||
|
||||
|
||||
// TODO-sync: function is async unless target is pinned host memory - then these are fully sync.
|
||||
hipError_t hipMemsetAsync(void* dst, int value, size_t sizeBytes, hipStream_t stream )
|
||||
{
|
||||
@@ -1006,7 +1242,7 @@ hipError_t hipMemsetAsync(void* dst, int value, size_t sizeBytes, hipStream_t s
|
||||
return ihipLogStatus(e);
|
||||
};
|
||||
|
||||
hipError_t hipMemset(void* dst, int value, size_t sizeBytes )
|
||||
hipError_t hipMemset(void* dst, int value, size_t sizeBytes)
|
||||
{
|
||||
HIP_INIT_SPECIAL_API((TRACE_MCMD), dst, value, sizeBytes);
|
||||
|
||||
@@ -1058,6 +1294,58 @@ hipError_t hipMemset(void* dst, int value, size_t sizeBytes )
|
||||
return ihipLogStatus(e);
|
||||
}
|
||||
|
||||
// Fills a pitched 2D allocation with a byte value on the null stream and
// waits for the fill to complete.
//
// The whole pitch*height byte range starting at dst is filled; `width` is
// only passed to the API trace and does not limit the bytes written per row.
// When the total size is a multiple of 4 the fill is done one 32-bit word per
// work-item (the byte is replicated into all four lanes); otherwise it falls
// back to one byte per work-item.
//
// Returns:
//   hipSuccess on success,
//   hipErrorInvalidValue if the stream cannot be resolved or the memset
//                        kernel launch throws std::exception.
hipError_t hipMemset2D(void* dst, size_t pitch, int value, size_t width, size_t height)
{
    HIP_INIT_SPECIAL_API((TRACE_MCMD), dst, pitch, value, width, height);

    hipError_t e = hipSuccess;

    // TODO - call an ihip memset so HIP_TRACE is correct.
    hipStream_t stream = ihipSyncAndResolveStream(hipStreamNull);

    if (stream) {
        auto crit = stream->lockopen_preKernelCommand();

        hc::completion_future cf;

        const size_t sizeBytes = pitch * height;

        try {
            if ((sizeBytes & 0x3) == 0) {
                // use a faster dword-per-workitem copy:
                // Build the replicated pattern in unsigned arithmetic: shifting
                // a signed int holding 0x80..0xff left by 24 overflows.
                const uint32_t byteValue = static_cast<uint32_t>(value) & 0xffu;
                const uint32_t value32   = (byteValue << 24) | (byteValue << 16) | (byteValue << 8) | byteValue;
                ihipMemsetKernel<uint32_t> (stream, crit, static_cast<uint32_t*> (dst), value32, sizeBytes/sizeof(uint32_t), &cf);
            } else {
                // use a slow byte-per-workitem copy:
                ihipMemsetKernel<char> (stream, crit, static_cast<char*> (dst), value, sizeBytes, &cf);
            }
        }
        catch (std::exception &ex) {
            e = hipErrorInvalidValue;
        }

        // TODO - is hipMemset supposed to be async?
        cf.wait();

        stream->lockclose_postKernelCommand("hipMemset", &crit->_av);

        if (HIP_LAUNCH_BLOCKING) {
            tprintf (DB_SYNC, "'%s' LAUNCH_BLOCKING wait for memset in %s.\n", __func__, ToString(stream).c_str());
            cf.wait();
            tprintf (DB_SYNC, "'%s' LAUNCH_BLOCKING memset completed in %s.\n", __func__, ToString(stream).c_str());
        }
    } else {
        e = hipErrorInvalidValue;
    }

    return ihipLogStatus(e);
}
|
||||
|
||||
hipError_t hipMemsetD8(hipDeviceptr_t dst, unsigned char value, size_t sizeBytes )
|
||||
{
|
||||
HIP_INIT_SPECIAL_API((TRACE_MCMD), dst, value, sizeBytes);
|
||||
@@ -1108,7 +1396,7 @@ hipError_t hipMemsetD8(hipDeviceptr_t dst, unsigned char value, size_t sizeByte
|
||||
return ihipLogStatus(e);
|
||||
}
|
||||
|
||||
hipError_t hipMemGetInfo (size_t *free, size_t *total)
|
||||
hipError_t hipMemGetInfo(size_t *free, size_t *total)
|
||||
{
|
||||
HIP_INIT_API(free, total);
|
||||
|
||||
@@ -1154,7 +1442,11 @@ hipError_t hipMemPtrGetInfo(void *ptr, size_t *size)
|
||||
|
||||
if(ptr != nullptr && size != nullptr){
|
||||
hc::accelerator acc;
|
||||
#if (__hcc_workweek__ >= 17332)
|
||||
hc::AmPointerInfo amPointerInfo(NULL, NULL, NULL, 0, acc, 0, 0);
|
||||
#else
|
||||
hc::AmPointerInfo amPointerInfo(NULL, NULL, 0, acc, 0, 0);
|
||||
#endif
|
||||
am_status_t status = hc::am_memtracker_getinfo(&amPointerInfo, ptr);
|
||||
if(status == AM_SUCCESS){
|
||||
*size = amPointerInfo._sizeBytes;
|
||||
@@ -1179,7 +1471,11 @@ hipError_t hipFree(void* ptr)
|
||||
|
||||
if (ptr) {
|
||||
hc::accelerator acc;
|
||||
#if (__hcc_workweek__ >= 17332)
|
||||
hc::AmPointerInfo amPointerInfo(NULL, NULL, NULL, 0, acc, 0, 0);
|
||||
#else
|
||||
hc::AmPointerInfo amPointerInfo(NULL, NULL, 0, acc, 0, 0);
|
||||
#endif
|
||||
am_status_t status = hc::am_memtracker_getinfo(&amPointerInfo, ptr);
|
||||
if(status == AM_SUCCESS){
|
||||
if(amPointerInfo._hostPointer == NULL){
|
||||
@@ -1207,7 +1503,11 @@ hipError_t hipHostFree(void* ptr)
|
||||
hipError_t hipStatus = hipErrorInvalidValue;
|
||||
if (ptr) {
|
||||
hc::accelerator acc;
|
||||
#if (__hcc_workweek__ >= 17332)
|
||||
hc::AmPointerInfo amPointerInfo(NULL, NULL, NULL, 0, acc, 0, 0);
|
||||
#else
|
||||
hc::AmPointerInfo amPointerInfo(NULL, NULL, 0, acc, 0, 0);
|
||||
#endif
|
||||
am_status_t status = hc::am_memtracker_getinfo(&amPointerInfo, ptr);
|
||||
if(status == AM_SUCCESS){
|
||||
if(amPointerInfo._hostPointer == ptr){
|
||||
@@ -1241,7 +1541,11 @@ hipError_t hipFreeArray(hipArray* array)
|
||||
|
||||
if(array->data) {
|
||||
hc::accelerator acc;
|
||||
#if (__hcc_workweek__ >= 17332)
|
||||
hc::AmPointerInfo amPointerInfo(NULL, NULL, NULL, 0, acc, 0, 0);
|
||||
#else
|
||||
hc::AmPointerInfo amPointerInfo(NULL, NULL, 0, acc, 0, 0);
|
||||
#endif
|
||||
am_status_t status = hc::am_memtracker_getinfo(&amPointerInfo, array->data);
|
||||
if(status == AM_SUCCESS){
|
||||
if(amPointerInfo._hostPointer == NULL){
|
||||
@@ -1259,7 +1563,11 @@ hipError_t hipMemGetAddressRange ( hipDeviceptr_t* pbase, size_t* psize, hipDevi
|
||||
HIP_INIT_API ( pbase , psize , dptr );
|
||||
hipError_t hipStatus = hipSuccess;
|
||||
hc::accelerator acc;
|
||||
hc::AmPointerInfo amPointerInfo( NULL , NULL , 0 , acc , 0 , 0 );
|
||||
#if (__hcc_workweek__ >= 17332)
|
||||
hc::AmPointerInfo amPointerInfo( NULL , NULL , NULL, 0 , acc , 0 , 0 );
|
||||
#else
|
||||
hc::AmPointerInfo amPointerInfo( NULL , NULL, 0 , acc , 0 , 0 );
|
||||
#endif
|
||||
am_status_t status = hc::am_memtracker_getinfo( &amPointerInfo , dptr );
|
||||
if (status == AM_SUCCESS) {
|
||||
*pbase = amPointerInfo._devicePointer;
|
||||
@@ -1282,7 +1590,11 @@ hipError_t hipIpcGetMemHandle(hipIpcMemHandle_t* handle, void* devPtr){
|
||||
if((handle == NULL) || (devPtr == NULL)) {
|
||||
hipStatus = hipErrorInvalidResourceHandle;
|
||||
} else {
|
||||
#if (__hcc_workweek__ >= 17332)
|
||||
hc::AmPointerInfo amPointerInfo( NULL , NULL , NULL, 0 , acc , 0 , 0 );
|
||||
#else
|
||||
hc::AmPointerInfo amPointerInfo( NULL , NULL , 0 , acc , 0 , 0 );
|
||||
#endif
|
||||
am_status_t status = hc::am_memtracker_getinfo( &amPointerInfo , devPtr );
|
||||
if (status == AM_SUCCESS) {
|
||||
psize = (size_t)amPointerInfo._sizeBytes;
|
||||
|
||||
@@ -452,8 +452,7 @@ hipError_t ihipModuleLaunchKernel(hipFunction_t f,
|
||||
|
||||
lp.av->dispatch_hsa_kernel(&aql, config[1] /* kernarg*/, kernArgSize,
|
||||
(startEvent || stopEvent) ? &cf : nullptr
|
||||
#define USE_NAMED_KERNEL 0
|
||||
#if USE_NAMED_KERNEL
|
||||
#if (__hcc_workweek__ > 17312)
|
||||
, f->_name.c_str()
|
||||
#endif
|
||||
);
|
||||
|
||||
+32
-28
@@ -38,21 +38,26 @@ hipError_t ihipStreamCreate(hipStream_t *stream, unsigned int flags)
|
||||
hipError_t e = hipSuccess;
|
||||
|
||||
if (ctx) {
|
||||
hc::accelerator acc = ctx->getWriteableDevice()->_acc;
|
||||
|
||||
// TODO - use try-catch loop to detect memory exception?
|
||||
//
|
||||
//Note this is an execute_in_order queue, so all kernels submitted will automatically wait for prev to complete:
|
||||
//This matches CUDA stream behavior:
|
||||
if (HIP_FORCE_NULL_STREAM) {
|
||||
*stream = 0;
|
||||
} else {
|
||||
hc::accelerator acc = ctx->getWriteableDevice()->_acc;
|
||||
|
||||
{
|
||||
// Obtain mutex access to the device critical data, release by destructor
|
||||
LockedAccessor_CtxCrit_t ctxCrit(ctx->criticalData());
|
||||
// TODO - use try-catch loop to detect memory exception?
|
||||
//
|
||||
//Note this is an execute_in_order queue, so all kernels submitted will automatically wait for prev to complete:
|
||||
//This matches CUDA stream behavior:
|
||||
|
||||
auto istream = new ihipStream_t(ctx, acc.create_view(), flags);
|
||||
{
|
||||
// Obtain mutex access to the device critical data, release by destructor
|
||||
LockedAccessor_CtxCrit_t ctxCrit(ctx->criticalData());
|
||||
|
||||
ctxCrit->addStream(istream);
|
||||
*stream = istream;
|
||||
auto istream = new ihipStream_t(ctx, acc.create_view(), flags);
|
||||
|
||||
ctxCrit->addStream(istream);
|
||||
*stream = istream;
|
||||
}
|
||||
}
|
||||
|
||||
tprintf(DB_SYNC, "hipStreamCreate, %s\n", ToString(*stream).c_str());
|
||||
@@ -84,7 +89,7 @@ hipError_t hipStreamCreate(hipStream_t *stream)
|
||||
|
||||
hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int flags)
|
||||
{
|
||||
HIP_INIT_API(stream, event, flags);
|
||||
HIP_INIT_SPECIAL_API(TRACE_SYNC, stream, event, flags);
|
||||
|
||||
hipError_t e = hipSuccess;
|
||||
|
||||
@@ -93,18 +98,15 @@ hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int
|
||||
|
||||
} else if (event->_state != hipEventStatusUnitialized) {
|
||||
|
||||
if (stream != hipStreamNull) {
|
||||
|
||||
// This will use create_blocking_marker to wait on the specified queue.
|
||||
stream->locked_waitEvent(event);
|
||||
|
||||
if (HIP_SYNC_STREAM_WAIT || (HIP_SYNC_NULL_STREAM && (stream == 0))) {
|
||||
// conservative wait on host for the specified event to complete:
|
||||
event->locked_waitComplete((event->_flags & hipEventBlockingSync) ? hc::hcWaitModeBlocked : hc::hcWaitModeActive);
|
||||
} else {
|
||||
// TODO-hcc Convert to use create_blocking_marker(...) functionality.
|
||||
// Currently we have a super-conservative version of this - block on host, and drain the queue.
|
||||
// This should create a barrier packet in the target queue.
|
||||
// TODO-HIP_SYNC_NULL_STREAM
|
||||
stream->locked_wait();
|
||||
stream = ihipSyncAndResolveStream(stream);
|
||||
// This will use create_blocking_marker to wait on the specified queue.
|
||||
stream->locked_streamWaitEvent(event);
|
||||
}
|
||||
|
||||
} // else event not recorded, return immediately and don't create marker.
|
||||
|
||||
return ihipLogStatus(e);
|
||||
@@ -114,7 +116,7 @@ hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int
|
||||
//---
|
||||
hipError_t hipStreamQuery(hipStream_t stream)
|
||||
{
|
||||
HIP_INIT_API(stream);
|
||||
HIP_INIT_SPECIAL_API(TRACE_QUERY, stream);
|
||||
|
||||
// Use default stream if 0 specified:
|
||||
if (stream == hipStreamNull) {
|
||||
@@ -122,15 +124,14 @@ hipError_t hipStreamQuery(hipStream_t stream)
|
||||
stream = device->_defaultStream;
|
||||
}
|
||||
|
||||
int pendingOps = 0;
|
||||
bool isEmpty = 0;
|
||||
|
||||
{
|
||||
LockedAccessor_StreamCrit_t crit(stream->_criticalData);
|
||||
pendingOps = crit->_av.get_pending_async_ops();
|
||||
isEmpty = crit->_av.get_is_empty();
|
||||
}
|
||||
|
||||
|
||||
hipError_t e = (pendingOps > 0) ? hipErrorNotReady : hipSuccess;
|
||||
hipError_t e = isEmpty ? hipSuccess : hipErrorNotReady ;
|
||||
|
||||
return ihipLogStatus(e);
|
||||
}
|
||||
@@ -140,6 +141,7 @@ hipError_t hipStreamQuery(hipStream_t stream)
|
||||
hipError_t hipStreamSynchronize(hipStream_t stream)
|
||||
{
|
||||
HIP_INIT_API(stream);
|
||||
HIP_INIT_SPECIAL_API(TRACE_SYNC, stream);
|
||||
|
||||
hipError_t e = hipSuccess;
|
||||
|
||||
@@ -169,7 +171,9 @@ hipError_t hipStreamDestroy(hipStream_t stream)
|
||||
|
||||
//--- Drain the stream:
|
||||
if (stream == NULL) {
|
||||
e = hipErrorInvalidResourceHandle; // TODO - review - what happens if try to destroy null stream
|
||||
if (!HIP_FORCE_NULL_STREAM) {
|
||||
e = hipErrorInvalidResourceHandle;
|
||||
}
|
||||
} else {
|
||||
stream->locked_wait();
|
||||
|
||||
|
||||
@@ -0,0 +1,668 @@
|
||||
|
||||
#include <map>
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include "hsa/hsa.h"
|
||||
#include "hsa/hsa_ext_amd.h"
|
||||
|
||||
#include "hip/hip_runtime.h"
|
||||
#include "hip_hcc_internal.h"
|
||||
#include "trace_helper.h"
|
||||
|
||||
#include "hip_texture.h"
|
||||
|
||||
static std::map<hipTextureObject_t, hipTexture*> textureHash;
|
||||
|
||||
void saveTextureInfo(const hipTexture* pTexture,
|
||||
const hipResourceDesc* pResDesc,
|
||||
const hipTextureDesc* pTexDesc,
|
||||
const hipResourceViewDesc* pResViewDesc)
|
||||
{
|
||||
if (pResDesc != nullptr) {
|
||||
memcpy((void*)&(pTexture->resDesc), (void*)pResDesc, sizeof(hipResourceDesc));
|
||||
}
|
||||
|
||||
if (pTexDesc != nullptr) {
|
||||
memcpy((void*)&(pTexture->texDesc), (void*)pTexDesc, sizeof(hipTextureDesc));
|
||||
}
|
||||
|
||||
if (pResViewDesc != nullptr) {
|
||||
memcpy((void*)&(pTexture->resViewDesc), (void*)pResViewDesc, sizeof(hipResourceViewDesc));
|
||||
}
|
||||
}
|
||||
|
||||
void getChannelOrderAndType(const hipChannelFormatDesc& desc,
|
||||
enum hipTextureReadMode readMode,
|
||||
hsa_ext_image_channel_order_t& channelOrder,
|
||||
hsa_ext_image_channel_type_t& channelType)
|
||||
{
|
||||
if (desc.x != 0 && desc.y != 0 && desc.z != 0 && desc.w != 0) {
|
||||
channelOrder = HSA_EXT_IMAGE_CHANNEL_ORDER_RGBA;
|
||||
} else if (desc.x != 0 && desc.y != 0 && desc.z != 0 && desc.w == 0) {
|
||||
channelOrder = HSA_EXT_IMAGE_CHANNEL_ORDER_RGB;
|
||||
} else if (desc.x != 0 && desc.y != 0 && desc.z == 0 && desc.w == 0) {
|
||||
channelOrder = HSA_EXT_IMAGE_CHANNEL_ORDER_RG;
|
||||
} else if (desc.x != 0 && desc.y == 0 && desc.z == 0 && desc.w == 0) {
|
||||
channelOrder = HSA_EXT_IMAGE_CHANNEL_ORDER_R;
|
||||
} else {
|
||||
}
|
||||
|
||||
switch (desc.f) {
|
||||
case hipChannelFormatKindUnsigned:
|
||||
switch(desc.x) {
|
||||
case 32:
|
||||
channelType = HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32;
|
||||
break;
|
||||
case 16:
|
||||
channelType = readMode == hipReadModeNormalizedFloat ? HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_INT16 :
|
||||
HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16;
|
||||
break;
|
||||
case 8:
|
||||
channelType = readMode == hipReadModeNormalizedFloat ? HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_INT8 :
|
||||
HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8;
|
||||
break;
|
||||
default:
|
||||
channelType = HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32;
|
||||
}
|
||||
break;
|
||||
case hipChannelFormatKindSigned:
|
||||
switch(desc.x) {
|
||||
case 32:
|
||||
channelType = HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT32;
|
||||
break;
|
||||
case 16:
|
||||
channelType = readMode == hipReadModeNormalizedFloat ? HSA_EXT_IMAGE_CHANNEL_TYPE_SNORM_INT16 :
|
||||
HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT16;
|
||||
break;
|
||||
case 8:
|
||||
channelType = readMode == hipReadModeNormalizedFloat ? HSA_EXT_IMAGE_CHANNEL_TYPE_SNORM_INT8 :
|
||||
HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT8;
|
||||
break;
|
||||
default:
|
||||
channelType = HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT32;
|
||||
}
|
||||
break;
|
||||
case hipChannelFormatKindFloat:
|
||||
switch(desc.x) {
|
||||
case 32:
|
||||
channelType = HSA_EXT_IMAGE_CHANNEL_TYPE_FLOAT;
|
||||
break;
|
||||
case 16:
|
||||
channelType = HSA_EXT_IMAGE_CHANNEL_TYPE_HALF_FLOAT;
|
||||
break;
|
||||
case 8:
|
||||
break;
|
||||
default:
|
||||
channelType = HSA_EXT_IMAGE_CHANNEL_TYPE_FLOAT;
|
||||
}
|
||||
break;
|
||||
case hipChannelFormatKindNone:
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Populate an HSA sampler descriptor from HIP texture sampling settings.
 *
 * @param samplerDescriptor  [out] Descriptor whose coordinate_mode,
 *                           filter_mode and address_mode are written.
 * @param addressMode        HIP addressing mode (wrap/clamp/mirror/border).
 * @param filterMode         HIP filter mode (point/linear).
 * @param normalizedCoords   Non-zero selects normalized coordinates.
 *
 * NOTE(review): filter_mode / address_mode are left unmodified when the
 * corresponding HIP enum value is not one of those handled below.
 */
void fillSamplerDescriptor(hsa_ext_sampler_descriptor_t& samplerDescriptor,
                           enum hipTextureAddressMode addressMode,
                           enum hipTextureFilterMode filterMode,
                           int normalizedCoords)
{
    samplerDescriptor.coordinate_mode = normalizedCoords
        ? HSA_EXT_SAMPLER_COORDINATE_MODE_NORMALIZED
        : HSA_EXT_SAMPLER_COORDINATE_MODE_UNNORMALIZED;

    if (filterMode == hipFilterModePoint) {
        samplerDescriptor.filter_mode = HSA_EXT_SAMPLER_FILTER_MODE_NEAREST;
    } else if (filterMode == hipFilterModeLinear) {
        samplerDescriptor.filter_mode = HSA_EXT_SAMPLER_FILTER_MODE_LINEAR;
    }

    if (addressMode == hipAddressModeWrap) {
        samplerDescriptor.address_mode = HSA_EXT_SAMPLER_ADDRESSING_MODE_REPEAT;
    } else if (addressMode == hipAddressModeClamp) {
        samplerDescriptor.address_mode = HSA_EXT_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE;
    } else if (addressMode == hipAddressModeMirror) {
        samplerDescriptor.address_mode = HSA_EXT_SAMPLER_ADDRESSING_MODE_MIRRORED_REPEAT;
    } else if (addressMode == hipAddressModeBorder) {
        samplerDescriptor.address_mode = HSA_EXT_SAMPLER_ADDRESSING_MODE_CLAMP_TO_BORDER;
    }
}
|
||||
|
||||
/**
 * Build the device-side texture object from an HSA image and sampler.
 *
 * Allocates HIP_TEXTURE_OBJECT_SIZE_DWORD dwords with hipMalloc, copies the
 * image descriptor to the start of that buffer and the sampler descriptor at
 * dword offset HIP_SAMPLER_OBJECT_OFFSET_DWORD, and publishes the buffer
 * pointer as the texture object handle.
 *
 * @param pTexObject  [out] Receives the handle on success; left unmodified on
 *                    failure.
 * @param image       HSA image whose handle is used as the copy source.
 * @param sampler     HSA sampler whose handle is used as the copy source.
 * @return true on success; false if pTexObject is null or if the allocation
 *         or either copy fails (the allocation is released in that case).
 */
bool getHipTextureObject(hipTextureObject_t* pTexObject,
                         hsa_ext_image_t& image,
                         hsa_ext_sampler_t sampler)
{
    const size_t kBytesPerDword = 4;

    if (pTexObject == nullptr) {
        return false;
    }

    unsigned int* texSRD = nullptr;
    if (hipMalloc((void **) &texSRD, HIP_TEXTURE_OBJECT_SIZE_DWORD * kBytesPerDword) != hipSuccess ||
        texSRD == nullptr) {
        // Previously the uninitialised pointer was written through and
        // handed back to the caller as a valid handle.
        return false;
    }

    const bool copied =
        hipMemcpy(texSRD, (void *)image.handle,
                  HIP_IMAGE_OBJECT_SIZE_DWORD * kBytesPerDword,
                  hipMemcpyDeviceToDevice) == hipSuccess &&
        hipMemcpy(texSRD + HIP_SAMPLER_OBJECT_OFFSET_DWORD, (void *)sampler.handle,
                  HIP_SAMPLER_OBJECT_SIZE_DWORD * kBytesPerDword,
                  hipMemcpyDeviceToDevice) == hipSuccess;
    if (!copied) {
        hipFree(texSRD);
        return false;
    }

    *pTexObject = (hipTextureObject_t) texSRD;

#ifdef DEBUG
    // Dump the freshly built descriptor for inspection.
    unsigned int* srd = (unsigned int*) malloc(HIP_TEXTURE_OBJECT_SIZE_DWORD * kBytesPerDword);
    if (srd != nullptr) {
        if (hipMemcpy(srd, texSRD, HIP_TEXTURE_OBJECT_SIZE_DWORD * kBytesPerDword,
                      hipMemcpyDeviceToHost) == hipSuccess) {
            printf("New SRD: \n");
            for (int i = 0; i < HIP_TEXTURE_OBJECT_SIZE_DWORD; i++) {
                printf("SRD[%d]: %x\n", i, srd[i]);
            }
            printf("\n");
        }
        free(srd);  // the host staging buffer used to leak
    }
#endif
    return true;
}
|
||||
|
||||
// Texture Object APIs
|
||||
/**
 * Create a texture object for the resource described by pResDesc.
 *
 * Builds an HSA image over the resource memory and an HSA sampler from the
 * texture descriptor, packs both into a device-side texture object via
 * getHipTextureObject(), and records the bookkeeping entry in textureHash.
 *
 * @param pTexObject    [out] Receives the new texture object handle.
 * @param pResDesc      Resource description (array, mipmapped array, linear
 *                      or pitch2D). Must not be null.
 * @param pTexDesc      Sampling description. Must not be null.
 * @param pResViewDesc  Optional resource view description; only stored.
 * @return hipSuccess on success (also when no default context is available,
 *         in which case nothing is created — behaviour kept from the
 *         original); hipErrorInvalidValue for null/unsupported arguments;
 *         hipErrorMemoryAllocation if host bookkeeping cannot be allocated;
 *         hipErrorRuntimeOther if image/sampler/object creation fails.
 */
hipError_t hipCreateTextureObject(hipTextureObject_t* pTexObject,
                                  const hipResourceDesc* pResDesc,
                                  const hipTextureDesc* pTexDesc,
                                  const hipResourceViewDesc* pResViewDesc)
{
    HIP_INIT_API(pTexObject, pResDesc, pTexDesc, pResViewDesc);
    HIP_SET_DEVICE();

    // All three are dereferenced unconditionally below.
    if (pTexObject == nullptr || pResDesc == nullptr || pTexDesc == nullptr) {
        return ihipLogStatus(hipErrorInvalidValue);
    }

    hipError_t hip_status = hipSuccess;

    auto ctx = ihipGetTlsDefaultCtx();
    if (ctx) {
        hc::accelerator acc = ctx->getDevice()->_acc;
        hsa_agent_t* agent = static_cast<hsa_agent_t*>(acc.get_hsa_agent());

        // Zero-initialised bookkeeping record; owned by textureHash on success.
        hipTexture* pTexture = (hipTexture*) calloc(1, sizeof(hipTexture));
        if (pTexture == nullptr) {
            return ihipLogStatus(hipErrorMemoryAllocation);
        }
        saveTextureInfo(pTexture, pResDesc, pTexDesc, pResViewDesc);

        hsa_ext_image_descriptor_t imageDescriptor = {};
        // NOTE(review): getChannelOrderAndType() does not assign these for
        // formats it does not recognise.
        hsa_ext_image_channel_order_t channelOrder;
        hsa_ext_image_channel_type_t channelType;
        void* devPtr = nullptr;
        bool resourceOk = true;

        switch (pResDesc->resType) {
            case hipResourceTypeArray: {
                auto array = pResDesc->res.array.array;
                if (array == nullptr) {
                    resourceOk = false;
                    break;
                }
                devPtr = array->data;
                imageDescriptor.width = array->width;
                imageDescriptor.height = array->height;
                switch (array->type) {
                    case hipArrayLayered:
                        imageDescriptor.geometry = HSA_EXT_IMAGE_GEOMETRY_2DA;
                        imageDescriptor.depth = 0;
                        imageDescriptor.array_size = array->depth;
                        break;
                    case hipArrayCubemap:
                        imageDescriptor.geometry = HSA_EXT_IMAGE_GEOMETRY_3D;
                        imageDescriptor.depth = array->depth;
                        imageDescriptor.array_size = 0;
                        break;
                    case hipArraySurfaceLoadStore:
                    case hipArrayTextureGather:
                    case hipArrayDefault:
                    default:
                        imageDescriptor.geometry = HSA_EXT_IMAGE_GEOMETRY_2D;
                        imageDescriptor.depth = 0;
                        imageDescriptor.array_size = 0;
                        break;
                }
                getChannelOrderAndType(array->desc, pTexDesc->readMode, channelOrder, channelType);
                break;
            }
            case hipResourceTypeMipmappedArray: {
                auto mipmap = pResDesc->res.mipmap.mipmap;
                if (mipmap == nullptr) {
                    resourceOk = false;
                    break;
                }
                devPtr = mipmap->data;
                imageDescriptor.width = mipmap->width;
                imageDescriptor.height = mipmap->height;
                imageDescriptor.depth = mipmap->depth;
                imageDescriptor.array_size = 0;
                imageDescriptor.geometry = HSA_EXT_IMAGE_GEOMETRY_2D;
                getChannelOrderAndType(mipmap->desc, pTexDesc->readMode, channelOrder, channelType);
                break;
            }
            case hipResourceTypeLinear:
                devPtr = pResDesc->res.linear.devPtr;
                // NOTE(review): width is taken in bytes, not elements — confirm.
                imageDescriptor.width = pResDesc->res.linear.sizeInBytes;
                imageDescriptor.height = 1;
                imageDescriptor.depth = 0;
                imageDescriptor.array_size = 0;
                imageDescriptor.geometry = HSA_EXT_IMAGE_GEOMETRY_1D; // ? HSA_EXT_IMAGE_DATA_LAYOUT_LINEAR
                getChannelOrderAndType(pResDesc->res.linear.desc, pTexDesc->readMode, channelOrder, channelType);
                break;
            case hipResourceTypePitch2D:
                devPtr = pResDesc->res.pitch2D.devPtr;
                imageDescriptor.width = pResDesc->res.pitch2D.width;
                imageDescriptor.height = pResDesc->res.pitch2D.height;
                imageDescriptor.depth = 0;
                imageDescriptor.array_size = 0;
                imageDescriptor.geometry = HSA_EXT_IMAGE_GEOMETRY_2D;
                getChannelOrderAndType(pResDesc->res.pitch2D.desc, pTexDesc->readMode, channelOrder, channelType);
                break;
            default:
                // Unknown resource type: previously fell through with an
                // uninitialised image descriptor.
                resourceOk = false;
                break;
        }

        if (!resourceOk) {
            free(pTexture);
            return ihipLogStatus(hipErrorInvalidValue);
        }

        imageDescriptor.format.channel_order = channelOrder;
        imageDescriptor.format.channel_type = channelType;

        hsa_ext_sampler_descriptor_t samplerDescriptor;
        fillSamplerDescriptor(samplerDescriptor, pTexDesc->addressMode[0], pTexDesc->filterMode, pTexDesc->normalizedCoords);

        hsa_access_permission_t permission = HSA_ACCESS_PERMISSION_RW;

        if (HSA_STATUS_SUCCESS != hsa_ext_image_create(*agent, &imageDescriptor, devPtr, permission, &(pTexture->image))) {
            free(pTexture);
            return ihipLogStatus(hipErrorRuntimeOther);
        }
        if (HSA_STATUS_SUCCESS != hsa_ext_sampler_create(*agent, &samplerDescriptor, &(pTexture->sampler))) {
            // Do not leak the image that was just created.
            hsa_ext_image_destroy(*agent, pTexture->image);
            free(pTexture);
            return ihipLogStatus(hipErrorRuntimeOther);
        }

        if (!getHipTextureObject(pTexObject, pTexture->image, pTexture->sampler)) {
            hsa_ext_sampler_destroy(*agent, pTexture->sampler);
            hsa_ext_image_destroy(*agent, pTexture->image);
            free(pTexture);
            return ihipLogStatus(hipErrorRuntimeOther);
        }

        textureHash[*pTexObject] = pTexture;
    }

    return ihipLogStatus(hip_status);
}
|
||||
|
||||
/**
 * Destroy a texture object created by hipCreateTextureObject().
 *
 * Releases the HSA image and sampler, the host bookkeeping record, the
 * textureHash entry, and the device descriptor buffer that
 * getHipTextureObject() allocated with hipMalloc and whose address serves as
 * the handle value.
 *
 * @param textureObject  Handle to destroy. Unknown handles are ignored.
 * @return hipSuccess (unknown handles are not reported as errors, matching
 *         the original behaviour).
 */
hipError_t hipDestroyTextureObject(hipTextureObject_t textureObject)
{
    HIP_INIT_API(textureObject);
    HIP_SET_DEVICE();

    hipError_t hip_status = hipSuccess;

    auto ctx = ihipGetTlsDefaultCtx();
    if (ctx) {
        // find() instead of operator[]: a lookup of an unknown handle must
        // not insert a null entry into the map.
        auto entry = textureHash.find(textureObject);
        if (entry != textureHash.end()) {
            hipTexture* pTexture = entry->second;
            if (pTexture != nullptr) {
                hc::accelerator acc = ctx->getDevice()->_acc;
                hsa_agent_t* agent = static_cast<hsa_agent_t*>(acc.get_hsa_agent());

                hsa_ext_image_destroy(*agent, pTexture->image);
                hsa_ext_sampler_destroy(*agent, pTexture->sampler);
                free(pTexture);
            }
            textureHash.erase(entry);

            // The handle is the hipMalloc'd descriptor buffer; it was never
            // released before.
            hipFree((void*) textureObject);
        }
    }
    return ihipLogStatus(hip_status);
}
|
||||
|
||||
/**
 * Retrieve the resource descriptor recorded for a texture object.
 *
 * @param pResDesc       [out] Receives a copy of the stored descriptor.
 * @param textureObject  Handle previously registered in textureHash.
 * @return hipSuccess on success (or when no default context is available, in
 *         which case nothing is written — behaviour kept from the original);
 *         hipErrorInvalidValue if pResDesc is null or the handle is unknown.
 */
hipError_t hipGetTextureObjectResourceDesc(hipResourceDesc* pResDesc, hipTextureObject_t textureObject)
{
    HIP_INIT_API(pResDesc, textureObject);
    HIP_SET_DEVICE();

    hipError_t hip_status = hipSuccess;

    auto ctx = ihipGetTlsDefaultCtx();
    if (ctx) {
        // find() avoids inserting a null entry for unknown handles.
        auto entry = textureHash.find(textureObject);
        if (pResDesc == nullptr || entry == textureHash.end() || entry->second == nullptr) {
            // Previously reported success while leaving the output unwritten.
            hip_status = hipErrorInvalidValue;
        } else {
            memcpy((void*)pResDesc, (void*)&(entry->second->resDesc), sizeof(hipResourceDesc));
        }
    }
    return ihipLogStatus(hip_status);
}
|
||||
|
||||
/**
 * Retrieve the resource-view descriptor recorded for a texture object.
 *
 * @param pResViewDesc   [out] Receives a copy of the stored descriptor.
 * @param textureObject  Handle previously registered in textureHash.
 * @return hipSuccess on success (or when no default context is available, in
 *         which case nothing is written — behaviour kept from the original);
 *         hipErrorInvalidValue if pResViewDesc is null or the handle is
 *         unknown.
 */
hipError_t hipGetTextureObjectResourceViewDesc(hipResourceViewDesc* pResViewDesc, hipTextureObject_t textureObject)
{
    HIP_INIT_API(pResViewDesc, textureObject);
    HIP_SET_DEVICE();

    hipError_t hip_status = hipSuccess;

    auto ctx = ihipGetTlsDefaultCtx();
    if (ctx) {
        // find() avoids inserting a null entry for unknown handles.
        auto entry = textureHash.find(textureObject);
        if (pResViewDesc == nullptr || entry == textureHash.end() || entry->second == nullptr) {
            // Previously reported success while leaving the output unwritten.
            hip_status = hipErrorInvalidValue;
        } else {
            memcpy((void*)pResViewDesc, (void*)&(entry->second->resViewDesc), sizeof(hipResourceViewDesc));
        }
    }
    return ihipLogStatus(hip_status);
}
|
||||
|
||||
/**
 * Retrieve the texture descriptor recorded for a texture object.
 *
 * @param pTexDesc       [out] Receives a copy of the stored descriptor.
 * @param textureObject  Handle previously registered in textureHash.
 * @return hipSuccess on success (or when no default context is available, in
 *         which case nothing is written — behaviour kept from the original);
 *         hipErrorInvalidValue if pTexDesc is null or the handle is unknown.
 */
hipError_t hipGetTextureObjectTextureDesc(hipTextureDesc* pTexDesc, hipTextureObject_t textureObject)
{
    HIP_INIT_API(pTexDesc, textureObject);
    HIP_SET_DEVICE();

    hipError_t hip_status = hipSuccess;

    auto ctx = ihipGetTlsDefaultCtx();
    if (ctx) {
        // find() avoids inserting a null entry for unknown handles.
        auto entry = textureHash.find(textureObject);
        if (pTexDesc == nullptr || entry == textureHash.end() || entry->second == nullptr) {
            // Previously reported success while leaving the output unwritten.
            hip_status = hipErrorInvalidValue;
        } else {
            memcpy((void*)pTexDesc, (void*)&(entry->second->texDesc), sizeof(hipTextureDesc));
        }
    }
    return ihipLogStatus(hip_status);
}
|
||||
|
||||
// Texture Reference APIs
|
||||
/**
 * Bind linear device memory to a 1D texture reference.
 *
 * Creates a 1D HSA image over devPtr plus a sampler, packs them into a
 * texture object and registers the bookkeeping record in textureHash.
 *
 * @param dim               Must be hipTextureType1D (asserted).
 * @param readMode          Texture read mode used to pick the channel type.
 * @param offset            Not read or written by this implementation.
 * @param devPtr            Device memory backing the texture.
 * @param desc              Channel format of the data.
 * @param size              Used as the image width.
 * @param addressMode       Sampler addressing mode.
 * @param filterMode        Sampler filter mode.
 * @param normalizedCoords  Non-zero for normalized coordinates.
 * @param textureObject     [out] Receives the created texture object handle.
 * @return hipSuccess on success (or when there is no default context);
 *         hipErrorMemoryAllocation if the host record cannot be allocated;
 *         hipErrorRuntimeOther if image/sampler/object creation fails.
 *
 * NOTE(review): a texture object already stored in textureObject from an
 * earlier bind is not released here.
 */
hipError_t ihipBindTextureImpl(int dim,
                               enum hipTextureReadMode readMode,
                               size_t *offset,
                               const void *devPtr,
                               const struct hipChannelFormatDesc& desc,
                               size_t size,
                               enum hipTextureAddressMode addressMode,
                               enum hipTextureFilterMode filterMode,
                               int normalizedCoords,
                               hipTextureObject_t& textureObject)
{
    HIP_INIT_API();
    HIP_SET_DEVICE();

    hipError_t hip_status = hipSuccess;

    auto ctx = ihipGetTlsDefaultCtx();
    if (ctx) {
        hc::accelerator acc = ctx->getDevice()->_acc;
        hsa_agent_t* agent = static_cast<hsa_agent_t*>(acc.get_hsa_agent());

        // Zero-initialised bookkeeping record; owned by textureHash on success.
        hipTexture* pTexture = (hipTexture*) calloc(1, sizeof(hipTexture));
        if (pTexture == nullptr) {
            // Previously continued and dereferenced the null pointer below.
            return ihipLogStatus(hipErrorMemoryAllocation);
        }

        assert(dim == hipTextureType1D);

        hsa_ext_image_descriptor_t imageDescriptor;
        imageDescriptor.geometry = HSA_EXT_IMAGE_GEOMETRY_1D;
        imageDescriptor.width = size;
        imageDescriptor.height = 1;
        imageDescriptor.depth = 1;
        imageDescriptor.array_size = 0;

        hsa_ext_image_channel_order_t channelOrder;
        hsa_ext_image_channel_type_t channelType;
        getChannelOrderAndType(desc, readMode, channelOrder, channelType);
        imageDescriptor.format.channel_order = channelOrder;
        imageDescriptor.format.channel_type = channelType;

        hsa_ext_sampler_descriptor_t samplerDescriptor;
        fillSamplerDescriptor(samplerDescriptor, addressMode, filterMode, normalizedCoords);

        hsa_access_permission_t permission = HSA_ACCESS_PERMISSION_RW;

        if (HSA_STATUS_SUCCESS != hsa_ext_image_create(*agent, &imageDescriptor, devPtr, permission, &(pTexture->image))) {
            free(pTexture);
            return ihipLogStatus(hipErrorRuntimeOther);
        }
        if (HSA_STATUS_SUCCESS != hsa_ext_sampler_create(*agent, &samplerDescriptor, &(pTexture->sampler))) {
            // Do not leak the image that was just created.
            hsa_ext_image_destroy(*agent, pTexture->image);
            free(pTexture);
            return ihipLogStatus(hipErrorRuntimeOther);
        }
        if (!getHipTextureObject(&textureObject, pTexture->image, pTexture->sampler)) {
            hsa_ext_sampler_destroy(*agent, pTexture->sampler);
            hsa_ext_image_destroy(*agent, pTexture->image);
            free(pTexture);
            return ihipLogStatus(hipErrorRuntimeOther);
        }
        textureHash[textureObject] = pTexture;
    }

    return ihipLogStatus(hip_status);
}
|
||||
|
||||
/**
 * Bind linear device memory to a 1D texture reference.
 *
 * Forwards to ihipBindTextureImpl() using the addressing mode, filter mode
 * and normalization flag stored in the texture reference.
 *
 * @param offset  Forwarded unchanged.
 * @param tex     Texture reference to bind; its textureObject is updated.
 * @param devPtr  Device memory backing the texture.
 * @param desc    Channel format of the data.
 * @param size    Forwarded as the image width.
 * @return hipErrorInvalidValue if tex or desc is null; otherwise the result
 *         of ihipBindTextureImpl().
 */
hipError_t hipBindTexture(size_t* offset,
                          textureReference* tex,
                          const void* devPtr,
                          const hipChannelFormatDesc* desc,
                          size_t size)
{
    // Both pointers are dereferenced to build the forwarded arguments.
    if (tex == nullptr || desc == nullptr) {
        return hipErrorInvalidValue;
    }

    // TODO: hipReadModeElementType is default.
    return ihipBindTextureImpl(hipTextureType1D, hipReadModeElementType,
                               offset, devPtr, *desc, size,
                               tex->addressMode[0], tex->filterMode, tex->normalized,
                               tex->textureObject);
}
|
||||
|
||||
/**
 * Bind device memory to a 2D texture reference.
 *
 * Creates a 2D HSA image over devPtr plus a sampler, packs them into a
 * texture object and registers the bookkeeping record in textureHash.
 *
 * @param dim               Must be hipTextureType2D (asserted).
 * @param readMode          Texture read mode used to pick the channel type.
 * @param offset            Not read or written by this implementation.
 * @param devPtr            Device memory backing the texture.
 * @param desc              Channel format of the data.
 * @param width             Image width.
 * @param height            Image height.
 * @param addressMode       Sampler addressing mode.
 * @param filterMode        Sampler filter mode.
 * @param normalizedCoords  Non-zero for normalized coordinates.
 * @param textureObject     [out] Receives the created texture object handle.
 * @return hipSuccess on success (or when there is no default context);
 *         hipErrorMemoryAllocation if the host record cannot be allocated;
 *         hipErrorRuntimeOther if image/sampler/object creation fails.
 *
 * NOTE(review): a texture object already stored in textureObject from an
 * earlier bind is not released here.
 */
hipError_t ihipBindTexture2DImpl(int dim,
                                 enum hipTextureReadMode readMode,
                                 size_t *offset,
                                 const void *devPtr,
                                 const struct hipChannelFormatDesc& desc,
                                 size_t width,
                                 size_t height,
                                 enum hipTextureAddressMode addressMode,
                                 enum hipTextureFilterMode filterMode,
                                 int normalizedCoords,
                                 hipTextureObject_t& textureObject)
{
    HIP_INIT_API();
    HIP_SET_DEVICE();

    hipError_t hip_status = hipSuccess;

    auto ctx = ihipGetTlsDefaultCtx();
    if (ctx) {
        hc::accelerator acc = ctx->getDevice()->_acc;
        hsa_agent_t* agent = static_cast<hsa_agent_t*>(acc.get_hsa_agent());

        // Zero-initialised bookkeeping record; owned by textureHash on success.
        hipTexture* pTexture = (hipTexture*) calloc(1, sizeof(hipTexture));
        if (pTexture == nullptr) {
            // Previously continued and dereferenced the null pointer below.
            return ihipLogStatus(hipErrorMemoryAllocation);
        }

        assert(dim == hipTextureType2D);

        hsa_ext_image_descriptor_t imageDescriptor;
        imageDescriptor.geometry = HSA_EXT_IMAGE_GEOMETRY_2D;
        imageDescriptor.width = width;
        imageDescriptor.height = height;
        imageDescriptor.depth = 1;
        imageDescriptor.array_size = 0;

        hsa_ext_image_channel_order_t channelOrder;
        hsa_ext_image_channel_type_t channelType;
        getChannelOrderAndType(desc, readMode, channelOrder, channelType);
        imageDescriptor.format.channel_order = channelOrder;
        imageDescriptor.format.channel_type = channelType;

        hsa_ext_sampler_descriptor_t samplerDescriptor;
        fillSamplerDescriptor(samplerDescriptor, addressMode, filterMode, normalizedCoords);

        hsa_access_permission_t permission = HSA_ACCESS_PERMISSION_RW;

        if (HSA_STATUS_SUCCESS != hsa_ext_image_create(*agent, &imageDescriptor, devPtr, permission, &(pTexture->image))) {
            free(pTexture);
            return ihipLogStatus(hipErrorRuntimeOther);
        }
        if (HSA_STATUS_SUCCESS != hsa_ext_sampler_create(*agent, &samplerDescriptor, &(pTexture->sampler))) {
            // Do not leak the image that was just created.
            hsa_ext_image_destroy(*agent, pTexture->image);
            free(pTexture);
            return ihipLogStatus(hipErrorRuntimeOther);
        }
        if (!getHipTextureObject(&textureObject, pTexture->image, pTexture->sampler)) {
            hsa_ext_sampler_destroy(*agent, pTexture->sampler);
            hsa_ext_image_destroy(*agent, pTexture->image);
            free(pTexture);
            return ihipLogStatus(hipErrorRuntimeOther);
        }
        textureHash[textureObject] = pTexture;
    }

    return ihipLogStatus(hip_status);
}
|
||||
|
||||
/**
 * Bind device memory to a 2D texture reference.
 *
 * Forwards to ihipBindTexture2DImpl() using the addressing mode, filter mode
 * and normalization flag stored in the texture reference.
 *
 * @param offset  Forwarded unchanged.
 * @param tex     Texture reference to bind; its textureObject is updated.
 * @param devPtr  Device memory backing the texture.
 * @param desc    Channel format of the data.
 * @param width   Image width.
 * @param height  Image height.
 * @param pitch   Currently not forwarded to the implementation.
 * @return hipErrorInvalidValue if tex or desc is null; otherwise the result
 *         of ihipBindTexture2DImpl().
 */
hipError_t hipBindTexture2D(size_t* offset,
                            textureReference* tex,
                            const void* devPtr,
                            const hipChannelFormatDesc* desc,
                            size_t width,
                            size_t height,
                            size_t pitch)
{
    // Both pointers are dereferenced to build the forwarded arguments.
    if (tex == nullptr || desc == nullptr) {
        return hipErrorInvalidValue;
    }

    // TODO: hipReadModeElementType is default.
    return ihipBindTexture2DImpl(hipTextureType2D, hipReadModeElementType,
                                 offset, devPtr, *desc, width, height,
                                 tex->addressMode[0], tex->filterMode, tex->normalized,
                                 tex->textureObject);
}
|
||||
|
||||
/**
 * Bind a HIP array to a texture reference.
 *
 * Derives the HSA image geometry from dim and the array extents, creates the
 * image over array->data plus a sampler, packs them into a texture object and
 * registers the bookkeeping record in textureHash.
 *
 * @param dim               Texture dimensionality (hipTextureType*).
 * @param readMode          Texture read mode used to pick the channel type.
 * @param array             Array to bind. Must not be null.
 * @param desc              Channel format of the data.
 * @param addressMode       Sampler addressing mode.
 * @param filterMode        Sampler filter mode.
 * @param normalizedCoords  Non-zero for normalized coordinates.
 * @param textureObject     [out] Receives the created texture object handle.
 * @return hipSuccess on success (or when there is no default context);
 *         hipErrorInvalidValue if array is null or dim has no geometry
 *         mapping here; hipErrorMemoryAllocation if the host record cannot be
 *         allocated; hipErrorRuntimeOther if image/sampler/object creation
 *         fails.
 */
hipError_t ihipBindTextureToArrayImpl(int dim,
                                      enum hipTextureReadMode readMode,
                                      hipArray_const_t array,
                                      const struct hipChannelFormatDesc& desc,
                                      enum hipTextureAddressMode addressMode,
                                      enum hipTextureFilterMode filterMode,
                                      int normalizedCoords,
                                      hipTextureObject_t& textureObject)
{
    HIP_INIT_API();
    HIP_SET_DEVICE();

    hipError_t hip_status = hipSuccess;

    auto ctx = ihipGetTlsDefaultCtx();
    if (ctx) {
        // array is dereferenced for the extents and the data pointer.
        if (array == nullptr) {
            return ihipLogStatus(hipErrorInvalidValue);
        }

        hsa_ext_image_descriptor_t imageDescriptor;
        imageDescriptor.width = array->width;
        imageDescriptor.height = array->height;
        imageDescriptor.depth = array->depth;
        imageDescriptor.array_size = 0;

        bool geometryKnown = true;
        switch (dim) {
            case hipTextureType1D:
                imageDescriptor.geometry = HSA_EXT_IMAGE_GEOMETRY_1D;
                imageDescriptor.height = 1;
                imageDescriptor.depth = 1;
                break;
            case hipTextureType2D:
                imageDescriptor.geometry = HSA_EXT_IMAGE_GEOMETRY_2D;
                imageDescriptor.depth = 1;
                break;
            case hipTextureType3D:
            case hipTextureTypeCubemap:
                imageDescriptor.geometry = HSA_EXT_IMAGE_GEOMETRY_3D;
                break;
            case hipTextureType1DLayered:
                imageDescriptor.geometry = HSA_EXT_IMAGE_GEOMETRY_1DA;
                imageDescriptor.height = 1;
                imageDescriptor.array_size = array->height;
                break;
            case hipTextureType2DLayered:
                imageDescriptor.geometry = HSA_EXT_IMAGE_GEOMETRY_2DA;
                imageDescriptor.depth = 1;
                imageDescriptor.array_size = array->depth;
                break;
            case hipTextureTypeCubemapLayered:
            default:
                // No geometry mapping: previously the image was created with
                // an uninitialised geometry field.
                geometryKnown = false;
                break;
        }
        if (!geometryKnown) {
            return ihipLogStatus(hipErrorInvalidValue);
        }

        hsa_ext_image_channel_order_t channelOrder;
        hsa_ext_image_channel_type_t channelType;
        getChannelOrderAndType(desc, readMode, channelOrder, channelType);
        imageDescriptor.format.channel_order = channelOrder;
        imageDescriptor.format.channel_type = channelType;

        hsa_ext_sampler_descriptor_t samplerDescriptor;
        fillSamplerDescriptor(samplerDescriptor, addressMode, filterMode, normalizedCoords);

        hc::accelerator acc = ctx->getDevice()->_acc;
        hsa_agent_t* agent = static_cast<hsa_agent_t*>(acc.get_hsa_agent());

        // Zero-initialised bookkeeping record; owned by textureHash on success.
        hipTexture* pTexture = (hipTexture*) calloc(1, sizeof(hipTexture));
        if (pTexture == nullptr) {
            // Previously continued and dereferenced the null pointer below.
            return ihipLogStatus(hipErrorMemoryAllocation);
        }

        hsa_access_permission_t permission = HSA_ACCESS_PERMISSION_RW;

        if (HSA_STATUS_SUCCESS != hsa_ext_image_create(*agent, &imageDescriptor, array->data, permission, &(pTexture->image))) {
            free(pTexture);
            return ihipLogStatus(hipErrorRuntimeOther);
        }
        if (HSA_STATUS_SUCCESS != hsa_ext_sampler_create(*agent, &samplerDescriptor, &(pTexture->sampler))) {
            // Do not leak the image that was just created.
            hsa_ext_image_destroy(*agent, pTexture->image);
            free(pTexture);
            return ihipLogStatus(hipErrorRuntimeOther);
        }
        if (!getHipTextureObject(&textureObject, pTexture->image, pTexture->sampler)) {
            hsa_ext_sampler_destroy(*agent, pTexture->sampler);
            hsa_ext_image_destroy(*agent, pTexture->image);
            free(pTexture);
            return ihipLogStatus(hipErrorRuntimeOther);
        }
        textureHash[textureObject] = pTexture;
    }

    return ihipLogStatus(hip_status);
}
|
||||
|
||||
/**
 * Bind a HIP array to a texture reference.
 *
 * Forwards to ihipBindTextureToArrayImpl() as a 2D texture, using the
 * addressing mode, filter mode and normalization flag stored in the texture
 * reference.
 *
 * @param tex    Texture reference to bind; its textureObject is updated.
 * @param array  Array to bind.
 * @param desc   Channel format of the data.
 * @return hipErrorInvalidValue if tex or desc is null; otherwise the result
 *         of ihipBindTextureToArrayImpl().
 */
hipError_t hipBindTextureToArray(textureReference* tex,
                                 hipArray_const_t array,
                                 const hipChannelFormatDesc* desc)
{
    // Both pointers are dereferenced to build the forwarded arguments.
    if (tex == nullptr || desc == nullptr) {
        return hipErrorInvalidValue;
    }

    // TODO: hipReadModeElementType is default.
    return ihipBindTextureToArrayImpl(hipTextureType2D, hipReadModeElementType,
                                      array, *desc,
                                      tex->addressMode[0], tex->filterMode, tex->normalized,
                                      tex->textureObject);
}
|
||||
|
||||
/**
 * Placeholder for binding a mipmapped array to a texture reference.
 *
 * No binding is performed: all arguments are ignored and hipSuccess is
 * returned unconditionally.
 *
 * @param tex             Ignored.
 * @param mipmappedArray  Ignored.
 * @param desc            Ignored.
 * @return hipSuccess, always.
 */
hipError_t hipBindTextureToMipmappedArray(textureReference* tex,
                                          hipMipmappedArray_const_t mipmappedArray,
                                          const hipChannelFormatDesc* desc)
{
    // Explicitly mark the parameters as unused until this is implemented.
    (void) tex;
    (void) mipmappedArray;
    (void) desc;

    const hipError_t status = hipSuccess;
    return status;
}
|
||||
|
||||
/**
 * Release the texture object created by one of the bind functions.
 *
 * Destroys the HSA image and sampler, frees the host bookkeeping record,
 * removes the textureHash entry, and frees the device descriptor buffer that
 * getHipTextureObject() allocated with hipMalloc and whose address serves as
 * the handle value.
 *
 * @param textureObject  Handle to release. Unknown handles are ignored.
 * @return hipSuccess (unknown handles are not reported as errors, matching
 *         the original behaviour).
 */
hipError_t ihipUnbindTextureImpl(const hipTextureObject_t& textureObject)
{
    HIP_INIT_API();
    HIP_SET_DEVICE();

    hipError_t hip_status = hipSuccess;

    auto ctx = ihipGetTlsDefaultCtx();
    if (ctx) {
        // find() instead of operator[]: a lookup of an unknown handle must
        // not insert a null entry into the map.
        auto entry = textureHash.find(textureObject);
        if (entry != textureHash.end()) {
            // Copy the handle first: textureObject is a reference and should
            // not be relied upon once the entry is gone.
            const hipTextureObject_t handle = textureObject;

            hipTexture* pTexture = entry->second;
            if (pTexture != nullptr) {
                hc::accelerator acc = ctx->getDevice()->_acc;
                hsa_agent_t* agent = static_cast<hsa_agent_t*>(acc.get_hsa_agent());

                hsa_ext_image_destroy(*agent, pTexture->image);
                hsa_ext_sampler_destroy(*agent, pTexture->sampler);
                free(pTexture);
            }
            textureHash.erase(entry);

            // The handle is the hipMalloc'd descriptor buffer; it was never
            // released before.
            hipFree((void*) handle);
        }
    }

    return ihipLogStatus(hip_status);
}
|
||||
|
||||
/**
 * Unbind the texture object currently associated with a texture reference.
 *
 * @param tex  Texture reference whose textureObject is released.
 * @return hipErrorInvalidValue if tex is null; otherwise the result of
 *         ihipUnbindTextureImpl().
 */
hipError_t hipUnbindTexture(const textureReference* tex)
{
    // tex is dereferenced to obtain the texture object handle.
    if (tex == nullptr) {
        return hipErrorInvalidValue;
    }
    return ihipUnbindTextureImpl(tex->textureObject);
}
|
||||
|
||||
/**
 * Return the channel format descriptor of a HIP array.
 *
 * @param desc   [out] Receives a copy of array->desc.
 * @param array  Array to query.
 * @return hipSuccess on success (or when no default context is available, in
 *         which case nothing is written — behaviour kept from the original);
 *         hipErrorInvalidValue if desc or array is null.
 */
hipError_t hipGetChannelDesc(hipChannelFormatDesc* desc, hipArray_const_t array)
{
    HIP_INIT_API(desc, array);
    HIP_SET_DEVICE();

    hipError_t hip_status = hipSuccess;

    auto ctx = ihipGetTlsDefaultCtx();
    if (ctx) {
        if (desc == nullptr || array == nullptr) {
            // Both pointers are dereferenced by the copy below.
            hip_status = hipErrorInvalidValue;
        } else {
            *desc = array->desc;
        }
    }
    return ihipLogStatus(hip_status);
}
|
||||
|
||||
/**
 * Placeholder for querying the alignment offset of a bound texture.
 *
 * The default context is fetched but nothing is computed: *offset is not
 * written and hipSuccess is always reported.
 *
 * @param offset  Not written by this implementation.
 * @param tex     Not read by this implementation.
 * @return The value produced by ihipLogStatus(hipSuccess).
 */
hipError_t hipGetTextureAlignmentOffset(size_t* offset, const textureReference* tex)
{
    HIP_INIT_API(offset, tex);
    HIP_SET_DEVICE();

    // Context lookup is kept from the original even though its result is unused.
    auto ctx = ihipGetTlsDefaultCtx();
    (void) ctx;

    const hipError_t status = hipSuccess;
    return ihipLogStatus(status);
}
|
||||
|
||||
/**
 * Placeholder for looking up the texture reference bound to a symbol.
 *
 * The default context is fetched but nothing is looked up: *tex is not
 * written and hipSuccess is always reported.
 *
 * @param tex     Not written by this implementation.
 * @param symbol  Not read by this implementation.
 * @return The value produced by ihipLogStatus(hipSuccess).
 */
hipError_t hipGetTextureReference(const textureReference** tex, const void* symbol)
{
    HIP_INIT_API(tex, symbol);
    HIP_SET_DEVICE();

    // Context lookup is kept from the original even though its result is unused.
    auto ctx = ihipGetTlsDefaultCtx();
    (void) ctx;

    const hipError_t status = hipSuccess;
    return ihipLogStatus(status);
}
|
||||
@@ -20,9 +20,17 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef HIP_INCLUDE_HIP_NVCC_DETAIL_HIP_TEXTURE_H
|
||||
#define HIP_INCLUDE_HIP_NVCC_DETAIL_HIP_TEXTURE_H
|
||||
#ifndef HIP_INCLUDE_HCC_DETAIL_HIP_TEXTURE_H
|
||||
#define HIP_INCLUDE_HCC_DETAIL_HIP_TEXTURE_H
|
||||
|
||||
#include <texture_types.h>
|
||||
#include <hip/hcc_detail/texture_types.h>
|
||||
|
||||
struct hipTexture {
|
||||
hipResourceDesc resDesc;
|
||||
hipTextureDesc texDesc;
|
||||
hipResourceViewDesc resViewDesc;
|
||||
hsa_ext_image_t image;
|
||||
hsa_ext_sampler_t sampler;
|
||||
};
|
||||
|
||||
#endif
|
||||
@@ -820,16 +820,6 @@ __host__ float modff(float x, float *iptr)
|
||||
return std::modf(x, iptr);
|
||||
}
|
||||
|
||||
// Host-side single-precision erfcinvf: forwards to __hip_host_erfcinvf.
__host__ float erfcinvf(float y)
{
    const float result = __hip_host_erfcinvf(y);
    return result;
}
|
||||
|
||||
// Host-side double-precision erfcinv: forwards to __hip_host_erfcinv.
__host__ double erfcinv(double y)
{
    const double result = __hip_host_erfcinv(y);
    return result;
}
|
||||
|
||||
__host__ double fdivide(double x, double y)
|
||||
{
|
||||
return x/y;
|
||||
@@ -937,16 +927,6 @@ __host__ void sincospi(double x, double *sptr, double *cptr)
|
||||
*cptr = std::cos(HIP_PI*x);
|
||||
}
|
||||
|
||||
//__host__ float normcdfinvf(float x)
|
||||
//{
|
||||
// return std::sqrt(2) * erfinvf(2*x-1);
|
||||
//}
|
||||
|
||||
//__host__ double normcdfinv(double x)
|
||||
//{
|
||||
// return std::sqrt(2) * erfinv(2*x-1);
|
||||
//}
|
||||
|
||||
__host__ float nextafterf(float x, float y)
|
||||
{
|
||||
return std::nextafter(x, y);
|
||||
|
||||
@@ -18,7 +18,7 @@ THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/* HIT_START
|
||||
* BUILD: %t %s EXCLUDE_HIP_PLATFORM all
|
||||
* BUILD: %t %s
|
||||
* RUN: %t
|
||||
* HIT_END
|
||||
*/
|
||||
@@ -31,16 +31,8 @@ THE SOFTWARE.
|
||||
#define NUM 1024
|
||||
#define SIZE 1024*4
|
||||
|
||||
// TODO - collapse:
|
||||
#ifdef __HIP_PLATFORM_HCC__
|
||||
__device__ ADDRESS_SPACE_1 int globalIn[NUM];
|
||||
__device__ ADDRESS_SPACE_1 int globalOut[NUM];
|
||||
#endif
|
||||
|
||||
#ifdef __HIP_PLATFORM_NVCC__
|
||||
__device__ int globalIn[NUM];
|
||||
__device__ int globalOut[NUM];
|
||||
#endif
|
||||
|
||||
__global__ void Assign(hipLaunchParm lp, int* Out)
|
||||
{
|
||||
@@ -55,7 +47,7 @@ int main()
|
||||
A = new int[NUM];
|
||||
B = new int[NUM];
|
||||
C = new int[NUM];
|
||||
for(unsigned i=0;i<NUM;i++) {
|
||||
for(int i=0;i<NUM;i++) {
|
||||
A[i] = -1*i;
|
||||
B[i] = 0;
|
||||
C[i] = 0;
|
||||
@@ -64,7 +56,7 @@ int main()
|
||||
hipMalloc((void**)&Ad, SIZE);
|
||||
hipHostMalloc((void**)&Am, SIZE);
|
||||
hipHostMalloc((void**)&Cm, SIZE);
|
||||
for(unsigned i=0;i<NUM;i++) {
|
||||
for(int i=0;i<NUM;i++) {
|
||||
Am[i] = -1*i;
|
||||
Cm[i] = 0;
|
||||
}
|
||||
@@ -77,12 +69,12 @@ int main()
|
||||
hipMemcpy(B, Ad, SIZE, hipMemcpyDeviceToHost);
|
||||
hipMemcpyFromSymbolAsync(Cm, HIP_SYMBOL(globalOut), SIZE, 0, hipMemcpyDeviceToHost, stream);
|
||||
hipStreamSynchronize(stream);
|
||||
for(unsigned i=0;i<NUM;i++) {
|
||||
for(int i=0;i<NUM;i++) {
|
||||
assert(Am[i] == B[i]);
|
||||
assert(Am[i] == Cm[i]);
|
||||
}
|
||||
|
||||
for(unsigned i=0;i<NUM;i++) {
|
||||
for(int i=0;i<NUM;i++) {
|
||||
A[i] = -2*i;
|
||||
B[i] = 0;
|
||||
}
|
||||
@@ -91,12 +83,12 @@ int main()
|
||||
hipLaunchKernel(Assign, dim3(1,1,1), dim3(NUM,1,1), 0, 0, Ad);
|
||||
hipMemcpy(B, Ad, SIZE, hipMemcpyDeviceToHost);
|
||||
hipMemcpyFromSymbol(C, HIP_SYMBOL(globalOut), SIZE, 0, hipMemcpyDeviceToHost);
|
||||
for(unsigned i=0;i<NUM;i++) {
|
||||
for(int i=0;i<NUM;i++) {
|
||||
assert(A[i] == B[i]);
|
||||
assert(A[i] == C[i]);
|
||||
}
|
||||
|
||||
for(unsigned i=0;i<NUM;i++) {
|
||||
for(int i=0;i<NUM;i++) {
|
||||
A[i] = -3*i;
|
||||
B[i] = 0;
|
||||
}
|
||||
@@ -107,7 +99,7 @@ int main()
|
||||
hipMemcpy(B, Ad, SIZE, hipMemcpyDeviceToHost);
|
||||
hipMemcpyFromSymbolAsync(C, HIP_SYMBOL(globalOut), SIZE, 0, hipMemcpyDeviceToHost, stream);
|
||||
hipStreamSynchronize(stream);
|
||||
for(unsigned i=0;i<NUM;i++) {
|
||||
for(int i=0;i<NUM;i++) {
|
||||
assert(A[i] == B[i]);
|
||||
assert(A[i] == C[i]);
|
||||
}
|
||||
|
||||
@@ -0,0 +1,129 @@
|
||||
/*
|
||||
Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/* HIT_START
|
||||
* BUILD: %t %s ../test_common.cpp NVCC_OPTIONS -std=c++11
|
||||
* RUN: %t
|
||||
* HIT_END
|
||||
*/
|
||||
|
||||
#include <vector>
|
||||
#include <atomic>
|
||||
#include <thread>
|
||||
#include <cassert>
|
||||
#include <cstdio>
|
||||
#include "hip/hip_runtime.h"
|
||||
#include "hip/device_functions.h"
|
||||
#include "test_common.h"
|
||||
|
||||
// Runs the HIP call `x` exactly once and asserts that it returned hipSuccess.
// The call is evaluated outside assert() so that it is still executed when
// NDEBUG compiles assertions away; only the check itself disappears then.
// Intended to be used as a statement: HIP_ASSERT(hipFoo(...));
#define HIP_ASSERT(x)                                   \
    do {                                                \
        const hipError_t hip_assert_status_ = (x);      \
        assert(hip_assert_status_ == hipSuccess);       \
        (void)hip_assert_status_;                       \
    } while (0)
|
||||
|
||||
// Memory fence usable from both host and device code.
// When compiled for the device it calls __threadfence_system(); otherwise it
// issues a sequentially consistent std::atomic_thread_fence.
__host__ __device__ void fence_system() {
#if defined(__HIP_DEVICE_COMPILE__)
    __threadfence_system();
#else
    std::atomic_thread_fence(std::memory_order_seq_cst);
#endif
}
|
||||
|
||||
// Takes `num_iter` turns in a round-robin shared between `num_dev` participants.
//   id       - this participant's slot; it acts when (*flag % num_dev) == id
//   num_dev  - number of participants sharing `data` and `flag`
//   num_iter - number of turns this participant takes
//   data     - shared counter, incremented once per turn
//   flag     - shared turn counter, incremented once per turn to pass the turn on
__host__ __device__ void round_robin(const int id, const int num_dev, const int num_iter, volatile int* data, volatile int* flag) {
    int turns_done = 0;
    while (turns_done < num_iter) {
        // Spin until it is this participant's turn; fence on every poll so the
        // next read of *flag is not served from a stale cached value.
        while ((*flag) % num_dev != id) {
            fence_system();
        }

        (*data)++;
        // Make sure the store to data is sequenced before the store to flag.
        fence_system();

        (*flag)++;
        // Fence again so the updated flag is flushed out to the other participants.
        fence_system();

        ++turns_done;
    }
}
|
||||
|
||||
// Kernel entry point: forwards all of its arguments unchanged to round_robin()
// so a GPU thread can take part in the same round-robin as the host thread.
__global__ void gpu_round_robin(const int id,
                                const int num_dev,
                                const int num_iter,
                                volatile int* data,
                                volatile int* flag) {
    round_robin(id, num_dev, num_iter, data, flag);
}
|
||||
|
||||
int main() {
|
||||
|
||||
int num_gpus = 0;
|
||||
HIP_ASSERT(hipGetDeviceCount(&num_gpus));
|
||||
if (num_gpus == 0) {
|
||||
passed();
|
||||
return 0;
|
||||
}
|
||||
|
||||
volatile int* data;
|
||||
HIP_ASSERT(hipHostMalloc(&data, sizeof(int), hipHostMallocCoherent));
|
||||
constexpr int init_data = 1000;
|
||||
*data = init_data;
|
||||
|
||||
volatile int* flag;
|
||||
HIP_ASSERT(hipHostMalloc(&flag, sizeof(int), hipHostMallocCoherent));
|
||||
*flag = 0;
|
||||
|
||||
// number of rounds per device
|
||||
constexpr int num_iter = 1000;
|
||||
|
||||
// one CPU thread + 1 kernel/GPU
|
||||
const int num_dev = num_gpus + 1;
|
||||
|
||||
int next_id = 0;
|
||||
std::vector<std::thread> threads;
|
||||
|
||||
// create a CPU thread for the round_robin
|
||||
threads.push_back(std::thread(round_robin, next_id++, num_dev, num_iter, data, flag));
|
||||
|
||||
// run one thread per GPU
|
||||
dim3 dim_block(1,1,1);
|
||||
dim3 dim_grid(1,1,1);
|
||||
|
||||
// launch one kernel per device for the round robin
|
||||
for (; next_id < num_dev; ++next_id) {
|
||||
threads.push_back(std::thread([=]() {
|
||||
HIP_ASSERT(hipSetDevice(next_id-1));
|
||||
hipLaunchKernelGGL(gpu_round_robin, dim_grid, dim_block, 0, 0x0
|
||||
, next_id, num_dev, num_iter, data, flag);
|
||||
HIP_ASSERT(hipDeviceSynchronize());
|
||||
}));
|
||||
}
|
||||
|
||||
for (auto& t : threads) {
|
||||
t.join();
|
||||
}
|
||||
|
||||
int expected_data = init_data + num_dev * num_iter;
|
||||
int expected_flag = num_dev * num_iter;
|
||||
|
||||
bool passed = *data == expected_data
|
||||
&& *flag == expected_flag;
|
||||
|
||||
HIP_ASSERT(hipHostFree((void*)data));
|
||||
HIP_ASSERT(hipHostFree((void*)flag));
|
||||
|
||||
if (passed) {
|
||||
passed();
|
||||
}
|
||||
else {
|
||||
failed("Failed Verification!\n");
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -109,15 +109,14 @@ int main(int argc, char **argv)
|
||||
std::cout << devCount << std::endl;
|
||||
}
|
||||
if (retDevInfo) {
|
||||
hipSetDevice(device);
|
||||
hipDeviceProp_t devProp;
|
||||
hipDevice_t deviceT;
|
||||
hipDeviceGet(&deviceT, device);
|
||||
|
||||
hipGetDeviceProperties(&devProp, device);
|
||||
if (devProp.major < 1) {
|
||||
printf("%d does not support HIP\n", device);
|
||||
return -1;
|
||||
}
|
||||
std::cout << devProp.pciBusID << std::endl;
|
||||
char pciBusId[100];
|
||||
memset(pciBusId,0,100);
|
||||
hipDeviceGetPCIBusId(pciBusId,100,deviceT);
|
||||
|
||||
cout<<pciBusId<<endl;
|
||||
}
|
||||
exit(0);
|
||||
}
|
||||
|
||||
@@ -28,59 +28,64 @@ THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
|
||||
#include <assert.h>
|
||||
#include <string>
|
||||
#include "hip/hip_runtime.h"
|
||||
#include <chrono>
|
||||
#include <thread>
|
||||
using namespace std;
|
||||
|
||||
int getDeviceNumber(){
|
||||
FILE *in;
|
||||
char buff[512];
|
||||
string str;
|
||||
if(!(in = popen("./hipEnvVar -c", "r"))){
|
||||
return 1;
|
||||
FILE *in;
|
||||
char buff[512];
|
||||
string str;
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(10));
|
||||
if(!(in = popen("./directed_tests/hipEnvVar -c", "r"))){
|
||||
return 1;
|
||||
}
|
||||
while(fgets(buff, 512, in)!=NULL){
|
||||
cout << buff;
|
||||
}
|
||||
fgets(buff, sizeof(buff), in);
|
||||
pclose(in);
|
||||
return atoi(buff);
|
||||
}
|
||||
|
||||
// Query the current device ID remotely to hipEnvVar
|
||||
int getDevicePCIBusNumRemote(int deviceID){
|
||||
void getDevicePCIBusNumRemote(int deviceID, char* pciBusID){
|
||||
FILE *in;
|
||||
char buff[512];
|
||||
string str = "./hipEnvVar -d ";
|
||||
string str = "./directed_tests/hipEnvVar -d ";
|
||||
str += std::to_string(deviceID);
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(10));
|
||||
if(!(in = popen(str.c_str(), "r"))){
|
||||
return 1;
|
||||
exit(1);
|
||||
}
|
||||
while(fgets(pciBusID, 100, in)!=NULL){
|
||||
cout << pciBusID;
|
||||
}
|
||||
fgets(buff, sizeof(buff), in);
|
||||
pclose(in);
|
||||
return atoi(buff);
|
||||
}
|
||||
|
||||
// Query the current device ID locally
|
||||
int getDevicePCIBusNum(int deviceID){
|
||||
hipSetDevice(deviceID);
|
||||
hipDeviceProp_t devProp;
|
||||
// Query the current device ID locally on AMD path
|
||||
void getDevicePCIBusNum(int deviceID, char* pciBusID){
|
||||
hipDevice_t deviceT;
|
||||
hipDeviceGet(&deviceT, deviceID);
|
||||
|
||||
hipGetDeviceProperties(&devProp, deviceID);
|
||||
if (devProp.major < 1) {
|
||||
printf("%d does not support HIP\n", deviceID);
|
||||
return -1;
|
||||
}
|
||||
return devProp.pciBusID;
|
||||
memset(pciBusID,0,100);
|
||||
hipDeviceGetPCIBusId(pciBusID,100,deviceT);
|
||||
}
|
||||
|
||||
int main() {
|
||||
unsetenv("HIP_VISIBLE_DEVICES");
|
||||
unsetenv("CUDA_VISIBLE_DEVICES");
|
||||
|
||||
std::vector<std::string> devPCINum;
|
||||
char pciBusID[100];
|
||||
//collect the device pci bus ID for all devices
|
||||
int totalDeviceNum = getDeviceNumber();
|
||||
std::cout << "The total number of available devices is " << totalDeviceNum<< std::endl
|
||||
<<"Valid index range is 0 - "<<totalDeviceNum-1<<std::endl;
|
||||
std::vector<int> devPCINum;
|
||||
for (int i = 0; i < totalDeviceNum ; i++) {
|
||||
devPCINum.push_back(getDevicePCIBusNum(i));
|
||||
getDevicePCIBusNum(i, pciBusID);
|
||||
devPCINum.push_back(pciBusID);
|
||||
std::cout <<"The collected device PCI Bus ID of Device "<<i<<" is "
|
||||
<< getDevicePCIBusNum(i) << std::endl;
|
||||
<< devPCINum.back() << std::endl;
|
||||
}
|
||||
|
||||
//select each of the available devices to be the target device,
|
||||
@@ -88,9 +93,10 @@ int main() {
|
||||
for (int i = 0; i < totalDeviceNum ; i++) {
|
||||
setenv("HIP_VISIBLE_DEVICES",(char*)std::to_string(i).c_str(),1);
|
||||
setenv("CUDA_VISIBLE_DEVICES",(char*)std::to_string(i).c_str(),1);
|
||||
if (devPCINum[i] != getDevicePCIBusNumRemote(0)) {
|
||||
getDevicePCIBusNumRemote(0, pciBusID);
|
||||
if (devPCINum[i] == pciBusID) {
|
||||
std::cout << "The returned PciBusID is not correct"<< std::endl;
|
||||
std::cout << "Expected "<< devPCINum[i] << ", but get " << getDevicePCIBusNum(i) << endl;
|
||||
std::cout << "Expected "<< devPCINum[i] << ", but get " << pciBusID << endl;
|
||||
exit(-1);
|
||||
} else {
|
||||
continue;
|
||||
|
||||
@@ -0,0 +1,56 @@
|
||||
/*
|
||||
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/* HIT_START
|
||||
* BUILD: %t %s ../../test_common.cpp
|
||||
* RUN: %t
|
||||
* HIT_END
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include "hip/hip_runtime.h"
|
||||
#include "test_common.h"
|
||||
|
||||
int main( void ) {
|
||||
char pciBusId[13];
|
||||
int deviceCount = 0;
|
||||
HIPCHECK(hipGetDeviceCount(&deviceCount));
|
||||
HIPASSERT(deviceCount != 0);
|
||||
for(int i=0; i< deviceCount;i++) {
|
||||
int pciBusID = -1;
|
||||
int pciDeviceID = -1;
|
||||
int pciDomainID = -1;
|
||||
int tempPciBusId = -1;
|
||||
int tempDeviceId = -1;
|
||||
HIPCHECK(hipDeviceGetPCIBusId ( &pciBusId[0], 13, i ));
|
||||
sscanf (pciBusId,"%04x:%02x:%02x",&pciDomainID,&pciBusID,&pciDeviceID);
|
||||
HIPCHECK(hipDeviceGetAttribute(&tempPciBusId,hipDeviceAttributePciBusId , i));
|
||||
if(pciBusID != tempPciBusId) {
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
HIPCHECK(hipDeviceGetByPCIBusId ( &tempDeviceId, pciBusId ));
|
||||
if(tempDeviceId != i) {
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
}
|
||||
passed();
|
||||
}
|
||||
@@ -0,0 +1,51 @@
|
||||
/*
|
||||
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/* HIT_START
|
||||
* BUILD: %t %s ../../test_common.cpp
|
||||
* RUN: %t
|
||||
* HIT_END
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include "hip/hip_runtime.h"
|
||||
#include "test_common.h"
|
||||
|
||||
int main( void ) {
|
||||
char pciBusId[13];
|
||||
int deviceCount = 0;
|
||||
HIPCHECK(hipGetDeviceCount(&deviceCount));
|
||||
HIPASSERT(deviceCount != 0);
|
||||
for(int i=0; i< deviceCount;i++) {
|
||||
int pciBusID = -1;
|
||||
int pciDeviceID = -1;
|
||||
int pciDomainID = -1;
|
||||
int tempPciBusId = -1;
|
||||
HIPCHECK(hipDeviceGetPCIBusId ( &pciBusId[0], 13, i ));
|
||||
sscanf (pciBusId,"%04x:%02x:%02x",&pciDomainID,&pciBusID,&pciDeviceID);
|
||||
HIPCHECK(hipDeviceGetAttribute(&tempPciBusId,hipDeviceAttributePciBusId , i));
|
||||
if(pciBusID != tempPciBusId) {
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
}
|
||||
passed();
|
||||
}
|
||||
@@ -24,7 +24,7 @@ THE SOFTWARE.
|
||||
|
||||
/* HIT_START
|
||||
* BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS --std=c++11
|
||||
* RUN: %t
|
||||
* RUN: %t EXCLUDE_HIP_PLATFORM all
|
||||
* HIT_END
|
||||
*/
|
||||
|
||||
|
||||
@@ -446,9 +446,41 @@ int main(int argc, char *argv[])
|
||||
|
||||
|
||||
if (p_tests & 0x1000) {
|
||||
printf ("==> Test 0x1000 try null stream\n");
|
||||
hipStreamQuery(0/* try null stream*/);
|
||||
printf ("==> Test 0x1000 simple null stream tests\n");
|
||||
|
||||
// try some null stream:
|
||||
hipStreamQuery(0);
|
||||
|
||||
|
||||
hipStream_t s1;
|
||||
hipEvent_t e1;
|
||||
|
||||
{
|
||||
// stream null waits on event in s1 stream:
|
||||
HIPCHECK(hipStreamCreate(&s1));
|
||||
HIPCHECK(hipEventCreate(&e1));
|
||||
|
||||
HIPCHECK(hipEventRecord(e1, s1))
|
||||
|
||||
HIPCHECK(hipStreamWaitEvent(hipStream_t(0), e1, 0/*flags*/));
|
||||
|
||||
HIPCHECK(hipStreamDestroy(s1));
|
||||
HIPCHECK(hipEventDestroy(e1));
|
||||
}
|
||||
|
||||
{
|
||||
// stream s1 waits on event in null stream:
|
||||
HIPCHECK(hipStreamCreate(&s1));
|
||||
HIPCHECK(hipEventCreate(&e1));
|
||||
|
||||
HIPCHECK(hipEventRecord(e1, hipStream_t(0)))
|
||||
|
||||
HIPCHECK(hipStreamWaitEvent(s1, e1, 0/*flags*/));
|
||||
|
||||
HIPCHECK(hipStreamDestroy(s1));
|
||||
HIPCHECK(hipEventDestroy(e1));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
@@ -471,8 +503,8 @@ int main(int argc, char *argv[])
|
||||
}
|
||||
|
||||
|
||||
{
|
||||
printf ("test: alternating memcpy/count-reverse followed by event\n");
|
||||
if (p_tests & 0x4000 ) {
|
||||
printf ("test: %x alternating memcpy/count-reverse followed by event\n", p_tests);
|
||||
RUN_SYNC_TEST(0x4000, streamersDev0, sync_queryAllUntilComplete(streamersDev0), true);
|
||||
RUN_SYNC_TEST(0x8000, streamersDev0, sync_streamWaitEvent(streamersDev0.back()->event(), 0, sideStreams[0], false), true);
|
||||
}
|
||||
|
||||
@@ -23,7 +23,7 @@ THE SOFTWARE.
|
||||
#include <stddef.h>
|
||||
|
||||
#include "hip/hip_runtime.h"
|
||||
#include "hip/hip_texture.h"
|
||||
#include "hip/hip_texture_types.h"
|
||||
#include "hip/hip_runtime_api.h"
|
||||
|
||||
#define HC __attribute__((hc))
|
||||
|
||||
Referencia en una nueva incidencia
Block a user