Add 'projects/hip-tests/' from commit '5ce508401e1934053b127de5bf756dcd56a326a2'

git-subtree-dir: projects/hip-tests
git-subtree-mainline: cc7a96c46f
git-subtree-split: 5ce508401e
This commit is contained in:
systems-assistant[bot]
2025-08-10 02:09:49 +00:00
کامیت 7105a89a51
1515فایلهای تغییر یافته به همراه347072 افزوده شده و 0 حذف شده
@@ -0,0 +1,48 @@
# Azure Pipelines definition for hip-tests CI.
# The actual job steps live in the shared ROCm/ROCm pipelines repository and are
# pulled in through the template reference at the bottom of this file.
resources:
  repositories:
  - repository: pipelines_repo
    type: github
    endpoint: ROCm
    name: ROCm/ROCm
  pipelines:
  # Re-run hip-tests whenever the upstream HIP pipeline completes on these branches.
  - pipeline: hip_pipeline
    source: \HIP
    trigger:
      branches:
        include:
        - amd-staging
        - amd-mainline

variables:
- group: common
- template: /.azuredevops/variables-global.yml@pipelines_repo

trigger:
  batch: true
  branches:
    include:
    - amd-staging
    - amd-mainline
  paths:
    exclude:
    - '.jenkins'
    - CODEOWNERS
    - LICENSE.txt
    # '**/*.md' skips every Markdown file; the previous '**/.md' only matched
    # files literally named ".md", so documentation-only changes still ran CI.
    - '**/*.md'

pr:
  autoCancel: true
  branches:
    include:
    - amd-staging
    - amd-mainline
  paths:
    exclude:
    - '.jenkins'
    - CODEOWNERS
    - LICENSE.txt
    - '**/*.md'
  drafts: false

jobs:
- template: ${{ variables.CI_COMPONENT_PATH }}/hip-tests.yml@pipelines_repo
@@ -0,0 +1,10 @@
Language: Cpp
BasedOnStyle: Google
AlignEscapedNewlinesLeft: false
AlignOperands: false
ColumnLimit: 100
AlwaysBreakTemplateDeclarations: false
DerivePointerAlignment: false
IndentFunctionDeclarationAfterType: false
MaxEmptyLinesToKeep: 2
SortIncludes: false
@@ -0,0 +1,20 @@
# Set the default behavior, in case people don't have core.autocrlf set.
* text=auto

# Explicitly declare text files you want to always be normalized and converted
# to have LF line endings on checkout.
*.c text eol=lf
*.cpp text eol=lf
*.cc text eol=lf
*.h text eol=lf
*.hpp text eol=lf
*.txt text eol=lf

# Define files to support auto-remove trailing white space
# Need to run the command below, before add modified file(s) to the staging area
# git config filter.trimspace.clean 'sed -e "s/[[:space:]]*$//g"'
# Every pattern below must name the "trimspace" filter configured by that command;
# an attribute naming any other filter is silently ignored by git.
*.cpp filter=trimspace
*.c filter=trimspace
*.h filter=trimspace
*.hpp filter=trimspace
*.md filter=trimspace
+56
مشاهده پرونده
@@ -0,0 +1,56 @@
#!/usr/bin/env bash
# Check that the C/C++ lines touched by a change are clang-format clean.
#
# Usage:
#   clang-format-check.sh                # check the staged changes (pre-commit use)
#   clang-format-check.sh --range A..B   # check the files changed in a revision range
#
# Environment:
#   CLANG_FORMAT       clang-format binary to use (default: clang-format)
#   CLANG_FORMAT_DIFF  clang-format-diff script to use (default: clang-format-diff)
#
# Exit status: 64 on a usage error; otherwise whatever git / clang-format-diff
# report (the script runs under `set -euo pipefail`).
set -euo pipefail

RANGE=""
while [[ $# -gt 0 ]]; do
  case "$1" in
    --range)
      # Under `set -u` a bare "$2" would abort with an opaque "unbound variable"
      # error, so validate the argument count explicitly first.
      if [[ $# -lt 2 ]]; then
        echo "Missing value for --range" >&2
        exit 64
      fi
      RANGE="$2"
      shift 2
      ;;
    *)
      echo "Unknown arg $1" >&2
      exit 64
      ;;
  esac
done

# Only C/C++ sources and headers are checked.
regex='\.(c|cc|cpp|cxx|h|hh|hpp|hxx)$'

# `|| true` keeps "no matching files" (grep exit status 1) from tripping `set -e`.
# Deleted files are filtered out in both modes: they have no lines left to format.
if [[ -n $RANGE ]]; then
  files=$(git diff --name-only --diff-filter=ACMR "$RANGE" | grep -E "$regex" || true)
else
  files=$(git diff --cached --name-only --diff-filter=ACMR | grep -E "$regex" || true)
fi

echo "Checking $files"
[[ -z $files ]] && exit 0

# Resolve clang-format, falling back to the default LLVM install location on Windows.
clang_bin="${CLANG_FORMAT:-clang-format}"
if ! command -v "$clang_bin" >/dev/null 2>&1; then
  if [[ -x "/c/Program Files/LLVM/bin/clang-format.exe" ]]; then
    clang_bin="/c/Program Files/LLVM/bin/clang-format.exe"
  fi
fi

# Resolve clang-format-diff the same way.
clang_format_diff="${CLANG_FORMAT_DIFF:-clang-format-diff}"
if ! command -v "$clang_format_diff" >/dev/null 2>&1; then
  if [[ -x "/c/Program Files/LLVM/share/clang/clang-format-diff.py" ]]; then
    clang_format_diff="/c/Program Files/LLVM/share/clang/clang-format-diff.py"
  fi
fi

# Read one path per line so file names containing spaces are not word-split.
while IFS= read -r file; do
  echo "Checking lines of $file"
  if [[ -n $RANGE ]]; then
    diff_output=$(git diff -U0 "$RANGE" -- "$file")
  else
    diff_output=$(git diff -U0 --cached -- "$file")
  fi
  # -binary hands the clang-format resolved above to clang-format-diff; without it
  # the CLANG_FORMAT override and the Windows fallback would have no effect.
  printf '%s\n' "$diff_output" \
    | "$clang_format_diff" -binary "$clang_bin" -style=file -fallback-style=none -p1
done <<< "$files"
+2
مشاهده پرونده
@@ -0,0 +1,2 @@
#!/usr/bin/env bash
# Git pre-commit hook: hand control to the repository's clang-format check,
# which inspects the staged changes when invoked without arguments.
repo_root="$(git rev-parse --show-toplevel)"
exec "${repo_root}/.github/hooks/clang-format-check.sh"
+5
مشاهده پرونده
@@ -0,0 +1,5 @@
disabled: false
scmId: gh-emu-rocm
branchesToScan:
- amd-staging
- amd-mainline
+36
مشاهده پرونده
@@ -0,0 +1,36 @@
## Associated JIRA ticket number/Github issue number
<!-- For example: "Closes #1234" or "Fixes SWDEV-123456" -->
## What type of PR is this? (check all applicable)
- [ ] Refactor
- [ ] Feature
- [ ] Bug Fix
- [ ] Optimization
- [ ] Documentation Update
- [ ] Continuous Integration
## What were the changes?
<!-- Please give a short summary of the change. -->
## Why are these changes needed?
<!-- Please explain the motivation behind the change and why this solves the given problem. -->
## Updated CHANGELOG?
<!-- Needed for Release updates for a ROCm release. -->
- [ ] Yes
- [ ] No, Does not apply to this PR.
## Added/Updated documentation?
- [ ] Yes
- [ ] No, Does not apply to this PR.
## Additional Checks
- [ ] I have added tests relevant to the introduced functionality, and the unit tests are passing locally.
- [ ] Any dependent changes have been merged.
@@ -0,0 +1,76 @@
import os, re, sys
from typing import List, Optional
def is_checkbox(line: str) -> bool:
    """Return True when *line* is a Markdown task-list item (ticked or not) with text after the box."""
    found = re.match(r"^\s*-\s*\[[ xX]\]\s*.+", line)
    return found is not None
def is_checked(line: str) -> bool:
    """Return True when *line* is a Markdown task-list item whose box is ticked (``x`` or ``X``)."""
    found = re.match(r"^\s*-\s*\[[xX]\]\s*.+", line)
    return found is not None
def is_comment(line: str) -> bool:
    """Return True when the whole *line* is one HTML comment, ignoring surrounding whitespace."""
    found = re.match(r"^\s*<!--.*-->\s*$", line)
    return found is not None
def text_clean(lines: List[str]) -> str:
    """Return the visible text of *lines* with HTML comments and all whitespace removed.

    Comments are stripped wherever they appear: on their own line, spanning
    several lines, or trailing real text. An empty result therefore means the
    section holds nothing but template hints and blank space.

    Args:
        lines: The body lines of one section of the PR description.

    Returns:
        The remaining characters concatenated without whitespace; ``""`` when
        nothing but comments and whitespace was present.
    """
    # Join with newlines so a comment opened on one line and closed on a later
    # one is matched as a single span (DOTALL lets "." cross the line breaks).
    joined = "\n".join(lines)
    visible = re.sub(r"<!--.*?-->", "", joined, flags=re.DOTALL)
    return "".join(visible.split())
def validate_section(section_name: str, lines: List[str]) -> Optional[str]:
    """Validate one section of the PR description.

    A section containing task-list items counts as a checklist and needs at
    least one ticked item; any other section needs some text left after
    comments and whitespace are removed.

    Args:
        section_name: Header text of the section, used in the error message.
        lines: The section's body lines.

    Returns:
        An error message describing the problem, or ``None`` when the section is fine.
    """
    if any(is_checkbox(entry) for entry in lines):
        ticked = any(is_checked(entry) for entry in lines)
        if ticked:
            return None
        return f"Section {section_name} is a checklist without selections"

    if text_clean(lines):
        return None
    return f"Section {section_name} is empty text section"
def check_description(description: str) -> List[str]:
    """Split a PR description into ``##`` sections and validate each of them.

    Args:
        description: Raw Markdown body of the pull request.

    Returns:
        One error message per invalid section, in document order. The list is
        empty when every section is valid, and also when *description* itself
        is empty.
    """
    if not description:
        # pull_request_template is not merged yet, so treat as valid for now
        return []

    header_pattern = re.compile(r"^\s*##\s*(.+?)\s*$")
    parsed = []
    title = None
    body = []
    for raw_line in description.splitlines():
        found = header_pattern.match(raw_line)
        if found is None:
            # Lines before the first header belong to no section and are dropped.
            if title:
                body.append(raw_line)
            continue
        # A new header closes the section collected so far.
        if title:
            parsed.append((title, body))
        title, body = found.group(1), []
    if title:
        parsed.append((title, body))

    if not parsed:
        return ["No sections available, template is empty"]

    verdicts = (validate_section(name, content) for name, content in parsed)
    return [verdict for verdict in verdicts if verdict]
if __name__ == "__main__":
    # Script entry point: validate the description passed in the PR_DESCRIPTION
    # environment variable. Exits with status 0 when it is valid and with
    # status 1 after printing one line per problem otherwise.
    pr_description = os.getenv("PR_DESCRIPTION", "")
    errors = check_description(pr_description)
    if errors:
        print("\n".join(errors))
        # sys.exit rather than the bare exit(): the latter is injected by the
        # `site` module for interactive use and is not guaranteed to exist.
        sys.exit(1)
    print("All good")
    sys.exit(0)
+22
مشاهده پرونده
@@ -0,0 +1,22 @@
name: Clang format check
on:
pull_request:
types: [synchronize, opened]
jobs:
format:
runs-on: AMD-ROCm-Internal-dev1
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Install clang-format
run: |
sudo apt update && sudo apt install -y clang-format
- name: Run clang-format-check
id: clang-format
run: |
chmod +x .github/hooks/clang-format-check.sh
./.github/hooks/clang-format-check.sh --range "${{ github.event.pull_request.base.sha }}..${{ github.event.pull_request.head.sha }}"
+73
مشاهده پرونده
@@ -0,0 +1,73 @@
name: Keywords checker

# Fails a pull request when any keyword from the comma-separated KEYWORDS
# repository variable appears in a changed file, a commit message or the PR title.
on:
  pull_request:
    types: [opened, synchronize, reopened, edited]
    branches:
      - amd-staging
  workflow_dispatch:

jobs:
  check-keywords:
    runs-on: AMD-ROCm-Internal-dev1
    env:
      KEYWORDS: ${{ vars.KEYWORDS }}
      # PR metadata is attacker-controlled text. It is handed to the script through
      # environment variables instead of being expanded into the script source, so a
      # crafted title or branch name cannot inject shell commands.
      BASE_BRANCH: ${{ github.event.pull_request.base.ref }}
      HEAD_BRANCH: ${{ github.event.pull_request.head.ref }}
      PR_TITLE: ${{ github.event.pull_request.title }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v3
        with:
          fetch-depth: 0

      - name: Check keywords
        run: |
          set -e

          if [ -z "$KEYWORDS" ]; then
            echo "No keywords set. Skipping check"
            exit 0
          fi

          IFS=',' read -ra KEYWORDS_ARRAY <<< "$KEYWORDS"
          echo "Checking against list of keywords: ${KEYWORDS_ARRAY[*]}"

          MATCHED=0
          RANGE="origin/$BASE_BRANCH..origin/$HEAD_BRANCH"

          # Changed files. The loop reads from process substitution (not a pipe) so
          # that MATCHED is assigned in this shell and survives the loop.
          while IFS= read -r file; do
            [ -f "$file" ] || continue
            # "${KEYWORDS_ARRAY[@]}" yields one word per keyword; the [*] form would
            # glue all keywords into a single pattern that never matches.
            for keyword in "${KEYWORDS_ARRAY[@]}"; do
              # An empty entry (e.g. from "a,,b") would match every line.
              [ -n "$keyword" ] || continue
              if hits=$(grep -in -E -- "$keyword" "$file"); then
                while IFS= read -r line; do
                  echo "Matched in '$file': $line"
                done <<< "$hits"
                MATCHED=1
              fi
            done
          done < <(git diff --name-only "$RANGE")

          # Commit messages
          for commit in $(git log --format=%H "$RANGE"); do
            msg=$(git log -1 --format=%B "$commit")
            for keyword in "${KEYWORDS_ARRAY[@]}"; do
              [ -n "$keyword" ] || continue
              if grep -i -q -- "$keyword" <<< "$msg"; then
                echo "Match in commit $commit: $msg"
                MATCHED=1
              fi
            done
          done

          # PR title
          for keyword in "${KEYWORDS_ARRAY[@]}"; do
            [ -n "$keyword" ] || continue
            if grep -i -q -- "$keyword" <<< "$PR_TITLE"; then
              echo "Match in PR title"
              MATCHED=1
            fi
          done

          if [ "$MATCHED" -eq 1 ]; then
            echo "Keywords found, please see diagnostics higher"
            exit 1
          else
            echo "No keywords found"
            exit 0
          fi
+15
مشاهده پرونده
@@ -0,0 +1,15 @@
name: Rocm Validation Suite KWS
on:
push:
branches: [amd-staging, amd-mainline]
pull_request:
types: [opened, synchronize, reopened]
workflow_dispatch:
jobs:
kws:
if: ${{ github.event_name == 'pull_request' }}
uses: AMD-ROCm-Internal/rocm_ci_infra/.github/workflows/kws.yml@mainline
secrets: inherit
with:
pr_number: ${{github.event.pull_request.number}}
base_branch: ${{github.base_ref}}
@@ -0,0 +1,46 @@
name: Validate PR Title

# Enforces the "SWDEV-<num>" Jira prefix on the PR title and on every commit
# subject in the pull request (merge commits excepted).
on:
  pull_request:
    types: [opened, edited, synchronize, reopened]

jobs:
  validate-pr-title:
    runs-on: ubuntu-latest
    steps:
      - name: Check PR Title
        id: check-pr-title
        env:
          # The title is attacker-controlled text; pass it through the environment
          # instead of expanding it into the script source, which would allow
          # shell command injection.
          PR_TITLE: ${{ github.event.pull_request.title }}
        run: |
          if [[ ! "$PR_TITLE" =~ ^SWDEV-[0-9]+ ]]; then
            echo "::error::PR title must start with a Jira ticket ID, SWDEV-<num>"
            exit 1
          else
            echo "PR title is valid"
          fi

  validate-commit-messages:
    runs-on: AMD-ROCm-Internal-dev1
    steps:
      - name: Checkout code
        uses: actions/checkout@v3
        with:
          fetch-depth: 0

      - name: Check all commit messages
        id: validate-commit-messags
        env:
          # Branch names are attacker-controlled as well; see the note on PR_TITLE.
          BASE_REF: ${{ github.event.pull_request.base.ref }}
          HEAD_REF: ${{ github.event.pull_request.head.ref }}
        run: |
          COMMITS=$(git log --format="%H %s" "origin/$BASE_REF..origin/$HEAD_REF")
          echo "$COMMITS"
          echo "$COMMITS" | while read -r hash message; do
            # An empty commit list still yields one blank line; skip it rather
            # than reporting a non-existent commit as invalid.
            [ -n "$hash" ] || continue
            # printf (not echo -e) so backslashes in a commit subject print verbatim.
            printf '%s %s\n \n' "$hash" "$message"
            if [[ "$message" =~ ^SWDEV-[0-9]+ ]]; then
              echo "Valid JIRA ticket format"
            elif [[ "$message" =~ ^Merge\ branch ]]; then
              echo "Merge commits are allowed"
            else
              echo "::error:: $hash commit should start with Jira ticket ID, SWDEV-<num> or be a merge commit"
              exit 1
            fi
          done
+25
مشاهده پرونده
@@ -0,0 +1,25 @@
name: ROCm CI Caller
on:
pull_request:
branches: [amd-staging, amd-npi, release/rocm-rel-*, amd-mainline]
types: [opened, reopened, synchronize]
push:
branches: [amd-mainline]
workflow_dispatch:
issue_comment:
types: [created]
jobs:
call-workflow:
if: github.event_name != 'issue_comment' ||(github.event_name == 'issue_comment' && github.event.issue.pull_request && (startsWith(github.event.comment.body, '!verify') || startsWith(github.event.comment.body, '!linux-hip-psdb') || startsWith(github.event.comment.body, '!verify release') || startsWith(github.event.comment.body, '!verify retest')))
uses: AMD-ROCm-Internal/rocm_ci_infra/.github/workflows/rocm_ci.yml@mainline
secrets: inherit
with:
input_sha: ${{github.event_name == 'pull_request' && github.event.pull_request.head.sha || (github.event_name == 'push' && github.sha) || (github.event_name == 'issue_comment' && github.event.issue.pull_request.head.sha) || github.sha}}
input_pr_num: ${{github.event_name == 'pull_request' && github.event.pull_request.number || (github.event_name == 'issue_comment' && github.event.issue.number) || 0}}
input_pr_url: ${{github.event_name == 'pull_request' && github.event.pull_request.html_url || (github.event_name == 'issue_comment' && github.event.issue.pull_request.html_url) || ''}}
input_pr_title: ${{github.event_name == 'pull_request' && github.event.pull_request.title || (github.event_name == 'issue_comment' && github.event.issue.pull_request.title) || ''}}
repository_name: ${{ github.repository }}
base_ref: ${{github.event_name == 'pull_request' && github.event.pull_request.base.ref || (github.event_name == 'issue_comment' && github.event.issue.pull_request.base.ref) || github.ref}}
trigger_event_type: ${{ github.event_name }}
comment_text: ${{ github.event_name == 'issue_comment' && github.event.comment.body || '' }}
@@ -0,0 +1,22 @@
name: Validate PR desription
on:
pull_request:
types: [opened, edited, synchronize]
jobs:
validate-pr-description:
runs-on: AMD-ROCm-Internal-dev1
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: "3.13"
- name: Validate PR description
env:
PR_DESCRIPTION: ${{ github.event.pull_request.body }}
run: python .github/scripts/validate_pr_description.py
@@ -0,0 +1,22 @@
.*
!.gitignore
*.o
*.exe
*.swp
lib
packages
build
tags
samples/0_Intro/module_api/runKernel.hip.out
samples/0_Intro/module_api/vcpy_isa.code
samples/0_Intro/module_api/vcpy_isa.hsaco
samples/0_Intro/module_api/vcpy_kernel.co
samples/0_Intro/module_api/vcpy_kernel.code
samples/1_Utils/hipInfo/hipInfo
samples/1_Utils/hipDispatchLatency/hipDispatchLatency
utils/coverage/generateHipAPICoverage
utils/coverage/CoverageReport.xml
utils/coverage/coverageReportHTML/CoverageReport.html
utils/coverage/coverageReportHTML/testAPIs
utils/coverage/coverageReportHTML/testModules
@@ -0,0 +1,110 @@
/**
 * Sync, build and test hip-tests on a single Jenkins node.
 *
 * Stages, in order: check out hip-tests (with the PR patch) and clone hip, clr
 * and hipcc; build HIP from the clr workspace; build the Catch tests from the
 * hip-tests workspace; run ctest under a one-hour timeout.
 *
 * Inside the sh """...""" scripts an unescaped $name (e.g. $backendLabel) is
 * expanded by Groovy before the script runs, while \$name is left for bash.
 *
 * @param backendLabel Jenkins node label to run on. Labels matching "amd" take
 *                     the AMD branch of each shell step; any other label takes
 *                     the NVIDIA branch.
 */
def hipBuildTest(String backendLabel) {
    node(backendLabel) {
        stage("SYNC - ${backendLabel}") {
            // Checkout hip-tests repository with the PR patch
            dir("${WORKSPACE}/hip-tests") {
                checkout scm
                env.HIP_TESTS_DIR = "${WORKSPACE}" + "/hip-tests"
            }
            // Clone hip repository
            dir("${WORKSPACE}/hip") {
                git branch: 'develop',
                url: 'https://github.com/ROCm-Developer-Tools/hip'
                env.HIP_DIR = "${WORKSPACE}" + "/hip"
            }
            // Clone clr repository
            dir("${WORKSPACE}/clr") {
                git branch: 'develop',
                credentialsId: 'branch-credentials',
                url: 'https://github.com/ROCm-Developer-Tools/clr'
                env.CLR_DIR = "${WORKSPACE}" + "/clr"
            }
            // Clone hipcc repository
            dir("${WORKSPACE}/hipcc") {
                git branch: 'develop',
                credentialsId: 'branch-credentials',
                url: 'https://github.com/ROCm-Developer-Tools/hipcc'
                env.HIPCC_DIR = "${WORKSPACE}" + "/hipcc"
            }
        }
        stage("BUILD HIP - ${backendLabel}") {
            // Running the build on clr workspace
            // The result is installed into <clr>/build/install, which the next
            // stage uses as HIP_PATH.
            dir("${WORKSPACE}/clr") {
                sh """#!/usr/bin/env bash
set -x
rm -rf build
mkdir -p build
cd build
# Check if backend label contains string "amd" or backend host is a server with amd gpu
if [[ $backendLabel =~ amd ]]; then
cmake -DCLR_BUILD_HIP=ON -DHIP_PATH=\$PWD/install -DHIPCC_BIN_DIR=\$HIPCC_DIR/bin -DHIP_COMMON_DIR=\$HIP_DIR -DCMAKE_PREFIX_PATH="/opt/rocm/" -DCMAKE_INSTALL_PREFIX=\$PWD/install ..
else
cmake -DCLR_BUILD_HIP=ON -DHIP_PLATFORM=nvidia -DHIPCC_BIN_DIR=\$HIPCC_DIR/bin -DHIP_COMMON_DIR=\$HIP_DIR -DCMAKE_INSTALL_PREFIX=\$PWD/install ..
fi
make -j\$(nproc)
make install -j\$(nproc)
"""
            }
        }
        stage("BUILD HIP TESTS - ${backendLabel}") {
            // Running the build on HIP TESTS workspace
            dir("${WORKSPACE}/hip-tests") {
                // Point the tests at the HIP install produced by the previous stage.
                env.HIP_PATH = "${CLR_DIR}" + "/build/install"
                sh """#!/usr/bin/env bash
set -x
rm -rf build
mkdir -p build
cd build
echo "testing $HIP_PATH"
# Check if backend label contains string "amd" or backend host is a server with amd gpu
if [[ $backendLabel =~ amd ]]; then
cmake -DHIP_PLATFORM=amd -DHIP_PATH=\$CLR_DIR/build/install ../catch
else
export HIP_PLATFORM=nvidia
cmake -DHIP_PLATFORM=nvidia -DHIP_PATH=\$CLR_DIR/build/install ../catch
fi
make -j\$(nproc) build_tests
"""
            }
        }
        // Abort the test stage if it runs longer than one hour.
        timeout(time: 1, unit: 'HOURS') {
            stage("TEST - ${backendLabel}") {
                // Results are written as JUnit XML, one file name per platform.
                // The non-AMD run excludes three tests through ctest's -E regex.
                dir("${WORKSPACE}/hip-tests") {
                    sh """#!/usr/bin/env bash
set -x
cd build
if [[ $backendLabel =~ amd ]]; then
ctest --overwrite BuildDirectory=. --output-junit hiptest_output_catch_amd.xml
else
ctest --overwrite BuildDirectory=. --output-junit hiptest_output_catch_nvidia.xml -E 'Unit_hipMemcpyHtoD_Positive_Synchronization_Behavior|Unit_hipMemcpy_Positive_Synchronization_Behavior|Unit_hipFreeNegativeHost'
fi
"""
                }
            }
        }
    }
}
// Pipeline entry point: from the bootstrap node, run hipBuildTest once per
// backend label, with all backends executing in parallel.
timestamps {
    node('external-bootstrap') {
        skipDefaultCheckout()
        // labels belonging to each backend - AMD, NVIDIA
        String[] labels = ['hip-amd-gfx908-ubu2004', 'hip-nvidia-rtx5000-ubu2004']
        // Branch name -> closure map consumed by the `parallel` step below.
        buildMap = [:]
        labels.each { backendLabel ->
            echo "backendLabel: ${backendLabel}"
            buildMap[backendLabel] = { hipBuildTest(backendLabel) }
        }
        // failFast disabled: a failure on one backend does not abort the other.
        buildMap['failFast'] = false
        parallel buildMap
    }
}
@@ -0,0 +1,2 @@
* @cpaquot_amdeng @gandryey_amdeng @skudchad_amdeng @lmoriche_amdeng
@@ -0,0 +1,116 @@
# Contributing to hip-tests #
We welcome contributions to the hip-tests project. Please follow these details to help ensure your contributions will be successfully accepted.
If you want to contribute to our documentation, refer to {doc}`Contribute to ROCm docs <rocm:contribute/contributing>`.
## Issue Discussion ##
Please use the [GitHub Issue](https://github.com/ROCm/hip-tests/issues) tab to notify us of issues.
* Use your best judgement for issue creation. If your issue is already listed, upvote the issue and
comment or post to provide additional details, such as how you reproduced this issue.
* If you're not sure if your issue is the same, err on the side of caution and file your issue.
You can add a comment to include the issue number (and link) for the similar issue. If we evaluate
your issue as being the same as the existing issue, we'll close the duplicate.
* If your issue doesn't exist, use the issue template to file a new issue.
* When filing an issue, be sure to provide as much information as possible, including script output so
we can collect information about your configuration. This helps reduce the time required to
reproduce your issue.
* Check your issue regularly, as we may require additional information to successfully reproduce the
issue.
* You may also open an issue to ask questions to the maintainers about whether a proposed change
meets the acceptance criteria, or to discuss an idea pertaining to the library.
## Acceptance Criteria ##
HIP-tests is a C++ based Catch2 unit test framework that allows developers to test HIP API functionalities for AMD and NVIDIA GPUs from single source code.
Contributors wishing to make bug fixes or unit test enhancements should run their changes on both AMD and NVIDIA platforms.

### Doxygen Editing Guidelines ###
- Bugs should be marked with @bug near the code where the bug might be fixed. The @bug message will appear in the API description and also in the
doxygen bug list.
## Code Structure ##
hip-tests contains mainly three types of application/test codes,
- `catch/unit`
This contains catch2 unit tests for different kinds of feature functionalities in HIP, for example,
- `atomic` is to test all types of atomic operations,
- `compiler` contains tests to launch kernels,
- `memory` includes applications to test and use different kinds of memory related HIP APIs.
etc.
- `perftest`
This directory has some perftest examples.
- `samples`
This directory has many hip samples, including codes and instructions for building and executing different kinds of hip features and operations.
## Coding Style ##
Catch2 is a unit testing framework for C++, using cmake integration. For more information and a tutorial, refer to the [Catch2 documentation](https://github.com/catchorg/Catch2/blob/devel/docs/tutorial.md).
## Pull Request Guidelines ##
By creating a pull request, you agree to the statements made in the code license section. Your pull request should target the default branch. Our current default branch is the develop branch, which serves as our integration branch.
Follow existing best practice for writing a good Git commit message.
Some tips:
http://chris.beams.io/posts/git-commit/
https://robots.thoughtbot.com/5-useful-tips-for-a-better-commit-message
In particular :
- Use imperative voice, ie "Fix this bug", "Refactor the XYZ routine", "Update the doc".
Not : "Fixing the bug", "Fixed the bug", "Bug fix", etc.
- Subject should summarize the commit. Do not end subject with a period. Use a blank line
after the subject.
### Deliverables ###
hip-tests is an open source repository. Because of this, we include the following license description at the top of every source file.
If you create new source files in the repository, please include this text in them as well (replacing "xx" with the digits for the current year):
```
// Copyright (c) 20xx Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
```
### Process ###
After you create a PR, you can take a look at a diff of the changes you made using the PR's "Files" tab.
PRs must pass through the checks and the code review described in the [Acceptance Criteria](#acceptance-criteria) section before they can be merged.
Checks may take some time to complete. You can view their progress in the table near the bottom of the pull request page. You may also be able to use the links in the table
to view logs associated with a check if it fails.
During code reviews, another developer will take a look through your proposed change. If any modifications are requested (or further discussion about anything is
needed), they may leave a comment. You can follow up and respond to the comment, and/or create comments of your own if you have questions or ideas.
When a modification request has been completed, the conversation thread about it will be marked as resolved.
To update the code in your PR (eg. in response to a code review discussion), you can simply push another commit to the branch used in your pull request.
## References ##
* For hip-test repository information, refer to [README](https://github.com/ROCm/hip-tests).
* For how to build and run the HIP runtime and the HIP Catch2 unit tests from source, on either the AMD or the NVIDIA platform, refer to [build HIP](https://github.com/ROCm/HIP/blob/develop/docs/install/build.rst).
* The HIP samples give detailed instructions on how to build and run HIP applications; refer to [HIP samples](https://github.com/ROCm/hip-tests/tree/develop/samples).
@@ -0,0 +1,20 @@
Copyright (c) 2008 - 2024 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
@@ -0,0 +1,60 @@
## What is this repository for? ###
This repository provides unit tests for [HIP](https://github.com/ROCm/HIP) implementation.
## DISCLAIMER
The information presented in this document is for informational purposes only and may contain technical inaccuracies, omissions, and typographical errors. The information contained herein is subject to change and may be rendered inaccurate for many reasons, including but not limited to product and roadmap changes, component and motherboard version changes, new model and/or product releases, product differences between differing manufacturers, software changes, BIOS flashes, firmware upgrades, or the like. Any computer system has risks of security vulnerabilities that cannot be completely prevented or mitigated. AMD assumes no obligation to update or otherwise correct or revise this information. However, AMD reserves the right to revise this information and to make changes from time to time to the content hereof without obligation of AMD to notify any person of such revisions or changes. THIS INFORMATION IS PROVIDED “AS IS.” AMD MAKES NO REPRESENTATIONS OR WARRANTIES WITH RESPECT TO THE CONTENTS HEREOF AND ASSUMES NO RESPONSIBILITY FOR ANY INACCURACIES, ERRORS, OR OMISSIONS THAT MAY APPEAR IN THIS INFORMATION. AMD SPECIFICALLY DISCLAIMS ANY IMPLIED WARRANTIES OF NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR ANY PARTICULAR PURPOSE. IN NO EVENT WILL AMD BE LIABLE TO ANY PERSON FOR ANY RELIANCE, DIRECT, INDIRECT, SPECIAL, OR OTHER CONSEQUENTIAL DAMAGES ARISING FROM THE USE OF ANY INFORMATION CONTAINED HEREIN, EVEN IF AMD IS EXPRESSLY ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. AMD, the AMD Arrow logo, and combinations thereof are trademarks of Advanced Micro Devices, Inc. Other product names used in this publication are for identification purposes only and may be trademarks of their respective companies.
©2025 Advanced Micro Devices, Inc. All Rights Reserved.
## Repository branches
The hip-tests repository maintains several branches. The branches that are of importance are:
* Main branch: This is the stable branch. It is up to date with the latest release branch, for example, if the latest release is rocm-5.4, main branch will be the repository based on this release.
* Develop branch: This is the default branch, on which the new features are still under development and visible. While this may be of interest to many, it should be noted that this branch and the features under development might not be stable.
* Release branches. These are branches corresponding to each ROCM release, listed with release tags, such as rocm-5.4, etc.
## Release tagging
hip-tests releases typically follow a naming convention tied to each ROCm release to help differentiate them.
* rocm x.yy: These are the stable releases based on the ROCM release.
This type of release is typically made once a month.
## Build HIP Catch tests
For building HIP from source, please check instructions on the [HIP page](https://rocm.docs.amd.com/projects/HIP/en/latest/install/build.html).
HIP catch tests can be built via the following instructions:
1. Clone the hip-tests source code from the repository, specifying the branch. The default branch is `develop`; as an example,
```bash
$ git clone -b develop https://github.com/ROCm/hip-tests.git
$ export HIP_TESTS_DIR="$(readlink -f hip-tests)"
```
2. Build the catch tests
```bash
$ cd "$HIP_TESTS_DIR"
$ mkdir -p build; cd build
$ cmake ../catch/ -DHIP_PLATFORM=amd
$ make -j$(nproc) build_tests
$ ctest # run tests
```
HIP catch tests are built under the folder `$HIP_TESTS_DIR/build`.
### Build HIP Catch2 standalone test
HIP Catch2 supports building standalone tests, for example,
```bash
$ hipcc $HIP_TESTS_DIR/catch/unit/memory/hipPointerGetAttributes.cc -I ./catch/include ./catch/hipTestMain/standalone_main.cc -I ./catch/external/Catch2 -o hipPointerGetAttributes
$ ./hipPointerGetAttributes
```
### Building with address sanitizer
To build catch tests with Address Sanitizer options, use the cmake option `-DENABLE_ADDRESS_SANITIZER=ON`.
@@ -0,0 +1,8 @@
# Common Tests - Test independent of all platforms
set(TEST_SRC
add.cc
)
hip_add_exe_to_target(NAME ABMAddKernels
TEST_SRC ${TEST_SRC}
TEST_TARGET_NAME build_tests)
@@ -0,0 +1,41 @@
#include <hip_test_common.hh>
#include <iostream>
// Element-wise addition kernel: c[i] = a[i] + b[i] for i < size.
// The element index is taken from threadIdx.x alone, so the kernel is written
// for a single-block launch with at least `size` threads.
template <typename T> __global__ void add(T* a, T* b, T* c, size_t size) {
  const size_t idx = threadIdx.x;
  if (idx >= size) {
    return;  // surplus thread: nothing to compute
  }
  c[idx] = a[idx] + b[idx];
}
/**
 * Adds two device vectors with the `add` kernel and compares the result with a
 * host-computed reference, for several element types and vector lengths.
 *
 * Every HIP call is checked with HIP_CHECK, and the device output is copied into
 * its own host vector so the input data is not overwritten before the comparison.
 */
TEMPLATE_TEST_CASE("ABM_AddKernel_MultiTypeMultiSize", "", int, long, float, long long, double) {
  auto size = GENERATE(as<size_t>{}, 100, 500, 1000);
  const size_t bytes = sizeof(TestType) * size;

  TestType *d_a, *d_b, *d_c;
  HIP_CHECK(hipMalloc(&d_a, bytes));
  HIP_CHECK(hipMalloc(&d_b, bytes));
  HIP_CHECK(hipMalloc(&d_c, bytes));

  // Inputs are 1..size in both vectors, so the expected sum is 2 * (i + 1).
  std::vector<TestType> a, b, expected;
  a.reserve(size);
  b.reserve(size);
  expected.reserve(size);
  for (size_t i = 0; i < size; i++) {
    const auto value = static_cast<TestType>(i + 1);
    a.push_back(value);
    b.push_back(value);
    expected.push_back(2 * value);
  }

  HIP_CHECK(hipMemcpy(d_a, a.data(), bytes, hipMemcpyHostToDevice));
  HIP_CHECK(hipMemcpy(d_b, b.data(), bytes, hipMemcpyHostToDevice));

  // One block of `size` threads: the kernel indexes by threadIdx.x only.
  // NOTE(review): assumes `size` never exceeds the device's per-block thread limit.
  hipLaunchKernelGGL(add<TestType>, 1, size, 0, 0, d_a, d_b, d_c, size);
  HIP_CHECK(hipGetLastError());

  std::vector<TestType> result(size);
  HIP_CHECK(hipMemcpy(result.data(), d_c, bytes, hipMemcpyDeviceToHost));

  HIP_CHECK(hipFree(d_a));
  HIP_CHECK(hipFree(d_b));
  HIP_CHECK(hipFree(d_c));

  REQUIRE(result == expected);
}
@@ -0,0 +1 @@
add_subdirectory(AddKernels)
@@ -0,0 +1,378 @@
cmake_minimum_required(VERSION 3.16.8)
# to skip the simple compiler test
set(CMAKE_C_COMPILER_WORKS 1)
set(CMAKE_CXX_COMPILER_WORKS 1)
project(hiptests)
option(ENABLE_ADDRESS_SANITIZER "Option to enable ASAN build" OFF)
option(BUILD_SHARED_LIBS "Option for testing shared libraries" ON)
option(TEST_CLOCK_CYCLE "Option to use clock64" OFF)
if (TEST_CLOCK_CYCLE)
add_definitions(-DTEST_CLOCK_CYCLE)
endif()
# flag to generate standalone exe per src file.
message(STATUS "STANDALONE_TESTS : ${STANDALONE_TESTS}")
if(NOT WIN32)
set(CPACK_SET_DESTDIR ON CACHE BOOL "Installer package will install hip catch to CMAKE_INSTALL_PREFIX instead of CPACK_PACKAGING_INSTALL_PREFIX")
endif()
# Check if platform is set
if(NOT HIP_PLATFORM STREQUAL "amd" AND NOT HIP_PLATFORM STREQUAL "nvidia")
message(FATAL_ERROR "Unexpected HIP_PLATFORM: " ${HIP_PLATFORM})
endif()
if(HIP_PLATFORM STREQUAL "amd")
if(UNIX AND DEFINED ROCM_PATH)
# Read -DROCM_PATH and set CXX_FLAGS for amd platform only
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --rocm-path=${ROCM_PATH}")
endif()
if(DEFINED HIP_PATH)
# Read -DHIP_PATH and set CXX_FLAGS for amd platform only
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --hip-path=${HIP_PATH}")
endif()
endif()
# Read -DHIP_PATH
# If not set read env{HIP_PATH} only on Windows
if(WIN32)
if(NOT DEFINED HIP_PATH)
if(DEFINED ENV{HIP_PATH})
set(HIP_PATH $ENV{HIP_PATH} CACHE STRING "HIP Path")
endif()
endif()
endif()
if(NOT DEFINED HIP_PATH)
if(DEFINED ROCM_PATH)
set(HIP_PATH ${ROCM_PATH})
else()
set(HIP_PATH "/opt/rocm")
endif()
endif()
if(NOT DEFINED ROCM_PATH)
set(ROCM_PATH "/opt/rocm")
endif()
message(STATUS "HIP_PATH: ${HIP_PATH}")
message(STATUS "ROCM_PATH: ${ROCM_PATH}")
if (WIN32)
set(HIPCC_EXEC "hipcc.exe")
set(HIPCONFIG_EXEC "hipconfig.exe")
else()
set(HIPCC_EXEC "hipcc")
set(HIPCONFIG_EXEC "hipconfig")
endif()
set(CMAKE_C_COMPILER "${HIP_PATH}/bin/${HIPCC_EXEC}")
set(CMAKE_CXX_COMPILER "${HIP_PATH}/bin/${HIPCC_EXEC}")
execute_process(COMMAND ${HIP_PATH}/bin/${HIPCONFIG_EXEC} --version
OUTPUT_VARIABLE HIP_VERSION
OUTPUT_STRIP_TRAILING_WHITESPACE)
if(NOT WIN32)
if(NOT BUILD_SHARED_LIBS)
list(APPEND CMAKE_PREFIX_PATH ${ROCM_PATH} ${ROCM_PATH}/llvm)
find_package(amd_comgr)
find_package(hsa-runtime64)
link_libraries(hsa-runtime64::hsa-runtime64)
link_libraries(amd_comgr)
link_libraries(hiprtc-builtins)
endif() # end BUILD_SHARED_LIBS
endif() # end win32
# enforce c++17
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --std=c++17")
# Address sanitizer options
if(ENABLE_ADDRESS_SANITIZER)
message(STATUS "Building catch tests with Address Sanitizer options")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=address -shared-libasan -g -gz")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address -shared-libasan -g -gz")
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--enable-new-dtags -fuse-ld=lld -fsanitize=address -shared-libasan -g -gz -Wl,--build-id=sha1 -L${ROCM_PATH}/lib/asan")
endif()
# Split the hipconfig version string "<major>.<minor>.<patch>[-<githash>]" into its
# components. The expansions are quoted and the field counts checked so that an
# empty or unexpected string produces the explicit diagnostics below instead of an
# argument-count / index error from string() or list().
if("${HIP_VERSION}" STREQUAL "")
  message(FATAL_ERROR "Could not determine the HIP version: "
                      "'${HIP_PATH}/bin/${HIPCONFIG_EXEC} --version' produced no output")
endif()
string(REPLACE "." ";" VERSION_LIST "${HIP_VERSION}")
list(LENGTH VERSION_LIST _hip_version_field_count)
if(_hip_version_field_count LESS 3)
  message(FATAL_ERROR "Unexpected HIP version '${HIP_VERSION}', "
                      "expected <major>.<minor>.<patch>[-<githash>]")
endif()
list(GET VERSION_LIST 0 HIP_VERSION_MAJOR)
list(GET VERSION_LIST 1 HIP_VERSION_MINOR)
list(GET VERSION_LIST 2 HIP_VERSION_PATCH_GITHASH)

# The third field carries the patch level and, optionally, the git hash.
string(REPLACE "-" ";" VERSION_LIST "${HIP_VERSION_PATCH_GITHASH}")
list(GET VERSION_LIST 0 HIP_VERSION_PATCH)
list(LENGTH VERSION_LIST _hip_patch_field_count)
if(_hip_patch_field_count GREATER 1)
  list(GET VERSION_LIST 1 HIP_VERSION_GITHASH)
else()
  # No "-<githash>" suffix present.
  set(HIP_VERSION_GITHASH "")
endif()

# Packaging patch version: ROCM_LIBPATCH_VERSION from the environment wins,
# otherwise the git hash (when there is one) is appended to the patch level.
if(DEFINED ENV{ROCM_LIBPATCH_VERSION})
  set(HIP_PACKAGING_VERSION_PATCH ${HIP_VERSION_PATCH}.$ENV{ROCM_LIBPATCH_VERSION})
elseif("${HIP_VERSION_GITHASH}" STREQUAL "")
  set(HIP_PACKAGING_VERSION_PATCH ${HIP_VERSION_PATCH})
else()
  set(HIP_PACKAGING_VERSION_PATCH ${HIP_VERSION_PATCH}-${HIP_VERSION_GITHASH})
endif()
if(NOT DEFINED CATCH2_PATH)
if(DEFINED ENV{CATCH2_PATH})
set(CATCH2_PATH $ENV{CATCH2_PATH} CACHE STRING "Catch2 Path")
else()
set(CATCH2_PATH "${CMAKE_CURRENT_LIST_DIR}/external/Catch2")
endif()
endif()
message(STATUS "Catch2 Path: ${CATCH2_PATH}")
# Set JSON Parser path
if(NOT DEFINED JSON_PARSER)
if(DEFINED ENV{JSON_PARSER})
set(JSON_PARSER $ENV{JSON_PARSER} CACHE STRING "JSON Parser Path")
else()
set(JSON_PARSER "${CMAKE_CURRENT_LIST_DIR}/external/picojson")
endif()
endif()
message(STATUS "Searching Catch2 in: ${CMAKE_CURRENT_LIST_DIR}/external")
find_package(Catch2 REQUIRED
PATHS
${CMAKE_CURRENT_LIST_DIR}/external
PATH_SUFFIXES
Catch2/cmake/Catch2
)
include(Catch)
include(CTest)
# path used for generating the *_include.cmake file
set(CATCH2_INCLUDE ${CATCH2_PATH}/cmake/Catch2/catch_include.cmake.in)
include_directories(
${CATCH2_PATH}
"./include"
"./kernels"
${HIP_PATH}/include
${JSON_PARSER}
)
option(RTC_TESTING "Run tests using HIP RTC to compile the kernels" OFF)
if (RTC_TESTING)
add_definitions(-DRTC_TESTING=ON)
endif()
add_definitions(-DKERNELS_PATH="${CMAKE_CURRENT_SOURCE_DIR}/kernels/")
set(CATCH_BUILD_DIR catch_tests)
set(HIP_TEST_CONFIG_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/${CATCH_BUILD_DIR}/config)
file(MAKE_DIRECTORY ${HIP_TEST_CONFIG_BINARY_DIR})
file(GLOB JSON_FILES "./hipTestMain/config/*.json")
foreach(json IN LISTS JSON_FILES)
file(COPY ${json}
DESTINATION ${HIP_TEST_CONFIG_BINARY_DIR})
endforeach()
set(CATCH_SCRIPT_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/${CATCH_BUILD_DIR}/script)
file(COPY ./external/Catch2/cmake/Catch2/CatchAddTests.cmake
DESTINATION ${CATCH_SCRIPT_BINARY_DIR})
file(COPY ./external/Catch2/cmake/Catch2/catch_include.cmake
DESTINATION ${CATCH_SCRIPT_BINARY_DIR})
set(ADD_SCRIPT_PATH ${CATCH_SCRIPT_BINARY_DIR}/CatchAddTests.cmake)
set(CATCH_INCLUDE_PATH ${CATCH_SCRIPT_BINARY_DIR}/catch_include.cmake)
if (WIN32)
  configure_file(catchProp_in_rc.in ${CMAKE_CURRENT_BINARY_DIR}/catchProp.rc @ONLY)
  # Resolve <HIP_PATH>/../lc/bin/llvm-rc.exe to a normalized path.
  # cmake_path() is only available from CMake 3.20, which is newer than the minimum
  # version this file declares, so the equivalent pre-3.20 commands are used.
  file(TO_CMAKE_PATH "${HIP_PATH}/../lc/bin/llvm-rc.exe" LLVM_RC_PATH)
  get_filename_component(LLVM_RC_PATH "${LLVM_RC_PATH}" ABSOLUTE)

  # generates the .res files to be used by executables to populate the properties
  # expects LC folder with clang, llvm-rc to be present one level up of HIP
  execute_process(COMMAND "${LLVM_RC_PATH}" "${CMAKE_CURRENT_BINARY_DIR}/catchProp.rc"
                  OUTPUT_VARIABLE RC_OUTPUT)
  set(PROP_RC ${CMAKE_CURRENT_BINARY_DIR})
  # When args to linker exceeds max chars.
  # msbuild writes args to a rsp file.
  # This is used to reference the obj file correctly
  SET(CMAKE_C_RESPONSE_FILE_LINK_FLAG "")
  SET(CMAKE_CXX_RESPONSE_FILE_LINK_FLAG "")
endif()
if(HIP_PLATFORM STREQUAL "amd")
add_compile_options(-Wall -Wextra -Wvla -Werror -Wno-deprecated -Wno-option-ignored)
endif()
cmake_policy(PUSH)
if(POLICY CMP0037)
cmake_policy(SET CMP0037 OLD)
endif()
# Turn off CMAKE_HIP_ARCHITECTURES Feature if cmake version is 3.21+
if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.21.0)
set(CMAKE_HIP_ARCHITECTURES OFF)
endif()
message(STATUS "CMAKE HIP ARCHITECTURES: ${CMAKE_HIP_ARCHITECTURES}")
# Note to pass arch use format like -DOFFLOAD_ARCH_STR="--offload-arch=gfx900 --offload-arch=gfx906"
# having space at the start/end of OFFLOAD_ARCH_STR can cause build failures
# Identify the GPU Targets.
# This is done due to limitation of rocm_agent_enumerator
# While building test parallelly, rocm_agent_enumerator can fail and give out an empty target
# That results in hipcc building the test for gfx803 (the default target)
# preference to pass arch -
# OFFLOAD_ARCH_STR
# rocm_agent_enumerator
# Determine the GPU targets: an explicit OFFLOAD_ARCH_STR wins, otherwise the
# targets reported by rocm_agent_enumerator are used (AMD platform on UNIX only).
if(NOT DEFINED OFFLOAD_ARCH_STR
   AND EXISTS "${ROCM_PATH}/bin/rocm_agent_enumerator"
   AND HIP_PLATFORM STREQUAL "amd" AND UNIX)
  execute_process(COMMAND "${ROCM_PATH}/bin/rocm_agent_enumerator"
                  OUTPUT_VARIABLE HIP_GPU_ARCH
                  RESULT_VARIABLE ROCM_AGENT_ENUM_RESULT
                  OUTPUT_STRIP_TRAILING_WHITESPACE)

  # The enumerator prints one target per line. Convert the output to a list and
  # drop gfx000 wherever it appears; the output may legitimately be empty, so
  # every expansion is quoted.
  set(HIP_GPU_ARCH_LIST "")
  if(NOT "${HIP_GPU_ARCH}" STREQUAL "")
    string(REGEX REPLACE "[\r\n]+" ";" HIP_GPU_ARCH_LIST "${HIP_GPU_ARCH}")
    list(REMOVE_ITEM HIP_GPU_ARCH_LIST "gfx000")
    list(REMOVE_DUPLICATES HIP_GPU_ARCH_LIST)
  endif()
  # Keep HIP_GPU_ARCH as the newline-separated target list without gfx000.
  string(REPLACE ";" "\n" HIP_GPU_ARCH "${HIP_GPU_ARCH_LIST}")
  list(LENGTH HIP_GPU_ARCH_LIST HIP_GPU_ARCH_LEN)

  if(HIP_GPU_ARCH_LEN GREATER 0)
    set(OFFLOAD_ARCH_STR "")
    foreach(_hip_gpu_arch ${HIP_GPU_ARCH_LIST})
      set(OFFLOAD_ARCH_STR "--offload-arch=${_hip_gpu_arch} ${OFFLOAD_ARCH_STR}")
    endforeach()
    # Leading/trailing spaces in OFFLOAD_ARCH_STR can cause build failures.
    string(STRIP "${OFFLOAD_ARCH_STR}" OFFLOAD_ARCH_STR)
  else()
    message(STATUS "ROCm Agent Enumerator found no valid architectures (exit code: ${ROCM_AGENT_ENUM_RESULT})")
  endif()
elseif(DEFINED OFFLOAD_ARCH_STR)
  # Quoted so that an empty, user-provided OFFLOAD_ARCH_STR is not a hard error.
  string(REPLACE "--offload-arch=" "" HIP_GPU_ARCH_LIST "${OFFLOAD_ARCH_STR}")
endif()
if(DEFINED OFFLOAD_ARCH_STR)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OFFLOAD_ARCH_STR} ")
endif()
message(STATUS "Using offload arch string: ${OFFLOAD_ARCH_STR}")
find_package(Git)
# get hip-tests commit short hash
execute_process(COMMAND ${GIT_EXECUTABLE} rev-parse --short HEAD
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
RESULT_VARIABLE git_result
OUTPUT_VARIABLE git_output
OUTPUT_STRIP_TRAILING_WHITESPACE)
if(git_result EQUAL 0)
set(HIP_TESTS_GITHASH ${git_output})
endif()
# Build hip_gpu_arch_list: the unique base architecture names taken from the
# --offload-arch flags in OFFLOAD_ARCH_STR.
# The expansion is quoted so OFFLOAD_ARCH_LIST is always defined (possibly empty);
# an unquoted expansion of an undefined OFFLOAD_ARCH_STR would unset it, and the
# list() call below may reject an undefined list variable.
set(OFFLOAD_ARCH_LIST "${OFFLOAD_ARCH_STR}")
separate_arguments(OFFLOAD_ARCH_LIST)
list(REMOVE_DUPLICATES OFFLOAD_ARCH_LIST)

set(hip_gpu_arch_list "")
foreach(__offload_arch ${OFFLOAD_ARCH_LIST})
  # "--offload-arch=<arch>[:<feature>...]" -> keep only the part before the first ':'.
  string(REPLACE "--offload-arch=" "" targetid "${__offload_arch}")
  string(REPLACE ":" ";" target_id_list "${targetid}")
  list(GET target_id_list 0 arch)
  list(APPEND hip_gpu_arch_list "${arch}")
endforeach()
# Different target ids can share the same base architecture.
list(REMOVE_DUPLICATES hip_gpu_arch_list)
if(WIN32)
set(configToUse "config_amd_windows")
set(config_file ${CMAKE_SOURCE_DIR}/hipTestMain/config/${configToUse})
set(json_file ${HIP_TEST_CONFIG_BINARY_DIR}/${configToUse}.json)
set(CLANG_CPP "${HIP_PATH}/../lc/bin/clang-cpp.exe")
set(cmd "${CLANG_CPP} -P -DGITHASH=\"${HIP_VERSION_GITHASH}\" ${config_file}>${json_file}")
message(${cmd})
execute_process(COMMAND cmd.exe /C ${cmd}
RESULT_VARIABLE json_result)
message(STATUS "${configToUse}.json file generation result: ${json_result}")
else()
set(configToUse "config_amd_linux")
foreach(arch ${hip_gpu_arch_list})
set(config_file ${CMAKE_SOURCE_DIR}/hipTestMain/config/${configToUse})
set(json_file ${HIP_TEST_CONFIG_BINARY_DIR}/${configToUse}_${arch}.json)
set(cmd "${ROCM_PATH}/llvm/bin/clang-cpp -P -D${arch} -DGITHASH=\\\"${HIP_VERSION_GITHASH}\\\" ${config_file}>${json_file}")
message(${cmd})
execute_process(COMMAND bash -c ${cmd}
RESULT_VARIABLE json_result)
message(STATUS "${configToUse}_${arch}.json file generation result: ${json_result}")
endforeach()
endif()
# prints the catch info to a file
string(TIMESTAMP _timestamp UTC)
set(_autogen "# Auto-generated by cmake on ${_timestamp} UTC\n")

# HIP_GPU_ARCH_LIST can be a ;-separated CMake list. Written through an unquoted
# list expansion the ';' separators are dropped and the names run together, so the
# entries are joined with spaces and the content is passed as one quoted string.
string(REPLACE ";" " " _catch_archs "${HIP_GPU_ARCH_LIST}")
set(_catchInfo "${_autogen}")
string(APPEND _catchInfo "HIP_VERSION=${HIP_VERSION}\n")
string(APPEND _catchInfo "HIP_PLATFORM=${HIP_PLATFORM}\n")
string(APPEND _catchInfo "HIP_TESTS_GITHASH=${HIP_TESTS_GITHASH}\n")
string(APPEND _catchInfo "ARCHS=${_catch_archs}\n")
file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/${CATCH_BUILD_DIR}/catchInfo.txt "${_catchInfo}")

# allows user to run ctest from catch_tests level
set(_subdirs "${_autogen}subdirs(..)\n")
file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/${CATCH_BUILD_DIR}/CTestTestfile.cmake "${_subdirs}")
find_package(Python3 COMPONENTS Interpreter REQUIRED)
# copy python script and headers to catch test package
set(CATCH_INCLUDE_DIR include)
execute_process(COMMAND ${CMAKE_COMMAND} -E
make_directory ${CMAKE_CURRENT_BINARY_DIR}/${CATCH_BUILD_DIR}/${CATCH_INCLUDE_DIR})
file(COPY ./unit/compileAndCaptureOutput.py
DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/${CATCH_BUILD_DIR}/unit )
file(COPY ./include/hip_test_common.hh DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/${CATCH_BUILD_DIR}/${CATCH_INCLUDE_DIR})
file(COPY ./include/hip_test_context.hh DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/${CATCH_BUILD_DIR}/${CATCH_INCLUDE_DIR})
file(COPY ./external/Catch2/catch.hpp DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/${CATCH_BUILD_DIR}/${CATCH_INCLUDE_DIR})
# Enable device lambda on nvidia platforms
if(HIP_PLATFORM STREQUAL "nvidia")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --extended-lambda")
endif()
# Disable CXX extensions (gnu++11 etc)
set(CMAKE_CXX_EXTENSIONS OFF)
add_custom_target(build_tests)
# Tests folder
add_subdirectory(unit ${CATCH_BUILD_DIR}/unit)
add_subdirectory(ABM ${CATCH_BUILD_DIR}/ABM)
add_subdirectory(kernels ${CATCH_BUILD_DIR}/kernels)
add_subdirectory(hipTestMain ${CATCH_BUILD_DIR}/hipTestMain)
add_subdirectory(stress ${CATCH_BUILD_DIR}/stress)
add_subdirectory(TypeQualifiers ${CATCH_BUILD_DIR}/TypeQualifiers)
add_subdirectory(perftests ${CATCH_BUILD_DIR}/perftests)
add_subdirectory(multiproc ${CATCH_BUILD_DIR}/multiproc)
add_subdirectory(performance ${CATCH_BUILD_DIR}/performance)
add_custom_target(gen_coverage
COMMAND ${CMAKE_COMMAND} -B build/
COMMAND ${CMAKE_COMMAND} --build build/
COMMAND ./build/generateHipAPICoverage ${HIP_PATH}/include
WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR}/../utils/coverage
COMMENT "Generating Test Coverage Report")
cmake_policy(POP)
# packaging the tests
# make package_test to generate packages for test
set(BUILD_DIR ${CMAKE_CURRENT_BINARY_DIR}/packages/)
add_subdirectory(packaging)
# package_test: re-run the configure step, then build the packages.
if(UNIX)
  # Build the "package" target through cmake --build rather than calling make
  # directly, so the target also works when the build tree uses another generator.
  add_custom_target(package_test COMMAND ${CMAKE_COMMAND} .
                    COMMAND rm -rf *.deb *.rpm *.tar.gz
                    COMMAND ${CMAKE_COMMAND} --build . --target package
                    WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
else()
  file(TO_NATIVE_PATH ${PROJECT_BINARY_DIR} CATCH_BINARY_DIR)
  # Use the cpack that ships with the running CMake instead of relying on PATH.
  add_custom_target(package_test COMMAND ${CMAKE_COMMAND} .
                    COMMAND ${CMAKE_CPACK_COMMAND}
                    WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
endif()
# Doxygen documentation
# check if doxygen is installed
find_package(Doxygen)
if(DOXYGEN_FOUND)
# Configure Doxyfile with proper paths
set(DOXYGEN_IN ${CMAKE_CURRENT_SOURCE_DIR}/DoxyfileTests)
set(DOXYGEN_OUT ${CMAKE_CURRENT_BINARY_DIR}/DoxyfileTests.out)
configure_file(${DOXYGEN_IN} ${DOXYGEN_OUT} @ONLY)
add_custom_target(build_docs
COMMAND ${DOXYGEN_EXECUTABLE} ${DOXYGEN_OUT}
COMMENT "Generating test plan documentation with Doxygen")
else()
message(STATUS "Doxygen was not found. Building test plan documentation will not be available")
endif()
تفاوت فایلی نمایش داده نمی شود زیرا این فایل بسیار بزرگ است Diff را بارگزاری کن
@@ -0,0 +1,204 @@
# HIP Tests - with Catch2
## Intro and Motivation
HIP Tests used the HIT framework (a custom framework tailored for HIP) to add, build and run tests. As time progressed, the framework grew large and took a substantial amount of effort to maintain and extend. It also took a substantial amount of time to configure. We took this opportunity to rewrite HIP's testing framework and port the test infrastructure to the Catch2 format.
## How to write tests
Tests in Catch2 are declared via ```TEST_CASE```.
[Please read the Catch2 documentation on how to write test cases](https://github.com/catchorg/Catch2/blob/v2.13.6/docs/tutorial.md#top)
[Catch2 Detailed Reference](https://github.com/catchorg/Catch2/blob/v2.13.6/docs/Readme.md#top)
## Taking care of existing features
- Don't build on platform: EXCLUDE_HIP_PLATFORM, can be done via CMake by adding the source inside if(HIP_PLATFORM == amd/nvidia).
- HIPCC_OPTIONS/CLANG Options: Can be done via: set_source_files_properties(src.cc PROPERTIES COMPILE_FLAGS "…").
- Additional libraries: Can be done via target_link_libraries()
- Multiple runs with different args: This can be done with Catch2's feature: GENERATE(…)
Running a subtest: ctest -R "..." (regex to match the subtest name)
## New Features
- Skip test without recompiling tests, by addition of a json file. Default name is ```config.json``` , this can be overridden by using the variable ```HIP_CATCH_EXCLUDE_FILE=some_config.json```.
- Json file supports regex. Ex: All tests which have the word Memset in their name can be skipped using `*Memset*`
- Support multiple skip test list which can be set via environment variable, so you can have multiple files containing different skip test lists and can pick and choose among them depending on your platform and os.
- Better CI integration via xunit compatible output
## Testing Context
HIP testing framework gives you a context for each test. This context will have useful information about the environment your test is running.
Some useful functions are:
- `bool isWindows()` : true if os is windows
- `bool isLinux()` : true if os is linux
- `bool isAmd()` : true if platform is AMD
- `bool isNvidia()` : true if platform is NVIDIA
This information can be accessed in any test via using: `TestContext::get().isAmd()`.
## Adding test for a specific platform
There might be some functionality which is not present on some platforms. Those tests can be hidden inside following macros.
- ```HT_AMD``` is 1 when tests are running on AMD platform and 0 on NVIDIA.
- ```HT_NVIDIA``` is 1 when tests are running on NVIDIA platform and 0 on AMD
Usage:
```cpp
#if HT_AMD
TEST_CASE("hipExtAPIs") {
// ...
}
#endif
```
## Config file schema
Some tests can be skipped using a config file placed in hipTestMain/config folder. Multiple config files can be defined for different configurations.
The naming convention for the file needs to be "config_platform_os_archname.json"
Platform and os are mandatory.
Arch name is optional and takes precedence while loading the json file.
Currently the json files need to be manually chosen by the executor for the architecture of choice.
example:
config_amd_windows.json
config_nvidia_windows.json
The schema of the json file is as follows:
```json
{
"DisabledTests":
[
"TestName1",
"TestName2",
...
]
}
```
## Environment Variables
- `HIP_CATCH_EXCLUDE_FILE` : This variable can be set to the config file name or full path. Disabled tests will be read from this.
- `HT_LOG_ENABLE` : This is for debugging the HIP Test Framework itself. Setting it to 1, all `LogPrintf` will be printed on screen
## Test Macros
### Single Thread Macros
These macros are to be used when your test is calling HIP APIs via the main thread.
- `HIP_CHECK` : This macro takes in a HIP API and tests for its result to be either ```hipSuccess``` or ```hipErrorPeerAccessAlreadyEnabled```.
- Usage: ```HIP_CHECK(hipMalloc(&dPtr, 10));```
- ```HIP_CHECK_ERROR``` : This macro takes in a HIP API and tests its result against a provided result. This can be used when the API is expected to fail with a particular result.
- Usage: ```HIP_CHECK_ERROR(hipMalloc(&dPtr, 0), hipErrorInvalidValue);```
- ```HIPRTC_CHECK``` : This macro takes in a HIPRTC API and tests its result against HIPRTC_SUCCESS.
- Usage: ```HIPRTC_CHECK(hiprtcCompileProgram(prog, count, options));```
- ```HIP_ASSERT``` : This macro takes in a bool condition as input and does a ```REQUIRE``` on the condition.
- Usage: ```HIP_ASSERT(result == 10);```
### Multi Thread Macros
These macros are to be used when you call HIP APIs in a multi-threaded way. They exist because the Catch2 ```REQUIRE``` and ```CHECK``` macros cannot handle multi-threaded calls. To solve this problem, two macros are added, ```HIP_CHECK_THREAD``` and ```REQUIRE_THREAD```, which can be used to check the result of HIP APIs and test assertions respectively. The results can be validated after the threads join via ```HIP_CHECK_THREAD_FINALIZE```.
Note: These should be used in ```std::thread``` only. For multi-process guidelines, see [MultiProc Macros](#multi-process-macros) and [SpawnProc Class](#multiproc-management-class)
- ```HIP_CHECK_THREAD``` : This macro takes in a HIP API and tests for its result to be either ```hipSuccess``` or ```hipErrorPeerAccessAlreadyEnabled```. It can also tell other threads if an error has occurred in one of the HIP APIs and can prematurely stop the threads.
- ```REQUIRE_THREAD``` : This macro takes in a bool condition and tests for its result to be true. If this check fails, it can signal other threads to terminate early.
- ```HIP_CHECK_THREAD_FINALIZE``` : This macro checks for the results logged by ```HIP_CHECK_THREAD```. This needs to be called after the threads have joined.
Please also note that you can not return values in functions calling ```HIP_CHECK_THREAD``` or ```REQUIRE_THREAD``` macro.
Usage:
```cpp
auto threadFunc = []() {
int *dPtr{nullptr};
HIP_CHECK_THREAD(hipMalloc(&dPtr, 10));
REQUIRE_THREAD(dPtr != nullptr);
// Some other work
};
// Launch threads
std::vector<std::thread> threadPool;
for(...) {
threadPool.emplace_back(std::thread(threadFunc));
}
// Join threads
for(auto &i : threadPool) {
i.join();
}
// Validate all results
HIP_CHECK_THREAD_FINALIZE();
```
### Skipping Tests if certain criteria is not met
If there arises a condition where certain flag is disabled and due to which a test can not run at that time, the following macro can be of use. It will highlight the test in ctest report as well.
- ```HIP_SKIP_TEST``` : The api takes in an input of the reason as well and prints out the line HIP_SKIP_THIS_TEST. This causes ctest to mark the test as skipped and the test shows up in the report as skipped prompting proper response from the team.
Usage:
```cpp
TEST_CASE("TestOnlyOnXnack") {
if(!XNACKEnabled) {
HipTest::HIP_SKIP_TEST("Test only runs on system with XNACK enabled");
return;
}
// Rest of test functionality
}
```
### Multi Process Macros
These macros are to be called in multi process tests, inside a process which gets spawned. The reasoning is the same, Catch2 does not support multi process checks.
- ```HIPCHECK``` : Same as ```HIP_CHECK``` but will not call Catch2's ```REQUIRE``` on the HIP API. It will print if there is a mismatch and exit the process.
- ```HIPASSERT``` : Same as ```HIP_ASSERT``` but will not call Catch2's ```REQUIRE``` on the HIP API. It will print if there is a mismatch and exit the process.
## MultiProc Management Class
There is a special interface available for process isolation. ```hip::SpawnProc``` in ```hip_test_process.hh```. Using this interface test can spawn a process and place passing conditions on its return value or its output to stdout. This can be useful for testing printf output.
Sample Usage:
```cpp
hip::SpawnProc proc(<name of exe>, <optional bool value, if output is to be recorded>);
REQUIRE(0 == proc.run()); // Test of return value of the proc
REQUIRE(expectedOutput == proc.getOutput()); // Test on expected output of the process
```
The process must be a standalone exe inside the same folder as other tests.
## Enabling New Tests
Initially, the new tests can be enabled via using ```-DHIP_CATCH_TEST=1```. After porting existing tests, this will be turned on by default.
## Building a single test
```bash
hipcc <path_to_test.cpp> -I<HIP_SRC_DIR>/tests/catch/include <HIP_SRC_DIR>/tests/catch/hipTestMain/standalone_main.cc -I<HIP_SRC_DIR>/tests/catch/external/Catch2 -g -o <out_file_name>
```
## Debugging support
Catch2 allows multiple ways in which you can debug the test case.
- `-b` options breaks into a debugger as soon as there is a failure encountered [Catch2 Options Reference](https://github.com/catchorg/Catch2/blob/devel/docs/command-line.md#breaking-into-the-debugger)
- Catch2 provided [logging macro](https://github.com/catchorg/Catch2/blob/v2.13.6/docs/logging.md#top) that print useful information on test case failure
- User can also call [CATCH_BREAK_INTO_DEBUGGER](https://github.com/catchorg/Catch2/blob/devel/docs/configuration.md#overriding-catchs-debug-break--b) macro to break at a certain point in a test case.
- User can also mention filename.cc:__LineNumber__ to break into a test case via gdb.
## External Libs being used
- [Catch2](https://github.com/catchorg/Catch2) - Testing framework
- [picojson](https://github.com/kazuho/picojson) - For config file parsing
# Testing Guidelines
Tests fall into 5 categories; their file name prefixes are as follows:
- Unit tests (Prefix: Unit_\*API\*_\*Optional Scenario\*, example : Unit_hipMalloc_Negative or Unit_hipMalloc): Unit Tests are simplest test for an API, the target here is to test the API with different types of input and different ways of calling.
- Application Behavior Modelling tests (Prefix: ABM_\*Intent\*_\*Optional Scenario\*, example: ABM_ModuleLoadAndRun): ABM tests are used to model a specific use case of HIP APIs, either seen in a customer app or a general purpose app. It mimics the calling behavior seen in aforementioned app.
- Stress/Scale tests (Prefix: Stress_\*API\*_\*Intent\*_\*Optional Scenario\*, example: Stress_hipMemset_ExhaustVRAM): These tests are used to see the behavior of HIP APIs in edge scenarios, for example what happens when we have exhausted vram and do a hipMalloc or run many instances of same API in parallel.
- Multi Process tests (Prefix: MultiProc_\*API\*_\*Optional Scenario\*, example: MultiProc_hipIPCMemHandle_GetDataFromProc): These tests are multi process tests and will only run on linux. They are used to test HIP APIs in multi process environment
- Performance tests (Prefix: Perf_\*Intent\*_\*Optional Scenario\*, example: Perf_DispatchLatency): Performance tests are used to get performance results of HIP APIs.
# General Guidelines:
- Do not use the Catch2 tags. Tags won't be used for filtering.
- Add as many INFO() as you can in tests which print the state of the test; this will help whoever debugs the test when it fails (the INFO macro only prints when the test fails).
- Check the return value of each HIP API and fail whenever there is a mismatch with hipSuccess or hiprtcSuccess.
- Each category of test will have its own exe, and the catch_discover_test macro will be called on it to discover its tests.
- Optional Scenario in test names are optional. For example you can test all Scenarios of hipMalloc API in one file, you can name the file Unit_hipMalloc, if you are having a file just for negative scenarios you can name it as Unit_hipMalloc_Negative.
@@ -0,0 +1,8 @@
# Common Tests
# Sources compiled into the TypeQualifiers executable.
set(TEST_SRC
hipManagedKeyword.cc
)
# NOTE(review): hip_add_exe_to_target() is defined outside this file; presumably it
# builds the NAME executable from TEST_SRC and attaches it to the TEST_TARGET_NAME
# target (build_tests) - confirm against the helper's definition.
hip_add_exe_to_target(NAME TypeQualifiers
TEST_SRC ${TEST_SRC}
TEST_TARGET_NAME build_tests)
@@ -0,0 +1,87 @@
/*
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/*
This testcase verifies the hipManagedKeyword basic scenario
*/
#include <hip_test_common.hh>
#include <hip_test_checkers.hh>
#define N 1048576
__managed__ float A[N]; // Accessible by ALL CPU and GPU functions !!!
__managed__ float B[N];
__managed__ int x = 0;
// Accumulates A into B element-wise (B[i] = A[i] + B[i]) over the first N entries.
// Each thread starts at its global index and advances by the total number of
// launched threads until it passes N.
__global__ void add(const float *A, float *B) {
  const int first = blockIdx.x * blockDim.x + threadIdx.x;
  const int step = blockDim.x * gridDim.x;
  int i = first;
  while (i < N) {
    B[i] = A[i] + B[i];
    i += step;
  }
}
// Increments the managed counter x by one from device code.
__global__ void GPU_func() {
  x = x + 1;
}
// Fills the managed arrays from the host, adds them on the device and verifies on
// the host that every element of B ended up as exactly 3.0f.
TEST_CASE("Unit_hipManagedKeyword_SingleGpu") {
  // Host-side initialisation of the managed arrays.
  for (int idx = 0; idx < N; ++idx) {
    A[idx] = 1.0f;
    B[idx] = 2.0f;
  }

  // Enough blocks of 256 threads to cover all N elements (rounded up).
  int threadsPerBlock = 256;
  int blockCount = (N + threadsPerBlock - 1) / threadsPerBlock;
  dim3 dimGrid(blockCount, 1, 1);
  dim3 dimBlock(threadsPerBlock, 1, 1);

  hipLaunchKernelGGL(add, dimGrid, dimBlock, 0, 0, static_cast<const float*>(A),
                     static_cast<float*>(B));
  HIP_CHECK(hipGetLastError());
  HIP_CHECK(hipDeviceSynchronize());

  // Largest absolute deviation from the expected value 1.0f + 2.0f.
  float worstDeviation = 0.0f;
  for (int idx = 0; idx < N; ++idx) {
    worstDeviation = fmax(worstDeviation, fabs(B[idx]-3.0f));
  }
  REQUIRE(worstDeviation == 0.0f);
}
// Increments the managed counter x once from every device and checks that the host
// observes one increment per device. The test is skipped when any device reports
// that it does not support managed memory.
TEST_CASE("Unit_hipManagedKeyword_MultiGpu") {
  int numDevices = 0;
  HIP_CHECK(hipGetDeviceCount(&numDevices));

  for (int i = 0; i < numDevices; i++) {
    int managed_memory = 0;
    // HIP_CHECK (not HIPCHECK): this runs on the main test thread, so a failure
    // should be reported through Catch2 like the other calls in this test.
    HIP_CHECK(hipDeviceGetAttribute(&managed_memory,
                                    hipDeviceAttributeManagedMemory,
                                    i));
    if (!managed_memory) {
      HipTest::HIP_SKIP_TEST("managed memory access not supported on device");
      return;
    }
  }

  // Start from a known value so the final comparison does not depend on whether
  // the counter was modified earlier in the same process.
  x = 0;

  for (int i = 0; i < numDevices; i++) {
    HIP_CHECK(hipSetDevice(i));
    GPU_func<<< 1, 1 >>>();
    HIP_CHECK(hipGetLastError());  // surface launch failures
    HIP_CHECK(hipDeviceSynchronize());
  }
  REQUIRE(x == numDevices);
}
@@ -0,0 +1,40 @@
#include <windows.h>
#define HIP_VERSION "@HIP_VERSION@"
#define HIP_VERSION_MAJOR @HIP_VERSION_MAJOR@
#define HIP_VERSION_MINOR @HIP_VERSION_MINOR@
#define HIP_VERSION_PATCH @HIP_VERSION_PATCH@
VS_VERSION_INFO VERSIONINFO
FILEVERSION HIP_VERSION_MAJOR, HIP_VERSION_MINOR , HIP_VERSION_PATCH
PRODUCTVERSION 10,1
FILEFLAGSMASK 0x3fL
#ifdef _DEBUG
FILEFLAGS VS_FF_DEBUG
#else
FILEFLAGS 0x0L
#endif
FILEOS VOS_NT_WINDOWS32
FILETYPE VFT_APP
FILESUBTYPE VFT2_UNKNOWN
BEGIN
BLOCK "StringFileInfo"
BEGIN
BLOCK "040904b0"
BEGIN
VALUE "CompanyName", "Advanced Micro Devices Inc.\0"
VALUE "FileDescription", "HIP unit tests"
VALUE "FileVersion", "amdhip64.dll" HIP_VERSION
VALUE "LegalCopyright", "Copyright (C) 2022 Advanced Micro Devices Inc.\0"
VALUE "ProductName", "HIP unit tests"
VALUE "ProductVersion", HIP_VERSION
VALUE "Comments", "\0"
VALUE "InternalName", "HIP unit tests"
END
END
BLOCK "VarFileInfo"
BEGIN
VALUE "Translation", 0x0409, 1200
END
END
/* End of Version info */
+23
مشاهده پرونده
@@ -0,0 +1,23 @@
Boost Software License - Version 1.0 - August 17th, 2003
Permission is hereby granted, free of charge, to any person or organization
obtaining a copy of the software and accompanying documentation covered by
this license (the "Software") to use, reproduce, display, distribute,
execute, and transmit the Software, and to prepare derivative works of the
Software, and to permit third-parties to whom the Software is furnished to
do so, all subject to the following:
The copyright notices in the Software and this entire statement, including
the above license grant, this restriction and the following disclaimer,
must be included in all copies of the Software, in whole or in part, and
all derivative works of the Software, unless such copies or derivative
works are solely in the form of machine-executable object code generated by
a source language processor.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
تفاوت فایلی نمایش داده نمی شود زیرا این فایل بسیار بزرگ است Diff را بارگزاری کن
@@ -0,0 +1,438 @@
# Distributed under the OSI-approved BSD 3-Clause License. See accompanying
# file Copyright.txt or https://cmake.org/licensing for details.
#[=======================================================================[.rst:
Catch
-----
This module defines a function to help use the Catch test framework.
The :command:`catch_discover_tests` discovers tests by asking the compiled test
executable to enumerate its tests. This does not require CMake to be re-run
when tests change. However, it may not work in a cross-compiling environment,
and setting test properties is less convenient.
This command is intended to replace use of :command:`add_test` to register
tests, and will create a separate CTest test for each Catch test case. Note
that this is in some cases less efficient, as common set-up and tear-down logic
cannot be shared by multiple test cases executing in the same instance.
However, it provides more fine-grained pass/fail information to CTest, which is
usually considered as more beneficial. By default, the CTest test name is the
same as the Catch name; see also ``TEST_PREFIX`` and ``TEST_SUFFIX``.
.. command:: catch_discover_tests
Automatically add tests with CTest by querying the compiled test executable
for available tests::
catch_discover_tests(target
[TEST_SPEC arg1...]
[EXTRA_ARGS arg1...]
[WORKING_DIRECTORY dir]
[TEST_PREFIX prefix]
[TEST_SUFFIX suffix]
[PROPERTIES name1 value1...]
[TEST_LIST var]
[REPORTER reporter]
[OUTPUT_DIR dir]
[OUTPUT_PREFIX prefix]
[OUTPUT_SUFFIX suffix]
)
``catch_discover_tests`` sets up a post-build command on the test executable
that generates the list of tests by parsing the output from running the test
with the ``--list-test-names-only`` argument. This ensures that the full
list of tests is obtained. Since test discovery occurs at build time, it is
not necessary to re-run CMake when the list of tests changes.
However, it requires that :prop_tgt:`CROSSCOMPILING_EMULATOR` is properly set
in order to function in a cross-compiling environment.
Additionally, setting properties on tests is somewhat less convenient, since
the tests are not available at CMake time. Additional test properties may be
assigned to the set of tests as a whole using the ``PROPERTIES`` option. If
more fine-grained test control is needed, custom content may be provided
through an external CTest script using the :prop_dir:`TEST_INCLUDE_FILES`
directory property. The set of discovered tests is made accessible to such a
script via the ``<target>_TESTS`` variable.
The options are:
``target``
Specifies the Catch executable, which must be a known CMake executable
target. CMake will substitute the location of the built executable when
running the test.
``TEST_SPEC arg1...``
Specifies test cases, wildcarded test cases, tags and tag expressions to
pass to the Catch executable with the ``--list-test-names-only`` argument.
``EXTRA_ARGS arg1...``
Any extra arguments to pass on the command line to each test case.
``WORKING_DIRECTORY dir``
Specifies the directory in which to run the discovered test cases. If this
option is not provided, the current binary directory is used.
``TEST_PREFIX prefix``
Specifies a ``prefix`` to be prepended to the name of each discovered test
case. This can be useful when the same test executable is being used in
multiple calls to ``catch_discover_tests()`` but with different
``TEST_SPEC`` or ``EXTRA_ARGS``.
``TEST_SUFFIX suffix``
Similar to ``TEST_PREFIX`` except the ``suffix`` is appended to the name of
every discovered test case. Both ``TEST_PREFIX`` and ``TEST_SUFFIX`` may
be specified.
``PROPERTIES name1 value1...``
Specifies additional properties to be set on all tests discovered by this
invocation of ``catch_discover_tests``.
``TEST_LIST var``
Make the list of tests available in the variable ``var``, rather than the
default ``<target>_TESTS``. This can be useful when the same test
executable is being used in multiple calls to ``catch_discover_tests()``.
Note that this variable is only available in CTest.
``REPORTER reporter``
Use the specified reporter when running the test case. The reporter will
be passed to the Catch executable as ``--reporter reporter``.
``OUTPUT_DIR dir``
If specified, the parameter is passed along as
``--out dir/<test_name>`` to Catch executable. The actual file name is the
same as the test name. This should be used instead of
``EXTRA_ARGS --out foo`` to avoid race conditions writing the result output
when using parallel test execution.
``OUTPUT_PREFIX prefix``
May be used in conjunction with ``OUTPUT_DIR``.
If specified, ``prefix`` is added to each output file name, like so
``--out dir/prefix<test_name>``.
``OUTPUT_SUFFIX suffix``
May be used in conjunction with ``OUTPUT_DIR``.
If specified, ``suffix`` is added to each output file name, like so
``--out dir/<test_name>suffix``. This can be used to add a file extension to
the output e.g. ".xml".
#]=======================================================================]
#------------------------------------------------------------------------------
# TARGET_LIST TEST_SET
function(catch_discover_tests_compile_time_detection TARGET TEST_SET)
  # Register build-time Catch test discovery for a list of executables.
  #
  #   TARGET   - list of executable target names; each one receives a
  #              POST_BUILD step that runs the script named by
  #              _CATCH_DISCOVER_TESTS_SCRIPT.
  #   TEST_SET - base name of the generated CTest include/tests files.
  #   ARGN     - keyword options (TEST_SPEC, EXTRA_ARGS, PROPERTIES, ...).
  #
  # `crosscompiling_emulator` is not assigned here; the value used is whatever
  # is visible from the calling scope.
  # NOTE(review): every executable in TARGET is handed the same CTEST_FILE
  # path - confirm that this is intended when TARGET holds several names.
  set(one_value_keywords
    TEST_PREFIX TEST_SUFFIX WORKING_DIRECTORY TEST_LIST
    REPORTER OUTPUT_DIR OUTPUT_PREFIX OUTPUT_SUFFIX
  )
  set(multi_value_keywords TEST_SPEC EXTRA_ARGS PROPERTIES)
  cmake_parse_arguments("" "" "${one_value_keywords}" "${multi_value_keywords}" ${ARGN})

  # Defaults for options the caller left out
  if(NOT _WORKING_DIRECTORY)
    set(_WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}")
  endif()
  if(NOT _TEST_LIST)
    set(_TEST_LIST ${TARGET}_TESTS)
  endif()

  # Short hash of the option values keeps the generated file names distinct
  # between invocations that differ only in their arguments
  string(SHA1 option_digest "${_TEST_SPEC} ${_EXTRA_ARGS} ${_REPORTER} ${_OUTPUT_DIR} ${_OUTPUT_PREFIX} ${_OUTPUT_SUFFIX}")
  string(SUBSTRING ${option_digest} 0 7 option_digest)

  set(include_script "${CMAKE_CURRENT_BINARY_DIR}/${TEST_SET}_include-${option_digest}.cmake")
  set(tests_script "${CMAKE_CURRENT_BINARY_DIR}/${TEST_SET}_tests-${option_digest}.cmake")

  # After each executable is built, run the helper script to produce the test list
  foreach(exe IN LISTS TARGET)
    add_custom_command(
      TARGET ${exe} POST_BUILD
      COMMAND "${CMAKE_COMMAND}"
              -D "TEST_TARGET=${exe}"
              -D "TEST_EXECUTABLE=$<TARGET_FILE:${exe}>"
              -D "TEST_EXECUTOR=${crosscompiling_emulator}"
              -D "TEST_WORKING_DIR=${_WORKING_DIRECTORY}"
              -D "TEST_SPEC=${_TEST_SPEC}"
              -D "TEST_EXTRA_ARGS=${_EXTRA_ARGS}"
              -D "TEST_PROPERTIES=${_PROPERTIES}"
              -D "TEST_PREFIX=${_TEST_PREFIX}"
              -D "TEST_SUFFIX=${_TEST_SUFFIX}"
              -D "TEST_LIST=${_TEST_LIST}"
              -D "TEST_REPORTER=${_REPORTER}"
              -D "TEST_OUTPUT_DIR=${_OUTPUT_DIR}"
              -D "TEST_OUTPUT_PREFIX=${_OUTPUT_PREFIX}"
              -D "TEST_OUTPUT_SUFFIX=${_OUTPUT_SUFFIX}"
              -D "CTEST_FILE=${tests_script}"
              -P "${_CATCH_DISCOVER_TESTS_SCRIPT}"
      VERBATIM
    )
  endforeach()

  # The include file refers to the tests file relative to the binary directory
  file(RELATIVE_PATH include_script_rel ${CMAKE_CURRENT_BINARY_DIR} ${include_script})
  file(RELATIVE_PATH tests_script_rel ${CMAKE_CURRENT_BINARY_DIR} ${tests_script})

  file(WRITE "${include_script}"
    "if(EXISTS \"${tests_script_rel}\")\n"
    " include(\"${tests_script_rel}\")\n"
    "else()\n"
    " message(WARNING \"Test ${TARGET} not built yet.\")\n"
    "endif()\n"
  )

  if(CMAKE_VERSION VERSION_LESS "3.10.0")
    # Older CMake only offers the single-valued TEST_INCLUDE_FILE property
    get_property(include_file_taken DIRECTORY PROPERTY TEST_INCLUDE_FILE SET)
    if(include_file_taken)
      message(FATAL_ERROR
        "Cannot set more than one TEST_INCLUDE_FILE"
      )
    endif()
    set_property(DIRECTORY
      PROPERTY TEST_INCLUDE_FILE "${include_script_rel}"
    )
  else()
    # Add discovered tests to directory TEST_INCLUDE_FILES
    set_property(DIRECTORY
      APPEND PROPERTY TEST_INCLUDE_FILES "${include_script_rel}"
    )
  endif()
endfunction()
###############################################################################
#------------------------------------------------------------------------------
# current staging
function(catch_discover_tests TARGET)
  # Write the CTest include files used to discover the tests of one Catch
  # executable.
  #
  #   TARGET - name of the executable target.
  #   ARGN   - keyword options. Only PROPERTIES ends up in the generated files;
  #            the remaining options are parsed and used only for the file-name
  #            hash. The test-list variable written is always <TARGET>_TESTS.
  #
  # Two files are generated in the current binary directory:
  #   * <TARGET>_include_build-<hash>.cmake   - appended to the directory
  #     property TEST_INCLUDE_FILES.
  #   * <TARGET>_include_install-<hash>.cmake - appended to the global property
  #     G_INSTALL_CTEST_INCLUDE_FILES; it refers to files under `script/`.
  #
  # Reads ADD_SCRIPT_PATH and CATCH_INCLUDE_PATH from the enclosing scope.
  # Nothing is generated when CMake is older than 3.10.
  cmake_parse_arguments(
    ""
    ""
    "TEST_PREFIX;TEST_SUFFIX;WORKING_DIRECTORY;TEST_LIST;REPORTER;OUTPUT_DIR;OUTPUT_PREFIX;OUTPUT_SUFFIX"
    "TEST_SPEC;EXTRA_ARGS;PROPERTIES"
    ${ARGN}
  )
  if(NOT _WORKING_DIRECTORY)
    set(_WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}")
  endif()

  get_property(crosscompiling_emulator
    TARGET ${TARGET}
    PROPERTY CROSSCOMPILING_EMULATOR
  )

  # Short hash of the option values keeps generated file names unique
  string(SHA1 option_digest "${_TEST_SPEC} ${_EXTRA_ARGS} ${_REPORTER} ${_OUTPUT_DIR} ${_OUTPUT_PREFIX} ${_OUTPUT_SUFFIX}")
  string(SUBSTRING ${option_digest} 0 7 option_digest)

  set(tests_file_name "${TARGET}_tests-${option_digest}.cmake")
  set(tests_file "${CMAKE_CURRENT_BINARY_DIR}/${tests_file_name}")
  set(build_include "${CMAKE_CURRENT_BINARY_DIR}/${TARGET}_include_build-${option_digest}.cmake")
  set(install_include "${CMAKE_CURRENT_BINARY_DIR}/${TARGET}_include_install-${option_digest}.cmake")

  # Everything referenced from the build-tree include file is expressed
  # relative to the current binary directory
  file(RELATIVE_PATH build_include_rel ${CMAKE_CURRENT_BINARY_DIR} ${build_include})
  file(RELATIVE_PATH tests_file_rel ${CMAKE_CURRENT_BINARY_DIR} ${tests_file})
  file(RELATIVE_PATH add_script_rel ${CMAKE_CURRENT_BINARY_DIR} ${ADD_SCRIPT_PATH})
  file(RELATIVE_PATH catch_include_rel ${CMAKE_CURRENT_BINARY_DIR} ${CATCH_INCLUDE_PATH})

  if(CMAKE_VERSION VERSION_LESS "3.10.0")
    return()
  endif()

  # Lines shared by the build-time and install-time include files
  set(target_lines "set(_TARGET_EXECUTABLE ${TARGET})\nset(TARGET ${TARGET})\nset(_TEST_LIST ${TARGET}_TESTS)\n")
  set(settings_lines "set(crosscompiling_emulator ${crosscompiling_emulator})\nset(_PROPERTIES ${_PROPERTIES})\n")

  # write build time include file
  file(WRITE "${build_include}"
    "${target_lines}"
    "set(ctestfilepath ${tests_file_rel})\n"
    "set(_CATCH_ADD_TEST_SCRIPT ${add_script_rel})\n"
    "${settings_lines}"
    "include(${catch_include_rel})\n"
  )
  # Add discovered tests to directory TEST_INCLUDE_FILES
  set_property(DIRECTORY
    APPEND PROPERTY TEST_INCLUDE_FILES "${build_include_rel}"
  )

  # write install time include file
  file(WRITE "${install_include}"
    "${target_lines}"
    "set(ctestfilepath script/${tests_file_name})\n"
    "set(_CATCH_ADD_TEST_SCRIPT script/CatchAddTests.cmake)\n"
    "${settings_lines}"
    "include(script/catch_include.cmake)\n"
  )
  set_property(GLOBAL
    APPEND PROPERTY G_INSTALL_CTEST_INCLUDE_FILES "${install_include}"
  )
endfunction()
###############################################################################
# Full path of the CatchAddTests.cmake helper that sits next to this module.
# It is the script run with `cmake -P` by the POST_BUILD step that
# catch_discover_tests_compile_time_detection() attaches to each executable.
set(_CATCH_DISCOVER_TESTS_SCRIPT
${CMAKE_CURRENT_LIST_DIR}/CatchAddTests.cmake
CACHE INTERNAL "Catch2 full path to CatchAddTests.cmake helper file"
)
###############################################################################
# function to be called by all tests
function(hip_add_exe_to_target_compile_time_detection)
  # Build HIP Catch2 test executable(s) and register build-time test discovery.
  #
  # Keyword arguments:
  #   NAME              - executable name when STANDALONE_TESTS is not "1";
  #                       also the test-set name used for discovery files.
  #   TEST_TARGET_NAME  - target that gains a dependency on every executable,
  #                       e.g. build_tests.
  #   PLATFORM          - accepted, not referenced by this function.
  #   COMPILE_OPTIONS   - single value given to target_compile_options(PUBLIC).
  #   TEST_SRC          - test source files.
  #   LINKER_LIBS       - extra libraries to link.
  #   COMMON_SHARED_SRC - additional sources compiled into every executable.
  #   PROPERTY          - arguments forwarded to set_property(TARGET ... PROPERTY).
  #
  # With STANDALONE_TESTS equal to "1" one executable is built per source file,
  # named after the file without directory and last extension; otherwise one
  # executable called NAME is built from all sources.
  set(options)
  set(args NAME TEST_TARGET_NAME PLATFORM COMPILE_OPTIONS)
  set(list_args TEST_SRC LINKER_LIBS COMMON_SHARED_SRC PROPERTY)
  cmake_parse_arguments(
    PARSE_ARGV 0
    "" # variable prefix
    "${options}"
    "${args}"
    "${list_args}"
  )

  # Report stray arguments once per call rather than once per source file.
  foreach(arg IN LISTS _UNPARSED_ARGUMENTS)
    message(WARNING "Unparsed arguments: ${arg}")
  endforeach()

  # With an empty prefix the parsed values are _TEST_SRC / _COMMON_SHARED_SRC.
  # Prefer what was actually passed; fall back to same-named variables of the
  # calling scope, which is what this function used to read exclusively.
  if(DEFINED _TEST_SRC)
    set(test_sources ${_TEST_SRC})
  else()
    set(test_sources ${TEST_SRC})
  endif()
  if(DEFINED _COMMON_SHARED_SRC)
    set(common_sources ${_COMMON_SHARED_SRC})
  else()
    set(common_sources ${COMMON_SHARED_SRC})
  endif()

  # Assemble the link libraries once so they are not appended again for every
  # executable in standalone mode.
  if(UNIX)
    list(APPEND _LINKER_LIBS stdc++fs -ldl)
  elseif(EXISTS "${PROP_RC}/catchProp.res")
    # .res files are resource files compiled from .rc files (e.g. with
    # llvm-rc); they populate the properties of the built executables.
    list(APPEND _LINKER_LIBS "${PROP_RC}/catchProp.res")
  endif()

  set(exe_names)
  foreach(SRC_NAME ${test_sources})
    if(NOT STANDALONE_TESTS EQUAL "1")
      set(_EXE_NAME ${_NAME})
      # take the entire source set for building the executable
      set(SRC_NAME ${test_sources})
    else()
      # strip extension of src and use exe name as src name
      get_filename_component(_EXE_NAME ${SRC_NAME} NAME_WLE)
    endif()

    if(NOT RTC_TESTING)
      add_executable(${_EXE_NAME} EXCLUDE_FROM_ALL ${SRC_NAME} ${common_sources} $<TARGET_OBJECTS:Main_Object> $<TARGET_OBJECTS:KERNELS>)
    else()
      add_executable(${_EXE_NAME} EXCLUDE_FROM_ALL ${SRC_NAME} ${common_sources} $<TARGET_OBJECTS:Main_Object>)
      if(HIP_PLATFORM STREQUAL "amd")
        target_link_libraries(${_EXE_NAME} hiprtc)
      else()
        target_link_libraries(${_EXE_NAME} nvrtc)
      endif()
    endif()

    if(DEFINED _LINKER_LIBS)
      target_link_libraries(${_EXE_NAME} ${_LINKER_LIBS})
    endif()

    # Building TEST_TARGET_NAME builds this executable as well
    add_dependencies(${_TEST_TARGET_NAME} ${_EXE_NAME})

    if(DEFINED _PROPERTY)
      set_property(TARGET ${_EXE_NAME} PROPERTY ${_PROPERTY})
    endif()
    if(DEFINED _COMPILE_OPTIONS)
      target_compile_options(${_EXE_NAME} PUBLIC ${_COMPILE_OPTIONS})
    endif()

    # Keep this variable name: catch_discover_tests_compile_time_detection()
    # reads `crosscompiling_emulator` from the calling scope.
    get_property(crosscompiling_emulator
      TARGET ${_EXE_NAME}
      PROPERTY CROSSCOMPILING_EMULATOR
    )

    list(APPEND exe_names ${_EXE_NAME})

    if(NOT STANDALONE_TESTS EQUAL "1")
      # All sources went into the single executable; nothing left to do
      break()
    endif()
  endforeach()

  # Hand the executable list and test-set name to the discovery function whose
  # signature is (TARGET_LIST TEST_SET). catch_discover_tests() accepts a
  # single target only and would treat the test-set name as a stray argument.
  catch_discover_tests_compile_time_detection("${exe_names}" "${_NAME}" PROPERTIES SKIP_REGULAR_EXPRESSION "HIP_SKIP_THIS_TEST")
endfunction()
###############################################################################
# current staging
# function to be called by all tests
function(hip_add_exe_to_target)
  # Build HIP Catch2 test executable(s), queue them for installation and
  # register their tests through catch_discover_tests().
  #
  # Keyword arguments:
  #   NAME              - executable name when STANDALONE_TESTS is not "1".
  #   TEST_TARGET_NAME  - target that gains a dependency on every executable,
  #                       e.g. build_tests.
  #   PLATFORM          - accepted, not referenced by this function.
  #   COMPILE_OPTIONS   - single value given to target_compile_options(PUBLIC).
  #   TEST_SRC          - test source files.
  #   LINKER_LIBS       - extra libraries to link.
  #   COMMON_SHARED_SRC - additional sources compiled into every executable.
  #   PROPERTY          - arguments forwarded to set_property(TARGET ... PROPERTY).
  #
  # With STANDALONE_TESTS equal to "1" one executable is built per source file,
  # named after the file without directory and last extension; otherwise one
  # executable called NAME is built from all sources.
  set(options)
  set(args NAME TEST_TARGET_NAME PLATFORM COMPILE_OPTIONS)
  set(list_args TEST_SRC LINKER_LIBS COMMON_SHARED_SRC PROPERTY)
  cmake_parse_arguments(
    PARSE_ARGV 0
    "" # variable prefix
    "${options}"
    "${args}"
    "${list_args}"
  )

  # Report stray arguments once per call rather than once per source file.
  foreach(arg IN LISTS _UNPARSED_ARGUMENTS)
    message(WARNING "Unparsed arguments: ${arg}")
  endforeach()

  # With an empty prefix the parsed values are _TEST_SRC / _COMMON_SHARED_SRC.
  # Prefer what was actually passed; fall back to same-named variables of the
  # calling scope, which is what this function used to read exclusively.
  if(DEFINED _TEST_SRC)
    set(test_sources ${_TEST_SRC})
  else()
    set(test_sources ${TEST_SRC})
  endif()
  if(DEFINED _COMMON_SHARED_SRC)
    set(common_sources ${_COMMON_SHARED_SRC})
  else()
    set(common_sources ${COMMON_SHARED_SRC})
  endif()

  # Assemble the link libraries once so they are not appended again for every
  # executable in standalone mode.
  if(UNIX)
    list(APPEND _LINKER_LIBS stdc++fs -ldl pthread rt)
  elseif(EXISTS "${PROP_RC}/catchProp.res")
    # .res files are resource files compiled from .rc files (e.g. with
    # llvm-rc); they populate the properties of the built executables.
    list(APPEND _LINKER_LIBS "${PROP_RC}/catchProp.res")
  endif()

  foreach(SRC_NAME ${test_sources})
    if(NOT STANDALONE_TESTS EQUAL "1")
      set(_EXE_NAME ${_NAME})
      # take the entire source set for building the executable
      set(SRC_NAME ${test_sources})
    else()
      # strip extension of src and use exe name as src name
      get_filename_component(_EXE_NAME ${SRC_NAME} NAME_WLE)
    endif()

    # Create the test executable
    if(NOT RTC_TESTING)
      add_executable(${_EXE_NAME} EXCLUDE_FROM_ALL ${SRC_NAME} ${common_sources} $<TARGET_OBJECTS:Main_Object> $<TARGET_OBJECTS:KERNELS>)
    else()
      add_executable(${_EXE_NAME} EXCLUDE_FROM_ALL ${SRC_NAME} ${common_sources} $<TARGET_OBJECTS:Main_Object>)
      if(HIP_PLATFORM STREQUAL "amd")
        target_link_libraries(${_EXE_NAME} hiprtc)
      else()
        target_link_libraries(${_EXE_NAME} nvrtc)
      endif()
    endif()

    if(DEFINED _PROPERTY)
      set_property(TARGET ${_EXE_NAME} PROPERTY ${_PROPERTY})
    endif()

    if(DEFINED _LINKER_LIBS)
      target_link_libraries(${_EXE_NAME} ${_LINKER_LIBS})
    endif()

    # Building TEST_TARGET_NAME builds this executable as well
    add_dependencies(${_TEST_TARGET_NAME} ${_EXE_NAME})

    if(DEFINED _COMPILE_OPTIONS)
      target_compile_options(${_EXE_NAME} PUBLIC ${_COMPILE_OPTIONS})
    endif()

    # add binary to global list of binaries to install
    set_property(GLOBAL APPEND PROPERTY G_INSTALL_EXE_TARGETS ${_EXE_NAME})

    catch_discover_tests("${_EXE_NAME}" PROPERTIES SKIP_REGULAR_EXPRESSION "HIP_SKIP_THIS_TEST")

    if(NOT STANDALONE_TESTS EQUAL "1")
      # All sources went into the single executable; nothing left to do
      break()
    endif()
  endforeach()
endfunction()
@@ -0,0 +1,34 @@
####### Expanded from @PACKAGE_INIT@ by configure_package_config_file() #######
####### Any changes to this file will be overwritten by the next CMake run ####
####### The input file was Catch2Config.cmake.in ########
# Package prefix: the absolute form of the directory three levels above the
# directory that contains this file.
get_filename_component(PACKAGE_PREFIX_DIR "${CMAKE_CURRENT_LIST_DIR}/../../../" ABSOLUTE)
# set_and_check(<var> <file>)
# Assigns <file> to <var>, then stops with FATAL_ERROR when that path does not
# exist.
macro(set_and_check _var _file)
set(${_var} "${_file}")
if(NOT EXISTS "${_file}")
message(FATAL_ERROR "File or directory ${_file} referenced by variable ${_var} does not exist !")
endif()
endmacro()
# check_required_components(<name>)
# Walks <name>_FIND_COMPONENTS and sets <name>_FOUND to FALSE when a component
# whose <name>_FIND_REQUIRED_<comp> is true has no true <name>_<comp>_FOUND.
# <name>_FOUND is left untouched otherwise.
macro(check_required_components _NAME)
foreach(comp ${${_NAME}_FIND_COMPONENTS})
if(NOT ${_NAME}_${comp}_FOUND)
if(${_NAME}_FIND_REQUIRED_${comp})
set(${_NAME}_FOUND FALSE)
endif()
endif()
endforeach()
endmacro()
####################################################################################
# Avoid repeatedly including the targets
if(NOT TARGET Catch2::Catch2)
# Provide path for scripts
# (this directory is appended to CMAKE_MODULE_PATH so that include() can find
# the modules located next to this file by name)
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}")
include(${CMAKE_CURRENT_LIST_DIR}/Catch2Targets.cmake)
endif()
@@ -0,0 +1,51 @@
# This is a basic version file for the Config-mode of find_package().
# It is used by write_basic_package_version_file() as input file for configure_file()
# to create a version-file which can be installed along a config.cmake file.
#
# The created file sets PACKAGE_VERSION_EXACT if the current version string and
# the requested version string are exactly the same and it sets
# PACKAGE_VERSION_COMPATIBLE if the current version is >= requested version,
# but only if the requested major version is the same as the current one.
# The variable CVF_VERSION must be set before calling configure_file().
# Version of the installed package.
set(PACKAGE_VERSION "2.13.6")
# A request for a newer version than the installed one is never compatible.
if(PACKAGE_VERSION VERSION_LESS PACKAGE_FIND_VERSION)
set(PACKAGE_VERSION_COMPATIBLE FALSE)
else()
# Extract the major version (leading digits before the first dot); the whole
# version string is used when the pattern does not match.
if("2.13.6" MATCHES "^([0-9]+)\\.")
set(CVF_VERSION_MAJOR "${CMAKE_MATCH_1}")
else()
set(CVF_VERSION_MAJOR "2.13.6")
endif()
# Compatible only when the requested major version equals the installed one.
if(PACKAGE_FIND_VERSION_MAJOR STREQUAL CVF_VERSION_MAJOR)
set(PACKAGE_VERSION_COMPATIBLE TRUE)
else()
set(PACKAGE_VERSION_COMPATIBLE FALSE)
endif()
if(PACKAGE_FIND_VERSION STREQUAL PACKAGE_VERSION)
set(PACKAGE_VERSION_EXACT TRUE)
endif()
endif()
# if the installed project requested no architecture check, don't perform the check
# (the condition is the literal "FALSE" here, so this branch is never taken)
if("FALSE")
return()
endif()
# if the installed or the using project don't have CMAKE_SIZEOF_VOID_P set, ignore it:
# NOTE(review): the second operand compares two empty literals and is always
# true, so this script always returns here and the check below is never reached.
if("${CMAKE_SIZEOF_VOID_P}" STREQUAL "" OR "" STREQUAL "")
return()
endif()
# check that the installed version has the same 32/64bit-ness as the one which is currently searching:
# NOTE(review): the condition compares against an empty literal and the math()
# expression below has no left operand; both are unreachable because of the
# unconditional return above.
if(NOT CMAKE_SIZEOF_VOID_P STREQUAL "")
math(EXPR installedBits " * 8")
set(PACKAGE_VERSION "${PACKAGE_VERSION} (${installedBits}bit)")
set(PACKAGE_VERSION_UNSUITABLE TRUE)
endif()
@@ -0,0 +1,99 @@
# Generated by CMake
# Reject very old CMake: "<major>.<minor>" is compared numerically against 2.5.
if("${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION}" LESS 2.5)
message(FATAL_ERROR "CMake >= 2.6.0 required")
endif()
# Policy settings made below are local to this file; every exit path pops them.
cmake_policy(PUSH)
cmake_policy(VERSION 2.6...3.17)
#----------------------------------------------------------------
# Generated CMake target import file.
#----------------------------------------------------------------
# Commands may need to know the format version.
set(CMAKE_IMPORT_FILE_VERSION 1)
# Protect against multiple inclusion, which would fail when already imported targets are added once more.
set(_targetsDefined)
set(_targetsNotDefined)
set(_expectedTargets)
# Sort each expected target into the "defined" or "not defined" list.
foreach(_expectedTarget Catch2::Catch2)
list(APPEND _expectedTargets ${_expectedTarget})
if(NOT TARGET ${_expectedTarget})
list(APPEND _targetsNotDefined ${_expectedTarget})
endif()
if(TARGET ${_expectedTarget})
list(APPEND _targetsDefined ${_expectedTarget})
endif()
endforeach()
# Every expected target already exists: clean up and leave without redefining.
if("${_targetsDefined}" STREQUAL "${_expectedTargets}")
unset(_targetsDefined)
unset(_targetsNotDefined)
unset(_expectedTargets)
set(CMAKE_IMPORT_FILE_VERSION)
cmake_policy(POP)
return()
endif()
# Only some of the expected targets exist: refuse to continue.
if(NOT "${_targetsDefined}" STREQUAL "")
message(FATAL_ERROR "Some (but not all) targets in this export set were already defined.\nTargets Defined: ${_targetsDefined}\nTargets not yet defined: ${_targetsNotDefined}\n")
endif()
unset(_targetsDefined)
unset(_targetsNotDefined)
unset(_expectedTargets)
# Compute the installation prefix relative to this file.
# (the first call strips the file name, the next three strip one directory each)
get_filename_component(_IMPORT_PREFIX "${CMAKE_CURRENT_LIST_FILE}" PATH)
get_filename_component(_IMPORT_PREFIX "${_IMPORT_PREFIX}" PATH)
get_filename_component(_IMPORT_PREFIX "${_IMPORT_PREFIX}" PATH)
get_filename_component(_IMPORT_PREFIX "${_IMPORT_PREFIX}" PATH)
# A prefix of "/" becomes empty so that "${_IMPORT_PREFIX}/include" does not
# start with a double slash.
if(_IMPORT_PREFIX STREQUAL "/")
set(_IMPORT_PREFIX "")
endif()
# Create imported target Catch2::Catch2
add_library(Catch2::Catch2 INTERFACE IMPORTED)
set_target_properties(Catch2::Catch2 PROPERTIES
INTERFACE_COMPILE_FEATURES "cxx_alignas;cxx_alignof;cxx_attributes;cxx_auto_type;cxx_constexpr;cxx_defaulted_functions;cxx_deleted_functions;cxx_final;cxx_lambdas;cxx_noexcept;cxx_override;cxx_range_for;cxx_rvalue_references;cxx_static_assert;cxx_strong_enums;cxx_trailing_return_types;cxx_unicode_literals;cxx_user_literals;cxx_variadic_macros"
INTERFACE_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/include"
)
if(CMAKE_VERSION VERSION_LESS 3.0.0)
message(FATAL_ERROR "This file relies on consumers using CMake 3.0.0 or greater.")
endif()
# Load information for each installed configuration.
get_filename_component(_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH)
file(GLOB CONFIG_FILES "${_DIR}/Catch2Targets-*.cmake")
foreach(f ${CONFIG_FILES})
include(${f})
endforeach()
# Cleanup temporary variables.
set(_IMPORT_PREFIX)
# Loop over all imported files and verify that they actually exist
foreach(target ${_IMPORT_CHECK_TARGETS} )
foreach(file ${_IMPORT_CHECK_FILES_FOR_${target}} )
if(NOT EXISTS "${file}" )
message(FATAL_ERROR "The imported target \"${target}\" references the file
\"${file}\"
but this file does not exist. Possible reasons include:
* The file was deleted, renamed, or moved to another location.
* An install or uninstall procedure did not complete successfully.
* The installation package was faulty and contained
\"${CMAKE_CURRENT_LIST_FILE}\"
but not all the files it references.
")
endif()
endforeach()
unset(_IMPORT_CHECK_FILES_FOR_${target})
endforeach()
unset(_IMPORT_CHECK_TARGETS)
# This file does not depend on other imported targets which have
# been exported from the same project but in a separate export set.
# Commands beyond this point should not need to know the version.
set(CMAKE_IMPORT_FILE_VERSION)
cmake_policy(POP)
@@ -0,0 +1,134 @@
# Distributed under the OSI-approved BSD 3-Clause License. See accompanying
# file Copyright.txt or https://cmake.org/licensing for details.
# Working variables: start from a clean state
unset(script)
unset(suite)
unset(tests)

# Short local aliases for the TEST_* inputs of this script
set(spec ${TEST_SPEC})
set(extra_args ${TEST_EXTRA_ARGS})
set(properties ${TEST_PROPERTIES})
set(reporter ${TEST_REPORTER})
set(prefix "${TEST_PREFIX}")
set(suffix "${TEST_SUFFIX}")
set(output_dir ${TEST_OUTPUT_DIR})
set(output_prefix ${TEST_OUTPUT_PREFIX})
set(output_suffix ${TEST_OUTPUT_SUFFIX})
function(add_command NAME)
  # Append one command invocation `NAME(<args>)` plus a newline to the
  # variable `script` in the calling scope.
  #
  # Arguments made only of the characters - . / : letters, digits and
  # underscore are written as they are; any other argument is wrapped in a
  # [==[...]==] bracket argument.
  set(rendered "")
  # Arguments are fetched by index (ARGV<n>) instead of through ARGN, because
  # ARGN splits arrays into multiple arguments
  math(EXPR final_index ${ARGC}-1)
  foreach(index RANGE 1 ${final_index})
    set(piece "${ARGV${index}}")
    if(NOT piece MATCHES "[^-./:a-zA-Z0-9_]")
      set(rendered "${rendered} ${piece}")
    else()
      # form a bracket_argument
      set(rendered "${rendered} [==[${piece}]==]")
    endif()
  endforeach()
  set(script "${script}${NAME}(${rendered})\n" PARENT_SCOPE)
endfunction()
# On Windows make sure the executable path carries the .exe suffix. The suffix
# is appended only when it is missing: a path produced by $<TARGET_FILE:...>
# already ends in .exe, and appending it again would name a file that does not
# exist.
if(WIN32 AND NOT TEST_EXECUTABLE MATCHES "\\.[Ee][Xx][Ee]$")
  set(TEST_EXECUTABLE ${TEST_EXECUTABLE}.exe)
endif()
# Later steps run with a different working directory, so use an absolute path
get_filename_component(TEST_EXECUTABLE ${TEST_EXECUTABLE} ABSOLUTE)
# Ask the executable for the names of its test cases, one per output line
execute_process(
  COMMAND ${TEST_EXECUTOR} "${TEST_EXECUTABLE}" ${spec} --list-test-names-only
  OUTPUT_VARIABLE output
  RESULT_VARIABLE result
  WORKING_DIRECTORY "${TEST_WORKING_DIR}"
)
if(NOT result MATCHES "^-?[0-9]+$")
  # execute_process() stores an error description instead of an exit code when
  # the process could not be run at all. Report it instead of silently
  # producing an empty test list.
  message(WARNING
    "Could not run test executable '${TEST_EXECUTABLE}': ${result}\n"
  )
# Catch --list-test-names-only reports the number of tests, so 0 is... surprising
elseif(result EQUAL 0)
  message(WARNING
    "Test executable '${TEST_EXECUTABLE}' contains no tests!\n"
  )
elseif(result LESS 0)
  message(FATAL_ERROR
    "Error running test executable '${TEST_EXECUTABLE}':\n"
    " Result: ${result}\n"
    " Output: ${output}\n"
  )
endif()
# Turn the line-oriented output into a CMake list
string(REPLACE "\n" ";" output "${output}")
# Query the test executable for the reporters it offers
execute_process(
  COMMAND ${TEST_EXECUTOR} "${TEST_EXECUTABLE}" ${spec} --list-reporters
  WORKING_DIRECTORY "${TEST_WORKING_DIR}"
  RESULT_VARIABLE reporters_result
  OUTPUT_VARIABLE reporters_output
)
if(${reporters_result} LESS 0)
  message(FATAL_ERROR
    "Error running test executable '${TEST_EXECUTABLE}':\n"
    " Result: ${reporters_result}\n"
    " Output: ${reporters_output}\n"
  )
elseif(${reporters_result} EQUAL 0)
  message(WARNING
    "Test executable '${TEST_EXECUTABLE}' contains no reporters!\n"
  )
endif()

# A requested reporter has to appear somewhere in the reporter listing
string(FIND "${reporters_output}" "${reporter}" reporter_is_valid)
if(reporter)
  if(${reporter_is_valid} EQUAL -1)
    message(FATAL_ERROR
      "\"${reporter}\" is not a valid reporter!\n"
    )
  endif()
endif()
# Prepare output dir: a relative directory is resolved against the test
# working directory and created when it does not exist yet
if(output_dir)
  if(NOT IS_ABSOLUTE ${output_dir})
    set(output_dir "${TEST_WORKING_DIR}/${output_dir}")
    if(NOT EXISTS ${output_dir})
      file(MAKE_DIRECTORY ${output_dir})
    endif()
  endif()
endif()

# Prepare reporter: command-line fragment that selects the requested reporter
if(reporter)
  set(reporter_arg "--reporter ${reporter}")
endif()
# Parse output
# Each element of `output` is one line printed by the test executable and is
# treated as one test case name.
foreach(line ${output})
set(test ${line})
# Escape characters in test case names that would be parsed by Catch2
# (each ',', '[' and ']' gets a backslash in front of it)
set(test_name ${test})
foreach(char , [ ])
string(REPLACE ${char} "\\${char}" test_name ${test_name})
endforeach(char)
# ...add output dir
# The per-test output file name is the escaped test name with every character
# outside [A-Za-z0-9_] replaced by '_', wrapped in output_prefix/output_suffix.
if(output_dir)
string(REGEX REPLACE "[^A-Za-z0-9_]" "_" test_name_clean ${test_name})
set(output_dir_arg "--out ${output_dir}/${output_prefix}${test_name_clean}${output_suffix}")
endif()
# ...and add to script
# The CTest name is prefix + raw test name + suffix; the escaped name is what
# gets passed to the executable to select the test case.
add_command(add_test
"${prefix}${test}${suffix}"
${TEST_EXECUTOR}
"${TEST_EXECUTABLE}"
"${test_name}"
${extra_args}
"${reporter_arg}"
"${output_dir_arg}"
)
# Apply the caller-supplied properties to the test that was just added
add_command(set_tests_properties
"${prefix}${test}${suffix}"
PROPERTIES
${properties}
)
list(APPEND tests "${prefix}${test}${suffix}")
endforeach()
# Create a list of all discovered tests, which users may use to e.g. set
# properties on the tests
add_command(set ${TEST_LIST} ${tests})
# Write CTest script
# (`script` holds every command accumulated by add_command above)
file(WRITE "${CTEST_FILE}" "${script}")
@@ -0,0 +1,252 @@
#==================================================================================================#
# supported macros #
# - TEST_CASE, #
# - TEMPLATE_TEST_CASE #
# - SCENARIO, #
# - TEST_CASE_METHOD, #
# - CATCH_TEST_CASE, #
# - CATCH_TEMPLATE_TEST_CASE #
# - CATCH_SCENARIO, #
# - CATCH_TEST_CASE_METHOD. #
# #
# Usage #
# 1. make sure this module is in the path or add this otherwise: #
# set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake.modules/") #
# 2. make sure that you've enabled testing option for the project by the call: #
# enable_testing() #
# 3. add the lines to the script for testing target (sample CMakeLists.txt): #
# project(testing_target) #
# set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake.modules/") #
# enable_testing() #
# #
# find_path(CATCH_INCLUDE_DIR "catch.hpp") #
# include_directories(${INCLUDE_DIRECTORIES} ${CATCH_INCLUDE_DIR}) #
# #
# file(GLOB SOURCE_FILES "*.cpp") #
# add_executable(${PROJECT_NAME} ${SOURCE_FILES}) #
# #
# include(ParseAndAddCatchTests) #
# ParseAndAddCatchTests(${PROJECT_NAME}) #
# #
# The following variables affect the behavior of the script: #
# #
# PARSE_CATCH_TESTS_VERBOSE (Default OFF) #
# -- enables debug messages #
# PARSE_CATCH_TESTS_NO_HIDDEN_TESTS (Default OFF) #
# -- excludes tests marked with [!hide], [.] or [.foo] tags #
# PARSE_CATCH_TESTS_ADD_FIXTURE_IN_TEST_NAME (Default ON) #
# -- adds fixture class name to the test name #
# PARSE_CATCH_TESTS_ADD_TARGET_IN_TEST_NAME (Default ON) #
# -- adds cmake target name to the test name #
# PARSE_CATCH_TESTS_ADD_TO_CONFIGURE_DEPENDS (Default OFF) #
# -- causes CMake to rerun when file with tests changes so that new tests will be discovered #
# #
# One can also set (locally) the optional variable OptionalCatchTestLauncher to precise the way #
# a test should be run. For instance to use test MPI, one can write #
# set(OptionalCatchTestLauncher ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} ${NUMPROC}) #
# just before calling this ParseAndAddCatchTests function #
# #
# The AdditionalCatchParameters optional variable can be used to pass extra argument to the test #
# command. For example, to include successful tests in the output, one can write #
# set(AdditionalCatchParameters --success) #
# #
# After the script, the ParseAndAddCatchTests_TESTS property for the target, and for each source #
# file in the target is set, and contains the list of the tests extracted from that target, or #
# from that file. This is useful, for example to add further labels or properties to the tests. #
# #
#==================================================================================================#
# Refuse to load in projects that declare a minimum CMake older than 2.8.8
if(CMAKE_MINIMUM_REQUIRED_VERSION VERSION_LESS 2.8.8)
  message(FATAL_ERROR "ParseAndAddCatchTests requires CMake 2.8.8 or newer")
endif()

# Test naming options (enabled by default)
option(PARSE_CATCH_TESTS_ADD_TARGET_IN_TEST_NAME "Add target name to the test name" ON)
option(PARSE_CATCH_TESTS_ADD_FIXTURE_IN_TEST_NAME "Add fixture class name to the test name" ON)

# Parser behaviour options (disabled by default)
option(PARSE_CATCH_TESTS_NO_HIDDEN_TESTS "Exclude tests with [!hide], [.] or [.foo] tags" OFF)
option(PARSE_CATCH_TESTS_ADD_TO_CONFIGURE_DEPENDS "Add test file to CMAKE_CONFIGURE_DEPENDS property" OFF)
option(PARSE_CATCH_TESTS_VERBOSE "Print Catch to CTest parser debug messages" OFF)
function(ParseAndAddCatchTests_PrintDebugMessage)
  # Print the given arguments as one STATUS message prefixed with
  # "ParseAndAddCatchTests: ". Does nothing unless PARSE_CATCH_TESTS_VERBOSE
  # is enabled.
  if(NOT PARSE_CATCH_TESTS_VERBOSE)
    return()
  endif()
  message(STATUS "ParseAndAddCatchTests: ${ARGV}")
endfunction()
# This removes the contents between
# - block comments (i.e. /* ... */)
# - full line comments (i.e. // ... )
# contents have been read into '${CppCode}'.
# !keep partial line comments
function(ParseAndAddCatchTests_RemoveComments CppCode)
  # Strip comments from C++ source text, in place.
  #
  #   CppCode - NAME of the variable holding the source text; the cleaned text
  #             is stored back into that variable in the calling scope.
  #
  # Removed: block comments (/* ... */) and lines that consist only of a
  # // comment. A // comment that follows code on the same line is kept.
  set(source_text "${${CppCode}}")

  # Swap the two-character comment delimiters for single control characters so
  # that "everything up to the closing delimiter" can be written as a negated
  # character class.
  string(ASCII 2 block_open)
  string(ASCII 3 block_close)
  string(REPLACE "/*" "${block_open}" source_text "${source_text}")
  string(REPLACE "*/" "${block_close}" source_text "${source_text}")
  string(REGEX REPLACE "${block_open}[^${block_close}]*${block_close}" "" source_text "${source_text}")

  # Drop full-line comments, keeping the line break in front of them
  string(REGEX REPLACE "\n[ \t]*//+[^\n]+" "\n" source_text "${source_text}")

  set(${CppCode} "${source_text}" PARENT_SCOPE)
endfunction()
# Worker function
function(ParseAndAddCatchTests_ParseFile SourceFile TestTarget)
# Scan one source file of a test target for Catch test-case macros and
# register a CTest test for every test case found.
#
# Arguments:
#   SourceFile - one entry of the target's SOURCES property (a path, or a
#                $<TARGET_OBJECTS:...> generator expression, which is skipped).
#   TestTarget - the executable target the test cases are compiled into.
#
# Variables read from the calling scope (none are set in this function):
#   PARSE_CATCH_TESTS_ADD_TO_CONFIGURE_DEPENDS, PARSE_CATCH_TESTS_NO_HIDDEN_TESTS,
#   PARSE_CATCH_TESTS_ADD_FIXTURE_IN_TEST_NAME, PARSE_CATCH_TESTS_ADD_TARGET_IN_TEST_NAME,
#   OptionalCatchTestLauncher, AdditionalCatchParameters.
#
# Side effects: calls add_test()/set_tests_properties() and appends each
# registered test name to the ParseAndAddCatchTests_TESTS property of both
# the target and the source file.
#
# If SourceFile is an object library, do not scan it (as it is not a file). Exit without giving a warning about a missing file.
if(SourceFile MATCHES "\\\$<TARGET_OBJECTS:.+>")
ParseAndAddCatchTests_PrintDebugMessage("Detected OBJECT library: ${SourceFile} this will not be scanned for tests.")
return()
endif()
# According to CMake docs EXISTS behavior is well-defined only for full paths.
get_filename_component(SourceFile ${SourceFile} ABSOLUTE)
if(NOT EXISTS ${SourceFile})
message(WARNING "Cannot find source file: ${SourceFile}")
return()
endif()
ParseAndAddCatchTests_PrintDebugMessage("parsing ${SourceFile}")
# NEWLINE_CONSUME keeps newlines inside the value, so the multi-line regexes
# below operate on the file text as a whole.
file(STRINGS ${SourceFile} Contents NEWLINE_CONSUME)
# Remove block and fullline comments
ParseAndAddCatchTests_RemoveComments(Contents)
# Find definition of test names
# https://regex101.com/r/JygOND/1
# Each match spans the macro name, its parenthesised argument list, the
# opening brace(s) and, optionally, a trailing "// ... timeout <N>" comment on
# the same line. The timeout text is not used further in this function.
string(REGEX MATCHALL "[ \t]*(CATCH_)?(TEMPLATE_)?(TEST_CASE_METHOD|SCENARIO|TEST_CASE)[ \t]*\\([ \t\n]*\"[^\"]*\"[ \t\n]*(,[ \t\n]*\"[^\"]*\")?(,[ \t\n]*[^\,\)]*)*\\)[ \t\n]*\{+[ \t]*(//[^\n]*[Tt][Ii][Mm][Ee][Oo][Uu][Tt][ \t]*[0-9]+)*" Tests "${Contents}")
# Optionally make CMake re-configure when a file that contains tests changes.
if(PARSE_CATCH_TESTS_ADD_TO_CONFIGURE_DEPENDS AND Tests)
ParseAndAddCatchTests_PrintDebugMessage("Adding ${SourceFile} to CMAKE_CONFIGURE_DEPENDS property")
set_property(
DIRECTORY
APPEND
PROPERTY CMAKE_CONFIGURE_DEPENDS ${SourceFile}
)
endif()
# check CMP0110 policy for new add_test() behavior
if(POLICY CMP0110)
cmake_policy(GET CMP0110 _cmp0110_value) # new add_test() behavior
else()
# just to be thorough explicitly set the variable
set(_cmp0110_value)
endif()
foreach(TestName ${Tests})
# Strip newlines
# (both plain newlines and backslash-newline line continuations)
string(REGEX REPLACE "\\\\\n|\n" "" TestName "${TestName}")
# Get test type and fixture if applicable
# TestTypeAndFixture: macro name plus everything up to the first comma or quote.
# TestType:           the macro name alone.
# TestFixture:        what remains after removing "<TestType>(" - the fixture
#                     class for TEST_CASE_METHOD, empty otherwise.
string(REGEX MATCH "(CATCH_)?(TEMPLATE_)?(TEST_CASE_METHOD|SCENARIO|TEST_CASE)[ \t]*\\([^,^\"]*" TestTypeAndFixture "${TestName}")
string(REGEX MATCH "(CATCH_)?(TEMPLATE_)?(TEST_CASE_METHOD|SCENARIO|TEST_CASE)" TestType "${TestTypeAndFixture}")
string(REGEX REPLACE "${TestType}\\([ \t]*" "" TestFixture "${TestTypeAndFixture}")
# Get string parts of test definition
string(REGEX MATCHALL "\"+([^\\^\"]|\\\\\")+\"+" TestStrings "${TestName}")
# Strip wrapping quotation marks
# The first replace drops the outermost quotes of the whole list; the second
# turns the `";"` separators between elements into plain `;`.
string(REGEX REPLACE "^\"(.*)\"$" "\\1" TestStrings "${TestStrings}")
string(REPLACE "\";\"" ";" TestStrings "${TestStrings}")
# Validate that a test name and tags have been provided
# (exactly one string = name only, two strings = name and tags)
list(LENGTH TestStrings TestStringsLength)
if(TestStringsLength GREATER 2 OR TestStringsLength LESS 1)
message(FATAL_ERROR "You must provide a valid test name and tags for all tests in ${SourceFile}")
endif()
# Assign name and tags
list(GET TestStrings 0 Name)
if("${TestType}" STREQUAL "SCENARIO")
set(Name "Scenario: ${Name}")
endif()
# Build the CTest-visible name: optional "<fixture>:" prefix, then optional
# "<target>:" prefix in front of that.
if(PARSE_CATCH_TESTS_ADD_FIXTURE_IN_TEST_NAME AND "${TestType}" MATCHES "(CATCH_)?TEST_CASE_METHOD" AND TestFixture )
set(CTestName "${TestFixture}:${Name}")
else()
set(CTestName "${Name}")
endif()
if(PARSE_CATCH_TESTS_ADD_TARGET_IN_TEST_NAME)
set(CTestName "${TestTarget}:${CTestName}")
endif()
# add target to labels to enable running all tests added from this target
set(Labels ${TestTarget})
if(TestStringsLength EQUAL 2)
list(GET TestStrings 1 Tags)
string(TOLOWER "${Tags}" Tags)
# remove target from labels if the test is hidden
if("${Tags}" MATCHES ".*\\[!?(hide|\\.)\\].*")
list(REMOVE_ITEM Labels ${TestTarget})
endif()
# Convert "[a][b]" into the CMake list "a;b;" so each tag becomes a label.
string(REPLACE "]" ";" Tags "${Tags}")
string(REPLACE "[" "" Tags "${Tags}")
else()
# unset tags variable from previous loop
unset(Tags)
endif()
list(APPEND Labels ${Tags})
# A test counts as hidden when any label starts with "!hide" or with ".".
set(HiddenTagFound OFF)
foreach(label ${Labels})
string(REGEX MATCH "^!hide|^\\." result ${label})
if(result)
set(HiddenTagFound ON)
break()
endif(result)
endforeach(label)
# Before CMake 3.9 hidden tests are skipped outright; on newer versions they
# are still added and marked DISABLED further below.
if(PARSE_CATCH_TESTS_NO_HIDDEN_TESTS AND ${HiddenTagFound} AND ${CMAKE_VERSION} VERSION_LESS "3.9")
ParseAndAddCatchTests_PrintDebugMessage("Skipping test \"${CTestName}\" as it has [!hide], [.] or [.foo] label")
else()
ParseAndAddCatchTests_PrintDebugMessage("Adding test \"${CTestName}\"")
if(Labels)
ParseAndAddCatchTests_PrintDebugMessage("Setting labels to ${Labels}")
endif()
# Escape commas in the test spec
string(REPLACE "," "\\," Name ${Name})
# Work around CMake 3.18.0 change in `add_test()`, before the escaped quotes were necessary,
# only with CMake 3.18.0 the escaped double quotes confuse the call. This change is reverted in 3.18.1
# And properly introduced in 3.19 with the CMP0110 policy
if(_cmp0110_value STREQUAL "NEW" OR ${CMAKE_VERSION} VERSION_EQUAL "3.18")
ParseAndAddCatchTests_PrintDebugMessage("CMP0110 set to NEW, no need for add_test(\"\") workaround")
else()
ParseAndAddCatchTests_PrintDebugMessage("CMP0110 set to OLD adding \"\" for add_test() workaround")
set(CTestName "\"${CTestName}\"")
endif()
# Handle template test cases
# " - *" is appended to the spec passed to the executable, not to CTestName.
if("${TestTypeAndFixture}" MATCHES ".*TEMPLATE_.*")
set(Name "${Name} - *")
endif()
# Add the test and set its properties
add_test(NAME "${CTestName}" COMMAND ${OptionalCatchTestLauncher} $<TARGET_FILE:${TestTarget}> ${Name} ${AdditionalCatchParameters})
# Old CMake versions do not document VERSION_GREATER_EQUAL, so we use VERSION_GREATER with 3.8 instead
if(PARSE_CATCH_TESTS_NO_HIDDEN_TESTS AND ${HiddenTagFound} AND ${CMAKE_VERSION} VERSION_GREATER "3.8")
ParseAndAddCatchTests_PrintDebugMessage("Setting DISABLED test property")
set_tests_properties("${CTestName}" PROPERTIES DISABLED ON)
else()
# Treat a run that reports "No tests ran" as a failure, and attach the labels.
set_tests_properties("${CTestName}" PROPERTIES FAIL_REGULAR_EXPRESSION "No tests ran"
LABELS "${Labels}")
endif()
# Record the registered test name on both the target and the source file.
set_property(
TARGET ${TestTarget}
APPEND
PROPERTY ParseAndAddCatchTests_TESTS "${CTestName}")
set_property(
SOURCE ${SourceFile}
APPEND
PROPERTY ParseAndAddCatchTests_TESTS "${CTestName}")
endif()
endforeach()
endfunction()
# entry point
function(ParseAndAddCatchTests TestTarget)
  # Deprecated entry point: walks every entry in the SOURCES property of
  # TestTarget and hands each one to ParseAndAddCatchTests_ParseFile, which
  # registers the Catch test cases it finds.
  message(DEPRECATION "ParseAndAddCatchTest: function deprecated because of possibility of missed test cases. Consider using 'catch_discover_tests' from 'Catch.cmake'")
  ParseAndAddCatchTests_PrintDebugMessage("Started parsing ${TestTarget}")

  # Collect the sources attached to the target.
  get_target_property(_TargetSources ${TestTarget} SOURCES)
  ParseAndAddCatchTests_PrintDebugMessage("Found the following sources: ${_TargetSources}")

  # Parse the sources one at a time.
  foreach(_Source ${_TargetSources})
    ParseAndAddCatchTests_ParseFile(${_Source} ${TestTarget})
  endforeach()

  ParseAndAddCatchTests_PrintDebugMessage("Finished parsing ${TestTarget}")
endfunction()
@@ -0,0 +1,32 @@
# When ctest is run, each submodule includes this file to generate the <submodule>_tests.cmake file.
# <submodule>_tests.cmake contains the add_test calls that run the individual tests.
#
# The script spawns a child CMake process in script mode (-P) on
# _CATCH_ADD_TEST_SCRIPT, forwarding the discovery settings as -D definitions,
# and then includes the file named by ctestfilepath if it exists afterwards.
# NOTE(review): ABSOLUTE resolves the relative name `cmake` against the current
# directory rather than searching PATH - confirm that a `cmake` executable is
# expected to live there.
get_filename_component(_cmake_path cmake ABSOLUTE)
execute_process(
COMMAND "${_cmake_path}"
-D "TEST_TARGET=${TARGET}"
-D "TEST_EXECUTABLE=${_TARGET_EXECUTABLE}"
-D "TEST_EXECUTOR=${crosscompiling_emulator}"
-D "TEST_WORKING_DIR=${_WORKING_DIRECTORY}"
-D "TEST_SPEC=${_TEST_SPEC}"
-D "TEST_EXTRA_ARGS=${_EXTRA_ARGS}"
-D "TEST_PROPERTIES=${_PROPERTIES}"
-D "TEST_PREFIX=${_TEST_PREFIX}"
-D "TEST_SUFFIX=${_TEST_SUFFIX}"
-D "TEST_LIST=${_TEST_LIST}"
-D "TEST_REPORTER=${_REPORTER}"
-D "TEST_OUTPUT_DIR=${_OUTPUT_DIR}"
-D "TEST_OUTPUT_PREFIX=${_OUTPUT_PREFIX}"
-D "TEST_OUTPUT_SUFFIX=${_OUTPUT_SUFFIX}"
-D "CTEST_FILE=${ctestfilepath}"
-P "${_CATCH_ADD_TEST_SCRIPT}"
# NOTE(review): `output` and `result` are captured but not inspected anywhere
# in this file, so a failing child process is only noticed through the
# missing generated file below.
OUTPUT_VARIABLE output
RESULT_VARIABLE result
# NOTE(review): TEST_WORKING_DIR is not assigned in this file (only passed to
# the child via -D above); presumably it is provided by the includer - TODO
# confirm whether _WORKING_DIRECTORY was intended here.
WORKING_DIRECTORY "${TEST_WORKING_DIR}"
)
if(EXISTS "${ctestfilepath}")
# include the generated ctest file for execution
include(${ctestfilepath})
endif()
+25
مشاهده پرونده
@@ -0,0 +1,25 @@
Copyright 2009-2010 Cybozu Labs, Inc.
Copyright 2011-2014 Kazuho Oku
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
تفاوت فایلی نمایش داده نمی شود زیرا این فایل بسیار بزرگ است Diff را بارگزاری کن
@@ -0,0 +1,30 @@
# Copyright (c) 2021 Advanced Micro Devices, Inc. All Rights Reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
# Define HT_LOG_ENABLE only when configuring a Debug build.
if(CMAKE_BUILD_TYPE MATCHES "^Debug$")
  add_definitions(-DHT_LOG_ENABLE)
endif()

# Object library holding the shared test entry point and context sources.
# EXCLUDE_FROM_ALL keeps it out of the default build unless something uses it.
add_library(Main_Object EXCLUDE_FROM_ALL OBJECT
  main.cc
  hip_test_context.cc
  hip_test_features.cc
)

# Request C++17: through the CXX_STANDARD target property when HIP_PLATFORM
# matches "amd", otherwise by passing the compiler flag directly.
if(HIP_PLATFORM MATCHES "amd")
  set_target_properties(Main_Object PROPERTIES CXX_STANDARD 17)
else()
  target_compile_options(Main_Object PUBLIC -std=c++17)
endif()
@@ -0,0 +1,794 @@
#define COMMON
{
"Info": [
"File generated for commit on below mentioned date and time",
__DATE__,
__TIME__,
GITHASH
],
"DisabledTests": [
#if defined COMMON
"Unit_hipMallocFromPoolAsync_MThread_MaxThresh",
"Unit_hipMallocFromPoolAsync_MThread_CommonMpool_DefaultMempool",
"Unit_hipMemPoolTrimTo_Multithreaded",
"Unit_hipMemPoolSetGetAccess_Positive_MultipleGPU",
"Unit_hipStreamPerThread_DeviceReset_1",
"Unit_hipDeviceGetSharedMemConfig_Positive_Basic",
"Unit_hipDeviceGetSharedMemConfig_Positive_Threaded",
"Unit_hipGetDeviceFlags_Positive_Context",
"Unit_hipInit_Negative",
"Unit_hipDeviceReset_Positive_Basic",
"Unit_hipDeviceReset_Positive_Threaded",
"Unit_hipFuncSetCacheConfig_Positive_Basic",
"Unit_hipFuncSetCacheConfig_Negative_Parameters",
"Unit_hipFuncSetSharedMemConfig_Positive_Basic",
"Unit_hipFuncSetAttribute_Positive_PreferredSharedMemoryCarveout",
"NOTE: The following test is disabled due to defect - EXSWHTEC-241",
"NOTE: The following test is disabled due to defect - EXSWHTEC-242",
"Unit_hipFuncGetAttributes_Positive_Basic",
"NOTE: The following test is disabled due to defect - EXSWHTEC-244",
"Unit_hipExtLaunchMultiKernelMultiDevice_Negative_Parameters",
"Unit_hipOccupancyMaxActiveBlocksPerMultiprocessor_Negative_Parameters",
"Unit_hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags_Negative_Parameters",
"Unit_hipModuleOccupancyMaxPotentialBlockSizeWithFlags_Negative_Parameters",
"Unit_hipGraphMemcpyNodeSetParamsToSymbol_Positive_Basic",
"Unit_hipKernelNameRef_Negative_Parameters",
"Unit_hipMemAdvise_No_Flag_Interference",
"NOTE: The following 2 tests are disabled due to defect - EXSWHTEC-238",
"Unit_hipDrvMemcpy3D_Positive_Array",
"Unit_hipDrvMemcpy3DAsync_Positive_Array",
"Unit_hipMemRangeGetAttribute_Positive_AccessedBy_Basic",
"Unit_hipMemRangeGetAttribute_Positive_AccessedBy_Partial_Range",
"Unit_hipMemGetAddressRange_Positive",
"Unit_hipGraphAddMemcpyNode1D_Negative_Basic",
"Unit_ChannelDescriptor_Positive_16BitFloatingPoint",
"intermittent issue: failure expected but success returned",
"Unit_hipMemAdvise_NegtveTsts",
"Note: Following four tests disabled due to defect - EXSWHTEC-203",
"Unit_hipStreamSetCaptureDependencies_Positive_Functional",
"Note: Test disabled due to defect - EXSWHTEC-207",
"Unit_hipStreamCreateWithFlags_DefaultStreamInteraction",
"Unit_hipMemset3DSync",
"Unit_hipStreamAddCallback_StrmSyncTiming",
"Disabling test tracked SWDEV-394199",
"Unit_hipStreamCreateWithPriority_MulthreadNonblockingflag",
"Disabling test tracked SWDEV-395683",
"Unit_hipStreamPerThread_MultiThread",
"Disabling tests tracked with SWDEV-389647..",
"Unit_hipMemcpy2DToArrayAsync_Positive_Synchronization_Behavior",
"Disabling test tracked SWDEV-391555",
"Unit_hipMemcpyPeer_Positive_ZeroSize",
"Unit_hipMemcpyPeerAsync_Positive_ZeroSize",
"Fails in Stress test SWDEV-398971",
"SWDEV-398977 fails in stress tests",
"Unit_hipMemset2DSync",
"SWDEV-398981 fails in stress test",
"Unit_hipStreamCreateWithPriority_MulthreadDefaultflag",
"SWDEV-402054 fails in external github build",
"Unit_hipEventDestroy_WithWaitingStream",
"=== Below tests fail in stress test on 30/06/23 ===",
"Unit_hipMemcpyParam2DAsync_multiDevice-StreamOnDiffDevice",
"=== Below tests fail in external CI for PR https://github.com/ROCm-Developer-Tools/hip-tests/pull/96 ===",
"Unit_hipHostGetDevicePointer_Negative",
"Unit_hipExtModuleLaunchKernel_NonUniformWorkGroup",
"=== Below tests fail in external CI for PR https://github.com/ROCm-Developer-Tools/hip-tests/pull/18 ===",
"Unit_hipMemcpyAsync_Negative_Parameters",
"Unit_hipMemcpyDtoHAsync_Negative_Parameters",
"Unit_hipMemcpyHtoDAsync_Negative_Parameters",
"Unit_hipMemcpyDtoDAsync_Negative_Parameters",
"Unit_hipStreamValue_Wait32_Blocking_Mask_Eq_1",
"=== Below tests fail in external CI for PR https://github.com/ROCm-Developer-Tools/hip-tests/pull/327 ===",
"Unit_hiprtcDisabledSlpVectorizeComplrOptnTst",
"Unit_hiprtcCombiComplrOptnTst",
"=== Below tests fail in external CI for PR https://github.com/ROCm-Developer-Tools/hip-tests/pull/92 ===",
"Unit_hipGetChannelDesc_Negative_Parameters",
"Unit_hipGraphAddChildGraphNode_CmplxNstGrph_UpdKerFun_Clone",
"=== Below tests fail in stress test on 24/07/23 ===",
"Unit_hipStreamCreateWithPriority_ValidateWithEvents",
"Unit_hipEventIpc",
"=== SWDEV-427101:Below test fails randomly in PSDB ===",
"Unit_deviceAllocation_InOneThread_AccessInAllThreads",
"=== Below 2 tests are disabled due to defect EXSWHTEC-356 ===",
"Unit_Device___hisinf2_Accuracy_Positive",
"Unit_Device___hisnan2_Accuracy_Positive",
"Unit_Device___hbequ2_Accuracy_Positive",
"Unit_Device___hne_Accuracy_Positive",
"Unit_Device___hne2_Accuracy_Positive",
"Unit_Device___hbne2_Accuracy_Positive",
"Unit_Device___hbgeu2_Accuracy_Positive",
"Unit_Device___hbgtu2_Accuracy_Positive",
"Unit_Device___hbleu2_Accuracy_Positive",
"Unit_Device___hbltu2_Accuracy_Positive",
"=== Below 4 tests are disabled due to defect EXSWHTEC-355 ===",
"Unit_Device___hadd_Sanity_Positive",
"Unit_Device___uhadd_Sanity_Positive",
"Unit_Device___rhadd_Sanity_Positive",
"Unit_Device___urhadd_Sanity_Positive",
"Unit_hipGraphAddMemcpyNode_Negative_Parameters",
"=== Below 2 tests are disabled due to defect EXSWHTEC-369 ===",
"Unit_Device_ilogbf_Accuracy_Positive",
"Unit_Device_ilogb_Accuracy_Positive",
"NOTE: The following test is disabled due to defect - EXSWHTEC-245",
"Unit_hipMemCreate_MapNonContiguousChunks",
"Unit_hipMemMap_PhysicalMemoryReuse_MultiDev",
"Unit_hipMemMap_VMMMemoryReuse_MultiGPU",
"Unit_hipMemSetAccess_FuncTstOnMultDev",
"Unit_hipMemSetAccess_Vmm2PeerDevMemCpy",
"Unit_hipMemSetAccess_Vmm2VMMInterDevMemCpy",
"Unit_hipMemSetAccess_GrowVMM",
"Unit_hipMemMap_PhysicalMemory_Map2MultVMMs",
"Unit_hipMemSetAccess_MultiProc",
"=== SWDEV-434171: Below tests took long time to complete in stress test on 17/11/23 ===",
"Unit_Warp_Shfl_Positive_Basic - int",
"Unit_Warp_Shfl_Positive_Basic - unsigned int",
"Unit_Warp_Shfl_Positive_Basic - long",
"Unit_Warp_Shfl_Positive_Basic - unsigned long",
"Unit_Warp_Shfl_Positive_Basic - long long",
"Unit_Warp_Shfl_Positive_Basic - unsigned long long",
"Unit_Warp_Shfl_Positive_Basic - float",
"Unit_Warp_Shfl_Positive_Basic - double",
"Unit_Warp_Shfl_Positive_Basic - __half",
"Unit_Warp_Shfl_Positive_Basic - __half2",
"Unit_Warp_Shfl_XOR_Positive_Basic - int",
"Unit_Warp_Shfl_XOR_Positive_Basic - unsigned int",
"Unit_Warp_Shfl_XOR_Positive_Basic - long",
"Unit_Warp_Shfl_XOR_Positive_Basic - unsigned long",
"Unit_Warp_Shfl_XOR_Positive_Basic - long long",
"Unit_Warp_Shfl_XOR_Positive_Basic - unsigned long long",
"Unit_Warp_Shfl_XOR_Positive_Basic - float",
"Unit_Warp_Shfl_XOR_Positive_Basic - double",
"Unit_Warp_Shfl_XOR_Positive_Basic - __half",
"Unit_Warp_Shfl_XOR_Positive_Basic - __half2",
"=== SWDEV-434878: Below tests failed in stress test on 24/11/23 ===",
"Unit_hipGraphUpload_Negative_Parameters",
"Unit_hipModuleOccupancyMaxPotentialBlockSize_Negative_Parameters",
"Unit_hipModuleOccupancyMaxPotentialBlockSize_Positive_RangeValidation",
"Unit_hipModuleOccupancyMaxPotentialBlockSizeWithFlags_Positive_RangeValidation",
"Unit_hipModuleOccupancyMaxActiveBlocksPerMultiprocessor_Negative_Parameters",
"Unit_hipModuleOccupancyMaxActiveBlocksPerMultiprocessor_Positive_RangeValidation",
"Unit_hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags_Positive_RangeValidation",
"=== SWDEV-435667: Below tests failing randomly in stress test on 01/12/23 ===",
"Unit_atomicExch_system_Positive_Peer_GPUs - int",
"Unit_atomicExch_system_Positive_Peer_GPUs - unsigned int",
"Unit_atomicExch_system_Positive_Peer_GPUs - unsigned long",
"Unit_atomicExch_system_Positive_Peer_GPUs - unsigned long long",
"Unit_atomicExch_system_Positive_Peer_GPUs - float",
"Unit_atomicExch_system_Positive_Peer_GPUs - double",
"Unit_atomicExch_system_Positive_Host_And_Peer_GPUs - int",
"Unit_atomicExch_system_Positive_Host_And_Peer_GPUs - unsigned int",
"Unit_atomicExch_system_Positive_Host_And_Peer_GPUs - unsigned long",
"Unit_atomicExch_system_Positive_Host_And_Peer_GPUs - unsigned long long",
"Unit_atomicExch_system_Positive_Host_And_Peer_GPUs - float",
"Unit_atomicExch_system_Positive_Host_And_Peer_GPUs - double",
"=== SWDEV-439004: Below tests failing randomly in CQE staging ===",
"Unit_hipGraphicsMapResources_Negative_Parameters",
"Unit_hipGraphicsSubResourceGetMappedArray_Negative_Parameters",
"Unit_hipGraphicsResourceGetMappedPointer_Positive_Parameters",
"Unit_hipGraphicsResourceGetMappedPointer_Negative_Parameters",
"Unit_hipGraphicsUnmapResources_Negative_Parameters",
"Unit_hipGraphicsUnregisterResource_Negative_Parameters",
"SWDEV-443760: This test fails when device memory is used for kernel args",
"=== Below tests fail in external CI for PR https://github.com/ROCm-Developer-Tools/hip-tests/pull/356 ===",
"Note: Test disabled due to defect - EXSWHTEC-151",
"Unit_hipModuleLoad_Negative_Load_From_A_File_That_Is_Not_A_Module",
"Note: Following two tests disabled due to defect - EXSWHTEC-153",
"Unit_hipModuleLoadData_Negative_Image_Is_An_Empty_String",
"Unit_hipModuleLoadDataEx_Negative_Image_Is_An_Empty_String",
"Note: Test disabled due to defect - EXSWHTEC-163",
"Unit_hipModuleGetGlobal_Negative_Hmod_Is_Nullptr",
"Note: Test disabled due to defect - EXSWHTEC-164",
"Unit_hipModuleGetGlobal_Negative_Name_Is_Empty_String",
"Note: Test disabled due to defect - EXSWHTEC-165",
"Unit_hipModuleGetGlobal_Negative_Dptr_And_Bytes_Are_Nullptr",
"Note: Test disabled due to defect - EXSWHTEC-166",
"Unit_hipModuleGetTexRef_Negative_Hmod_Is_Nullptr",
"Note: Test disabled due to defect - EXSWHTEC-167",
"Unit_hipModuleGetTexRef_Negative_Name_Is_Empty_String",
"SWDEV-441785: Below tests failing in stress test on 05/01/24 ===",
"Unit_hipMemcpyParam2DAsync_Positive_Basic",
"SWDEV-442583: Below tests failing in stress test on 12/01/24 ===",
"Unit_hipLaunchCooperativeKernelMultiDevice_Negative_Parameters",
"Unit_hipLaunchCooperativeKernelMultiDevice_Negative_MultiKernelSameDevice",
"Unit_hipExtLaunchMultiKernelMultiDevice_Negative_MultiKernelSameDevice",
"=== Below tests are failing PSDB ===",
"Unit_hipMemcpy3D_Positive_Synchronization_Behavior",
"Unit_hipMemcpyParam2D_Positive_Synchronization_Behavior",
"Unit_hipDrvMemcpy3D_Positive_Synchronization_Behavior",
"Unit_hipLaunchCooperativeKernel_Negative_Parameters",
"Unit_hipDrvGraphAddMemsetNode_hipMalloc3D_2D",
"Unit_hipDrvGraphAddMemsetNode_hipMalloc3D_1D",
"Unit_hipDrvGraphAddMemsetNode_hipMalloc_1D",
"Unit_hipDrvGraphAddMemsetNode_hipMallocManaged",
"Unit_hipExtModuleLaunchKernel_Negative_Parameters",
"Unit_hipLaunchKernel_Negative_Parameters",
"Unit_hipModuleLaunchCooperativeKernel_Negative_Parameters",
"Unit_Device_modf_modff_Negative_RTC",
"SWDEV-446588 - Disable graph multi gpu testcases until graph has support for it",
"Unit_hipGraphExecUpdate_Negative_MultiDevice_Context_Changed",
"Unit_hipGraphMem_Alloc_Free_NodeGetParams_Functional_MultiDevice",
"Unit_hipGraphUpload_Functional_multidevice_test",
"=== Below tests fail in external CI for PR https://github.com/ROCm-Developer-Tools/hip-tests/pull/210 ===",
"Unit_Assert_Positive_Basic_KernelFail",
"Unit_Coalesced_Group_Tiled_Partition_Sync_Positive_Basic - uint8_t",
"Unit_Coalesced_Group_Tiled_Partition_Sync_Positive_Basic - uint16_t",
"Unit_Coalesced_Group_Tiled_Partition_Sync_Positive_Basic - uint32_t",
"=== SWDEV-444987 - Below tests fail in stress testing on 25/01/2023 ===",
"Unit_floatTM",
"Unit_TestMathFuncComplex",
"Unit_AtomicsWithRandomActiveLanesInWavefront_UniformInteger",
"Unit_AtomicsWithRandomActiveLanesInWavefront_DivergentInteger",
"Unit_hipGraphAddMemcpyNodeToSymbol_Positive_Basic",
"Unit_hipStreamBeginCapture_Positive_Functional",
"Unit_atomicAnd_Negative_Parameters_RTC",
"Unit_atomicOr_Negative_Parameters_RTC",
"Unit_atomicXor_Negative_Parameters_RTC",
"Unit_atomicMin_Negative_Parameters_RTC",
"Unit_atomicMax_Negative_Parameters_RTC",
"Unit_Kernel_Launch_bounds_Negative_OutOfBounds",
"Unit_Kernel_Launch_bounds_Negative_Parameters_RTC",
"Unit_Device_sin_Accuracy_Positive - float",
"Unit_Device_sin_Accuracy_Positive - double",
"Unit_Device_cos_Accuracy_Positive - float",
"Unit_Device_cos_Accuracy_Positive - double",
"Unit_Device_tan_Accuracy_Positive - float",
"Unit_Device_tan_Accuracy_Positive - double",
"Unit_Device_asin_Accuracy_Positive - float",
"Unit_Device_asin_Accuracy_Positive - double",
"Unit_Device_acos_Accuracy_Positive - float",
"Unit_Device_acos_Accuracy_Positive - double",
"Unit_Device_atan_Accuracy_Positive - float",
"Unit_Device_atan_Accuracy_Positive - double",
"Unit_Device_sinh_Accuracy_Positive - float",
"Unit_Device_sinh_Accuracy_Positive - double",
"Unit_Device_cosh_Accuracy_Positive - float",
"Unit_Device_cosh_Accuracy_Positive - double",
"Unit_Device_tanh_Accuracy_Positive - float",
"Unit_Device_tanh_Accuracy_Positive - double",
"Unit_Device_asinh_Accuracy_Positive - float",
"Unit_Device_asinh_Accuracy_Positive - double",
"Unit_Device_acosh_Accuracy_Positive - float",
"Unit_Device_acosh_Accuracy_Positive - double",
"Unit_Device_atanh_Accuracy_Positive - float",
"Unit_Device_atanh_Accuracy_Positive - double",
"Unit_Device_sinpi_Accuracy_Positive - float",
"Unit_Device_sinpi_Accuracy_Positive - double",
"Unit_Device_cospi_Accuracy_Positive - float",
"Unit_Device_cospi_Accuracy_Positive - double",
"Unit_Device_tanpi_Accuracy_Positive - float",
"Unit_Device_tanpi_Accuracy_Positive - double",
"Unit_Device_atan2_Accuracy_Positive - float",
"Unit_Device_atan2_Accuracy_Positive - double",
"Unit_Device_sincos_Accuracy_Positive - float",
"Unit_Device_sincos_Accuracy_Positive - double",
"Unit_Device_sincospi_Accuracy_Positive - float",
"Unit_Device_sincospi_Accuracy_Positive - double",
"Unit_Device_fabs_Accuracy_Positive - float",
"Unit_Device_fabs_Accuracy_Positive - double",
"Unit_Device_copysign_Accuracy_Positive - float",
"Unit_Device_copysign_Accuracy_Positive - double",
"Unit_Device_fmax_Accuracy_Positive - float",
"Unit_Device_fmax_Accuracy_Positive - double",
"Unit_Device_fmin_Accuracy_Positive - float",
"Unit_Device_fmin_Accuracy_Positive - double",
"Unit_Device_nextafter_Accuracy_Positive - float",
"Unit_Device_nextafter_Accuracy_Positive - double",
"Unit_Device_fma_Accuracy_Positive - float",
"Unit_Device_fma_Accuracy_Positive - double",
"Unit_Device_fdividef_Accuracy_Positive",
"Unit_Device_isfinite_Accuracy_Positive - float",
"Unit_Device_isfinite_Accuracy_Positive - double",
"Unit_Device_isinf_Accuracy_Positive - float",
"Unit_Device_isinf_Accuracy_Positive - double",
"Unit_Device_isnan_Accuracy_Positive - float",
"Unit_Device_isnan_Accuracy_Positive - double",
"Unit_Device_signbit_Accuracy_Positive - float",
"Unit_Device_signbit_Accuracy_Positive - double",
"Unit_Device_fmod_Accuracy_Positive - float",
"Unit_Device_fmod_Accuracy_Positive - double",
"Unit_Device_remainder_Accuracy_Positive - float",
"Unit_Device_remainder_Accuracy_Positive - double",
"Unit_Device_fdim_Accuracy_Positive - float",
"Unit_Device_fdim_Accuracy_Positive - double",
"Unit_Device_trunc_Accuracy_Positive - float",
"Unit_Device_trunc_Accuracy_Positive - double",
"Unit_Device_round_Accuracy_Positive - float",
"Unit_Device_round_Accuracy_Positive - double",
"Unit_Device_rint_Accuracy_Positive - float",
"Unit_Device_rint_Accuracy_Positive - double",
"Unit_Device_nearbyint_Accuracy_Positive - float",
"Unit_Device_nearbyint_Accuracy_Positive - double",
"Unit_Device_ceil_Accuracy_Positive - float",
"Unit_Device_ceil_Accuracy_Positive - double",
"Unit_Device_floor_Accuracy_Positive - float",
"Unit_Device_floor_Accuracy_Positive - double",
"Unit_Device_lrint_Accuracy_Positive - float",
"Unit_Device_lrint_Accuracy_Positive - double",
"Unit_Device_lround_Accuracy_Positive - float",
"Unit_Device_lround_Accuracy_Positive - double",
"Unit_Device_llrint_Accuracy_Positive - float",
"Unit_Device_llrint_Accuracy_Positive - double",
"Unit_Device_llround_Accuracy_Positive - float",
"Unit_Device_llround_Accuracy_Positive - double",
"Unit_Device_remquo_Accuracy_Positive - float",
"Unit_Device_remquo_Accuracy_Positive - double",
"Unit_Device_modf_Accuracy_Positive - float",
"Unit_Device_modf_Accuracy_Positive - double",
"=== Below tests cause timeout in stress test of 09/02/24 ===",
"Unit_Device___half2half2_Accuracy_Positive",
"Unit_Device_make_half2_Accuracy_Positive",
"Unit_Device___halves2half2_Accuracy_Positive",
"Unit_Device___low2half_Accuracy_Positive",
"Unit_Device___high2half_Accuracy_Positive",
"Unit_Device___low2half2_Accuracy_Positive",
"Unit_Device___high2half2_Accuracy_Positive",
"Unit_Device___lowhigh2highlow_Accuracy_Positive",
"Unit_Device___lows2half2_Accuracy_Positive",
"Unit_Device___highs2half2_Accuracy_Positive",
"Unit_Device___float2half2_rn_Accuracy_Positive",
"Unit_Device___floats2half2_rn_Accuracy_Positive",
"Unit_Device___float22half2_rn_Accuracy_Positive",
"Unit_Device___low2float_Accuracy_Positive",
"Unit_Device___high2float_Accuracy_Positive",
"Unit_Device___half22float2_Accuracy_Positive",
"Unit_Device_hcos_Accuracy_Positive",
"Unit_Device_h2cos_Accuracy_Positive",
"Unit_Device_hsin_Accuracy_Positive",
"Unit_Device_h2sin_Accuracy_Positive",
"Unit_Device_hexp_Accuracy_Positive",
"Unit_Device_h2exp_Accuracy_Positive",
"Unit_Device_hexp10_Accuracy_Positive",
"Unit_Device_h2exp10_Accuracy_Positive",
"Unit_Device_hexp2_Accuracy_Positive",
"Unit_Device_h2exp2_Accuracy_Positive",
"Unit_Device_hlog_Accuracy_Positive",
"Unit_Device_h2log_Accuracy_Positive",
"Unit_Device_hlog10_Accuracy_Positive",
"Unit_Device_h2log10_Accuracy_Positive",
"Unit_Device_hlog2_Accuracy_Positive",
"Unit_Device_h2log2_Accuracy_Positive",
"Unit_Device_hsqrt_Accuracy_Positive",
"Unit_Device_h2sqrt_Accuracy_Positive",
"Unit_Device_hceil_Accuracy_Positive",
"Unit_Device_h2ceil_Accuracy_Positive",
"Unit_Device_hfloor_Accuracy_Positive",
"Unit_Device_h2floor_Accuracy_Positive",
"Unit_Device_htrunc_Accuracy_Positive",
"Unit_Device_h2trunc_Accuracy_Positive",
"Unit_Device_hrcp_Accuracy_Positive",
"Unit_Device_h2rcp_Accuracy_Positive",
"Unit_Device_hrsqrt_Accuracy_Positive",
"Unit_Device_h2rsqrt_Accuracy_Positive",
"Unit_Device_hrint_Accuracy_Positive",
"Unit_Device_h2rint_Accuracy_Positive",
"Unit_Device___habs_Accuracy_Positive",
"Unit_Device___habs2_Accuracy_Positive",
"Unit_Device___hneg_Accuracy_Positive",
"Unit_Device___hneg2_Accuracy_Positive",
"Unit_Device___hadd_wrapper_Accuracy_Positive",
"Unit_Device___hadd2_Accuracy_Positive",
"Unit_Device___hadd_sat_Accuracy_Positive",
"Unit_Device___hadd2_sat_Accuracy_Positive",
"Unit_Device___hsub_Accuracy_Positive",
"Unit_Device___hsub2_Accuracy_Positive",
"Unit_Device___hsub_sat_Accuracy_Positive",
"Unit_Device___hsub2_sat_Accuracy_Positive",
"Unit_Device___hmul_Accuracy_Positive",
"Unit_Device___hmul2_Accuracy_Positive",
"Unit_Device___hmul_sat_Accuracy_Positive",
"Unit_Device___hmul2_sat_Accuracy_Positive",
"Unit_Device___hdiv_Accuracy_Positive",
"Unit_Device___h2div_Accuracy_Positive",
"Unit_Device___hfma_Accuracy_Positive",
"Unit_Device___hfma2_Accuracy_Positive",
"Unit_Device___hfma_sat_Accuracy_Positive",
"Unit_Device___hfma2_sat_Accuracy_Positive",
"Unit_Device___hisinf_Accuracy_Positive",
"Unit_Device___hisinf2_Accuracy_Positive",
"Unit_Device___hisnan_Accuracy_Positive",
"Unit_Device___hisnan2_Accuracy_Positive",
"Unit_Device___heq_Accuracy_Positive",
"Unit_Device___hbeq2_Accuracy_Positive",
"Unit_Device___hequ_Accuracy_Positive",
"Unit_Device___hbequ2_Accuracy_Positive",
"Unit_Device___heq2_Accuracy_Positive",
"Unit_Device___hequ2_Accuracy_Positive",
"Unit_Device___hne_Accuracy_Positive",
"Unit_Device___hbne2_Accuracy_Positive",
"Unit_Device___hneu_Accuracy_Positive",
"Unit_Device___hbneu2_Accuracy_Positive",
"Unit_Device___hne2_Accuracy_Positive",
"Unit_Device___hneu2_Accuracy_Positive",
"Unit_Device___hge_Accuracy_Positive",
"Unit_Device___hbge2_Accuracy_Positive",
"Unit_Device___hgeu_Accuracy_Positive",
"Unit_Device___hbgeu2_Accuracy_Positive",
"Unit_Device___hge2_Accuracy_Positive",
"Unit_Device___hgeu2_Accuracy_Positive",
"Unit_Device___hgt_Accuracy_Positive",
"Unit_Device___hbgt2_Accuracy_Positive",
"Unit_Device___hgtu_Accuracy_Positive",
"Unit_Device___hbgtu2_Accuracy_Positive",
"Unit_Device___hgt2_Accuracy_Positive",
"Unit_Device___hgtu2_Accuracy_Positive",
"Unit_Device___hle_Accuracy_Positive",
"Unit_Device___hble2_Accuracy_Positive",
"Unit_Device___hleu_Accuracy_Positive",
"Unit_Device___hbleu2_Accuracy_Positive",
"Unit_Device___hle2_Accuracy_Positive",
"Unit_Device___hleu2_Accuracy_Positive",
"Unit_Device___hlt_Accuracy_Positive",
"Unit_Device___hblt2_Accuracy_Positive",
"Unit_Device___hltu_Accuracy_Positive",
"Unit_Device___hbltu2_Accuracy_Positive",
"Unit_Device___hlt2_Accuracy_Positive",
"Unit_Device___hltu2_Accuracy_Positive",
"Unit_Device___hmax_Accuracy_Positive",
"Unit_Device___hmin_Accuracy_Positive",
"Unit_Device___hmax_nan_Accuracy_Positive",
"Unit_Device___hmin_nan_Accuracy_Positive",
"Unit_Device___half2int_rn_Accuracy_Positive",
"Unit_Device___half2int_rz_Accuracy_Positive",
"Unit_Device___half2int_rd_Accuracy_Positive",
"Unit_Device___half2int_ru_Accuracy_Positive",
"Unit_Device___half2uint_rn_Accuracy_Positive",
"Unit_Device___half2uint_rz_Accuracy_Positive",
"Unit_Device___half2uint_rd_Accuracy_Positive",
"Unit_Device___half2uint_ru_Accuracy_Positive",
"Unit_Device___half2short_rn_Accuracy_Positive",
"Unit_Device___half2short_rz_Accuracy_Positive",
"Unit_Device___half2short_rd_Accuracy_Positive",
"Unit_Device___half2short_ru_Accuracy_Positive",
"Unit_Device___half2ushort_rn_Accuracy_Positive",
"Unit_Device___half2ushort_rz_Accuracy_Positive",
"Unit_Device___half2ushort_rd_Accuracy_Positive",
"Unit_Device___half2ushort_ru_Accuracy_Positive",
"Unit_Device___half2ll_rn_Accuracy_Positive",
"Unit_Device___half2ll_rz_Accuracy_Positive",
"Unit_Device___half2ll_rd_Accuracy_Positive",
"Unit_Device___half2ll_ru_Accuracy_Positive",
"Unit_Device___half2ull_rn_Accuracy_Positive",
"Unit_Device___half2ull_rz_Accuracy_Positive",
"Unit_Device___half2ull_rd_Accuracy_Positive",
"Unit_Device___half2ull_ru_Accuracy_Positive",
"Unit_Device___half_as_short_Accuracy_Positive",
"Unit_Device___half_as_ushort_Accuracy_Positive",
"Unit_Device___int2half_rn_Accuracy_Positive",
"Unit_Device___int2half_rz_Accuracy_Positive",
"Unit_Device___int2half_rd_Accuracy_Positive",
"Unit_Device___int2half_ru_Accuracy_Positive",
"Unit_Device___uint2half_rn_Accuracy_Positive",
"Unit_Device___uint2half_rz_Accuracy_Positive",
"Unit_Device___uint2half_rd_Accuracy_Positive",
"Unit_Device___uint2half_ru_Accuracy_Positive",
"Unit_Device___short2half_rn_Accuracy_Positive",
"Unit_Device___short2half_rz_Accuracy_Positive",
"Unit_Device___short2half_rd_Accuracy_Positive",
"Unit_Device___short2half_ru_Accuracy_Positive",
"Unit_Device___ushort2half_rn_Accuracy_Positive",
"Unit_Device___ushort2half_rz_Accuracy_Positive",
"Unit_Device___ushort2half_rd_Accuracy_Positive",
"Unit_Device___ushort2half_ru_Accuracy_Positive",
"Unit_Device___ll2half_rn_Accuracy_Positive",
"Unit_Device___ll2half_rz_Accuracy_Positive",
"Unit_Device___ll2half_rd_Accuracy_Positive",
"Unit_Device___ll2half_ru_Accuracy_Positive",
"Unit_Device___ull2half_rn_Accuracy_Positive",
"Unit_Device___ull2half_rz_Accuracy_Positive",
"Unit_Device___ull2half_rd_Accuracy_Positive",
"Unit_Device___ull2half_ru_Accuracy_Positive",
"Unit_Device___short_as_half_Accuracy_Positive",
"Unit_Device___ushort_as_half_Accuracy_Positive",
"Unit_Device___float2half_rn_Accuracy_Positive",
"Unit_Device___float2half_Accuracy_Positive",
"Unit_Device___half2float_Accuracy_Positive",
"Unit_Device___frcp_rn_Accuracy_Positive",
"Unit_Device___fsqrt_rn_Accuracy_Positive",
"Unit_Device___frsqrt_rn_Accuracy_Positive",
"Unit_Device___expf_Accuracy_Positive",
"Unit_Device___exp10f_Accuracy_Positive",
"Unit_Device___logf_Accuracy_Positive",
"Unit_Device___log2f_Accuracy_Positive",
"Unit_Device___log10f_Accuracy_Positive",
"Unit_Device___sinf_Accuracy_Positive",
"Unit_Device___sincosf_sin_Accuracy_Positive",
"Unit_Device___cosf_Accuracy_Positive",
"Unit_Device___sincosf_cos_Accuracy_Positive",
"Unit_Device___fadd_rn_Accuracy_Positive",
"Unit_Device___fsub_rn_Accuracy_Positive",
"Unit_Device___fmul_rn_Accuracy_Positive",
"Unit_Device___fdiv_rn_Accuracy_Positive",
"Unit_Device___fdividef_Accuracy_Positive",
"Unit_Device___fmaf_rn_Accuracy_Positive",
"Unit_Device___drcp_rn_Accuracy_Positive",
"Unit_Device___dsqrt_rn_Accuracy_Positive",
"Unit_Device___dadd_rn_Accuracy_Positive",
"Unit_Device___dsub_rn_Accuracy_Positive",
"Unit_Device___dmul_rn_Accuracy_Positive",
"Unit_Device___ddiv_rn_Accuracy_Positive",
"Unit_Device___fma_rn_Accuracy_Positive",
"Unit_Device_sqrtf_Accuracy_Positive",
"Unit_Device_sqrt_Accuracy_Positive",
"Unit_Device_rsqrtf_Accuracy_Positive",
"Unit_Device_rsqrt_Accuracy_Positive",
"Unit_Device_cbrt_Accuracy_Positive - float",
"Unit_Device_cbrt_Accuracy_Positive - double",
"Unit_Device_rcbrtf_Accuracy_Positive",
"Unit_Device_rcbrt_Accuracy_Positive",
"Unit_Device_hypot_Accuracy_Positive - float",
"Unit_Device_hypot_Accuracy_Positive - double",
"Unit_Device_rhypot_Accuracy_Positive - float",
"Unit_Device_rhypot_Accuracy_Positive - double",
"Unit_Device_norm3d_Accuracy_Positive - float",
"Unit_Device_norm3d_Accuracy_Positive - double",
"Unit_Device_rnorm3d_Accuracy_Positive - float",
"Unit_Device_rnorm3d_Accuracy_Positive - double",
"Unit_Device_norm4d_Accuracy_Positive - float",
"Unit_Device_norm4d_Accuracy_Positive - double",
"Unit_Device_rnorm4d_Accuracy_Positive - float",
"Unit_Device_rnorm4d_Accuracy_Positive - double",
"Unit_Device_exp_Accuracy_Positive - float",
"Unit_Device_exp_Accuracy_Positive - double",
"Unit_Device_exp2_Accuracy_Positive - float",
"Unit_Device_exp2_Accuracy_Positive - double",
"Unit_Device_expm1_Accuracy_Positive - float",
"Unit_Device_expm1_Accuracy_Positive - double",
"Unit_Device_exp10f_Accuracy_Positive",
"Unit_Device_exp10_Accuracy_Positive",
"Unit_Device_frexpf_Accuracy_Positive",
"Unit_Device_frexp_Accuracy_Positive",
"Unit_Device_pow_Accuracy_Positive - float",
"Unit_Device_pow_Accuracy_Positive - double",
"Unit_Device_ldexp_Accuracy_Positive - float",
"Unit_Device_ldexp_Accuracy_Positive - double",
"Unit_Device_powi_Accuracy_Positive - float",
"Unit_Device_powi_Accuracy_Positive - double",
"Unit_Device_scalbn_Accuracy_Positive - float",
"Unit_Device_scalbn_Accuracy_Positive - double",
"Unit_Device_scalbln_Accuracy_Positive - float",
"Unit_Device_scalbln_Accuracy_Positive - double",
"Unit_Device_log_Accuracy_Positive - float",
"Unit_Device_log_Accuracy_Positive - double",
"Unit_Device_log2_Accuracy_Positive - float",
"Unit_Device_log2_Accuracy_Positive - double",
"Unit_Device_log10_Accuracy_Positive - float",
"Unit_Device_log10_Accuracy_Positive - double",
"Unit_Device_log1p_Accuracy_Positive - float",
"Unit_Device_log1p_Accuracy_Positive - double",
"Unit_Device_logb_Accuracy_Positive - float",
"Unit_Device_logb_Accuracy_Positive - double",
"Unit_Device_ilogbf_Accuracy_Positive",
"Unit_Device_ilogb_Accuracy_Positive",
"Unit_Device_erf_Accuracy_Positive - float",
"Unit_Device_erf_Accuracy_Positive - double",
"Unit_Device_erfc_Accuracy_Positive - float",
"Unit_Device_erfc_Accuracy_Positive - double",
"Unit_Device_erfinvf_Accuracy_Positive",
"Unit_Device_erfinv_Accuracy_Positive",
"Unit_Device_erfcinvf_Accuracy_Positive",
"Unit_Device_erfcinv_Accuracy_Positive",
"Unit_Device_normcdff_Accuracy_Positive",
"Unit_Device_normcdf_Accuracy_Positive",
"Unit_Device_tgammaf_Accuracy_Limited_Positive",
"Unit_Device_tgamma_Accuracy_Limited_Positive",
"Unit_Device_lgammaf_Accuracy_Limited_Positive",
"Unit_Device_lgamma_Accuracy_Limited_Positive",
"Unit_Device_cyl_bessel_i0f_Accuracy_Limited_Positive",
"Unit_Device_cyl_bessel_i0_Accuracy_Limited_Positive",
"Unit_Device_cyl_bessel_i1f_Accuracy_Limited_Positive",
"Unit_Device_cyl_bessel_i1_Accuracy_Limited_Positive",
"Unit_Device_y0f_Accuracy_Limited_Positive",
"Unit_Device_y0_Accuracy_Limited_Positive",
"Unit_Device_y1f_Accuracy_Limited_Positive",
"Unit_Device_y1_Accuracy_Limited_Positive",
"Unit_Device_ynf_Accuracy_Limited_Positive",
"Unit_Device_yn_Accuracy_Limited_Positive",
"Unit_Device_j0f_Accuracy_Limited_Positive",
"Unit_Device_j0_Accuracy_Limited_Positive",
"Unit_Device_j1f_Accuracy_Limited_Positive",
"Unit_Device_j1_Accuracy_Limited_Positive",
"Unit_Device_jnf_Accuracy_Limited_Positive",
"Unit_Device_jn_Accuracy_Limited_Positive",
"Unit_Device___double2int_rd_Positive",
"Unit_Device___double2int_rn_Positive",
"Unit_Device___double2int_ru_Positive",
"Unit_Device___double2int_rz_Positive",
"Unit_Device___double2int_Negative_RTC",
"Unit_Device___double2uint_rd_Positive",
"Unit_Device___double2uint_rn_Positive",
"Unit_Device___double2uint_ru_Positive",
"Unit_Device___double2uint_rz_Positive",
"Unit_Device___double2uint_Negative_RTC",
"Unit_Device___double2ll_rd_Positive",
"Unit_Device___double2ll_rn_Positive",
"Unit_Device___double2ll_ru_Positive",
"Unit_Device___double2ll_rz_Positive",
"Unit_Device___double2ll_Negative_RTC",
"Unit_Device___double2ull_rd_Positive",
"Unit_Device___double2ull_rn_Positive",
"Unit_Device___double2ull_ru_Positive",
"Unit_Device___double2ull_rz_Positive",
"Unit_Device___double2ull_Negative_RTC",
"Unit_Device___double2float_rd_Positive",
"Unit_Device___double2float_rn_Positive",
"Unit_Device___double2float_ru_Positive",
"Unit_Device___double2float_rz_Positive",
"Unit_Device___double2float_Negative_RTC",
"Unit_Device___double2hiint_Positive",
"Unit_Device___double2hiint_Negative_RTC",
"Unit_Device___double2loint_Positive",
"Unit_Device___double2loint_Negative_RTC",
"Unit_Device___double_as_longlong_Positive",
"Unit_Device___double_as_longlong_Negative_RTC",
"Unit_Device___float2int_rd_Positive",
"Unit_Device___float2int_rn_Positive",
"Unit_Device___float2int_ru_Positive",
"Unit_Device___float2int_rz_Positive",
"Unit_Device___float2int_Negative_RTC",
"Unit_Device___float2uint_rd_Positive",
"Unit_Device___float2uint_rn_Positive",
"Unit_Device___float2uint_ru_Positive",
"Unit_Device___float2uint_rz_Positive",
"Unit_Device___float2uint_Negative_RTC",
"Unit_Device___float2ll_rd_Positive",
"Unit_Device___float2ll_rn_Positive",
"Unit_Device___float2ll_ru_Positive",
"Unit_Device___float2ll_rz_Positive",
"Unit_Device___float2ll_Negative_RTC",
"Unit_Device___float2ull_rd_Positive",
"Unit_Device___float2ull_rn_Positive",
"Unit_Device___float2ull_ru_Positive",
"Unit_Device___float2ull_rz_Positive",
"Unit_Device___float2ull_Negative_RTC",
"Unit_Device___float_as_int_Positive",
"Unit_Device___float_as_int_Negative_RTC",
"Unit_Device___float_as_uint_Positive",
"Unit_Device___float_as_uint_Negative_RTC",
"Unit_Device___int2float_rd_Positive",
"Unit_Device___int2float_rn_Positive",
"Unit_Device___int2float_ru_Positive",
"Unit_Device___int2float_rz_Positive",
"Unit_Device_int2float___Negative_RTC",
"Unit_Device___uint2float_rd_Positive",
"Unit_Device___uint2float_rn_Positive",
"Unit_Device___uint2float_ru_Positive",
"Unit_Device___uint2float_rz_Positive",
"Unit_Device___uint2float_Negative_RTC",
"Unit_Device___int2double_rn_Positive",
"Unit_Device___int2double_Negative_RTC",
"Unit_Device___uint2double_rn_Positive",
"Unit_Device___uint2double_Negative_RTC",
"Unit_Device___ll2float_rd_Positive",
"Unit_Device___ll2float_rn_Positive",
"Unit_Device___ll2float_ru_Positive",
"Unit_Device___ll2float_rz_Positive",
"Unit_Device___ll2float_Negative_RTC",
"Unit_Device___ull2float_rd_Positive",
"Unit_Device___ull2float_rn_Positive",
"Unit_Device___ull2float_ru_Positive",
"Unit_Device___ull2float_rz_Positive",
"Unit_Device___ull2float_Negative_RTC",
"Unit_Device___ll2double_rd_Positive",
"Unit_Device___ll2double_rn_Positive",
"Unit_Device___ll2double_ru_Positive",
"Unit_Device___ll2double_rz_Positive",
"Unit_Device___ll2double_Negative_RTC",
"Unit_Device___ull2double_rd_Positive",
"Unit_Device___ull2double_rn_Positive",
"Unit_Device___ull2double_ru_Positive",
"Unit_Device___ull2double_rz_Positive",
"Unit_Device___ull2double_Negative_RTC",
"Unit_Device___int_as_float_Positive",
"Unit_Device___int_as_float_Negative_RTC",
"Unit_Device___uint_as_float_Positive",
"Unit_Device___uint_as_float_Negative_RTC",
"Unit_Device___longlong_as_double_Positive",
"Unit_Device___longlong_as_double_Negative_RTC",
"Unit_Device___hiloint2double_Positive",
"Unit_Device___hiloint2double_Negative_RTC",
"Unit_atomicAdd_Negative_Parameters_RTC",
"Unit_atomicSub_Negative_Parameters_RTC",
"Unit_atomicInc_Negative_Parameters_RTC",
"Unit_atomicDec_Negative_Parameters_RTC",
"Unit_atomicCAS_Negative_Parameters_RTC",
"SWDEV-447384, SWDEV-447932: These tests fail in gfx1100, gfx1101 & gfx1102",
"Unit_hipFreeAsync_Negative_Parameters",
"SWDEV-445928: These tests fail in PSDB stress test on 09/02/2024",
"Unit_hipCreateSurfaceObject_Negative_Parameters",
"Unit_hipDestroySurfaceObject_Negative_Parameters",
"Unit_Device___float2half_rd_Accuracy_Limited_Positive",
"Unit_Device___float2half_ru_Accuracy_Limited_Positive",
"Unit_Device___float2half_rz_Accuracy_Limited_Positive",
"Unit_hipGraphInstantiateWithFlags_StreamCaptureDeviceContextChg",
"=== SWDEV-457316 Below tests are disabled temporarily to avoid combined PSDB ===",
"Unit_hipGraphAddMemFreeNode_Negative_NotSupported",
"=== These tests fail on linux PSDB 21/11/24 ===",
"Unit_atomicMax_Positive_Multi_Kernel_Same_Address - double",
"Unit_atomicMax_Positive_Multi_Kernel_Same_Address - float",
"Unit_atomicMin_Positive_Multi_Kernel_Same_Address - double",
"Unit_atomicMin_Positive_Multi_Kernel_Same_Address - float",
"Unit_safeAtomicMax_Positive_Multi_Kernel_Same_Address - double",
"Unit_safeAtomicMax_Positive_Multi_Kernel_Same_Address - float",
"Unit_safeAtomicMin_Positive_Multi_Kernel_Same_Address - double",
"Unit_safeAtomicMin_Positive_Multi_Kernel_Same_Address - float",
"Unit_unsafeAtomicMax_Positive_Multi_Kernel_Same_Address - double",
"Unit_unsafeAtomicMax_Positive_Multi_Kernel_Same_Address - float",
"Unit_unsafeAtomicMin_Positive_Multi_Kernel_Same_Address - double",
"Unit_unsafeAtomicMin_Positive_Multi_Kernel_Same_Address - float",
"=== SWDEV-475482 - Disable tests to merge clr change ===",
"Unit_hipCreateTextureObject_LinearResource",
"Unit_hipCreateTextureObject_Pitch2DResource",
"=== SWDEV-454316 : Below tests fail in stress test ===",
"Unit_atomicMin_system_Positive_Peer_GPUs_Same_Address - float",
"Unit_atomicMin_system_Positive_Peer_GPUs_Same_Address - double",
"Unit_atomicMax_system_Positive_Peer_GPUs_Same_Address - float",
"Unit_atomicMax_system_Positive_Peer_GPUs_Same_Address - double",
"=== SWDEV-511679 : Below tests fail in stress test ===",
"Unit_hipIpcOpenMemHandle_Negative_Open_In_Two_Contexts_Same_Device",
"Unit_hipIpcCloseMemHandle_Positive_Reference_Counting",
"=== SWDEV-517063 Below tests are temporarily disabled due to PSDB failure",
"Unit_hipGraphInstantiateWithFlags_FlagAutoFreeOnLaunch_check",
"Unit_hipGraphInstantiateWithFlags_AutoFreeOnLaunchInLoop",
"Unit_hipGraphInstantiateWithFlags_AutoFreeOnLaunchFillKernel",
"Unit_hipGraphInstantiateWithFlags_AutoFreeOnLaunchDoubleKernel",
"Unit_hipGraphInstantiateWithFlags_AutoFreeOnLaunchMultiProcess",
"Unit_hipGraphInstantiateWithFlags_WithDefaultAndAutoFreeOnLaunch",
"=== SWDEV-457316 Below test is skipped due ref count logic (Discussed with German) ===",
"Unit_hipGraphAddMemAllocNode_Negative_Free_Alloc_Memory_Again",
"=== SWDEV-530762 : This test fails in Linux PSDB ===",
"Unit_hipDeviceGetGraphMemAttribute_Positive_DoubleMemory",
"=== SWDEV-538600 : This test fails in Linux PSDB ===",
"Unit_hipMemPoolMaxAlloc",
"Unit_hipStreamPerThread_ChildProc",
"=== SWDEV-536226 : Below three tests were disabled due to hang issue ===",
"Unit_hipGetLastError_KernelFailure_ValidAndInvalidOperations",
"Unit_hipGetLastError_KernelFailure_TwoDevices",
"Unit_hipGetLastError_KernelFailure_TwoStreams",
"=== Enable the below test when multi-device graph launches are fully supported ===",
"Unit_hipGraphInstantiateWithFlags_DependencyGraphDeviceCtxtChg",
#endif
#if defined gfx90a || defined gfx942 || defined gfx950
"=== SWDEV-443630 : Below test failed in stress test on 19/01/24 ===",
"Unit_Multi_Grid_Group_Positive_Sync",
"Unit_Warp_Shfl_Up_Positive_Basic - int",
"Unit_Warp_Shfl_Up_Positive_Basic - unsigned int",
"Unit_Warp_Shfl_Up_Positive_Basic - long",
"Unit_Warp_Shfl_Up_Positive_Basic - unsigned long",
"Unit_Warp_Shfl_Up_Positive_Basic - long long",
"Unit_Warp_Shfl_Up_Positive_Basic - unsigned long long",
"Unit_Warp_Shfl_Up_Positive_Basic - float",
"Unit_Warp_Shfl_Up_Positive_Basic - double",
"Unit_Warp_Shfl_Up_Positive_Basic - __half",
"Unit_Warp_Shfl_Up_Positive_Basic - __half2",
"Unit_Warp_Shfl_Down_Positive_Basic - int",
"Unit_Warp_Shfl_Down_Positive_Basic - unsigned int",
"Unit_Warp_Shfl_Down_Positive_Basic - long",
"Unit_Warp_Shfl_Down_Positive_Basic - unsigned long",
"Unit_Warp_Shfl_Down_Positive_Basic - long long",
"Unit_Warp_Shfl_Down_Positive_Basic - unsigned long long",
"Unit_Warp_Shfl_Down_Positive_Basic - float",
"Unit_Warp_Shfl_Down_Positive_Basic - double",
"Unit_Warp_Shfl_Down_Positive_Basic - __half",
"Unit_Warp_Shfl_Down_Positive_Basic - __half2",
"Unit_Device_norm_Sanity_Positive - float",
"Unit_Device_norm_Sanity_Positive - double",
"Unit_Device_rnorm_Sanity_Positive - float",
"Unit_Device_rnorm_Sanity_Positive - double",
"Unit_Device___float2half_rd_SmallVals_Sanity_Positive",
"Unit_Device___float2half_ru_SmallVals_Sanity_Positive",
"Unit_Device___float2half_rz_SmallVals_Sanity_Positive",
"Unit_safeAtomicMin_Positive_SameAddress - float",
#endif
#if defined gfx1030
"=== SWDEV-445961: These tests hang in PSDB stress test on 09/02/2024 ===",
"Unit_hipStreamBeginCapture_hipStreamPerThread",
#endif
#if defined gfx1200 || defined gfx1201
"=== SWDEV-470751 : Fine Grain memory is MTYPE_NC due to HW bug.",
"Unit_hipEventCreateWithFlags_DisableSystemFence_CohHstMem",
#endif
"=== Following tests disabled as it should be a local perf test",
"Performance_hipExtLaunchKernelGGL_QueryGPUFrequency",
"End of json"
]
}
@@ -0,0 +1,998 @@
#define COMMON
{
"Info": [
"File generated for commit on below mentioned date and time",
__DATE__,
__TIME__,
GITHASH
],
"DisabledTests": [
#if defined COMMON
"Unit_hipMallocFromPoolAsync_MThread_MaxThresh",
"Unit_hipMallocFromPoolAsync_MThread_CommonMpool_DefaultMempool",
"Unit_hipMemPoolTrimTo_Multithreaded",
"Unit_hipMemPoolSetGetAccess_Positive_MultipleGPU",
"Unit_hipMalloc_CoherentTst",
"Unit_hipGraphAddHostNode_ClonedGraphwithHostNode",
"Unit_hipEventIpc",
"Unit_hipMalloc3D_Negative",
"Unit_hipMemPoolApi_BasicAlloc",
"Unit_hipMemPoolApi_BasicTrim",
"Unit_hipMemPoolApi_BasicReuse",
"Unit_hipMemPoolApi_Opportunistic",
"Unit_hipMalloc3D_ValidatePitch",
"Unit_hipMemAllocPitch_ValidatePitch",
"Unit_hipGraphExecMemcpyNodeSetParamsToSymbol_Functional",
"Unit_hipMallocManaged_CoherentTstWthAdvise",
"Unit_hipMallocManaged_Advanced",
"Unit_hipMemRangeGetAttribute_NegativeTests",
"Unit_hipMemRangeGetAttribute_AccessedBy1",
"Unit_hipMemRangeGetAttribte_3",
"Unit_hipMemRangeGetAttribute_4",
"Unit_hipMemRangeGetAttribute_PrefetchAndGtAttr",
"Unit_hipMemAdvise_TstFlags",
"Unit_hipMemAdvise_PrefrdLoc",
"Unit_hipMemAdvise_ReadMostly",
"Unit_hipMemAdvise_TstFlgOverrideEffect",
"Unit_hipMemAdvise_TstAccessedByFlg",
"Unit_hipMemAdvise_TstAccessedByFlg4",
"Unit_hipMemAdvise_TstMemAdvisePrefrdLoc",
"Unit_hipMemAdvise_TstMemAdviseMultiFlag",
"Unit_hipMemAdvise_ReadMosltyMgpuTst",
"Unit_hipMemAdvise_TstSetUnsetPrfrdLoc",
"Unit_hipMallocManaged_DeviceContextChange - unsigned char",
"Unit_hipMallocManaged_DeviceContextChange - int",
"Unit_hipMallocManaged_DeviceContextChange - float",
"Unit_hipMallocManaged_DeviceContextChange - double",
"Unit_hipStreamCreateWithPriority_ValidateWithEvents",
"Unit_hipStreamPerThread_StrmWaitEvt",
"Unit_hipMemGetInfo_DifferentMallocSmall",
"Unit_hipMemGetInfo_MallocArray - int",
"Unit_hipMemGetInfo_MallocArray - int4",
"Unit_hipMemGetInfo_MallocArray - char",
"Unit_hipMemGetInfo_Malloc3D",
"Unit_hipMemGetInfo_Malloc3DArray - char",
"Unit_hipMemGetInfo_Malloc3DArray - int",
"Unit_hipMemGetInfo_Malloc3DArray - int4",
"Unit_hipMemGetInfo_ParaSmall",
"Unit_hipMemGetInfo_ParaMultiSmall",
"Unit_hipMultiThreadDevice_NearZero",
"Unit_hipStreamPerThread_DeviceReset_1",
"Unit_hipStreamCreateWithPriority_ValidateWithEvents",
"Unit_hipStreamPerThread_StrmWaitEvt",
"Unit_hipGraphMemcpyNodeSetParamsToSymbol_Functional",
"Unit_hipStreamWaitEvent_DifferentStreams",
"Unit_hipStreamQuery_WithFinishedWork",
"SWDEV-347670 - blocking tests have TDR, causing hangs",
"Unit_hipStreamValue_Wait32_Blocking_Mask_Gte",
"Unit_hipStreamValue_Wait32_Blocking_Mask_Eq_1",
"Unit_hipStreamValue_Wait32_Blocking_Mask_Eq_2",
"Unit_hipStreamValue_Wait32_Blocking_Mask_And",
"Unit_hipStreamValue_Wait32_Blocking_NoMask_Eq",
"Unit_hipStreamValue_Wait32_Blocking_NoMask_Gte",
"Unit_hipStreamValue_Wait32_Blocking_NoMask_And",
"Unit_hipStreamValue_Wait64_Blocking_Mask_Gte_1",
"Unit_hipStreamValue_Wait64_Blocking_Mask_Gte_2",
"Unit_hipStreamValue_Wait64_Blocking_Mask_Eq_1",
"Unit_hipStreamValue_Wait64_Blocking_Mask_Eq_2",
"Unit_hipStreamValue_Wait64_Blocking_Mask_And",
"Unit_hipStreamValue_Wait64_Blocking_NoMask_Gte",
"Unit_hipStreamValue_Wait64_Blocking_NoMask_Eq",
"Unit_hipStreamValue_Wait32_Blocking_NoMask_Nor",
"Unit_hipStreamValue_Wait64_Blocking_NoMask_And",
"Unit_hipGetDeviceFlags_Positive_Context",
"Unit_hipDeviceGetPCIBusId_Negative_PartialFill",
"Unit_hipDeviceGetSharedMemConfig_Positive_Basic",
"Unit_hipDeviceGetSharedMemConfig_Positive_Threaded",
"Unit_hipDeviceReset_Positive_Basic",
"Unit_hipDeviceReset_Positive_Threaded",
"Unit_hipInit_Negative",
"Unit_hipGraphMemcpyNodeSetParams_Functional",
"Unit_hipGraphNodeGetDependentNodes_Functional",
"Unit_hipGraphNodeGetDependencies_Functional",
"Unit_hipGraphExecChildGraphNodeSetParams_ChildTopology",
"Unit_hipGraphAddEventRecordNode_MultipleRun",
"Unit_hipGraphAddEventRecordNode_Functional_ElapsedTime",
"Unit_hipStreamBeginCapture_captureComplexGraph",
"Note: needs to be enabled when streamPerThread issues are fixed",
"Unit_hipStreamSynchronize_NullStreamAndStreamPerThread",
"Note: intermittent Seg fault failure ",
"Unit_hipGraphAddEventRecordNode_Functional_WithoutFlags",
"Unit_hipGraphAddChildGraphNode_MultGraphsAsSingleGraph",
"Unit_hipFuncSetCacheConfig_Positive_Basic",
"Unit_hipFuncSetCacheConfig_Negative_Parameters",
"Unit_hipFuncSetSharedMemConfig_Positive_Basic",
"Unit_hipFuncSetAttribute_Positive_PreferredSharedMemoryCarveout",
"Unit_hipEventCreateWithFlags_DisableSystemFence_HstVisMem",
"Unit_hipEventCreateWithFlags_DefaultFlg_HstVisMem",
"Unit_hipEventCreateWithFlags_DisableSystemFence_NonCohHstMem",
"Unit_hipEventCreateWithFlags_DefaultFlg_NonCohHstMem",
"Unit_hipEventCreateWithFlags_DisableSystemFence_CohHstMem",
"Unit_hipEventCreateWithFlags_DefaultFlg_CohHstMem",
"Unit_hipOccupancyMaxActiveBlocksPerMultiprocessor_Negative_Parameters",
"Unit_hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags_Negative_Parameters",
"Unit_hipModuleOccupancyMaxPotentialBlockSizeWithFlags_Negative_Parameters",
"Unit_hipGraphMemcpyNodeSetParamsToSymbol_Positive_Basic",
"Unit_hipGraphExecMemcpyNodeSetParamsToSymbol_Positive_Basic",
"Unit_hipGraphMemcpyNodeSetParamsFromSymbol_Positive_Basic",
"Unit_hipGraphExecMemcpyNodeSetParamsFromSymbol_Positive_Basic",
"Unit_hipKernelNameRef_Negative_Parameters",
"Unit_hipKernelNameRef_Positive_Basic",
"Unit_hipMemAdvise_No_Flag_Interference",
"Unit_hipGraphAddEventRecordNode_Functional_WithoutFlags",
"Unit_hipGraphDestroyNode_Complx_ChkNumOfNodesNDep",
"Unit_hipGraphDestroyNode_Complx_ChkNumOfNodesNDep_ClonedGrph",
"Unit_hipGraphDestroyNode_Complx_ChkNumOfNodesNDep_ChldNode",
"NOTE: The following 2 tests are disabled due to defect - EXSWHTEC-238",
"Unit_hipDrvMemcpy3D_Positive_Array",
"Unit_hipDrvMemcpy3DAsync_Positive_Array",
"Unit_hipMemGetAddressRange_Positive",
"Note: devicelib hangs and failures",
"Unit_deviceAllocation_Malloc_PerThread_PrimitiveDataType",
"Unit_deviceAllocation_New_PerThread_PrimitiveDataType",
"Unit_deviceAllocation_Malloc_PerThread_StructDataType",
"Unit_deviceAllocation_New_PerThread_StructDataType",
"Unit_deviceAllocation_Malloc_AcrossKernels",
"Unit_deviceAllocation_New_AcrossKernels",
"Unit_deviceAllocation_Malloc_SingleCodeObj",
"Unit_deviceAllocation_New_SingleCodeObj",
"Unit_deviceAllocation_Malloc_PerThread_Graph",
"Unit_deviceAllocation_New_PerThread_Graph",
"Unit_deviceAllocation_Malloc_DeviceFunc",
"Unit_deviceAllocation_VirtualFunction",
"Unit_deviceAllocation_Malloc_MulKernels_MulThreads",
"Unit_deviceAllocation_New_MulKernels_MulThreads",
"Unit_deviceAllocation_Malloc_MulCodeObj",
"Unit_deviceAllocation_New_MulCodeObj",
"Unit_deviceAllocation_New_DeviceFunc",
"====================================================",
"Note: this tests were disabled because some seemed to hang the machine on Windows with Navi32;",
"all the ones calling TestMemoryAcrossMulKernels()/TestMemoryAcrossMulKernelsUsingGraph() were disabled",
"Unit_deviceAllocation_InOneThread_AccessInAllThreads",
"Unit_deviceAllocation_Malloc_AcrossKernels",
"Unit_deviceAllocation_New_AcrossKernels",
"Unit_deviceAllocation_Malloc_ComplexDataType",
"Unit_deviceAllocation_New_ComplexDataType",
"Unit_deviceAllocation_Malloc_UnionType",
"Unit_deviceAllocation_New_UnionType",
"Unit_deviceAllocation_Malloc_SingleCodeObj",
"Unit_deviceAllocation_New_SingleCodeObj",
"Unit_deviceAllocation_Malloc_PerThread_Graph",
"Unit_deviceAllocation_New_PerThread_Graph",
"====================================================",
"Unit_hipGraphAddEventRecordNode_MultipleRun",
"Unit_hipDeviceGetPCIBusId_Negative_PartialFill",
"Unit_hipStreamValue_Wait64_Blocking_NoMask_Nor",
"Unit_hipStreamQuery_WithFinishedWork",
"Unit_hipLaunchHostFunc_Graph",
"Unit_hipLaunchHostFunc_KernelHost",
"Unit_ChannelDescriptor_Positive_16BitFloatingPoint",
"Unit_hipStreamSetCaptureDependencies_Positive_Functional",
"Note: Following four tests disabled due to defect - EXSWHTEC-203",
"Unit_hipGraphMemsetNodeSetParams_Positive_Basic - uint16_t",
"Unit_hipGraphMemsetNodeSetParams_Positive_Basic - uint32_t",
"Note: Test disabled due to defect - EXSWHTEC-207",
"Unit_hipGraphExecMemsetNodeSetParams_Negative_Updating_Non1D_Node",
"Unit_hipGraphExecMemsetNodeSetParams_Positive_Basic - uint8_t",
"Unit_hipGraphExecMemsetNodeSetParams_Positive_Basic - uint16_t",
"Unit_hipGraphExecMemsetNodeSetParams_Positive_Basic - uint32_t",
"Unit_hipGraphMemsetNodeSetParams_Positive_Basic - uint8_t",
"Unit_hipDeviceGetUuid_Positive",
"Disabling test tracked SWDEV-394199",
"Unit_hipStreamCreateWithPriority_MulthreadNonblockingflag",
"SWDEV-396617 ExecMemcpyNodeSetParamsFromSymbol fails in direction",
"SWDEV-396616 hipMemMap returns invalid error",
"Unit_hipMemVmm_Basic",
"SWDEV-396615 mGPUs not considered correctly",
"Unit_hipManagedKeyword_MultiGpu",
"Disabling test tracked SWDEV-391555",
"Unit_hipMemcpyPeer_Positive_ZeroSize",
"Unit_hipMemcpyPeerAsync_Positive_ZeroSize",
"SWDEV-400049 tdr intermittently",
"Unit_hipMemsetDSync init16_t",
"Unit_hipStreamAddCallback_StrmSyncTiming",
"SWDEV-402082 - PAL Backend fails to reserve address on GPU except first one",
"Unit_hipGraphInstantiateWithFlags_FlagAutoFreeOnLaunch_check",
"SWDEV-398981 fails in stress test",
"Unit_hipStreamCreateWithPriority_MulthreadDefaultflag",
"Disabling below tests temporarily due to change in API behavior",
"Unit_hipMemPrefetchAsync_NonPageSz",
"Unit_hipStreamCreateWithFlags_DefaultStreamInteraction",
"SWDEV-402054 fails in external github build",
"Unit_hipEventDestroy_WithWaitingStream",
"Note: UUID returned empty on some windows nodes",
"Unit_hipDeviceGetUuid_Positive",
"=== Below tests fail in external CI for PR https://github.com/ROCm-Developer-Tools/hip-tests/pull/96 ===",
"Unit_hipHostGetDevicePointer_Negative",
"Unit_hipExtModuleLaunchKernel_NonUniformWorkGroup",
"=== Below tests fail in external CI for PR https://github.com/ROCm-Developer-Tools/hip-tests/pull/18 ===",
"Unit_hipMemcpyAsync_Negative_Parameters",
"Unit_hipMemcpyDtoHAsync_Negative_Parameters",
"Unit_hipMemcpyHtoDAsync_Negative_Parameters",
"Unit_hipMemcpyDtoDAsync_Negative_Parameters",
"=== Below tests fail in external CI for PR https://github.com/ROCm-Developer-Tools/hip-tests/pull/92 ===",
"Unit_hipGetChannelDesc_Negative_Parameters",
"=== SWDEV-431191:Below tests failed in stress test on 03/11/23 ===",
"Unit_hipHostMalloc_AllocateMoreThanAvailGPUMemory",
"Unit_hipHostMalloc_AllocateUseMoreThanAvailGPUMemory",
"=== SWDEV-432250:Below tests failed in stress test on 10/11/23 ===",
"Unit_hipVectorTypes_test_on_device",
"=== Below test is disabled due to defect EXSWHTEC-347 ===",
"Unit_hipPointerSetAttribute_Positive_SyncMemops",
"NOTE: The following test is disabled due to defect - EXSWHTEC-241",
"NOTE: The following test is disabled due to defect - EXSWHTEC-242",
"Unit_hipFuncGetAttributes_Positive_Basic",
"NOTE: The following test is disabled due to defect - EXSWHTEC-243",
"NOTE: The following test is disabled due to defect - EXSWHTEC-244",
"Unit_hipExtLaunchMultiKernelMultiDevice_Negative_Parameters",
"Unit_hipMemAddressFree_negative",
"=== Below 2 tests are disable due to defect EXSWHTEC-369 ===",
"Unit_Device_ilogbf_Accuracy_Positive",
"Unit_Device_ilogb_Accuracy_Positive",
"NOTE: The following test is disabled due to defect - EXSWHTEC-245",
"Unit_hipMemAddressFree_negative",
"Unit_hipMemAddressReserve_AlignmentTest",
"Unit_hipGraphAddMemcpyNode_Negative_Parameters",
"Unit_hipMemCreate_ChkWithKerLaunch",
"Unit_hipMemCreate_MapNonContiguousChunks",
"Unit_hipMemMap_MapPartialPhysicalMem",
"Unit_hipMemMap_VMMMemoryReuse_MultiGPU",
"Unit_hipMemSetAccess_SetGet",
"Unit_hipMemSetAccess_MultDevSetGet",
"Unit_hipMemSetAccess_EntireVMMRangeSetGet",
"Unit_hipMemGetAccess_NegTst",
"Unit_hipMemSetAccess_FuncTstOnMultDev",
"Unit_hipMemSetAccess_Vmm2UnifiedMemCpy",
"Unit_hipMemSetAccess_Vmm2DevMemCpy",
"Unit_hipMemSetAccess_Vmm2VMMInterDevMemCpy",
"Unit_hipMemSetAccess_GrowVMM",
"Unit_hipMemSetAccess_negative",
"=== SWDEV-434171: Below tests took long time to complete in stress test on 17/11/23 ===",
"Unit_Warp_Shfl_Positive_Basic - int",
"Unit_Warp_Shfl_Positive_Basic - unsigned int",
"Unit_Warp_Shfl_Positive_Basic - long",
"Unit_Warp_Shfl_Positive_Basic - unsigned long",
"Unit_Warp_Shfl_Positive_Basic - long long",
"Unit_Warp_Shfl_Positive_Basic - unsigned long long",
"Unit_Warp_Shfl_Positive_Basic - float",
"Unit_Warp_Shfl_Positive_Basic - double",
"Unit_Warp_Shfl_XOR_Positive_Basic - int",
"Unit_Warp_Shfl_XOR_Positive_Basic - unsigned int",
"Unit_Warp_Shfl_XOR_Positive_Basic - long",
"Unit_Warp_Shfl_XOR_Positive_Basic - unsigned long",
"Unit_Warp_Shfl_XOR_Positive_Basic - long long",
"Unit_Warp_Shfl_XOR_Positive_Basic - unsigned long long",
"Unit_Warp_Shfl_XOR_Positive_Basic - float",
"Unit_Warp_Shfl_XOR_Positive_Basic - double",
"Unit_Warp_Shfl_XOR_Positive_Basic - __half",
"Unit_Warp_Shfl_XOR_Positive_Basic - __half2",
"Unit_Coalesced_Group_Sync_Positive_Basic - uint16_t",
"Unit_Coalesced_Group_Sync_Positive_Basic - uint32_t",
"=== SWDEV-434878: Below tests failed in stress test on 24/11/23 ===",
"Unit_hipGraphUpload_Negative_Parameters",
"Unit_hipModuleOccupancyMaxPotentialBlockSize_Negative_Parameters",
"Unit_hipModuleOccupancyMaxPotentialBlockSize_Positive_RangeValidation",
"Unit_hipModuleOccupancyMaxPotentialBlockSizeWithFlags_Positive_RangeValidation",
"Unit_hipModuleOccupancyMaxActiveBlocksPerMultiprocessor_Negative_Parameters",
"Unit_hipModuleOccupancyMaxActiveBlocksPerMultiprocessor_Positive_RangeValidation",
"Unit_hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags_Positive_RangeValidation",
"=== SWDEV-435667: Below tests failing randomly in stress test on 01/12/23 ===",
"Unit_atomicExch_system_Positive_Peer_GPUs - int",
"Unit_atomicExch_system_Positive_Peer_GPUs - unsigned int",
"Unit_atomicExch_system_Positive_Peer_GPUs - unsigned long",
"Unit_atomicExch_system_Positive_Peer_GPUs - unsigned long long",
"Unit_atomicExch_system_Positive_Peer_GPUs - float",
"Unit_atomicExch_system_Positive_Peer_GPUs - double",
"Unit_atomicExch_system_Positive_Host_And_Peer_GPUs - int",
"Unit_atomicExch_system_Positive_Host_And_Peer_GPUs - unsigned int",
"Unit_atomicExch_system_Positive_Host_And_Peer_GPUs - unsigned long",
"Unit_atomicExch_system_Positive_Host_And_Peer_GPUs - unsigned long long",
"Unit_atomicExch_system_Positive_Host_And_Peer_GPUs - float",
"Unit_atomicExch_system_Positive_Host_And_Peer_GPUs - double",
"=== SWDEV-435667: Below tests failing randomly in stress test on 08/12/23 ===",
"Unit_hipMemPoolSetAccess_Negative_Parameters",
"SWDEV-438524: Below tests taking long time to run in stress test on 15/12/23 ===",
"Unit_Coalesced_Group_Shfl_Up_Positive_Basic - int",
"Unit_Coalesced_Group_Shfl_Up_Positive_Basic - unsigned int",
"Unit_Coalesced_Group_Shfl_Up_Positive_Basic - long",
"Unit_Coalesced_Group_Shfl_Up_Positive_Basic - unsigned long",
"Unit_Coalesced_Group_Shfl_Up_Positive_Basic - long long",
"Unit_Coalesced_Group_Shfl_Up_Positive_Basic - unsigned long long",
"Unit_Coalesced_Group_Shfl_Up_Positive_Basic - float",
"Unit_Coalesced_Group_Shfl_Up_Positive_Basic - double",
"Unit_Coalesced_Group_Shfl_Down_Positive_Basic - int",
"Unit_Coalesced_Group_Shfl_Down_Positive_Basic - unsigned int",
"Unit_Coalesced_Group_Shfl_Down_Positive_Basic - long",
"Unit_Coalesced_Group_Shfl_Down_Positive_Basic - unsigned long",
"Unit_Coalesced_Group_Shfl_Down_Positive_Basic - long long",
"Unit_Coalesced_Group_Shfl_Down_Positive_Basic - unsigned long long",
"Unit_Coalesced_Group_Shfl_Down_Positive_Basic - float",
"Unit_Coalesced_Group_Shfl_Down_Positive_Basic - double",
"Unit_Coalesced_Group_Shfl_Positive_Basic - int",
"Unit_Coalesced_Group_Shfl_Positive_Basic - unsigned int",
"Unit_Coalesced_Group_Shfl_Positive_Basic - long",
"Unit_Coalesced_Group_Shfl_Positive_Basic - unsigned long",
"Unit_Coalesced_Group_Shfl_Positive_Basic - long long",
"Unit_Coalesced_Group_Shfl_Positive_Basic - unsigned long long",
"Unit_Coalesced_Group_Shfl_Positive_Basic - float",
"Unit_Coalesced_Group_Shfl_Positive_Basic - double",
"SWDEV-438524: Below tests causing TDR & machine down in stress test on 15/12/23 ===",
"Unit_hipExtModuleLaunchKernel_Functional",
"Unit_hipExtLaunchKernelGGL_Functional",
"SWDEV-413997: VMM test still failing in windows",
"Unit_hipMemSetAccess_ChangeAccessProp",
"SWDEV-444041: These tests fail randomly in gfx1030 MGU",
"Unit_hipMemMap_SameMemoryReuse",
"Unit_hipMemMap_negative",
"Unit_hipMemSetAccess_Vmm2PeerDevMemCpy",
"SWDEV-444041: These tests fail in gfx1100 MGPU",
"Unit_hipMemMap_PhysicalMemoryReuse_SingleGPU",
"Unit_hipMemMap_PhysicalMemory_Map2MultVMMs",
"Unit_hipMemMap_PhysicalMemoryReuse_MultiDev",
"Unit_hipMemSetAccess_Vmm2VMMMemCpy",
"SWDEV-444031: This test fails in gfx1101 MGPU",
"Unit_hipMemSetAccess_Multithreaded",
"=== Below test fails in external CI for PR https://github.com/ROCm-Developer-Tools/hip-tests/pull/274 ===",
"Unit_Printf_flags_Sanity_Positive",
"Unit_Printf_length_Sanity_Positive",
"=== Below tests fail in external CI for PR https://github.com/ROCm-Developer-Tools/hip-tests/pull/356 ===",
"=== Below tests are failing PSDB ===",
"Unit_hipGraphMem_Alloc_Free_NodeGetParams_Functional_3",
"Unit_hipGraphAddMemAllocNode_Positive_FreeInGraph",
"Unit_hipFreeAsync_Negative_Parameters",
"Unit_hipMallocMipmappedArray_DiffSizes",
"Unit_hipMallocMipmappedArray_MultiThread",
"Unit_hipMallocMipmappedArray_Negative_InvalidFlags",
"Unit_hipGetMipmappedArrayLevel_Negative",
"Unit_hipFreeMipmappedArray_Negative_DoubleFree",
"Unit_hipFreeMipmappedArrayMultiTArray - int",
"Unit_hipMemcpy3D_Positive_Synchronization_Behavior",
"Unit_hipMemcpyParam2D_Positive_Synchronization_Behavior",
"Unit_hipDrvMemcpy3D_Positive_Synchronization_Behavior",
"Unit_Thread_Block_Tile_Dynamic_Getters_Positive_Basic",
"Performance_hipMemcpy2D_HostToHost",
"Performance_hipMemcpy2DAsync_HostToHost",
"Unit_hipDrvGraphAddMemsetNode_hipMallocPitch_2D",
"Unit_hipDrvGraphAddMemsetNode_hipMallocPitch_1D",
"Unit_hipDrvGraphAddMemsetNode_hipMalloc3D_2D",
"Unit_hipDrvGraphAddMemsetNode_hipMalloc3D_1D",
"Unit_hipDrvGraphAddMemsetNode_hipMalloc_1D",
"Unit_hipDrvGraphAddMemsetNode_hipMallocManaged",
"Unit_hipModuleLaunchKernel_Negative_Parameters",
"Unit_hipExtModuleLaunchKernel_Negative_Parameters",
"Unit_hipLaunchKernel_Negative_Parameters",
"Unit_tex1Dfetch_Positive_ReadModeElementType - char",
"Unit_tex1Dfetch_Positive_ReadModeElementType - unsigned char",
"Unit_tex1Dfetch_Positive_ReadModeElementType - short",
"Unit_tex1Dfetch_Positive_ReadModeElementType - unsigned short",
"Unit_tex1Dfetch_Positive_ReadModeElementType - int",
"Unit_tex1Dfetch_Positive_ReadModeElementType - unsigned int",
"Unit_tex1Dfetch_Positive_ReadModeElementType - float",
"Unit_tex1Dfetch_Positive_ReadModeNormalizedFloat - char",
"Unit_tex1Dfetch_Positive_ReadModeNormalizedFloat - unsigned char",
"Unit_tex1Dfetch_Positive_ReadModeNormalizedFloat - short",
"Unit_tex1Dfetch_Positive_ReadModeNormalizedFloat - unsigned short",
"Unit_tex1D_Positive_ReadModeNormalizedFloat - char",
"Unit_tex1D_Positive_ReadModeNormalizedFloat - unsigned char",
"Unit_tex1D_Positive_ReadModeNormalizedFloat - short",
"Unit_tex1D_Positive_ReadModeNormalizedFloat - unsigned short",
"Unit_tex1DLayered_Positive_ReadModeNormalizedFloat - char",
"Unit_tex1DLayered_Positive_ReadModeNormalizedFloat - unsigned char",
"Unit_tex1DLayered_Positive_ReadModeNormalizedFloat - short",
"Unit_tex1DLayered_Positive_ReadModeNormalizedFloat - unsigned short",
"Unit_tex1DGrad_Positive_ReadModeElementType - char",
"Unit_tex1DGrad_Positive_ReadModeElementType - unsigned char",
"Unit_tex1DGrad_Positive_ReadModeElementType - short",
"Unit_tex1DGrad_Positive_ReadModeElementType - unsigned short",
"Unit_tex1DGrad_Positive_ReadModeElementType - int",
"Unit_tex1DGrad_Positive_ReadModeElementType - unsigned int",
"Unit_tex1DGrad_Positive_ReadModeElementType - float",
"Unit_tex1DGrad_Positive_ReadModeNormalizedFloat - char",
"Unit_tex1DGrad_Positive_ReadModeNormalizedFloat - unsigned char",
"Unit_tex1DGrad_Positive_ReadModeNormalizedFloat - short",
"Unit_tex1DGrad_Positive_ReadModeNormalizedFloat - unsigned short",
"Unit_tex1DLayeredGrad_Positive_ReadModeElementType - char",
"Unit_tex1DLayeredGrad_Positive_ReadModeElementType - unsigned char",
"Unit_tex1DLayeredGrad_Positive_ReadModeElementType - short",
"Unit_tex1DLayeredGrad_Positive_ReadModeElementType - unsigned short",
"Unit_tex1DLayeredGrad_Positive_ReadModeElementType - int",
"Unit_tex1DLayeredGrad_Positive_ReadModeElementType - unsigned int",
"Unit_tex1DLayeredGrad_Positive_ReadModeElementType - float",
"Unit_tex1DLayeredGrad_Positive_ReadModeNormalizedFloat - char",
"Unit_tex1DLayeredGrad_Positive_ReadModeNormalizedFloat - unsigned char",
"Unit_tex1DLayeredGrad_Positive_ReadModeNormalizedFloat - short",
"Unit_tex1DLayeredGrad_Positive_ReadModeNormalizedFloat - unsigned short",
"Unit_tex1DLayeredLod_Positive_ReadModeElementType - char",
"Unit_tex1DLayeredLod_Positive_ReadModeElementType - unsigned char",
"Unit_tex1DLayeredLod_Positive_ReadModeElementType - short",
"Unit_tex1DLayeredLod_Positive_ReadModeElementType - unsigned short",
"Unit_tex1DLayeredLod_Positive_ReadModeElementType - int",
"Unit_tex1DLayeredLod_Positive_ReadModeElementType - unsigned int",
"Unit_tex1DLayeredLod_Positive_ReadModeElementType - float",
"Unit_tex1DLayeredLod_Positive_ReadModeNormalizedFloat - char",
"Unit_tex1DLayeredLod_Positive_ReadModeNormalizedFloat - unsigned char",
"Unit_tex1DLayeredLod_Positive_ReadModeNormalizedFloat - short",
"Unit_tex1DLayeredLod_Positive_ReadModeNormalizedFloat - unsigned short",
"Unit_tex1DLod_Positive_ReadModeElementType - char",
"Unit_tex1DLod_Positive_ReadModeElementType - unsigned char",
"Unit_tex1DLod_Positive_ReadModeElementType - short",
"Unit_tex1DLod_Positive_ReadModeElementType - unsigned short",
"Unit_tex1DLod_Positive_ReadModeElementType - int",
"Unit_tex1DLod_Positive_ReadModeElementType - unsigned int",
"Unit_tex1DLod_Positive_ReadModeElementType - float",
"Unit_tex1DLod_Positive_ReadModeNormalizedFloat - char",
"Unit_tex1DLod_Positive_ReadModeNormalizedFloat - unsigned char",
"Unit_tex1DLod_Positive_ReadModeNormalizedFloat - short",
"Unit_tex1DLod_Positive_ReadModeNormalizedFloat - unsigned short",
"Unit_tex3D_Positive_ReadModeElementType - char",
"Unit_tex3D_Positive_ReadModeElementType - unsigned char",
"Unit_tex3D_Positive_ReadModeElementType - short",
"Unit_tex3D_Positive_ReadModeElementType - unsigned short",
"Unit_tex3D_Positive_ReadModeElementType - int",
"Unit_tex3D_Positive_ReadModeElementType - unsigned int",
"Unit_tex3D_Positive_ReadModeElementType - float",
"Unit_tex3D_Positive_ReadModeNormalizedFloat - char",
"Unit_tex3D_Positive_ReadModeNormalizedFloat - unsigned char",
"Unit_tex3D_Positive_ReadModeNormalizedFloat - short",
"Unit_tex3D_Positive_ReadModeNormalizedFloat - unsigned short",
"Unit_tex3DLod_Positive_ReadModeElementType - char",
"Unit_tex3DLod_Positive_ReadModeElementType - unsigned char",
"Unit_tex3DLod_Positive_ReadModeElementType - short",
"Unit_tex3DLod_Positive_ReadModeElementType - unsigned short",
"Unit_tex3DLod_Positive_ReadModeElementType - int",
"Unit_tex3DLod_Positive_ReadModeElementType - unsigned int",
"Unit_tex3DLod_Positive_ReadModeElementType - float",
"Unit_tex3DLod_Positive_ReadModeNormalizedFloat - char",
"Unit_tex3DLod_Positive_ReadModeNormalizedFloat - unsigned char",
"Unit_tex3DLod_Positive_ReadModeNormalizedFloat - short",
"Unit_tex3DLod_Positive_ReadModeNormalizedFloat - unsigned short",
"Unit_tex3DGrad_Positive_ReadModeElementType - char",
"Unit_tex3DGrad_Positive_ReadModeElementType - unsigned char",
"Unit_tex3DGrad_Positive_ReadModeElementType - short",
"Unit_tex3DGrad_Positive_ReadModeElementType - unsigned short",
"Unit_tex3DGrad_Positive_ReadModeElementType - int",
"Unit_tex3DGrad_Positive_ReadModeElementType - unsigned int",
"Unit_tex3DGrad_Positive_ReadModeElementType - float",
"Unit_tex3DGrad_Positive_ReadModeNormalizedFloat - char",
"Unit_tex3DGrad_Positive_ReadModeNormalizedFloat - unsigned char",
"Unit_tex3DGrad_Positive_ReadModeNormalizedFloat - short",
"Unit_tex3DGrad_Positive_ReadModeNormalizedFloat - unsigned short",
"Unit_texCubemap_Positive_ReadModeElementType - char",
"Unit_texCubemap_Positive_ReadModeElementType - unsigned char",
"Unit_texCubemap_Positive_ReadModeElementType - short",
"Unit_texCubemap_Positive_ReadModeElementType - unsigned short",
"Unit_texCubemap_Positive_ReadModeElementType - int",
"Unit_texCubemap_Positive_ReadModeElementType - unsigned int",
"Unit_texCubemap_Positive_ReadModeElementType - float",
"Unit_texCubemap_Positive_ReadModeNormalizedFloat - char",
"Unit_texCubemap_Positive_ReadModeNormalizedFloat - unsigned char",
"Unit_texCubemap_Positive_ReadModeNormalizedFloat - short",
"Unit_texCubemap_Positive_ReadModeNormalizedFloat - unsigned short",
"Unit_texCubemapLod_Positive_ReadModeElementType - char",
"Unit_texCubemapLod_Positive_ReadModeElementType - unsigned char",
"Unit_texCubemapLod_Positive_ReadModeElementType - short",
"Unit_texCubemapLod_Positive_ReadModeElementType - unsigned short",
"Unit_texCubemapLod_Positive_ReadModeElementType - int",
"Unit_texCubemapLod_Positive_ReadModeElementType - unsigned int",
"Unit_texCubemapLod_Positive_ReadModeElementType - float",
"Unit_texCubemapLod_Positive_ReadModeNormalizedFloat - char",
"Unit_texCubemapLod_Positive_ReadModeNormalizedFloat - unsigned char",
"Unit_texCubemapLod_Positive_ReadModeNormalizedFloat - short",
"Unit_texCubemapLod_Positive_ReadModeNormalizedFloat - unsigned short",
"Unit_texCubemapGrad_Positive_ReadModeElementType - char",
"Unit_texCubemapGrad_Positive_ReadModeElementType - unsigned char",
"Unit_texCubemapGrad_Positive_ReadModeElementType - short",
"Unit_texCubemapGrad_Positive_ReadModeElementType - unsigned short",
"Unit_texCubemapGrad_Positive_ReadModeElementType - int",
"Unit_texCubemapGrad_Positive_ReadModeElementType - unsigned int",
"Unit_texCubemapGrad_Positive_ReadModeElementType - float",
"Unit_texCubemapGrad_Positive_ReadModeNormalizedFloat - char",
"Unit_texCubemapGrad_Positive_ReadModeNormalizedFloat - unsigned char",
"Unit_texCubemapGrad_Positive_ReadModeNormalizedFloat - short",
"Unit_texCubemapGrad_Positive_ReadModeNormalizedFloat - unsigned short",
"Unit_texCubemapLayered_Positive_ReadModeElementType - char",
"Unit_texCubemapLayered_Positive_ReadModeElementType - unsigned char",
"Unit_texCubemapLayered_Positive_ReadModeElementType - short",
"Unit_texCubemapLayered_Positive_ReadModeElementType - unsigned short",
"Unit_texCubemapLayered_Positive_ReadModeElementType - int",
"Unit_texCubemapLayered_Positive_ReadModeElementType - unsigned int",
"Unit_texCubemapLayered_Positive_ReadModeElementType - float",
"Unit_texCubemapLayered_Positive_ReadModeNormalizedFloat - char",
"Unit_texCubemapLayered_Positive_ReadModeNormalizedFloat - unsigned char",
"Unit_texCubemapLayered_Positive_ReadModeNormalizedFloat - short",
"Unit_texCubemapLayered_Positive_ReadModeNormalizedFloat - unsigned short",
"Unit_texCubemapLayeredLod_Positive_ReadModeElementType - char",
"Unit_texCubemapLayeredLod_Positive_ReadModeElementType - unsigned char",
"Unit_texCubemapLayeredLod_Positive_ReadModeElementType - short",
"Unit_texCubemapLayeredLod_Positive_ReadModeElementType - unsigned short",
"Unit_texCubemapLayeredLod_Positive_ReadModeElementType - int",
"Unit_texCubemapLayeredLod_Positive_ReadModeElementType - unsigned int",
"Unit_texCubemapLayeredLod_Positive_ReadModeElementType - float",
"Unit_texCubemapLayeredLod_Positive_ReadModeNormalizedFloat - char",
"Unit_texCubemapLayeredLod_Positive_ReadModeNormalizedFloat - unsigned char",
"Unit_texCubemapLayeredLod_Positive_ReadModeNormalizedFloat - short",
"Unit_texCubemapLayeredLod_Positive_ReadModeNormalizedFloat - unsigned short",
"Unit_texCubemapLayeredGrad_Positive_ReadModeElementType - char",
"Unit_texCubemapLayeredGrad_Positive_ReadModeElementType - unsigned char",
"Unit_texCubemapLayeredGrad_Positive_ReadModeElementType - short",
"Unit_texCubemapLayeredGrad_Positive_ReadModeElementType - unsigned short",
"Unit_texCubemapLayeredGrad_Positive_ReadModeElementType - int",
"Unit_texCubemapLayeredGrad_Positive_ReadModeElementType - unsigned int",
"Unit_texCubemapLayeredGrad_Positive_ReadModeElementType - float",
"Unit_texCubemapLayeredGrad_Positive_ReadModeNormalizedFloat - char",
"Unit_texCubemapLayeredGrad_Positive_ReadModeNormalizedFloat - unsigned char",
"Unit_texCubemapLayeredGrad_Positive_ReadModeNormalizedFloat - short",
"Unit_texCubemapLayeredGrad_Positive_ReadModeNormalizedFloat - unsigned short",
"Unit_tex2Dgather_Positive_ReadModeElementType - char",
"Unit_tex2Dgather_Positive_ReadModeElementType - unsigned char",
"Unit_tex2Dgather_Positive_ReadModeElementType - short",
"Unit_tex2Dgather_Positive_ReadModeElementType - unsigned short",
"Unit_tex2Dgather_Positive_ReadModeElementType - int",
"Unit_tex2Dgather_Positive_ReadModeElementType - unsigned int",
"Unit_tex2Dgather_Positive_ReadModeElementType - float",
"Unit_tex2D_Positive_ReadModeElementType - char",
"Unit_tex2D_Positive_ReadModeElementType - unsigned char",
"Unit_tex2D_Positive_ReadModeElementType - short",
"Unit_tex2D_Positive_ReadModeElementType - unsigned short",
"Unit_tex2D_Positive_ReadModeElementType - int",
"Unit_tex2D_Positive_ReadModeElementType - unsigned int",
"Unit_tex2D_Positive_ReadModeElementType - float",
"Unit_tex2D_Positive_ReadModeNormalizedFloat - char",
"Unit_tex2D_Positive_ReadModeNormalizedFloat - unsigned char",
"Unit_tex2D_Positive_ReadModeNormalizedFloat - short",
"Unit_tex2D_Positive_ReadModeNormalizedFloat - unsigned short",
"Unit_tex2DLayered_Positive_ReadModeElementType - char",
"Unit_tex2DLayered_Positive_ReadModeElementType - unsigned char",
"Unit_tex2DLayered_Positive_ReadModeElementType - short",
"Unit_tex2DLayered_Positive_ReadModeElementType - unsigned short",
"Unit_tex2DLayered_Positive_ReadModeElementType - int",
"Unit_tex2DLayered_Positive_ReadModeElementType - unsigned int",
"Unit_tex2DLayered_Positive_ReadModeElementType - float",
"Unit_tex2DLayered_Positive_ReadModeNormalizedFloat - char",
"Unit_tex2DLayered_Positive_ReadModeNormalizedFloat - unsigned char",
"Unit_tex2DLayered_Positive_ReadModeNormalizedFloat - short",
"Unit_tex2DLayered_Positive_ReadModeNormalizedFloat - unsigned short",
"Unit_tex2DGrad_Positive_ReadModeElementType - char",
"Unit_tex2DGrad_Positive_ReadModeElementType - unsigned char",
"Unit_tex2DGrad_Positive_ReadModeElementType - short",
"Unit_tex2DGrad_Positive_ReadModeElementType - unsigned short",
"Unit_tex2DGrad_Positive_ReadModeElementType - int",
"Unit_tex2DGrad_Positive_ReadModeElementType - unsigned int",
"Unit_tex2DGrad_Positive_ReadModeElementType - float",
"Unit_tex2DGrad_Positive_ReadModeNormalizedFloat - char",
"Unit_tex2DGrad_Positive_ReadModeNormalizedFloat - unsigned char",
"Unit_tex2DGrad_Positive_ReadModeNormalizedFloat - short",
"Unit_tex2DGrad_Positive_ReadModeNormalizedFloat - unsigned short",
"Unit_tex2DLayeredGrad_Positive_ReadModeElementType - char",
"Unit_tex2DLayeredGrad_Positive_ReadModeElementType - unsigned char",
"Unit_tex2DLayeredGrad_Positive_ReadModeElementType - short",
"Unit_tex2DLayeredGrad_Positive_ReadModeElementType - unsigned short",
"Unit_tex2DLayeredGrad_Positive_ReadModeElementType - int",
"Unit_tex2DLayeredGrad_Positive_ReadModeElementType - unsigned int",
"Unit_tex2DLayeredGrad_Positive_ReadModeElementType - float",
"Unit_tex2DLayeredGrad_Positive_ReadModeNormalizedFloat - char",
"Unit_tex2DLayeredGrad_Positive_ReadModeNormalizedFloat - unsigned char",
"Unit_tex2DLayeredGrad_Positive_ReadModeNormalizedFloat - short",
"Unit_tex2DLayeredGrad_Positive_ReadModeNormalizedFloat - unsigned short",
"Unit_tex2DLod_Positive_ReadModeElementType - char",
"Unit_tex2DLod_Positive_ReadModeElementType - unsigned char",
"Unit_tex2DLod_Positive_ReadModeElementType - short",
"Unit_tex2DLod_Positive_ReadModeElementType - unsigned short",
"Unit_tex2DLod_Positive_ReadModeElementType - int",
"Unit_tex2DLod_Positive_ReadModeElementType - unsigned int",
"Unit_tex2DLod_Positive_ReadModeElementType - float",
"Unit_tex2DLod_Positive_ReadModeNormalizedFloat - char",
"Unit_tex2DLod_Positive_ReadModeNormalizedFloat - unsigned char",
"Unit_tex2DLod_Positive_ReadModeNormalizedFloat - short",
"Unit_tex2DLod_Positive_ReadModeNormalizedFloat - unsigned short",
"Unit_tex2DLayeredLod_Positive_ReadModeElementType - char",
"Unit_tex2DLayeredLod_Positive_ReadModeElementType - unsigned char",
"Unit_tex2DLayeredLod_Positive_ReadModeElementType - short",
"Unit_tex2DLayeredLod_Positive_ReadModeElementType - unsigned short",
"Unit_tex2DLayeredLod_Positive_ReadModeElementType - int",
"Unit_tex2DLayeredLod_Positive_ReadModeElementType - unsigned int",
"Unit_tex2DLayeredLod_Positive_ReadModeElementType - float",
"Unit_tex2DLayeredLod_Positive_ReadModeNormalizedFloat - char",
"Unit_tex2DLayeredLod_Positive_ReadModeNormalizedFloat - unsigned char",
"Unit_tex2DLayeredLod_Positive_ReadModeNormalizedFloat - short",
"Unit_tex2DLayeredLod_Positive_ReadModeNormalizedFloat - unsigned short",
"Unit_hipModuleLaunchKernel_Negative_Parameters",
"Unit_hipModuleGetTexRef_Positive_Basic",
"Unit_Kernel_Launch_bounds_Negative_OutOfBounds",
"Unit_Kernel_Launch_bounds_Negative_Parameters_RTC",
"Unit_AtomicBuiltins_Negative_Parameters_RTC",
"Unit_hipMemcpy2D_H2D-D2D-D2H - int",
"Unit_hipMemcpy2D_H2D-D2D-D2H - float",
"Unit_hipMemcpy2D_H2D-D2D-D2H - double",
"Unit_hipMemcpy2D_H2D-D2D-D2H_WithOffset - int",
"Unit_hipMemcpy2D_H2D-D2D-D2H_WithOffset - float",
"Unit_hipMemcpy2D_H2D-D2D-D2H_WithOffset - double",
"Unit_hipMemcpy2D_H2D-D2D-D2H_Managed_WithOffset - int",
"Unit_hipMemcpy2D_H2D-D2D-D2H_Managed_WithOffset - float",
"Unit_hipMemcpy2D_H2D-D2D-D2H_Managed_WithOffset - double",
"Unit_hipMemcpy2DAsync_Host&PinnedMem - int",
"Unit_hipMemcpy2DAsync_Host&PinnedMem - float",
"Unit_hipMemcpy2DAsync_Host&PinnedMem - double",
"Unit_hipMemPoolGetAccess_Negative_Parameters",
"Unit_hipMemPoolSetAttribute_Negative_Parameters",
"Unit_hipMemPoolGetAttribute_Negative_Parameters",
"Unit_Thread_Block_Getters_Positive_Basic",
"Unit_hipMemset3DAsync_capturehipMemset3DAsync",
"Unit_hipMemset2DAsync_capturehipMemset2DAsync",
"Unit_hipOccupancyMaxPotBlkSizeVariableSMemWithFlags_Functional",
"Unit_hipDynamicShared",
"Unit___hip_atomic_fetch_min_Positive_Workgroup_Scattered_Addresses - double",
"Unit_atomicExch_Positive - int",
"Unit_atomicExch_Positive - unsigned int",
"Unit_atomicExch_Positive - unsigned long",
"Unit_atomicExch_Positive - unsigned long long",
"Unit_atomicExch_Positive - float",
"Unit_atomicExch_Positive - double",
"Unit___hip_atomic_exchange_Positive_Wavefront - int",
"Unit___hip_atomic_exchange_Positive_Wavefront - unsigned int",
"Unit___hip_atomic_exchange_Positive_Wavefront - unsigned long",
"Unit___hip_atomic_exchange_Positive_Wavefront - unsigned long long",
"Unit___hip_atomic_exchange_Positive_Wavefront - float",
"Unit___hip_atomic_exchange_Positive_Wavefront - double",
"Unit___hip_atomic_exchange_Positive_Workgroup - int",
"Unit___hip_atomic_exchange_Positive_Workgroup - unsigned int",
"Unit___hip_atomic_exchange_Positive_Workgroup - unsigned long",
"Unit___hip_atomic_exchange_Positive_Workgroup - unsigned long long",
"Unit___hip_atomic_exchange_Positive_Workgroup - float",
"Unit___hip_atomic_exchange_Positive_Workgroup - double",
"Unit___syncthreads_Positive_Basic",
"Unit___syncthreads_count_Positive_Basic",
"Unit___syncthreads_and_Positive_Basic",
"Unit___syncthreads_or_Positive_Basic",
"Note: Test disabled due to defect - EXSWHTEC-151",
"Unit_hipModuleLoad_Negative_Load_From_A_File_That_Is_Not_A_Module",
"Note: Test disabled due to defect - EXSWHTEC-152",
"Unit_hipModuleUnload_Negative_Module_Is_Nullptr",
"Note: Following two tests disabled due to defect - EXSWHTEC-153",
"Unit_hipModuleLoadData_Negative_Image_Is_An_Empty_String",
"Unit_hipModuleLoadDataEx_Negative_Image_Is_An_Empty_String",
"Note: Test disabled due to defect - EXSWHTEC-163",
"Unit_hipModuleGetGlobal_Negative_Hmod_Is_Nullptr",
"Note: Test disabled due to defect - EXSWHTEC-164",
"Unit_hipModuleGetGlobal_Negative_Name_Is_Empty_String",
"Note: Test disabled due to defect - EXSWHTEC-165",
"Unit_hipModuleGetGlobal_Negative_Dptr_And_Bytes_Are_Nullptr",
"Note: Test disabled due to defect - EXSWHTEC-166",
"Unit_hipModuleGetTexRef_Negative_Hmod_Is_Nullptr",
"Note: Test disabled due to defect - EXSWHTEC-167",
"Unit_hipModuleGetTexRef_Negative_Name_Is_Empty_String",
"Below tests hang in Jenkins PSDB",
"Unit_Thread_Block_Tile_Sync_Positive_Basic - uint8_t",
"Unit_Thread_Block_Tile_Sync_Positive_Basic - uint16_t",
"Unit_Thread_Block_Tile_Sync_Positive_Basic - uint32_t",
"Unit_coalesced_groups",
"Unit_coalesced_groups_shfl_down",
"Unit_coalesced_groups_shfl_up",
"=== SWDEV-441604: Below tests take long time to run in stress test on 12/01/24 ===",
"Unit_Thread_Block_Tile_Shfl_Up_Positive_Basic - int",
"Unit_Thread_Block_Tile_Shfl_Up_Positive_Basic - unsigned int",
"Unit_Thread_Block_Tile_Shfl_Up_Positive_Basic - long",
"Unit_Thread_Block_Tile_Shfl_Up_Positive_Basic - unsigned long",
"Unit_Thread_Block_Tile_Shfl_Up_Positive_Basic - long long",
"Unit_Thread_Block_Tile_Shfl_Up_Positive_Basic - unsigned long long",
"Unit_Thread_Block_Tile_Shfl_Up_Positive_Basic - float",
"Unit_Thread_Block_Tile_Shfl_Up_Positive_Basic - double",
"Unit_Thread_Block_Tile_Shfl_Down_Positive_Basic - int",
"Unit_Thread_Block_Tile_Shfl_Down_Positive_Basic - unsigned int",
"Unit_Thread_Block_Tile_Shfl_Down_Positive_Basic - long",
"Unit_Thread_Block_Tile_Shfl_Down_Positive_Basic - unsigned long",
"Unit_Thread_Block_Tile_Shfl_Down_Positive_Basic - long long",
"Unit_Thread_Block_Tile_Shfl_Down_Positive_Basic - unsigned long long",
"Unit_Thread_Block_Tile_Shfl_Down_Positive_Basic - float",
"Unit_Thread_Block_Tile_Shfl_Down_Positive_Basic - double",
"Unit_Thread_Block_Tile_Shfl_XOR_Positive_Basic - int",
"Unit_Thread_Block_Tile_Shfl_XOR_Positive_Basic - unsigned int",
"Unit_Thread_Block_Tile_Shfl_XOR_Positive_Basic - long",
"Unit_Thread_Block_Tile_Shfl_XOR_Positive_Basic - unsigned long",
"Unit_Thread_Block_Tile_Shfl_XOR_Positive_Basic - long long",
"Unit_Thread_Block_Tile_Shfl_XOR_Positive_Basic - unsigned long long",
"Unit_Thread_Block_Tile_Shfl_XOR_Positive_Basic - float",
"Unit_Thread_Block_Tile_Shfl_XOR_Positive_Basic - double",
"Unit_Thread_Block_Tile_Shfl_Positive_Basic - int",
"Unit_Thread_Block_Tile_Shfl_Positive_Basic - unsigned int",
"Unit_Thread_Block_Tile_Shfl_Positive_Basic - long",
"Unit_Thread_Block_Tile_Shfl_Positive_Basic - unsigned long",
"Unit_Thread_Block_Tile_Shfl_Positive_Basic - long long",
"Unit_Thread_Block_Tile_Shfl_Positive_Basic - unsigned long long",
"Unit_Thread_Block_Tile_Shfl_Positive_Basic - float",
"Unit_Thread_Block_Tile_Shfl_Positive_Basic - double",
"Unit_Thread_Block_Tile_Getters_Positive_Basic",
"SWDEV-446588 - Disable graph multi gpu testcases until graph has support for it",
"Unit_hipGraphExecUpdate_Negative_MultiDevice_Context_Changed",
"Unit_hipGraphMem_Alloc_Free_NodeGetParams_Functional_MultiDevice",
"Unit_hipGraphUpload_Functional_multidevice_test",
"=== Below tests fail in external CI for PR https://github.com/ROCm-Developer-Tools/hip-tests/pull/210 ===",
"Unit_StaticAssert_Positive_Basic_RTC",
"Unit_Assert_Positive_Basic_KernelFail",
"=== Below tests are disabled due to defect EXSWHTEC-356 ===",
"Unit_Device___hisinf2_Accuracy_Positive",
"Unit_Device___hisnan2_Accuracy_Positive",
"Unit_Device___hbequ2_Accuracy_Positive",
"Unit_Device___hne_Accuracy_Positive",
"Unit_Device___hne2_Accuracy_Positive",
"Unit_Device___hbne2_Accuracy_Positive",
"Unit_Device___hbgeu2_Accuracy_Positive",
"Unit_Device___hbgtu2_Accuracy_Positive",
"Unit_Device___hbleu2_Accuracy_Positive",
"Unit_Device___hbltu2_Accuracy_Positive",
"=== Below 4 tests are disable due to defect EXSWHTEC-355 ===",
"Unit_Device___hadd_Sanity_Positive",
"Unit_Device___uhadd_Sanity_Positive",
"Unit_Device___rhadd_Sanity_Positive",
"Unit_Device___urhadd_Sanity_Positive",
"Unit_Coalesced_Group_Tiled_Partition_Sync_Positive_Basic - uint8_t",
"Unit_Coalesced_Group_Tiled_Partition_Sync_Positive_Basic - uint16_t",
"Unit_Coalesced_Group_Tiled_Partition_Sync_Positive_Basic - uint32_t",
"Below tests failed in stress test of 25/01/24 ===",
"Unit_atomicAnd_Negative_Parameters_RTC",
"Unit_atomicOr_Negative_Parameters_RTC",
"Unit_atomicXor_Negative_Parameters_RTC",
"Unit_atomicMin_Negative_Parameters_RTC",
"Unit_atomicMax_Negative_Parameters_RTC",
"=== Below tests cause timeout in stress test of 09/02/24 ===",
"Unit_Device___half2half2_Accuracy_Positive",
"Unit_Device_make_half2_Accuracy_Positive",
"Unit_Device___halves2half2_Accuracy_Positive",
"Unit_Device___low2half_Accuracy_Positive",
"Unit_Device___high2half_Accuracy_Positive",
"Unit_Device___low2half2_Accuracy_Positive",
"Unit_Device___high2half2_Accuracy_Positive",
"Unit_Device___lowhigh2highlow_Accuracy_Positive",
"Unit_Device___lows2half2_Accuracy_Positive",
"Unit_Device___highs2half2_Accuracy_Positive",
"Unit_Device___float2half2_rn_Accuracy_Positive",
"Unit_Device___floats2half2_rn_Accuracy_Positive",
"Unit_Device___float22half2_rn_Accuracy_Positive",
"Unit_Device___low2float_Accuracy_Positive",
"Unit_Device___high2float_Accuracy_Positive",
"Unit_Device___half22float2_Accuracy_Positive",
"Unit_Device_hcos_Accuracy_Positive",
"Unit_Device_h2cos_Accuracy_Positive",
"Unit_Device_hsin_Accuracy_Positive",
"Unit_Device_h2sin_Accuracy_Positive",
"Unit_Device_hexp_Accuracy_Positive",
"Unit_Device_h2exp_Accuracy_Positive",
"Unit_Device_hexp10_Accuracy_Positive",
"Unit_Device_h2exp10_Accuracy_Positive",
"Unit_Device_hexp2_Accuracy_Positive",
"Unit_Device_h2exp2_Accuracy_Positive",
"Unit_Device_hlog_Accuracy_Positive",
"Unit_Device_h2log_Accuracy_Positive",
"Unit_Device_hlog10_Accuracy_Positive",
"Unit_Device_h2log10_Accuracy_Positive",
"Unit_Device_hlog2_Accuracy_Positive",
"Unit_Device_h2log2_Accuracy_Positive",
"Unit_Device_hsqrt_Accuracy_Positive",
"Unit_Device_h2sqrt_Accuracy_Positive",
"Unit_Device_hceil_Accuracy_Positive",
"Unit_Device_h2ceil_Accuracy_Positive",
"Unit_Device_hfloor_Accuracy_Positive",
"Unit_Device_h2floor_Accuracy_Positive",
"Unit_Device_htrunc_Accuracy_Positive",
"Unit_Device_h2trunc_Accuracy_Positive",
"Unit_Device_hrcp_Accuracy_Positive",
"Unit_Device_h2rcp_Accuracy_Positive",
"Unit_Device_hrsqrt_Accuracy_Positive",
"Unit_Device_h2rsqrt_Accuracy_Positive",
"Unit_Device_hrint_Accuracy_Positive",
"Unit_Device_h2rint_Accuracy_Positive",
"Unit_Device___habs_Accuracy_Positive",
"Unit_Device___habs2_Accuracy_Positive",
"Unit_Device___hneg_Accuracy_Positive",
"Unit_Device___hneg2_Accuracy_Positive",
"Unit_Device___hadd_wrapper_Accuracy_Positive",
"Unit_Device___hadd2_Accuracy_Positive",
"Unit_Device___hadd_sat_Accuracy_Positive",
"Unit_Device___hadd2_sat_Accuracy_Positive",
"Unit_Device___hsub_Accuracy_Positive",
"Unit_Device___hsub2_Accuracy_Positive",
"Unit_Device___hsub_sat_Accuracy_Positive",
"Unit_Device___hsub2_sat_Accuracy_Positive",
"Unit_Device___hmul_Accuracy_Positive",
"Unit_Device___hmul2_Accuracy_Positive",
"Unit_Device___hmul_sat_Accuracy_Positive",
"Unit_Device___hmul2_sat_Accuracy_Positive",
"Unit_Device___hdiv_Accuracy_Positive",
"Unit_Device___h2div_Accuracy_Positive",
"Unit_Device___hfma_Accuracy_Positive",
"Unit_Device___hfma2_Accuracy_Positive",
"Unit_Device___hfma_sat_Accuracy_Positive",
"Unit_Device___hfma2_sat_Accuracy_Positive",
"Unit_Device___hisinf_Accuracy_Positive",
"Unit_Device___hisinf2_Accuracy_Positive",
"Unit_Device___hisnan_Accuracy_Positive",
"Unit_Device___hisnan2_Accuracy_Positive",
"Unit_Device___heq_Accuracy_Positive",
"Unit_Device___hbeq2_Accuracy_Positive",
"Unit_Device___hequ_Accuracy_Positive",
"Unit_Device___hbequ2_Accuracy_Positive",
"Unit_Device___heq2_Accuracy_Positive",
"Unit_Device___hequ2_Accuracy_Positive",
"Unit_Device___hne_Accuracy_Positive",
"Unit_Device___hbne2_Accuracy_Positive",
"Unit_Device___hneu_Accuracy_Positive",
"Unit_Device___hbneu2_Accuracy_Positive",
"Unit_Device___hne2_Accuracy_Positive",
"Unit_Device___hneu2_Accuracy_Positive",
"Unit_Device___hge_Accuracy_Positive",
"Unit_Device___hbge2_Accuracy_Positive",
"Unit_Device___hgeu_Accuracy_Positive",
"Unit_Device___hbgeu2_Accuracy_Positive",
"Unit_Device___hge2_Accuracy_Positive",
"Unit_Device___hgeu2_Accuracy_Positive",
"Unit_Device___hgt_Accuracy_Positive",
"Unit_Device___hbgt2_Accuracy_Positive",
"Unit_Device___hgtu_Accuracy_Positive",
"Unit_Device___hbgtu2_Accuracy_Positive",
"Unit_Device___hgt2_Accuracy_Positive",
"Unit_Device___hgtu2_Accuracy_Positive",
"Unit_Device___hle_Accuracy_Positive",
"Unit_Device___hble2_Accuracy_Positive",
"Unit_Device___hleu_Accuracy_Positive",
"Unit_Device___hbleu2_Accuracy_Positive",
"Unit_Device___hle2_Accuracy_Positive",
"Unit_Device___hleu2_Accuracy_Positive",
"Unit_Device___hlt_Accuracy_Positive",
"Unit_Device___hblt2_Accuracy_Positive",
"Unit_Device___hltu_Accuracy_Positive",
"Unit_Device___hbltu2_Accuracy_Positive",
"Unit_Device___hlt2_Accuracy_Positive",
"Unit_Device___hltu2_Accuracy_Positive",
"Unit_Device___hmax_Accuracy_Positive",
"Unit_Device___hmin_Accuracy_Positive",
"Unit_Device___hmax_nan_Accuracy_Positive",
"Unit_Device___hmin_nan_Accuracy_Positive",
"Unit_Device___half2int_rn_Accuracy_Positive",
"Unit_Device___half2int_rz_Accuracy_Positive",
"Unit_Device___half2int_rd_Accuracy_Positive",
"Unit_Device___half2int_ru_Accuracy_Positive",
"Unit_Device___half2uint_rn_Accuracy_Positive",
"Unit_Device___half2uint_rz_Accuracy_Positive",
"Unit_Device___half2uint_rd_Accuracy_Positive",
"Unit_Device___half2uint_ru_Accuracy_Positive",
"Unit_Device___half2short_rn_Accuracy_Positive",
"Unit_Device___half2short_rz_Accuracy_Positive",
"Unit_Device___half2short_rd_Accuracy_Positive",
"Unit_Device___half2short_ru_Accuracy_Positive",
"Unit_Device___half2ushort_rn_Accuracy_Positive",
"Unit_Device___half2ushort_rz_Accuracy_Positive",
"Unit_Device___half2ushort_rd_Accuracy_Positive",
"Unit_Device___half2ushort_ru_Accuracy_Positive",
"Unit_Device___half2ll_rn_Accuracy_Positive",
"Unit_Device___half2ll_rz_Accuracy_Positive",
"Unit_Device___half2ll_rd_Accuracy_Positive",
"Unit_Device___half2ll_ru_Accuracy_Positive",
"Unit_Device___half2ull_rn_Accuracy_Positive",
"Unit_Device___half2ull_rz_Accuracy_Positive",
"Unit_Device___half2ull_rd_Accuracy_Positive",
"Unit_Device___half2ull_ru_Accuracy_Positive",
"Unit_Device___half_as_short_Accuracy_Positive",
"Unit_Device___half_as_ushort_Accuracy_Positive",
"Unit_Device___int2half_rn_Accuracy_Positive",
"Unit_Device___int2half_rz_Accuracy_Positive",
"Unit_Device___int2half_rd_Accuracy_Positive",
"Unit_Device___int2half_ru_Accuracy_Positive",
"Unit_Device___uint2half_rn_Accuracy_Positive",
"Unit_Device___uint2half_rz_Accuracy_Positive",
"Unit_Device___uint2half_rd_Accuracy_Positive",
"Unit_Device___uint2half_ru_Accuracy_Positive",
"Unit_Device___short2half_rn_Accuracy_Positive",
"Unit_Device___short2half_rz_Accuracy_Positive",
"Unit_Device___short2half_rd_Accuracy_Positive",
"Unit_Device___short2half_ru_Accuracy_Positive",
"Unit_Device___ushort2half_rn_Accuracy_Positive",
"Unit_Device___ushort2half_rz_Accuracy_Positive",
"Unit_Device___ushort2half_rd_Accuracy_Positive",
"Unit_Device___ushort2half_ru_Accuracy_Positive",
"Unit_Device___ll2half_rn_Accuracy_Positive",
"Unit_Device___ll2half_rz_Accuracy_Positive",
"Unit_Device___ll2half_rd_Accuracy_Positive",
"Unit_Device___ll2half_ru_Accuracy_Positive",
"Unit_Device___ull2half_rn_Accuracy_Positive",
"Unit_Device___ull2half_rz_Accuracy_Positive",
"Unit_Device___ull2half_rd_Accuracy_Positive",
"Unit_Device___ull2half_ru_Accuracy_Positive",
"Unit_Device___short_as_half_Accuracy_Positive",
"Unit_Device___ushort_as_half_Accuracy_Positive",
"Unit_Device___float2half_rn_Accuracy_Positive",
"Unit_Device___float2half_Accuracy_Positive",
"Unit_Device___half2float_Accuracy_Positive",
"Unit_Device___frcp_rn_Accuracy_Positive",
"Unit_Device___fsqrt_rn_Accuracy_Positive",
"Unit_Device___frsqrt_rn_Accuracy_Positive",
"Unit_Device___expf_Accuracy_Positive",
"Unit_Device___exp10f_Accuracy_Positive",
"Unit_Device___logf_Accuracy_Positive",
"Unit_Device___log2f_Accuracy_Positive",
"Unit_Device___log10f_Accuracy_Positive",
"Unit_Device___sinf_Accuracy_Positive",
"Unit_Device___sincosf_sin_Accuracy_Positive",
"Unit_Device___cosf_Accuracy_Positive",
"Unit_Device___sincosf_cos_Accuracy_Positive",
"Unit_Device___fadd_rn_Accuracy_Positive",
"Unit_Device___fsub_rn_Accuracy_Positive",
"Unit_Device___fmul_rn_Accuracy_Positive",
"Unit_Device___fdiv_rn_Accuracy_Positive",
"Unit_Device___fdividef_Accuracy_Positive",
"Unit_Device___fmaf_rn_Accuracy_Positive",
"Unit_Device___drcp_rn_Accuracy_Positive",
"Unit_Device___dsqrt_rn_Accuracy_Positive",
"Unit_Device___dadd_rn_Accuracy_Positive",
"Unit_Device___dsub_rn_Accuracy_Positive",
"Unit_Device___dmul_rn_Accuracy_Positive",
"Unit_Device___ddiv_rn_Accuracy_Positive",
"Unit_Device___fma_rn_Accuracy_Positive",
"Unit_atomicAdd_Negative_Parameters_RTC",
"Unit_atomicSub_Negative_Parameters_RTC",
"Unit_atomicInc_Negative_Parameters_RTC",
"Unit_atomicDec_Negative_Parameters_RTC",
"Unit_atomicCAS_Negative_Parameters_RTC",
"SWDEV-450909: Test failed in stress testing",
"Unit_RTC_LinkDestroy_Default",
"=== SWDEV-457316 Below tests are disabled temporarily to avoid combined PSDB ===",
"Unit_hipGraphAddMemFreeNode_Negative_NotSupported",
"=== SWDEV-454245, SWDEV-454247 : Below tests fail on 29/03/24 ===",
"Unit_hipStreamBeginCaptureToGraph_IndepGraphsThreads",
"Unit_hipStreamBeginCaptureToGraph_CaptureDepGraph",
"=== SWDEV-486448 - Following tests disabled due to taking too much time to execute, ~700s per test",
"Unit_Coalesced_Group_Tiled_Partition_Getters_Positive_Basic",
"Unit_Coalesced_Group_Tiled_Partition_Shfl_Up_Positive_Basic - int",
"Unit_Coalesced_Group_Tiled_Partition_Shfl_Up_Positive_Basic - unsigned int",
"Unit_Coalesced_Group_Tiled_Partition_Shfl_Up_Positive_Basic - long",
"Unit_Coalesced_Group_Tiled_Partition_Shfl_Up_Positive_Basic - unsigned long",
"Unit_Coalesced_Group_Tiled_Partition_Shfl_Up_Positive_Basic - long long",
"Unit_Coalesced_Group_Tiled_Partition_Shfl_Up_Positive_Basic - unsigned long long",
"Unit_Coalesced_Group_Tiled_Partition_Shfl_Up_Positive_Basic - float",
"Unit_Coalesced_Group_Tiled_Partition_Shfl_Up_Positive_Basic - double",
"Unit_Coalesced_Group_Tiled_Partition_Shfl_Down_Positive_Basic - int",
"Unit_Coalesced_Group_Tiled_Partition_Shfl_Down_Positive_Basic - unsigned int",
"Unit_Coalesced_Group_Tiled_Partition_Shfl_Down_Positive_Basic - long",
"Unit_Coalesced_Group_Tiled_Partition_Shfl_Down_Positive_Basic - unsigned long",
"Unit_Coalesced_Group_Tiled_Partition_Shfl_Down_Positive_Basic - long long",
"Unit_Coalesced_Group_Tiled_Partition_Shfl_Down_Positive_Basic - unsigned long long",
"Unit_Coalesced_Group_Tiled_Partition_Shfl_Down_Positive_Basic - float",
"Unit_Coalesced_Group_Tiled_Partition_Shfl_Down_Positive_Basic - double",
"=== SWDEV-454316 : Below tests fail in stress test ===",
"Unit_atomicMin_system_Positive_Peer_GPUs_Same_Address - float",
"Unit_atomicMin_system_Positive_Peer_GPUs_Same_Address - double",
"Unit_safeAtomicMax_Positive_Multi_Kernel_Same_Address - float",
"Unit_atomicMax_system_Positive_Peer_GPUs_Same_Address - float",
"Unit_atomicMax_system_Positive_Peer_GPUs_Same_Address - double",
"=== SWDEV-475482 - Disable tests to merge clr change",
"Unit_hipCreateTextureObject_LinearResource",
"Unit_hipCreateTextureObject_Pitch2DResource",
"========================================================================================",
"=== SWDEV-468258 Below tests are temporarily disabled - windows PSDB failed",
"Unit_hipHostAlloc_Basic",
"Unit_hipHostAlloc_Default",
"Unit_hipHostAlloc_Negative_NonCoherent",
"Unit_hipHostAlloc_Negative_Coherent",
"Unit_hipHostAlloc_Negative_NumaUser",
"=== Following tests disabled due to SWDEV-486363",
"Unit_hipStreamQuery_spt_WithFinishedWork",
"Unit_hipStreamQuery_spt_NegativeCases",
"Unit_hipStreamQuery_spt_WithPendingWork",
"Unit_hipStreamSynchronize_spt_FinishWork",
"Unit_hipStreamSynchronize_spt_SynchronizeStreamAndQueryNullStream",
"====================================================",
"Test Unit_hipGraphUserObj_ClonedGraph disabled due to SWDEV-483112",
"Unit_hipGraphUserObj_ClonedGraph",
"====================================================",
"=== SWDEV-517063 Below tests are temporarily disabled due to PSDB failure",
"Unit_hipGraphInstantiateWithFlags_AutoFreeOnLaunchInLoop",
"Unit_hipGraphInstantiateWithFlags_AutoFreeOnLaunchFillKernel",
"Unit_hipGraphInstantiateWithFlags_AutoFreeOnLaunchDoubleKernel",
"Unit_hipGraphInstantiateWithFlags_WithDefaultAndAutoFreeOnLaunch",
"=== SWDEV-457316 Below test is skipped due ref count logic (Discussed with German) ===",
"Unit_hipGraphAddMemAllocNode_Negative_Free_Alloc_Memory_Again",
#endif
"=== Following tests disabled as it should be a local perf test",
"Performance_hipExtLaunchKernelGGL_QueryGPUFrequency",
"End of json"
]
}
@@ -0,0 +1,637 @@
{
"DisabledTests":
[
"Note: Windows disabled",
"Unit_hipMalloc_CoherentTst",
"Unit_hipTextureMipmapObj2D_Check",
"Unit_hipGraphAddHostNode_ClonedGraphwithHostNode",
"Unit_hipEventIpc",
"Unit_hipMalloc3D_Negative",
"Unit_hipMemPoolApi_BasicAlloc",
"Unit_hipMemPoolApi_BasicTrim",
"Unit_hipMemPoolApi_BasicReuse",
"Unit_hipMemPoolApi_Opportunistic",
"Unit_hipMalloc3D_ValidatePitch",
"Unit_hipMemAllocPitch_ValidatePitch",
"Unit_hipGraphExecMemcpyNodeSetParamsToSymbol_Functional",
"Unit_hipMallocManaged_CoherentTstWthAdvise",
"Unit_hipMallocManaged_Advanced",
"Unit_hipMemRangeGetAttribute_NegativeTests",
"Unit_hipMemRangeGetAttribute_AccessedBy1",
"Unit_hipMemRangeGetAttribte_3",
"Unit_hipMemRangeGetAttribute_4",
"Unit_hipMemRangeGetAttribute_PrefetchAndGtAttr",
"Unit_hipMemAdvise_TstFlags",
"Unit_hipMemAdvise_PrefrdLoc",
"Unit_hipMemAdvise_ReadMostly",
"Unit_hipMemAdvise_TstFlgOverrideEffect",
"Unit_hipMemAdvise_TstAccessedByFlg",
"Unit_hipMemAdvise_TstAccessedByFlg4",
"Unit_hipMemAdvise_TstMemAdvisePrefrdLoc",
"Unit_hipMemAdvise_TstMemAdviseMultiFlag",
"Unit_hipMemAdvise_ReadMosltyMgpuTst",
"Unit_hipMemAdvise_TstSetUnsetPrfrdLoc",
"Unit_hipMallocManaged_DeviceContextChange - unsigned char",
"Unit_hipMallocManaged_DeviceContextChange - int",
"Unit_hipMallocManaged_DeviceContextChange - float",
"Unit_hipMallocManaged_DeviceContextChange - double",
"Unit_hipStreamCreateWithPriority_ValidateWithEvents",
"Unit_hipStreamPerThread_StrmWaitEvt",
"Unit_hipMemGetInfo_DifferentMallocSmall",
"Unit_hipMemGetInfo_MallocArray - int",
"Unit_hipMemGetInfo_MallocArray - int4",
"Unit_hipMemGetInfo_MallocArray - char",
"Unit_hipMemGetInfo_Malloc3D",
"Unit_hipMemGetInfo_Malloc3DArray - char",
"Unit_hipMemGetInfo_Malloc3DArray - int",
"Unit_hipMemGetInfo_Malloc3DArray - int4",
"Unit_hipMemGetInfo_ParaSmall",
"Unit_hipMemGetInfo_ParaMultiSmall",
"Unit_hipMultiThreadDevice_NearZero",
"Unit_hipStreamPerThread_DeviceReset_1",
"Unit_hipStreamCreateWithPriority_ValidateWithEvents",
"Unit_hipStreamPerThread_StrmWaitEvt",
"Unit_hipGraphMemcpyNodeSetParamsToSymbol_Functional",
"Unit_hipStreamWaitEvent_DifferentStreams",
"Unit_hipStreamQuery_WithFinishedWork",
"Unit_hipStreamValue_Wait32_Blocking_Mask_Gte",
"Unit_hipStreamValue_Wait32_Blocking_Mask_Eq_1",
"Unit_hipStreamValue_Wait32_Blocking_Mask_Eq_2",
"Unit_hipStreamValue_Wait32_Blocking_Mask_And",
"Unit_hipStreamValue_Wait32_Blocking_NoMask_Eq",
"Unit_hipStreamValue_Wait32_Blocking_NoMask_Gte",
"Unit_hipStreamValue_Wait32_Blocking_NoMask_And",
"Unit_hipStreamValue_Wait64_Blocking_Mask_Gte_1",
"Unit_hipStreamValue_Wait64_Blocking_Mask_Gte_2",
"Unit_hipStreamValue_Wait64_Blocking_Mask_Eq_1",
"Unit_hipStreamValue_Wait64_Blocking_Mask_Eq_2",
"Unit_hipStreamValue_Wait64_Blocking_Mask_And",
"Unit_hipStreamValue_Wait64_Blocking_NoMask_Gte",
"Unit_hipStreamValue_Wait64_Blocking_NoMask_Eq",
"Unit_hipGetDeviceFlags_Positive_Context",
"Unit_hipIpcCloseMemHandle_Negative_Close_In_Originating_Process",
"Unit_hipIpcOpenMemHandle_Negative_Open_In_Creating_Process",
"Unit_hipDeviceGetPCIBusId_Negative_PartialFill",
"Unit_hipDeviceGetSharedMemConfig_Positive_Basic",
"Unit_hipDeviceGetSharedMemConfig_Positive_Threaded",
"Unit_hipDeviceReset_Positive_Basic",
"Unit_hipDeviceReset_Positive_Threaded",
"Unit_hipInit_Negative",
"Unit_hipGraphMemcpyNodeSetParams_Functional",
"Unit_hipGraphNodeGetDependentNodes_Functional",
"Unit_hipGraphNodeGetDependencies_Functional",
"Unit_hipGraphExecChildGraphNodeSetParams_ChildTopology",
"Unit_hipGraphAddEventRecordNode_MultipleRun",
"Unit_hipGraphAddEventRecordNode_Functional_ElapsedTime",
"Unit_hipStreamBeginCapture_captureComplexGraph",
"Note: needs to be enabled when streamPerThread issues are fixed",
"Unit_hipStreamSynchronize_NullStreamAndStreamPerThread",
"Note: intermittent Seg fault failure ",
"Unit_hipGraphAddEventRecordNode_Functional_WithoutFlags",
"Unit_hipGraphAddChildGraphNode_MultGraphsAsSingleGraph",
"Unit_hipFuncSetCacheConfig_Positive_Basic",
"Unit_hipFuncSetCacheConfig_Negative_Parameters",
"Unit_hipFuncSetSharedMemConfig_Positive_Basic",
"Unit_hipFuncSetAttribute_Positive_PreferredSharedMemoryCarveout",
"Unit_hipEventCreateWithFlags_DisableSystemFence_HstVisMem",
"Unit_hipEventCreateWithFlags_DefaultFlg_HstVisMem",
"Unit_hipEventCreateWithFlags_DisableSystemFence_NonCohHstMem",
"Unit_hipEventCreateWithFlags_DefaultFlg_NonCohHstMem",
"Unit_hipEventCreateWithFlags_DisableSystemFence_CohHstMem",
"Unit_hipEventCreateWithFlags_DefaultFlg_CohHstMem",
"Unit_hipOccupancyMaxActiveBlocksPerMultiprocessor_Negative_Parameters",
"Unit_hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags_Negative_Parameters",
"Unit_hipModuleOccupancyMaxPotentialBlockSizeWithFlags_Negative_Parameters",
"Unit_hipGraphMemcpyNodeSetParamsToSymbol_Positive_Basic",
"Unit_hipGraphExecMemcpyNodeSetParamsToSymbol_Positive_Basic",
"Unit_hipGraphMemcpyNodeSetParamsFromSymbol_Positive_Basic",
"Unit_hipGraphExecMemcpyNodeSetParamsFromSymbol_Positive_Basic",
"Unit_hipKernelNameRef_Negative_Parameters",
"Unit_hipKernelNameRef_Positive_Basic",
"Unit_hipMemAdvise_AccessedBy_All_Devices",
"Unit_hipMemAdvise_No_Flag_Interference",
"Unit_hipGraphAddEventRecordNode_Functional_WithoutFlags",
"Unit_hipGraphDestroyNode_Complx_ChkNumOfNodesNDep",
"Unit_hipGraphDestroyNode_Complx_ChkNumOfNodesNDep_ClonedGrph",
"Unit_hipGraphDestroyNode_Complx_ChkNumOfNodesNDep_ChldNode",
"Unit_hipMemGetAddressRange_Negative",
"NOTE: The following 2 tests are disabled due to defect - EXSWHTEC-238",
"Unit_hipDrvMemcpy3D_Positive_Array",
"Unit_hipDrvMemcpy3DAsync_Positive_Array",
"Unit_hipMemGetAddressRange_Positive",
"Note: devicelib hangs and failures",
"Unit_deviceAllocation_Malloc_PerThread_PrimitiveDataType",
"Unit_deviceAllocation_New_PerThread_PrimitiveDataType",
"Unit_deviceAllocation_Malloc_PerThread_StructDataType",
"Unit_deviceAllocation_New_PerThread_StructDataType",
"Unit_deviceAllocation_Malloc_AcrossKernels",
"Unit_deviceAllocation_New_AcrossKernels",
"Unit_deviceAllocation_Malloc_SingleCodeObj",
"Unit_deviceAllocation_New_SingleCodeObj",
"Unit_deviceAllocation_Malloc_PerThread_Graph",
"Unit_deviceAllocation_New_PerThread_Graph",
"Unit_deviceAllocation_Malloc_DeviceFunc",
"Unit_deviceAllocation_VirtualFunction",
"Unit_deviceAllocation_Malloc_MulKernels_MulThreads",
"Unit_deviceAllocation_New_MulKernels_MulThreads",
"Unit_deviceAllocation_Malloc_MulCodeObj",
"Unit_deviceAllocation_New_MulCodeObj",
"Unit_deviceAllocation_New_DeviceFunc",
"Unit_hipGraphAddEventRecordNode_MultipleRun",
"Unit_hipDeviceGetPCIBusId_Negative_PartialFill",
"Unit_hipStreamValue_Wait64_Blocking_NoMask_Nor",
"Unit_hipStreamQuery_WithFinishedWork",
"Unit_hipLaunchHostFunc_Graph",
"Unit_hipLaunchHostFunc_KernelHost",
"Unit_ChannelDescriptor_Positive_16BitFloatingPoint",
"Unit_hipStreamSetCaptureDependencies_Positive_Functional",
"Note: Following four tests disabled due to defect - EXSWHTEC-203",
"Unit_hipGraphMemsetNodeSetParams_Positive_Basic - uint16_t",
"Unit_hipGraphMemsetNodeSetParams_Positive_Basic - uint32_t",
"Note: Test disabled due to defect - EXSWHTEC-207",
"Unit_hipGraphExecMemsetNodeSetParams_Negative_Updating_Non1D_Node",
"Unit_hipGraphExecMemsetNodeSetParams_Positive_Basic - uint8_t",
"Unit_hipGraphExecMemsetNodeSetParams_Positive_Basic - uint16_t",
"Unit_hipGraphExecMemsetNodeSetParams_Positive_Basic - uint32_t",
"Unit_hipGraphMemsetNodeSetParams_Positive_Basic - uint8_t",
"Unit_hipDeviceGetUuid_Positive",
"Disabling test tracked SWDEV-394199",
"Unit_hipStreamCreateWithPriority_MulthreadNonblockingflag",
"SWDEV-396617 ExecMemcpyNodeSetParamsFromSymbol fails in direction",
"Unit_hipGraphExecMemcpyNodeSetParamsFromSymbol_Negative_Parameters",
"SWDEV-396616 hipMemMap returns invalid error",
"Unit_hipMemVmm_Basic",
"SWDEV-396615 mGPUs not considered correctly",
"Unit_hipManagedKeyword_MultiGpu",
"Disabling test tracked SWDEV-391555",
"Unit_hipMemcpyPeer_Positive_ZeroSize",
"Unit_hipMemcpyPeerAsync_Positive_ZeroSize",
"SWDEV-400049 tdr intermittently",
"Unit_hipMemsetDSync init16_t",
"Unit_hipStreamAddCallback_StrmSyncTiming",
"SWDEV-402082 - PAL Backend fails to reserve address on GPU except first one",
"Unit_hipGraphInstantiateWithFlags_FlagAutoFreeOnLaunch_check",
"SWDEV-398981 fails in stress test",
"Unit_hipStreamCreateWithPriority_MulthreadDefaultflag",
"Disabling below tests temporarily due to change in API behavior",
"Unit_hipMemPrefetchAsync_NonPageSz",
"Unit_hipStreamCreateWithFlags_DefaultStreamInteraction",
"SWDEV-402054 fails in external github build",
"Unit_hipEventDestroy_WithWaitingStream",
"Note: UUID returned empty on some windows nodes",
"Unit_hipDeviceGetUuid_Positive",
"=== Below tests fail in external CI for PR https://github.com/ROCm-Developer-Tools/hip-tests/pull/96 ===",
"Unit_hipHostGetDevicePointer_Negative",
"Unit_hipExtModuleLaunchKernel_NonUniformWorkGroup",
"=== Below tests fail in external CI for PR https://github.com/ROCm-Developer-Tools/hip-tests/pull/18 ===",
"Unit_hipMemcpyAsync_Negative_Parameters",
"Unit_hipMemcpyDtoHAsync_Negative_Parameters",
"Unit_hipMemcpyHtoDAsync_Negative_Parameters",
"Unit_hipMemcpyDtoDAsync_Negative_Parameters",
"=== Below tests fail in external CI for PR https://github.com/ROCm-Developer-Tools/hip-tests/pull/92 ===",
"Unit_hipGetChannelDesc_Negative_Parameters",
"Unit_hipTextureMipmapRef2D_Positive_Check",
"Unit_hipTextureMipmapRef2D_Negative_Parameters",
"=== SWDEV-430116:Below tests failed in stress test on 27/10/23 ===",
"Unit_hipFreeAsync_negative",
"Unit_hipLaunchHostFunc_multistreams",
"Unit_hipGraphMem_Alloc_Free_NodeGetParams_Negative",
"=== SWDEV-431191:Below tests failed in stress test on 03/11/23 ===",
"Unit_hipHostMalloc_AllocateMoreThanAvailGPUMemory",
"Unit_hipHostMalloc_AllocateUseMoreThanAvailGPUMemory",
"Unit_Layered1DTexture_Check_DeviceBufferToFromLayered1DArray - char1",
"Unit_Layered2DTexture_Check_DeviceBufferToFromLayered2DArray - char2",
"Unit_Layered1DTexture_Check_HostBufferToFromLayered1DArray - uint",
"Unit_Layered1DTexture_Check_DeviceBufferToFromLayered1DArray - uchar4",
"Unit_Layered2DTexture_Check_DeviceBufferToFromLayered2DArray - unsigned char",
"Unit_Layered2DTexture_Check_DeviceBufferToFromLayered2DArray - float",
"Unit_Layered2DTexture_Check_DeviceBufferToFromLayered2DArray - uint2",
"=== SWDEV-432250:Below tests failed in stress test on 10/11/23 ===",
"Unit_Layered1DTexture_Check_DeviceBufferToFromLayered1DArray - ushort1",
"Unit_Layered2DTexture_Check_DeviceBufferToFromLayered2DArray - int2",
"Unit_hipVectorTypes_test_on_device",
"Unit_Layered1DTexture_Check_DeviceBufferToFromLayered1DArray - ushort4",
"Unit_Layered2DTexture_Check_DeviceBufferToFromLayered2DArray - float4",
"=== Patch which removes the typetraits implementation from std namespace in hiprtc is reverted ===",
"Unit_hiprtc_stdheaders",
"Unit_hipMemAddressFree_negative",
"Unit_hipMemAddressReserve_AlignmentTest",
"Unit_hipMemAddressReserve_Negative",
"Unit_hipMemCreate_BasicAllocateDeAlloc_MultGranularity",
"Unit_hipMemCreate_ChkDev2HstMemcpy_ReleaseHdlPostUnmap",
"Unit_hipMemCreate_ChkDev2HstMemcpy_ReleaseHdlPreUse",
"Unit_hipMemCreate_ChkWithKerLaunch",
"Unit_hipMemCreate_MapNonContiguousChunks",
"Unit_hipMemCreate_ChkWithMemset",
"Unit_hipMemCreate_Negative",
"Unit_hipMemGetAllocationGranularity_MinGranularity",
"Unit_hipMemGetAllocationGranularity_RecommendedGranularity",
"Unit_hipMemGetAllocationGranularity_AllGPUs",
"Unit_hipMemGetAllocationGranularity_NegativeTests",
"Unit_hipMemGetAllocationPropertiesFromHandle_functional",
"Unit_hipMemGetAllocationPropertiesFromHandle_Negative",
"Unit_hipMemMap_SameMemoryReuse",
"Unit_hipMemMap_PhysicalMemoryReuse_SingleGPU",
"Unit_hipMemMap_PhysicalMemory_Map2MultVMMs",
"Unit_hipMemMap_PhysicalMemoryReuse_MultiDev",
"Unit_hipMemMap_VMMMemoryReuse_SingleGPU",
"Unit_hipMemMap_VMMMemoryReuse_MultiGPU",
"Unit_hipMemMap_MapPartialPhysicalMem",
"Unit_hipMemMap_MapPartialVMMMem",
"Unit_hipMemMap_negative",
"Unit_hipMemRelease_negative",
"Unit_hipMemRetainAllocationHandle_SetGet",
"Unit_hipMemRetainAllocationHandle_NegTst",
"Unit_hipMemSetAccess_SetGet",
"Unit_hipMemSetAccess_MultDevSetGet",
"Unit_hipMemSetAccess_EntireVMMRangeSetGet",
"Unit_hipMemGetAccess_NegTst",
"Unit_hipMemSetAccess_FuncTstOnMultDev",
"Unit_hipMemSetAccess_ChangeAccessProp",
"Unit_hipMemSetAccess_Vmm2UnifiedMemCpy",
"Unit_hipMemSetAccess_Vmm2DevMemCpy",
"Unit_hipMemSetAccess_Vmm2PeerDevMemCpy",
"Unit_hipMemSetAccess_Vmm2PeerPeerMemCpy",
"Unit_hipMemSetAccess_Vmm2VMMMemCpy",
"Unit_hipMemSetAccess_Vmm2VMMInterDevMemCpy",
"Unit_hipMemSetAccess_GrowVMM",
"Unit_hipMemSetAccess_Multithreaded",
"Unit_hipMemSetAccess_MultiProc",
"Unit_hipMemSetAccess_negative",
"Unit_hipMemUnmap_negative",
"=== SWDEV-434171: Below tests took long time to complete in stress test on 17/11/23 ===",
"Unit_Warp_Shfl_Positive_Basic - int",
"Unit_Warp_Shfl_Positive_Basic - unsigned int",
"Unit_Warp_Shfl_Positive_Basic - long",
"Unit_Warp_Shfl_Positive_Basic - unsigned long",
"Unit_Warp_Shfl_Positive_Basic - long long",
"Unit_Warp_Shfl_Positive_Basic - unsigned long long",
"Unit_Warp_Shfl_Positive_Basic - float",
"Unit_Warp_Shfl_Positive_Basic - double",
"Unit_Warp_Shfl_XOR_Positive_Basic - int",
"Unit_Warp_Shfl_XOR_Positive_Basic - unsigned int",
"Unit_Warp_Shfl_XOR_Positive_Basic - long",
"Unit_Warp_Shfl_XOR_Positive_Basic - unsigned long",
"Unit_Warp_Shfl_XOR_Positive_Basic - long long",
"Unit_Warp_Shfl_XOR_Positive_Basic - unsigned long long",
"Unit_Warp_Shfl_XOR_Positive_Basic - float",
"Unit_Warp_Shfl_XOR_Positive_Basic - double",
"=== SWDEV-434878: Below tests failed in stress test on 24/11/23 ===",
"Unit_hipGraphUpload_Negative_Parameters",
"Unit_hipModuleOccupancyMaxPotentialBlockSize_Negative_Parameters",
"Unit_hipModuleOccupancyMaxPotentialBlockSize_Positive_RangeValidation",
"Unit_hipModuleOccupancyMaxPotentialBlockSizeWithFlags_Positive_RangeValidation",
"Unit_hipModuleOccupancyMaxActiveBlocksPerMultiprocessor_Negative_Parameters",
"Unit_hipModuleOccupancyMaxActiveBlocksPerMultiprocessor_Positive_RangeValidation",
"Unit_hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags_Positive_RangeValidation",
"=== SWDEV-435667: Below tests failing randomly in stress test on 01/12/23 ===",
"Unit_atomicExch_Positive_Same_Address_Compile_Time - int",
"Unit_atomicExch_Positive_Same_Address_Compile_Time - unsigned int",
"Unit_atomicExch_Positive_Same_Address_Compile_Time - unsigned long",
"Unit_atomicExch_Positive_Same_Address_Compile_Time - unsigned long long",
"Unit_atomicExch_Positive_Same_Address_Compile_Time - float",
"Unit_atomicExch_Positive_Same_Address_Compile_Time - double",
"Unit_atomicExch_Positive_Multi_Kernel - int",
"Unit_atomicExch_Positive_Multi_Kernel - unsigned int",
"Unit_atomicExch_Positive_Multi_Kernel - unsigned long",
"Unit_atomicExch_Positive_Multi_Kernel - unsigned long long",
"Unit_atomicExch_Positive_Multi_Kernel - float",
"Unit_atomicExch_Positive_Multi_Kernel - double",
"Unit_atomicExch_system_Positive_Peer_GPUs - int",
"Unit_atomicExch_system_Positive_Peer_GPUs - unsigned int",
"Unit_atomicExch_system_Positive_Peer_GPUs - unsigned long",
"Unit_atomicExch_system_Positive_Peer_GPUs - unsigned long long",
"Unit_atomicExch_system_Positive_Peer_GPUs - float",
"Unit_atomicExch_system_Positive_Peer_GPUs - double",
"Unit_atomicExch_system_Positive_Host_And_GPU - int",
"Unit_atomicExch_system_Positive_Host_And_GPU - unsigned int",
"Unit_atomicExch_system_Positive_Host_And_GPU - unsigned long",
"Unit_atomicExch_system_Positive_Host_And_GPU - unsigned long long",
"Unit_atomicExch_system_Positive_Host_And_GPU - float",
"Unit_atomicExch_system_Positive_Host_And_GPU - double",
"Unit_atomicExch_system_Positive_Host_And_Peer_GPUs - int",
"Unit_atomicExch_system_Positive_Host_And_Peer_GPUs - unsigned int",
"Unit_atomicExch_system_Positive_Host_And_Peer_GPUs - unsigned long",
"Unit_atomicExch_system_Positive_Host_And_Peer_GPUs - unsigned long long",
"Unit_atomicExch_system_Positive_Host_And_Peer_GPUs - float",
"Unit_atomicExch_system_Positive_Host_And_Peer_GPUs - double",
"SWDEV-438524: Below tests taking long time to run in stress test on 15/12/23 ===",
"Unit_Coalesced_Group_Shfl_Up_Positive_Basic - int",
"Unit_Coalesced_Group_Shfl_Up_Positive_Basic - unsigned int",
"Unit_Coalesced_Group_Shfl_Up_Positive_Basic - long",
"Unit_Coalesced_Group_Shfl_Up_Positive_Basic - unsigned long",
"Unit_Coalesced_Group_Shfl_Up_Positive_Basic - long long",
"Unit_Coalesced_Group_Shfl_Up_Positive_Basic - unsigned long long",
"Unit_Coalesced_Group_Shfl_Up_Positive_Basic - float",
"Unit_Coalesced_Group_Shfl_Up_Positive_Basic - double",
"Unit_Coalesced_Group_Shfl_Down_Positive_Basic - int",
"Unit_Coalesced_Group_Shfl_Down_Positive_Basic - unsigned int",
"Unit_Coalesced_Group_Shfl_Down_Positive_Basic - long",
"Unit_Coalesced_Group_Shfl_Down_Positive_Basic - unsigned long",
"Unit_Coalesced_Group_Shfl_Down_Positive_Basic - long long",
"Unit_Coalesced_Group_Shfl_Down_Positive_Basic - unsigned long long",
"Unit_Coalesced_Group_Shfl_Down_Positive_Basic - float",
"Unit_Coalesced_Group_Shfl_Down_Positive_Basic - double",
"Unit_Coalesced_Group_Shfl_Positive_Basic - int",
"Unit_Coalesced_Group_Shfl_Positive_Basic - unsigned int",
"Unit_Coalesced_Group_Shfl_Positive_Basic - long",
"Unit_Coalesced_Group_Shfl_Positive_Basic - unsigned long",
"Unit_Coalesced_Group_Shfl_Positive_Basic - long long",
"Unit_Coalesced_Group_Shfl_Positive_Basic - unsigned long long",
"Unit_Coalesced_Group_Shfl_Positive_Basic - float",
"Unit_Coalesced_Group_Shfl_Positive_Basic - double",
"SWDEV-438524: Below tests causing TDR & machine down in stress test on 15/12/23 ===",
"Unit_hipExtModuleLaunchKernel_Functional",
"Unit_hipExtLaunchKernelGGL_Functional",
"SWDEV-438524:Below tests failed in stress test on 15/12/23 ===",
"Unit_Device_memcpy_Negative",
"Unit_Device_memset_Negative",
"Unit_Device_Complex_make_Negative",
"Unit_Device_Complex_Cast_Negative",
"Unit_Device_Complex_Unary_float_Negative",
"Unit_Device_Complex_Unary_double_Negative",
"Unit_Device_Complex_Binary_float_Negative",
"Unit_Device_Complex_Binary_double_Negative",
"Unit_Device_Complex_hipCfma_Negative",
"Unit_Device__hip_hc_8pk_Negative",
"Note: Linux disabled",
"Unit_hipStreamPerThread_DeviceReset_1",
"Unit_hipDeviceGetSharedMemConfig_Positive_Basic",
"Unit_hipDeviceGetSharedMemConfig_Positive_Threaded",
"Unit_hipGetDeviceFlags_Positive_Context",
"Unit_hipIpcCloseMemHandle_Negative_Close_In_Originating_Process",
"Unit_hipIpcOpenMemHandle_Negative_Open_In_Creating_Process",
"Unit_hipInit_Negative",
"Unit_hipDeviceReset_Positive_Basic",
"Unit_hipDeviceReset_Positive_Threaded",
"Unit_hipFuncSetCacheConfig_Positive_Basic",
"Unit_hipFuncSetCacheConfig_Negative_Parameters",
"Unit_hipFuncSetSharedMemConfig_Positive_Basic",
"Unit_hipFuncSetAttribute_Positive_PreferredSharedMemoryCarveout",
"Unit_hipOccupancyMaxActiveBlocksPerMultiprocessor_Negative_Parameters",
"Unit_hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags_Negative_Parameters",
"Unit_hipModuleOccupancyMaxPotentialBlockSizeWithFlags_Negative_Parameters",
"Unit_hipGraphMemcpyNodeSetParamsToSymbol_Positive_Basic",
"Unit_hipGraphExecMemcpyNodeSetParamsToSymbol_Positive_Basic",
"Unit_hipGraphExecMemcpyNodeSetParamsFromSymbol_Positive_Basic",
"Unit_hipKernelNameRef_Negative_Parameters",
"Unit_hipMemAdvise_AccessedBy_All_Devices",
"Unit_hipMemAdvise_No_Flag_Interference",
"Unit_hipMemGetAddressRange_Negative",
"NOTE: The following 2 tests are disabled due to defect - EXSWHTEC-238",
"Unit_hipDrvMemcpy3D_Positive_Array",
"Unit_hipDrvMemcpy3DAsync_Positive_Array",
"Unit_hipMemRangeGetAttribute_Positive_AccessedBy_Basic",
"Unit_hipMemRangeGetAttribute_Positive_AccessedBy_Partial_Range",
"Unit_hipMemGetAddressRange_Positive",
"Unit_hipGraphAddMemcpyNode1D_Negative_Basic",
"Unit_ChannelDescriptor_Positive_16BitFloatingPoint",
"intermittent issue: failure expected but success returned",
"Unit_hipMemAdvise_NegtveTsts",
"Note: Following four tests disabled due to defect - EXSWHTEC-203",
"Unit_hipStreamSetCaptureDependencies_Positive_Functional",
"Note: Test disabled due to defect - EXSWHTEC-207",
"Unit_hipIpcGetMemHandle_Positive_Unique_Handles_Separate_Allocations",
"Unit_hipStreamCreateWithFlags_DefaultStreamInteraction",
"Unit_hipMemset3DSync",
"Unit_hipStreamAddCallback_StrmSyncTiming",
"Disabling test tracked SWDEV-394199",
"Unit_hipStreamCreateWithPriority_MulthreadNonblockingflag",
"Disabling test tracked SWDEV-395683",
"Unit_hipStreamPerThread_MultiThread",
"SWDEV-396963",
"Unit_hipMemcpy2DFromArrayAsync_Positive_Synchronization_Behavior",
"Disabling tests tracked with SWDEV-389647..",
"Unit_hipMemcpy2DToArrayAsync_Positive_Synchronization_Behavior",
"Disabling test tracked SWDEV-391555",
"Unit_hipMemcpyPeer_Positive_ZeroSize",
"Unit_hipMemcpyPeerAsync_Positive_ZeroSize",
"Fails in Stress test SWDEV-398971",
"SWDEV-398977 fails in stress tests",
"Unit_hipMemset2DSync",
"SWDEV-398981 fails in stress test",
"Unit_hipStreamCreateWithPriority_MulthreadDefaultflag",
"SWDEV-402054 fails in external github build",
"Unit_hipEventDestroy_WithWaitingStream",
"=== Below tests fail in stress test on 23/06/23 ===",
"Unit_hipIpcMemAccess_ParameterValidation",
"Unit_hipMemcpy2DFromArrayAsync_Positive_Synchronization_Behavior",
"Unit_hipGraphClone_Test_hipGraphExecMemcpyNodeSetParams",
"Unit_hipGraphClone_Test_hipGraphMemcpyNodeSetParams1D_and_exec",
"=== Below tests fail in stress test on 30/06/23 ===",
"Unit_hipStreamValue_Write - TestParams<uint32_t, PtrType::HostPtr>",
"Unit_hipMemcpyParam2DAsync_multiDevice-StreamOnDiffDevice",
"=== Below tests fail in external CI for PR https://github.com/ROCm-Developer-Tools/hip-tests/pull/96 ===",
"Unit_hipHostGetDevicePointer_Negative",
"Unit_hipExtModuleLaunchKernel_NonUniformWorkGroup",
"=== Below tests fail in external CI for PR https://github.com/ROCm-Developer-Tools/hip-tests/pull/18 ===",
"Unit_hipMemcpyAsync_Negative_Parameters",
"Unit_hipMemcpyDtoHAsync_Negative_Parameters",
"Unit_hipMemcpyHtoDAsync_Negative_Parameters",
"Unit_hipMemcpyDtoDAsync_Negative_Parameters",
"Unit_hipStreamValue_Wait32_Blocking_Mask_Eq_1",
"=== Below tests fail in external CI for PR https://github.com/ROCm-Developer-Tools/hip-tests/pull/327 ===",
"Unit_hiprtcGpuRdcComplrOptnTst",
"Unit_hiprtcDisabledSlpVectorizeComplrOptnTst",
"Unit_hiprtcRpassInlineComplrOptnTst",
"Unit_hiprtcCombiComplrOptnTst",
"=== Below tests fail in external CI for PR https://github.com/ROCm-Developer-Tools/hip-tests/pull/92 ===",
"Unit_hipGetChannelDesc_Negative_Parameters",
"Unit_hipGraphAddChildGraphNode_CmplxNstGrph_UpdKerFun_Clone",
"=== Below tests fail in stress test on 24/07/23 ===",
"Unit_hipStreamCreateWithPriority_ValidateWithEvents",
"Unit_hipEventIpc",
"=== SWDEV-427101:Below test fails randomly in PSDB ===",
"Unit_deviceAllocation_InOneThread_AccessInAllThreads",
"=== Patch which removes the typetraits implementation from std namespace in hiprtc is reverted ===",
"Unit_hiprtc_stdheaders",
"Unit_hipMemAddressFree_negative",
"Unit_hipMemAddressReserve_AlignmentTest",
"Unit_hipMemAddressReserve_Negative",
"Unit_hipMemCreate_BasicAllocateDeAlloc_MultGranularity",
"Unit_hipMemCreate_ChkDev2HstMemcpy_ReleaseHdlPostUnmap",
"Unit_hipMemCreate_ChkDev2HstMemcpy_ReleaseHdlPreUse",
"Unit_hipMemCreate_ChkWithKerLaunch",
"Unit_hipMemCreate_MapNonContiguousChunks",
"Unit_hipMemCreate_ChkWithMemset",
"Unit_hipMemCreate_Negative",
"Unit_hipMemGetAllocationGranularity_MinGranularity",
"Unit_hipMemGetAllocationGranularity_RecommendedGranularity",
"Unit_hipMemGetAllocationGranularity_AllGPUs",
"Unit_hipMemGetAllocationGranularity_NegativeTests",
"Unit_hipMemGetAllocationPropertiesFromHandle_functional",
"Unit_hipMemGetAllocationPropertiesFromHandle_Negative",
"Unit_hipMemMap_SameMemoryReuse",
"Unit_hipMemMap_PhysicalMemoryReuse_SingleGPU",
"Unit_hipMemMap_PhysicalMemory_Map2MultVMMs",
"Unit_hipMemMap_PhysicalMemoryReuse_MultiDev",
"Unit_hipMemMap_VMMMemoryReuse_SingleGPU",
"Unit_hipMemMap_VMMMemoryReuse_MultiGPU",
"Unit_hipMemMap_MapPartialPhysicalMem",
"Unit_hipMemMap_MapPartialVMMMem",
"Unit_hipMemMap_negative",
"Unit_hipMemRelease_negative",
"Unit_hipMemRetainAllocationHandle_SetGet",
"Unit_hipMemRetainAllocationHandle_NegTst",
"Unit_hipMemSetAccess_SetGet",
"Unit_hipMemSetAccess_MultDevSetGet",
"Unit_hipMemSetAccess_EntireVMMRangeSetGet",
"Unit_hipMemGetAccess_NegTst",
"Unit_hipMemSetAccess_FuncTstOnMultDev",
"Unit_hipMemSetAccess_ChangeAccessProp",
"Unit_hipMemSetAccess_Vmm2UnifiedMemCpy",
"Unit_hipMemSetAccess_Vmm2DevMemCpy",
"Unit_hipMemSetAccess_Vmm2PeerDevMemCpy",
"Unit_hipMemSetAccess_Vmm2PeerPeerMemCpy",
"Unit_hipMemSetAccess_Vmm2VMMMemCpy",
"Unit_hipMemSetAccess_Vmm2VMMInterDevMemCpy",
"Unit_hipMemSetAccess_GrowVMM",
"Unit_hipMemSetAccess_Multithreaded",
"Unit_hipMemSetAccess_MultiProc",
"Unit_hipMemSetAccess_negative",
"Unit_hipMemUnmap_negative",
"=== SWDEV-434171: Below tests took long time to complete in stress test on 17/11/23 ===",
"Unit_Warp_Shfl_Positive_Basic - int",
"Unit_Warp_Shfl_Positive_Basic - unsigned int",
"Unit_Warp_Shfl_Positive_Basic - long",
"Unit_Warp_Shfl_Positive_Basic - unsigned long",
"Unit_Warp_Shfl_Positive_Basic - long long",
"Unit_Warp_Shfl_Positive_Basic - unsigned long long",
"Unit_Warp_Shfl_Positive_Basic - float",
"Unit_Warp_Shfl_Positive_Basic - double",
"Unit_Warp_Shfl_XOR_Positive_Basic - int",
"Unit_Warp_Shfl_XOR_Positive_Basic - unsigned int",
"Unit_Warp_Shfl_XOR_Positive_Basic - long",
"Unit_Warp_Shfl_XOR_Positive_Basic - unsigned long",
"Unit_Warp_Shfl_XOR_Positive_Basic - long long",
"Unit_Warp_Shfl_XOR_Positive_Basic - unsigned long long",
"Unit_Warp_Shfl_XOR_Positive_Basic - float",
"Unit_Warp_Shfl_XOR_Positive_Basic - double",
"=== SWDEV-434878: Below tests failed in stress test on 24/11/23 ===",
"Unit_hipGraphUpload_Negative_Parameters",
"Unit_hipModuleOccupancyMaxPotentialBlockSize_Negative_Parameters",
"Unit_hipModuleOccupancyMaxPotentialBlockSize_Positive_RangeValidation",
"Unit_hipModuleOccupancyMaxPotentialBlockSizeWithFlags_Positive_RangeValidation",
"Unit_hipModuleOccupancyMaxActiveBlocksPerMultiprocessor_Negative_Parameters",
"Unit_hipModuleOccupancyMaxActiveBlocksPerMultiprocessor_Positive_RangeValidation",
"Unit_hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags_Positive_RangeValidation",
"=== SWDEV-435667: Below tests failing randomly in stress test on 01/12/23 ===",
"Unit_atomicExch_Positive_Same_Address_Compile_Time - int",
"Unit_atomicExch_Positive_Same_Address_Compile_Time - unsigned int",
"Unit_atomicExch_Positive_Same_Address_Compile_Time - unsigned long",
"Unit_atomicExch_Positive_Same_Address_Compile_Time - unsigned long long",
"Unit_atomicExch_Positive_Same_Address_Compile_Time - float",
"Unit_atomicExch_Positive_Same_Address_Compile_Time - double",
"Unit_atomicExch_Positive_Multi_Kernel - int",
"Unit_atomicExch_Positive_Multi_Kernel - unsigned int",
"Unit_atomicExch_Positive_Multi_Kernel - unsigned long",
"Unit_atomicExch_Positive_Multi_Kernel - unsigned long long",
"Unit_atomicExch_Positive_Multi_Kernel - float",
"Unit_atomicExch_Positive_Multi_Kernel - double",
"Unit_atomicExch_system_Positive_Peer_GPUs - int",
"Unit_atomicExch_system_Positive_Peer_GPUs - unsigned int",
"Unit_atomicExch_system_Positive_Peer_GPUs - unsigned long",
"Unit_atomicExch_system_Positive_Peer_GPUs - unsigned long long",
"Unit_atomicExch_system_Positive_Peer_GPUs - float",
"Unit_atomicExch_system_Positive_Peer_GPUs - double",
"Unit_atomicExch_system_Positive_Host_And_GPU - int",
"Unit_atomicExch_system_Positive_Host_And_GPU - unsigned int",
"Unit_atomicExch_system_Positive_Host_And_GPU - unsigned long",
"Unit_atomicExch_system_Positive_Host_And_GPU - unsigned long long",
"Unit_atomicExch_system_Positive_Host_And_GPU - float",
"Unit_atomicExch_system_Positive_Host_And_GPU - double",
"Unit_atomicExch_system_Positive_Host_And_Peer_GPUs - int",
"Unit_atomicExch_system_Positive_Host_And_Peer_GPUs - unsigned int",
"Unit_atomicExch_system_Positive_Host_And_Peer_GPUs - unsigned long",
"Unit_atomicExch_system_Positive_Host_And_Peer_GPUs - unsigned long long",
"Unit_atomicExch_system_Positive_Host_And_Peer_GPUs - float",
"Unit_atomicExch_system_Positive_Host_And_Peer_GPUs - double",
"=== SWDEV-439004: Below tests failing randomly in CQE staging ===",
"Unit_hipLaunchCooperativeKernel_Streams",
"Unit_hipGLGetDevices_Positive_Basic",
"Unit_hipGLGetDevices_Positive_Parameters",
"Unit_hipGLGetDevices_Negative_Parameters",
"Unit_hipGraphicsGLRegisterBuffer_Positive_Basic",
"Unit_hipGraphicsGLRegisterBuffer_Positive_Register_Twice",
"Unit_hipGraphicsGLRegisterBuffer_Negative_Parameters",
"Unit_hipGraphicsGLRegisterImage_Positive_Basic",
"Unit_hipGraphicsGLRegisterImage_Positive_Register_Twice",
"Unit_hipGraphicsGLRegisterImage_Negative_Parameters",
"Unit_hipGraphicsMapResources_Positive_Basic",
"Unit_hipGraphicsMapResources_Negative_Parameters",
"Unit_hipGraphicsSubResourceGetMappedArray_Positive_Basic",
"Unit_hipGraphicsSubResourceGetMappedArray_Negative_Parameters",
"Unit_hipGraphicsResourceGetMappedPointer_Positive_Basic",
"Unit_hipGraphicsResourceGetMappedPointer_Positive_Parameters",
"Unit_hipGraphicsResourceGetMappedPointer_Negative_Parameters",
"Unit_hipGraphicsUnmapResources_Negative_Parameters",
"Unit_hipGraphicsUnregisterResource_Negative_Parameters",
"Unit_hipGraphExecMemcpyNodeSetParams1D_Negative",
"Note: gfx1100, gfx1101, gfx1102",
"=== Below tests soft hang in stress test on 13/09/23 ===",
"Unit_hipMemsetFunctional_ZeroValue_hipMemsetD16",
"Unit_hipIpcMemAccess_Semaphores",
"Unit_hipStreamAttachMemAsync_Negative_Parameters",
"hipStreamPerThread_CoopLaunch",
"hipCGMultiGridGroupType",
"Grid_Group_Getters_Positive_Basic",
"Grid_Group_Getters_Via_Non_Member_Functions_Positive_Basic",
"Grid_Group_Sync_Positive_Basic",
"dynamic_loading_device_kernels_from_library",
"Note: Image extension disabled",
"Unit_hipMemset2DSync",
"Unit_hipMemset3DSync",
"Note: CONFIG_NUMA disabled",
"Unit_hipHostMalloc_WthEnv0Flg3",
"Unit_hipHostGetFlags_flagCombos",
"Unit_hipHostGetFlags_DifferentThreads",
"Unit_hipHostMalloc_WthEnv1Flg3",
"Note: no valid pci bdf in wsl",
"Unit_hipDeviceGetPCIBusId_CheckPciBusIDWithLspci",
"Note: TDR",
"Unit_deviceAllocation_InOneThread_AccessInAllThreads",
"Unit_deviceAllocation_Malloc_UnionType",
"Unit_deviceAllocation_New_ComplexDataType",
"Unit_deviceAllocation_New_UnionType",
"Unit_hipFreeImplicitSyncDev - char",
"Unit_hipFreeImplicitSyncDev - float",
"Unit_hipFreeImplicitSyncDev - float2",
"Unit_hipFreeImplicitSyncDev - float4",
"Unit_hipFreeImplicitSyncHost - char",
"Unit_hipFreeImplicitSyncHost - float",
"Unit_hipFreeImplicitSyncHost - float2",
"Unit_hipFreeImplicitSyncHost - float4",
"Unit_hipStreamDestroy_WithPendingWork",
"Unit_printf_specifier",
"Unit_tiled_partition",
"Note: TDR (pass)",
"Unit_hipStreamSynchronize_FinishWork",
"Unit_hipStreamSynchronize_NullStreamSynchronization",
"Unit_hipStreamQuery_NullStreamQuery",
"Unit_hipStreamQuery_SubmitWorkOnStreamAndQueryNullStream",
"Unit_hipStreamQuery_WithPendingWork",
"SWDEV-411303 fails in WSL. Profiling not support in WSL",
"Unit_hipEvent",
"Unit_hipEventDestroy_Unfinished",
"Unit_hipEventElapsedTime_NotReady_Negative",
"Note: TDR (random fail)",
"Unit_hipEventDestroy_WithWaitingStream",
"Unit_hipMemsetSync",
"Unit_hipMemsetDSync - int8_t",
"Unit_hipMemsetDSync - int16_t",
"Unit_hipMemsetDSync - uint32_t",
"Unit_hipStreamValue_Wait32_NonBlacking_Mask_Gte",
"Note: hsa_amd_ipc_ is dummy",
"Unit_hipIpcOpenMemHandle_Negative_Open_In_Two_Contexts_Same_Device",
"Unit_hipIpcGetMemHandle_Positive_Unique_Handles_Reused_Memory",
"Unit_hipIpcCloseMemHandle_Positive_Reference_Counting",
"Unit_hipIpcMemAccess_Semaphores",
"Unit_hipIpcMemAccess_ParameterValidation",
"Note: test dropped in latest gerritgit",
"Unit_hipStreamCreate_WithPriorityPerformance_Default_high",
"Unit_hipStreamCreate_WithPriorityPerformance_Nonblocking_high",
"Unit_hipStreamCreate_WithPriorityPerformance_Default_low",
"=== Following tests disabled as it should be a local perf test",
"Performance_hipExtLaunchKernelGGL_QueryGPUFrequency"
]
}
@@ -0,0 +1,239 @@
{
"DisabledTests": [
"=== Below test fails in external CI for PR https://github.com/ROCm-Developer-Tools/hip-tests/pull/85 ===",
"=== Below test fails in external CI for PR https://github.com/ROCm-Developer-Tools/hip-tests/pull/215 ===",
"Unit_ChannelDescriptor_Positive_Basic_1D - long",
"Unit_ChannelDescriptor_Positive_Basic_1D - unsigned long",
"Unit_ChannelDescriptor_Positive_Basic_1D - ulong1",
"Unit_ChannelDescriptor_Positive_Basic_1D - signed long",
"Unit_ChannelDescriptor_Positive_Basic_1D - long1",
"Unit_ChannelDescriptor_Positive_Basic_2D - ulong2",
"Unit_ChannelDescriptor_Positive_Basic_2D - long2",
"Unit_ChannelDescriptor_Positive_Basic_3D - ulong3",
"Unit_ChannelDescriptor_Positive_Basic_3D - long3",
"Unit_ChannelDescriptor_Positive_Basic_4D - ulong4",
"Unit_ChannelDescriptor_Positive_Basic_4D - long4",
"=== Below test fails in external CI for PR https://github.com/ROCm-Developer-Tools/hip-tests/pull/38 ===",
"Unit_hipFreeAsync_Negative_Parameters",
"=== Below test fails in external CI for PR https://github.com/ROCm-Developer-Tools/hip-tests/pull/92 ===",
"Unit_hipGetTexObjectResourceDesc_positive",
"Unit_hipGetTexObjectResourceDesc_Negative_Parameters",
"Unit_hipGetTexObjectTextureDesc_positive",
"Unit_hipGetTexObjectTextureDesc_Negative_Parameters",
"Unit_hipTexObjectDestroy_positive",
"=== Below tests fail randomly in PSDB ===",
"Unit_hipGraphInstantiateWithFlags_DependencyGraphDeviceCtxtChg",
"Unit_hipGraphUpload_Functional_multidevice_test",
"Unit_hipMemcpyParam2D_multiDevice-D2D - char",
"Unit_hipMemcpyParam2D_multiDevice-D2D - float",
"Unit_hipMemcpyParam2D_multiDevice-D2D - int",
"Unit_hipMemcpyParam2D_multiDevice-D2D - double",
"Unit_hipMemcpyParam2D_multiDevice-D2D - long double",
"Unit_hipMemcpyParam2DAsync_multiDevice-StreamOnDiffDevice - char",
"Unit_hipMemsetFunctional_ZeroValue_hipMemsetD16",
"Unit_hipStreamAttachMemAsync_Negative_Parameters",
"hipStreamPerThread_CoopLaunch",
"hipCGMultiGridGroupType",
"Grid_Group_Getters_Positive_Basic",
"Grid_Group_Getters_Via_Non_Member_Functions_Positive_Basic",
"Grid_Group_Sync_Positive_Basic",
"dynamic_loading_device_kernels_from_library",
"Unit_tiled_partition",
"Unit_atomicExch_Positive_Same_Address_Compile_Time - int",
"Unit_atomicExch_Positive_Same_Address_Compile_Time - unsigned int",
"Unit_atomicExch_Positive_Same_Address_Compile_Time - unsigned long long",
"Unit_atomicExch_Positive_Same_Address_Compile_Time - float",
"Unit_atomicExch_system_Positive_Host_And_GPU - int",
"Unit_atomicExch_system_Positive_Host_And_GPU - unsigned int",
"Unit_atomicExch_system_Positive_Host_And_GPU - unsigned long long",
"Unit_atomicExch_system_Positive_Host_And_GPU - float",
"Unit_hipModuleUnload_Negative_Double_Unload",
"=== Below tests fail in external CI for PR https://github.com/ROCm-Developer-Tools/hip-tests/pull/356 ===",
"Unit_Device_Complex_Unary_Negative_Parameters_RTC",
"Unit_Device_Complex_Binary_Negative_Parameters_RTC",
"Unit_Device_Complex_hipCfma_Negative_Parameters_RTC",
"Unit_Device_make_Complex_Negative_Parameters_RTC",
"Unit_Device_Complex_Cast_Negative_Parameters_RTC",
"=== Below 2 tests are disabled due to defect EXSWHTEC-342 ===",
"Unit_hipDeviceSetLimit_Negative_Parameters",
"Unit_hipDeviceGetLimit_Negative_Parameters",
"=== Below tests are failing PSDB ===",
"Unit_hipGraphMem_Alloc_Free_NodeGetParams_Functional_3",
"Unit_hipMemPoolSetAccess_Negative_Parameters",
"Unit_hipMallocMipmappedArray_Negative_NumLevels",
"Unit_hipFreeMipmappedArray_Negative_Nullptr",
"Unit_hipFreeMipmappedArrayMultiTArray - int",
"Unit_hipFreeMipmappedArray_Negative_Parameters",
"Unit_hipCreateSurfaceObject_Negative_Parameters",
"Unit_hipDestroySurfaceObject_Negative_Parameters",
"Unit_hipMemcpy3D_Positive_Synchronization_Behavior",
"Unit_hipMemcpy2D_Positive_Synchronization_Behavior",
"Unit_hipDrvMemcpy3D_Positive_Synchronization_Behavior",
"Unit_hipFreeMipmappedArray_Negative_DoubleFree",
"Unit_hipModuleLoad_Positive_Basic",
"Unit_hipModuleLoad_Negative_Load_From_A_File_That_Is_Not_A_Module",
"Unit_hipModuleLoadData_Positive_Basic",
"Unit_hipModuleLoadData_Negative_Parameters",
"Unit_hipModuleLoadDataEx_Positive_Basic",
"Unit_hipModuleLoadDataEx_Negative_Parameters",
"Performance_hipMemsetD16",
"Performance_hipMemsetD16Async",
"Performance_hipMemsetD32",
"Performance_hipMemsetD32Async",
"Unit_hipMemcpyParam2D_Positive_Synchronization_Behavior",
"Unit_hipMemcpy_Positive_Synchronization_Behavior",
"Unit_tex1Dfetch_Positive_ReadModeElementType - char",
"Unit_tex1Dfetch_Positive_ReadModeElementType - unsigned char",
"Unit_tex1Dfetch_Positive_ReadModeElementType - short",
"Unit_tex1Dfetch_Positive_ReadModeElementType - unsigned short",
"Unit_tex1Dfetch_Positive_ReadModeElementType - int",
"Unit_tex1Dfetch_Positive_ReadModeElementType - unsigned int",
"Unit_tex1Dfetch_Positive_ReadModeElementType - float",
"Unit_tex1Dfetch_Positive_ReadModeNormalizedFloat - char",
"Unit_tex1Dfetch_Positive_ReadModeNormalizedFloat - unsigned char",
"Unit_tex1Dfetch_Positive_ReadModeNormalizedFloat - short",
"Unit_tex1Dfetch_Positive_ReadModeNormalizedFloat - unsigned short",
"Unit_tex1D_Positive_ReadModeNormalizedFloat - char",
"Unit_tex1D_Positive_ReadModeNormalizedFloat - unsigned char",
"Unit_tex1D_Positive_ReadModeNormalizedFloat - short",
"Unit_tex1D_Positive_ReadModeNormalizedFloat - unsigned short",
"Unit_tex1DLayered_Positive_ReadModeNormalizedFloat - char",
"Unit_tex1DLayered_Positive_ReadModeNormalizedFloat - unsigned char",
"Unit_tex1DLayered_Positive_ReadModeNormalizedFloat - short",
"Unit_tex1DLayered_Positive_ReadModeNormalizedFloat - unsigned short",
"Unit_tex1DGrad_Positive_ReadModeNormalizedFloat - char",
"Unit_tex1DGrad_Positive_ReadModeNormalizedFloat - unsigned char",
"Unit_tex1DGrad_Positive_ReadModeNormalizedFloat - short",
"Unit_tex1DGrad_Positive_ReadModeNormalizedFloat - unsigned short",
"Unit_tex1DLayeredGrad_Positive_ReadModeNormalizedFloat - char",
"Unit_tex1DLayeredGrad_Positive_ReadModeNormalizedFloat - unsigned char",
"Unit_tex1DLayeredGrad_Positive_ReadModeNormalizedFloat - short",
"Unit_tex1DLayeredGrad_Positive_ReadModeNormalizedFloat - unsigned short",
"Unit_tex1DLayeredLod_Positive_ReadModeNormalizedFloat - char",
"Unit_tex1DLayeredLod_Positive_ReadModeNormalizedFloat - unsigned char",
"Unit_tex1DLayeredLod_Positive_ReadModeNormalizedFloat - short",
"Unit_tex1DLayeredLod_Positive_ReadModeNormalizedFloat - unsigned short",
"Unit_tex1DLod_Positive_ReadModeNormalizedFloat - char",
"Unit_tex1DLod_Positive_ReadModeNormalizedFloat - unsigned char",
"Unit_tex1DLod_Positive_ReadModeNormalizedFloat - short",
"Unit_tex1DLod_Positive_ReadModeNormalizedFloat - unsigned short",
"Unit_tex3D_Positive_ReadModeNormalizedFloat - char",
"Unit_tex3D_Positive_ReadModeNormalizedFloat - unsigned char",
"Unit_tex3D_Positive_ReadModeNormalizedFloat - short",
"Unit_tex3D_Positive_ReadModeNormalizedFloat - unsigned short",
"Unit_tex3DLod_Positive_ReadModeNormalizedFloat - char",
"Unit_tex3DLod_Positive_ReadModeNormalizedFloat - unsigned char",
"Unit_tex3DLod_Positive_ReadModeNormalizedFloat - short",
"Unit_tex3DLod_Positive_ReadModeNormalizedFloat - unsigned short",
"Unit_tex3DGrad_Positive_ReadModeNormalizedFloat - char",
"Unit_tex3DGrad_Positive_ReadModeNormalizedFloat - unsigned char",
"Unit_tex3DGrad_Positive_ReadModeNormalizedFloat - short",
"Unit_tex3DGrad_Positive_ReadModeNormalizedFloat - unsigned short",
"Unit_texCubemap_Positive_ReadModeElementType - char",
"Unit_texCubemap_Positive_ReadModeElementType - unsigned char",
"Unit_texCubemap_Positive_ReadModeElementType - short",
"Unit_texCubemap_Positive_ReadModeElementType - unsigned short",
"Unit_texCubemap_Positive_ReadModeElementType - int",
"Unit_texCubemap_Positive_ReadModeElementType - unsigned int",
"Unit_texCubemap_Positive_ReadModeElementType - float",
"Unit_texCubemap_Positive_ReadModeNormalizedFloat - char",
"Unit_texCubemap_Positive_ReadModeNormalizedFloat - unsigned char",
"Unit_texCubemap_Positive_ReadModeNormalizedFloat - short",
"Unit_texCubemap_Positive_ReadModeNormalizedFloat - unsigned short",
"Unit_texCubemapLod_Positive_ReadModeElementType - char",
"Unit_texCubemapLod_Positive_ReadModeElementType - unsigned char",
"Unit_texCubemapLod_Positive_ReadModeElementType - short",
"Unit_texCubemapLod_Positive_ReadModeElementType - unsigned short",
"Unit_texCubemapLod_Positive_ReadModeElementType - int",
"Unit_texCubemapLod_Positive_ReadModeElementType - unsigned int",
"Unit_texCubemapLod_Positive_ReadModeElementType - float",
"Unit_texCubemapLod_Positive_ReadModeNormalizedFloat - char",
"Unit_texCubemapLod_Positive_ReadModeNormalizedFloat - unsigned char",
"Unit_texCubemapLod_Positive_ReadModeNormalizedFloat - short",
"Unit_texCubemapLod_Positive_ReadModeNormalizedFloat - unsigned short",
"Unit_texCubemapGrad_Positive_ReadModeElementType - char",
"Unit_texCubemapGrad_Positive_ReadModeElementType - unsigned char",
"Unit_texCubemapGrad_Positive_ReadModeElementType - short",
"Unit_texCubemapGrad_Positive_ReadModeElementType - unsigned short",
"Unit_texCubemapGrad_Positive_ReadModeElementType - int",
"Unit_texCubemapGrad_Positive_ReadModeElementType - unsigned int",
"Unit_texCubemapGrad_Positive_ReadModeElementType - float",
"Unit_texCubemapGrad_Positive_ReadModeNormalizedFloat - char",
"Unit_texCubemapGrad_Positive_ReadModeNormalizedFloat - unsigned char",
"Unit_texCubemapGrad_Positive_ReadModeNormalizedFloat - short",
"Unit_texCubemapGrad_Positive_ReadModeNormalizedFloat - unsigned short",
"Unit_texCubemapLayered_Positive_ReadModeElementType - char",
"Unit_texCubemapLayered_Positive_ReadModeElementType - unsigned char",
"Unit_texCubemapLayered_Positive_ReadModeElementType - short",
"Unit_texCubemapLayered_Positive_ReadModeElementType - unsigned short",
"Unit_texCubemapLayered_Positive_ReadModeElementType - int",
"Unit_texCubemapLayered_Positive_ReadModeElementType - unsigned int",
"Unit_texCubemapLayered_Positive_ReadModeElementType - float",
"Unit_texCubemapLayered_Positive_ReadModeNormalizedFloat - char",
"Unit_texCubemapLayered_Positive_ReadModeNormalizedFloat - unsigned char",
"Unit_texCubemapLayered_Positive_ReadModeNormalizedFloat - short",
"Unit_texCubemapLayered_Positive_ReadModeNormalizedFloat - unsigned short",
"Unit_texCubemapLayeredLod_Positive_ReadModeElementType - char",
"Unit_texCubemapLayeredLod_Positive_ReadModeElementType - unsigned char",
"Unit_texCubemapLayeredLod_Positive_ReadModeElementType - short",
"Unit_texCubemapLayeredLod_Positive_ReadModeElementType - unsigned short",
"Unit_texCubemapLayeredLod_Positive_ReadModeElementType - int",
"Unit_texCubemapLayeredLod_Positive_ReadModeElementType - unsigned int",
"Unit_texCubemapLayeredLod_Positive_ReadModeElementType - float",
"Unit_texCubemapLayeredLod_Positive_ReadModeNormalizedFloat - char",
"Unit_texCubemapLayeredLod_Positive_ReadModeNormalizedFloat - unsigned char",
"Unit_texCubemapLayeredLod_Positive_ReadModeNormalizedFloat - short",
"Unit_texCubemapLayeredLod_Positive_ReadModeNormalizedFloat - unsigned short",
"Unit_texCubemapLayeredGrad_Positive_ReadModeElementType - char",
"Unit_texCubemapLayeredGrad_Positive_ReadModeElementType - unsigned char",
"Unit_texCubemapLayeredGrad_Positive_ReadModeElementType - short",
"Unit_texCubemapLayeredGrad_Positive_ReadModeElementType - unsigned short",
"Unit_texCubemapLayeredGrad_Positive_ReadModeElementType - int",
"Unit_texCubemapLayeredGrad_Positive_ReadModeElementType - unsigned int",
"Unit_texCubemapLayeredGrad_Positive_ReadModeElementType - float",
"Unit_texCubemapLayeredGrad_Positive_ReadModeNormalizedFloat - char",
"Unit_texCubemapLayeredGrad_Positive_ReadModeNormalizedFloat - unsigned char",
"Unit_texCubemapLayeredGrad_Positive_ReadModeNormalizedFloat - short",
"Unit_texCubemapLayeredGrad_Positive_ReadModeNormalizedFloat - unsigned short",
"Unit_tex2D_Positive_ReadModeNormalizedFloat - char",
"Unit_tex2D_Positive_ReadModeNormalizedFloat - unsigned char",
"Unit_tex2D_Positive_ReadModeNormalizedFloat - short",
"Unit_tex2D_Positive_ReadModeNormalizedFloat - unsigned short",
"Unit_tex2DLayered_Positive_ReadModeNormalizedFloat - char",
"Unit_tex2DLayered_Positive_ReadModeNormalizedFloat - unsigned char",
"Unit_tex2DLayered_Positive_ReadModeNormalizedFloat - short",
"Unit_tex2DLayered_Positive_ReadModeNormalizedFloat - unsigned short",
"Unit_tex2DGrad_Positive_ReadModeNormalizedFloat - char",
"Unit_tex2DGrad_Positive_ReadModeNormalizedFloat - unsigned char",
"Unit_tex2DGrad_Positive_ReadModeNormalizedFloat - short",
"Unit_tex2DGrad_Positive_ReadModeNormalizedFloat - unsigned short",
"Unit_tex2DLayeredGrad_Positive_ReadModeNormalizedFloat - char",
"Unit_tex2DLayeredGrad_Positive_ReadModeNormalizedFloat - unsigned char",
"Unit_tex2DLayeredGrad_Positive_ReadModeNormalizedFloat - short",
"Unit_tex2DLayeredGrad_Positive_ReadModeNormalizedFloat - unsigned short",
"Unit_tex2DLod_Positive_ReadModeNormalizedFloat - char",
"Unit_tex2DLod_Positive_ReadModeNormalizedFloat - unsigned char",
"Unit_tex2DLod_Positive_ReadModeNormalizedFloat - short",
"Unit_tex2DLod_Positive_ReadModeNormalizedFloat - unsigned short",
"Unit_tex2DLayeredLod_Positive_ReadModeNormalizedFloat - char",
"Unit_tex2DLayeredLod_Positive_ReadModeNormalizedFloat - unsigned char",
"Unit_tex2DLayeredLod_Positive_ReadModeNormalizedFloat - short",
"Unit_tex2DLayeredLod_Positive_ReadModeNormalizedFloat - unsigned short",
"Unit_hipLaunchKernel_Negative_Parameters",
"Unit_Assert_Positive_Basic_KernelFail",
"=== Below tests fail in external CI for PR https://github.com/ROCm-Developer-Tools/hip-tests/pull/210 ===",
"Unit_hipMemImportFromShareableHandle_Positive_MultiProc",
"Unit_hipMemMapArrayAsync_Positive_Basic",
"=== Disabling failing nvidia tests ===",
"Unit_hipDeviceSetLimit_Negative_PrintfFifoSize",
"Unit_hipDeviceSetLimit_Negative_MallocHeapSize",
"=== Disabling tests which no longer behave the same on nvidia platform ===",
"Unit_hipGraphInstantiateWithParams_Negative",
"Unit_hipGraphAddChildGraphNode_OrgGraphAsChildGraph",
"Unit_hipDeviceSynchronize_Positive_Nullstream",
"Unit_hipDeviceSynchronize_Functional",
"Unit_hipDeviceReset_Positive_Basic",
"Unit_hipDeviceReset_Positive_Threaded",
"Unit_hipModuleGetTexRef_Positive_Basic"
]
}
@@ -0,0 +1,71 @@
{
"DisabledTests": [
"=== Below test fails in external CI for PR https://github.com/ROCm-Developer-Tools/hip-tests/pull/215 ===",
"Unit_ChannelDescriptor_Positive_Basic_1D - long",
"Unit_ChannelDescriptor_Positive_Basic_1D - unsigned long",
"Unit_ChannelDescriptor_Positive_Basic_1D - ulong1",
"Unit_ChannelDescriptor_Positive_Basic_1D - signed long",
"Unit_ChannelDescriptor_Positive_Basic_1D - long1",
"Unit_ChannelDescriptor_Positive_Basic_2D - ulong2",
"Unit_ChannelDescriptor_Positive_Basic_2D - long2",
"Unit_ChannelDescriptor_Positive_Basic_3D - ulong3",
"Unit_ChannelDescriptor_Positive_Basic_3D - long3",
"Unit_ChannelDescriptor_Positive_Basic_4D - ulong4",
"Unit_ChannelDescriptor_Positive_Basic_4D - long4",
"Unit_hipModuleUnload_Negative_Double_Unload",
"=== Below tests fail in external CI for PR https://github.com/ROCm-Developer-Tools/hip-tests/pull/356 ===",
"Unit_Device_Complex_Unary_Negative_Parameters_RTC",
"Unit_Device_Complex_Binary_Negative_Parameters_RTC",
"Unit_Device_Complex_hipCfma_Negative_Parameters_RTC",
"Unit_Device_make_Complex_Negative_Parameters_RTC",
"Unit_Device_Complex_Cast_Negative_Parameters_RTC",
"=== Below 2 tests are disabled due to defect EXSWHTEC-342 ===",
"Unit_hipDeviceSetLimit_Negative_Parameters",
"Unit_hipDeviceGetLimit_Negative_Parameters",
"=== Below tests fail in PSDB ===",
"Unit_hipMemcpy3D_Positive_Synchronization_Behavior",
"Unit_hipMemcpy2D_Positive_Synchronization_Behavior",
"Unit_hipDrvMemcpy3D_Positive_Synchronization_Behavior",
"Unit_hipFreeMipmappedArray_Negative_DoubleFree",
"Unit_hipModuleLoad_Positive_Basic",
"Unit_hipModuleLoad_Negative_Load_From_A_File_That_Is_Not_A_Module",
"Unit_hipModuleLoadData_Positive_Basic",
"Unit_hipModuleLoadData_Negative_Parameters",
"Unit_hipModuleLoadDataEx_Positive_Basic",
"Unit_hipModuleLoadDataEx_Negative_Parameters",
"Performance_hipMemsetD16",
"Performance_hipMemsetD16Async",
"Performance_hipMemsetD32",
"Performance_hipMemsetD32Async",
"Unit_hipMemcpyParam2D_Positive_Synchronization_Behavior",
"Unit_hipMemcpy_Positive_Synchronization_Behavior",
"Unit_hipMemMapArrayAsync_Positive_Basic",
"=== SWDEV-475987 : Disable tests to merge hipother change 12/08/2024 ===",
"Unit_hipMalloc3DArray_Negative_InvalidFormat",
"Unit_hipMalloc3DArray_Negative_BadChannelLayout",
"Unit_hipMalloc3DArray_Negative_8BitFloat",
"Unit_hipMalloc3DArray_Negative_DifferentChannelSizes",
"Unit_hipMalloc3DArray_Negative_BadChannelSize",
"Unit_hipMallocMipmappedArray_Negative_InvalidFormat",
"Unit_hipMallocMipmappedArray_Negative_BadChannelLayout",
"Unit_hipMallocMipmappedArray_Negative_8BitFloat",
"Unit_hipMallocMipmappedArray_Negative_DifferentChannelSizes",
"Unit_hipMallocMipmappedArray_Negative_BadChannelSize",
"Unit_hipMallocArray_Negative_DifferentChannelSizes",
"Unit_hipMallocArray_Negative_8bitFloat - float",
"Unit_hipMallocArray_Negative_8bitFloat - float2",
"Unit_hipMallocArray_Negative_8bitFloat - float4",
"Unit_hipMallocArray_Negative_BadNumberOfBits",
"Unit_hipMallocArray_Negative_3ChannelElement",
"Unit_hipMallocArray_Negative_ChannelAfterZeroChannel",
"Unit_hipMallocArray_Negative_InvalidChannelFormat",
"=== Disabling tests which no longer behave the same on nvidia platform ===",
"Unit_hipGraphInstantiateWithParams_Negative",
"Unit_hipGraphAddChildGraphNode_OrgGraphAsChildGraph",
"Unit_hipDeviceSynchronize_Positive_Nullstream",
"Unit_hipDeviceSynchronize_Functional",
"Unit_hipDeviceReset_Positive_Basic",
"Unit_hipDeviceReset_Positive_Threaded",
"Unit_hipModuleGetTexRef_Positive_Basic"
]
}
@@ -0,0 +1,341 @@
#include <cstdlib>
#include <hip_test_common.hh>
#include <picojson.h>
#include <fstream>
#include <sstream>
#include <regex>
#include "hip_test_context.hh"
#include "hip_test_filesystem.hh"
#include "hip_test_features.hh"
// Records the host operating system chosen at compile time.
// Sets p_windows when HT_WIN == 1, otherwise p_linux when HT_LINUX == 1.
// If neither macro equals 1 this function sets neither flag.
void TestContext::detectOS() {
#if (HT_WIN == 1)
p_windows = true;
#elif (HT_LINUX == 1)
p_linux = true;
#endif
}
// Records the GPU platform chosen at compile time.
// Sets amd when HT_AMD == 1, otherwise nvidia when HT_NVIDIA == 1.
// If neither macro equals 1 this function sets neither flag.
void TestContext::detectPlatform() {
#if (HT_AMD == 1)
amd = true;
#elif (HT_NVIDIA == 1)
nvidia = true;
#endif
}
// Returns the first element of `list` that occurs as a substring of `filename`,
// or an empty string when no element matches.
std::string TestContext::substringFound(std::vector<std::string> list, std::string filename) {
  for (const auto& candidate : list) {
    if (filename.find(candidate) != std::string::npos) {
      return candidate;
    }
  }
  return "";
}
// Returns the gfx architecture name the tests will run on, or "" when it cannot be determined
// (non-Linux build, popen() failure, or no agent reported).
//
// The architecture list comes from running rocm_agent_enumerator; the shell pipeline drops
// "gfx000" entries and joins the remaining names with ';'. When every reported agent has the same
// architecture that name is returned directly. Otherwise the list is narrowed to the indices named
// in HIP_VISIBLE_DEVICES; if the narrowed list still mixes architectures the process is aborted.
std::string TestContext::getCurrentArch() {
#if HT_LINUX
  const char* cmd =
      "/opt/rocm/bin/rocm_agent_enumerator | awk '$0 != \"gfx000\"' | xargs | sed -e 's/ /;/g' | "
      "tr -d '\n'";
  std::array<char, 1024> buffer;
  std::string result;
  std::unique_ptr<FILE, decltype(&pclose)> pipe(popen(cmd, "r"), pclose);
  if (!pipe) {
    printf("popen() failed!");
    return "";
  }
  // Append every chunk: fgets() hands back at most buffer.size() - 1 characters per call, so a
  // long agent list arrives in several pieces and must not overwrite what was already read.
  while (fgets(buffer.data(), buffer.size(), pipe.get()) != nullptr) {
    result += buffer.data();
  }

  // Splits `input` on `sep`, dropping a trailing empty field.
  auto split = [](std::string input, char sep) -> std::vector<std::string> {
    std::vector<std::string> fields;
    auto loc = input.find(sep);
    while (loc != std::string::npos) {
      fields.push_back(input.substr(0, loc));
      input.erase(0, loc + 1);
      loc = input.find(sep);
    }
    if (input.size() > 0) {
      fields.push_back(input);
    }
    return fields;
  };

  const std::vector<std::string> archs = split(result, ';');
  if (archs.empty()) {
    return "";  // rocm_agent_enum gave us garbage
  }

  const std::string first_arch = archs.front();
  if (std::all_of(archs.begin(), archs.end(),
                  [&](const std::string& in) { return in == first_arch; })) {
    return first_arch;
  }

  // We have multiple archs in rocm_agent_enum.
  // Check if they are same or not by applying the HIP_VISIBLE_DEVICES filter.
  std::vector<std::string> filtered_archs;
  const std::string s_visible_devices = TestContext::getEnvVar("HIP_VISIBLE_DEVICES");
  for (const auto& entry : split(s_visible_devices, ',')) {
    // strtol never throws; entries that are not a number or do not index into `archs` are
    // skipped instead of crashing the test binary or reading out of bounds.
    char* end = nullptr;
    const long index = std::strtol(entry.c_str(), &end, 10);
    if (end == entry.c_str() || index < 0 || static_cast<size_t>(index) >= archs.size()) {
      LogPrintf("Ignoring invalid HIP_VISIBLE_DEVICES entry: %s", entry.c_str());
      continue;
    }
    filtered_archs.push_back(archs[static_cast<size_t>(index)]);
  }
  if (filtered_archs.empty()) {
    // No usable filter: judge the complete agent list.
    filtered_archs = archs;
  }

  const std::string first_filtered_arch = filtered_archs.front();
  if (!std::all_of(filtered_archs.begin(), filtered_archs.end(),
                   [&](const std::string& in) { return in == first_filtered_arch; })) {
    LogPrintf("%s",
              "[ERROR] Cannot run tests on Hetrogenous Architecture. Please set "
              "HIP_VISIBLE_DEVICES with devices of same arch");
    std::abort();
  }
  return first_filtered_arch;
#else
  return "";
#endif
}
// Builds the expected config file path inside `config_dir` and returns it if the file exists,
// otherwise returns "".
//
// The name is "config_<platform>_<os>.json"; on Linux + AMD the detected architecture is
// appended as an extra "_<arch>" component before the extension.
std::string TestContext::getMatchingConfigFile(std::string config_dir) {
  std::string arch_suffix;
  if (isLinux() && isAmd()) {
    const std::string arch = getCurrentArch();
    LogPrintf("The arch present: %s", arch.c_str());
    arch_suffix = "_" + arch;
  }

  const std::string candidate =
      config_dir + "/config_" + getConfig().platform + "_" + getConfig().os + arch_suffix + ".json";
  return fs::exists(candidate) ? candidate : std::string("");
}
std::string& TestContext::getCommonJsonFile() {
fs::path config_dir = exe_path;
config_dir = config_dir.parent_path();
int levels = 0;
bool configFolderFound = false;
std::vector<std::string> configList;
std::string configFile;
// check a max of 5 levels down the executable path
while (levels < 5) {
fs::path temp_path = config_dir;
temp_path /= "config";
if (fs::exists(temp_path)) {
config_dir = fs::absolute(temp_path);
configFolderFound = true;
break;
} else {
config_dir = config_dir.parent_path();
levels++;
}
}
// get config.json files if config folder.
if (configFolderFound) {
json_file_ = getMatchingConfigFile(config_dir.string());
}
return json_file_;
}
// Fills config_ with the platform/OS names and the list of JSON files to read disabled tests from.
//
// Aborts the process when the platform or the OS was not detected. A path given through the
// HIP_CATCH_EXCLUDE_FILE environment variable takes precedence; only when that variable is empty
// is the default config file (see getCommonJsonFile) looked up.
void TestContext::getConfigFiles() {
  config_.platform = (amd ? "amd" : (nvidia ? "nvidia" : "unknown"));
  config_.os = (p_windows ? "windows" : (p_linux ? "linux" : "unknown"));

  if (config_.os == "unknown" || config_.platform == "unknown") {
    LogPrintf("%s", "Either Config or Os is unknown, this wont end well");
    abort();
  }

  const std::string env_config = TestContext::getEnvVar("HIP_CATCH_EXCLUDE_FILE");
  LogPrintf("Env Config file: %s", (!env_config.empty()) ? env_config.c_str() : "Not found");

  if (!env_config.empty()) {
    // HIP_CATCH_EXCLUDE_FILE is set for custom file path
    if (fs::exists(env_config)) {
      config_.json_files.push_back(env_config);
    } else {
      // Say so explicitly: otherwise the run silently proceeds with no exclusion list at all.
      LogPrintf("HIP_CATCH_EXCLUDE_FILE does not exist, no tests will be excluded: %s",
                env_config.c_str());
    }
  } else {
    // Look the common json file up once; the search walks the filesystem.
    const std::string jsonFile = getCommonJsonFile();
    if (!jsonFile.empty()) {
      config_.json_files.push_back(jsonFile);
    }
  }

  for (const auto& fl : config_.json_files) {
    LogPrintf("Config file path: %s", fl.c_str());
  }
}
// Builds the test context from the process command line.
// The call order matters: getConfigFiles() reads the OS/platform flags set by detectOS() and
// detectPlatform() and (through getCommonJsonFile) the exe_path set by setExePath();
// parseJsonFiles() reads the file list produced by getConfigFiles().
// The bool returned by parseJsonFiles() is not checked here.
TestContext::TestContext(int argc, char** argv) {
detectOS();
detectPlatform();
setExePath(argc, argv);
getConfigFiles();
parseJsonFiles();
parseOptions(argc, argv);
}
void TestContext::setExePath(int argc, char** argv) {
if (argc == 0) return;
fs::path p = std::string(argv[0]);
if (p.has_filename()) p.remove_filename();
exe_path = p.string();
}
// Read-only accessors for the OS / platform flags set by detectOS() and detectPlatform().
bool TestContext::isWindows() const { return p_windows; }
bool TestContext::isLinux() const { return p_linux; }
bool TestContext::isNvidia() const { return nvidia; }
bool TestContext::isAmd() const { return amd; }
// Remembers the requested test name. Only a command line of exactly two arguments
// (program name + test name) is recognised; anything else leaves current_test untouched.
void TestContext::parseOptions(int argc, char** argv) {
  if (argc == 2) {
    // Test name is at [1] position
    current_test = std::string(argv[1]);
  }
}
bool TestContext::skipTest() const {
// Direct Match
auto flags = std::regex::ECMAScript;
for (const auto& i : skip_test) {
auto regex = std::regex(i.c_str(), flags);
if (std::regex_match(current_test, regex)) {
return true;
}
}
// TODO add test case skip as well
return false;
}
std::string TestContext::currentPath() const { return fs::current_path().string(); }
// Reads every file in config_.json_files and adds the entries of its "DisabledTests" array to
// skip_test, turning each '*' into the regular expression ".*".
//
// Returns false when a file cannot be parsed, is not a JSON object, or has a "DisabledTests"
// member that is not an array. Returns true otherwise; note that a file that has disappeared is
// logged and also ends processing with true.
// Array elements that are not strings are logged and skipped rather than passed to
// get<std::string>(), which requires the element to be a string.
bool TestContext::parseJsonFiles() {
  for (const auto& fl : config_.json_files) {
    // Check if file exists
    if (!fs::exists(fl)) {
      LogPrintf("Unable to find the file: %s", fl.c_str());
      return true;
    }

    // Read the whole file into memory
    std::ifstream js_file(fl);
    std::string json_str((std::istreambuf_iterator<char>(js_file)),
                         std::istreambuf_iterator<char>());
    LogPrintf("Json contents:: %s", json_str.data());

    picojson::value v;
    const std::string err = picojson::parse(v, json_str);
    if (!err.empty()) {
      LogPrintf("Error from PicoJson: %s", err.data());
      return false;
    }

    if (!v.is<picojson::object>()) {
      LogPrintf("%s", "Data in json is not in correct format, it should be an object");
      return false;
    }

    const picojson::object& o = v.get<picojson::object>();
    for (picojson::object::const_iterator i = o.begin(); i != o.end(); ++i) {
      // Processing for DisabledTests
      if (i->first != "DisabledTests") continue;

      // Value should contain list of values
      if (!i->second.is<picojson::array>()) return false;

      const auto& val = i->second.get<picojson::array>();
      for (const auto& entry : val) {
        if (!entry.is<std::string>()) {
          LogPrintf("%s", "Ignoring DisabledTests entry that is not a string");
          continue;
        }
        const std::string& name = entry.get<std::string>();
        std::string newRegexName;
        for (const auto& c : name) {
          if (c == '*')
            newRegexName += ".*";
          else
            newRegexName += c;
        }
        skip_test.insert(newRegexName);
      }
    }
  }
  return true;
}
void TestContext::cleanContext() {
for (auto& pair : compiledKernels) {
hipError_t error = hipModuleUnload(pair.second.module);
if (error != hipSuccess) {
throw std::runtime_error("Unable to unload rtc module");
}
}
}
// Records (or replaces) the module and kernel function associated with `kernelNameExpression`
// in compiledKernels.
void TestContext::trackRtcState(std::string kernelNameExpression, hipModule_t loadedModule,
                                hipFunction_t kernelFunction) {
  compiledKernels[kernelNameExpression] = rtcState{loadedModule, kernelFunction};
}
// Returns the kernel function recorded for `kernelNameExpression`, or nullptr when nothing has
// been recorded under that name.
hipFunction_t TestContext::getFunction(const std::string kernelNameExpression) {
  const auto entry = compiledKernels.find(kernelNameExpression);
  if (entry == compiledKernels.end()) {
    return nullptr;
  }
  return entry->second.kernelFunction;
}
// Appends `r` to results under resultMutex and raises the error flag when the entry is a failure:
// either its condition check failed, or its HIP status is neither hipSuccess nor
// hipErrorPeerAccessAlreadyEnabled.
void TestContext::addResults(HCResult r) {
  std::unique_lock<std::mutex> lock(resultMutex);
  results.push_back(r);

  const bool statusAccepted =
      (r.result == hipSuccess) || (r.result == hipErrorPeerAccessAlreadyEnabled);
  if (!(r.conditionsResult && statusAccepted)) {
    hasErrorOccured_.store(true);
  }
}
// Checks every result collected by addResults() with the test framework's REQUIRE macro, then
// resets the error flag. Holds resultMutex for the whole call.
void TestContext::finalizeResults() {
std::unique_lock<std::mutex> lock(resultMutex);
// clear the results whatever happens
// (scope guard: the deleter of this otherwise unused shared_ptr empties `results` when the
// function is left, including an early exit out of the loop below)
std::shared_ptr<void> emptyVec(nullptr, [this](auto) { results.clear(); });
for (const auto& i : results) {
// Attach file/line/API/result details to whatever the following checks report.
INFO("HIP API Result check\n File:: "
<< i.file << "\n Line:: " << i.line << "\n API:: " << i.call
<< "\n Result:: " << i.result << "\n Result Str:: " << hipGetErrorString(i.result));
// NOTE(review): hipErrorNotSupported is accepted here, while addResults() flags it as an
// error - confirm which of the two is intended.
REQUIRE(((i.result == hipSuccess) || (i.result == hipErrorPeerAccessAlreadyEnabled) ||
(i.result == hipErrorNotSupported)));
REQUIRE(i.conditionsResult);
}
hasErrorOccured_.store(false); // Clear the flag
}
// Returns the error flag that addResults() sets and finalizeResults() clears.
bool TestContext::hasErrorOccured() { return hasErrorOccured_.load(); }
// Aborts the process if results recorded through addResults() were never consumed by
// finalizeResults(), so that a test cannot pass by accident with unchecked thread results.
TestContext::~TestContext() {
  if (results.size() == 0) {
    return;
  }

  // Show this message when there are unchecked results
  std::cerr << "HIP_CHECK_THREAD_FINALIZE() has not been called after HIP_CHECK_THREAD\n"
            << "Please call HIP_CHECK_THREAD_FINALIZE after joining threads\n"
            << "There is/are " << results.size() << " unchecked results from threads."
            << std::endl;
  // Crash to bring users attention to this message and avoid accidental passing of tests without
  // checking for errors
  std::abort();
}
@@ -0,0 +1,138 @@
#include "hip_test_features.hh"
#include <iostream>
#include <assert.h>
#include <map>
#include "hip_test_context.hh"
// Per-feature sets of gfx architecture names, indexed with a CTFeatures value by
// CheckIfFeatSupported(). The trailing comment on each row names the feature it belongs to;
// presumably the row order has to match the order of the CTFeatures enumerators - confirm
// against the enum declaration when adding a row.
std::vector<std::unordered_set<std::string>> GCNArchFeatMap = {
{"gfx90a", "gfx942", "gfx950"}, // CT_FEATURE_FINEGRAIN_HWSUPPORT
{"gfx90a", "gfx942", "gfx950"}, // CT_FEATURE_HMM
{"gfx90a", "gfx942", "gfx950"}, // CT_FEATURE_TEXTURES_NOT_SUPPORTED
};
#if HT_AMD
// Returns the part of `full_gfx_name` that precedes the first ':' (the whole string when there
// is no ':'). Asserts that the returned name starts with "gfx".
std::string TrimAndGetGFXName(const std::string& full_gfx_name) {
  // find() yields npos when there is no ':'; substr(0, npos) then keeps the entire string.
  const std::string gfx_name = full_gfx_name.substr(0, full_gfx_name.find(':'));
  assert(gfx_name.compare(0, 3, "gfx") == 0);
  return gfx_name;
}
#endif
// Reports whether `gcn_arch` is listed in GCNArchFeatMap for the feature `test_feat`.
//
// On NVIDIA builds there is no feature table and the answer is always true. On AMD builds
// `gcn_arch` is first reduced to its base name (text before the first ':'). A feature value that
// does not index into the table yields false. On a build that is neither AMD nor NVIDIA the
// function asserts and returns false.
bool CheckIfFeatSupported(enum CTFeatures test_feat, std::string gcn_arch) {
#if HT_NVIDIA
  return true;  // returning true since feature check does not exist for NV.
#elif HT_AMD
  assert(test_feat >= 0 && test_feat < CTFeatures::CT_FEATURE_LAST);
  // The asserts vanish under NDEBUG, so keep an explicit guard in front of the indexing below.
  if (test_feat < 0 || static_cast<size_t>(test_feat) >= GCNArchFeatMap.size()) {
    return false;
  }
  gcn_arch = TrimAndGetGFXName(gcn_arch);
  assert(gcn_arch != "");
  const auto& supported_archs = GCNArchFeatMap[test_feat];
  return supported_archs.find(gcn_arch) != supported_archs.cend();
#else
  std::cout << "Platform has to be either AMD or NVIDIA, asserting..." << std::endl;
  assert(false);
  return false;  // reached only when asserts are compiled out; a bool function must return
#endif
}
// Looks up the generic target that corresponds to the specific target `agentTarget`.
// On a hit the generic name is written to `genericTarget` and true is returned; on a miss
// `genericTarget` is left untouched and false is returned.
// Names that are not in the table (for example anything that is not a listed gfx target)
// therefore always yield false.
bool getGenericTarget(const std::string& agentTarget, std::string& genericTarget) {
  // The table is subject to change per removing policy
  static const std::map<std::string, std::string> kSpecificToGeneric{
      // "gfx9-generic"
      {"gfx900", "gfx9-generic"},
      {"gfx902", "gfx9-generic"},
      {"gfx904", "gfx9-generic"},
      {"gfx906", "gfx9-generic"},
      {"gfx909", "gfx9-generic"},
      {"gfx90c", "gfx9-generic"},
      // "gfx9-4-generic"
      {"gfx940", "gfx9-4-generic"},
      {"gfx941", "gfx9-4-generic"},
      {"gfx942", "gfx9-4-generic"},
      {"gfx950", "gfx9-4-generic"},
      // "gfx10-1-generic"
      {"gfx1010", "gfx10-1-generic"},
      {"gfx1011", "gfx10-1-generic"},
      {"gfx1012", "gfx10-1-generic"},
      {"gfx1013", "gfx10-1-generic"},
      // "gfx10-3-generic"
      {"gfx1030", "gfx10-3-generic"},
      {"gfx1031", "gfx10-3-generic"},
      {"gfx1032", "gfx10-3-generic"},
      {"gfx1033", "gfx10-3-generic"},
      {"gfx1034", "gfx10-3-generic"},
      {"gfx1035", "gfx10-3-generic"},
      {"gfx1036", "gfx10-3-generic"},
      // "gfx11-generic"
      {"gfx1100", "gfx11-generic"},
      {"gfx1101", "gfx11-generic"},
      {"gfx1102", "gfx11-generic"},
      {"gfx1103", "gfx11-generic"},
      {"gfx1150", "gfx11-generic"},
      {"gfx1151", "gfx11-generic"},
      // "gfx12-generic"
      {"gfx1200", "gfx12-generic"},
      {"gfx1201", "gfx12-generic"},
  };

  const auto entry = kSpecificToGeneric.find(agentTarget);
  const bool found = (entry != kSpecificToGeneric.end());
  if (found) {
    genericTarget = entry->second;
  }
  return found;
}
/*
  Return true if the architecture has a corresponding generic target; false otherwise.

  If gcnArchName is nullptr, the name is queried from the device properties of deviceId
  (false is returned when that query fails); otherwise deviceId is ignored.
  Anything from the first ':' onwards in the name is dropped before the lookup.

  The specific arches map to generic targets as follows (see getGenericTarget):
    gfx9-generic     - gfx900, gfx902, gfx904, gfx906, gfx909, gfx90c
    gfx9-4-generic   - gfx940, gfx941, gfx942, gfx950
    gfx10-1-generic  - [gfx1010-gfx1013]
    gfx10-3-generic  - [gfx1030-gfx1036]
    gfx11-generic    - [gfx1100-gfx1103], gfx1150, gfx1151
    gfx12-generic    - gfx1200, gfx1201
*/
bool isGenericTargetSupported(char* gcnArchName, int deviceId) {
  hipDeviceProp_t props{};  // declared first: gcnArchName may end up pointing into it
  if (gcnArchName == nullptr) {
    if (hipGetDeviceProperties(&props, deviceId) != hipSuccess) {
      return false;
    }
    gcnArchName = props.gcnArchName;
  }

  // Keep only the base name, i.e. the text in front of the first ':' (if any).
  const std::string fullName{gcnArchName};
  const std::string target = fullName.substr(0, fullName.find(':'));

  std::string genericTarget;
  return getGenericTarget(target, genericTarget);
}
@@ -0,0 +1,56 @@
#define CATCH_CONFIG_RUNNER
#include <cmd_options.hh>
#include <hip_test_common.hh>
#include <iostream>
// Global option storage; the command-line parser built in main() writes into its fields.
CmdOptions cmd_options;
// Test executable entry point.
// Builds the TestContext from the command line, exits early (status 0) when the requested test
// is on the skip list, otherwise extends the Catch command line with the cmd_options switches,
// runs the session and returns its result after cleaning up the context.
int main(int argc, char** argv) {
auto& context = TestContext::get(argc, argv);
if (context.skipTest()) {
// CTest uses this regex to figure out if the test has been skipped
std::cout << "HIP_SKIP_THIS_TEST" << std::endl;
return 0;
}
Catch::Session session;
using namespace Catch::clara;
// Start from the session's own options and append one Opt per cmd_options field;
// each Opt lists its short and long switch followed by the help text.
// clang-format off
auto cli = session.cli()
| Opt(cmd_options.iterations, "iterations")
["-I"]["--iterations"]
("Number of iterations used for performance tests (default: 1000)")
| Opt(cmd_options.warmups, "warmups")
["-W"]["--warmups"]
("Number of warmup iterations used for performance tests (default: 100)")
| Opt(cmd_options.no_display)
["-S"]["--no-display"]
("Do not display the output of performance tests")
| Opt(cmd_options.progress)
["-P"]["--progress"]
("Show progress bar when running performance tests")
| Opt(cmd_options.cg_iterations, "cg_iterations")
["-C"]["--cg-iterations"]
("Number of iterations used for cooperative groups sync tests (default: 5)")
| Opt(cmd_options.accuracy_iterations, "accuracy_iterations")
["-A"]["--accuracy-iterations"]
("Number of iterations used for math accuracy tests with randomly generated inputs (default: 2^32)")
| Opt(cmd_options.accuracy_max_memory, "accuracy_max_memory")
["-M"]["--accuracy-max-memory"]
("Percentage of global device memory allowed for math accuracy tests (default: 80%)")
| Opt(cmd_options.reduce_iterations, "reduce_iterations")
["-R"]["--reduce-iterations"]
("Number of iterations for fuzzing reduce operations (default: 1)")
| Opt(cmd_options.reduce_input_size, "reduce_input_size")
["-Z"]["--reduce-input-size"]
("Size of the input for the reduce sync operations performance test (megabytes) (default: 50)")
;
// clang-format on
// Install the extended parser before running so the extra switches are recognised.
session.cli(cli);
int out = session.run(argc, argv);
// Release what the context tracked during the run before reporting the session's result.
TestContext::get().cleanContext();
return out;
}
@@ -0,0 +1,5 @@
#define CATCH_CONFIG_MAIN
#include <catch.hpp>
#include <cmd_options.hh>
CmdOptions cmd_options;
@@ -0,0 +1,40 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#pragma once
#include <cstdint>
#include <limits>
// Values of the HIP specific command line options of the test executables.
// The initializers are the defaults used when an option is not passed; they
// are kept in sync with the "(default: ...)" texts of the option help.
struct CmdOptions {
  // -I/--iterations: iterations used for performance tests.
  // The option help advertises 1000 as the default.
  int iterations = 1000;
  // -W/--warmups: warmup iterations used for performance tests.
  int warmups = 100;
  // -C/--cg-iterations: iterations used for cooperative groups sync tests.
  int cg_iterations = 5;
  // -S/--no-display: do not display the output of performance tests.
  bool no_display = false;
  // -P/--progress: show a progress bar when running performance tests.
  bool progress = false;
  // -A/--accuracy-iterations: iterations for math accuracy tests (2^32).
  uint64_t accuracy_iterations = std::numeric_limits<uint32_t>::max() + 1ull;
  // -R/--reduce-iterations: iterations for fuzzing reduce operations.
  uint64_t reduce_iterations = 1;
  // -Z/--reduce-input-size: reduce sync performance test input size, in megabytes.
  uint64_t reduce_input_size = 50;
  // -M/--accuracy-max-memory: percentage of global device memory allowed for
  // math accuracy tests.
  int accuracy_max_memory = 80;
};
extern CmdOptions cmd_options;
@@ -0,0 +1,184 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#pragma once
#include <optional>
#include <hip_test_common.hh>
#include <hip/hip_runtime_api.h>
// Host-side model of a kernel launch grid.
// Threads are ranked linearly with x varying fastest, then y, then z, first
// inside a block and then block after block. The helpers translate such a
// linear rank back into block/thread coordinates.
struct CPUGrid {
  // Creates an empty grid: all counts are zero, so every rank is out of range.
  CPUGrid() = default;

  // grid_dim: number of blocks per axis; block_dim: number of threads per
  // block per axis.
  CPUGrid(const dim3 grid_dim, const dim3 block_dim)
      : grid_dim_{grid_dim},
        block_dim_{block_dim},
        block_count_{grid_dim.x * grid_dim.y * grid_dim.z},
        threads_in_block_count_{block_dim.x * block_dim.y * block_dim.z},
        thread_count_{block_count_ * threads_in_block_count_} {}

  // Rank of the thread inside its own block, or std::nullopt when
  // thread_rank_in_grid is not a valid rank (valid ranks are
  // [0, thread_count_)).
  inline std::optional<unsigned int> thread_rank_in_block(
      const unsigned int thread_rank_in_grid) const {
    if (!is_valid_rank(thread_rank_in_grid)) {
      return std::nullopt;
    }
    return thread_rank_in_grid % threads_in_block_count_;
  }

  // Coordinates of the block that owns the thread, or std::nullopt when
  // thread_rank_in_grid is not a valid rank.
  inline std::optional<dim3> block_idx(const unsigned int thread_rank_in_grid) const {
    if (!is_valid_rank(thread_rank_in_grid)) {
      return std::nullopt;
    }

    dim3 block_idx;
    const auto block_rank_in_grid = thread_rank_in_grid / threads_in_block_count_;
    block_idx.x = block_rank_in_grid % grid_dim_.x;
    block_idx.y = (block_rank_in_grid / grid_dim_.x) % grid_dim_.y;
    block_idx.z = block_rank_in_grid / (grid_dim_.x * grid_dim_.y);
    return block_idx;
  }

  // Coordinates of the thread inside its block, or std::nullopt when
  // thread_rank_in_grid is not a valid rank.
  inline std::optional<dim3> thread_idx(const unsigned int thread_rank_in_grid) const {
    if (!is_valid_rank(thread_rank_in_grid)) {
      return std::nullopt;
    }

    dim3 thread_idx;
    const auto thread_rank_in_block = thread_rank_in_grid % threads_in_block_count_;
    thread_idx.x = thread_rank_in_block % block_dim_.x;
    thread_idx.y = (thread_rank_in_block / block_dim_.x) % block_dim_.y;
    thread_idx.z = thread_rank_in_block / (block_dim_.x * block_dim_.y);
    return thread_idx;
  }

  dim3 grid_dim_;
  dim3 block_dim_;
  // Zero-initialised so that a default constructed grid never divides by an
  // indeterminate value; the rank check rejects everything in that state.
  unsigned int block_count_ = 0;
  unsigned int threads_in_block_count_ = 0;
  unsigned int thread_count_ = 0;

 private:
  // thread_count_ itself is one past the last valid rank, hence the strict
  // comparison.
  inline bool is_valid_rank(const unsigned int thread_rank_in_grid) const {
    return thread_rank_in_grid < thread_count_;
  }
};
struct CPUMultiGrid {
CPUMultiGrid(const unsigned int num_grids, const dim3 grid_dims[], const dim3 block_dims[]) {
thread_count_ = 0;
grid_count_ = num_grids;
grids_.reserve(grid_count_);
for (int i = 0; i < grid_count_; i++) {
grids_.emplace_back(grid_dims[i], block_dims[i]);
thread_count_ += grids_[i].thread_count_;
}
}
inline unsigned int thread0_rank_in_multi_grid(const unsigned int grid_rank) const {
unsigned int multi_grid_thread_rank_0 = 0;
unsigned int multi_grid_thread_count = 0;
for (int i = 0; i <= grid_rank; i++) {
multi_grid_thread_rank_0 = multi_grid_thread_count;
multi_grid_thread_count += grids_[i].thread_count_;
}
return multi_grid_thread_rank_0;
}
std::vector<CPUGrid> grids_;
unsigned int grid_count_;
unsigned int thread_count_;
};
/* Generate dimensions for 1D, 2D and 3D blocks of threads.
 *
 * The candidate set is built from the properties of device 0:
 *  - a single thread, and the maximum block extent along each axis,
 *  - for each axis, multiples (0.1x .. 2.5x) of the warp size, truncated to an
 *    integer and clamped to that axis' maximum extent,
 *  - a few fixed 2D/3D shapes plus shapes whose x extent is warpSize - 1 and
 *    warpSize + 1.
 *
 * NOTE(review): relies on Catch2's GENERATE_COPY, so this is presumably meant to
 * be called from inside a test case - confirm against callers.
 */
inline dim3 GenerateThreadDimensions() {
hipDeviceProp_t props;
HIP_CHECK(hipGetDeviceProperties(&props, 0));
// Fractions/multiples of the warp size used for the per-axis 1D blocks.
const auto multipliers = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2, 1.3,
1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2.0, 2.1, 2.2, 2.3, 2.4, 2.5};
// The three map(...) generators below are identical except for the axis
// (x, y, z) that receives the scaled warp size.
return GENERATE_COPY(
dim3(1, 1, 1), dim3(props.maxThreadsDim[0], 1, 1), dim3(1, props.maxThreadsDim[1], 1),
dim3(1, 1, props.maxThreadsDim[2]),
map([max = props.maxThreadsDim[0], warp_size = props.warpSize](
double i) { return dim3(std::min(static_cast<int>(i * warp_size), max), 1, 1); },
values(multipliers)),
map([max = props.maxThreadsDim[1], warp_size = props.warpSize](
double i) { return dim3(1, std::min(static_cast<int>(i * warp_size), max), 1); },
values(multipliers)),
map([max = props.maxThreadsDim[2], warp_size = props.warpSize](
double i) { return dim3(1, 1, std::min(static_cast<int>(i * warp_size), max)); },
values(multipliers)),
dim3(16, 8, 8), dim3(32, 32, 1), dim3(64, 8, 2), dim3(16, 16, 3), dim3(props.warpSize - 1, 3, 3),
dim3(props.warpSize + 1, 3, 3));
}
/* Generate dimensions for 1D, 2D and 3D grids of blocks.
 *
 * The candidate set is built from the multiprocessor count of device 0:
 *  - a single block,
 *  - for each axis, multiples (0.5x .. 4x) of the multiprocessor count,
 *    truncated to an integer,
 *  - one fixed 5x5x5 grid.
 *
 * NOTE(review): static_cast<int>(0.5 * sm) is 0 when the device reports a
 * single multiprocessor, which would yield a grid extent of 0 - confirm this
 * cannot happen on supported devices.
 */
inline dim3 GenerateBlockDimensions() {
hipDeviceProp_t props;
HIP_CHECK(hipGetDeviceProperties(&props, 0));
// Fractions/multiples of the multiprocessor count used for the 1D grids.
const auto multipliers = {0.5, 0.9, 1.0, 1.1, 1.5, 1.9, 2.0, 3.0, 4.0};
// The three map(...) generators differ only in the axis that is scaled.
return GENERATE_COPY(dim3(1, 1, 1),
map([sm = props.multiProcessorCount](
double i) { return dim3(static_cast<int>(i * sm), 1, 1); },
values(multipliers)),
map([sm = props.multiProcessorCount](
double i) { return dim3(1, static_cast<int>(i * sm), 1); },
values(multipliers)),
map([sm = props.multiProcessorCount](
double i) { return dim3(1, 1, static_cast<int>(i * sm)); },
values(multipliers)),
dim3(5, 5, 5));
}
/* Generate dimensions for 1D, 2D and 3D blocks of threads - reduced set.
 *
 * Same construction as GenerateThreadDimensions(), but with only five warp
 * size multipliers (0.5x, 0.9x, 1x, 1.5x, 2x) per axis. The fixed shapes and
 * the per-axis maximum extents of device 0 are still included.
 */
inline dim3 GenerateThreadDimensionsForShuffle() {
hipDeviceProp_t props;
HIP_CHECK(hipGetDeviceProperties(&props, 0));
// Fractions/multiples of the warp size used for the per-axis 1D blocks.
const auto multipliers = {0.5, 0.9, 1.0, 1.5, 2.0};
// Each scaled extent is truncated to int and clamped to the axis maximum.
return GENERATE_COPY(
dim3(1, 1, 1), dim3(props.maxThreadsDim[0], 1, 1), dim3(1, props.maxThreadsDim[1], 1),
dim3(1, 1, props.maxThreadsDim[2]),
map([max = props.maxThreadsDim[0], warp_size = props.warpSize](
double i) { return dim3(std::min(static_cast<int>(i * warp_size), max), 1, 1); },
values(multipliers)),
map([max = props.maxThreadsDim[1], warp_size = props.warpSize](
double i) { return dim3(1, std::min(static_cast<int>(i * warp_size), max), 1); },
values(multipliers)),
map([max = props.maxThreadsDim[2], warp_size = props.warpSize](
double i) { return dim3(1, 1, std::min(static_cast<int>(i * warp_size), max)); },
values(multipliers)),
dim3(16, 8, 8), dim3(32, 32, 1), dim3(64, 8, 2), dim3(16, 16, 3), dim3(props.warpSize - 1, 3, 3),
dim3(props.warpSize + 1, 3, 3));
}
/* Generate dimensions for 1D, 2D and 3D grids of blocks - reduced set.
 *
 * Same construction as GenerateBlockDimensions(), but with only two
 * multipliers (0.5x and 1x) of the multiprocessor count of device 0 per axis,
 * plus a single block and one fixed 5x5x5 grid.
 *
 * NOTE(review): static_cast<int>(0.5 * sm) is 0 when the device reports a
 * single multiprocessor - confirm this cannot happen on supported devices.
 */
inline dim3 GenerateBlockDimensionsForShuffle() {
hipDeviceProp_t props;
HIP_CHECK(hipGetDeviceProperties(&props, 0));
// Fractions/multiples of the multiprocessor count used for the 1D grids.
const auto multipliers = {0.5, 1.0};
return GENERATE_COPY(dim3(1, 1, 1),
map([sm = props.multiProcessorCount](
double i) { return dim3(static_cast<int>(i * sm), 1, 1); },
values(multipliers)),
map([sm = props.multiProcessorCount](
double i) { return dim3(1, static_cast<int>(i * sm), 1); },
values(multipliers)),
map([sm = props.multiProcessorCount](
double i) { return dim3(1, 1, static_cast<int>(i * sm)); },
values(multipliers)),
dim3(5, 5, 5));
}
@@ -0,0 +1,26 @@
#include <hip_test_common.hh>
// Queries the managed-memory (HMM) related attributes of device 0, records
// each value through INFO, and returns the value of
// hipDeviceAttributeManagedMemory (the last attribute queried).
// A WARN is emitted when that value is not 1. Any failing query fails the
// test through HIP_CHECK.
static int HmmAttrPrint() {
// Reused as the output slot of every query below; only the last value written
// (hipDeviceAttributeManagedMemory) is returned.
int managed = 0;
INFO(
"The following are the attribute values related to HMM for"
" device 0:\n");
HIP_CHECK(hipDeviceGetAttribute(&managed, hipDeviceAttributeDirectManagedMemAccessFromHost, 0));
INFO("hipDeviceAttributeDirectManagedMemAccessFromHost: " << managed);
HIP_CHECK(hipDeviceGetAttribute(&managed, hipDeviceAttributeConcurrentManagedAccess, 0));
INFO("hipDeviceAttributeConcurrentManagedAccess: " << managed);
HIP_CHECK(hipDeviceGetAttribute(&managed, hipDeviceAttributePageableMemoryAccess, 0));
INFO("hipDeviceAttributePageableMemoryAccess: " << managed);
HIP_CHECK(
hipDeviceGetAttribute(&managed, hipDeviceAttributePageableMemoryAccessUsesHostPageTables, 0));
INFO("hipDeviceAttributePageableMemoryAccessUsesHostPageTables:" << managed);
// Must stay last: its result is the return value.
HIP_CHECK(hipDeviceGetAttribute(&managed, hipDeviceAttributeManagedMemory, 0));
INFO("hipDeviceAttributeManagedMemory: " << managed);
if (managed != 1) {
WARN(
"GPU 0 doesn't support hipDeviceAttributeManagedMemory attribute so defaulting to system "
"memory.");
}
return managed;
}
@@ -0,0 +1,334 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#pragma once
#include <hip_test_common.hh>
// Compile-time record describing a (vector) element type:
//   type   - the scalar component type,
//   size   - the number of components,
//   format - the matching hipArray_Format enumerator.
template <class T, size_t N, hipArray_Format Format>
struct type_and_size_and_format {
  typedef T type;
  static constexpr hipArray_Format format = Format;
  static constexpr size_t size = N;
};
// Create a map of type to scalar type, vector size and scalar type format enum.
// This is useful for creating simpler function that depend on the vector size.
template <typename T> struct vector_info;
template <>
struct vector_info<int> : type_and_size_and_format<int, 1, HIP_AD_FORMAT_SIGNED_INT32> {};
template <> struct vector_info<float> : type_and_size_and_format<float, 1, HIP_AD_FORMAT_FLOAT> {};
template <>
struct vector_info<short> : type_and_size_and_format<short, 1, HIP_AD_FORMAT_SIGNED_INT16> {};
template <>
struct vector_info<char> : type_and_size_and_format<char, 1, HIP_AD_FORMAT_SIGNED_INT8> {};
template <>
struct vector_info<unsigned int>
: type_and_size_and_format<unsigned int, 1, HIP_AD_FORMAT_UNSIGNED_INT32> {};
template <>
struct vector_info<unsigned short>
: type_and_size_and_format<unsigned short, 1, HIP_AD_FORMAT_UNSIGNED_INT16> {};
template <>
struct vector_info<unsigned char>
: type_and_size_and_format<unsigned char, 1, HIP_AD_FORMAT_UNSIGNED_INT8> {};
template <>
struct vector_info<int2> : type_and_size_and_format<int, 2, HIP_AD_FORMAT_SIGNED_INT32> {};
template <> struct vector_info<float2> : type_and_size_and_format<float, 2, HIP_AD_FORMAT_FLOAT> {};
template <>
struct vector_info<short2> : type_and_size_and_format<short, 2, HIP_AD_FORMAT_SIGNED_INT16> {};
template <>
struct vector_info<char2> : type_and_size_and_format<char, 2, HIP_AD_FORMAT_SIGNED_INT8> {};
template <>
struct vector_info<uint2>
: type_and_size_and_format<unsigned int, 2, HIP_AD_FORMAT_UNSIGNED_INT32> {};
template <>
struct vector_info<ushort2>
: type_and_size_and_format<unsigned short, 2, HIP_AD_FORMAT_UNSIGNED_INT16> {};
template <>
struct vector_info<uchar2>
: type_and_size_and_format<unsigned char, 2, HIP_AD_FORMAT_UNSIGNED_INT8> {};
template <>
struct vector_info<int4> : type_and_size_and_format<int, 4, HIP_AD_FORMAT_SIGNED_INT32> {};
template <> struct vector_info<float4> : type_and_size_and_format<float, 4, HIP_AD_FORMAT_FLOAT> {};
template <>
struct vector_info<short4> : type_and_size_and_format<short, 4, HIP_AD_FORMAT_SIGNED_INT16> {};
template <>
struct vector_info<char4> : type_and_size_and_format<char, 4, HIP_AD_FORMAT_SIGNED_INT8> {};
template <>
struct vector_info<uint4>
: type_and_size_and_format<unsigned int, 4, HIP_AD_FORMAT_UNSIGNED_INT32> {};
template <>
struct vector_info<ushort4>
: type_and_size_and_format<unsigned short, 4, HIP_AD_FORMAT_UNSIGNED_INT16> {};
template <>
struct vector_info<uchar4>
: type_and_size_and_format<unsigned char, 4, HIP_AD_FORMAT_UNSIGNED_INT8> {};
// Number of components of the non-scalar type T, computed as the size of T
// divided by the size of its `x` member. Not available for scalar types
// (removed from overload resolution).
template <typename T, std::enable_if_t<!std::is_scalar<T>::value>* = nullptr>
static inline __host__ __device__ constexpr int rank() {
  using component_t = decltype(T::x);
  return sizeof(T) / sizeof(component_t);
}
template<
typename T,
typename std::enable_if<rank<T>() == 1>::type* = nullptr>
static inline bool isEqual(const T &val0, const T &val1) {
return val0.x == val1.x;
}
template<
typename T,
typename std::enable_if<rank<T>() == 2>::type* = nullptr>
static inline bool isEqual(const T &val0, const T &val1) {
return val0.x == val1.x &&
val0.y == val1.y;
}
template<
typename T,
typename std::enable_if<rank<T>() == 4>::type* = nullptr>
static inline bool isEqual(const T &val0, const T &val1) {
return val0.x == val1.x &&
val0.y == val1.y &&
val0.z == val1.z &&
val0.w == val1.w;
}
template<
typename T,
typename std::enable_if<std::is_scalar<T>::value>::type* = nullptr>
static inline bool isEqual(const T &val0, const T &val1) {
return val0 == val1;
}
template<
typename T,
typename std::enable_if<rank<T>() == 1>::type* = nullptr>
const std::string getString(const T& t)
{
std::ostringstream os;
if constexpr (std::is_same<decltype(T::x), char>::value ||
std::is_same<decltype(T::x), unsigned char>::value) {
os << "(" << static_cast<int>(t.x) << ")";
} else {
os << "(" << t.x << ")";
}
return os.str();
}
template<
typename T,
typename std::enable_if<rank<T>() == 2>::type* = nullptr>
const std::string getString(const T& t)
{
std::ostringstream os;
if constexpr (std::is_same<decltype(T::x), char>::value ||
std::is_same<decltype(T::x), unsigned char>::value) {
os << "(" << static_cast<int>(t.x) << ", " << static_cast<int>(t.y) << ")";
} else {
os << "(" << t.x << ", " << t.y << ")";
}
return os.str();
}
template<
typename T,
typename std::enable_if<rank<T>() == 3>::type* = nullptr>
const std::string getString(const T& t)
{
std::ostringstream os;
if constexpr (std::is_same<decltype(T::x), char>::value ||
std::is_same<decltype(T::x), unsigned char>::value) {
os << "(" << static_cast<int>(t.x) << ", " << static_cast<int>(t.y) << ", " <<
static_cast<int>(t.z) << ")";
} else {
os << "(" << t.x << ", " << t.y << ", " << t.z << ")";
}
return os.str();
}
template<
typename T,
typename std::enable_if<rank<T>() == 4>::type* = nullptr>
const std::string getString(const T& t)
{
std::ostringstream os;
if constexpr (std::is_same<decltype(T::x), char>::value ||
std::is_same<decltype(T::x), unsigned char>::value) {
os << "(" << static_cast<int>(t.x) << ", " << static_cast<int>(t.y) << ", " <<
static_cast<int>(t.z) << ", " << static_cast<int>(t.w) << ")";
} else {
os << "(" << t.x << ", " << t.y << ", " << t.z << ", " << t.w << ")";
}
return os.str();
}
template<
typename T,
typename std::enable_if<std::is_scalar<T>::value>::type* = nullptr>
std::string getString(const T& t)
{
std::ostringstream os;
if constexpr (std::is_same<T, char>::value ||
std::is_same<T, unsigned char>::value) {
os << static_cast<int>(t);
} else {
os << t;
}
return os.str();
}
// Returns a pseudo random value of type T drawn from one std::rand() call.
// A ratio is computed first: within (-1, 1) for signed types and within
// [0, 1) for unsigned types. Floating point results are the ratio scaled by
// 1000; integer results are the ratio scaled by the maximum value of T.
template <typename T>
static inline T getRandom() {
  const double sample = std::rand();
  double ratio = 0;
  if (std::is_signed<T>::value) {
    ratio = (sample - RAND_MAX / 2.0) / (RAND_MAX / 2.0 + 1.);
  } else {
    ratio = sample / (RAND_MAX + 1.);
  }

  if constexpr (!std::is_floating_point<T>::value) {
    return static_cast<T>(std::numeric_limits<T>::max() * ratio);
  } else {
    // Keep floating point values within roughly (-1000, 1000) so that later
    // calculations do not have to deal with huge magnitudes.
    return static_cast<T>(ratio * 1000.);
  }
}
template<
typename T,
typename std::enable_if<rank<T>() == 1>::type* = nullptr>
static inline void initVal(T &val) {
val.x = getRandom<decltype(T::x)>();
}
template<
typename T,
typename std::enable_if<rank<T>() == 2>::type* = nullptr>
static inline void initVal(T &val) {
val.x = getRandom<decltype(T::x)>();
val.y = getRandom<decltype(T::x)>();
}
template<
typename T,
typename std::enable_if<rank<T>() == 4>::type* = nullptr>
static inline void initVal(T &val) {
val.x = getRandom<decltype(T::x)>();
val.y = getRandom<decltype(T::x)>();
val.z = getRandom<decltype(T::x)>();
val.w = getRandom<decltype(T::x)>();
}
template<
typename T,
typename std::enable_if<std::is_scalar<T>::value>::type* = nullptr>
static inline void initVal(T &val) {
val = getRandom<T>();
}
/* Convert a normalized floatx to typex.
 * Every component of f is multiplied by the maximum value of the matching
 * component type of T and cast to that type. For a scalar T only f.x is used.
 */
template <typename T, typename F>
inline __device__ T getTypeFromNormalizedFloat(const F& f) {
  T result;
  if constexpr (std::is_scalar<T>::value) {
    result = static_cast<T>(f.x * std::numeric_limits<T>::max());
  } else {
    constexpr int kComponents = rank<T>();
    if constexpr (kComponents > 0) {
      using X = decltype(T::x);
      result.x = static_cast<X>(f.x * std::numeric_limits<X>::max());
    }
    if constexpr (kComponents > 1) {
      using Y = decltype(T::y);
      result.y = static_cast<Y>(f.y * std::numeric_limits<Y>::max());
    }
    if constexpr (kComponents > 2) {
      using Z = decltype(T::z);
      result.z = static_cast<Z>(f.z * std::numeric_limits<Z>::max());
    }
    if constexpr (kComponents > 3) {
      using W = decltype(T::w);
      result.w = static_cast<W>(f.w * std::numeric_limits<W>::max());
    }
  }
  return result;
}
/*Convert typex to normalized floatx*/
template <class T>
inline auto getNormalizedFloatType(const T &t) {
if constexpr (std::is_scalar<T>::value)
return static_cast<float>(t) / std::numeric_limits<T>::max();
else {
if constexpr (rank<T>() == 1) {
float1 f{static_cast<float>(t.x) / std::numeric_limits<decltype(T::x)>::max()};
return f;
}
if constexpr (rank<T>() == 2) {
float2 f{static_cast<float>(t.x) / std::numeric_limits<decltype(T::x)>::max(),
static_cast<float>(t.y) / std::numeric_limits<decltype(T::y)>::max()};
return f;
}
if constexpr (rank<T>() == 3) {
float3 f{static_cast<float>(t.x) / std::numeric_limits<decltype(T::x)>::max(),
static_cast<float>(t.y) / std::numeric_limits<decltype(T::y)>::max(),
static_cast<float>(t.z) / std::numeric_limits<decltype(T::z)>::max()};
return f;
}
if constexpr (rank<T>() == 4) {
float4 f{static_cast<float>(t.x) / std::numeric_limits<decltype(T::x)>::max(),
static_cast<float>(t.y) / std::numeric_limits<decltype(T::y)>::max(),
static_cast<float>(t.z) / std::numeric_limits<decltype(T::z)>::max(),
static_cast<float>(t.w) / std::numeric_limits<decltype(T::w)>::max()};
return f;
}
}
}
/* Check if T is floatx: true when T itself (scalar case) or the type of its
 * `x` member (vector case) is a floating point type. */
template <typename T> inline bool constexpr isFloat() {
  if constexpr (!std::is_scalar<T>::value) {
    using component_t = decltype(T::x);
    return std::is_floating_point<component_t>::value;
  } else {
    return std::is_floating_point<T>::value;
  }
}
// Computes the arithmetic mean and, optionally, the sample standard deviation
// (N - 1 denominator) of data[0..N).
//   data      - input values, each converted to double
//   N         - number of values
//   mean      - receives the mean; 0 when N == 0
//   deviation - if not null, receives the standard deviation; 0 when N < 2,
//               because a spread is undefined for fewer than two samples
template <typename T>
void getStatics(T* data, size_t N, double& mean, double* deviation = nullptr) {
  if (N == 0) {
    // Nothing to average; avoid the 0 / 0 division.
    mean = 0.0;
    if (deviation) *deviation = 0.0;
    return;
  }

  double sum = 0;
  for (size_t i = 0; i < N; i++) {
    sum += static_cast<double>(data[i]);
  }
  mean = sum / N;
  if (!deviation) return;

  if (N < 2) {
    // N - 1 would be zero; report no spread instead of NaN.
    *deviation = 0.0;
    return;
  }

  double squares = 0;
  for (size_t i = 0; i < N; i++) {
    const double delta = data[i] - mean;
    squares += delta * delta;
  }
  *deviation = sqrt(squares / (N - 1));
}
// Compares data[0..N) with data1[0..N) element by element using isEqual().
// Returns true when all elements match; otherwise prints the index and both
// values of the first differing element and returns false.
template <typename T> bool verify(T* data, T* data1, size_t N) {
  size_t idx = 0;
  while (idx < N && isEqual(data[idx], data1[idx])) {
    ++idx;
  }
  if (idx == N) {
    return true;
  }
  printf("Difference [ %zu ]:%s ----%s\n", idx, getString(data[idx]).c_str(),
         getString(data1[idx]).c_str());
  return false;
}
@@ -0,0 +1,399 @@
/*
Copyright (c) 2021 - 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#pragma once
#include "hip_test_common.hh"
#include "hip_array_common.hh"
#include <iostream>
#include <fstream>
#include <regex>
#include <type_traits>
#define TOL 0.001
// Aborts the process when `cond` is false, after writing
// "guarantee failed: <str>" to stderr. `str` may be any expression that can be
// streamed with operator<<.
// The message is written to std::cerr rather than to a Catch INFO because an
// INFO message is only reported together with a later assertion, which never
// happens once abort() is called.
#define guarantee(cond, str)                                                                       \
  {                                                                                                \
    if (!(cond)) {                                                                                 \
      std::cerr << "guarantee failed: " << str << std::endl;                                       \
      abort();                                                                                     \
    }                                                                                              \
  }
namespace HipTest {
// Verifies Out[i] against F(A[i], B[i]) for every i in [0, N); an element is a
// mismatch when the absolute difference is greater than TOL.
//   expectMatch    - true: mismatches are errors (the first 10 are reported
//                    individually); false: at least one mismatch is expected.
//   reportMismatch - when true, the test is failed if the outcome contradicts
//                    expectMatch.
// Returns the number of mismatching elements.
template <typename T>
size_t checkVectors(T* A, T* B, T* Out, size_t N, T (*F)(T a, T b), bool expectMatch = true,
                    bool reportMismatch = true) {
  const size_t maxReported = 10;
  size_t mismatches = 0;
  size_t firstBadIndex = 0;

  for (size_t idx = 0; idx < N; ++idx) {
    T expected = F(A[idx], B[idx]);
    // Written as a negated '>' so that a NaN difference is still treated as
    // "within tolerance".
    if (!(std::fabs(Out[idx] - expected) > TOL)) {
      continue;
    }
    if (mismatches == 0) {
      firstBadIndex = idx;
    }
    ++mismatches;
    if (expectMatch && (mismatches <= maxReported)) {
      INFO("Mismatch at " << idx << " Computed: " << Out[idx] << " Expeted: " << expected);
      CHECK(false);
    }
  }

  if (reportMismatch) {
    if (expectMatch && mismatches != 0) {
      INFO(mismatches << " Mismatches  First Mismatch at index : " << firstBadIndex);
      REQUIRE(false);
    } else if (!expectMatch && mismatches == 0) {
      INFO("Expected Mismatch but not found any");
      REQUIRE(false);
    }
  }

  return mismatches;
}
// Compares two densely packed arrays of width x height x depth elements
// (width varies fastest) using isEqual().
// On the first differing element its [depth, row, column] position and both
// values are reported, a CHECK failure is recorded and false is returned.
// Returns true when all elements are equal.
template <typename T>  // pointer type
bool checkArray(T* hData, T* hOutputData, size_t width, size_t height, size_t depth = 1) {
  for (size_t i = 0; i < depth; i++) {
    for (size_t j = 0; j < height; j++) {
      for (size_t k = 0; k < width; k++) {
        // size_t, not int: the linear index is a product of size_t extents and
        // must not be narrowed (it overflows int for large arrays).
        const size_t offset = i * width * height + j * width + k;
        if (!isEqual(hData[offset], hOutputData[offset])) {
          INFO("Mismatch at [" << i << "," << j << "," << k << "]:" << getString(hData[offset])
                               << "----" << getString(hOutputData[offset]));
          CHECK(false);
          return false;
        }
      }
    }
  }
  return true;
}
// Verifies result_H[i] == A_h[i] + B_h[i] for N elements via checkVectors().
// See checkVectors() for expectMatch / reportMismatch and the return value.
template <typename T>
size_t checkVectorADD(T* A_h, T* B_h, T* result_H, size_t N, bool expectMatch = true,
                      bool reportMismatch = true) {
  // The explicit '-> T' matters: for char/short operands 'a + b' has type int,
  // and a lambda returning int cannot convert to the T (*)(T, T) parameter.
  return checkVectors<T>(
      A_h, B_h, result_H, N, [](T a, T b) -> T { return a + b; }, expectMatch, reportMismatch);
}
// Verifies result_H[i] == A_h[i] - B_h[i] for N elements via checkVectors().
// See checkVectors() for expectMatch / reportMismatch and the return value.
template <typename T>
size_t checkVectorSUB(T* A_h, T* B_h, T* result_H, size_t N, bool expectMatch = true,
                      bool reportMismatch = true) {
  // The explicit '-> T' matters: for char/short operands 'a - b' has type int,
  // and a lambda returning int cannot convert to the T (*)(T, T) parameter.
  return checkVectors<T>(
      A_h, B_h, result_H, N, [](T a, T b) -> T { return a - b; }, expectMatch, reportMismatch);
}
// Verifies result_H[i] against expected_H[i] for N elements via
// checkVectors(). expected_H is passed as both operands, so the combining
// function simply hands back its (identical) arguments.
template <typename T>
void checkTest(T* expected_H, T* result_H, size_t N, bool expectMatch = true) {
  T (*const passThrough)(T, T) = [](T a, T b) {
    guarantee(a == b, "Both values should be equal");
    return a;
  };
  checkVectors<T>(expected_H, expected_H, result_H, N, passThrough, expectMatch);
}
// Setters and Memory Management
// Fills the first numElements entries of each non-null array with default
// test data:
//   int / unsigned int   : A = 3,         B = 4,         C = 5
//   char / unsigned char : A = 'a',       B = 'b',       C = 'c'
//   any other type       : A = 3.146 + i, B = 1.618 + i, C = 1.4 + i
// Null arrays are skipped.
template <typename T> void setDefaultData(size_t numElements, T* A_h, T* B_h, T* C_h) {
  const bool isIntType = std::is_same<T, int>::value || std::is_same<T, unsigned int>::value;
  const bool isCharType = std::is_same<T, char>::value || std::is_same<T, unsigned char>::value;

  // The element type never changes, so the type test is done once and each
  // case gets its own fill loop.
  if (isIntType) {
    for (size_t idx = 0; idx < numElements; ++idx) {
      if (A_h) A_h[idx] = 3;
      if (B_h) B_h[idx] = 4;
      if (C_h) C_h[idx] = 5;
    }
    return;
  }

  if (isCharType) {
    for (size_t idx = 0; idx < numElements; ++idx) {
      if (A_h) A_h[idx] = 'a';
      if (B_h) B_h[idx] = 'b';
      if (C_h) C_h[idx] = 'c';
    }
    return;
  }

  for (size_t idx = 0; idx < numElements; ++idx) {
    if (A_h) A_h[idx] = 3.146f + idx;
    if (B_h) B_h[idx] = 1.618f + idx;
    if (C_h) C_h[idx] = 1.4f + idx;
  }
}
// Allocates up to three host arrays of N elements and fills them with
// setDefaultData(). A null A_h / B_h / C_h skips that array.
//   usePinnedHost - true: allocate with hipHostMalloc; false: with malloc.
// Allocation failures fail the test (HIP_CHECK / REQUIRE). Returns true.
template <typename T>
bool initArraysForHost(T** A_h, T** B_h, T** C_h, size_t N, bool usePinnedHost = false) {
  size_t Nbytes = N * sizeof(T);

  if (A_h != nullptr) {
    if (usePinnedHost) {
      HIP_CHECK(hipHostMalloc((void**)A_h, Nbytes));
    } else {
      *A_h = (T*)malloc(Nbytes);
      REQUIRE(*A_h != nullptr);
    }
  }

  if (B_h != nullptr) {
    if (usePinnedHost) {
      HIP_CHECK(hipHostMalloc((void**)B_h, Nbytes));
    } else {
      *B_h = (T*)malloc(Nbytes);
      REQUIRE(*B_h != nullptr);
    }
  }

  if (C_h != nullptr) {
    if (usePinnedHost) {
      HIP_CHECK(hipHostMalloc((void**)C_h, Nbytes));
    } else {
      *C_h = (T*)malloc(Nbytes);
      REQUIRE(*C_h != nullptr);
    }
  }

  T* const dataA = A_h ? *A_h : nullptr;
  T* const dataB = B_h ? *B_h : nullptr;
  T* const dataC = C_h ? *C_h : nullptr;
  setDefaultData(N, dataA, dataB, dataC);
  return true;
}
// Allocates up to three device arrays (hipMalloc) and up to three host arrays
// of N elements each; the host arrays are filled by initArraysForHost().
// Any null pointer-to-pointer argument skips that array.
// Returns the result of initArraysForHost().
template <typename T>
bool initArrays(T** A_d, T** B_d, T** C_d, T** A_h, T** B_h, T** C_h, size_t N,
                bool usePinnedHost = false) {
  size_t Nbytes = N * sizeof(T);

  // Device side first, in A, B, C order.
  if (A_d != nullptr) {
    HIP_CHECK(hipMalloc(A_d, Nbytes));
  }
  if (B_d != nullptr) {
    HIP_CHECK(hipMalloc(B_d, Nbytes));
  }
  if (C_d != nullptr) {
    HIP_CHECK(hipMalloc(C_d, Nbytes));
  }

  // Then the host side, including the default data.
  const bool hostReady = initArraysForHost(A_h, B_h, C_h, N, usePinnedHost);
  return hostReady;
}
// Threaded version of setDefaultData to be called from multi thread tests
// Call HIP_CHECK_THREAD_FINALIZE after joining
template <typename T> void setDefaultDataT(size_t numElements, T* A_h, T* B_h, T* C_h) {
// Initialize the host data:
for (size_t i = 0; i < numElements; i++) {
if (std::is_same<T, int>::value || std::is_same<T, unsigned int>::value) {
if (A_h) A_h[i] = 3;
if (B_h) B_h[i] = 4;
if (C_h) C_h[i] = 5;
} else if (std::is_same<T, char>::value || std::is_same<T, unsigned char>::value) {
if (A_h) A_h[i] = 'a';
if (B_h) B_h[i] = 'b';
if (C_h) C_h[i] = 'c';
} else {
if (A_h) A_h[i] = 3.146f + i;
if (B_h) B_h[i] = 1.618f + i;
if (C_h) C_h[i] = 1.4f + i;
}
}
}
// Threaded version of initArraysForHost to be called from multi thread tests
// Call HIP_CHECK_THREAD_FINALIZE after joining
// Allocates up to three host arrays of N elements (hipHostMalloc when
// usePinnedHost is true, malloc otherwise) and fills them with
// setDefaultDataT(). A null A_h / B_h / C_h skips that array.
template <typename T>
void initArraysForHostT(T** A_h, T** B_h, T** C_h, size_t N, bool usePinnedHost = false) {
  size_t Nbytes = N * sizeof(T);

  if (A_h != nullptr) {
    if (usePinnedHost) {
      HIP_CHECK_THREAD(hipHostMalloc((void**)A_h, Nbytes));
    } else {
      *A_h = (T*)malloc(Nbytes);
      REQUIRE_THREAD(*A_h != nullptr);
    }
  }

  if (B_h != nullptr) {
    if (usePinnedHost) {
      HIP_CHECK_THREAD(hipHostMalloc((void**)B_h, Nbytes));
    } else {
      *B_h = (T*)malloc(Nbytes);
      REQUIRE_THREAD(*B_h != nullptr);
    }
  }

  if (C_h != nullptr) {
    if (usePinnedHost) {
      HIP_CHECK_THREAD(hipHostMalloc((void**)C_h, Nbytes));
    } else {
      *C_h = (T*)malloc(Nbytes);
      REQUIRE_THREAD(*C_h != nullptr);
    }
  }

  T* const dataA = A_h ? *A_h : nullptr;
  T* const dataB = B_h ? *B_h : nullptr;
  T* const dataC = C_h ? *C_h : nullptr;
  setDefaultDataT(N, dataA, dataB, dataC);
}
// Threaded version of initArrays to be called from multi thread tests
// Call HIP_CHECK_THREAD_FINALIZE after joining
// Allocates up to three device arrays (hipMalloc) of N elements, then lets
// initArraysForHostT() allocate and fill the host arrays. Any null
// pointer-to-pointer argument skips that array.
template <typename T>
void initArraysT(T** A_d, T** B_d, T** C_d, T** A_h, T** B_h, T** C_h, size_t N,
                 bool usePinnedHost = false) {
  size_t Nbytes = N * sizeof(T);

  // Device side first, in A, B, C order.
  if (A_d != nullptr) {
    HIP_CHECK_THREAD(hipMalloc(A_d, Nbytes));
  }
  if (B_d != nullptr) {
    HIP_CHECK_THREAD(hipMalloc(B_d, Nbytes));
  }
  if (C_d != nullptr) {
    HIP_CHECK_THREAD(hipMalloc(C_d, Nbytes));
  }

  // Then the host side, including the default data.
  initArraysForHostT(A_h, B_h, C_h, N, usePinnedHost);
}
// Threaded version of freeArraysForHost to be called from multi thread tests
// Call HIP_CHECK_THREAD_FINALIZE after joining
// Releases up to three host arrays with hipHostFree (usePinnedHost == true)
// or free (usePinnedHost == false). Null pointers are skipped.
template <typename T> void freeArraysForHostT(T* A_h, T* B_h, T* C_h, bool usePinnedHost) {
  if (A_h != nullptr) {
    if (usePinnedHost) {
      HIP_CHECK_THREAD(hipHostFree(A_h));
    } else {
      free(A_h);
    }
  }

  if (B_h != nullptr) {
    if (usePinnedHost) {
      HIP_CHECK_THREAD(hipHostFree(B_h));
    } else {
      free(B_h);
    }
  }

  if (C_h != nullptr) {
    if (usePinnedHost) {
      HIP_CHECK_THREAD(hipHostFree(C_h));
    } else {
      free(C_h);
    }
  }
}
// Releases up to three host arrays with hipHostFree (usePinnedHost == true)
// or free (usePinnedHost == false). Null pointers are skipped.
// A failing hipHostFree fails the test through HIP_CHECK. Returns true.
template <typename T> bool freeArraysForHost(T* A_h, T* B_h, T* C_h, bool usePinnedHost) {
  if (A_h != nullptr) {
    if (usePinnedHost) {
      HIP_CHECK(hipHostFree(A_h));
    } else {
      free(A_h);
    }
  }

  if (B_h != nullptr) {
    if (usePinnedHost) {
      HIP_CHECK(hipHostFree(B_h));
    } else {
      free(B_h);
    }
  }

  if (C_h != nullptr) {
    if (usePinnedHost) {
      HIP_CHECK(hipHostFree(C_h));
    } else {
      free(C_h);
    }
  }

  return true;
}
// Variant of freeArrays() built on the *_THREAD check macros.
// Releases up to three device arrays with hipFree, then hands the host arrays
// to freeArraysForHostT(). Null pointers are skipped.
template <typename T>
void freeArraysT(T* A_d, T* B_d, T* C_d, T* A_h, T* B_h, T* C_h, bool usePinnedHost) {
  // Device side first, in A, B, C order.
  if (A_d != nullptr) {
    HIP_CHECK_THREAD(hipFree(A_d));
  }
  if (B_d != nullptr) {
    HIP_CHECK_THREAD(hipFree(B_d));
  }
  if (C_d != nullptr) {
    HIP_CHECK_THREAD(hipFree(C_d));
  }

  freeArraysForHostT(A_h, B_h, C_h, usePinnedHost);
}
// Releases up to three device arrays with hipFree, then hands the host arrays
// to freeArraysForHost(). Null pointers are skipped.
// Returns the result of freeArraysForHost().
template <typename T>
bool freeArrays(T* A_d, T* B_d, T* C_d, T* A_h, T* B_h, T* C_h, bool usePinnedHost) {
  // Device side first, in A, B, C order.
  if (A_d != nullptr) {
    HIP_CHECK(hipFree(A_d));
  }
  if (B_d != nullptr) {
    HIP_CHECK(hipFree(B_d));
  }
  if (C_d != nullptr) {
    HIP_CHECK(hipFree(C_d));
  }

  const bool hostFreed = freeArraysForHost(A_h, B_h, C_h, usePinnedHost);
  return hostFreed;
}
// Scans the assembly listing ./catch/unit/deviceLib/<assemfilename> for a line
// matching the regular expression `inst` inside the AtomicCheck function that
// belongs to T.
//   T == float : the region starts at the first "Begin function ...AtomicCheck"
//                line and ends at the ".Lfunc_end0-...AtomicCheck" line.
//   otherwise  : the region starts at the second such "Begin function" line
//                and ends at the "func_end1-...AtomicCheck" line.
// Returns true when a matching line is found before the end marker, false
// otherwise. When the file cannot be opened the check is reported as passed
// (SUCCEED) and true is returned.
template <typename T>
static bool assemblyFile_Verification(std::string assemfilename, std::string inst) {
  const std::string filename = "./catch/unit/deviceLib/" + assemfilename;

  // Open strictly for reading. Passing std::ios::out to an ifstream requests
  // read+write access, which fails on read-only files and would then be
  // misreported as "file does not exist".
  std::ifstream file(filename.c_str(), std::ios::in);
  if (!file) {
    SUCCEED("Assembly file does not exist");
    return true;
  }

  constexpr bool kFloatKernel = std::is_same<T, float>::value;
  // Compile each pattern once instead of once per scanned line.
  const std::regex beginPattern("Begin function (.*)AtomicCheck");
  const std::regex endPattern(kFloatKernel ? ".Lfunc_end0-(.*)AtomicCheck"
                                           : "func_end1-(.*)AtomicCheck");
  const std::regex instPattern(inst);

  // Number of "Begin function" lines to see before the region of interest
  // starts.
  const int beginsNeeded = kFloatKernel ? 1 : 2;
  int beginsSeen = 0;

  std::string line;
  while (getline(file, line)) {
    if (beginsSeen < beginsNeeded && std::regex_search(line, beginPattern)) {
      ++beginsSeen;
    }
    // The end marker stops the scan, whether or not the region was entered.
    if (std::regex_search(line, endPattern)) {
      break;
    }
    // The "Begin function" line itself is part of the searched region.
    if (beginsSeen == beginsNeeded && std::regex_search(line, instPattern)) {
      return true;
    }
  }
  return false;
}
} // namespace HipTest
@@ -0,0 +1,618 @@
/*
Copyright (c) 2021 - 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#pragma once
#pragma clang diagnostic ignored "-Wsign-compare"
#include "hip_test_context.hh"
#include <catch.hpp>
#include <atomic>
#include <chrono>
#include <cstring>
#include <cstdlib>
#include <iostream>
#include <iomanip>
#include <mutex>
#include <cstdlib>
#include <thread>
#include "hip_test_features.hh"
// clock_function() selects the timer call used by tests: clock64() when TEST_CLOCK_CYCLE is
// defined at compile time, wall_clock64() otherwise.
#ifdef TEST_CLOCK_CYCLE
#define clock_function() clock64()
#else
#define clock_function() wall_clock64()
#endif
// Attaches the symbolic name of `status` and the current line number to the Catch2 INFO log.
// The expansion already ends with a semicolon.
#define HIP_PRINT_STATUS(status) INFO(hipGetErrorName(status) << " at line: " << __LINE__);
// Evaluates the HIP call `error` once and fails the current test case (REQUIRE(false)) unless the
// result is hipSuccess or hipErrorPeerAccessAlreadyEnabled. On failure the error string, numeric
// code, stringified call, file and line are attached through INFO.
// Not thread-safe
#define HIP_CHECK(error) \
{ \
hipError_t localError = error; \
if ((localError != hipSuccess) && (localError != hipErrorPeerAccessAlreadyEnabled)) { \
INFO("Error: " << hipGetErrorString(localError) << "\n Code: " << localError \
<< "\n Str: " << #error << "\n In File: " << __FILE__ \
<< "\n At line: " << __LINE__); \
REQUIRE(false); \
} \
}
// Like HIP_CHECK, but when the call returns `ignoredError` the macro logs a "Skipped" INFO message
// and returns from the enclosing function. Because of the bare `return;` it can only be used in
// functions returning void. Any other error (except hipErrorPeerAccessAlreadyEnabled) fails the
// test case.
#define HIP_CHECK_IGNORED_RETURN(error, ignoredError) \
{ \
hipError_t localError = error; \
if ((localError == ignoredError)) { \
INFO("Skipped: " << hipGetErrorString(localError) << "\n Code: " << localError \
<< "\n Str: " << #error << "\n In File: " << __FILE__ \
<< "\n At line: " << __LINE__); \
return; \
} \
if ((localError != hipSuccess) && (localError != hipErrorPeerAccessAlreadyEnabled)) { \
INFO("Error: " << hipGetErrorString(localError) << "\n Code: " << localError \
<< "\n Str: " << #error << "\n In File: " << __FILE__ \
<< "\n At line: " << __LINE__); \
REQUIRE(false); \
} \
}
// Threaded HIP_CHECKs
// Evaluates `error` and stores the outcome (line, file, result, stringified call) in the
// TestContext result buffer instead of asserting on the spot. If an error has already been
// recorded, the macro returns from the enclosing function WITHOUT evaluating `error`; because of
// that bare `return;` it can only be used in functions returning void.
// The buffered results are checked by HIP_CHECK_THREAD_FINALIZE().
#define HIP_CHECK_THREAD(error) \
{ \
/*To see if error has occurred in previous threads, stop execution */ \
if (TestContext::get().hasErrorOccured() == true) { \
return; /*This will only work with std::thread and not with std::async*/ \
} \
auto localError = error; \
HCResult result(__LINE__, __FILE__, localError, #error); \
TestContext::get().addResults(result); \
}
// Threaded counterpart of REQUIRE: evaluates `condition` and stores it in the TestContext result
// buffer as an HCResult with hipSuccess as the HIP status and the boolean outcome as the
// condition result. Returns from the enclosing (void) function without evaluating `condition`
// if an error has already been recorded.
#define REQUIRE_THREAD(condition) \
{ \
/*To see if error has occurred in previous threads, stop execution */ \
if (TestContext::get().hasErrorOccured() == true) { \
return; /*This will only work with std::thread and not with std::async*/ \
} \
auto localResult = (condition); \
HCResult result(__LINE__, __FILE__, hipSuccess, #condition, localResult); \
TestContext::get().addResults(result); \
}
// Calls TestContext::finalizeResults() on the results buffered by HIP_CHECK_THREAD and
// REQUIRE_THREAD.
// Do not call before all threads have joined
#define HIP_CHECK_THREAD_FINALIZE() \
{ TestContext::get().finalizeResults(); }
// Check that an expression, errorExpr, evaluates to the expected error_t, expectedError.
// errorExpr is evaluated exactly once; expected and actual error strings/codes, the stringified
// expression, file and line are always attached through INFO before the REQUIRE.
#define HIP_CHECK_ERROR(errorExpr, expectedError) \
{ \
hipError_t localError = errorExpr; \
INFO("Matching Errors: " \
<< "\n Expected Error: " << hipGetErrorString(expectedError) \
<< "\n Expected Code: " << expectedError << '\n' \
<< " Actual Error: " << hipGetErrorString(localError) \
<< "\n Actual Code: " << localError << "\nStr: " << #errorExpr \
<< "\n In File: " << __FILE__ << "\n At line: " << __LINE__); \
REQUIRE(localError == expectedError); \
}
// Check that an expression, errorExpr, evaluates to one of two accepted error_t values,
// expectedError or expectedError1.
// errorExpr is evaluated exactly once. The INFO message lists both accepted codes (the second
// one used to be printed as expectedError again, which made failure output misleading).
#define HIP_CHECK_ERRORS(errorExpr, expectedError, expectedError1)                                 \
  {                                                                                                \
    hipError_t localError = errorExpr;                                                             \
    INFO("Matching Errors: "                                                                       \
         << "\n    Expected Error: " << hipGetErrorString(expectedError)                           \
         << "\n    Expected Code: " << expectedError << " or " << expectedError1 << '\n'           \
         << "                  Actual Error:   " << hipGetErrorString(localError)                  \
         << "\n    Actual Code:    " << localError << "\nStr: " << #errorExpr                      \
         << "\n    In File: " << __FILE__ << "\n    At line: " << __LINE__);                       \
    REQUIRE((localError == expectedError || localError == expectedError1));                        \
  }
// hiprtc counterpart of HIP_CHECK: evaluates `error` once and fails the current test case unless
// the result is HIPRTC_SUCCESS, attaching the hiprtc error string, code, stringified call, file
// and line through INFO.
// Not thread-safe
#define HIPRTC_CHECK(error) \
{ \
auto localError = error; \
if (localError != HIPRTC_SUCCESS) { \
INFO("Error: " << hiprtcGetErrorString(localError) << "\n Code: " << localError \
<< "\n Str: " << #error << "\n In File: " << __FILE__ \
<< "\n At line: " << __LINE__); \
REQUIRE(false); \
} \
}
// Check that an expression, errorExpr, evaluates to the expected error_t, expectedError.
// hiprtc counterpart of HIP_CHECK_ERROR; errorExpr is evaluated exactly once and the error
// strings are obtained with hiprtcGetErrorString.
#define HIPRTC_CHECK_ERROR(errorExpr, expectedError) \
{ \
auto localError = errorExpr; \
INFO("Matching Errors: " \
<< "\n Expected Error: " << hiprtcGetErrorString(expectedError) \
<< "\n Expected Code: " << expectedError << '\n' \
<< " Actual Error: " << hiprtcGetErrorString(localError) \
<< "\n Actual Code: " << localError << "\nStr: " << #errorExpr \
<< "\n In File: " << __FILE__ << "\n At line: " << __LINE__); \
REQUIRE(localError == expectedError); \
}
// Despite the name this is not a compile-time or debug-only assert: the expression is evaluated
// at runtime and checked through Catch2's REQUIRE.
#define HIP_ASSERT(x) \
{ REQUIRE((x)); }
// Catch2-independent variant of HIP_CHECK: evaluates `error` once and, unless the result is
// hipSuccess or hipErrorPeerAccessAlreadyEnabled, prints the error to stdout and calls abort().
#define HIPCHECK(error) \
{ \
hipError_t localError = error; \
if ((localError != hipSuccess) && (localError != hipErrorPeerAccessAlreadyEnabled)) { \
printf("error: '%s'(%d) from %s at %s:%d\n", hipGetErrorString(localError), localError, \
#error, __FILE__, __LINE__); \
abort(); \
} \
}
// Check that an expression, errorExpr, evaluates to the expected error_t, expectedError.
// NOTE(review): this repeats the HIPRTC_CHECK_ERROR definition that appears earlier in this
// header. The two definitions must stay token-identical, otherwise the redefinition is ill-formed;
// consider removing one of them.
#define HIPRTC_CHECK_ERROR(errorExpr, expectedError) \
{ \
auto localError = errorExpr; \
INFO("Matching Errors: " \
<< "\n Expected Error: " << hiprtcGetErrorString(expectedError) \
<< "\n Expected Code: " << expectedError << '\n' \
<< " Actual Error: " << hiprtcGetErrorString(localError) \
<< "\n Actual Code: " << localError << "\nStr: " << #errorExpr \
<< "\n In File: " << __FILE__ << "\n At line: " << __LINE__); \
REQUIRE(localError == expectedError); \
}
// Catch2-independent runtime assertion: prints the stringified condition with file and line to
// stdout and calls abort() when `condition` is false.
// NOTE: the expansion is a bare `if` statement (no enclosing braces or do/while), so placing it
// directly before an `else` would bind that `else` to the macro's `if`.
#define HIPASSERT(condition) \
if (!(condition)) { \
printf("assertion %s at %s:%d \n", #condition, __FILE__, __LINE__); \
abort(); \
}
// Platform shims for context and array handling.
// On NVIDIA, CTX_CREATE() declares a local `context` and creates a HIP context on device 0 via
// initHipCtx(); CTX_DESTROY() destroys that `context`, so both must be used in the same scope.
// On every other platform both macros expand to nothing.
// ARRAY_DESTROY maps to hipArrayDestroy (NVIDIA) or hipFreeArray (otherwise); failures abort
// through HIPCHECK.
#if HT_NVIDIA
#define CTX_CREATE() \
hipCtx_t context; \
initHipCtx(&context);
#define CTX_DESTROY() HIPCHECK(hipCtxDestroy(context));
#define ARRAY_DESTROY(array) HIPCHECK(hipArrayDestroy(array));
#define HIP_TEX_REFERENCE hipTexRef
#define HIP_ARRAY hipArray_t
// Initializes HIP, fetches device 0 and creates a context for it with flags 0.
// Any failing call aborts the process (HIPCHECK).
static void initHipCtx(hipCtx_t* pcontext) {
HIPCHECK(hipInit(0));
hipDevice_t device;
HIPCHECK(hipDeviceGet(&device, 0));
HIPCHECK(hipCtxCreate(pcontext, 0, device));
}
#else
#define CTX_CREATE()
#define CTX_DESTROY()
#define ARRAY_DESTROY(array) HIPCHECK(hipFreeArray(array));
#define HIP_TEX_REFERENCE textureReference*
#define HIP_ARRAY hipArray_t
#endif
// Returns the warp size of the current device.
// NVIDIA builds return the constant 32; AMD builds query hipDeviceAttributeWarpSize for the
// device reported by hipGetDevice (a failing query fails the test case through HIP_CHECK).
// When neither platform macro is set the function prints a message and asserts; note that this
// branch has no return statement.
static inline int getWarpSize() {
#if HT_NVIDIA
return 32;
#elif HT_AMD
int device = -1;
int warpSize = -1;
HIP_CHECK(hipGetDevice(&device));
HIP_CHECK(hipDeviceGetAttribute(&warpSize, hipDeviceAttributeWarpSize, device));
return warpSize;
#else
std::cout<<"Have to be either Nvidia or AMD platform, asserting"<<std::endl;
assert(false);
#endif
}
// Reports whether the current device's GCN architecture name contains "gfx11".
// Always false on NVIDIA builds. On AMD builds the device properties of the device returned by
// hipGetDevice are queried; a failing query fails the test case through HIP_CHECK.
// Builds with neither platform macro set print a message and assert.
static inline bool IsGfx11() {
#if HT_NVIDIA
  return false;
#elif HT_AMD
  hipDeviceProp_t props{};
  int device = -1;
  HIP_CHECK(hipGetDevice(&device));
  HIP_CHECK(hipGetDeviceProperties(&props, device));
  // A substring match on the architecture name is sufficient here.
  const std::string archName(props.gcnArchName);
  return archName.find("gfx11") != std::string::npos;
#else
  std::cout << "Have to be either Nvidia or AMD platform, asserting" << std::endl;
  assert(false);
#endif
}
// Utility Functions
namespace HipTest {
// Returns the number of devices reported by hipGetDeviceCount.
// A failing query fails the current test case through HIP_CHECK.
static inline int getDeviceCount() {
int dev = 0;
HIP_CHECK(hipGetDeviceCount(&dev));
return dev;
}
// Returns the current reading of std::chrono::high_resolution_clock, expressed as whole
// microseconds since that clock's epoch (fractions are truncated).
static inline long long get_time() {
  const auto sinceEpoch = std::chrono::high_resolution_clock::now().time_since_epoch();
  const std::chrono::microseconds oneMicrosecond(1);
  return sinceEpoch / oneMicrosecond;
}
// Converts the interval between two microsecond timestamps (as produced by get_time) into
// milliseconds. The result is negative when stopTimeUs precedes startTimeUs.
static inline double elapsed_time(long long startTimeUs, long long stopTimeUs) {
  const double deltaUs = static_cast<double>(stopTimeUs - startTimeUs);
  const double usPerMs = static_cast<double>(1000);
  return deltaUs / usPerMs;
}
// Computes a grid size for a kernel that processes N elements.
//
// The starting point is blocksPerCU blocks for every multiprocessor of the current device. If
// that grid would provide more threads than there are elements, it is capped to
// ceil(N / threadsPerBlock) so that no surplus blocks are launched. The comparison matches
// setNumBlocksThread, which is documented as the threaded version of this function; the two
// previously disagreed (`<` here, `>` there).
//
// blocksPerCU      blocks to schedule per multiprocessor
// threadsPerBlock  threads per block used for the launch (must be non-zero)
// N                number of elements to process
// Returns the number of blocks to launch. Device query failures fail the test case (HIP_CHECK).
static inline unsigned setNumBlocks(unsigned blocksPerCU, unsigned threadsPerBlock, size_t N) {
  int device{0};
  HIP_CHECK(hipGetDevice(&device));
  hipDeviceProp_t props{};
  HIP_CHECK(hipGetDeviceProperties(&props, device));

  unsigned blocks = props.multiProcessorCount * blocksPerCU;
  if (blocks * threadsPerBlock > N) {
    // Fewer elements than threads: shrink the grid to what is actually needed.
    blocks = (N + threadsPerBlock - 1) / threadsPerBlock;
  }
  return blocks;
}
// Threaded version of setNumBlocks - to be used in multi threaded test
// Why? because catch2 does not support multithreaded macro calls
// Make sure you call HIP_CHECK_THREAD_FINALIZE after your threads join
// Also you can not return in threaded functions, due to how HIP_CHECK_THREAD works
//
// The result is delivered through the `blocks` out-parameter: blocksPerCU blocks per
// multiprocessor of the current device, capped to ceil(N / threadsPerBlock) when that grid would
// provide more threads than N. `blocks` stays 0 if HIP_CHECK_THREAD returns early.
static inline void setNumBlocksThread(unsigned blocksPerCU, unsigned threadsPerBlock, size_t N,
unsigned& blocks) {
int device{0};
blocks = 0; // in case an error has occurred in some other thread and the next call might not
// execute, we set the blocks size to 0
HIP_CHECK_THREAD(hipGetDevice(&device));
hipDeviceProp_t props{};
HIP_CHECK_THREAD(hipGetDeviceProperties(&props, device));
blocks = props.multiProcessorCount * blocksPerCU;
if (blocks * threadsPerBlock > N) {
blocks = (N + threadsPerBlock - 1) / threadsPerBlock;
}
}
// Portable wrapper around rand_r.
// On non-Windows builds this forwards to rand_r(rand_seed). On Windows builds, where rand_r is
// not used, the generator is re-seeded with *rand_seed through srand() and rand() is returned;
// in that case *rand_seed itself is left unchanged by this function.
static inline int RAND_R(unsigned* rand_seed) {
#if !defined(_WIN32) && !defined(_WIN64)
  return rand_r(rand_seed);
#else
  srand(*rand_seed);
  return rand();
#endif
}
// Reports whether the current device supports images.
// On AMD builds the hipDeviceAttributeImageSupport attribute of the current device is queried;
// all other builds report true without querying anything.
inline bool isImageSupported() {
#if HT_AMD
  int imageSupport = 1;
  int device;
  HIP_CHECK(hipGetDevice(&device));
  HIPCHECK(hipDeviceGetAttribute(&imageSupport, hipDeviceAttributeImageSupport, device));
  return imageSupport != 0;
#else
  return true;
#endif
}
// Reports whether the current device has the hipDeviceAttributeHostNativeAtomicSupported
// attribute set to a non-zero value.
// A failing hipGetDevice fails the test case (HIP_CHECK); a failing attribute query aborts the
// process (HIPCHECK).
inline bool isPcieAtomicsSupported() {
int pcieAtomics = 1;
int device;
HIP_CHECK(hipGetDevice(&device));
HIPCHECK(hipDeviceGetAttribute(&pcieAtomics, hipDeviceAttributeHostNativeAtomicSupported,
device));
return pcieAtomics != 0;
}
// Checks that every ordered pair of distinct devices can access each other's memory.
//
// d1, d2  Out-parameters. They are written only when the check fails and then identify the
//         first pair (d1 -> d2) for which hipDeviceCanAccessPeer reported no access.
// Returns true when all pairs are accessible (also when there are fewer than two devices),
// false otherwise.
//
// The scan stops at the first inaccessible pair. The earlier implementation only left the inner
// loop, kept probing the remaining devices and could overwrite d1/d2 with a later pair.
inline bool isP2PSupported(int& d1, int& d2) {
  const int num_devices = HipTest::getDeviceCount();
  for (int i = 0; i < num_devices; ++i) {
    for (int j = 0; j < num_devices; ++j) {
      if (i == j) {
        continue;  // a device is never its own peer
      }
      int canAccess = 0;
      HIP_CHECK(hipDeviceCanAccessPeer(&canAccess, i, j));
      if (!canAccess) {
        d1 = i;
        d2 = j;
        return false;
      }
    }
  }
  return true;
}
// Reports whether warp match functions can be used on the current device.
// On NVIDIA builds this requires a device whose major compute capability is at least 7; all
// other builds report true without querying the device.
inline bool areWarpMatchFunctionsSupported() {
#if HT_NVIDIA
  int device;
  hipDeviceProp_t prop;
  HIP_CHECK(hipGetDevice(&device));
  HIP_CHECK(hipGetDeviceProperties(&prop, device));
  return !(prop.major < 7);
#else
  return true;
#endif
}
/**
 * Marks the running test as skipped by printing the skip marker to stdout.
 * This function only prints; it does not stop the test, so the caller still has to return.
 * reason: Message describing the reason the test has been skipped.
 */
static inline void HIP_SKIP_TEST(char const* const reason) noexcept {
  std::cout << "Skipping test. Reason: " << reason << '\n';
  // ctest is set up to parse the output for "HIP_SKIP_THIS_TEST" and then reports a skip.
  std::cout << "HIP_SKIP_THIS_TEST" << std::endl;
}
/**
 * @brief Helper template that returns the expected arguments of a kernel.
 *
 * Only the return type matters: validateArguments uses this function inside decltype to obtain a
 * std::tuple of the parameter types of a void(FArgs...) function. The body is empty and returns
 * nothing, so the function must not actually be called.
 *
 * @return constexpr std::tuple<FArgs...> the expected arguments of the kernel.
 */
template <typename... FArgs> std::tuple<FArgs...> getExpectedArgs(void(FArgs...)){};
/**
 * @brief Compile-time check that the argument types handed to a kernel launch are exactly the
 * parameter types of the kernel's signature.
 * HIP RTC performs no implicit conversion of kernel parameters, so a mismatch that the regular
 * launch path would silently convert has to be rejected here.
 * The kernel signature is only available when RTC is disabled, so this function should only be
 * called in that configuration.
 *
 * @tparam F the kernel function
 * @tparam Args the parameters that will be passed to the kernel.
 */
template <typename F, typename... Args> void validateArguments(F f, Args...) {
  typedef decltype(getExpectedArgs(f)) expectedArgsTuple;
  typedef std::tuple<Args...> providedArgsTuple;
  static_assert(std::is_same<expectedArgsTuple, providedArgsTuple>::value,
                "Kernel arguments types must match exactly!");
}
/**
 * @brief Launch a kernel using either HIP or HIP RTC.
 *
 * Without RTC_TESTING the argument types are first validated at compile time
 * (validateArguments) and the kernel is launched with the triple-chevron syntax. With
 * RTC_TESTING the call is forwarded to launchRTCKernel. In both cases hipGetLastError() is
 * checked afterwards through HIP_CHECK.
 *
 * @tparam Typenames A list of typenames used by the kernel (unused if the kernel is not a
 * template).
 * @tparam K The kernel type. Expects a function or template when RTC is disabled. Expects a
 * function pointer instead when RTC is enabled.
 * @tparam Dim Can be either dim3 or int.
 * @tparam Args A list of kernel arguments to be forwarded.
 * @param kernel The kernel to be launched (defined in kernels.hh)
 * @param numBlocks Grid dimension of the launch.
 * @param numThreads Block dimension of the launch.
 * @param memPerBlock Dynamic shared memory size passed as the third launch parameter.
 * @param stream Stream on which the kernel is launched.
 * @param packedArgs A list of kernel arguments to be forwarded.
 */
template <typename... Typenames, typename K, typename Dim, typename... Args>
void launchKernel(K kernel, Dim numBlocks, Dim numThreads, std::uint32_t memPerBlock,
hipStream_t stream, Args&&... packedArgs) {
#ifndef RTC_TESTING
validateArguments(kernel, packedArgs...);
kernel<<<numBlocks, numThreads, memPerBlock, stream>>>(std::forward<Args>(packedArgs)...);
#else
launchRTCKernel<Typenames...>(kernel, numBlocks, numThreads, memPerBlock, stream,
std::forward<Args>(packedArgs)...);
#endif
// Report launch errors right away instead of at the next synchronizing call.
HIP_CHECK(hipGetLastError());
}
//---
// Allocation policy for pinned host memory: Alloc() uses hipHostMalloc and aborts the process on
// failure (HIPCHECK). isPinned and str() let generic code identify the policy.
struct Pinned {
static const bool isPinned = true;
static const char* str() { return "Pinned"; };
static void* Alloc(size_t sizeBytes) {
void* p;
HIPCHECK(hipHostMalloc((void**)&p, sizeBytes));
return p;
};
};
//---
// Allocation policy for pageable host memory: Alloc() uses malloc and aborts the process when the
// returned pointer is null (HIPASSERT). Mirrors the interface of Pinned.
struct Unpinned {
static const bool isPinned = false;
static const char* str() { return "Unpinned"; };
static void* Alloc(size_t sizeBytes) {
void* p = malloc(sizeBytes);
HIPASSERT(p);
return p;
};
};
// Tag type selecting the synchronous copy implementation, MemTraits<Memcpy>.
struct Memcpy {
  static const char* str() {
    return "Memcpy";
  }
};

// Tag type selecting the asynchronous copy implementation, MemTraits<MemcpyAsync>.
struct MemcpyAsync {
  static const char* str() {
    return "MemcpyAsync";
  }
};
// Copy strategy selected by a tag type. Only the Memcpy and MemcpyAsync specializations exist;
// the primary template is declared but never defined.
template <typename C> struct MemTraits;
// Synchronous copy through hipMemcpy; the stream argument is accepted for interface parity and
// ignored. Failures abort the process (HIPCHECK).
template <> struct MemTraits<Memcpy> {
static void Copy(void* dest, const void* src, size_t sizeBytes, hipMemcpyKind kind,
hipStream_t stream) {
(void)stream;
HIPCHECK(hipMemcpy(dest, src, sizeBytes, kind));
}
};
// Asynchronous copy through hipMemcpyAsync on the given stream. This function does not
// synchronize the stream. Failures abort the process (HIPCHECK).
template <> struct MemTraits<MemcpyAsync> {
static void Copy(void* dest, const void* src, size_t sizeBytes, hipMemcpyKind kind,
hipStream_t stream) {
HIPCHECK(hipMemcpyAsync(dest, src, sizeBytes, kind, stream));
}
};
class BlockingContext {
std::atomic_bool blocked{true};
hipStream_t stream;
public:
BlockingContext(hipStream_t s) : blocked(true), stream(s) {}
BlockingContext(const BlockingContext& in) {
blocked = in.blocked_val();
stream = in.stream_val();
}
BlockingContext(const BlockingContext&& in) {
blocked = in.blocked_val();
stream = in.stream_val();
}
void reset() { blocked = true; }
BlockingContext& operator=(const BlockingContext& in) {
blocked = in.blocked_val();
stream = in.stream_val();
return *this;
}
void block_stream() {
blocked = true;
auto blocking_callback = [](hipStream_t, hipError_t, void* data) {
auto blocked = reinterpret_cast<std::atomic_bool*>(data);
while (blocked->load()) {
// Yield this thread till we are waiting
std::this_thread::yield();
}
};
HIP_CHECK(hipStreamAddCallback(stream, blocking_callback, (void*)&blocked, 0));
}
void unblock_stream() {
blocked = false;
}
bool is_blocked() const { return hipStreamQuery(stream) == hipErrorNotReady; }
bool blocked_val() const { return blocked.load(); }
hipStream_t stream_val() const { return stream; }
};
} // namespace HipTest
// This must be called in the beginning of image test app's main() to indicate whether image
// is supported.
// When HipTest::isImageSupported() reports false, an INFO message is logged and the enclosing
// function returns; the macro can therefore only be used in functions returning void.
#define CHECK_IMAGE_SUPPORT \
if (!HipTest::isImageSupported()) { \
INFO("Texture is not support on the device. Skipped."); \
return; \
}
// This must be called in host-device memory coherency tests.
// When HipTest::isPcieAtomicsSupported() reports false, an INFO message is logged and the
// enclosing function returns; the macro can therefore only be used in functions returning void.
#define CHECK_PCIE_ATOMICS_SUPPORT \
if (!HipTest::isPcieAtomicsSupported()) { \
INFO("Pcie atomics is not support on the device. Skipped."); \
return; \
}
// Skips the test (HIP_SKIP_TEST + return) unless all devices can access each other.
// Declares the locals d1 and d2 in the calling scope, so it can be used at most once per scope
// and only in functions returning void.
// The macro ends at the closing brace: the former trailing line continuation spliced whatever
// line followed the macro into its definition.
#define CHECK_P2P_SUPPORT                                                                          \
  int d1 = -1, d2 = -1;                                                                            \
  if (!HipTest::isP2PSupported(d1, d2)) {                                                          \
    std::string msg = "P2P access check failed between dev1:" + std::to_string(d1) + ",dev2:" +    \
        std::to_string(d2);                                                                        \
    HipTest::HIP_SKIP_TEST(msg.c_str());                                                           \
    return;                                                                                        \
  }
// This must be called in the beginning of warp test app's main() to indicate warp match functions
// are supported.
// When HipTest::areWarpMatchFunctionsSupported() reports false, an INFO message is logged and
// the enclosing function returns; usable only in functions returning void.
#define CHECK_WARP_MATCH_FUNCTIONS_SUPPORT \
if (!HipTest::areWarpMatchFunctionsSupported()) { \
INFO("Warp Match Functions are not support on the device. Skipped."); \
return; \
}
// Call GENERATE_CAPTURE macro at the start of the test, before using BEGIN/END_CAPTURE.
// Use BEGIN/END_CAPTURE macros to execute APIs in both stream capturing and non-capturing modes.
// Place BEGIN_CAPTURE before the API call and END_CAPTURE after the call.
// GENERATE_CAPTURE declares the local `capture`, which Catch2's GENERATE sets to true and then
// false, so the test body runs once per mode.
#define GENERATE_CAPTURE() bool capture = GENERATE(true, false);
// Starts capturing on `stream` when `capture` (from GENERATE_CAPTURE) is true and the stream is
// not the null stream. The capture mode is generated over Global, ThreadLocal and Relaxed.
#define BEGIN_CAPTURE(stream) \
if (capture && stream != nullptr) { \
hipStreamCaptureMode flags = GENERATE( \
hipStreamCaptureModeGlobal, hipStreamCaptureModeThreadLocal, hipStreamCaptureModeRelaxed); \
HIP_CHECK(hipStreamBeginCapture(stream, flags)); \
}
// Counterpart of BEGIN_CAPTURE: ends the capture on `stream`, instantiates the captured graph,
// launches it on the same stream and then destroys both the executable graph and the graph.
// Does nothing when `capture` is false or the stream is the null stream. The macro does not
// synchronize the stream after the launch.
#define END_CAPTURE(stream) \
if (capture && stream != nullptr) { \
hipGraph_t graph = nullptr; \
hipGraphExec_t graph_exec = nullptr; \
HIP_CHECK(hipStreamEndCapture(stream, &graph)); \
HIP_CHECK(hipGraphInstantiate(&graph_exec, graph, nullptr, nullptr, 0)); \
HIP_CHECK(hipGraphLaunch(graph_exec, stream)); \
HIP_CHECK(hipGraphExecDestroy(graph_exec)); \
HIP_CHECK(hipGraphDestroy(graph)); \
}
// These macros are used for testing behaviour when sync APIs are being captured. Before
// calling BEGIN_CAPTURE_SYNC, a hipError_t variable (capture_err) should be initialized to
// hipSuccess and passed to this macro. The scenario with using this macro should look like this:
// 1. BEGIN_CAPTURE_SYNC(capture_err, rlx_mode_allowed)
// 2. HIP_CHECK_ERROR(SyncAPI, capture_err)
// 3. END_CAPTURE_SYNC(capture_err)
// Some sync APIs are allowed in relaxed capture mode which is indicated with
// rlx_mode_allowed variable. For other two modes, those APIs return
// hipErrorStreamCaptureUnsupported. These macros shouldn't be used with hipStreamSync and
// hipDeviceSync during capture.
// The macro declares the locals `stream` and `capture` in the calling scope. When capturing, it
// creates the stream, begins a capture in a generated mode and sets capture_err to
// hipErrorStreamCaptureImplicit if rlx_mode_allowed is false, or to
// hipErrorStreamCaptureUnsupported if it is true and the mode is not Relaxed.
#define BEGIN_CAPTURE_SYNC(capture_err, rlx_mode_allowed) \
hipStream_t stream; \
GENERATE_CAPTURE(); \
if (capture) { \
HIP_CHECK(hipStreamCreate(&stream)); \
hipStreamCaptureMode mode = GENERATE( \
hipStreamCaptureModeGlobal, hipStreamCaptureModeThreadLocal, hipStreamCaptureModeRelaxed); \
HIP_CHECK(hipStreamBeginCapture(stream, mode)); \
if (!rlx_mode_allowed) { \
capture_err = hipErrorStreamCaptureImplicit; \
} else if (mode != hipStreamCaptureModeRelaxed) { \
capture_err = hipErrorStreamCaptureUnsupported; \
} \
}
// If test has other HIP API calls that depend on sync call that is captured and fails, the rest of
// the test (except freeing the memory) should be skipped after calling END_CAPTURE_SYNC() by
// testing if previously created hipError_t variable (capture_err) doesn't equal hipSuccess.
//
// Ends the capture started by BEGIN_CAPTURE_SYNC and destroys the stream. When capture_err is
// not hipSuccess the capture is expected to have been invalidated, so hipStreamEndCapture must
// return hipErrorStreamCaptureInvalidated; otherwise it must succeed.
// `graph` starts out as nullptr so that the check before hipGraphDestroy never reads an
// indeterminate handle when the end-capture call fails without writing to it.
#define END_CAPTURE_SYNC(capture_err)                                                              \
  if (capture) {                                                                                   \
    hipGraph_t graph = nullptr;                                                                    \
    hipError_t stream_err = hipSuccess;                                                            \
    if (capture_err != hipSuccess) {                                                               \
      stream_err = hipErrorStreamCaptureInvalidated;                                               \
    }                                                                                              \
    HIP_CHECK_ERROR(hipStreamEndCapture(stream, &graph), stream_err);                              \
    if (graph != nullptr) {                                                                        \
      HIP_CHECK(hipGraphDestroy(graph));                                                           \
    }                                                                                              \
    HIP_CHECK(hipStreamDestroy(stream));                                                           \
  }
@@ -0,0 +1,197 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#pragma once
#include <hip/hip_runtime.h>
#include <hip/hiprtc.h>
#include <atomic>
#include <mutex>
#include <vector>
#include <iostream>
#include <string>
#include <set>
#include <unordered_map>
// OS Check
// Exactly one of HT_WIN / HT_LINUX is defined to 1 and the other to 0, so both can be tested
// with plain #if. Any other operating system is a hard compile error.
#if defined(_WIN32)
#define HT_WIN 1
#define HT_LINUX 0
#elif defined(__linux__)
#define HT_WIN 0
#define HT_LINUX 1
#else
#error "OS not recognized"
#endif
// Platform check
// Exactly one of HT_AMD / HT_NVIDIA is defined to 1 and the other to 0, derived from the
// __HIP_PLATFORM_AMD__ / __HIP_PLATFORM_NVIDIA__ macros. A build with neither is a hard compile
// error.
#if defined(__HIP_PLATFORM_AMD__)
#define HT_AMD 1
#define HT_NVIDIA 0
#elif defined(__HIP_PLATFORM_NVIDIA__)
#define HT_AMD 0
#define HT_NVIDIA 1
#else
#error "Platform not recognized"
#endif
// Test-run configuration held by TestContext.
struct Config_ {
  std::vector<std::string> json_files;  // Json files
  std::string platform;                 // amd/nvidia
  std::string os;                       // windows/linux
};
typedef struct Config_ Config;
// Store Multi threaded results
// One record per HIP_CHECK_THREAD / REQUIRE_THREAD invocation; the records are buffered by
// TestContext::addResults.
struct HCResult {
size_t line; // Line of check (HIP_CHECK_THREAD or REQUIRE_THREAD)
std::string file; // File name of the check
hipError_t result; // hipResult for HIP_CHECK_THREAD, for conditions its hipSuccess
std::string call; // Call of HIP API or a bool condition
bool conditionsResult; // If bool condition, result of call. For HIP Calls its true
// b defaults to true so that HIP API results only need to supply the first four arguments.
HCResult(size_t l, std::string f, hipError_t r, std::string c, bool b = true)
: line(l), file(f), result(r), call(c), conditionsResult(b) {}
};
// Process-wide test context, accessed through TestContext::get().
// It holds OS/platform flags, configuration file information, the buffer of multi-threaded
// check results and the cache of already compiled RTC kernels. The class is neither copyable
// nor assignable.
class TestContext {
bool p_windows = false, p_linux = false; // OS
bool amd = false, nvidia = false; // HIP Platform
std::string exe_path;
std::string current_test;
std::set<std::string> skip_test;
std::string json_file_;
std::vector<std::string> platform_list_ = {"amd", "nvidia"};
std::vector<std::string> os_list_ = {"windows", "linux", "all"};
std::vector<std::string> amd_arch_list_ = {};
// Module and kernel function belonging to one compiled RTC kernel.
struct rtcState {
hipModule_t module;
hipFunction_t kernelFunction;
};
// Compiled RTC kernels; see trackRtcState() and getFunction(), which take the kernel name
// expression as their lookup argument.
std::unordered_map<std::string, rtcState> compiledKernels{};
Config config_;
std::string& getCommonJsonFile();
std::string substringFound(std::vector<std::string> list, std::string filename);
void detectOS();
void detectPlatform();
void getConfigFiles();
void setExePath(int, char**);
void parseOptions(int, char**);
bool parseJsonFiles();
std::string getMatchingConfigFile(std::string config_dir);
std::string getCurrentArch();
const Config& getConfig() const { return config_; }
// Private: instances are only created through get().
TestContext(int argc, char** argv);
// Multi threaded checks helpers
std::mutex resultMutex;
std::vector<HCResult> results; // Multi threaded test results buffer
std::atomic<bool> hasErrorOccured_{false};
public:
// Returns the single instance. It is constructed on the first call with that call's argc/argv;
// arguments passed to later calls are ignored.
static TestContext& get(int argc = 0, char** argv = nullptr) {
static TestContext instance(argc, argv);
return instance;
}
// Returns the value of the environment variable `var`, or an empty string when the lookup
// yields nothing. On Windows the value is read through getenv_s into a 4096-byte buffer.
static std::string getEnvVar(std::string var) {
#if defined(_WIN32)
constexpr rsize_t MAX_LEN = 4096;
char dstBuf[MAX_LEN];
size_t dstSize;
if (!::getenv_s(&dstSize, dstBuf, MAX_LEN, var.c_str())) {
return std::string(dstBuf);
}
#elif defined(__linux__)
char* val = std::getenv(var.c_str());
if (val != NULL) {
return std::string(val);
}
#else
#error "OS not recognized"
#endif
return std::string("");
}
bool isWindows() const;
bool isLinux() const;
bool isNvidia() const;
bool isAmd() const;
bool skipTest() const;
const std::string& getCurrentTest() const { return current_test; }
std::string currentPath() const;
// Multi threaded results helpers
void addResults(HCResult r); // Add multi threaded results
void finalizeResults(); // Validate on all results
bool hasErrorOccured(); // Query if an error has occurred
/**
* @brief Unload all loaded modules.
* Note: This function needs to be called at the end of each test that uses RTC.
* It is not possible to unload the loaded modules without adding explicit code to the end
* of each test. This function exists only to provide a clean way to exit a test when using RTC.
* However, not unloading a module explicitly shouldn't have any effect on the outcome of
* the test.
*/
void cleanContext();
/**
* @brief Keeps track of all the already compiled rtc kernels.
*
* @param kernelNameExpression The name expression (e.g. hipTest::vectorADD<float>).
* @param loadedModule The loaded module.
* @param kernelFunction The hipFunction that will be used to run the kernel in the future.
*/
void trackRtcState(std::string kernelNameExpression, hipModule_t loadedModule,
hipFunction_t kernelFunction);
/**
* @brief Get the already compiled hip rtc kernel function if it exists.
*
* @param kernelNameExpression The name expression (e.g. hipTest::vectorADD<float>).
* @return the hipFunction if it exists. nullptr otherwise
*/
hipFunction_t getFunction(const std::string kernelNameExpression);
TestContext(const TestContext&) = delete;
void operator=(const TestContext&) = delete;
~TestContext();
};
// Logging switch for LogPrintf: true when the HT_LOG_ENABLE environment variable has a non-empty
// value. Evaluated once per translation unit during static initialization.
static bool _log_enable = !TestContext::getEnvVar("HT_LOG_ENABLE").empty();
// printing logs
// printf-style logging that is active only when _log_enable is true; a newline is appended after
// every message. __VA_ARGS__ is used without a guard for the empty case, so at least one argument
// has to follow the format string.
#define LogPrintf(format, ...) \
{ \
if(_log_enable) { \
printf(format, __VA_ARGS__); \
printf("%c", '\n'); \
} \
}
@@ -0,0 +1,239 @@
/*
Copyright (c) 2021 - 2024 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_common.hh>
// Test groups are named after the group names from hip_api_runtime.h, with a "Test" suffix added
/**
* @defgroup CallbackTest Callback Activity APIs
* @{
* This section describes tests for the callback/Activity of HIP runtime API.
* @}
*/
/**
* @defgroup ContextTest Context Management
* @{
* This section describes tests for the context management functions of HIP runtime API.
* @warning All Context Management APIs are **deprecated** and shall not be implemented.
* @}
*/
/**
* @defgroup AtomicsTest Device Atomics
* @{
* This section describes tests for the Device Atomic APIs.
* @}
*/
/**
* @defgroup DeviceLanguageTest Device Language
* @{
* This section describes tests for the Device Language API.
* @}
*/
/**
* @defgroup DeviceTest Device Management
* @{
* This section describes tests for device management functions of HIP runtime API.
* @}
*/
/**
* @defgroup DriverTest Initialization and Version
* @{
* This section describes tests for the initialization and version functions of HIP runtime API.
* @}
*/
/**
* @defgroup DynamicLoadingTest Kernel Loading Management
* @{
* This section describes the different kernel launch approaches.
* @}
*/
/**
* @defgroup ErrorTest Error Handling
* @{
* This section describes tests for the error handling functions of HIP runtime API.
* @}
*/
/**
* @defgroup EventTest Event Management
* @{
* This section describes tests for the event management functions of HIP runtime API.
* @}
*/
/**
* @defgroup ExecutionTest Execution Control
* @{
* This section describes tests for the execution control functions of HIP runtime API.
* @}
*/
/**
* @defgroup GraphTest Graph Management
* @{
* This section describes tests for the graph management types & functions of HIP runtime API.
* @}
*/
/**
* @defgroup KernelTest Kernel Functions Management
* @{
* This section describes the various kernel functions invocation.
* @}
*/
/**
* @defgroup SyncthreadsTest Synchronization Functions
* @{
* This section describes tests for Synchronization Functions.
* @}
*/
/**
* @defgroup ThreadfenceTest Memory Fence Functions
* @{
* This section describes tests for Memory Fence Functions.
* @}
*/
/**
* @defgroup MemoryTest Memory Management APIs
* @{
* This section describes the memory management types & functions of HIP runtime API.
* @}
*/
/**
* @defgroup PeerToPeerTest PeerToPeer Device Memory Access
* @{
* This section describes tests for the PeerToPeer device memory access functions of HIP runtime
* API.
* @warning PeerToPeer support is experimental.
* @}
*/
/**
* @defgroup PerformanceTest Performance tests
* @{
* This section describes performance tests for the target API groups and use-cases.
* @}
*/
/**
* @defgroup ShflTest Warp Shuffle Function Management
* @{
* This section describes the warp shuffle types & functions of HIP runtime API.
* @}
*/
/**
* @defgroup p2pTest P2P Management
* @{
* This section describes the P2P management types & functions of HIP runtime API.
* @}
*/
/**
* @defgroup StreamOTest Ordered Memory Allocator
* @{
* This section describes the tests for Stream Ordered Memory Allocator functions of HIP runtime
* API.
* @}
*/
/**
* @defgroup StreamTest Stream Management
* @{
* This section describes the stream management types & functions of HIP runtime API.
* @}
*/
/**
* @defgroup ModuleTest Module Management
* @{
* This section describes the module management types & functions of HIP runtime API.
* @}
*/
/**
* @defgroup TextureTest Texture Management
* @{
* This section describes tests for the texture management functions of HIP runtime API.
* @}
*/
/**
* @defgroup VectorTypeTest Vector types
* @{
* This section describes tests for the Vector type functions and operators.
* @}
*/
/**
* @defgroup MathTest Math Device Functions
* @{
* This section describes tests for device math functions of HIP runtime API.
* @}
*/
/**
* @defgroup PrintfTest Printf API Management
* @{
* This section describes the various Printf use case Scenarios.
* @}
*/
/**
* @defgroup SurfaceTest Surface Management
* @{
* This section describes tests for the surface management functions of HIP runtime API.
* @}
*/
/**
* @defgroup ComplexTest Complex type
* @{
* This section describes tests for the Complex type functions.
* @}
*/
/**
* @defgroup VirtualMemoryManagementTest Virtual Memory Management APIs
* @{
* This section describes the virtual memory management types & functions of HIP runtime API.
* @}
*/
/**
* @defgroup ModuleTest Module Functions Management
* @{
* This section describes the loading of modules from code object files and invocation of different kernels.
* @}
*/
@@ -0,0 +1,40 @@
/*
Copyright (c) 2021 - 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#pragma once
#include <string>
#include <vector>
#include <assert.h>
#include <unordered_set>
// Catch Test Features
// Identifiers for optional capabilities; values of this enum are what CheckIfFeatSupported()
// takes as its first argument.
typedef enum CTFeatures {
CT_FEATURE_FINEGRAIN_HWSUPPORT = 0x0, // FINEGRAIN Supported Hardware.
CT_FEATURE_HMM = 0x1, // HMM Enabled
CT_FEATURE_TEXTURES_NOT_SUPPORTED = 0x2, // Textures not supported
// NOTE(review): presumably an end marker / feature count rather than a real feature - confirm.
CT_FEATURE_LAST = 0x3
} CTFeatures;
// Feature / target query helpers. Only the declarations are visible in this header, so the
// notes below describe the signatures and hedge anything about behaviour.

// Takes a feature identifier and an architecture name string and returns a bool.
// NOTE(review): presumably true when `test_feat` is available on `gcn_arch` - confirm against
// the definition.
bool CheckIfFeatSupported(enum CTFeatures test_feat, std::string gcn_arch);
// `genericTarget` is a non-const reference, so it looks like an output parameter.
// NOTE(review): presumably filled with the generic target for `agentTarget`, with the return
// value reporting whether a mapping exists - confirm against the definition.
bool getGenericTarget(const std::string& agentTarget, std::string& genericTarget);
// Both parameters are optional (defaults: nullptr and device 0).
// NOTE(review): presumably falls back to the device `deviceId` when no name is given - confirm.
bool isGenericTargetSupported(char* gcnArchName = nullptr, int deviceId = 0);
@@ -0,0 +1,89 @@
/*
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#pragma once
// Selects between <filesystem> and <experimental/filesystem> and exposes the chosen library as
// the namespace alias `fs`. INCLUDE_STD_FILESYSTEM_EXPERIMENTAL ends up as 1 when the
// experimental header is used and 0 otherwise. If the macro is already defined when this point
// is reached, the whole section (including the `fs` alias) is skipped.

// We haven't checked which filesystem to include yet
#ifndef INCLUDE_STD_FILESYSTEM_EXPERIMENTAL

// Check for feature test macro for <filesystem>
#if defined(__cpp_lib_filesystem)
#define INCLUDE_STD_FILESYSTEM_EXPERIMENTAL 0

// Check for feature test macro for <experimental/filesystem>
#elif defined(__cpp_lib_experimental_filesystem)
#define INCLUDE_STD_FILESYSTEM_EXPERIMENTAL 1

// We can't check if headers exist...
// Let's assume experimental to be safe
#elif !defined(__has_include)
#define INCLUDE_STD_FILESYSTEM_EXPERIMENTAL 1

// Check if the header "<filesystem>" exists
#elif __has_include(<filesystem>)

// If we're compiling on Visual Studio and are not compiling with C++17,
// we need to use experimental
#ifdef _MSC_VER

// Check and include header that defines "_HAS_CXX17"
#if __has_include(<yvals_core.h>)
#include <yvals_core.h>

// Check for enabled C++17 support
#if defined(_HAS_CXX17) && _HAS_CXX17
// We're using C++17, so let's use the normal version
#define INCLUDE_STD_FILESYSTEM_EXPERIMENTAL 0
#endif
#endif

// If the macro isn't defined yet, that means any of the other
// VS specific checks failed, so we need to use experimental
#ifndef INCLUDE_STD_FILESYSTEM_EXPERIMENTAL
#define INCLUDE_STD_FILESYSTEM_EXPERIMENTAL 1
#endif

// Not on Visual Studio. Let's use the normal version
#else  // #ifdef _MSC_VER
#define INCLUDE_STD_FILESYSTEM_EXPERIMENTAL 0
#endif

// Check if the header "<experimental/filesystem>" exists
#elif __has_include(<experimental/filesystem>)
#define INCLUDE_STD_FILESYSTEM_EXPERIMENTAL 1

// Fail if neither header is available with a nice error message.
// The message must stay on a single logical line: a directive ends at the newline.
#else
#error Could not find system header "<filesystem>" || "<experimental/filesystem>"
#endif

// We previously determined that we need the experimental version
#if INCLUDE_STD_FILESYSTEM_EXPERIMENTAL
// Include it. The macro below only has to be defined before the include; no trailing ';'
// so that it expands to a plain value.
#define _SILENCE_EXPERIMENTAL_FILESYSTEM_DEPRECATION_WARNING 1
#include <experimental/filesystem>

// We need the alias from std::experimental::filesystem to std::filesystem
namespace fs = std::experimental::filesystem;

// We have a decent compiler and can use the normal version
#else
// Include it
#include <filesystem>
namespace fs = std::filesystem;
#endif

#endif  // #ifndef INCLUDE_STD_FILESYSTEM_EXPERIMENTAL
@@ -0,0 +1,70 @@
/*
Copyright (c) 2021 - 2021 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#pragma once
#include "hip_test_common.hh"
#ifdef __linux__
#include <sys/sysinfo.h>
#else
#include <windows.h>
#include <sysinfoapi.h>
#endif
namespace HipTest {
// Returns the device count written by hipGetDeviceCount(). The call is wrapped in HIP_CHECK,
// so a failing call is handled by that macro rather than by the caller.
// NOTE(review): the name looks like a typo for "getDeviceCount"; it is kept because callers
// outside this header may use it.
static inline int getGeviceCount() {
int dev = 0;
HIP_CHECK(hipGetDeviceCount(&dev));
return dev;
}
// Get free memory from the system, in MB (bytes / (1024 * 1024), rounded down).
// Returns 0 when the amount cannot be determined: the OS query failed or the platform is
// neither Linux nor Windows. getHostThreadCount() already treats 0 as "no threads".
static inline size_t getMemoryAmount() {
#ifdef __linux__
  struct sysinfo info{};
  if (sysinfo(&info) != 0) {
    return 0;
  }
  // sysinfo() reports memory sizes as multiples of mem_unit bytes, so scale before converting.
  const unsigned long long freeBytes =
      static_cast<unsigned long long>(info.freeram) * info.mem_unit;
  return static_cast<size_t>(freeBytes / (1024ULL * 1024ULL));  // MB
#elif defined(_WIN32)
  MEMORYSTATUSEX statex;
  statex.dwLength = sizeof(statex);
  if (!GlobalMemoryStatusEx(&statex)) {
    return 0;
  }
  return static_cast<size_t>(statex.ullAvailPhys / (1024ULL * 1024ULL));  // MB
#else
  return 0;
#endif
}
// Computes how many host threads to use given a per-thread memory budget.
//
// memPerThread  memory needed by one thread, in the unit returned by getMemoryAmount()
//               (that function documents its result as MB); 0 yields 0.
// maxThreads    upper limit on the result; 0 means "no limit".
//
// Returns the smaller of the hardware concurrency and (free memory / memPerThread), capped at
// maxThreads when that is non-zero. Returns 0 when the hardware concurrency or the free memory
// is reported as 0.
static inline size_t getHostThreadCount(const size_t memPerThread, const size_t maxThreads) {
  if (memPerThread == 0) return 0;
  const size_t memAmount = getMemoryAmount();
  const size_t processorCount = std::thread::hardware_concurrency();
  if (processorCount == 0 || memAmount == 0) return 0;

  // Comparing against the quotient gives the same result as comparing
  // processorCount * memPerThread with memAmount, but cannot overflow.
  const size_t memoryBound = memAmount / memPerThread;
  size_t threadCount = (processorCount < memoryBound) ? processorCount : memoryBound;

  if (maxThreads > 0 && threadCount > maxThreads) {
    threadCount = maxThreads;
  }
  return threadCount;
}
} // namespace HipTest
@@ -0,0 +1,107 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#pragma once
#include <hip/hip_runtime.h>
namespace HipTest {
// Element-wise C_d[i] = A_d[i] + B_d[i] for i < NELEM. Each thread starts at its global index
// (blockIdx.x * blockDim.x + threadIdx.x) and advances by blockDim.x * gridDim.x.
template <typename T> __global__ void vectorADD(const T* A_d, const T* B_d, T* C_d, size_t NELEM) {
  const size_t step = blockDim.x * gridDim.x;
  size_t idx = (blockIdx.x * blockDim.x + threadIdx.x);

  while (idx < NELEM) {
    C_d[idx] = A_d[idx] + B_d[idx];
    idx += step;
  }
}
// Element-wise C_d[i] = A_d[i] - B_d[i] for i < NELEM. Each thread starts at its global index
// (blockIdx.x * blockDim.x + threadIdx.x) and advances by blockDim.x * gridDim.x.
template <typename T> __global__ void vectorSUB(const T* A_d, const T* B_d, T* C_d, size_t NELEM) {
  const size_t step = blockDim.x * gridDim.x;
  size_t idx = (blockIdx.x * blockDim.x + threadIdx.x);

  while (idx < NELEM) {
    C_d[idx] = A_d[idx] - B_d[idx];
    idx += step;
  }
}
// Element-wise C_d[i] = A_d[i] + B_d[i], walking the index range from the high end downwards.
// Each thread starts at NELEM - step + (its global index) and moves down by step until the
// index becomes negative; the start value is computed in unsigned arithmetic and then stored
// in a signed 64-bit index, exactly as the loop condition requires.
template <typename T>
__global__ void vectorADDReverse(const T* A_d, const T* B_d, T* C_d, size_t NELEM) {
  const size_t lane = (blockIdx.x * blockDim.x + threadIdx.x);
  const size_t step = blockDim.x * gridDim.x;

  int64_t idx = NELEM - step + lane;
  while (idx >= 0) {
    C_d[idx] = A_d[idx] + B_d[idx];
    idx -= step;
  }
}
// Writes C_d[i] = A_d[i] + (T)count for i < NELEM, and repeats the whole sweep `count` times.
// The repetition is deliberate: it only exists to make the kernel run longer.
template <typename T> __global__ void addCount(const T* A_d, T* C_d, size_t NELEM, int count) {
  const size_t lane = (blockIdx.x * blockDim.x + threadIdx.x);
  const size_t step = blockDim.x * gridDim.x;

  for (int pass = 0; pass < count; ++pass) {
    size_t idx = lane;
    while (idx < NELEM) {
      C_d[idx] = A_d[idx] + (T)count;
      idx += step;
    }
  }
}
// Writes C_d[i] = A_d[i] + (T)count walking the indices from the high end downwards, and
// repeats the whole sweep `count` times. The repetition is deliberate: it only exists to make
// the kernel run longer.
template <typename T>
__global__ void addCountReverse(const T* A_d, T* C_d, int64_t NELEM, int count) {
  const size_t lane = (blockIdx.x * blockDim.x + threadIdx.x);
  const size_t step = blockDim.x * gridDim.x;

  for (int pass = 0; pass < count; ++pass) {
    int64_t idx = NELEM - step + lane;
    while (idx >= 0) {
      C_d[idx] = A_d[idx] + (T)count;
      idx -= step;
    }
  }
}
// Stores `val` into C_d[i], walking the indices from the high end downwards. Each thread starts
// at NELEM - step + (its global index) and moves down by step until the index becomes negative.
template <typename T> __global__ void memsetReverse(T* C_d, T val, int64_t NELEM) {
  const size_t lane = (blockIdx.x * blockDim.x + threadIdx.x);
  const size_t step = blockDim.x * gridDim.x;

  int64_t idx = NELEM - step + lane;
  while (idx >= 0) {
    C_d[idx] = val;
    idx -= step;
  }
}
// Element-wise square: C_d[i] = A_d[i] * A_d[i] for i < N_ELMTS. Each thread starts at its
// global index and advances by blockDim.x * gridDim.x.
template <typename T> __global__ void vector_square(const T* A_d, T* C_d, size_t N_ELMTS) {
  const size_t step = blockDim.x * gridDim.x;
  size_t idx = (blockIdx.x * blockDim.x + threadIdx.x);

  while (idx < N_ELMTS) {
    C_d[idx] = A_d[idx] * A_d[idx];
    idx += step;
  }
}
// Element-wise cube: C_d[i] = A_d[i] * A_d[i] * A_d[i] for i < N_ELMTS. Each thread starts at
// its global index and advances by blockDim.x * gridDim.x.
template <typename T> __global__ void vector_cubic(const T* A_d, T* C_d, size_t N_ELMTS) {
  const size_t step = blockDim.x * gridDim.x;
  size_t idx = (blockIdx.x * blockDim.x + threadIdx.x);

  while (idx < N_ELMTS) {
    C_d[idx] = A_d[idx] * A_d[idx] * A_d[idx];
    idx += step;
  }
}
} // namespace HipTest
@@ -0,0 +1,136 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#pragma once
#include "hip_test_common.hh"
#include "hip_test_filesystem.hh"
#include <string>
#include <array>
#include <cstdlib>
#include <random>
#include <fstream>
#include <streambuf>
#include <thread>
#include <future>
namespace hip {
/*
Class to spawn a process in isolation and test its standard output and return status.
Good for printf tests and environment variable tests.

How to use:
  Have the stand alone exe in the same folder
  Init a class using hip::SpawnProc proc("ExeName", yes_or_no_to_capture_output);
  proc.run("Optional command line args");

When output capture is enabled, the child's stdout is redirected into a randomly named file in
the temp directory; run() reads that file into memory (see getOutput()) and then removes it.
*/
class SpawnProc {
  std::string exeName;            // Command-ready executable path (quoted on Windows if needed)
  std::string resultStr;          // stdout captured by the most recent run()
  std::string tmpFileName;        // Unquoted path of the capture file; empty when not capturing
  std::future<int> ret_from_run;  // Result of the run started by run_async()
  bool captureOutput;

  // Returns `len` random lowercase letters ('a'..'z').
  std::string getRandomString(size_t len = 6) {
    std::random_device dev;
    std::mt19937 rng(dev());
    std::uniform_int_distribution<std::mt19937::result_type> dist(0, 25);

    std::string res;
    for (size_t i = 0; i < len; i++) {
      res += static_cast<char>('a' + dist(rng));
    }
    return res;
  }

  // Wraps `text` in double quotes when it contains a space, otherwise returns it unchanged.
  static std::string quoteIfSpaced(const std::string& text) {
    return (text.find(" ", 0) == std::string::npos) ? text : ("\"" + text + "\"");
  }

 public:
  // exeName_       executable name, resolved relative to TestContext::get().currentPath()
  // captureOutput_ when true, stdout of every run is captured and available via getOutput()
  // Fails the current test (REQUIRE) if the executable does not exist or the chosen capture
  // file already exists.
  SpawnProc(std::string exeName_, bool captureOutput_ = false)
      : exeName(exeName_), captureOutput(captureOutput_) {
    auto dir = fs::path(TestContext::get().currentPath());
    dir /= exeName;
    exeName = dir.string();
    // On Windows, fs::exists returns false without extension.
    if (TestContext::get().isWindows()) {
      if (fs::path(exeName).extension().empty()) {
        exeName += ".exe";
      }
    }
    INFO("Testing that exe exists: " << exeName);
    REQUIRE(fs::exists(exeName));

    if (captureOutput) {
      auto path = fs::temp_directory_path();
      path /= getRandomString();
      tmpFileName = path.string();
      INFO("Testing that capture file does not exist already: " << tmpFileName);
      REQUIRE(!fs::exists(tmpFileName));
    }

    if (TestContext::get().isWindows()) {
      // Only the command string gets quoted. tmpFileName stays unquoted because it is also
      // opened directly as a file, and a path wrapped in quotes cannot be opened.
      exeName = quoteIfSpaced(exeName);
    }
  }

  // Runs the executable synchronously through std::system() and returns its status.
  // On Linux the exit code is returned when the child exited normally; otherwise the raw
  // (non-zero) status from std::system() is returned so that an abnormal termination is never
  // reported as 0. On other platforms the std::system() result is returned as is.
  int run(std::string commandLineArgs = "") {
    const bool onWindows = TestContext::get().isWindows();

    std::string execCmd = exeName;
    // Append command line args
    if (commandLineArgs.size() > 0) {
      execCmd += " ";  // Add space for command line args
      execCmd += commandLineArgs;
    }

    if (captureOutput) {
      execCmd += " > ";
      execCmd += onWindows ? quoteIfSpaced(tmpFileName) : tmpFileName;
    }

    if (onWindows) {
      // The complete command line is quoted once more when it contains spaces.
      execCmd = quoteIfSpaced(execCmd);
    }

    auto res = std::system(execCmd.c_str());

    if (captureOutput) {
      std::ifstream t(tmpFileName.c_str());
      resultStr =
          std::string((std::istreambuf_iterator<char>(t)), std::istreambuf_iterator<char>());
      t.close();
      // Best-effort cleanup so repeated runs do not leave capture files behind.
      std::error_code removeError;
      fs::remove(tmpFileName, removeError);
    }
#if HT_LINUX
    return WIFEXITED(res) ? WEXITSTATUS(res) : res;
#else
    return res;
#endif
  }

  // Starts run(commandLineArgs) on another thread; collect the result with wait().
  void run_async(std::string commandLineArgs = "") {
    ret_from_run = std::async(std::launch::async, &hip::SpawnProc::run, this, commandLineArgs);
  }

  // Blocks until the run started by run_async() finishes and returns its status.
  int wait() { return ret_from_run.get(); }

  // Returns the stdout captured by the most recent run() (empty when capture is disabled).
  std::string getOutput() { return resultStr; }
};
} // namespace hip
@@ -0,0 +1,280 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#pragma once
#include <hip/hip_runtime.h>
#include <hip/hiprtc.h>
#include <kernel_mapping.hh>
#include <catch.hpp>
#include <string>
#include <vector>
#include <iostream>
#include <fstream>
#include <sstream>
#include <set>
#include <mutex>
#include "hip/hip_runtime_api.h"
#include "hip_test_context.hh"
#define STRINGIFY(x) #x
namespace HipTest {
// Describes one kernel argument for alignArguments(): where its bytes are and how they have to
// be placed in the packed argument buffer.
struct KernelArgument {
const void* ptr; // Address the argument bytes are copied from
size_t sizeRequirement; // Number of bytes copied from ptr
size_t alignmentRequirement; // Alignment of the argument's offset inside the packed buffer
};
/**
 * @brief Builds the name expression for a kernel from its name and template arguments.
 *
 * @param kernelName the name of the kernel (e.g. "HipTest::VectorADD")
 * @param typenames the template arguments used by this kernel (e.g. "float").
 * @return std::string kernelName followed by the comma-separated typenames in angle brackets
 * (e.g. "HipTest::VectorADD<float>"). Returns kernelName unchanged when typenames is empty.
 */
inline std::string reconstructExpression(std::string& kernelName,
                                         std::vector<std::string>& typenames) {
  if (typenames.empty()) {
    return kernelName;
  }

  std::string expression = kernelName;
  const char* separator = "<";
  for (const std::string& typeName : typenames) {
    expression += separator;
    expression += typeName;
    separator = ",";
  }
  expression += ">";
  return expression;
}
/**
 * @brief Packs the kernel arguments into one contiguous byte buffer, as passed to
 * hipModuleLaunchKernel through HIP_LAUNCH_PARAM_BUFFER_POINTER.
 *
 * Before each argument, zero bytes are appended until the running offset is a multiple of that
 * argument's alignment requirement; then sizeRequirement bytes are copied from its ptr.
 *
 * @param args list of arguments for the kernel and their alignment requirements.
 * @return std::vector<char> the packed arguments ready to be passed on to hipModuleLaunchKernel
 */
inline std::vector<char> alignArguments(std::vector<KernelArgument>& args) {
  std::vector<char> packed{};
  int offset = 0;  // number of bytes appended to `packed` so far

  for (auto& argument : args) {
    // (alignment - 1) & -offset is the distance from offset to the next multiple of alignment
    int padding = (argument.alignmentRequirement - 1) & (~offset + 1);
    packed.insert(std::end(packed), padding, 0);
    offset += padding;

    const char* bytes{reinterpret_cast<const char*>(argument.ptr)};
    packed.insert(std::end(packed), bytes, bytes + argument.sizeRequirement);
    offset += argument.sizeRequirement;
  }

  return packed;
}
// Returns the compiled code of `rtcProgram` as a byte vector: the size is queried with
// hiprtcGetCodeSize() and the bytes are fetched with hiprtcGetCode(). Both calls are wrapped in
// REQUIRE, so a non-success result fails the current test.
inline std::vector<char> getKernelCode(hiprtcProgram& rtcProgram) {
size_t codeSize;
REQUIRE(HIPRTC_SUCCESS == hiprtcGetCodeSize(rtcProgram, &codeSize));
// The buffer is sized up front so hiprtcGetCode() can write straight into it
std::vector<char> code(codeSize);
REQUIRE(HIPRTC_SUCCESS == hiprtcGetCode(rtcProgram, code.data()));
return code;
}
/**
 * @brief Compiles a kernel using HIP RTC
 *
 * The kernel source is read from the file that mapKernelToFileName associates with rtcKernel;
 * every line containing "#include" is dropped before compilation. Every HIP RTC call is checked
 * with REQUIRE, so a failure fails the current test.
 *
 * @param rtcKernel the name of the kernel to compile (key into mapKernelToFileName; an unknown
 * name makes std::map/unordered_map::at throw).
 * @param kernelNameExpression the name expression to be added to the RTC program (e.g.
 * HipTest::VectorADD<float>)
 * @return hiprtcProgram the compiled rtc program.
 */
inline hiprtcProgram compileRTC(std::string& rtcKernel, std::string& kernelNameExpression) {
  std::string fileName = mapKernelToFileName.at(rtcKernel);
  // NOTE(review): STRINGIFY is a single-level '#x', so KERNELS_PATH is not macro-expanded here
  // and the prefix is the literal text "KERNELS_PATH" - confirm this is intended.
  std::string filePath{STRINGIFY(KERNELS_PATH) + fileName};

  INFO("Opening Kernel File: " << filePath);
  std::ifstream kernelFile{filePath};
  REQUIRE(kernelFile.is_open());

  std::stringstream stringStream;
  std::string line;
  while (getline(kernelFile, line)) {
    /* Skip the include directive since it is not part of the kernel */
    if (line.find("#include") != std::string::npos) {
      continue;
    }
    stringStream << line << '\n';
  }
  kernelFile.close();

  std::string kernelCode{stringStream.str()};
  INFO("RTC Kernel Code:\n" << kernelCode);

  hiprtcProgram rtcProgram;
  REQUIRE(HIPRTC_SUCCESS == hiprtcCreateProgram(&rtcProgram, kernelCode.c_str(),
                                                (fileName + ".cu").c_str(), 0, nullptr, nullptr));

  std::vector<const char*> options{};
#ifdef __HIP_PLATFORM_AMD__
  int deviceCount;
  REQUIRE(hipSuccess == hipGetDeviceCount(&deviceCount));
  // A set keeps one option string per distinct architecture; the strings must outlive
  // `options`, which only stores pointers into them.
  std::set<std::string> architectures{};
  for (int i = 0; i < deviceCount; ++i) {
    hipDeviceProp_t props;
    REQUIRE(hipSuccess == hipGetDeviceProperties(&props, i));
    architectures.insert(std::string{"--gpu-architecture="} + props.gcnArchName);
  }
  for (auto& architecture : architectures) {
    options.push_back(architecture.c_str());
  }
#else
  options.push_back("--fmad=false");
#endif

  // Only the first option is handed to the compiler, as before; the count drops to 0 when no
  // option was collected so that hiprtc never reads past the end of an empty array.
  // NOTE(review): with several distinct GPU architectures only the first one is used - confirm.
  const int optionCount = options.empty() ? 0 : 1;

  REQUIRE(HIPRTC_SUCCESS == hiprtcAddNameExpression(rtcProgram, kernelNameExpression.c_str()));
  REQUIRE(HIPRTC_SUCCESS == hiprtcCompileProgram(rtcProgram, optionCount, options.data()));

  return rtcProgram;
}
/**
 * @brief Get a typename as a string
 *
 * The name is cut out of the compiler's "pretty function" string. The template argument is
 * located by searching for the marker text the compiler prints around it instead of relying on
 * fixed character offsets, so the result does not depend on the enclosing namespace or on how
 * the compiler spells the std::string return type.
 *
 * @tparam T The typename
 * @return std::string the string representation of T; an empty string when the compiler is not
 * recognised or the marker text is not found.
 */
template <typename T> std::string getTypeName() {
  std::string signature, startMarker, endMarker;
#ifdef __clang__
  signature = __PRETTY_FUNCTION__;  // "... getTypeName() [T = float]"
  startMarker = "[T = ";
  endMarker = "]";
#elif defined(__GNUC__)
  signature = __PRETTY_FUNCTION__;  // "... getTypeName() [with T = float; std::string = ...]"
  startMarker = "[with T = ";
  endMarker = "; std::string = ";
#elif defined(_MSC_VER)
  signature = __FUNCSIG__;  // "... getTypeName<float>(void)"
  startMarker = "getTypeName<";
  endMarker = ">(void)";
#endif

  if (startMarker.empty()) {
    return std::string{};
  }
  const size_t markerPos = signature.find(startMarker);
  if (markerPos == std::string::npos) {
    return std::string{};
  }
  const size_t begin = markerPos + startMarker.size();

  // Search from the right so that brackets inside T itself (e.g. "int[3]") are kept.
  size_t end = signature.rfind(endMarker);
  if (end == std::string::npos || end < begin) {
    end = signature.rfind(']');
  }
  if (end == std::string::npos || end < begin) {
    end = signature.size();
  }
  return signature.substr(begin, end - begin);
}
/**
 * @brief Prints a one-time notice that the kernels are compiled and run through HIP RTC.
 *
 * A function-local static flag suppresses the message on every call after the first.
 */
static inline void printInfo() {
  static bool alreadyPrinted{false};
  if (alreadyPrinted) {
    return;
  }
  std::cout << "INFO: This test is running using HIP RTC to compile and run the kernels."
            << std::endl;
  alreadyPrinted = true;
}
/**
* @brief Compiles and launches a kernel using HIP RTC
*
* The kernel is compiled only when TestContext::getFunction() returns nullptr for the
* reconstructed kernel expression; the resulting module and function are then registered with
* TestContext::trackRtcState() under that expression. Every HIP / HIP RTC call is checked with
* REQUIRE.
*
* @tparam Typenames A list of typenames used by the kernel (unused if the kernel is not a
* template).
* @tparam Args A list of kernel arguments to be forwarded.
* @param getKernelName A function wrapper that returns the name of the kernel to launch (check
* kernels.hh for more info)
* @param numBlocks grid dimensions passed to hipModuleLaunchKernel
* @param numThreads block dimensions passed to hipModuleLaunchKernel
* @param memPerBlock value passed to hipModuleLaunchKernel right after the block dimensions
* @param stream stream passed to hipModuleLaunchKernel
* @param packedArgs A list of kernel arguments to be forwarded.
*/
template <typename... Typenames, typename... Args>
void launchRTCKernel(std::string (*getKernelName)(), dim3 numBlocks, dim3 numThreads,
std::uint32_t memPerBlock, hipStream_t stream, Args&&... packedArgs) {
printInfo();
TestContext& testContext = TestContext::get();
std::string kernelName = (*getKernelName)();
// One string per template argument, in declaration order
std::vector<std::string> kernelTypenames{std::string(HipTest::getTypeName<Typenames>())...};
std::string kernelExpression = reconstructExpression(kernelName, kernelTypenames);
// The mutex is a function-local static, so there is one per instantiation of this template
static std::mutex mutex{};
{
// The lookup and the compile-and-register step happen under the lock
std::lock_guard<std::mutex> lockGuard(mutex);
if (testContext.getFunction(kernelExpression) == nullptr) {
hiprtcProgram rtcProgram{compileRTC(kernelName, kernelExpression)};
std::vector<char> compiledCode{getKernelCode(rtcProgram)};
hipModule_t module;
REQUIRE(hipSuccess == hipModuleLoadData(&module, compiledCode.data()));
hipFunction_t kernelFunction;
const char* loweredName;
REQUIRE(HIPRTC_SUCCESS ==
hiprtcGetLoweredName(rtcProgram, kernelExpression.c_str(), &loweredName));
REQUIRE(hipSuccess == hipModuleGetFunction(&kernelFunction, module, loweredName));
/* After obtaining the kernelFunction, the program is no longer needed. So it can be destroyed */
REQUIRE(HIPRTC_SUCCESS == hiprtcDestroyProgram(&rtcProgram));
testContext.trackRtcState(kernelExpression, module, kernelFunction);
}
}
// This lookup runs after the lock above has been released
hipFunction_t kernelFunction = testContext.getFunction(kernelExpression);
// One entry per argument: its address plus the size and alignment of its declared type
std::vector<KernelArgument> args = {
{reinterpret_cast<const void*>(&packedArgs), sizeof(Args), alignof(Args)}...};
std::vector<char> alignedArguments{alignArguments(args)};
size_t argumentsSize{alignedArguments.size()};
// Arguments are handed over as one packed buffer through the "extra" launch parameter,
// which is why the kernelParams argument of the launch call below is nullptr
void* config_array[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, alignedArguments.data(),
HIP_LAUNCH_PARAM_BUFFER_SIZE, reinterpret_cast<void*>(&argumentsSize),
HIP_LAUNCH_PARAM_END};
REQUIRE(hipSuccess ==
hipModuleLaunchKernel(kernelFunction, numBlocks.x, numBlocks.y, numBlocks.z, numThreads.x,
numThreads.y, numThreads.z, memPerBlock, stream, nullptr,
config_array));
}
/**
 * @brief Template overload for when numBlocks and numThreads are integers.
 *
 * Converts both counts to dim3 and forwards everything to the dim3 overload. The first
 * parameter is the same kernel-name function pointer that the dim3 overload takes; the call
 * below could not be resolved while this overload accepted a std::string instead.
 *
 * @param getKernelName A function wrapper that returns the name of the kernel to launch
 * @param numBlocks number of blocks, used as dim3(numBlocks)
 * @param numThreads number of threads per block, used as dim3(numThreads)
 * @param memPerBlock forwarded unchanged
 * @param stream forwarded unchanged
 * @param packedArgs kernel arguments, perfectly forwarded
 */
template <typename... Typenames, typename... Args>
void launchRTCKernel(std::string (*getKernelName)(), int numBlocks, int numThreads,
                     std::uint32_t memPerBlock, hipStream_t stream, Args&&... packedArgs) {
  launchRTCKernel<Typenames...>(getKernelName, dim3(numBlocks), dim3(numThreads), memPerBlock,
                                stream, std::forward<Args>(packedArgs)...);
}
} // namespace HipTest
@@ -0,0 +1,88 @@
/*
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#pragma once
/**
* @brief Error codes returned by rocm_smi_lib functions
*/
typedef enum {
RSMI_STATUS_SUCCESS = 0x0, //!< Operation was successful
RSMI_STATUS_INVALID_ARGS, //!< Passed in arguments are not valid
RSMI_STATUS_NOT_SUPPORTED, //!< The requested information or
//!< action is not available for the
//!< given input, on the given system
RSMI_STATUS_FILE_ERROR, //!< Problem accessing a file. This
//!< may be because the operation is not
//!< supported by the Linux kernel
//!< version running on the executing
//!< machine
RSMI_STATUS_PERMISSION, //!< Permission denied/EACCES file
//!< error. Many functions require
//!< root access to run.
RSMI_STATUS_OUT_OF_RESOURCES, //!< Unable to acquire memory or other
//!< resource
RSMI_STATUS_INTERNAL_EXCEPTION, //!< An internal exception was caught
RSMI_STATUS_INPUT_OUT_OF_BOUNDS, //!< The provided input is out of
//!< allowable or safe range
RSMI_STATUS_INIT_ERROR, //!< An error occurred while rsmi was
//!< initializing internal data
//!< structures
RSMI_INITIALIZATION_ERROR = RSMI_STATUS_INIT_ERROR, //!< Alias of RSMI_STATUS_INIT_ERROR
RSMI_STATUS_NOT_YET_IMPLEMENTED, //!< The requested function has not
//!< yet been implemented in the
//!< current system for the current
//!< devices
RSMI_STATUS_NOT_FOUND, //!< An item was searched for but not
//!< found
RSMI_STATUS_INSUFFICIENT_SIZE, //!< Not enough resources were
//!< available for the operation
RSMI_STATUS_INTERRUPT, //!< An interrupt occurred during
//!< execution of function
RSMI_STATUS_UNEXPECTED_SIZE, //!< An unexpected amount of data
//!< was read
RSMI_STATUS_NO_DATA, //!< No data was found for a given
//!< input
RSMI_STATUS_UNEXPECTED_DATA, //!< The data read or provided to
//!< function is not what was expected
RSMI_STATUS_BUSY, //!< A resource or mutex could not be
//!< acquired because it is already
//!< being used
RSMI_STATUS_REFCOUNT_OVERFLOW, //!< An internal reference counter
//!< exceeded INT32_MAX
RSMI_STATUS_UNKNOWN_ERROR = 0xFFFFFFFF, //!< An unknown error occurred
} rsmi_status_t;
/**
* @brief Types of memory
*
* RSMI_MEM_TYPE_FIRST and RSMI_MEM_TYPE_LAST are aliases of the first and last real entries.
*/
typedef enum {
RSMI_MEM_TYPE_FIRST = 0, //!< Alias of the first entry (VRAM)
RSMI_MEM_TYPE_VRAM = RSMI_MEM_TYPE_FIRST, //!< VRAM memory
RSMI_MEM_TYPE_VIS_VRAM, //!< VRAM memory that is visible
RSMI_MEM_TYPE_GTT, //!< GTT memory
RSMI_MEM_TYPE_LAST = RSMI_MEM_TYPE_GTT //!< Alias of the last entry (GTT)
} rsmi_memory_type_t;
@@ -0,0 +1,376 @@
#pragma once
#include <math.h>
#define HIP_SAMPLING_VERIFY_EPSILON 0.00001
// The internal precision varies by the GPU family and sometimes within the family.
// Thus the following threshold is subject to change.
#define HIP_SAMPLING_VERIFY_RELATIVE_THRESHOLD 0.05 // 5% for filter mode
#define HIP_SAMPLING_VERIFY_ABSOLUTE_THRESHOLD 0.1
// Component-wise operators for vector types, compiled only when HT_NVIDIA is non-zero.
// Every template below is enabled only for types T whose size is exactly four times the size
// of their member `x`; the bodies then use the members x, y, z and w.
#if HT_NVIDIA
typedef unsigned char uchar;
// Component-wise sum
template<typename T>
typename std::enable_if<sizeof(T) / sizeof(decltype(T::x)) == 4, T>::type
inline __host__ __device__ operator+(const T &a, const T &b)
{
return {a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w};
}
// Component-wise difference
template<typename T>
typename std::enable_if<sizeof(T) / sizeof(decltype(T::x)) == 4, T>::type
inline __host__ __device__ operator-(const T &a, const T &b)
{
return {a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w};
}
// True only when all four components compare equal
template<typename T>
typename std::enable_if<sizeof(T) / sizeof(decltype(T::x)) == 4, bool>::type
inline __host__ __device__ operator==(const T &a, const T &b)
{
return a.x == b.x && a.y == b.y && a.z == b.z && a.w == b.w;
}
// Scalar * vector; the scalar has the type of the member x
template<typename T>
typename std::enable_if<sizeof(T) / sizeof(decltype(T::x)) == 4, T>::type
inline __host__ __device__ operator*(const decltype(T::x) &a, const T &b)
{
return {a * b.x, a * b.y, a * b.z, a * b.w};
}
// In-place scaling of all four components; returns void, so the result cannot be chained
template<typename T>
typename std::enable_if<sizeof(T) / sizeof(decltype(T::x)) == 4, void>::type
inline __host__ __device__ operator*=(T &a, const decltype(T::x) &b)
{
a.x *= b;
a.y *= b;
a.z *= b;
a.w *= b;
}
#endif // HT_NVIDIA
// Pairs the data pointer of one mipmap level with the extent of that level.
// The struct only stores the pointer; nothing here allocates or frees it.
template <typename T> struct mipmapLevelArray {
T* data; // level array data
hipExtent e; // level array size
};
// From CIE 1931 color space to sRGB.
// NaN is treated as 0, inputs above 1 yield 1 and inputs below 0 yield 0. Inside [0, 1] the
// value is 12.92 * c below 0.0031308 and 1.055 * c^(5/12) - 0.055 otherwise. The computation is
// done in double and converted back to float at the end.
inline float hipSRGBMap(float fc) {
  double linear = static_cast<double>(fc);

#if !defined(_WIN32)
  const bool notANumber = std::isnan(linear);
#else
  const bool notANumber = _isnan(linear);
#endif
  if (notANumber) {
    linear = 0.0;
  }

  if (linear > 1.0) {
    return static_cast<float>(1.0);
  }
  if (linear < 0.0) {
    return static_cast<float>(0.0);
  }
  if (linear < 0.0031308) {
    return static_cast<float>(12.92 * linear);
  }
  return static_cast<float>(1.055 * pow(linear, 5.0 / 12.0) - 0.055);
}
// From sRGB to CIE 1931 color space.
// Values up to 0.04045 are divided by 12.92; larger values become ((c + 0.055) / 1.055)^2.4.
// The input is not clamped. The computation is done in double and converted back to float.
inline float hipSRGBUnmap(float fc) {
  const double encoded = static_cast<double>(fc);
  const double decoded =
      (encoded <= 0.04045) ? encoded / 12.92 : pow((encoded + 0.055) / 1.055, 2.4);
  return static_cast<float>(decoded);
}
// Applies the scalar hipSRGBMap() to the x, y and z components; w (alpha) is returned unchanged.
inline float4 hipSRGBMap(float4 fc) {
  float4 mapped = fc;  // starts as a copy so the alpha channel is carried over as is
  mapped.x = hipSRGBMap(fc.x);
  mapped.y = hipSRGBMap(fc.y);
  mapped.z = hipSRGBMap(fc.z);
  return mapped;
}
// Applies the scalar hipSRGBUnmap() to the x, y and z components; w (alpha) is returned
// unchanged.
inline float4 hipSRGBUnmap(float4 fc) {
  float4 unmapped = fc;  // starts as a copy so the alpha channel is carried over as is
  unmapped.x = hipSRGBUnmap(fc.x);
  unmapped.y = hipSRGBUnmap(fc.y);
  unmapped.z = hipSRGBUnmap(fc.z);
  return unmapped;
}
// hipFabs: magnitude of a value as a double.
// The overload is chosen by SFINAE: scalar types use fabs(); other types are dispatched on
// sizeof(T) / sizeof(T::x), i.e. on how many x-sized members they hold (1 to 4).

// Scalar: absolute value
template<typename T>
typename std::enable_if<std::is_scalar<T>::value == true, double>::type
hipFabs(const T &t) {
  const double magnitude = fabs(t);
  return magnitude;
}

// One component: absolute value of x
template<typename T>
typename std::enable_if<sizeof(T) / sizeof(decltype(T::x)) == 1, double>::type
hipFabs(const T &t) {
  const double magnitude = fabs(t.x);
  return magnitude;
}

// Two components: sqrt(x^2 + y^2)
template<typename T>
typename std::enable_if<sizeof(T) / sizeof(decltype(T::x)) == 2, double>::type
hipFabs(const T &t) {
  const double cx = static_cast<double>(t.x);
  const double cy = static_cast<double>(t.y);
  return sqrt(cx * cx + cy * cy);
}

// Three components: sqrt(x^2 + y^2 + z^2)
template<typename T>
typename std::enable_if<sizeof(T) / sizeof(decltype(T::x)) == 3, double>::type
hipFabs(const T &t) {
  const double cx = static_cast<double>(t.x);
  const double cy = static_cast<double>(t.y);
  const double cz = static_cast<double>(t.z);
  return sqrt(cx * cx + cy * cy + cz * cz);
}

// Four components: sqrt(x^2 + y^2 + z^2 + w^2)
template<typename T>
typename std::enable_if<sizeof(T) / sizeof(decltype(T::x)) == 4, double>::type
hipFabs(const T &t) {
  const double cx = static_cast<double>(t.x);
  const double cy = static_cast<double>(t.y);
  const double cz = static_cast<double>(t.z);
  const double cw = static_cast<double>(t.w);
  return sqrt(cx * cx + cy * cy + cz * cz + cw * cw);
}
// Compares a sampled value with its expected value.
// Point filtering without sRGB requires exact equality. Every other combination passes when
// either the relative error |out - exp| / (mean magnitude + epsilon) is within
// HIP_SAMPLING_VERIFY_RELATIVE_THRESHOLD or the absolute error is within
// HIP_SAMPLING_VERIFY_ABSOLUTE_THRESHOLD (small values can have a large ratio because of
// floating-point differences between the two computations).
template<typename T, hipTextureFilterMode fMode = hipFilterModePoint, bool sRGB = false>
bool hipTextureSamplingVerify(const T &outputData, const T &expected) {
  if (fMode == hipFilterModePoint && !sRGB) {
    return outputData == expected;
  }

  const double mean = (hipFabs(outputData) + hipFabs(expected)) / 2;
  const double diff = hipFabs(outputData - expected);
  const double ratio = diff / (mean + HIP_SAMPLING_VERIFY_EPSILON);

  const bool withinRelative = ratio <= HIP_SAMPLING_VERIFY_RELATIVE_THRESHOLD;
  const bool withinAbsolute = diff <= HIP_SAMPLING_VERIFY_ABSOLUTE_THRESHOLD;
  return withinRelative || withinAbsolute;
}
// Simulate CTS static AddressingTable sAddressingTable.
// Adjusts `value` in place according to the address mode:
//   clamp  -> limited to [0, maxValue - 1]
//   border -> limited to [-1, maxValue]
// Any other mode leaves `value` untouched.
template<hipTextureAddressMode addressMode>
void hipTextureGetAddress(int &value, const int maxValue)
{
  if (addressMode == hipAddressModeClamp) {
    if (value < 0) {
      value = 0;
    } else if (value > maxValue - 1) {
      value = maxValue - 1;
    }
  } else if (addressMode == hipAddressModeBorder) {
    if (value < -1) {
      value = -1;
    } else if (value > maxValue) {
      value = maxValue;
    }
  }
}
// Simulate logics in CTS read_image_pixel_float().
// x, y and z must be returned by hipTextureGetAddress().
// Reads one element from `data`, which is indexed as 1D when height and depth are both 0, as 2D
// when only depth is 0, and as 3D otherwise. The result stays zero-filled when width is not
// positive, when the address mode is neither clamp nor border, or - in border mode - when a
// coordinate used by that layout lies outside the image. With sRGB set and T == float4 the
// value is passed through hipSRGBUnmap() before it is returned.
template<typename T, hipTextureAddressMode addressMode, bool sRGB = false>
T hipTextureGetValue(const T *data, const int x, const int width,
    const int y = 0, const int height = 0, const int z = 0, const int depth = 0) {
  T texel;
  memset(&texel, 0, sizeof(texel));

  const bool is1D = (height == 0 && depth == 0);
  const bool is2D = !is1D && depth == 0;
  const bool knownMode =
      addressMode == hipAddressModeClamp || addressMode == hipAddressModeBorder;

  if (knownMode && width > 0) {
    // Clamp mode reads unconditionally; border mode reads only inside the image
    bool readable = true;
    if (addressMode == hipAddressModeBorder) {
      readable = x >= 0 && x < width;
      if (!is1D) {
        readable = readable && y >= 0 && y < height;
      }
      if (!is1D && !is2D) {
        readable = readable && z >= 0 && z < depth;
      }
    }

    if (readable) {
      if (is1D) {
        texel = data[x];  // 1D
      } else if (is2D) {
        texel = data[y * width + x];  // 2D
      } else {
        texel = data[z * width * height + y * width + x];  // 3D
      }
    }
  }

  if constexpr (sRGB && std::is_same<T, float4>::value) {
    texel = hipSRGBUnmap(texel);
  }
  return texel;
}
// Computes the reference value a 1D texture fetch at coordinate x is expected to return.
//   width : number of texels in data
//   x     : sampling coordinate
//   data  : host copy of the texel data
// Point filtering reads the texel at floor(x). Linear filtering shifts x by half a texel and
// blends the two neighbouring texels with the fractional part as weight. Any other filter mode
// returns a zero-filled T.
template <typename T, hipTextureAddressMode addressMode, hipTextureFilterMode filterMode,
          bool sRGB = false>
T getExpectedValue(const int width, float x, const T* data) {
  T expected;
  memset(&expected, 0, sizeof(expected));

  if (filterMode == hipFilterModePoint) {
    int texel = static_cast<int>(floor(x));
    hipTextureGetAddress<addressMode>(texel, width);
    expected = hipTextureGetValue<T, addressMode, sRGB>(data, texel, width);
  } else if (filterMode == hipFilterModeLinear) {
    x -= 0.5;
    int lower = static_cast<int>(floor(x));
    int upper = lower + 1;
    // The weight must be taken before the indices are remapped by the addressing mode.
    float a = x - lower;
    hipTextureGetAddress<addressMode>(lower, width);
    hipTextureGetAddress<addressMode>(upper, width);
    T t1 = hipTextureGetValue<T, addressMode, sRGB>(data, lower, width);
    T t2 = hipTextureGetValue<T, addressMode, sRGB>(data, upper, width);
    return (1 - a) * t1 + a * t2;
  }
  return expected;
}
// Computes the reference value a 2D texture fetch at (x, y) is expected to return.
//   width, height : image dimensions in texels
//   x, y          : sampling coordinates
//   data          : host copy of the texel data, indexed as y * width + x
// Point filtering reads the texel at (floor(x), floor(y)). Linear filtering shifts both
// coordinates by half a texel and bilinearly blends the four surrounding texels. Any other filter
// mode returns a zero-filled T.
template <typename T, hipTextureAddressMode addressMode, hipTextureFilterMode filterMode,
          bool sRGB = false>
T getExpectedValue(const int width, const int height, float x, float y, const T* data) {
  T expected;
  memset(&expected, 0, sizeof(expected));

  if (filterMode == hipFilterModePoint) {
    int col = static_cast<int>(floor(x));
    int row = static_cast<int>(floor(y));
    hipTextureGetAddress<addressMode>(col, width);
    hipTextureGetAddress<addressMode>(row, height);
    expected = hipTextureGetValue<T, addressMode, sRGB>(data, col, width, row, height);
  } else if (filterMode == hipFilterModeLinear) {
    x -= 0.5;
    y -= 0.5;
    int col0 = static_cast<int>(floor(x));
    int row0 = static_cast<int>(floor(y));
    int col1 = col0 + 1;
    int row1 = row0 + 1;
    // Weights are derived from the unmodified indices, before address-mode remapping.
    float a = x - col0;
    float b = y - row0;
    hipTextureGetAddress<addressMode>(col0, width);
    hipTextureGetAddress<addressMode>(col1, width);
    hipTextureGetAddress<addressMode>(row0, height);
    hipTextureGetAddress<addressMode>(row1, height);
    T t11 = hipTextureGetValue<T, addressMode, sRGB>(data, col0, width, row0, height);
    T t21 = hipTextureGetValue<T, addressMode, sRGB>(data, col1, width, row0, height);
    T t12 = hipTextureGetValue<T, addressMode, sRGB>(data, col0, width, row1, height);
    T t22 = hipTextureGetValue<T, addressMode, sRGB>(data, col1, width, row1, height);
    expected = (1 - a) * (1 - b) * t11 + a * (1 - b) * t21 + (1 - a) * b * t12 + a * b * t22;
  }
  return expected;
}
// Computes the reference value a 3D texture fetch at (x, y, z) is expected to return.
//   width, height, depth : volume dimensions in texels
//   x, y, z              : sampling coordinates
//   data                 : host copy of the texel data, indexed as
//                          z * width * height + y * width + x
// Point filtering reads the texel at (floor(x), floor(y), floor(z)). Linear filtering shifts all
// coordinates by half a texel and trilinearly blends the eight surrounding texels. Any other
// filter mode returns a zero-filled T.
template <class T, hipTextureAddressMode addressMode, hipTextureFilterMode filterMode,
          bool sRGB = false>
T getExpectedValue(const int width, const int height, const int depth, float x, float y, float z,
                   const T* data) {
  T expected;
  memset(&expected, 0, sizeof(expected));

  if (filterMode == hipFilterModePoint) {
    int col = static_cast<int>(floor(x));
    int row = static_cast<int>(floor(y));
    int slice = static_cast<int>(floor(z));
    hipTextureGetAddress<addressMode>(col, width);
    hipTextureGetAddress<addressMode>(row, height);
    hipTextureGetAddress<addressMode>(slice, depth);
    expected =
        hipTextureGetValue<T, addressMode, sRGB>(data, col, width, row, height, slice, depth);
  } else if (filterMode == hipFilterModeLinear) {
    x -= 0.5;
    y -= 0.5;
    z -= 0.5;
    int col0 = static_cast<int>(floor(x));
    int row0 = static_cast<int>(floor(y));
    int slice0 = static_cast<int>(floor(z));
    int col1 = col0 + 1;
    int row1 = row0 + 1;
    int slice1 = slice0 + 1;
    // Weights are derived from the unmodified indices, before address-mode remapping.
    float a = x - col0;
    float b = y - row0;
    float c = z - slice0;
    hipTextureGetAddress<addressMode>(col0, width);
    hipTextureGetAddress<addressMode>(col1, width);
    hipTextureGetAddress<addressMode>(row0, height);
    hipTextureGetAddress<addressMode>(row1, height);
    hipTextureGetAddress<addressMode>(slice0, depth);
    hipTextureGetAddress<addressMode>(slice1, depth);
    // Naming: t<x><y><z>, where 1 is the lower and 2 the upper neighbour along that axis.
    T t111 =
        hipTextureGetValue<T, addressMode, sRGB>(data, col0, width, row0, height, slice0, depth);
    T t211 =
        hipTextureGetValue<T, addressMode, sRGB>(data, col1, width, row0, height, slice0, depth);
    T t121 =
        hipTextureGetValue<T, addressMode, sRGB>(data, col0, width, row1, height, slice0, depth);
    T t112 =
        hipTextureGetValue<T, addressMode, sRGB>(data, col0, width, row0, height, slice1, depth);
    T t122 =
        hipTextureGetValue<T, addressMode, sRGB>(data, col0, width, row1, height, slice1, depth);
    T t212 =
        hipTextureGetValue<T, addressMode, sRGB>(data, col1, width, row0, height, slice1, depth);
    T t221 =
        hipTextureGetValue<T, addressMode, sRGB>(data, col1, width, row1, height, slice0, depth);
    T t222 =
        hipTextureGetValue<T, addressMode, sRGB>(data, col1, width, row1, height, slice1, depth);
    expected = (1 - a) * (1 - b) * (1 - c) * t111 + a * (1 - b) * (1 - c) * t211 +
        (1 - a) * b * (1 - c) * t121 + a * b * (1 - c) * t221 +
        (1 - a) * (1 - b) * c * t112 + a * (1 - b) * c * t212 +
        (1 - a) * b * c * t122 + a * b * c * t222;
  }
  return expected;
}
@@ -0,0 +1,27 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#pragma once
#include <map>
// Lookup table from a kernel name (namespace-qualified where applicable) to the name of the
// source file associated with that kernel.
const std::map<std::string, std::string> mapKernelToFileName = {
    {"Set", "Set.cpp"},
    {"HipTest::vectorADD", "vectorADD.inl"},
};
@@ -0,0 +1,55 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#pragma once
#include <hip_test_common.hh>
#include <map>
#ifndef RTC_TESTING
/* Regular build: kernels are declared here and launched directly. */
__global__ void Set(int* Ad, int val);
/* Kernel Templates */
#include "vectorADD.inl"
#else
/*
 * Wrapper Macros that create a string representation of the kernel name.
 * In the case of kernel templates, a variadic template is used to ensure compatibility with
 * the launchKernel template when RTC is not enabled. If the kernel is inside a namespace, use the
 * "_NS" version of the Macro.
 *
 * The non-template wrappers are declared inline: this header defines them, so without inline
 * every translation unit including it would emit its own external definition and linking two
 * such units into one binary would fail with duplicate symbols. The template wrappers do not
 * need the specifier.
 */
#define FUNCTION_WRAPPER(param)                                                                    \
  inline std::string param() { return #param; }
#define TEMPLATE_WRAPPER(param)                                                                    \
  template <typename...> std::string param() { return #param; }
#define FUNCTION_WRAPPER_NS(param, namespace)                                                      \
  inline std::string param() { return #namespace "::" #param; }
#define TEMPLATE_WRAPPER_NS(param, namespace)                                                      \
  template <typename...> std::string param() { return #namespace "::" #param; }
/* RTC build: each kernel identifier becomes a function returning the kernel's name. */
FUNCTION_WRAPPER(Set);
namespace HipTest {
TEMPLATE_WRAPPER_NS(vectorADD, HipTest);
}
#endif
@@ -0,0 +1,329 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#pragma once
#include <functional>
#include <hip/hip_runtime_api.h>
#include <hip_test_common.hh>
#include <resource_guards.hh>
#include <utils.hh>
// Returns the allocation flags to test for the given allocation type.
//   - LinearAllocs::hipHostMalloc: a Catch2 generator over the host-malloc flag values, so the
//     enclosing test case is re-run once per flag.
//   - mallocAndRegister, hipMallocManaged, malloc, hipMalloc: 0 (no flags to vary).
// Throws std::invalid_argument for any other enumerator value.
static inline unsigned int GenerateLinearAllocationFlagCombinations(
    const LinearAllocs allocation_type) {
  switch (allocation_type) {
    case LinearAllocs::hipHostMalloc:
      return GENERATE(hipHostMallocDefault, hipHostMallocPortable, hipHostMallocMapped,
                      hipHostMallocWriteCombined);
    case LinearAllocs::mallocAndRegister:
    case LinearAllocs::hipMallocManaged:
    case LinearAllocs::malloc:
    case LinearAllocs::hipMalloc:
      return 0u;
    default:
      // The previous assert("...") tested a non-null string literal and therefore could never
      // fire; the exception is the single, always-active error path.
      throw std::invalid_argument("Invalid LinearAllocs enumerator");
  }
}
template <bool should_synchronize, typename F>
void MemcpyDeviceToHostShell(F memcpy_func, const hipStream_t kernel_stream = nullptr) {
using LA = LinearAllocs;
const auto allocation_size = GENERATE(kPageSize / 2, kPageSize, kPageSize * 2);
const auto host_allocation_type = GENERATE(LA::malloc, LA::hipHostMalloc);
const auto host_allocation_flags = GenerateLinearAllocationFlagCombinations(host_allocation_type);
LinearAllocGuard<int> host_allocation(host_allocation_type, allocation_size,
host_allocation_flags);
LinearAllocGuard<int> device_allocation(LA::hipMalloc, allocation_size);
const auto element_count = allocation_size / sizeof(*device_allocation.ptr());
constexpr auto thread_count = 1024;
const auto block_count = element_count / thread_count + 1;
constexpr int expected_value = 42;
VectorSet<<<block_count, thread_count>>>(device_allocation.ptr(), expected_value, element_count);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipDeviceSynchronize());
HIP_CHECK(memcpy_func(host_allocation.host_ptr(), device_allocation.ptr(), allocation_size));
if constexpr (should_synchronize) {
HIP_CHECK(hipStreamSynchronize(kernel_stream));
}
ArrayFindIfNot(host_allocation.host_ptr(), expected_value, element_count);
}
template <bool should_synchronize, typename F>
void MemcpyHostToDeviceShell(F memcpy_func, const hipStream_t kernel_stream = nullptr) {
using LA = LinearAllocs;
const auto allocation_size = GENERATE(kPageSize / 2, kPageSize, kPageSize * 2);
const auto host_allocation_type = GENERATE(LA::malloc, LA::hipHostMalloc);
const auto host_allocation_flags = GenerateLinearAllocationFlagCombinations(host_allocation_type);
LinearAllocGuard<int> src_host_allocation(host_allocation_type, allocation_size,
host_allocation_flags);
LinearAllocGuard<int> dst_host_allocation(LA::hipHostMalloc, allocation_size);
LinearAllocGuard<int> device_allocation(LA::hipMalloc, allocation_size);
const auto element_count = allocation_size / sizeof(*device_allocation.ptr());
constexpr int fill_value = 42;
std::fill_n(src_host_allocation.host_ptr(), element_count, fill_value);
std::fill_n(dst_host_allocation.host_ptr(), element_count, 0);
HIP_CHECK(memcpy_func(device_allocation.ptr(), src_host_allocation.host_ptr(), allocation_size));
if constexpr (should_synchronize) {
HIP_CHECK(hipStreamSynchronize(kernel_stream));
}
HIP_CHECK(hipMemcpy(dst_host_allocation.host_ptr(), device_allocation.ptr(), allocation_size,
hipMemcpyDeviceToHost));
ArrayFindIfNot(dst_host_allocation.host_ptr(), fill_value, element_count);
}
template <bool should_synchronize, typename F>
void MemcpyHostToHostShell(F memcpy_func, const hipStream_t kernel_stream = nullptr) {
using LA = LinearAllocs;
const auto allocation_size = GENERATE(kPageSize / 2, kPageSize, kPageSize * 2);
const auto src_allocation_type = GENERATE(LA::malloc, LA::hipHostMalloc);
const auto dst_allocation_type = GENERATE(LA::malloc, LA::hipHostMalloc);
const auto src_allocation_flags = GenerateLinearAllocationFlagCombinations(src_allocation_type);
const auto dst_allocation_flags = GenerateLinearAllocationFlagCombinations(dst_allocation_type);
LinearAllocGuard<int> src_allocation(src_allocation_type, allocation_size, src_allocation_flags);
LinearAllocGuard<int> dst_allocation(dst_allocation_type, allocation_size, dst_allocation_flags);
const auto element_count = allocation_size / sizeof(*src_allocation.host_ptr());
constexpr auto expected_value = 42;
std::fill_n(src_allocation.host_ptr(), element_count, expected_value);
HIP_CHECK(memcpy_func(dst_allocation.host_ptr(), src_allocation.host_ptr(), allocation_size));
if constexpr (should_synchronize) {
HIP_CHECK(hipStreamSynchronize(kernel_stream));
}
ArrayFindIfNot(dst_allocation.host_ptr(), expected_value, element_count);
}
template <bool should_synchronize, bool enable_peer_access, typename F>
void MemcpyDeviceToDeviceShell(F memcpy_func, const hipStream_t kernel_stream = nullptr) {
const auto allocation_size = GENERATE(kPageSize / 2, kPageSize, kPageSize * 2);
const auto device_count = HipTest::getDeviceCount();
const auto src_device = GENERATE_COPY(range(0, device_count));
const auto dst_device = GENERATE_COPY(range(0, device_count));
INFO("Src device: " << src_device << ", Dst device: " << dst_device);
HIP_CHECK(hipSetDevice(src_device));
if constexpr (enable_peer_access) {
if (src_device == dst_device) {
return;
}
int can_access_peer = 0;
HIP_CHECK(hipDeviceCanAccessPeer(&can_access_peer, src_device, dst_device));
if (!can_access_peer) {
std::string msg = "Skipped as peer access cannot be enabled between devices " +
std::to_string(src_device) + " " + std::to_string(dst_device);
HipTest::HIP_SKIP_TEST(msg.c_str());
return;
}
HIP_CHECK(hipDeviceEnablePeerAccess(dst_device, 0));
}
LinearAllocGuard<int> src_allocation(LinearAllocs::hipMalloc, allocation_size);
LinearAllocGuard<int> result(LinearAllocs::hipHostMalloc, allocation_size, hipHostMallocPortable);
HIP_CHECK(hipSetDevice(dst_device));
LinearAllocGuard<int> dst_allocation(LinearAllocs::hipMalloc, allocation_size);
const auto element_count = allocation_size / sizeof(*src_allocation.ptr());
constexpr auto thread_count = 1024;
const auto block_count = element_count / thread_count + 1;
constexpr int expected_value = 42;
HIP_CHECK(hipSetDevice(src_device));
VectorSet<<<block_count, thread_count>>>(src_allocation.ptr(), expected_value, element_count);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipDeviceSynchronize());
HIP_CHECK(memcpy_func(dst_allocation.ptr(), src_allocation.ptr(), allocation_size));
if constexpr (should_synchronize) {
HIP_CHECK(hipStreamSynchronize(kernel_stream));
}
HIP_CHECK(
hipMemcpy(result.host_ptr(), dst_allocation.ptr(), allocation_size, hipMemcpyDeviceToHost));
if constexpr (enable_peer_access) {
// If we've gotten this far, EnablePeerAccess must have succeeded, so we
// only need to check this condition
HIP_CHECK(hipDeviceDisablePeerAccess(dst_device));
}
ArrayFindIfNot(result.host_ptr(), expected_value, element_count);
}
template <bool should_synchronize, typename F>
void MemcpyWithDirectionCommonTests(F memcpy_func, const hipStream_t kernel_stream = nullptr) {
using namespace std::placeholders;
SECTION("Device to host") {
MemcpyDeviceToHostShell<should_synchronize>(
std::bind(memcpy_func, _1, _2, _3, hipMemcpyDeviceToHost), kernel_stream);
}
SECTION("Device to host with default kind") {
MemcpyDeviceToHostShell<should_synchronize>(
std::bind(memcpy_func, _1, _2, _3, hipMemcpyDefault), kernel_stream);
}
SECTION("Host to device") {
MemcpyHostToDeviceShell<should_synchronize>(
std::bind(memcpy_func, _1, _2, _3, hipMemcpyHostToDevice), kernel_stream);
}
SECTION("Host to device with default kind") {
MemcpyHostToDeviceShell<should_synchronize>(
std::bind(memcpy_func, _1, _2, _3, hipMemcpyDefault), kernel_stream);
}
SECTION("Host to host") {
MemcpyHostToHostShell<should_synchronize>(
std::bind(memcpy_func, _1, _2, _3, hipMemcpyHostToHost), kernel_stream);
}
SECTION("Host to host with default kind") {
MemcpyHostToHostShell<should_synchronize>(std::bind(memcpy_func, _1, _2, _3,
hipMemcpyDefault), kernel_stream);
}
SECTION("Device to device") {
SECTION("Peer access enabled") {
MemcpyDeviceToDeviceShell<should_synchronize, true>(
std::bind(memcpy_func, _1, _2, _3, hipMemcpyDeviceToDevice), kernel_stream);
}
SECTION("Peer access disabled") {
MemcpyDeviceToDeviceShell<should_synchronize, false>(
std::bind(memcpy_func, _1, _2, _3, hipMemcpyDeviceToDevice), kernel_stream);
}
}
SECTION("Device to device with default kind") {
SECTION("Peer access enabled") {
MemcpyDeviceToDeviceShell<should_synchronize, true>(
std::bind(memcpy_func, _1, _2, _3, hipMemcpyDefault), kernel_stream);
}
SECTION("Peer access disabled") {
MemcpyDeviceToDeviceShell<should_synchronize, false>(
std::bind(memcpy_func, _1, _2, _3, hipMemcpyDefault), kernel_stream);
}
}
}
// Synchronization behavior checks
//
// Launches a 100 ms delay kernel on kernel_stream, invokes memcpy_func() and then queries the
// stream:
//   should_sync == true  -> hipStreamQuery must succeed (the copy waited for the stream's work)
//   should_sync == false -> hipStreamQuery must report hipErrorNotReady (the copy returned while
//                           the delay kernel was still pending)
template <typename F>
void MemcpySyncBehaviorCheck(F memcpy_func, const bool should_sync,
                             const hipStream_t kernel_stream) {
  LaunchDelayKernel(std::chrono::milliseconds{100}, kernel_stream);
  HIP_CHECK(memcpy_func());

  if (!should_sync) {
    HIP_CHECK_ERROR(hipStreamQuery(kernel_stream), hipErrorNotReady);
    return;
  }
  HIP_CHECK(hipStreamQuery(kernel_stream));
}
template <typename F>
void MemcpyHPageabletoDSyncBehavior(F memcpy_func, const bool should_sync,
const hipStream_t kernel_stream = nullptr) {
LinearAllocGuard<int> host_alloc(LinearAllocs::malloc, kPageSize);
LinearAllocGuard<int> device_alloc(LinearAllocs::hipMalloc, kPageSize);
MemcpySyncBehaviorCheck(std::bind(memcpy_func, device_alloc.ptr(), host_alloc.ptr(), kPageSize),
should_sync, kernel_stream);
}
template <typename F>
void MemcpyHPinnedtoDSyncBehavior(F memcpy_func, const bool should_sync,
const hipStream_t kernel_stream = nullptr) {
LinearAllocGuard<int> host_alloc(LinearAllocs::hipHostMalloc, kPageSize);
LinearAllocGuard<int> device_alloc(LinearAllocs::hipMalloc, kPageSize);
MemcpySyncBehaviorCheck(std::bind(memcpy_func, device_alloc.ptr(), host_alloc.ptr(), kPageSize),
should_sync, kernel_stream);
}
template <typename F>
void MemcpyDtoHPageableSyncBehavior(F memcpy_func, const bool should_sync,
const hipStream_t kernel_stream = nullptr) {
LinearAllocGuard<int> host_alloc(LinearAllocs::malloc, kPageSize);
LinearAllocGuard<int> device_alloc(LinearAllocs::hipMalloc, kPageSize);
MemcpySyncBehaviorCheck(std::bind(memcpy_func, host_alloc.ptr(), device_alloc.ptr(), kPageSize),
should_sync, kernel_stream);
}
template <typename F>
void MemcpyDtoHPinnedSyncBehavior(F memcpy_func, const bool should_sync,
const hipStream_t kernel_stream = nullptr) {
LinearAllocGuard<int> host_alloc(LinearAllocs::hipHostMalloc, kPageSize);
LinearAllocGuard<int> device_alloc(LinearAllocs::hipMalloc, kPageSize);
MemcpySyncBehaviorCheck(std::bind(memcpy_func, host_alloc.ptr(), device_alloc.ptr(), kPageSize),
should_sync, kernel_stream);
}
template <typename F>
void MemcpyDtoDSyncBehavior(F memcpy_func, const bool should_sync,
const hipStream_t kernel_stream = nullptr) {
LinearAllocGuard<int> src_alloc(LinearAllocs::hipMalloc, kPageSize);
LinearAllocGuard<int> dst_alloc(LinearAllocs::hipMalloc, kPageSize);
MemcpySyncBehaviorCheck(std::bind(memcpy_func, dst_alloc.ptr(), src_alloc.ptr(), kPageSize),
should_sync, kernel_stream);
}
template <typename F>
void MemcpyHtoHSyncBehavior(F memcpy_func, const bool should_sync,
const hipStream_t kernel_stream = nullptr) {
using LA = LinearAllocs;
auto src_alloc_type = GENERATE(LA::malloc, LA::hipHostMalloc);
auto dst_alloc_type = GENERATE(LA::malloc, LA::hipHostMalloc);
LinearAllocGuard<int> src_alloc(src_alloc_type, kPageSize);
LinearAllocGuard<int> dst_alloc(dst_alloc_type, kPageSize);
MemcpySyncBehaviorCheck(std::bind(memcpy_func, dst_alloc.ptr(), src_alloc.ptr(), kPageSize),
should_sync, kernel_stream);
}
// Common negative tests
//
// Verifies that f(dst, src, count) reports hipErrorInvalidValue when either the destination or
// the source pointer is null.
template <typename F>
void MemcpyCommonNegativeTests(F f, void* dst, void* src, size_t count) {
  SECTION("dst == nullptr") {
    HIP_CHECK_ERROR(f(nullptr, src, count), hipErrorInvalidValue);
  }
  SECTION("src == nullptr") {
    HIP_CHECK_ERROR(f(dst, nullptr, count), hipErrorInvalidValue);
  }
}
// Negative tests for a memcpy-style API of the form f(dst, src, count, kind).
// Runs the null-pointer checks of MemcpyCommonNegativeTests with the given kind and, on NVIDIA
// builds only, checks that an out-of-range kind yields hipErrorInvalidMemcpyDirection.
template <typename F>
void MemcpyWithDirectionCommonNegativeTests(F f, void* dst, void* src, size_t count,
                                            hipMemcpyKind kind) {
  using namespace std::placeholders;
  auto f_with_kind = std::bind(f, _1, _2, _3, kind);
  MemcpyCommonNegativeTests(f_with_kind, dst, src, count);
// Disabled on AMD due to defect - EXSWHTEC-128
#if HT_NVIDIA
  SECTION("Invalid MemcpyKind") {
    const auto invalid_kind = static_cast<hipMemcpyKind>(-1);
    HIP_CHECK_ERROR(f(dst, src, count, invalid_kind), hipErrorInvalidMemcpyDirection);
  }
#endif
}
@@ -0,0 +1,893 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#pragma once
#pragma clang diagnostic ignored "-Wmissing-field-initializers"
#pragma clang diagnostic ignored "-Wunused-lambda-capture"
#pragma clang diagnostic ignored "-Wunused-parameter"
#include <variant>
#include <hip_test_common.hh>
#include <hip/hip_runtime_api.h>
#include <utils.hh>
#include <resource_guards.hh>
// Either endpoint of a 3D copy: a pitched linear pointer or a HIP array handle.
using PtrVariant = std::variant<hipPitchedPtr, hipArray_t>;
// Returns the opposite copy direction for host<->device kinds; every other kind is returned
// unchanged.
static inline hipMemcpyKind ReverseMemcpyDirection(const hipMemcpyKind direction) {
  if (direction == hipMemcpyHostToDevice) {
    return hipMemcpyDeviceToHost;
  }
  if (direction == hipMemcpyDeviceToHost) {
    return hipMemcpyHostToDevice;
  }
  return direction;
}
// Builds a hipMemcpy3DParms structure from its individual components.
// For each endpoint, a hipArray_t alternative fills the *Array member and a hipPitchedPtr
// alternative fills the *Ptr member; the member that is not selected keeps its
// zero-initialized value.
static inline hipMemcpy3DParms GetMemcpy3DParms(PtrVariant dst_ptr, hipPos dst_pos,
                                                PtrVariant src_ptr, hipPos src_pos,
                                                hipExtent extent, hipMemcpyKind kind) {
  hipMemcpy3DParms parms = {0};

  // Destination endpoint.
  if (const auto* dst_array = std::get_if<hipArray_t>(&dst_ptr)) {
    parms.dstArray = *dst_array;
  } else {
    parms.dstPtr = std::get<hipPitchedPtr>(dst_ptr);
  }
  parms.dstPos = dst_pos;

  // Source endpoint.
  if (const auto* src_array = std::get_if<hipArray_t>(&src_ptr)) {
    parms.srcArray = *src_array;
  } else {
    parms.srcPtr = std::get<hipPitchedPtr>(src_ptr);
  }
  parms.srcPos = src_pos;

  parms.extent = extent;
  parms.kind = kind;
  return parms;
}
// Equality for hipPitchedPtr, comparing ptr, pitch and ysize.
static bool operator==(const hipPitchedPtr& lhs, const hipPitchedPtr& rhs) {
  // not checking for xsize currently as hipGraphMemcpyNodeGetParams returns incorrect value
  if (lhs.ptr != rhs.ptr) {
    return false;
  }
  if (lhs.pitch != rhs.pitch) {
    return false;
  }
  return lhs.ysize == rhs.ysize;
}
// Component-wise equality for hipPos.
static bool operator==(const hipPos& lhs, const hipPos& rhs) {
  const bool same_x = (lhs.x == rhs.x);
  const bool same_y = (lhs.y == rhs.y);
  const bool same_z = (lhs.z == rhs.z);
  return same_x && same_y && same_z;
}
// Component-wise equality for hipExtent.
static bool operator==(const hipExtent& lhs, const hipExtent& rhs) {
  const bool same_width = (lhs.width == rhs.width);
  const bool same_height = (lhs.height == rhs.height);
  const bool same_depth = (lhs.depth == rhs.depth);
  return same_width && same_height && same_depth;
}
// Equality for hipMemcpy3DParms: both endpoints (array handle, pitched pointer, position), the
// extent and the copy kind must match.
static inline bool operator==(const hipMemcpy3DParms& lhs, const hipMemcpy3DParms& rhs) {
  const bool same_dst =
      lhs.dstArray == rhs.dstArray && lhs.dstPtr == rhs.dstPtr && lhs.dstPos == rhs.dstPos;
  const bool same_src =
      lhs.srcArray == rhs.srcArray && lhs.srcPtr == rhs.srcPtr && lhs.srcPos == rhs.srcPos;
  const bool same_shape = lhs.extent == rhs.extent && lhs.kind == rhs.kind;
  return same_dst && same_src && same_shape;
}
// Uniform entry point for the different ways of issuing a 3D copy.
//   graph == true : the copy is added as a memcpy node to a fresh graph, the node parameters are
//                   read back and compared with the requested ones, and the graph is launched on
//                   hipStreamPerThread and synchronized. With set_params the node is first
//                   created with source/destination swapped and then corrected through
//                   hipGraphMemcpyNodeSetParams. Returns hipSuccess; failures are reported
//                   through HIP_CHECK / REQUIRE.
//   async == true : returns the result of hipMemcpy3DAsync on `stream`.
//   otherwise     : returns the result of hipMemcpy3D.
template <bool async = false, bool graph = false, bool set_params = false>
hipError_t Memcpy3DWrapper(PtrVariant dst_ptr, hipPos dst_pos, PtrVariant src_ptr, hipPos src_pos,
                           hipExtent extent, hipMemcpyKind kind, hipStream_t stream = nullptr) {
  auto parms = GetMemcpy3DParms(dst_ptr, dst_pos, src_ptr, src_pos, extent, kind);

  if constexpr (graph) {
    hipGraph_t copy_graph = nullptr;
    HIP_CHECK(hipGraphCreate(&copy_graph, 0));

    hipGraphNode_t copy_node = nullptr;
    if constexpr (!set_params) {
      HIP_CHECK(hipGraphAddMemcpyNode(&copy_node, copy_graph, nullptr, 0, &parms));
    } else {
      // Start from the reversed copy so that SetParams has to replace every field.
      auto reversed_parms = GetMemcpy3DParms(src_ptr, src_pos, dst_ptr, dst_pos, extent,
                                             ReverseMemcpyDirection(kind));
      HIP_CHECK(hipGraphAddMemcpyNode(&copy_node, copy_graph, nullptr, 0, &reversed_parms));
      HIP_CHECK(hipGraphMemcpyNodeSetParams(copy_node, &parms));
    }

    hipMemcpy3DParms retrieved_params = {0};
    HIP_CHECK(hipGraphMemcpyNodeGetParams(copy_node, &retrieved_params));
    REQUIRE(parms == retrieved_params);

    hipGraphExec_t executable = nullptr;
    HIP_CHECK(hipGraphInstantiate(&executable, copy_graph, nullptr, nullptr, 0));
    HIP_CHECK(hipGraphLaunch(executable, hipStreamPerThread));
    HIP_CHECK(hipStreamSynchronize(hipStreamPerThread));

    HIP_CHECK(hipGraphExecDestroy(executable));
    HIP_CHECK(hipGraphDestroy(copy_graph));
    return hipSuccess;
  } else if constexpr (async) {
    return hipMemcpy3DAsync(&parms, stream);
  } else {
    return hipMemcpy3D(&parms);
  }
}
template <bool should_synchronize, typename F>
void Memcpy3DDeviceToHostShell(F memcpy_func, const hipStream_t kernel_stream = nullptr) {
const auto kind = GENERATE(hipMemcpyDeviceToHost, hipMemcpyDefault);
constexpr hipExtent extent{127 * sizeof(int), 128, 8};
LinearAllocGuard3D<int> device_alloc(extent);
const size_t host_pitch = GENERATE_REF(device_alloc.width(), device_alloc.width() + 64);
LinearAllocGuard<int> host_alloc(LinearAllocs::hipHostMalloc,
host_pitch * device_alloc.height() * device_alloc.depth());
const dim3 threads_per_block(32, 32);
const dim3 blocks(device_alloc.width_logical() / threads_per_block.x + 1,
device_alloc.height() / threads_per_block.y + 1, device_alloc.depth());
Iota<<<blocks, threads_per_block>>>(device_alloc.ptr(), device_alloc.pitch(),
device_alloc.width_logical(), device_alloc.height(),
device_alloc.depth());
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipDeviceSynchronize());
HIP_CHECK(memcpy_func(
make_hipPitchedPtr(host_alloc.ptr(), host_pitch, device_alloc.width(), device_alloc.height()),
make_hipPos(0, 0, 0), device_alloc.pitched_ptr(), make_hipPos(0, 0, 0), device_alloc.extent(),
kind, kernel_stream));
if constexpr (should_synchronize) {
HIP_CHECK(hipStreamSynchronize(kernel_stream));
}
const auto f = [extent](size_t x, size_t y, size_t z) {
constexpr auto width_logical = extent.width / sizeof(int);
return z * width_logical * extent.height + y * width_logical + x;
};
PitchedMemoryVerify(host_alloc.ptr(), host_pitch, device_alloc.width_logical(),
device_alloc.height(), device_alloc.depth(), f);
}
template <bool should_synchronize, bool enable_peer_access, typename F>
void Memcpy3DDeviceToDeviceShell(F memcpy_func, hipStream_t kernel_stream = nullptr) {
const auto kind = GENERATE(hipMemcpyDeviceToDevice, hipMemcpyDefault);
constexpr hipExtent extent{127 * sizeof(int), 128, 8};
const auto device_count = HipTest::getDeviceCount();
const auto src_device = GENERATE_COPY(range(0, device_count));
const auto dst_device = GENERATE_COPY(range(0, device_count));
INFO("Src device: " << src_device << ", Dst device: " << dst_device);
HIP_CHECK(hipSetDevice(src_device));
if (device_count > 0 && kernel_stream != nullptr && kernel_stream != hipStreamPerThread) {
HIP_CHECK(hipStreamCreate(&kernel_stream));
}
if constexpr (enable_peer_access) {
if (src_device == dst_device) {
if (device_count > 0 && kernel_stream != nullptr && kernel_stream != hipStreamPerThread) {
HIP_CHECK(hipStreamDestroy(kernel_stream));
}
return;
}
int can_access_peer = 0;
HIP_CHECK(hipDeviceCanAccessPeer(&can_access_peer, src_device, dst_device));
if (!can_access_peer) {
std::string msg = "Skipped as peer access cannot be enabled between devices " +
std::to_string(src_device) + " " + std::to_string(dst_device);
HipTest::HIP_SKIP_TEST(msg.c_str());
if (device_count > 0 && kernel_stream != nullptr && kernel_stream != hipStreamPerThread) {
HIP_CHECK(hipStreamDestroy(kernel_stream));
}
return;
}
HIP_CHECK(hipDeviceEnablePeerAccess(dst_device, 0));
}
LinearAllocGuard3D<int> src_alloc(extent);
HIP_CHECK(hipSetDevice(dst_device));
LinearAllocGuard3D<int> dst_alloc(extent);
HIP_CHECK(hipSetDevice(src_device));
LinearAllocGuard<int> host_alloc(LinearAllocs::hipHostMalloc,
dst_alloc.width() * dst_alloc.height() * dst_alloc.depth());
const dim3 threads_per_block(32, 32);
const dim3 blocks(dst_alloc.width_logical() / threads_per_block.x + 1,
dst_alloc.height() / threads_per_block.y + 1, dst_alloc.depth());
// Using dst_alloc width and height to set only the elements that will be copied over to
// dst_alloc
Iota<<<blocks, threads_per_block>>>(src_alloc.ptr(), src_alloc.pitch(),
dst_alloc.width_logical(),
dst_alloc.height(), dst_alloc.depth());
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipDeviceSynchronize());
HIP_CHECK(memcpy_func(dst_alloc.pitched_ptr(), make_hipPos(0, 0, 0), src_alloc.pitched_ptr(),
make_hipPos(0, 0, 0), dst_alloc.extent(), kind, kernel_stream));
if constexpr (should_synchronize) {
HIP_CHECK(hipStreamSynchronize(kernel_stream));
}
if (device_count > 0 && kernel_stream != nullptr && kernel_stream != hipStreamPerThread) {
HIP_CHECK(hipStreamDestroy(kernel_stream));
}
HIP_CHECK(Memcpy3DWrapper(make_hipPitchedPtr(host_alloc.ptr(), dst_alloc.width(),
dst_alloc.width(), dst_alloc.height()),
make_hipPos(0, 0, 0), dst_alloc.pitched_ptr(), make_hipPos(0, 0, 0),
dst_alloc.extent(), hipMemcpyDeviceToHost));
const auto f = [extent](size_t x, size_t y, size_t z) {
constexpr auto width_logical = extent.width / sizeof(int);
return z * width_logical * extent.height + y * width_logical + x;
};
PitchedMemoryVerify(host_alloc.ptr(), dst_alloc.width(), dst_alloc.width_logical(),
dst_alloc.height(), dst_alloc.depth(), f);
}
template <bool should_synchronize, typename F>
void Memcpy3DHostToDeviceShell(F memcpy_func, const hipStream_t kernel_stream = nullptr) {
const auto kind = GENERATE(hipMemcpyHostToDevice, hipMemcpyDefault);
constexpr hipExtent extent{127 * sizeof(int), 128, 8};
LinearAllocGuard3D<int> device_alloc(extent);
const size_t host_pitch = GENERATE_REF(device_alloc.pitch(), 2 * device_alloc.pitch());
LinearAllocGuard<int> src_host_alloc(LinearAllocs::hipHostMalloc,
host_pitch * device_alloc.height() * device_alloc.depth());
LinearAllocGuard<int> dst_host_alloc(
LinearAllocs::hipHostMalloc,
device_alloc.width() * device_alloc.height() * device_alloc.depth());
const auto f = [extent](size_t x, size_t y, size_t z) {
constexpr auto width_logical = extent.width / sizeof(int);
return z * width_logical * extent.height + y * width_logical + x;
};
PitchedMemorySet(src_host_alloc.ptr(), host_pitch, device_alloc.width_logical(),
device_alloc.height(), device_alloc.depth(), f);
std::fill_n(dst_host_alloc.ptr(),
device_alloc.width_logical() * device_alloc.height() * device_alloc.depth(), 0);
HIP_CHECK(memcpy_func(device_alloc.pitched_ptr(), make_hipPos(0, 0, 0),
make_hipPitchedPtr(src_host_alloc.ptr(), host_pitch, device_alloc.width(),
device_alloc.height()),
make_hipPos(0, 0, 0), device_alloc.extent(), kind, kernel_stream));
if constexpr (should_synchronize) {
HIP_CHECK(hipStreamSynchronize(kernel_stream));
}
HIP_CHECK(Memcpy3DWrapper(make_hipPitchedPtr(dst_host_alloc.ptr(), device_alloc.width(),
device_alloc.width(), device_alloc.height()),
make_hipPos(0, 0, 0), device_alloc.pitched_ptr(), make_hipPos(0, 0, 0),
device_alloc.extent(), hipMemcpyDeviceToHost));
PitchedMemoryVerify(dst_host_alloc.ptr(), device_alloc.width(), device_alloc.width_logical(),
device_alloc.height(), device_alloc.depth(), f);
}
// Exercises a 3D host-to-host copy between two hipHostMalloc'd buffers.
//
// The source rows are either tightly packed or carry 64 bytes of padding; the destination is
// always tightly packed. memcpy_func is invoked with both hipMemcpyHostToHost and
// hipMemcpyDefault (one per generated test run). When should_synchronize is true, kernel_stream
// is synchronized before the destination is verified.
template <bool should_synchronize, typename F>
void Memcpy3DHostToHostShell(F memcpy_func, const hipStream_t kernel_stream = nullptr) {
  const auto kind = GENERATE(hipMemcpyHostToHost, hipMemcpyDefault);

  constexpr hipExtent extent{127 * sizeof(int), 128, 8};
  constexpr auto elements_per_row = extent.width / sizeof(int);

  const size_t padding = GENERATE_COPY(0, 64);
  const size_t src_pitch = extent.width + padding;

  LinearAllocGuard<int> src_host(LinearAllocs::hipHostMalloc,
                                 src_pitch * extent.height * extent.depth);
  LinearAllocGuard<int> dst_host(LinearAllocs::hipHostMalloc,
                                 extent.width * extent.height * extent.depth);

  // Value expected at logical element coordinates (x, y, z).
  const auto expected_value = [extent](size_t x, size_t y, size_t z) {
    constexpr auto width_logical = extent.width / sizeof(int);
    return z * width_logical * extent.height + y * width_logical + x;
  };

  PitchedMemorySet(src_host.ptr(), src_pitch, elements_per_row, extent.height, extent.depth,
                   expected_value);

  // Copy under test: padded (or packed) host source -> packed host destination.
  const auto dst_pitched =
      make_hipPitchedPtr(dst_host.ptr(), extent.width, extent.width, extent.height);
  const auto src_pitched =
      make_hipPitchedPtr(src_host.ptr(), src_pitch, extent.width, extent.height);
  HIP_CHECK(memcpy_func(dst_pitched, make_hipPos(0, 0, 0), src_pitched, make_hipPos(0, 0, 0),
                        extent, kind, kernel_stream));
  if constexpr (should_synchronize) {
    HIP_CHECK(hipStreamSynchronize(kernel_stream));
  }

  PitchedMemoryVerify(dst_host.ptr(), extent.width, elements_per_row, extent.height,
                      extent.depth, expected_value);
}
// Round-trips data host -> array -> array -> host through memcpy_func and verifies the result.
//
// The extent is expressed in elements (127 x 128 x 8 ints); host buffers are tightly packed with
// a row size of extent.width * sizeof(int) bytes. When should_synchronize is true, kernel_stream
// is synchronized after every copy.
template <bool should_synchronize, typename F>
void Memcpy3DArrayHostShell(F memcpy_func, const hipStream_t kernel_stream = nullptr) {
  constexpr hipExtent extent{127, 128, 8};
  constexpr auto row_bytes = extent.width * sizeof(int);

  LinearAllocGuard<int> src_host(LinearAllocs::hipHostMalloc,
                                 row_bytes * extent.height * extent.depth);
  LinearAllocGuard<int> dst_host(LinearAllocs::hipHostMalloc,
                                 row_bytes * extent.height * extent.depth);
  ArrayAllocGuard<int> src_array(extent);
  ArrayAllocGuard<int> dst_array(extent);

  // Value expected at element coordinates (x, y, z).
  const auto expected_value = [extent](size_t x, size_t y, size_t z) {
    return z * extent.width * extent.height + y * extent.width + x;
  };

  PitchedMemorySet(src_host.ptr(), row_bytes, extent.width, extent.height, extent.depth,
                   expected_value);

  // Step 1: host -> source array.
  const auto src_pitched = make_hipPitchedPtr(src_host.ptr(), row_bytes, row_bytes, extent.height);
  HIP_CHECK(memcpy_func(src_array.ptr(), make_hipPos(0, 0, 0), src_pitched, make_hipPos(0, 0, 0),
                        extent, hipMemcpyHostToDevice, kernel_stream));
  if constexpr (should_synchronize) {
    HIP_CHECK(hipStreamSynchronize(kernel_stream));
  }

  // Step 2: source array -> destination array.
  HIP_CHECK(memcpy_func(dst_array.ptr(), make_hipPos(0, 0, 0), src_array.ptr(),
                        make_hipPos(0, 0, 0), extent, hipMemcpyDeviceToDevice, kernel_stream));
  if constexpr (should_synchronize) {
    HIP_CHECK(hipStreamSynchronize(kernel_stream));
  }

  // Step 3: destination array -> host.
  const auto dst_pitched = make_hipPitchedPtr(dst_host.ptr(), row_bytes, row_bytes, extent.height);
  HIP_CHECK(memcpy_func(dst_pitched, make_hipPos(0, 0, 0), dst_array.ptr(), make_hipPos(0, 0, 0),
                        extent, hipMemcpyDeviceToHost, kernel_stream));
  if constexpr (should_synchronize) {
    HIP_CHECK(hipStreamSynchronize(kernel_stream));
  }

  PitchedMemoryVerify(dst_host.ptr(), row_bytes, extent.width, extent.height, extent.depth,
                      expected_value);
}
template <bool should_synchronize, typename F>
void Memcpy3DArrayDeviceShell(F memcpy_func, const hipStream_t kernel_stream = nullptr) {
constexpr hipExtent extent{127, 128, 8};
LinearAllocGuard<int> host_alloc(LinearAllocs::hipHostMalloc,
extent.width * sizeof(int) * extent.height * extent.depth);
ArrayAllocGuard<int> src_array(extent);
ArrayAllocGuard<int> dst_array(extent);
LinearAllocGuard3D<int> src_device(extent.width, extent.height, extent.depth);
LinearAllocGuard3D<int> dst_device(extent.width, extent.height, extent.depth);
const dim3 threads_per_block(32, 32);
const dim3 blocks(src_device.width_logical() / threads_per_block.x + 1,
src_device.height() / threads_per_block.y + 1, src_device.depth());
Iota<<<blocks, threads_per_block>>>(src_device.ptr(), src_device.pitch(),
src_device.width_logical(), src_device.height(),
src_device.depth());
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipDeviceSynchronize());
// Device -> Array
HIP_CHECK(memcpy_func(src_array.ptr(), make_hipPos(0, 0, 0), src_device.pitched_ptr(),
make_hipPos(0, 0, 0), extent, hipMemcpyDeviceToDevice, kernel_stream));
if constexpr (should_synchronize) {
HIP_CHECK(hipStreamSynchronize(kernel_stream));
}
// Array -> Array
HIP_CHECK(memcpy_func(dst_array.ptr(), make_hipPos(0, 0, 0), src_array.ptr(),
make_hipPos(0, 0, 0), extent, hipMemcpyDeviceToDevice, kernel_stream));
if constexpr (should_synchronize) {
HIP_CHECK(hipStreamSynchronize(kernel_stream));
}
// Array -> Device
HIP_CHECK(memcpy_func(dst_device.pitched_ptr(), make_hipPos(0, 0, 0), dst_array.ptr(),
make_hipPos(0, 0, 0), extent, hipMemcpyDeviceToDevice, kernel_stream));
if constexpr (should_synchronize) {
HIP_CHECK(hipStreamSynchronize(kernel_stream));
}
// Device -> Host
HIP_CHECK(memcpy_func(make_hipPitchedPtr(host_alloc.ptr(), extent.width * sizeof(int),
extent.width * sizeof(int), extent.height),
make_hipPos(0, 0, 0), dst_device.pitched_ptr(), make_hipPos(0, 0, 0),
dst_device.extent(), hipMemcpyDeviceToHost, kernel_stream));
if constexpr (should_synchronize) {
HIP_CHECK(hipStreamSynchronize(kernel_stream));
}
const auto f = [extent](size_t x, size_t y, size_t z) {
return z * extent.width * extent.height + y * extent.width + x;
};
PitchedMemoryVerify(host_alloc.ptr(), extent.width * sizeof(int), extent.width, extent.height,
extent.depth, f);
}
// Binds memcpy_func to a host -> device 3D copy (hipHostMalloc'd source, 32 x 32 x 8 ints) and
// hands the bound call to MemcpySyncBehaviorCheck together with should_sync and kernel_stream.
template <typename F>
void Memcpy3DHtoDSyncBehavior(F memcpy_func, const bool should_sync,
                              const hipStream_t kernel_stream = nullptr) {
  LinearAllocGuard3D<int> device_alloc(make_hipExtent(32 * sizeof(int), 32, 8));
  LinearAllocGuard<int> host_alloc(
      LinearAllocs::hipHostMalloc,
      device_alloc.width() * device_alloc.height() * device_alloc.depth());

  // Tightly packed host source matching the device extent.
  const auto host_pitched = make_hipPitchedPtr(host_alloc.ptr(), device_alloc.width(),
                                               device_alloc.width(), device_alloc.height());

  auto copy_call = std::bind(memcpy_func, device_alloc.pitched_ptr(), make_hipPos(0, 0, 0),
                             host_pitched, make_hipPos(0, 0, 0), device_alloc.extent(),
                             hipMemcpyHostToDevice, kernel_stream);
  MemcpySyncBehaviorCheck(copy_call, should_sync, kernel_stream);
}
// Binds memcpy_func to a device -> host 3D copy whose destination comes from plain malloc
// (32 x 32 x 8 ints) and hands the bound call to MemcpySyncBehaviorCheck together with
// should_sync and kernel_stream.
template <typename F>
void Memcpy3DDtoHPageableSyncBehavior(F memcpy_func, const bool should_sync,
                                      const hipStream_t kernel_stream = nullptr) {
  LinearAllocGuard3D<int> device_alloc(make_hipExtent(32 * sizeof(int), 32, 8));
  LinearAllocGuard<int> host_alloc(
      LinearAllocs::malloc, device_alloc.width() * device_alloc.height() * device_alloc.depth());

  // Tightly packed host destination matching the device extent.
  const auto host_pitched = make_hipPitchedPtr(host_alloc.ptr(), device_alloc.width(),
                                               device_alloc.width(), device_alloc.height());

  auto copy_call = std::bind(memcpy_func, host_pitched, make_hipPos(0, 0, 0),
                             device_alloc.pitched_ptr(), make_hipPos(0, 0, 0),
                             device_alloc.extent(), hipMemcpyDeviceToHost, kernel_stream);
  MemcpySyncBehaviorCheck(copy_call, should_sync, kernel_stream);
}
// Binds memcpy_func to a device -> host 3D copy whose destination comes from hipHostMalloc
// (32 x 32 x 8 ints) and hands the bound call to MemcpySyncBehaviorCheck together with
// should_sync and kernel_stream.
template <typename F>
void Memcpy3DDtoHPinnedSyncBehavior(F memcpy_func, const bool should_sync,
                                    const hipStream_t kernel_stream = nullptr) {
  LinearAllocGuard3D<int> device_alloc(make_hipExtent(32 * sizeof(int), 32, 8));
  LinearAllocGuard<int> host_alloc(
      LinearAllocs::hipHostMalloc,
      device_alloc.width() * device_alloc.height() * device_alloc.depth());

  // Tightly packed host destination matching the device extent.
  const auto host_pitched = make_hipPitchedPtr(host_alloc.ptr(), device_alloc.width(),
                                               device_alloc.width(), device_alloc.height());

  auto copy_call = std::bind(memcpy_func, host_pitched, make_hipPos(0, 0, 0),
                             device_alloc.pitched_ptr(), make_hipPos(0, 0, 0),
                             device_alloc.extent(), hipMemcpyDeviceToHost, kernel_stream);
  MemcpySyncBehaviorCheck(copy_call, should_sync, kernel_stream);
}
// Binds memcpy_func to a device -> device 3D copy between two pitched allocations of
// 32 x 32 x 8 ints and hands the bound call to MemcpySyncBehaviorCheck together with should_sync
// and kernel_stream.
template <typename F>
void Memcpy3DDtoDSyncBehavior(F memcpy_func, const bool should_sync,
                              const hipStream_t kernel_stream = nullptr) {
  LinearAllocGuard3D<int> src_alloc(make_hipExtent(32 * sizeof(int), 32, 8));
  LinearAllocGuard3D<int> dst_alloc(make_hipExtent(32 * sizeof(int), 32, 8));

  auto copy_call = std::bind(memcpy_func, dst_alloc.pitched_ptr(), make_hipPos(0, 0, 0),
                             src_alloc.pitched_ptr(), make_hipPos(0, 0, 0), dst_alloc.extent(),
                             hipMemcpyDeviceToDevice, kernel_stream);
  MemcpySyncBehaviorCheck(copy_call, should_sync, kernel_stream);
}
// Binds memcpy_func to a host -> host 3D copy of 32 x 32 x 8 ints and hands the bound call to
// MemcpySyncBehaviorCheck together with should_sync and kernel_stream.
//
// Source and destination are each generated as either malloc or hipHostMalloc memory, so all
// four combinations are covered across the generated test runs.
template <typename F>
void Memcpy3DHtoHSyncBehavior(F memcpy_func, const bool should_sync,
                              const hipStream_t kernel_stream = nullptr) {
  using LA = LinearAllocs;
  const auto src_alloc_type = GENERATE(LA::malloc, LA::hipHostMalloc);
  const auto dst_alloc_type = GENERATE(LA::malloc, LA::hipHostMalloc);

  // Both buffers are tightly packed: 32 ints per row, 32 rows, 8 slices.
  LinearAllocGuard<int> src_alloc(src_alloc_type, 32 * sizeof(int) * 32 * 8);
  LinearAllocGuard<int> dst_alloc(dst_alloc_type, 32 * sizeof(int) * 32 * 8);

  const auto dst_pitched =
      make_hipPitchedPtr(dst_alloc.ptr(), 32 * sizeof(int), 32 * sizeof(int), 32);
  const auto src_pitched =
      make_hipPitchedPtr(src_alloc.ptr(), 32 * sizeof(int), 32 * sizeof(int), 32);

  auto copy_call = std::bind(memcpy_func, dst_pitched, make_hipPos(0, 0, 0), src_pitched,
                             make_hipPos(0, 0, 0), make_hipExtent(32 * sizeof(int), 32, 8),
                             hipMemcpyHostToHost, kernel_stream);
  MemcpySyncBehaviorCheck(copy_call, should_sync, kernel_stream);
}
// Checks that a 3D copy whose extent has a zero width, height or depth leaves the destination
// untouched.
//
// Each generated run zeroes exactly one dimension of the extent passed to memcpy_func. The
// destination is pre-filled with 42 and the source with 1, so any byte that differs from 42
// after the copy means data was transferred. When should_synchronize is true, stream is
// synchronized after the copy under test.
//
// All buffers hold uint8_t, so the logical width equals the width in bytes and the read-back
// host buffers are tightly packed; every section therefore verifies the complete
// width * height * depth volume.
template <bool should_synchronize, typename F>
void Memcpy3DZeroWidthHeightDepth(F memcpy_func, const hipStream_t stream = nullptr) {
  constexpr hipExtent extent{127 * sizeof(int), 128, 8};
  const auto [width_mult, height_mult, depth_mult] =
      GENERATE(std::make_tuple(0, 1, 1), std::make_tuple(1, 0, 1), std::make_tuple(1, 1, 0));

  SECTION("Device to Host") {
    LinearAllocGuard3D<uint8_t> device_alloc(extent);
    LinearAllocGuard<uint8_t> host_alloc(
        LinearAllocs::hipHostMalloc,
        device_alloc.width() * device_alloc.height() * device_alloc.depth());
    std::fill_n(host_alloc.ptr(),
                device_alloc.width_logical() * device_alloc.height() * device_alloc.depth(), 42);
    HIP_CHECK(hipMemset3D(device_alloc.pitched_ptr(), 1, device_alloc.extent()));

    HIP_CHECK(memcpy_func(
        make_hipPitchedPtr(host_alloc.ptr(), device_alloc.width(), device_alloc.width(),
                           device_alloc.height()),
        make_hipPos(0, 0, 0), device_alloc.pitched_ptr(), make_hipPos(0, 0, 0),
        make_hipExtent(device_alloc.width() * width_mult, device_alloc.height() * height_mult,
                       device_alloc.depth() * depth_mult),
        hipMemcpyDeviceToHost, stream));
    if constexpr (should_synchronize) {
      HIP_CHECK(hipStreamSynchronize(stream));
    }

    ArrayFindIfNot(host_alloc.ptr(), static_cast<uint8_t>(42),
                   device_alloc.width_logical() * device_alloc.height() * device_alloc.depth());
  }

  SECTION("Device to Device") {
    LinearAllocGuard3D<uint8_t> src_alloc(extent);
    LinearAllocGuard3D<uint8_t> dst_alloc(extent);
    LinearAllocGuard<uint8_t> host_alloc(
        LinearAllocs::hipHostMalloc, dst_alloc.width() * dst_alloc.height() * dst_alloc.depth());
    HIP_CHECK(hipMemset3D(src_alloc.pitched_ptr(), 1, src_alloc.extent()));
    HIP_CHECK(hipMemset3D(dst_alloc.pitched_ptr(), 42, dst_alloc.extent()));

    HIP_CHECK(
        memcpy_func(dst_alloc.pitched_ptr(), make_hipPos(0, 0, 0), src_alloc.pitched_ptr(),
                    make_hipPos(0, 0, 0),
                    make_hipExtent(dst_alloc.width() * width_mult, dst_alloc.height() * height_mult,
                                   dst_alloc.depth() * depth_mult),
                    hipMemcpyDeviceToDevice, stream));
    if constexpr (should_synchronize) {
      HIP_CHECK(hipStreamSynchronize(stream));
    }

    // Read the whole destination back so it can be inspected on the host.
    HIP_CHECK(Memcpy3DWrapper(make_hipPitchedPtr(host_alloc.ptr(), dst_alloc.width(),
                                                 dst_alloc.width(), dst_alloc.height()),
                              make_hipPos(0, 0, 0), dst_alloc.pitched_ptr(), make_hipPos(0, 0, 0),
                              dst_alloc.extent(), hipMemcpyDeviceToHost));
    // Verify every depth slice, not only the first one.
    ArrayFindIfNot(host_alloc.ptr(), static_cast<uint8_t>(42),
                   dst_alloc.width_logical() * dst_alloc.height() * dst_alloc.depth());
  }

  SECTION("Host to Device") {
    LinearAllocGuard3D<uint8_t> device_alloc(extent);
    LinearAllocGuard<uint8_t> src_host_alloc(
        LinearAllocs::hipHostMalloc,
        device_alloc.width() * device_alloc.height() * device_alloc.depth());
    LinearAllocGuard<uint8_t> dst_host_alloc(
        LinearAllocs::hipHostMalloc,
        device_alloc.width() * device_alloc.height() * device_alloc.depth());
    std::fill_n(src_host_alloc.ptr(),
                device_alloc.width_logical() * device_alloc.height() * device_alloc.depth(), 1);
    HIP_CHECK(hipMemset3D(device_alloc.pitched_ptr(), 42, device_alloc.extent()));

    HIP_CHECK(memcpy_func(
        device_alloc.pitched_ptr(), make_hipPos(0, 0, 0),
        make_hipPitchedPtr(src_host_alloc.ptr(), device_alloc.width(), device_alloc.width(),
                           device_alloc.height()),
        make_hipPos(0, 0, 0),
        make_hipExtent(device_alloc.width() * width_mult, device_alloc.height() * height_mult,
                       device_alloc.depth() * depth_mult),
        hipMemcpyHostToDevice, stream));
    if constexpr (should_synchronize) {
      HIP_CHECK(hipStreamSynchronize(stream));
    }

    // Read the whole destination back so it can be inspected on the host.
    HIP_CHECK(Memcpy3DWrapper(make_hipPitchedPtr(dst_host_alloc.ptr(), device_alloc.width(),
                                                 device_alloc.width(), device_alloc.height()),
                              make_hipPos(0, 0, 0), device_alloc.pitched_ptr(),
                              make_hipPos(0, 0, 0), device_alloc.extent(), hipMemcpyDeviceToHost));
    // Verify every depth slice, not only the first one.
    ArrayFindIfNot(dst_host_alloc.ptr(), static_cast<uint8_t>(42),
                   device_alloc.width_logical() * device_alloc.height() * device_alloc.depth());
  }

  SECTION("Host to Host") {
    const auto alloc_size = extent.width * extent.height * extent.depth;
    LinearAllocGuard<uint8_t> src_alloc(LinearAllocs::hipHostMalloc, alloc_size);
    LinearAllocGuard<uint8_t> dst_alloc(LinearAllocs::hipHostMalloc, alloc_size);
    std::fill_n(src_alloc.ptr(), alloc_size, 1);
    std::fill_n(dst_alloc.ptr(), alloc_size, 42);

    HIP_CHECK(
        memcpy_func(make_hipPitchedPtr(dst_alloc.ptr(), extent.width, extent.width, extent.height),
                    make_hipPos(0, 0, 0),
                    make_hipPitchedPtr(src_alloc.ptr(), extent.width, extent.width, extent.height),
                    make_hipPos(0, 0, 0),
                    make_hipExtent(extent.width * width_mult, extent.height * height_mult,
                                   extent.depth * depth_mult),
                    hipMemcpyHostToHost, stream));
    if constexpr (should_synchronize) {
      HIP_CHECK(hipStreamSynchronize(stream));
    }

    ArrayFindIfNot(dst_alloc.ptr(), static_cast<uint8_t>(42), alloc_size);
  }
}
// Returns the platform's "host" memory-type enumerator: hipMemoryTypeHost when HT_AMD is set,
// CU_MEMORYTYPE_HOST otherwise.
constexpr auto MemTypeHost() {
#if !HT_AMD
  return CU_MEMORYTYPE_HOST;
#else
  return hipMemoryTypeHost;
#endif
}
// Returns the platform's "device" memory-type enumerator: hipMemoryTypeDevice when HT_AMD is set,
// CU_MEMORYTYPE_DEVICE otherwise.
constexpr auto MemTypeDevice() {
#if !HT_AMD
  return CU_MEMORYTYPE_DEVICE;
#else
  return hipMemoryTypeDevice;
#endif
}
// Returns the platform's "array" memory-type enumerator: hipMemoryTypeArray when HT_AMD is set,
// CU_MEMORYTYPE_ARRAY otherwise.
constexpr auto MemTypeArray() {
#if !HT_AMD
  return CU_MEMORYTYPE_ARRAY;
#else
  return hipMemoryTypeArray;
#endif
}
// Returns the platform's "unified" memory-type enumerator: hipMemoryTypeUnified when HT_AMD is
// set, CU_MEMORYTYPE_UNIFIED otherwise.
constexpr auto MemTypeUnified() {
#if !HT_AMD
  return CU_MEMORYTYPE_UNIFIED;
#else
  return hipMemoryTypeUnified;
#endif
}
// Either a pitched linear pointer (host or device) or an array handle; lets the driver-API
// helpers below accept both kinds of copy endpoint through one parameter type.
using DrvPtrVariant = std::variant<hipPitchedPtr, hipArray_t>;
// Builds a HIP_MEMCPY3D descriptor from runtime-API style arguments.
//
// An endpoint holding a hipArray_t is described as array memory. An endpoint holding a
// hipPitchedPtr is described as host, device or unified memory depending on which side of `kind`
// it sits on (hipMemcpyDefault maps both sides to unified memory). Positions and extent are
// copied verbatim; extent.width and pos.x are taken as byte quantities.
static inline HIP_MEMCPY3D GetDrvMemcpy3DParms(DrvPtrVariant dst_ptr, hipPos dst_pos,
                                               DrvPtrVariant src_ptr, hipPos src_pos,
                                               hipExtent extent, hipMemcpyKind kind) {
  HIP_MEMCPY3D parms = {0};

  // Destination endpoint.
  if (const auto* dst_array = std::get_if<hipArray_t>(&dst_ptr)) {
    parms.dstMemoryType = hipMemoryTypeArray;
    parms.dstArray = *dst_array;
  } else {
    const auto dst_linear = std::get<hipPitchedPtr>(dst_ptr);
    parms.dstPitch = dst_linear.pitch;
    switch (kind) {
      case hipMemcpyHostToHost:
      case hipMemcpyDeviceToHost:
        parms.dstMemoryType = hipMemoryTypeHost;
        parms.dstHost = dst_linear.ptr;
        break;
      case hipMemcpyHostToDevice:
      case hipMemcpyDeviceToDevice:
        parms.dstMemoryType = hipMemoryTypeDevice;
        parms.dstDevice = reinterpret_cast<hipDeviceptr_t>(dst_linear.ptr);
        break;
      case hipMemcpyDefault:
        parms.dstMemoryType = hipMemoryTypeUnified;
        parms.dstDevice = reinterpret_cast<hipDeviceptr_t>(dst_linear.ptr);
        break;
      default:
        assert(false);
    }
  }

  // Source endpoint.
  if (const auto* src_array = std::get_if<hipArray_t>(&src_ptr)) {
    parms.srcMemoryType = hipMemoryTypeArray;
    parms.srcArray = *src_array;
  } else {
    const auto src_linear = std::get<hipPitchedPtr>(src_ptr);
    parms.srcPitch = src_linear.pitch;
    switch (kind) {
      case hipMemcpyDeviceToDevice:
      case hipMemcpyDeviceToHost:
        parms.srcMemoryType = hipMemoryTypeDevice;
        parms.srcDevice = reinterpret_cast<hipDeviceptr_t>(src_linear.ptr);
        break;
      case hipMemcpyHostToHost:
      case hipMemcpyHostToDevice:
        parms.srcMemoryType = hipMemoryTypeHost;
        parms.srcHost = src_linear.ptr;
        break;
      case hipMemcpyDefault:
        parms.srcMemoryType = hipMemoryTypeUnified;
        parms.srcDevice = reinterpret_cast<hipDeviceptr_t>(src_linear.ptr);
        break;
      default:
        assert(false);
    }
  }

  // Copy geometry.
  parms.WidthInBytes = extent.width;
  parms.Height = extent.height;
  parms.Depth = extent.depth;

  parms.dstXInBytes = dst_pos.x;
  parms.dstY = dst_pos.y;
  parms.dstZ = dst_pos.z;

  parms.srcXInBytes = src_pos.x;
  parms.srcY = src_pos.y;
  parms.srcZ = src_pos.z;

  return parms;
}
// Compares two HIP_MEMCPY3D descriptors field by field, using lhs to decide which of the
// mutually exclusive endpoint fields are relevant.
//
// - Positions and extent must match exactly.
// - Memory types must match on both sides.
// - For each side: if lhs names an array, the array handles must match; otherwise the pitches
//   must match.
// - Any host/device pointer that is set in lhs must equal the corresponding pointer in rhs.
//
// The destination and source results are combined, so a mismatch on either side makes the
// descriptors unequal.
static inline bool operator==(const HIP_MEMCPY3D& lhs, const HIP_MEMCPY3D& rhs) {
  const bool pos_eq = lhs.dstXInBytes == rhs.dstXInBytes && lhs.dstY == rhs.dstY &&
      lhs.dstZ == rhs.dstZ && lhs.srcXInBytes == rhs.srcXInBytes && lhs.srcY == rhs.srcY &&
      lhs.srcZ == rhs.srcZ;

  const bool extent_eq =
      lhs.WidthInBytes == rhs.WidthInBytes && lhs.Height == rhs.Height && lhs.Depth == rhs.Depth;

  bool mem_eq =
      lhs.dstMemoryType == rhs.dstMemoryType && lhs.srcMemoryType == rhs.srcMemoryType;

  // Destination endpoint: array handle or pitch.
  if (lhs.dstArray) {
    mem_eq = mem_eq && (lhs.dstArray == rhs.dstArray);
  } else {
    mem_eq = mem_eq && (lhs.dstPitch == rhs.dstPitch);
  }

  // Source endpoint: array handle or pitch. AND-ed with the destination result so that a
  // destination mismatch is not discarded.
  if (lhs.srcArray) {
    mem_eq = mem_eq && (lhs.srcArray == rhs.srcArray);
  } else {
    mem_eq = mem_eq && (lhs.srcPitch == rhs.srcPitch);
  }

  // Linear pointers are only compared when lhs actually sets them.
  if (lhs.dstDevice) {
    mem_eq = mem_eq && (lhs.dstDevice == rhs.dstDevice);
  }
  if (lhs.dstHost) {
    mem_eq = mem_eq && (lhs.dstHost == rhs.dstHost);
  }
  if (lhs.srcDevice) {
    mem_eq = mem_eq && (lhs.srcDevice == rhs.srcDevice);
  }
  if (lhs.srcHost) {
    mem_eq = mem_eq && (lhs.srcHost == rhs.srcHost);
  }

  return pos_eq && extent_eq && mem_eq;
}
// Performs a 3D copy by building a one-node graph with hipDrvGraphAddMemcpyNode, launching it on
// hipStreamPerThread and waiting for it to finish.
//
// With set_params == true the node is first created with the reversed copy (source and
// destination swapped, direction reversed) and then re-targeted through
// hipDrvGraphMemcpyNodeSetParams. In both modes the parameters read back from the node must
// equal the requested ones. The `stream` argument is accepted but not used by this wrapper.
// Always returns hipSuccess; failures are reported through HIP_CHECK / REQUIRE.
template <bool set_params = false>
hipError_t DrvMemcpy3DGraphWrapper(DrvPtrVariant dst_ptr, hipPos dst_pos, DrvPtrVariant src_ptr,
                                   hipPos src_pos, hipExtent extent, hipMemcpyKind kind,
                                   hipCtx_t context, hipStream_t stream = nullptr) {
  auto parms = GetDrvMemcpy3DParms(dst_ptr, dst_pos, src_ptr, src_pos, extent, kind);

  hipGraph_t graph = nullptr;
  HIP_CHECK(hipGraphCreate(&graph, 0));

  hipGraphNode_t memcpy_node = nullptr;
  if constexpr (!set_params) {
    HIP_CHECK(hipDrvGraphAddMemcpyNode(&memcpy_node, graph, nullptr, 0, &parms, context));
  } else {
    // Start from the opposite copy so that SetParams has to change every endpoint field.
    auto reversed_parms = GetDrvMemcpy3DParms(src_ptr, src_pos, dst_ptr, dst_pos, extent,
                                              ReverseMemcpyDirection(kind));
    HIP_CHECK(hipDrvGraphAddMemcpyNode(&memcpy_node, graph, nullptr, 0, &reversed_parms, context));
    HIP_CHECK(hipDrvGraphMemcpyNodeSetParams(memcpy_node, &parms));
  }

  // The node must report back exactly what was requested.
  HIP_MEMCPY3D retrieved_params = {0};
  HIP_CHECK(hipDrvGraphMemcpyNodeGetParams(memcpy_node, &retrieved_params));
  REQUIRE(parms == retrieved_params);

  hipGraphExec_t exec = nullptr;
  HIP_CHECK(hipGraphInstantiate(&exec, graph, nullptr, nullptr, 0));
  HIP_CHECK(hipGraphLaunch(exec, hipStreamPerThread));
  HIP_CHECK(hipStreamSynchronize(hipStreamPerThread));

  HIP_CHECK(hipGraphExecDestroy(exec));
  HIP_CHECK(hipGraphDestroy(graph));

  return hipSuccess;
}
// Adapts runtime-API style arguments to the driver-API 3D copy entry points.
//
// The arguments are converted with GetDrvMemcpy3DParms; the copy is issued through
// hipDrvMemcpy3DAsync on `stream` when async is true, and through hipDrvMemcpy3D (ignoring
// `stream`) otherwise. Returns the error code of the HIP call.
template <bool async = false>
hipError_t DrvMemcpy3DWrapper(DrvPtrVariant dst_ptr, hipPos dst_pos, DrvPtrVariant src_ptr,
                              hipPos src_pos, hipExtent extent, hipMemcpyKind kind,
                              hipStream_t stream = nullptr) {
  auto parms = GetDrvMemcpy3DParms(dst_ptr, dst_pos, src_ptr, src_pos, extent, kind);
  if constexpr (!async) {
    return hipDrvMemcpy3D(&parms);
  } else {
    return hipDrvMemcpy3DAsync(&parms, stream);
  }
}
// Driver-API variant of the host -> array -> array -> host round trip.
//
// Here the extent width is expressed in bytes (127 * sizeof(int)); host buffers are tightly
// packed with extent.width bytes per row. When should_synchronize is true, kernel_stream is
// synchronized after every copy.
template <bool should_synchronize, typename F>
void DrvMemcpy3DArrayHostShell(F memcpy_func, const hipStream_t kernel_stream = nullptr) {
  constexpr hipExtent extent{127 * sizeof(int), 128, 8};
  constexpr auto elements_per_row = extent.width / sizeof(int);

  LinearAllocGuard<int> src_host(LinearAllocs::hipHostMalloc,
                                 extent.width * extent.height * extent.depth);
  LinearAllocGuard<int> dst_host(LinearAllocs::hipHostMalloc,
                                 extent.width * extent.height * extent.depth);
  DrvArrayAllocGuard<int> src_array(extent);
  DrvArrayAllocGuard<int> dst_array(extent);

  // Value expected at logical element coordinates (x, y, z).
  const auto expected_value = [extent](size_t x, size_t y, size_t z) {
    constexpr auto width_logical = extent.width / sizeof(int);
    return z * width_logical * extent.height + y * width_logical + x;
  };

  PitchedMemorySet(src_host.ptr(), extent.width, elements_per_row, extent.height, extent.depth,
                   expected_value);

  // Step 1: host -> source array.
  const auto src_pitched =
      make_hipPitchedPtr(src_host.ptr(), extent.width, extent.width, extent.height);
  HIP_CHECK(memcpy_func(src_array.ptr(), make_hipPos(0, 0, 0), src_pitched, make_hipPos(0, 0, 0),
                        extent, hipMemcpyHostToDevice, kernel_stream));
  if constexpr (should_synchronize) {
    HIP_CHECK(hipStreamSynchronize(kernel_stream));
  }

  // Step 2: source array -> destination array.
  HIP_CHECK(memcpy_func(dst_array.ptr(), make_hipPos(0, 0, 0), src_array.ptr(),
                        make_hipPos(0, 0, 0), extent, hipMemcpyDeviceToDevice, kernel_stream));
  if constexpr (should_synchronize) {
    HIP_CHECK(hipStreamSynchronize(kernel_stream));
  }

  // Step 3: destination array -> host.
  const auto dst_pitched =
      make_hipPitchedPtr(dst_host.ptr(), extent.width, extent.width, extent.height);
  HIP_CHECK(memcpy_func(dst_pitched, make_hipPos(0, 0, 0), dst_array.ptr(), make_hipPos(0, 0, 0),
                        extent, hipMemcpyDeviceToHost, kernel_stream));
  if constexpr (should_synchronize) {
    HIP_CHECK(hipStreamSynchronize(kernel_stream));
  }

  PitchedMemoryVerify(dst_host.ptr(), extent.width, elements_per_row, extent.height,
                      extent.depth, expected_value);
}
template <bool should_synchronize, typename F>
void DrvMemcpy3DArrayDeviceShell(F memcpy_func, const hipStream_t kernel_stream = nullptr) {
constexpr hipExtent extent{127 * sizeof(int), 128, 8};
LinearAllocGuard<int> host_alloc(LinearAllocs::hipHostMalloc,
extent.width * extent.height * extent.depth);
DrvArrayAllocGuard<int> src_array(extent);
DrvArrayAllocGuard<int> dst_array(extent);
LinearAllocGuard3D<int> src_device(extent);
LinearAllocGuard3D<int> dst_device(extent);
const dim3 threads_per_block(32, 32);
const dim3 blocks(src_device.width_logical() / threads_per_block.x + 1,
src_device.height() / threads_per_block.y + 1, src_device.depth());
Iota<<<blocks, threads_per_block>>>(src_device.ptr(), src_device.pitch(),
src_device.width_logical(), src_device.height(),
src_device.depth());
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipDeviceSynchronize());
// Device -> Array
HIP_CHECK(memcpy_func(src_array.ptr(), make_hipPos(0, 0, 0), src_device.pitched_ptr(),
make_hipPos(0, 0, 0), extent, hipMemcpyDeviceToDevice, kernel_stream));
if constexpr (should_synchronize) {
HIP_CHECK(hipStreamSynchronize(kernel_stream));
}
// Array -> Array
HIP_CHECK(memcpy_func(dst_array.ptr(), make_hipPos(0, 0, 0), src_array.ptr(),
make_hipPos(0, 0, 0), extent, hipMemcpyDeviceToDevice, kernel_stream));
if constexpr (should_synchronize) {
HIP_CHECK(hipStreamSynchronize(kernel_stream));
}
// Array -> Device
HIP_CHECK(memcpy_func(dst_device.pitched_ptr(), make_hipPos(0, 0, 0), dst_array.ptr(),
make_hipPos(0, 0, 0), extent, hipMemcpyDeviceToDevice, kernel_stream));
if constexpr (should_synchronize) {
HIP_CHECK(hipStreamSynchronize(kernel_stream));
}
HIP_CHECK(
memcpy_func(make_hipPitchedPtr(host_alloc.ptr(), extent.width, extent.width, extent.height),
make_hipPos(0, 0, 0), dst_device.pitched_ptr(), make_hipPos(0, 0, 0),
dst_device.extent(), hipMemcpyDeviceToHost, kernel_stream));
if constexpr (should_synchronize) {
HIP_CHECK(hipStreamSynchronize(kernel_stream));
}
const auto f = [extent](size_t x, size_t y, size_t z) {
constexpr auto width_logical = extent.width / sizeof(int);
return z * width_logical * extent.height + y * width_logical + x;
};
PitchedMemoryVerify(host_alloc.ptr(), extent.width, extent.width / sizeof(int), extent.height,
extent.depth, f);
}
@@ -0,0 +1,251 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#pragma once
#include <algorithm>
#include <chrono>
#include <memory>
#include <numeric>
#include <type_traits>
#include <vector>
#include <cmd_options.hh>
#include <hip_test_common.hh>
#include <resource_guards.hh>
#pragma clang diagnostic ignored "-Wunused-but-set-variable"
#pragma clang diagnostic ignored "-Wunused-parameter"
#pragma clang diagnostic ignored "-Wunused-function"
// Windows builds: define ssize_t as a signed integer of pointer width (64-bit on _WIN64,
// 32-bit otherwise) so the code below can use it on every platform.
#if defined(_WIN32)
#if defined(_WIN64)
typedef __int64 ssize_t;
#else // !_WIN64
typedef __int32 ssize_t;
#endif // !_WIN64
#endif /*_WIN32*/
// Common base of the scoped timers: holds a reference to a caller-owned accumulator
// (milliseconds are added to it through Record) and the stream the measurement refers to.
// Non-copyable; only derived timers can construct it.
class Timer {
 public:
  Timer(const Timer&) = delete;
  Timer& operator=(const Timer&) = delete;

 protected:
  Timer(float& accumulator, hipStream_t stream) : time_(accumulator), stream_(stream) {}

  // Adds elapsed_ms to the caller-owned accumulator.
  void Record(float elapsed_ms) { time_ += elapsed_ms; }

  hipStream_t GetStream() const { return stream_; }

 private:
  float& time_;
  hipStream_t stream_;
};
class EventTimer : public Timer {
public:
EventTimer(float& time, hipStream_t stream = nullptr) : Timer(time, stream) {
HIP_CHECK(hipEventCreate(&start_));
HIP_CHECK(hipEventCreate(&stop_));
HIP_CHECK(hipEventRecord(start_, GetStream()));
}
~EventTimer() {
hipError_t error; // to avoid compiler warnings
error = hipEventRecord(stop_, GetStream());
error = hipEventSynchronize(stop_);
float ms;
error = hipEventElapsedTime(&ms, start_, stop_);
Record(ms);
error = hipEventDestroy(start_);
error = hipEventDestroy(stop_);
}
private:
hipEvent_t start_;
hipEvent_t stop_;
};
// Scoped timer based on the host steady clock.
//
// The constructor takes a start timestamp; the destructor synchronizes the stream, takes a stop
// timestamp and adds the difference in milliseconds to the accumulator passed at construction.
class CpuTimer : public Timer {
 public:
  CpuTimer(float& time, hipStream_t stream = nullptr) : Timer(time, stream) {
    start_ = Clock::now();
  }

  ~CpuTimer() {
    // The synchronization result is deliberately ignored: a destructor must not throw.
    static_cast<void>(hipStreamSynchronize(GetStream()));
    stop_ = Clock::now();
    const std::chrono::duration<float, std::milli> elapsed = stop_ - start_;
    Record(elapsed.count());
  }

 private:
  using Clock = std::chrono::steady_clock;

  std::chrono::time_point<Clock> start_;
  std::chrono::time_point<Clock> stop_;
};
// CRTP base class for performance tests.
//
// Derived must be callable (operator()) with the arguments given to Run, and is expected to add
// its measured time to the internal accumulator through a timer obtained from GetTimer (see
// TIMED_SECTION). The iteration and warmup counts default to the values in cmd_options and can
// be overridden with Configure.
template <typename Derived> class Benchmark {
 public:
  Benchmark()
      : iterations_(cmd_options.iterations),
        warmups_(cmd_options.warmups),
        display_output_(!cmd_options.no_display),
        progress_bar_(cmd_options.progress) {
    benchmark_name_ = Catch::getResultCapture().getCurrentTestName();
  }

  Benchmark(const Benchmark&) = delete;
  Benchmark& operator=(const Benchmark&) = delete;

  // Value of current() while warmup runs are executing.
  static constexpr ssize_t kWarmup = -1;

  // Overrides the iteration and warmup counts taken from the command line.
  void Configure(size_t iterations, size_t warmups) {
    iterations_ = iterations;
    warmups_ = warmups;
  }

  // Appends "/<section_name>" to the name printed in front of every output line.
  void AddSectionName(const std::string& section_name) { benchmark_name_ += "/" + section_name; }

  using ModifierSignature = std::function<float(float)>;

  // Registers a function applied to each iteration's accumulated time before it is stored.
  void RegisterModifier(const ModifierSignature& modifier) { modifier_ = modifier; }

  // Runs the warmups followed by the measured iterations and returns
  // {mean, standard deviation, fastest, slowest} in milliseconds.
  //
  // The iteration and warmup counts are appended to the benchmark name on every call. When the
  // iteration count is zero there is nothing to measure: all four values are returned as 0 and
  // no statistics line is printed.
  template <typename... Args> std::tuple<float, float, float, float> Run(Args&&... args) {
    AddSectionName(std::to_string(iterations_));
    AddSectionName(std::to_string(warmups_));

    auto& derived = static_cast<Derived&>(*this);

    current_ = kWarmup;
    for (size_t i = 0u; i < warmups_; ++i) {
      PrintProgress("warmup", static_cast<int>(100.f * (i + 1) / warmups_));
      derived(args...);
    }

    // Discard whatever the warmup runs accumulated.
    time_ = .0;

    std::vector<float> samples;
    samples.reserve(iterations_);

    for (current_ = 0; static_cast<size_t>(current_) < iterations_; ++current_) {
      PrintProgress("measurement", static_cast<int>(100.f * (current_ + 1) / iterations_));
      derived(args...);
      if (modifier_) time_ = modifier_(time_);
      samples.push_back(time_);
      time_ = .0;
    }

    // min_element/max_element on an empty range return the end iterator, which must not be
    // dereferenced, and the mean would be 0 / 0.
    if (samples.empty()) {
      return {0.f, 0.f, 0.f, 0.f};
    }

    float sum = std::accumulate(cbegin(samples), cend(samples), .0);
    float mean = sum / samples.size();

    float deviation = std::accumulate(
        cbegin(samples), cend(samples), .0,
        [mean](float acc, float next) { return acc + std::pow(next - mean, 2); });
    deviation = std::sqrt(deviation / samples.size());

    float best = *std::min_element(cbegin(samples), cend(samples));
    float worst = *std::max_element(cbegin(samples), cend(samples));

    PrintStats(mean, deviation, best, worst);

    return {mean, deviation, best, worst};
  }

 protected:
  template <bool event_based>
  using TimerType = std::conditional_t<event_based, EventTimer, CpuTimer>;

  // Creates a scoped timer that adds its measurement to this benchmark's accumulator when it is
  // destroyed. event_based selects EventTimer (true) or CpuTimer (false).
  template <bool event_based = false>
  std::unique_ptr<TimerType<event_based>> GetTimer(hipStream_t stream = nullptr) {
    return std::make_unique<TimerType<event_based>>(time_, stream);
  }

  float time() const { return time_; }

  size_t iterations() const { return iterations_; }

  size_t warmups() const { return warmups_; }

  // Index of the iteration being measured, or kWarmup during warmup runs.
  ssize_t current() const { return current_; }

 private:
  std::string benchmark_name_;
  float time_ = .0f;
  size_t iterations_;
  size_t warmups_;
  ssize_t current_ = kWarmup;
  bool display_output_;
  bool progress_bar_;
  ModifierSignature modifier_;

  // Rewrites the current console line with the benchmark name followed by `out`.
  void Print(const std::string& out = "") {
    if (!display_output_) return;

    std::cout << "\r" << std::setw(110) << std::left << benchmark_name_ << "\t|\t" << out
              << std::flush;
  }

  void PrintProgress(const std::string& name, int progress) {
    if (!(display_output_ && progress_bar_)) return;

    Print(name + ": [" + std::to_string(progress) + "%]");
  }

  void PrintStats(float mean, float deviation, float best, float worst) {
    if (!display_output_) return;

    Print("Average time: " + std::to_string(mean) + " ms, Standard deviation: " +
          std::to_string(deviation) + " ms, Fastest: " + std::to_string(best) +
          " ms, Slowest: " + std::to_string(worst) + " ms\n");
  }
};
// Readable names for the boolean template argument of Benchmark::GetTimer / TIMED_SECTION.
constexpr bool kTimerTypeCpu = false;
constexpr bool kTimerTypeEvent = true;
// Opens a timed scope inside a Benchmark-derived class: the timer is created in the
// if-statement's init clause and destroyed when the controlled statement ends, at which point
// it records its measurement. The condition is always true, so the body always runs.
#define TIMED_SECTION_STREAM(TIMER_TYPE, STREAM) \
if (auto _ = this->template GetTimer<TIMER_TYPE>(STREAM); true)
// Same as TIMED_SECTION_STREAM, passing nullptr as the stream.
#define TIMED_SECTION(TIMER_TYPE) TIMED_SECTION_STREAM(TIMER_TYPE, nullptr)
// Size literals: 4_KB == 4 * 1024, 2_MB == 2 * 1024 * 1024, 1_GB == 1024 * 1024 * 1024 bytes.
// Declared without whitespace between "" and the suffix; the spaced form is deprecated.
constexpr size_t operator""_KB(unsigned long long int kb) { return static_cast<size_t>(kb << 10); }
constexpr size_t operator""_MB(unsigned long long int mb) { return static_cast<size_t>(mb << 20); }
constexpr size_t operator""_GB(unsigned long long int gb) { return static_cast<size_t>(gb << 30); }
// Maps an allocation type to the label used for it in benchmark section names. Allocation types
// without a dedicated label yield "unknown alloc type".
static std::string GetAllocationSectionName(LinearAllocs allocation_type) {
  if (allocation_type == LinearAllocs::malloc) {
    return "host pageable";
  }
  if (allocation_type == LinearAllocs::hipHostMalloc) {
    return "host pinned";
  }
  if (allocation_type == LinearAllocs::hipMalloc) {
    return "device malloc";
  }
  if (allocation_type == LinearAllocs::hipMallocManaged) {
    return "managed";
  }
  return "unknown alloc type";
}
@@ -0,0 +1,454 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#pragma once
#include <hip_array_common.hh>
#include <hip_test_common.hh>
#include <hip/hip_runtime_api.h>
// Kinds of linear allocation that LinearAllocGuard can create.
enum class LinearAllocs {
// Host memory obtained from malloc().
malloc,
// Host memory from malloc() that is additionally registered with hipHostRegister.
mallocAndRegister,
// Host memory obtained from hipHostMalloc.
hipHostMalloc,
// Memory obtained from hipMalloc.
hipMalloc,
// Memory obtained from hipMallocManaged.
hipMallocManaged,
// No allocation is performed.
noAlloc
};
// Returns a human-readable name for an allocation type. Values without a dedicated name
// (including noAlloc) yield "unknown alloc type".
inline std::string to_string(const LinearAllocs allocation_type) {
  const char* name = "unknown alloc type";
  switch (allocation_type) {
    case LinearAllocs::malloc:
      name = "malloc";
      break;
    case LinearAllocs::mallocAndRegister:
      name = "malloc + hipHostRegister";
      break;
    case LinearAllocs::hipHostMalloc:
      name = "hipHostMalloc";
      break;
    case LinearAllocs::hipMalloc:
      name = "hipMalloc";
      break;
    case LinearAllocs::hipMallocManaged:
      name = "hipMallocManaged";
      break;
    default:
      break;
  }
  return name;
}
// Owning guard for one linear allocation made through the mechanism named by LinearAllocs.
// The memory is released when the guard is destroyed or overwritten by move assignment.
template <typename T> class LinearAllocGuard {
 public:
  // Empty guard: owns nothing and exposes null pointers.
  LinearAllocGuard() = default;

  // Allocates `size` bytes with the mechanism selected by `allocation_type`.
  // `flags` is forwarded to hipHostRegister / hipHostMalloc / hipMallocManaged (for the latter a
  // zero value is replaced by 1u); it is not used by the other kinds.
  LinearAllocGuard(const LinearAllocs allocation_type, const size_t size,
                   const unsigned int flags = 0u)
      : allocation_type_{allocation_type}, size_{size} {
    switch (allocation_type_) {
      case LinearAllocs::noAlloc:
        break;
      case LinearAllocs::malloc:
        host_ptr_ = reinterpret_cast<T*>(malloc(size));
        ptr_ = host_ptr_;
        break;
      case LinearAllocs::mallocAndRegister:
        host_ptr_ = reinterpret_cast<T*>(malloc(size));
        HIP_CHECK(hipHostRegister(host_ptr_, size, flags));
        HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&ptr_), host_ptr_, 0u));
        break;
      case LinearAllocs::hipHostMalloc:
        HIP_CHECK(hipHostMalloc(reinterpret_cast<void**>(&ptr_), size, flags));
        host_ptr_ = ptr_;
        break;
      case LinearAllocs::hipMalloc:
        HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&ptr_), size));
        break;
      case LinearAllocs::hipMallocManaged: {
        const unsigned int managed_flags = (flags != 0u) ? flags : 1u;
        HIP_CHECK(hipMallocManaged(reinterpret_cast<void**>(&ptr_), size, managed_flags));
        host_ptr_ = ptr_;
        break;
      }
    }
  }

  LinearAllocGuard(const LinearAllocGuard&) = delete;

  // Takes over the allocation of `o`, leaving `o` empty.
  LinearAllocGuard(LinearAllocGuard&& o) { *this = std::move(o); }

  // Releases the current allocation (if any) and takes over the one held by `o`.
  LinearAllocGuard& operator=(LinearAllocGuard&& o) {
    if (this == &o) {
      return *this;
    }
    dealloc();
    allocation_type_ = std::exchange(o.allocation_type_, LinearAllocs::noAlloc);
    ptr_ = std::exchange(o.ptr_, nullptr);
    host_ptr_ = std::exchange(o.host_ptr_, nullptr);
    size_ = std::exchange(o.size_, 0);
    return *this;
  }

  ~LinearAllocGuard() { dealloc(); }

  // Pointer produced by the allocation call (device pointer for mallocAndRegister).
  T* ptr() const { return ptr_; }
  // Host pointer where one was recorded; stays null for hipMalloc.
  T* host_ptr() const { return host_ptr_; }
  // Requested allocation size in bytes.
  size_t size_bytes() const { return size_; }

 private:
  LinearAllocs allocation_type_ = LinearAllocs::noAlloc;
  T* ptr_ = nullptr;
  T* host_ptr_ = nullptr;
  size_t size_ = 0;

  // Frees whatever the guard owns. Return codes are discarded on purpose: no Catch macros
  // here, because this runs from the destructor and must not throw.
  void dealloc() {
    if (ptr_ == nullptr) {
      return;
    }
    switch (allocation_type_) {
      case LinearAllocs::malloc:
        free(ptr_);
        break;
      case LinearAllocs::mallocAndRegister:
        // Cast to void to suppress nodiscard warnings
        static_cast<void>(hipHostUnregister(host_ptr_));
        free(host_ptr_);
        break;
      case LinearAllocs::hipHostMalloc:
        static_cast<void>(hipHostFree(ptr_));
        break;
      case LinearAllocs::hipMalloc:
      case LinearAllocs::hipMallocManaged:
        static_cast<void>(hipFree(ptr_));
        break;
      case LinearAllocs::noAlloc:
        break;
    }
  }
};
// Common base of the 2D/3D pitched allocation guards. It stores the pitched pointer filled in by
// the derived constructor together with the extent (width in bytes, height, depth) and frees the
// memory with hipFree on destruction. Only derived classes can create or destroy it.
template <typename T> class LinearAllocGuardMultiDim {
 protected:
  LinearAllocGuardMultiDim(hipExtent extent) : extent_{extent} {}
  // Return code discarded on purpose: destructors must not throw.
  ~LinearAllocGuardMultiDim() { static_cast<void>(hipFree(pitched_ptr_.ptr)); }

 public:
  // Start of the allocation, typed as T*.
  T* ptr() const { return reinterpret_cast<T*>(pitched_ptr_.ptr); }
  // Row pitch in bytes as stored in the pitched pointer.
  size_t pitch() const { return pitched_ptr_.pitch; }
  hipExtent extent() const { return extent_; }
  hipPitchedPtr pitched_ptr() const { return pitched_ptr_; }
  // Row width in bytes.
  size_t width() const { return extent_.width; }
  // Row width in elements of T.
  size_t width_logical() const { return extent_.width / sizeof(T); }
  size_t height() const { return extent_.height; }

 public:
  // Members stay public to preserve the existing interface.
  // Value-initialised so the destructor frees a null pointer, rather than an indeterminate one,
  // when the derived constructor's allocation fails and throws before filling it in.
  hipPitchedPtr pitched_ptr_{};
  const hipExtent extent_;
};
// Guard for a 2D device allocation of `width_logical` x `height` elements of T.
// With unaligned == false the memory comes from hipMallocPitch; with unaligned == true it is a
// plain hipMalloc whose pitch is exactly one row of elements.
template <typename T, bool unaligned = false>
class LinearAllocGuard2D : public LinearAllocGuardMultiDim<T> {
 public:
  LinearAllocGuard2D(const size_t width_logical, const size_t height)
      : LinearAllocGuardMultiDim<T>{make_hipExtent(width_logical * sizeof(T), height, 1)} {
    hipPitchedPtr& pitched = this->pitched_ptr_;
    if constexpr (unaligned) {
      // Tightly packed rows: the pitch equals the row size in bytes.
      pitched.pitch = width_logical * sizeof(T);
      HIP_CHECK(hipMalloc(&pitched.ptr, pitched.pitch * height));
    } else {
      HIP_CHECK(hipMallocPitch(&pitched.ptr, &pitched.pitch, this->extent_.width,
                               this->extent_.height));
    }
  }

  // Neither copyable nor movable.
  LinearAllocGuard2D(LinearAllocGuard2D&&) = delete;
  LinearAllocGuard2D(const LinearAllocGuard2D&) = delete;
};
// Guard for a 3D device allocation obtained from hipMalloc3D.
template <typename T> class LinearAllocGuard3D : public LinearAllocGuardMultiDim<T> {
 public:
  // Dimensions given in elements of T; the width is converted to bytes for the extent.
  LinearAllocGuard3D(const size_t width_logical, const size_t height, const size_t depth)
      : LinearAllocGuard3D(make_hipExtent(width_logical * sizeof(T), height, depth)) {}

  // `extent` is used as is (width in bytes).
  LinearAllocGuard3D(const hipExtent extent) : LinearAllocGuardMultiDim<T>(extent) {
    HIP_CHECK(hipMalloc3D(&this->pitched_ptr_, this->extent_));
  }

  // Neither copyable nor movable.
  LinearAllocGuard3D(LinearAllocGuard3D&&) = delete;
  LinearAllocGuard3D(const LinearAllocGuard3D&) = delete;

  size_t depth() const { return this->extent_.depth; }
};
template <typename T> class ArrayAllocGuard {
public:
// extent should contain logical width
ArrayAllocGuard(const hipExtent extent, const unsigned int flags = 0u) : extent_{extent} {
hipChannelFormatDesc desc = hipCreateChannelDesc<T>();
HIP_CHECK(hipMalloc3DArray(&ptr_, &desc, extent_, flags));
}
~ArrayAllocGuard() { static_cast<void>(hipFreeArray(ptr_)); }
ArrayAllocGuard(const ArrayAllocGuard&) = delete;
ArrayAllocGuard(ArrayAllocGuard&&) = delete;
hipArray_t ptr() const { return ptr_; }
hipExtent extent() const { return extent_; }
private:
hipArray_t ptr_ = nullptr;
const hipExtent extent_;
};
template <typename T> class MipmappedArrayAllocGuard {
public:
// extent should contain logical width
MipmappedArrayAllocGuard(const hipExtent extent, const unsigned int levels,
const unsigned int flags)
: extent_{extent}, levels_{levels} {
hipChannelFormatDesc desc = hipCreateChannelDesc<T>();
HIP_CHECK(hipMallocMipmappedArray(&ptr_, &desc, extent_, levels_, flags));
}
MipmappedArrayAllocGuard(const hipExtent extent, const unsigned int flags = 0u)
: MipmappedArrayAllocGuard{extent, 1, flags} {}
~MipmappedArrayAllocGuard() { static_cast<void>(hipFreeMipmappedArray(ptr_)); }
MipmappedArrayAllocGuard(const MipmappedArrayAllocGuard&) = delete;
MipmappedArrayAllocGuard(MipmappedArrayAllocGuard&&) = delete;
hipMipmappedArray_t ptr() const { return ptr_; }
hipArray_t GetLevel(unsigned int level) {
hipArray_t ret;
HIP_CHECK(hipGetMipmappedArrayLevel(&ret, ptr_, level));
return ret;
}
hipExtent extent() const { return extent_; }
unsigned int levels() const { return levels_; }
private:
hipMipmappedArray_t ptr_ = nullptr;
const hipExtent extent_;
const unsigned int levels_;
};
template <typename T> class DrvArrayAllocGuard {
public:
// extent should contain width in bytes
DrvArrayAllocGuard(const hipExtent extent, const unsigned int flags = 0u) : extent_{extent} {
HIP_ARRAY3D_DESCRIPTOR desc{};
using vec_info = vector_info<T>;
desc.Format = vec_info::format;
desc.NumChannels = vec_info::size;
desc.Width = extent_.width / sizeof(T);
desc.Height = extent_.height;
desc.Depth = extent_.depth;
desc.Flags = flags;
HIP_CHECK(hipArray3DCreate(&ptr_, &desc));
}
~DrvArrayAllocGuard() { static_cast<void>(hipArrayDestroy(ptr_)); }
DrvArrayAllocGuard(const DrvArrayAllocGuard&) = delete;
DrvArrayAllocGuard(DrvArrayAllocGuard&&) = delete;
hipArray_t ptr() const { return ptr_; }
hipExtent extent() const { return extent_; }
private:
hipArray_t ptr_ = nullptr;
const hipExtent extent_;
};
// Stream flavours StreamGuard can provide. The enumerator order matters: StreamGuard treats
// every value >= created as a stream it created itself and therefore has to destroy.
enum class Streams { nullstream, perThread, created, withFlags, withPriority };
// Guard around a HIP stream of the requested flavour. Streams created by the guard (created,
// withFlags, withPriority) are destroyed with it; the null stream and hipStreamPerThread are
// only referenced, never destroyed.
class StreamGuard {
 public:
  // Guard that refers to the null stream.
  StreamGuard() = default;

  // `flags` is used by withFlags and withPriority, `priority` only by withPriority.
  StreamGuard(const Streams stream_type, unsigned int flags = hipStreamDefault, int priority = 0)
      : stream_type_{stream_type}, flags_{flags}, priority_{priority} {
    switch (stream_type_) {
      case Streams::nullstream:
        stream_ = nullptr;
        break;
      case Streams::perThread:
        stream_ = hipStreamPerThread;
        break;
      case Streams::created:
        HIP_CHECK(hipStreamCreate(&stream_));
        break;
      case Streams::withFlags:
        HIP_CHECK(hipStreamCreateWithFlags(&stream_, flags_));
        break;
      case Streams::withPriority:
        HIP_CHECK(hipStreamCreateWithPriority(&stream_, flags_, priority_));
        break;
    }
  }

  StreamGuard(const StreamGuard&) = delete;

  // Takes over the stream of `o`; `o` is left referring to the null stream.
  StreamGuard(StreamGuard&& o) { *this = std::move(o); }

  // Destroys the currently owned stream (if any) and takes over the stream of `o`.
  StreamGuard& operator=(StreamGuard&& o) {
    if (this != &o) {
      destroy();
      stream_type_ = o.stream_type_;
      flags_ = o.flags_;
      priority_ = o.priority_;
      stream_ = o.stream_;
      o.stream_type_ = Streams::nullstream;
      o.flags_ = 0u;
      o.priority_ = 0;
      o.stream_ = nullptr;
    }
    return *this;
  }

  ~StreamGuard() { destroy(); }

  hipStream_t stream() const { return stream_; }

 private:
  // Destroys the stream if this guard created it. Shared by the destructor and move assignment
  // so both apply the same ownership and null checks. The return code is discarded because
  // this runs from the destructor and must not throw.
  void destroy() {
    if (stream_type_ >= Streams::created && stream_ != nullptr) {
      static_cast<void>(hipStreamDestroy(stream_));
    }
  }

  Streams stream_type_ = Streams::nullstream;
  unsigned int flags_ = 0u;
  int priority_ = 0;
  hipStream_t stream_ = nullptr;
};
class EventsGuard {
public:
EventsGuard(size_t N) : events_(N) {
for (auto& e : events_) HIP_CHECK(hipEventCreate(&e));
}
EventsGuard(const EventsGuard&) = delete;
EventsGuard(EventsGuard&&) = delete;
~EventsGuard() {
for (auto& e : events_) {
static_cast<void>(hipEventDestroy(e));
}
}
hipEvent_t& operator[](int index) { return events_[index]; }
operator hipEvent_t() const { return events_.at(0); }
std::vector<hipEvent_t>& event_list() { return events_; }
private:
std::vector<hipEvent_t> events_;
};
class StreamsGuard {
public:
StreamsGuard(size_t N) : streams_(N) {
for (auto& s : streams_) HIP_CHECK(hipStreamCreate(&s));
}
StreamsGuard(const StreamsGuard&) = delete;
StreamsGuard(StreamsGuard&&) = delete;
~StreamsGuard() {
for (auto& s : streams_) static_cast<void>(hipStreamDestroy(s));
}
hipStream_t& operator[](int index) { return streams_[index]; }
operator hipStream_t() const { return streams_.at(0); }
std::vector<hipStream_t>& stream_list() { return streams_; }
private:
std::vector<hipStream_t> streams_;
};
// Memory pool flavours handled by MemPoolGuard:
//   dev_default - the device's default pool, obtained with hipDeviceGetDefaultMemPool
//   created     - a new pool made with hipMemPoolCreate and destroyed together with the guard
enum class MemPools { dev_default, created };
// Guard around a HIP memory pool: either the default pool of `device` or a freshly created
// pinned device pool. Created pools are destroyed with the guard; for the default pool the
// high-watermark attributes are reset instead.
class MemPoolGuard {
 public:
  // `handle_type` is only used when a pool is created (it becomes pool_props.handleTypes).
  MemPoolGuard(const MemPools mempool_type, int device,
               hipMemAllocationHandleType handle_type = hipMemHandleTypeNone)
      : mempool_type_{mempool_type}, device_{device}, handle_type_{handle_type} {
    switch (mempool_type_) {
      case MemPools::dev_default:
        HIP_CHECK(hipDeviceGetDefaultMemPool(&mempool_, device_));
        break;
      case MemPools::created: {
        hipMemPoolProps pool_props;
        memset(&pool_props, 0, sizeof(pool_props));
        pool_props.allocType = hipMemAllocationTypePinned;
        pool_props.handleTypes = handle_type_;
        pool_props.location.type = hipMemLocationTypeDevice;
        pool_props.location.id = device_;
        pool_props.win32SecurityAttributes = nullptr;
        HIP_CHECK(hipMemPoolCreate(&mempool_, &pool_props));
        break;
      }
    }
  }

  MemPoolGuard(const MemPoolGuard&) = delete;
  MemPoolGuard(MemPoolGuard&&) = delete;

  ~MemPoolGuard() {
    // No Catch macros here: a failing HIP_CHECK throws, and throwing from a destructor
    // terminates the program. Return codes are discarded, as in the other guards.
    if (mempool_type_ == MemPools::created) {
      static_cast<void>(hipMemPoolDestroy(mempool_));
    } else {
      // Reset max states for default mem pool, so subtests won't fail
      uint64_t value = 0;
      static_cast<void>(hipMemPoolSetAttribute(mempool_, hipMemPoolAttrUsedMemHigh, &value));
      static_cast<void>(hipMemPoolSetAttribute(mempool_, hipMemPoolAttrReservedMemHigh, &value));
    }
  }

  hipMemPool_t mempool() const { return mempool_; }

 private:
  const MemPools mempool_type_;
  int device_;
  hipMemAllocationHandleType handle_type_;
  // Starts out null so the handle is never indeterminate.
  hipMemPool_t mempool_ = nullptr;
};
@@ -0,0 +1,110 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#pragma once
#include <condition_variable>
#include <mutex>
#include <thread>
/*
Guarantees total ordering between parent and child thread
PARENT CHILD
THREAD THREAD
TestPart1
\
\
\
TestPart2
/
/
/
TestPart3
\
\
\
TestPart4
Usage:
Define a derived class which inherits from ThreadedZigZagTest instantiated with that selfsame class,
which implements the appropriate test methods
class DerivedTestClass : public ThreadedZigZagTest<DerivedTestClass> {
void TestPart1() {...}
void TestPart2() {...}
void TestPart3() {...}
void TestPart4() {...}
};
The derived class can contain state that the test requires.
*/
// CRTP base that runs TestPart1..TestPart4 of the derived class T strictly one after another,
// alternating between the calling thread (parts 1 and 3) and a spawned thread (parts 2 and 4).
// The hand-over between the threads uses ready_, guarded by mtx_ and signalled through cv_.
template <typename T> class ThreadedZigZagTest {
public:
// Executes the four test parts in order and joins the spawned thread before returning.
void run() {
// 1. Runs on the calling thread, before the child thread exists
static_cast<T*>(this)->TestPart1();
auto t = std::thread([this] {
// 2. Runs on the child thread
static_cast<T*>(this)->TestPart2();
{
// Tell the parent that part 2 is done
std::lock_guard<std::mutex> lock(mtx_);
ready_ = true;
}
cv_.notify_one();
{
// Wait until the parent has finished part 3 and cleared the flag again
std::unique_lock<std::mutex> lock(mtx_);
cv_.wait(lock, [this] { return !ready_; });
}
// 4. Runs on the child thread
static_cast<T*>(this)->TestPart4();
});
{
// Block the parent until the child reports that part 2 is done
std::unique_lock<std::mutex> lock(mtx_);
cv_.wait(lock, [this] { return ready_; });
}
// 3. Runs on the calling thread while the child is waiting
static_cast<T*>(this)->TestPart3();
{
// Release the child so it can run part 4
std::lock_guard<std::mutex> lock(mtx_);
ready_ = false;
}
cv_.notify_one();
// Finalize
t.join();
// HIP_CHECK_THREAD_FINALIZE is defined outside this file; presumably it reports HIP errors
// recorded on the child thread - TODO confirm
HIP_CHECK_THREAD_FINALIZE();
}
// Default no-op parts; run() calls them through static_cast<T*>, so a derived class replaces
// a part simply by declaring a method with the same name.
void TestPart1() const {}
void TestPart2() const {}
void TestPart3() const {}
void TestPart4() const {}
private:
// Guards ready_
std::mutex mtx_;
// Signals every change of ready_ to the waiting thread
std::condition_variable cv_;
// true: part 2 finished and the parent may run part 3; false again: the child may run part 4
bool ready_ = false;
};
@@ -0,0 +1,194 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#pragma once
#include <chrono>
#include <optional>
#include <hip_test_common.hh>
#include <hip/hip_runtime_api.h>
// Unnamed namespace in a header: every translation unit that includes it sees its own
// internal-linkage entity.
namespace {
// Presumably the assumed memory page size in bytes (4 KiB) - TODO confirm against the users
inline constexpr size_t kPageSize = 4096;
} // anonymous namespace
// Compares `num_elements` elements of `actual` against `expected` and fails the test at the
// first differing element, reporting its index.
template <typename T>
void ArrayMismatch(T* const expected, T* const actual, const size_t num_elements) {
  T* const expected_end = expected + num_elements;
  const auto first_diff = std::mismatch(expected, expected_end, actual).first;
  if (first_diff == expected_end) {
    return;  // all elements match
  }
  const auto idx = std::distance(expected, first_diff);
  INFO("Value mismatch at index: " << idx);
  REQUIRE(expected[idx] == actual[idx]);
}
// Checks that every element in [begin, end) equals `expected_value`; fails the test at the
// first element that does not, reporting its index.
template <typename It, typename T> void ArrayFindIfNot(It begin, It end, const T expected_value) {
  const auto differs = [expected_value](const T elem) { return !(expected_value == elem); };
  const auto offender = std::find_if(begin, end, differs);
  if (offender == end) {
    return;  // nothing deviates from the expected value
  }
  const auto idx = std::distance(begin, offender);
  INFO("Value mismatch at index " << idx);
  REQUIRE(expected_value == *offender);
}
// Pointer + length convenience overload: checks that the first `num_elements` elements of
// `array` all equal `expected_value`.
template <typename T>
void ArrayFindIfNot(T* const array, const T expected_value, const size_t num_elements) {
  T* const array_end = array + num_elements;
  ArrayFindIfNot(array, array_end, expected_value);
}
// Checks arr[i] against value_gen(i) for every i < count. value_gen returns something
// convertible to std::optional<T>; an empty optional means index i is not checked.
template <typename T, typename F>
static inline void ArrayAllOf(const T* arr, uint32_t count, F value_gen) {
  for (auto i = 0u; i < count; ++i) {
    const std::optional<T> expected_val = value_gen(i);
    if (expected_val.has_value()) {
      // REQUIRE is only reached on a mismatch: evaluating it on every iteration is noticeably
      // slow on large arrays, even when it passes.
      const T& wanted = expected_val.value();
      if (arr[i] != wanted) {
        INFO("Mismatch at index: " << i);
        REQUIRE(arr[i] == expected_val.value());
      }
    }
  }
}
// Checks that each of the first `count` elements of `arr` lies in the inclusive range
// [minval, maxval] and fails the test at the first element outside of it, reporting its index.
template <typename T>
static inline void ArrayInRange(const T* arr, uint32_t count, const T minval, const T maxval) {
  for (auto i = 0u; i < count; ++i) {
    // REQUIRE is only evaluated for offending elements. The asserted expressions use >= / <=
    // so the failure report states the inclusive bound that is actually enforced.
    if (arr[i] < minval) {
      INFO("Mismatch at index: " << i);
      REQUIRE(arr[i] >= minval);
    } else if (arr[i] > maxval) {
      INFO("Mismatch at index: " << i);
      REQUIRE(arr[i] <= maxval);
    }
  }
}
// Verifies a pitched 3D region element by element against expected_value_generator(x, y, z).
//   ptr    - start of the region
//   pitch  - distance between consecutive rows, in bytes
//   width  - elements per row; height - rows per slice; depth - number of slices
// Fails the test at the first mismatching element, reporting its indices.
template <typename T, typename F>
void PitchedMemoryVerify(T* const ptr, const size_t pitch, const size_t width, const size_t height,
                         const size_t depth, F expected_value_generator) {
  uint8_t* const base = reinterpret_cast<uint8_t*>(ptr);
  for (size_t z = 0; z < depth; ++z) {
    // Slice and row addresses depend only on z and y, so they are computed outside the x loop.
    uint8_t* const slice = base + pitch * height * z;
    for (size_t y = 0; y < height; ++y) {
      T* const row = reinterpret_cast<T*>(slice + pitch * y);
      for (size_t x = 0; x < width; ++x) {
        // Evaluate the generator once per element, also on the failure path.
        const auto expected = expected_value_generator(x, y, z);
        if (row[x] != expected) {
          INFO("Mismatch at indices: " << x << ", " << y << ", " << z);
          REQUIRE(row[x] == expected);
        }
      }
    }
  }
}
// Fills a pitched 3D region element by element with expected_value_generator(x, y, z).
//   ptr    - start of the region
//   pitch  - distance between consecutive rows, in bytes
//   width  - elements per row; height - rows per slice; depth - number of slices
// Bytes between the end of a row (width elements) and the next row are left untouched.
template <typename T, typename F>
void PitchedMemorySet(T* const ptr, const size_t pitch, const size_t width, const size_t height,
                      const size_t depth, F expected_value_generator) {
  uint8_t* const base = reinterpret_cast<uint8_t*>(ptr);
  for (size_t z = 0; z < depth; ++z) {
    // Slice and row addresses depend only on z and y, so they are computed outside the x loop.
    uint8_t* const slice = base + pitch * height * z;
    for (size_t y = 0; y < height; ++y) {
      T* const row = reinterpret_cast<T*>(slice + pitch * y);
      for (size_t x = 0; x < width; ++x) {
        row[x] = expected_value_generator(x, y, z);
      }
    }
  }
}
// Adds `increment_value` to each of the first N elements of `vec` using a grid-stride loop,
// so any 1D launch configuration covers the whole vector.
template <typename T>
__global__ void VectorIncrement(T* const vec, const T increment_value, size_t N) {
  // Widen before multiplying: the built-in index components are 32-bit, and their product
  // would wrap for very large grids before being converted to size_t.
  const size_t offset = static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x;
  const size_t stride = static_cast<size_t>(blockDim.x) * gridDim.x;

  for (size_t i = offset; i < N; i += stride) {
    vec[i] += increment_value;
  }
}
// Sets each of the first N elements of `vec` to `value` using a grid-stride loop, so any 1D
// launch configuration covers the whole vector.
template <typename T> __global__ void VectorSet(T* const vec, const T value, size_t N) {
  // Widen before multiplying: the built-in index components are 32-bit, and their product
  // would wrap for very large grids before being converted to size_t.
  const size_t offset = static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x;
  const size_t stride = static_cast<size_t>(blockDim.x) * gridDim.x;

  for (size_t i = offset; i < N; i += stride) {
    vec[i] = value;
  }
}
// Will execute for at least `interval` milliseconds.
// Each iteration of the outer loop spins until `ticks_per_ms` clock ticks have elapsed, i.e. it
// burns one millisecond when `ticks_per_ms` matches the clock being read.
// If neither HT_AMD nor HT_NVIDIA is set, the loop body is empty and the kernel only counts
// `interval` down.
static __global__ void Delay(uint32_t interval, const uint32_t ticks_per_ms) {
while (interval--) {
#if HT_AMD
// clock_function() is declared outside this file - presumably the wall clock counter that
// matches hipDeviceAttributeWallClockRate; TODO confirm
uint64_t start = clock_function();
while (clock_function() - start < ticks_per_ms) {
// Sleep briefly between polls of the clock
__builtin_amdgcn_s_sleep(10);
}
#endif
#if HT_NVIDIA
uint64_t start = clock64();
// Pure busy-wait on the clock
while (clock64() - start < ticks_per_ms) {
}
#endif
}
}
// Writes to every element (x, y, z) of a pitched w x h x d region its linear index
// z * w * h + y * w + x. `pitch` is the row pitch in bytes. One thread handles one element;
// threads outside the region do nothing.
template <typename T>
__global__ void Iota(T* const out, size_t pitch, size_t w, size_t h, size_t d) {
  const auto x = blockIdx.x * blockDim.x + threadIdx.x;
  const auto y = blockIdx.y * blockDim.y + threadIdx.y;
  const auto z = blockIdx.z * blockDim.z + threadIdx.z;

  if (x >= w || y >= h || z >= d) {
    return;
  }

  // Locate the row through byte arithmetic, because rows are `pitch` bytes apart.
  char* const row = reinterpret_cast<char*>(out) + pitch * h * z + pitch * y;
  reinterpret_cast<T*>(row)[x] = z * w * h + y * w + x;
}
// Enqueues the Delay kernel (one block, one thread) on `stream` to keep it busy for `interval`.
// The device clock rate is read with hipDeviceGetAttribute and passed to the kernel as the
// number of ticks per millisecond (presumably the attribute is reported in kHz - TODO confirm).
// NOTE(review): the attribute is always queried for device 0, not for the current device -
// confirm this is intended for multi-GPU tests.
// NOTE(review): interval.count() is narrowed to the kernel's uint32_t parameter, and the launch
// itself is not checked for errors.
inline void LaunchDelayKernel(const std::chrono::milliseconds interval, const hipStream_t stream = nullptr) {
// Stays 0 when neither HT_AMD nor HT_NVIDIA is set
int ticks_per_ms = 0;
#if HT_AMD
HIPCHECK(hipDeviceGetAttribute(&ticks_per_ms, hipDeviceAttributeWallClockRate, 0));
#endif
#if HT_NVIDIA
HIPCHECK(hipDeviceGetAttribute(&ticks_per_ms, hipDeviceAttributeClockRate, 0));
#endif
Delay<<<1, 1, 0, stream>>>(interval.count(), ticks_per_ms);
}
// Returns true when every listed device attribute of `device` has a non-zero value.
// Attributes are queried left to right and the first zero value stops the evaluation; an empty
// attribute list yields true.
template <typename... Attributes>
inline bool DeviceAttributesSupport(const int device, Attributes... attributes) {
  constexpr auto IsNonZero = [](const int dev, const hipDeviceAttribute_t attribute) -> bool {
    int attribute_value = 0;
    HIP_CHECK(hipDeviceGetAttribute(&attribute_value, attribute, dev));
    return attribute_value != 0;
  };

  return (... && IsNonZero(device, attributes));
}
// Returns the value of attribute `attr` for `device`, as reported by hipDeviceGetAttribute.
inline int GetDeviceAttribute(const hipDeviceAttribute_t attr, int device) {
  int attribute_value = 0;
  HIP_CHECK(hipDeviceGetAttribute(&attribute_value, attr, device));
  return attribute_value;
}
@@ -0,0 +1,519 @@
/*
Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#pragma once
#define HIP_ENABLE_WARP_SYNC_BUILTINS
#define HIP_ENABLE_EXTRA_WARP_SYNC_TYPES
#include <hip_test_common.hh>
#include <resource_guards.hh>
#include <hip/hip_cooperative_groups.h>
#include <hip/hip_fp16.h>
#include <limits>
#include <cmath>
#include <iostream>
#include <ios>
// Extracts bit `n` of `x` as 0 or 1. Both arguments and the whole expansion are parenthesised
// so the macro behaves like a single value inside larger expressions (e.g. MASK_SHIFT(a | b, n)
// or MASK_SHIFT(x, n) + 1).
#define MASK_SHIFT(x, n) \
  (((x) & (static_cast<uint64_t>(1) << (n))) >> (n))
// Mask with bits 0, 5, 10, ..., 60 set
const unsigned long long Every5thBit = 0x1084210842108421;
// Mask with bits 0, 9, 18, ..., 63 set
const unsigned long long Every9thBit = 0x8040201008040201;
// Every5thBit without the bits that are also set in Every9thBit
const unsigned long long Every5thBut9th = Every5thBit & ~Every9thBit;
// All 64 bits set: ~0 is the int -1, which converts to the all-ones unsigned value
const unsigned long long AllThreads = ~0;
// Number of reductions per run; runTestReduce sizes its buffers as kNumReduces * wavefrontSize
static constexpr int kNumReduces = 5000;
// Returns true when the calling thread's lane bit is NOT set in the mask of its warp.
// `active_masks` holds one 64-bit mask per warp, indexed by
// block_rank * warps_per_block + (thread rank within the block / warpSize).
inline __device__ bool deactivate_thread(const uint64_t* const active_masks) {
  namespace cg = cooperative_groups;

  const auto warp = cg::tiled_partition(cg::this_thread_block(), warpSize);
  const auto block = cg::this_thread_block();

  const auto warps_per_block = (block.size() + warpSize - 1) / warpSize;
  const auto block_rank = (blockIdx.z * gridDim.y + blockIdx.y) * gridDim.x + blockIdx.x;
  const auto idx = block_rank * warps_per_block + block.thread_rank() / warpSize;

  const uint64_t lane_bit = static_cast<uint64_t>(1) << warp.thread_rank();
  return (active_masks[idx] & lane_bit) == 0;
}
// Returns the shared Mersenne Twister engine, seeded once from std::random_device on first use.
inline std::mt19937& GetRandomGenerator() {
  static std::mt19937 engine(std::random_device{}());
  return engine;
}
// Draws a uniformly distributed integer from the closed range [min, max] using the shared
// generator.
template <typename T> inline T GenerateRandomInteger(const T min, const T max) {
  std::uniform_int_distribution<T> distribution(min, max);
  std::mt19937& engine = GetRandomGenerator();
  return distribution(engine);
}
// Draws a uniformly distributed real number from [min, max) using the shared generator.
template <typename T> inline T GenerateRandomReal(const T min, const T max) {
  std::uniform_real_distribution<T> distribution(min, max);
  std::mt19937& engine = GetRandomGenerator();
  return distribution(engine);
}
// Catch2 generator yielding the powers of two 2, 4, ..., 2^k, where k is the number of times
// `warp_size` can be halved before reaching zero (k = log2(warp_size) for a power of two).
inline int generate_width(int warp_size) {
  int exponent = 0;
  for (int remaining = warp_size >> 1; remaining != 0; remaining >>= 1) {
    ++exponent;
  }

  return GENERATE_COPY(map([](int e) { return 1 << e; }, range(1, exponent + 1)));
}
// Returns one of five lane-mask patterns, selected by warp_id % 5:
//   0 - 0xAAAA...: every second lane, starting at lane 1
//   1 - 0x5555...: every second lane, starting at lane 0
//   2 - lanes [0, warp_size / 2)
//   3 - lanes [warp_size / 2, warp_size)
//   4 - all 64 bits set
// Patterns 0, 1 and 4 are not trimmed to warp_size.
inline uint64_t get_active_mask(unsigned int warp_id, unsigned int warp_size) {
  // Builds a mask with the bits [first, last) set, one lane at a time.
  const auto lane_range = [](unsigned int first, unsigned int last) {
    uint64_t bits = 0;
    for (unsigned int lane = first; lane < last; ++lane) {
      bits |= static_cast<uint64_t>(1) << lane;
    }
    return bits;
  };

  const unsigned int half_warp = warp_size / 2;
  switch (warp_id % 5) {
    case 0:
      return 0xAAAAAAAAAAAAAAAA;
    case 1:
      return 0x5555555555555555;
    case 2:
      return lane_range(0, half_warp);
    case 3:
      return lane_range(half_warp, warp_size);
    case 4:
      return 0xFFFFFFFFFFFFFFFF;
  }
  return 0;
}
// Integral overload: the value is only converted to T.
template <typename T, std::enable_if_t<std::is_integral<T>::value, bool> = true>
inline T expandPrecision(int X) {
  return static_cast<T>(X);
}
// Floating point overload: scales the value by pi (as a double) so the result has a
// non-trivial fractional part, then converts it to T.
template <typename T, std::enable_if_t<std::is_floating_point<T>::value, bool> = true>
inline T expandPrecision(int X) {
  constexpr double kScale = 3.141592653589793115997963468544185161590576171875;
  return static_cast<T>(X * kScale);
}
// __half overload: computes the float result first, then converts it to __half.
template <typename T, std::enable_if_t<std::is_same<T, __half>::value, bool> = true>
inline __half expandPrecision(int X) {
  const float as_float = expandPrecision<float>(X);
  return static_cast<__half>(as_float);
}
// __half2 overload: both halves hold the same value, the float result converted to __half.
template <typename T, std::enable_if_t<std::is_same<T, __half2>::value, bool> = true>
inline __half2 expandPrecision(int X) {
  __half component = expandPrecision<float>(X);
  return __half2{component, component};
}
// In-place array variant for integral types: intentionally a no-op, the values stay as they are.
template <typename T, std::enable_if_t<std::is_integral<T>::value, bool> = true>
inline void expandPrecision(T* Array, int size) {
  static_cast<void>(Array);
  static_cast<void>(size);
}
// In-place array variant for floating point types: multiplies each of the first `size` elements
// by pi (as a double).
template <typename T, std::enable_if_t<std::is_floating_point<T>::value, bool> = true>
inline void expandPrecision(T *Array, int size) {
  constexpr double kScale = 3.141592653589793115997963468544185161590576171875;
  for (T* element = Array; element != Array + size; ++element) {
    *element *= kScale;
  }
}
// Fills Input[0 .. size) with a fixed table of signed values, each passed through
// expandPrecision<T>. The table has 64 entries, so `size` must lie in [0, 64].
template <typename T>
inline void initializeInput(T *Input, int size) {
  static constexpr int Values[] = {0,   -1,  2,   3,   4,   5,   -6,  7,
                                   8,   -9,  10,  11,  12,  13,  -14, 15,
                                   16,  17,  -18, 19,  20,  -21, 22,  23,
                                   24,  25,  26,  -27, 28,  29,  30,  31,
                                   -32, 33,  34,  35,  -36, 37,  38,  -39,
                                   40,  41,  42,  43,  -44, -45, 46,  47,
                                   48,  49,  50,  -51, 52,  53,  -54, 55,
                                   56,  57,  -58, 59,  60,  61,  62,  -63};
  constexpr int kNumValues = static_cast<int>(sizeof(Values) / sizeof(Values[0]));

  // Guard against reading past the end of the table
  assert(size >= 0 && size <= kNumValues && "initializeInput supports at most 64 elements");

  for (int i = 0; i != size; ++i) {
    Input[i] = expandPrecision<T>(Values[i]);
  }
}
// Fills Expected[0 .. size) with expandPrecision<T>(Values[i]).
template <typename T>
inline void initializeExpected(T *Expected, int *Values, int size) {
  int index = 0;
  while (index != size) {
    Expected[index] = expandPrecision<T>(Values[index]);
    ++index;
  }
}
// Generic comparison: plain operator==.
template <typename T>
inline bool compareEqual(T X, T Y) {
  const bool same = (X == Y);
  return same;
}
// __half specialisation: compares the values after converting both to float.
template <>
inline bool compareEqual(__half X, __half Y) {
  const float lhs = __half2float(X);
  const float rhs = __half2float(Y);
  return lhs == rhs;
}
// __half2 specialisation: equal when both the x and the y components compare equal.
template <>
inline bool compareEqual(__half2 X, __half2 Y) {
  if (!compareEqual(X.x, Y.x)) {
    return false;
  }
  return compareEqual(X.y, Y.y);
}
// Compares entry `i` of two mask arrays. For a warp size of 32 only the part that survives the
// conversion to unsigned is compared; otherwise the full values are compared.
inline bool compareMaskEqual(unsigned long long *Actual, unsigned long long *Expected,
                             int i, int warpSize) {
  if (warpSize != 32) {
    return Actual[i] == Expected[i];
  }
  const unsigned actual_low = static_cast<unsigned>(Actual[i]);
  const unsigned expected_low = static_cast<unsigned>(Expected[i]);
  return actual_low == expected_low;
}
// Rounds `num` up to the next multiple of `n`; values that already are a multiple are returned
// unchanged. `n` must not be zero.
template <typename T>
inline T alignUp(T num, size_t n) {
  const bool already_aligned = (num % n == 0);
  if (!already_aligned) {
    return ((num + n - 1) / n) * n;
  }
  return num;
}
// Maps an element type to the <random> distribution used to generate test data for it.
// Primary template: std::uniform_int_distribution<T>.
template <typename T>
struct DistributionType {
  typedef std::uniform_int_distribution<T> type;
};

// There is no std::uniform_real_distribution for the 'half' type, so random unsigned short
// values are drawn and cast instead, avoiding NaN and Infinity.
template <>
struct DistributionType<__half> {
  typedef std::uniform_int_distribution<unsigned short> type;
};

// float draws from a real distribution
template <>
struct DistributionType<float> {
  typedef std::uniform_real_distribution<float> type;
};

// double draws from a real distribution
template <>
struct DistributionType<double> {
  typedef std::uniform_real_distribution<double> type;
};
// Binary functor returning the smaller of its two arguments (the first one on ties).
template <class T>
struct MinOp {
  T operator()(const T& lhs, const T& rhs) const
  {
    return (rhs < lhs) ? rhs : lhs;
  }
};
// Binary functor returning the larger of its two arguments (the first one on ties).
template <class T>
struct MaxOp {
  T operator()(const T& lhs, const T& rhs) const
  {
    return (lhs < rhs) ? rhs : lhs;
  }
};
// Logical XOR functor: the result is true (converted to T) when exactly one of the operands is
// non-zero. Usable on host and device.
template <class T>
struct XorOp {
  // const-qualified like MinOp/MaxOp and the std functors, so it also works on const objects
  __host__ __device__ T operator()(const T& lhs, const T& rhs) const
  {
    // !x normalises each operand to bool; the two differ exactly when one operand is set
    return (!lhs) != (!rhs);
  }
};
// typeid(T).name() does seem to return a very descriptive name for primitive types,
// at least on clang, so we roll out an equivalent
template<class T>
const char* typeToString()
{
if (std::is_same<T, int>::value)
return "int";
if (std::is_same<T, unsigned int>::value)
return "unsigned int";
if (std::is_same<T, long long>::value)
return "long long";
if (std::is_same<T, unsigned long long>::value)
return "unsigned long long";
if (std::is_same<T, half>::value)
return "half";
if (std::is_same<T, float>::value)
return "float";
if (std::is_same<T, double>::value)
return "double";
assert(false && "Missing conversion to string for type");
return "";
}
template<class T, template <typename> class Op>
const char* opToString()
{
if constexpr (std::is_same<Op<T>, std::plus<T>>::value)
return "add";
else if constexpr (std::is_same<Op<T>, MinOp<T>>::value)
return "min";
else if constexpr (std::is_same<Op<T>, MaxOp<T>>::value)
return "max";
else if constexpr (std::is_same<Op<T>, std::logical_and<T>>::value)
return "logical_and";
else if constexpr (std::is_same<Op<T>, std::logical_or<T>>::value)
return "logical_or";
else if constexpr (std::is_same<Op<T>, XorOp<T>>::value)
return "logical_xor";
else {
static_assert(std::is_void<T>::value, "Unsupported operator");
return "";
}
}
// Generates `numItems` random non-zero lane masks in host memory (`buf`) and copies them to
// device memory (`d_buf`). Both guards are replaced by freshly allocated buffers.
// When the warp size is 32, only the low 32 bits of each mask are kept.
template <class T, class Gen>
void genRandomMasks(LinearAllocGuard<T>& d_buf,
                    LinearAllocGuard<T>& buf,
                    Gen& gen,
                    int numItems)
{
  // masks must be != 0, hence passing 1 as the 'a' distribution parameter
  std::uniform_int_distribution<unsigned long long> dist(1);
  int numBytes = numItems * sizeof(T);

  LinearAllocGuard<T> hostMasks(LinearAllocs::malloc, numBytes);
  LinearAllocGuard<T> deviceMasks(LinearAllocs::hipMalloc, numBytes);
  buf = std::move(hostMasks);
  d_buf = std::move(deviceMasks);

  for (int item = 0; item < numItems; ++item) {
    T mask = dist(gen);
    if (getWarpSize() == 32) {
      mask &= 0xFFFFFFFF;
    }
    buf.ptr()[item] = mask;
  }

  HIP_CHECK(hipMemcpy(d_buf.ptr(), buf.ptr(), numBytes, hipMemcpyHostToDevice));
}
// Generates a random __half from a random 16-bit pattern (instead of drawing a float from
// uniform_real_distribution and casting it to __half, which is problematic).
// The pattern is masked with 0xBBFF, which clears bits 14 and 10. Bit 14 is the top exponent bit
// of the 16-bit format, so the all-ones exponent (NaN / infinity) cannot occur.
// NOTE(review): with both bits cleared the largest exponent field is 14, i.e. |x| < 1, which is
// tighter than the (-8, 8) range historically documented for this helper - confirm the intent.
template <class Gen>
__half genRandomHalf(std::uniform_int_distribution<unsigned short>& dist,
                     Gen& gen)
{
  __half_raw bits;
  bits.x = dist(gen) & 0xBBFF;
  return bits;
}
// Generates `numItems` random elements in host memory (`buf`) and copies them to device memory
// (`d_buf`). Both guards are replaced by freshly allocated buffers. __half elements are produced
// by genRandomHalf, every other type directly by `dist`.
template <class T, class Dist, class Gen>
void genRandomBuffers(LinearAllocGuard<T>& d_buf,
                      LinearAllocGuard<T>& buf,
                      Dist& dist,
                      Gen& gen,
                      int numItems)
{
  int numBytes = numItems * sizeof(T);

  LinearAllocGuard<T> hostData(LinearAllocs::malloc, numBytes);
  LinearAllocGuard<T> deviceData(LinearAllocs::hipMalloc, numBytes);
  buf = std::move(hostData);
  d_buf = std::move(deviceData);

  for (int item = 0; item < numItems; ++item) {
    if constexpr (std::is_same<T, __half>::value) {
      buf.ptr()[item] = genRandomHalf(dist, gen);
    } else {
      buf.ptr()[item] = dist(gen);
    }
  }

  HIP_CHECK(hipMemcpy(d_buf.ptr(), buf.ptr(), numBytes, hipMemcpyHostToDevice));
}
// Computes on the host the result a warp-wide reduction with `op` is expected to produce.
// @input one value per lane, getWarpSize() entries
// @mask indicates the lanes that will participate in the computation
// For std::plus the sum is built pairwise (lane i absorbs lane i + modulo / 2 for modulo = 2,
// 4, ...), with inactive lanes contributing 0; all other operators are folded over the active
// lanes in ascending lane order.
template <class T, class Op>
T calculateExpected(const T* input, Op op, unsigned long long mask)
{
  // Value-initialised so a mask without any active lane yields a defined result
  T result{};
  const int wavefrontSize = getWarpSize();
  // The lane bit is built from a 64-bit constant: unsigned long is only 32 bits wide on LLP64
  // targets (e.g. Windows), where `1ul << i` is undefined for lanes 32..63.
  const auto laneActive = [mask](int lane) { return (mask & (1ull << lane)) != 0; };

  if (std::is_same<Op, std::plus<T>>::value) {
    T tmp[64] = { 0 };
    for (int i = 0; i < wavefrontSize; i++) {
      if (laneActive(i)) {
        tmp[i] = input[i];
      }
    }

    for (int modulo = 2; modulo <= wavefrontSize; modulo *= 2) {
      for (int i = 0; i < wavefrontSize; i += modulo) {
        int j = i + modulo / 2;
        if (j < wavefrontSize)
          tmp[i] += tmp[j];
      }
    }
    result = tmp[0];
  } else {
    bool initialized = false;
    for (int i = 0; i < wavefrontSize; i++) {
      if (laneActive(i)) {
        if (initialized)
          result = op(input[i], result);
        else {
          // The first active lane seeds the fold
          result = input[i];
          initialized = true;
        }
      }
    }
  }

  return result;
}
// Prints a diagnostic dump for a failed reduction to std::cout: the lane mask in hex, the input
// value of every active lane, and the actual and expected result. __half values are printed as
// float. The stream formatting state of std::cout is restored after the hex output.
template <class T>
void printMismatch(const T& result, const T& expected, const T* input, unsigned long long mask)
{
  // Remember the current formatting of std::cout so that it can be restored
  std::ios init(NULL);
  init.copyfmt(std::cout);

  std::cout << "\nMismatch\n";
  std::cout << "Mask: 0x" << std::hex << std::setfill('0') << std::setw(16) << mask << "\n";
  std::cout.copyfmt(init);

  const int wavefrontSize = getWarpSize();
  for (int i = 0; i < wavefrontSize; i++) {
    // 64-bit lane bit: `1ul << i` is undefined for lanes 32..63 where unsigned long is 32 bits
    if ((1ull << i) & mask) {
      if constexpr (std::is_same<T, __half>::value)
        std::cout << "Lane " << i << ": " << __half2float(input[i]) << "\n";
      else
        std::cout << "Lane " << i << ": " << input[i] << "\n";
    }
  }

  if constexpr (std::is_same<T, __half>::value) {
    std::cout << "Result: " << __half2float(result) << "\n";
    std::cout << "Expected: " << __half2float(expected) << "\n";
  } else {
    std::cout << "Result: " << result << "\n";
    std::cout << "Expected: " << expected << "\n";
  }
}
// Checks a floating point reduction result against the expected value with a relative
// tolerance of 1% (reductions are lossy in terms of precision). On a noticeable deviation the
// inputs are dumped with printMismatch before the Catch assertion is evaluated.
// @mask and @input are only used for the diagnostic dump.
// NOTE(review): the checks are skipped when 0.1 * fmax(result, expected) <= 0.01, which
// includes every case where both values are negative - confirm this is intended.
template <class T>
void compareFloatingPoint(const T& result, const T& expected, unsigned long long mask, const T* input)
{
  using namespace Catch::Matchers;

  if constexpr (std::is_same<T, __half>::value) {
    // __half is compared through its float value
    float resultFloat = __half2float(result);
    float expectedFloat = __half2float(expected);
    float absDifference = fabs(resultFloat - expectedFloat);
    float relativeEpsilon = 0.1 * fmax(resultFloat, expectedFloat);
    float eps = 0.01f;
    REQUIRE(!__hisnan(result));
    REQUIRE(!__hisinf(result));
    if (relativeEpsilon > eps) {
      if (absDifference > 0.0001) {
        if (absDifference >= eps * fabs(fmax(resultFloat, expectedFloat))) {
          printMismatch(result, expected, input, mask);
          std::cout << "Relative epsilon: " << relativeEpsilon << "\n";
          std::cout << "Difference: " << absDifference << "\n";
        }
      }
      // resultFloat already is a float; passing it through __half2float only forced a needless
      // float -> __half -> float round trip
      REQUIRE_THAT(resultFloat, WithinRel(expectedFloat, eps));
    }
  } else {
    // for float or double, also lossy in terms of precision
    T absDifference = fabs(result - expected);
    T relativeEpsilon = 0.1 * fmax(result, expected);
    T eps = 0.01;
    if (relativeEpsilon > eps) {
      if (absDifference > 0.0001) {
        if (absDifference >= eps * fabs(fmax(result, expected))) {
          printMismatch(result, expected, input, mask);
          std::cout << "Relative epsilon: " << relativeEpsilon << "\n";
          std::cout << "Difference: " << absDifference << "\n";
        }
        REQUIRE_THAT(result, WithinRel(expected, eps));
      }
    }
  }
}
// @tparam Reduce a functor; abstracts away kernel dispatching
// (via hiprtc or normal execution)
template <class T, class Reduce, template <typename> class Op>
void runTestReduce(int iteration, Reduce reduce)
{
using namespace Catch::Matchers;
using distribution = typename DistributionType<T>::type;
unsigned int wavefrontSize = getWarpSize();
// one result per reduce per thread to be checked
LinearAllocGuard<T> d_output(LinearAllocs::hipMalloc, kNumReduces * wavefrontSize * sizeof(T));
LinearAllocGuard<T> output(LinearAllocs::malloc, kNumReduces * wavefrontSize * sizeof(T));
std::mt19937_64 gen(iteration);
// for float16, we generate any random unsigned short, but cap the exponent later on
// to keep it in the range (-8.0..8.0) (just to avoid overflows)
// On the rest of the types, just use a bigger reduced range of numbers to avoid overflows too
T a = std::is_same<T, half>::value? std::numeric_limits<unsigned short>::lowest() : -1023;
T b = std::is_same<T, half>::value? std::numeric_limits<unsigned short>::max() : 1023;
distribution dist(a, b);
LinearAllocGuard<T> input, d_input;
LinearAllocGuard<unsigned long long> masks, d_masks;
Op<T> op;
int numReduce = 0;
genRandomBuffers(d_input, input, dist, gen, kNumReduces * wavefrontSize);
genRandomMasks(d_masks, masks, gen, kNumReduces);
reduce(d_output.ptr(), d_input.ptr(), d_masks.ptr(), kNumReduces, op);
HIP_CHECK(hipDeviceSynchronize());
HIP_CHECK(hipMemcpy(output.ptr(), d_output.ptr(), d_output.size_bytes(), hipMemcpyDeviceToHost));
while (numReduce < kNumReduces) {
T expected = calculateExpected<T>(input.ptr(), op, masks.ptr()[numReduce]);
int lane = 0;
while (lane < wavefrontSize) {
auto result = output.ptr()[numReduce * wavefrontSize + lane];
unsigned long long mask = masks.ptr()[numReduce];
if ((1ul << lane) & mask) {
if constexpr (std::is_integral<T>::value || std::is_same<Op<T>, MinOp<T>>::value ||
std::is_same<Op<T>, MaxOp<T>>::value) {
// for integral types or min/max the result should match exactly
if constexpr (std::is_same<T, __half>::value)
REQUIRE(__half2float(result) == __half2float(expected));
else {
if (result != expected) {
printMismatch(result, expected, input.ptr(), mask);
REQUIRE(result == expected);
}
}
} else
compareFloatingPoint(result, expected, mask, input.ptr());
}
lane++;
}
numReduce++;
}
}
@@ -0,0 +1,8 @@
# The KERNELS object library is only defined when RTC_TESTING is off.
# It is excluded from the default "all" target and compiled as C++17.
if(NOT RTC_TESTING)
  set(TEST_SRC Set.cpp)
  add_library(KERNELS EXCLUDE_FROM_ALL OBJECT ${TEST_SRC})
  target_compile_options(KERNELS PUBLIC -std=c++17)
endif()
@@ -0,0 +1,6 @@
#include <kernels.hh>
// Stores val into the element of Ad selected by the calling thread's global index.
// No bounds check is done: every launched thread writes one element.
__global__ void Set(int* Ad, int val) {
  const int globalIdx = blockIdx.x * blockDim.x + threadIdx.x;
  Ad[globalIdx] = val;
}
@@ -0,0 +1,10 @@
namespace HipTest {
// Element-wise addition: C_d[i] = A_d[i] + B_d[i] for all i < NELEM.
// A thread starts at its global index and advances by the total number of launched threads,
// so any launch size covers all NELEM elements.
template <typename T>
__global__ void vectorADD(const T* A_d, const T* B_d, T* C_d, size_t NELEM) {
  const size_t first = (blockIdx.x * blockDim.x + threadIdx.x);
  const size_t step = blockDim.x * gridDim.x;
  size_t i = first;
  while (i < NELEM) {
    C_d[i] = A_d[i] + B_d[i];
    i += step;
  }
}
}  // namespace HipTest
@@ -0,0 +1,47 @@
# Sources compiled into the MultiProc test executable.
set(TEST_SRC
childMalloc.cc
hipDeviceComputeCapabilityMproc.cc
hipDeviceGetPCIBusIdMproc.cc
hipDeviceTotalMemMproc.cc
hipGetDeviceAttributeMproc.cc
hipGetDeviceCountMproc.cc
hipGetDevicePropertiesMproc.cc
hipSetGetDeviceMproc.cc
hipIpcMemAccessTest.cc
hipMallocConcurrencyMproc.cc
hipMemCoherencyTstMProc.cc
hipIpcEventHandle.cc
deviceAllocationMproc.cc
hipNoGpuTsts.cc
hipMemGetInfoMProc.cc
)
# On UNIX, dummy_kernel.cpp is compiled with --genco into dummy_kernel.code by a custom
# target, and the resulting file is appended to the global G_INSTALL_CUSTOM_TARGETS property.
# NOTE(review): the compiler writes to ${CMAKE_CURRENT_BINARY_DIR}/../multiproc/ while the
# registered path is ${CMAKE_CURRENT_BINARY_DIR}/; these only agree if the current binary
# directory is itself named "multiproc" - confirm.
if(UNIX)
add_custom_target(dummy_kernel.code
COMMAND ${CMAKE_CXX_COMPILER}
--genco ${CMAKE_CURRENT_SOURCE_DIR}/dummy_kernel.cpp
-o ${CMAKE_CURRENT_BINARY_DIR}/../multiproc/dummy_kernel.code
-I${HIP_PATH}/include/ -I${CMAKE_CURRENT_SOURCE_DIR}/../../include
--rocm-path=${ROCM_PATH})
set_property(GLOBAL APPEND PROPERTY
G_INSTALL_CUSTOM_TARGETS ${CMAKE_CURRENT_BINARY_DIR}/dummy_kernel.code)
endif()
# the last argument linker libraries is required for this test but optional to the function
# The runtime-compilation library differs per platform: nvrtc on nvidia, hiprtc on amd.
# No executable is added when HIP_PLATFORM matches neither.
if(HIP_PLATFORM MATCHES "nvidia")
hip_add_exe_to_target(NAME MultiProc
TEST_SRC ${TEST_SRC}
TEST_TARGET_NAME build_tests
LINKER_LIBS nvrtc)
# The nvidia build is additionally compiled with -arch=sm_70.
set_target_properties(MultiProc PROPERTIES COMPILE_FLAGS -arch=sm_70)
elseif(HIP_PLATFORM MATCHES "amd")
hip_add_exe_to_target(NAME MultiProc
TEST_SRC ${TEST_SRC}
TEST_TARGET_NAME build_tests
LINKER_LIBS hiprtc)
endif()
# Make build_tests depend on the code object so it is generated together with the tests.
if(UNIX)
add_dependencies(build_tests dummy_kernel.code)
endif()
@@ -0,0 +1,62 @@
#include <hip_test_common.hh>
#include <hip_test_checkers.hh>
#include <hip_test_kernels.hh>
#ifdef __linux__
#include <unistd.h>
#include <stdlib.h>
#include <sys/wait.h>
#include <dlfcn.h>
/**
 * Forks a child process, calls hipMalloc in the child and reports the outcome to the parent
 * through a pipe.
 *
 * @return true when the child reported a successful hipMalloc; false when the allocation
 *         failed, when pipe() or fork() failed, or when no result arrived from the child.
 */
bool testMallocFromChild() {
  int fd[2];
  bool testResult = false;
  // create pipe descriptors
  if (pipe(fd) != 0) {
    printf("pipe() failed\n");
    return false;
  }
  pid_t childpid = fork();
  if (childpid < 0) {
    // fork failed: there is no child, so both pipe ends have to be closed here
    printf("fork() failed\n");
    close(fd[0]);
    close(fd[1]);
    return false;
  }
  if (childpid == 0) {  // Child
    // writing only, no need for read-descriptor
    close(fd[0]);
    char* A_d = nullptr;
    hipError_t ret = hipMalloc(&A_d, 1024);
    printf("hipMalloc returned : %s\n", hipGetErrorString(ret));
    testResult = (ret == hipSuccess);
    // send the value on the write-descriptor:
    write(fd[1], &testResult, sizeof(testResult));
    // close the write descriptor:
    close(fd[1]);
    exit(0);
  }
  // Parent
  close(fd[1]);
  // parent will wait to read the result of the child
  const ssize_t received = read(fd[0], &testResult, sizeof(testResult));
  // close the read-descriptor
  close(fd[0]);
  // wait for child exit
  wait(NULL);
  if (received != static_cast<ssize_t>(sizeof(testResult))) {
    // the child ended without reporting a result
    return false;
  }
  return testResult;
}
// hipMalloc called from a forked child process has to succeed.
TEST_CASE("ChildMalloc") {
  const bool childAllocated = testMallocFromChild();
  REQUIRE(childAllocated == true);
}
#endif
@@ -0,0 +1,348 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_common.hh>
#include <hip_test_checkers.hh>
#include <hip_test_kernels.hh>
#ifdef __linux__
#include <unistd.h>
#include <stdlib.h>
#include <sys/wait.h>
#include <dlfcn.h>
#define SIZE 2097152
// GPU threads
#define BLOCKSIZE 512
#define GRIDSIZE 256
__device__ static char* dev_common_ptr = nullptr;
/**
 * Device-side allocation through malloc(): the thread with global index 0 allocates
 * size bytes and stores the pointer in dev_common_ptr. A message is printed when the
 * allocation yields nullptr. All other threads do nothing.
 */
static __global__ void kerTestDeviceMalloc(size_t size) {
  const int globalId = blockIdx.x * blockDim.x + threadIdx.x;
  if (globalId != 0) {
    return;
  }
  dev_common_ptr = reinterpret_cast<char*>(malloc(size));
  if (dev_common_ptr == nullptr) {
    printf("Device Allocation Failed! \n");
  }
}
/**
 * Every thread stores SCHAR_MAX into the byte of dev_common_ptr selected by its global
 * index. The buffer is the one published by kerTestDeviceMalloc or kerTestDeviceNew;
 * when it is nullptr each thread prints a message and writes nothing.
 */
static __global__ void kerTestDeviceWrite() {
  if (dev_common_ptr == nullptr) {
    printf("Device Allocation Failed! \n");
    return;
  }
  const int globalId = blockIdx.x * blockDim.x + threadIdx.x;
  dev_common_ptr[globalId] = SCHAR_MAX;
}
/**
 * Verifies and releases the buffer allocated by kerTestDeviceMalloc.
 *
 * Only the thread with global index 0 does any work. It checks that the first
 * BLOCKSIZE*GRIDSIZE bytes of dev_common_ptr all hold SCHAR_MAX and then releases the
 * buffer with free().
 *
 * @param result receives 1 when all checked bytes hold SCHAR_MAX; 0 when a byte differs or
 *               when dev_common_ptr is nullptr
 */
static __global__ void kerTestDeviceFree(int *result) {
  int myId = threadIdx.x + blockDim.x * blockIdx.x;
  // A single thread verifies and frees
  if (myId == 0) {
    if (dev_common_ptr != nullptr) {
      *result = 1;
      for (int idx = 0; idx < (BLOCKSIZE*GRIDSIZE); idx++) {
        // Index with the loop counter: myId is always 0 here, so using it would compare
        // only the first byte over and over.
        if (*(dev_common_ptr + idx) != SCHAR_MAX) {
          *result = 0;
          break;
        }
      }
      free(dev_common_ptr);
    } else {
      *result = 0;
    }
  }
}
/**
 * Device-side allocation through operator new[]: the thread with global index 0 allocates
 * size chars and stores the pointer in dev_common_ptr. A message is printed when the
 * pointer is nullptr. All other threads do nothing.
 */
static __global__ void kerTestDeviceNew(size_t size) {
  const int globalId = blockIdx.x * blockDim.x + threadIdx.x;
  if (globalId != 0) {
    return;
  }
  dev_common_ptr = new char[size];
  if (dev_common_ptr == nullptr) {
    printf("Device Allocation Failed! \n");
  }
}
/**
 * Verifies and releases the buffer allocated by kerTestDeviceNew.
 *
 * Only the thread with global index 0 does any work. It checks that the first
 * BLOCKSIZE*GRIDSIZE bytes of dev_common_ptr all hold SCHAR_MAX and then releases the
 * buffer with delete[].
 *
 * @param result receives 1 when all checked bytes hold SCHAR_MAX; 0 when a byte differs or
 *               when dev_common_ptr is nullptr
 */
static __global__ void kerTestDeviceDelete(int *result) {
  int myId = threadIdx.x + blockDim.x * blockIdx.x;
  // A single thread verifies and frees
  if (myId == 0) {
    if (dev_common_ptr != nullptr) {
      *result = 1;
      for (int idx = 0; idx < (BLOCKSIZE*GRIDSIZE); idx++) {
        // Index with the loop counter: myId is always 0 here, so using it would compare
        // only the first byte over and over.
        if (*(dev_common_ptr + idx) != SCHAR_MAX) {
          *result = 0;
          break;
        }
      }
      delete[] dev_common_ptr;
    } else {
      *result = 0;
    }
  }
}
/**
 * Test device malloc()/new in both Parent and Child Process.
 * Both processes launch a kernel that allocates SIZE bytes on the device and then check via
 * hipMemGetInfo that at least SIZE bytes are in use. The child sends its verdict to the
 * parent through a pipe.
 *
 * @param testmalloc true: use the malloc()/free() kernels; false: use the new/delete[] kernels
 * @return true when both processes saw at least SIZE bytes in use; false otherwise, including
 *         when pipe() or fork() failed or the child did not report a result
 */
static bool testDeviceAllocMulProc(bool testmalloc) {
  int fd[2];
  bool testResult = false;
  size_t avail = 0, tot = 0;
  // Launches the allocation kernel selected by testmalloc.
  auto launchAlloc = [testmalloc]() {
    if (testmalloc) {
      kerTestDeviceMalloc<<<1, 1>>>(SIZE);
    } else {
      kerTestDeviceNew<<<1, 1>>>(SIZE);
    }
  };
  // Launches the matching verify-and-release kernel. Its verdict in result_d is not used by
  // this test because nothing is written to the buffer.
  auto launchRelease = [testmalloc](int* result_d) {
    if (testmalloc) {
      kerTestDeviceFree<<<1, 1>>>(result_d);
    } else {
      kerTestDeviceDelete<<<1, 1>>>(result_d);
    }
  };
  // create pipe descriptors
  if (pipe(fd) != 0) {
    printf("pipe() failed\n");
    return false;
  }
  // fork process
  pid_t childpid = fork();
  if (childpid < 0) {
    // fork failed: there is no child, so both pipe ends have to be closed here
    printf("fork() failed\n");
    close(fd[0]);
    close(fd[1]);
    return false;
  }
  if (childpid == 0) {  // Child
    int *result_d{nullptr};
    HIP_CHECK(hipMalloc(&result_d, sizeof(int)));
    close(fd[0]);
    // Allocate in child
    launchAlloc();
    HIP_CHECK(hipDeviceSynchronize());
    // Check allocated memory size
    HIP_CHECK(hipMemGetInfo(&avail, &tot));
    testResult = !((tot - avail) < SIZE);
    // send the value on the write-descriptor:
    write(fd[1], &testResult, sizeof(testResult));
    // close the write descriptor:
    close(fd[1]);
    launchRelease(result_d);
    HIP_CHECK(hipDeviceSynchronize());
    HIP_CHECK(hipFree(result_d));
    exit(0);
  }
  // Parent
  close(fd[1]);
  int *result_d{nullptr};
  HIP_CHECK(hipMalloc(&result_d, sizeof(int)));
  // Allocate in parent
  launchAlloc();
  HIP_CHECK(hipDeviceSynchronize());
  // Check allocated memory size
  HIP_CHECK(hipMemGetInfo(&avail, &tot));
  if ((tot - avail) < SIZE) {
    // Clean up memory before return
    launchRelease(result_d);
    HIP_CHECK(hipDeviceSynchronize());
    HIP_CHECK(hipFree(result_d));
    close(fd[0]);
    wait(NULL);
    return false;
  }
  // parent will wait to read the verdict of the child
  if (read(fd[0], &testResult, sizeof(testResult)) !=
      static_cast<ssize_t>(sizeof(testResult))) {
    // the child ended without reporting a result
    testResult = false;
  }
  // close the read-descriptor
  close(fd[0]);
  // wait for child exit
  wait(NULL);
  // The child has exited; the parent's own allocation is still live, so at least SIZE bytes
  // must still be reported as in use.
  HIP_CHECK(hipMemGetInfo(&avail, &tot));
  if ((tot - avail) < SIZE) {
    testResult = false;
  }
  launchRelease(result_d);
  HIP_CHECK(hipDeviceSynchronize());
  HIP_CHECK(hipFree(result_d));
  return testResult;
}
/**
* Test device malloc()/new, write and free()/delete[]
* from both Parent and Child Process. From both Parent and
* Child Process invoke the kernel to allocate memory, the
* kernel to write to the allocated memory and a third kernel
* to verify the memory contents and free it.
*/
static bool testDeviceMemMulProc(bool testmalloc) {
int fd[2];
bool testResult = false;
pid_t childpid;
int testResultChild = 0;
size_t size = BLOCKSIZE*GRIDSIZE;
// create pipe descriptors
pipe(fd);
// fork process
childpid = fork();
if (childpid > 0) { // Parent
close(fd[1]);
int *result_d{nullptr}, *result_h{nullptr};
HIP_CHECK(hipMalloc(&result_d, sizeof(int)));
result_h = reinterpret_cast<int*> (malloc(sizeof(int)));
REQUIRE(result_h != nullptr);
// Allocate in parent
if (testmalloc) {
kerTestDeviceMalloc<<<1, 1>>>(size);
} else {
kerTestDeviceNew<<<1, 1>>>(size);
}
// Write
kerTestDeviceWrite<<<GRIDSIZE, BLOCKSIZE>>>();
// Free
if (testmalloc) {
kerTestDeviceFree<<<1, 1>>>(result_d);
} else {
kerTestDeviceDelete<<<1, 1>>>(result_d);
}
HIP_CHECK(hipDeviceSynchronize());
*result_h = 0;
HIP_CHECK(hipMemcpy(result_h, result_d, sizeof(int),
hipMemcpyDefault));
if (*result_h == 0) {
testResult = false;
} else {
testResult = true;
}
// parent will wait to read the device cnt
read(fd[0], &testResultChild, sizeof(int));
if (testResultChild == 0) {
testResult &= false;
} else {
testResult &= true;
}
// close the read-descriptor
close(fd[0]);
HIP_CHECK(hipFree(result_d));
free(result_h);
// wait for child exit
wait(NULL);
} else if (!childpid) { // Child
// Wait for hipDeviceSetLimit() completion in parent.
close(fd[0]);
int *result_d{nullptr}, *result_h{nullptr};
HIP_CHECK(hipMalloc(&result_d, sizeof(int)));
result_h = reinterpret_cast<int*> (malloc(sizeof(int)));
REQUIRE(result_h != nullptr);
// Allocate in child
if (testmalloc) {
kerTestDeviceMalloc<<<1, 1>>>(size);
} else {
kerTestDeviceNew<<<1, 1>>>(size);
}
// Write
kerTestDeviceWrite<<<GRIDSIZE, BLOCKSIZE>>>();
// Free
if (testmalloc) {
kerTestDeviceFree<<<1, 1>>>(result_d);
} else {
kerTestDeviceDelete<<<1, 1>>>(result_d);
}
HIP_CHECK(hipDeviceSynchronize());
*result_h = 0;
HIP_CHECK(hipMemcpy(result_h, result_d, sizeof(int),
hipMemcpyDefault));
// send the value on the write-descriptor:
write(fd[1], result_h, sizeof(int));
// close the write descriptor:
close(fd[1]);
HIP_CHECK(hipFree(result_d));
free(result_h);
exit(0);
}
return testResult;
}
/**
 * Device-side malloc() allocation check, run in a parent and a forked child process.
 */
TEST_CASE("Unit_deviceAllocation_Malloc_MultProcess") {
  const bool passed = testDeviceAllocMulProc(true);
  REQUIRE(passed == true);
}
/**
 * Device-side operator new allocation check, run in a parent and a forked child process.
 */
TEST_CASE("Unit_deviceAllocation_New_MultProcess") {
  const bool passed = testDeviceAllocMulProc(false);
  REQUIRE(passed == true);
}
/**
 * Device-side malloc(), write and free() check, run in a parent and a forked child process.
 */
TEST_CASE("Unit_deviceAllocation_MallocFree_MultProcess") {
  const bool passed = testDeviceMemMulProc(true);
  REQUIRE(passed == true);
}
/**
 * Device-side new, write and delete[] check, run in a parent and a forked child process.
 */
TEST_CASE("Unit_deviceAllocation_NewDelete_MultProcess") {
  const bool passed = testDeviceMemMulProc(false);
  REQUIRE(passed == true);
}
#endif
@@ -0,0 +1,26 @@
/*
Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "hip/hip_runtime.h"
// Kernel with an empty body; extern "C" gives it the unmangled symbol name dummy_ker.
extern "C" __global__ void dummy_ker() {}
@@ -0,0 +1,159 @@
/*
Copyright (c) 2021-2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/**
* hipDeviceComputeCapability tests
* Scenario: Validate behavior of hipDeviceComputeCapability for masked devices
*/
#include <hip_test_common.hh>
#ifdef __linux__
#include <unistd.h>
#include <sys/wait.h>
#define MAX_SIZE 30
#define VISIBLE_DEVICE 0
/**
 * Fetches the GPU device count with the device-masking environment variables removed.
 *
 * The variables are unset in the calling process; the count itself is queried in a forked
 * child, which sends it back through a pipe.
 *
 * @param pdevCnt receives the device count; set to 0 when pipe() or fork() fails or when the
 *                child does not report a count
 */
static void getDeviceCount(int *pdevCnt) {
  int fd[2], val = 0;
  // disable visible_devices env from shell
#ifdef __HIP_PLATFORM_NVIDIA__
  unsetenv("CUDA_VISIBLE_DEVICES");
#else
  unsetenv("ROCR_VISIBLE_DEVICES");
  unsetenv("HIP_VISIBLE_DEVICES");
#endif
  // create pipe descriptors
  if (pipe(fd) != 0) {
    *pdevCnt = 0;
    return;
  }
  pid_t childpid = fork();
  if (childpid < 0) {  // failure
    // there is no child, so both pipe ends have to be closed here
    close(fd[0]);
    close(fd[1]);
    *pdevCnt = 0;
    return;
  }
  if (childpid == 0) {  // Child
    int devCnt = 1;
    // writing only, no need for read-descriptor
    close(fd[0]);
    HIP_CHECK(hipGetDeviceCount(&devCnt));
    // send the value on the write-descriptor:
    write(fd[1], &devCnt, sizeof(devCnt));
    // close the write descriptor:
    close(fd[1]);
    exit(0);
  }
  // Parent
  close(fd[1]);
  // parent will wait to read the device cnt
  if (read(fd[0], &val, sizeof(val)) != static_cast<ssize_t>(sizeof(val))) {
    // the child ended without reporting a count
    val = 0;
  }
  // close the read-descriptor
  close(fd[0]);
  // wait for child exit
  wait(NULL);
  *pdevCnt = val;
}
/**
 * Runs hipDeviceComputeCapability on masked devices.
 *
 * A forked child restricts the visible devices to VISIBLE_DEVICE and then queries the
 * ordinals 1 .. actualNumGPUs-1. Every such query is expected to fail. The child sends its
 * verdict to the parent through a pipe.
 *
 * @param actualNumGPUs number of GPUs present when no masking is applied
 * @return true when every query failed; false when a query succeeded, when pipe() or fork()
 *         failed, or when the child did not report a verdict
 */
bool runMaskedDeviceTest(int actualNumGPUs) {
  bool testResult = true;
  int fd[2];
  if (pipe(fd) != 0) {
    printf("pipe() failed\n");
    return false;
  }
  pid_t cPid = fork();
  if (cPid < 0) {
    printf("fork() failed\n");
    // there is no child, so both pipe ends have to be closed here
    close(fd[0]);
    close(fd[1]);
    HIP_ASSERT(false);
    return false;
  }
  if (cPid == 0) {  // child
    hipError_t err;
    char visibleDeviceString[MAX_SIZE] = {};
    snprintf(visibleDeviceString, MAX_SIZE, "%d", VISIBLE_DEVICE);
    // disable visible_devices env from shell
#ifdef __HIP_PLATFORM_NVIDIA__
    unsetenv("CUDA_VISIBLE_DEVICES");
    setenv("CUDA_VISIBLE_DEVICES", visibleDeviceString, 1);
    HIP_CHECK(hipInit(0));
#else
    unsetenv("ROCR_VISIBLE_DEVICES");
    unsetenv("HIP_VISIBLE_DEVICES");
    setenv("ROCR_VISIBLE_DEVICES", visibleDeviceString, 1);
    setenv("HIP_VISIBLE_DEVICES", visibleDeviceString, 1);
#endif
    for (int count = 1; count < actualNumGPUs; count++) {
      int major, minor;
      err = hipDeviceComputeCapability(&major, &minor, count);
      if (err == hipSuccess) {
        // a masked device must not be accessible
        testResult = false;
      } else {
        printf("hipDeviceComputeCapability: Error Code Returned: '%s'(%d)\n",
               hipGetErrorString(err), err);
      }
    }
    close(fd[0]);
    printf("testResult = %d \n", testResult);
    write(fd[1], &testResult, sizeof(testResult));
    close(fd[1]);
    exit(0);
  }
  // parent
  close(fd[1]);
  if (read(fd[0], &testResult, sizeof(testResult)) !=
      static_cast<ssize_t>(sizeof(testResult))) {
    // No verdict arrived; without this the initial value "true" would be reported as a pass.
    testResult = false;
  }
  close(fd[0]);
  wait(NULL);
  return testResult;
}
/**
 * hipDeviceComputeCapability on masked devices; needs at least two GPUs, otherwise the
 * test succeeds without running the check.
 */
TEST_CASE("Unit_hipDeviceGet_MaskedDevices") {
  constexpr int ReqGPUs = 2;
  int count = -1;
  getDeviceCount(&count);
  if (count < ReqGPUs) {
    SUCCEED("Not enough GPUs to run the masked GPU tests");
    return;
  }
  const bool ret = runMaskedDeviceTest(count);
  REQUIRE(ret == true);
}
#endif // __linux__
@@ -0,0 +1,258 @@
/*
* Copyright (c) 2020-2021 Advanced Micro Devices, Inc. All rights reserved.
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
/*
* Tests to
* 1. Compare {pciDomainID, pciBusID, pciDeviceID} values
* hipDeviceGetPCIBusId vs lspci
* 2. Validate behavior of hipDeviceGetPCIBusId for masked devices
*/
#include <hip_test_common.hh>
#ifdef __linux__
#include <unistd.h>
#include <sys/wait.h>
#define MAX_DEVICE_LENGTH 20
#define MAX_SIZE 30
#define VISIBLE_DEVICE 0
namespace hipDeviceGetPCIBusIdTests {
/**
 * Fetches the GPU device count with the device-masking environment variables removed.
 *
 * The variables are unset in the calling process; the count itself is queried in a forked
 * child, which sends it back through a pipe.
 *
 * @param pdevCnt receives the device count; set to 0 when pipe() or fork() fails or when the
 *                child does not report a count
 */
void getDeviceCount(int *pdevCnt) {
  int fd[2], val = 0;
  // disable visible_devices env from shell
#ifdef __HIP_PLATFORM_NVIDIA__
  unsetenv("CUDA_VISIBLE_DEVICES");
#else
  unsetenv("ROCR_VISIBLE_DEVICES");
  unsetenv("HIP_VISIBLE_DEVICES");
#endif
  // create pipe descriptors
  if (pipe(fd) != 0) {
    *pdevCnt = 0;
    return;
  }
  pid_t childpid = fork();
  if (childpid < 0) {  // failure
    // there is no child, so both pipe ends have to be closed here
    close(fd[0]);
    close(fd[1]);
    *pdevCnt = 0;
    return;
  }
  if (childpid == 0) {  // Child
    int devCnt = 1;
    // writing only, no need for read-descriptor
    close(fd[0]);
    HIP_CHECK(hipGetDeviceCount(&devCnt));
    // send the value on the write-descriptor:
    write(fd[1], &devCnt, sizeof(devCnt));
    // close the write descriptor:
    close(fd[1]);
    exit(0);
  }
  // Parent
  close(fd[1]);
  // parent will wait to read the device cnt
  if (read(fd[0], &val, sizeof(val)) != static_cast<ssize_t>(sizeof(val))) {
    // the child ended without reporting a count
    val = 0;
  }
  // close the read-descriptor
  close(fd[0]);
  // wait for child exit
  wait(NULL);
  *pdevCnt = val;
}

/**
 * Runs hipDeviceGetPCIBusId on masked devices.
 *
 * A forked child restricts the visible devices to VISIBLE_DEVICE and then queries the
 * ordinals 1 .. actualNumGPUs-1. Every such query is expected to fail. The child sends its
 * verdict to the parent through a pipe.
 *
 * @param actualNumGPUs number of GPUs present when no masking is applied
 * @return true when every query failed; false when a query succeeded, when pipe() or fork()
 *         failed, or when the child did not report a verdict
 */
bool testWithMaskedDevices(int actualNumGPUs) {
  bool testResult = true;
  int fd[2];
  if (pipe(fd) != 0) {
    printf("pipe() failed\n");
    return false;
  }
  pid_t cPid = fork();
  if (cPid < 0) {
    printf("fork() failed\n");
    // there is no child, so both pipe ends have to be closed here
    close(fd[0]);
    close(fd[1]);
    HIP_ASSERT(false);
    return false;
  }
  if (cPid == 0) {  // child
    hipError_t err;
    char pciBusId[MAX_DEVICE_LENGTH];
    char visibleDeviceString[MAX_SIZE] = {};
    snprintf(visibleDeviceString, MAX_SIZE, "%d", VISIBLE_DEVICE);
    // disable visible_devices env from shell
#ifdef __HIP_PLATFORM_NVIDIA__
    unsetenv("CUDA_VISIBLE_DEVICES");
    setenv("CUDA_VISIBLE_DEVICES", visibleDeviceString, 1);
    HIP_CHECK(hipInit(0));
#else
    unsetenv("ROCR_VISIBLE_DEVICES");
    unsetenv("HIP_VISIBLE_DEVICES");
    setenv("ROCR_VISIBLE_DEVICES", visibleDeviceString, 1);
    setenv("HIP_VISIBLE_DEVICES", visibleDeviceString, 1);
#endif
    for (int count = 1; count < actualNumGPUs; count++) {
      err = hipDeviceGetPCIBusId(pciBusId, MAX_DEVICE_LENGTH, count);
      if (err == hipSuccess) {
        // a masked device must not be accessible
        testResult = false;
      } else {
        // the message names the API that was actually called
        printf("hipDeviceGetPCIBusId: Error Code Returned: '%s'(%d)\n",
               hipGetErrorString(err), err);
      }
    }
    close(fd[0]);
    printf("testResult = %d \n", testResult);
    write(fd[1], &testResult, sizeof(testResult));
    close(fd[1]);
    exit(0);
  }
  // parent
  close(fd[1]);
  if (read(fd[0], &testResult, sizeof(testResult)) !=
      static_cast<ssize_t>(sizeof(testResult))) {
    // No verdict arrived; without this the initial value "true" would be reported as a pass.
    testResult = false;
  }
  close(fd[0]);
  wait(NULL);
  return testResult;
}

/**
 * Fills hipDeviceList[i] with the PCI bus id string of device i for all i < deviceCount.
 *
 * @param deviceCount   number of devices to query
 * @param hipDeviceList deviceCount buffers of at least MAX_DEVICE_LENGTH chars each
 * @return always true; a failing query is reported through HIP_CHECK
 */
bool getPciBusId(int deviceCount,
                 char **hipDeviceList) {
  for (int i = 0; i < deviceCount; i++) {
    HIP_CHECK(hipDeviceGetPCIBusId(hipDeviceList[i], MAX_DEVICE_LENGTH, i));
  }
  return true;
}
}  // namespace hipDeviceGetPCIBusIdTests
/**
 * Scenario: hipDeviceGetPCIBusId on masked devices; needs at least two GPUs, otherwise the
 * test succeeds without running the check.
 */
TEST_CASE("Unit_hipDeviceGetPCIBusId_MaskedDevices") {
  constexpr int ReqGPUs = 2;
  int count = -1;
  hipDeviceGetPCIBusIdTests::getDeviceCount(&count);
  if (count < ReqGPUs) {
    SUCCEED("Not enough GPUs to run the masked GPU tests");
    return;
  }
  const bool ret = hipDeviceGetPCIBusIdTests::testWithMaskedDevices(count);
  REQUIRE(ret == true);
}
/* Compare {pciDomainID, pciBusID, pciDeviceID} values
 * hipDeviceGetPCIBusId vs lspci
 *
 * The bus ids reported by HIP for all devices are collected first. Then the ids printed by
 * lspci (filtered by vendor) are read line by line and the first cmpLen characters of each
 * are compared, ignoring case, with every HIP id. The test passes when the number of matches
 * equals the HIP device count. The test is skipped when lspci produces no output.
 */
TEST_CASE("Unit_hipDeviceGetPCIBusId_CheckPciBusIDWithLspci") {
  FILE *fpipe;
  {
    // Check if lspci is installed, if not, don't proceed
    char const *cmd = "lspci --version";
    char *lspciCheck{nullptr};
    constexpr auto MaxLen = 50;
    char temp[MaxLen]{};
    fpipe = popen(cmd, "r");
    REQUIRE_FALSE(fpipe == nullptr);
    lspciCheck = fgets(temp, MaxLen, fpipe);
    pclose(fpipe);
    if (lspciCheck == nullptr) {
      WARN("Skipping test as lspci is not found in system");
      return;
    }
  }
  int deviceCount = 0;
  HIP_CHECK(hipGetDeviceCount(&deviceCount));
  REQUIRE_FALSE(deviceCount == 0);
  // Allocate an array of pointer to characters
  char **hipDeviceList = new char*[deviceCount];
  REQUIRE_FALSE(hipDeviceList == nullptr);
  char **pciDeviceList = new char*[deviceCount];
  REQUIRE_FALSE(pciDeviceList == nullptr);
  for (int i = 0; i < deviceCount; i++) {
    hipDeviceList[i] = new char[MAX_DEVICE_LENGTH];
    REQUIRE_FALSE(hipDeviceList[i] == nullptr);
    pciDeviceList[i] = new char[MAX_DEVICE_LENGTH];
    REQUIRE_FALSE(pciDeviceList[i] == nullptr);
  }
  hipDeviceGetPCIBusIdTests::getPciBusId(deviceCount, hipDeviceList);
  char const *command = nullptr;
  // Get lspci device list and compare with hip device list
  if ((TestContext::get()).isNvidia()) {
    command = "lspci -D | grep controller | grep NVIDIA | "
              "cut -d ' ' -f 1";
  } else {
    command = "lspci -D | grep -e controller -e accelerator | grep AMD/ATI | "
              "cut -d ' ' -f 1";
  }
  fpipe = popen(command, "r");
  REQUIRE_FALSE(fpipe == nullptr);
  int index = 0;
  int deviceMatchCount = 0;
  constexpr auto cmpLen = 10;
  while (fgets(pciDeviceList[index], MAX_DEVICE_LENGTH, fpipe)) {
    bool bMatchFound = false;
    for (int deviceNo = 0; deviceNo < deviceCount; deviceNo++) {
      if (!strncasecmp(pciDeviceList[index], hipDeviceList[deviceNo],
                       cmpLen)) {
        deviceMatchCount++;
        bMatchFound = true;
      }
    }
    if (bMatchFound == false) {
      printf("PCI device: %s is not reported by HIP\n",
             pciDeviceList[index]);
    }
    index++;
    // never read more lines than there are buffers
    if (index >= deviceCount) break;
  }
  // Deallocate; the per-device buffers come from new[], so they need delete[]
  for (int i = 0; i < deviceCount; i++) {
    delete[] hipDeviceList[i];
  }
  delete[] hipDeviceList;
  for (int i = 0; i < deviceCount; i++) {
    delete[] pciDeviceList[i];
  }
  delete[] pciDeviceList;
  pclose(fpipe);
  REQUIRE(deviceMatchCount == deviceCount);
}
#endif
@@ -0,0 +1,177 @@
/*
Copyright (c) 2020-2021 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_common.hh>
#ifdef __linux__
#include <unistd.h>
#include <sys/wait.h>
/**
* @addtogroup hipDeviceTotalMem hipDeviceTotalMem
* @{
* @ingroup DriverTest
*/
#define MAX_SIZE 30
#define VISIBLE_DEVICE 0
/**
 * Fetches the GPU device count with the device-masking environment variables removed.
 *
 * The variables are unset in the calling process; the count itself is queried in a forked
 * child, which sends it back through a pipe.
 *
 * @param pdevCnt receives the device count; set to 0 when pipe() or fork() fails or when the
 *                child does not report a count
 */
static void getDeviceCount(int *pdevCnt) {
  int fd[2], val = 0;
  // disable visible_devices env from shell
#ifdef __HIP_PLATFORM_NVIDIA__
  unsetenv("CUDA_VISIBLE_DEVICES");
#else
  unsetenv("ROCR_VISIBLE_DEVICES");
  unsetenv("HIP_VISIBLE_DEVICES");
#endif
  // create pipe descriptors
  if (pipe(fd) != 0) {
    *pdevCnt = 0;
    return;
  }
  pid_t childpid = fork();
  if (childpid < 0) {  // failure
    // there is no child, so both pipe ends have to be closed here
    close(fd[0]);
    close(fd[1]);
    *pdevCnt = 0;
    return;
  }
  if (childpid == 0) {  // Child
    int devCnt = 1;
    // writing only, no need for read-descriptor
    close(fd[0]);
    HIP_CHECK(hipGetDeviceCount(&devCnt));
    // send the value on the write-descriptor:
    write(fd[1], &devCnt, sizeof(devCnt));
    // close the write descriptor:
    close(fd[1]);
    exit(0);
  }
  // Parent
  close(fd[1]);
  // parent will wait to read the device cnt
  if (read(fd[0], &val, sizeof(val)) != static_cast<ssize_t>(sizeof(val))) {
    // the child ended without reporting a count
    val = 0;
  }
  // close the read-descriptor
  close(fd[0]);
  // wait for child exit
  wait(NULL);
  *pdevCnt = val;
}
/**
 * Tries to fetch the total memory of masked devices and returns pass/fail.
 *
 * A forked child restricts the visible devices to VISIBLE_DEVICE and then calls
 * hipDeviceTotalMem for the ordinals 1 .. actualNumGPUs-1. Every such call is expected to
 * fail. The child sends its verdict to the parent through a pipe.
 *
 * @param actualNumGPUs number of GPUs present when no masking is applied
 * @return true when every call failed; false when a call succeeded, when pipe() or fork()
 *         failed, or when the child did not report a verdict
 */
static bool getTotalMemoryOfMaskedDevices(int actualNumGPUs) {
  bool testResult = true;
  int fd[2];
  if (pipe(fd) != 0) {
    printf("pipe() failed\n");
    return false;
  }
  pid_t cPid = fork();
  if (cPid < 0) {
    printf("fork() failed\n");
    // there is no child, so both pipe ends have to be closed here
    close(fd[0]);
    close(fd[1]);
    HIP_ASSERT(false);
    return false;
  }
  if (cPid == 0) {  // child
    hipError_t err;
    char visibleDeviceString[MAX_SIZE] = {};
    snprintf(visibleDeviceString, MAX_SIZE, "%d", VISIBLE_DEVICE);
    // disable visible_devices env from shell
#ifdef __HIP_PLATFORM_NVIDIA__
    unsetenv("CUDA_VISIBLE_DEVICES");
    setenv("CUDA_VISIBLE_DEVICES", visibleDeviceString, 1);
    HIP_CHECK(hipInit(0));
#else
    unsetenv("ROCR_VISIBLE_DEVICES");
    unsetenv("HIP_VISIBLE_DEVICES");
    setenv("ROCR_VISIBLE_DEVICES", visibleDeviceString, 1);
    setenv("HIP_VISIBLE_DEVICES", visibleDeviceString, 1);
#endif
    for (int count = 1; count < actualNumGPUs; count++) {
      size_t totMem;
      err = hipDeviceTotalMem(&totMem, count);
      if (err == hipSuccess) {
        // a masked device must not be accessible
        testResult = false;
      } else {
        printf("hipDeviceTotalMem: Error Code Returned: '%s'(%d)\n",
               hipGetErrorString(err), err);
      }
    }
    close(fd[0]);
    printf("testResult = %d \n", testResult);
    write(fd[1], &testResult, sizeof(testResult));
    close(fd[1]);
    exit(0);
  }
  // parent
  close(fd[1]);
  if (read(fd[0], &testResult, sizeof(testResult)) !=
      static_cast<ssize_t>(sizeof(testResult))) {
    // No verdict arrived; without this the initial value "true" would be reported as a pass.
    testResult = false;
  }
  close(fd[0]);
  wait(NULL);
  return testResult;
}
/**
* Test Description
* ------------------------
* - Check that total memory is returned correctly when
* the devices are masked.
* Test source
* ------------------------
* - unit/multiproc/hipDeviceTotalMemMproc.cc
* Test requirements
* ------------------------
* - Multi-device test
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Unit_hipDeviceTotalMem_MaskedDevices") {
  constexpr int ReqGPUs = 2;
  int count = -1;
  getDeviceCount(&count);
  // The masked-device check is only meaningful with at least two GPUs.
  if (count < ReqGPUs) {
    SUCCEED("Not enough GPUs to run the masked GPU tests");
    return;
  }
  const bool ret = getTotalMemoryOfMaskedDevices(count);
  REQUIRE(ret == true);
}
/**
* End doxygen group hipDeviceTotalMem.
* @}
*/
#endif
@@ -0,0 +1,164 @@
/*
Copyright (c) 2020-2021 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/**
* hipGetDeviceAttribute tests
* Scenario: Validate behavior of hipGetDeviceAttribute for masked devices.
*/
#include <hip_test_common.hh>
#include <iostream>
#ifdef __linux__
#include <unistd.h>
#include <sys/wait.h>
#define MAX_SIZE 30
#define VISIBLE_DEVICE 0
/**
 * Fetches the GPU device count.
 *
 * The device-visibility environment variables are cleared first, then
 * hipGetDeviceCount() is called in a forked child and the value is sent back
 * to the caller over a pipe (presumably so that the HIP runtime is not
 * initialized in the calling process - confirm against the test design).
 *
 * @param pdevCnt [out] Receives the device count reported by the child, or 0
 *                if the pipe/fork could not be created or the child did not
 *                report a value.
 */
static void getDeviceCount(int *pdevCnt) {
  int fd[2], val = 0;
  // Default result for every failure path below.
  *pdevCnt = 0;
  // create pipe descriptors
  if (pipe(fd) == -1) {
    return;
  }
  // disable visible_devices env from shell
#ifdef __HIP_PLATFORM_NVIDIA__
  unsetenv("CUDA_VISIBLE_DEVICES");
#else
  unsetenv("ROCR_VISIBLE_DEVICES");
  unsetenv("HIP_VISIBLE_DEVICES");
#endif
  const pid_t childpid = fork();
  if (childpid > 0) {  // Parent
    close(fd[1]);
    // Blocks until the child writes the count or closes its end of the pipe.
    const ssize_t got = read(fd[0], &val, sizeof(val));
    close(fd[0]);
    // Reap exactly the child created above.
    waitpid(childpid, NULL, 0);
    // Only trust the value when a complete int was received.
    if (got == static_cast<ssize_t>(sizeof(val))) {
      *pdevCnt = val;
    }
  } else if (childpid == 0) {  // Child
    int devCnt = 1;
    // writing only, no need for read-descriptor
    close(fd[0]);
    HIP_CHECK(hipGetDeviceCount(&devCnt));
    // send the value on the write-descriptor
    const ssize_t sent = write(fd[1], &devCnt, sizeof(devCnt));
    close(fd[1]);
    exit(sent == static_cast<ssize_t>(sizeof(devCnt)) ? 0 : 1);
  } else {  // fork() failed: release the pipe, result stays 0
    close(fd[0]);
    close(fd[1]);
  }
}
/**
* Tries to fetch device attribute of masked devices and returns pass/fail.
*/
static bool validateGetAttributeOfMaskedDevices(int actualNumGPUs) {
bool testResult = true;
int fd[2];
pipe(fd);
pid_t cPid;
cPid = fork();
if (cPid == 0) { // child
hipError_t err;
char visibleDeviceString[MAX_SIZE] = {};
snprintf(visibleDeviceString, MAX_SIZE, "%d", VISIBLE_DEVICE);
// disable visible_devices env from shell
#ifdef __HIP_PLATFORM_NVIDIA__
unsetenv("CUDA_VISIBLE_DEVICES");
setenv("CUDA_VISIBLE_DEVICES", visibleDeviceString, 1);
HIP_CHECK(hipInit(0));
#else
unsetenv("ROCR_VISIBLE_DEVICES");
unsetenv("HIP_VISIBLE_DEVICES");
setenv("ROCR_VISIBLE_DEVICES", visibleDeviceString, 1);
setenv("HIP_VISIBLE_DEVICES", visibleDeviceString, 1);
#endif
for (int count = 1;
count < actualNumGPUs; count++) {
int pi = -1;
err = hipDeviceGetAttribute(&pi, hipDeviceAttributePciBusId, count);
if (err == hipSuccess) {
testResult &= false;
} else {
printf("hipDeviceGetAttribute: Error Code Returned: '%s'(%d)\n",
hipGetErrorString(err), err);
}
}
close(fd[0]);
printf("testResult = %d \n", testResult);
write(fd[1], &testResult, sizeof(testResult));
close(fd[1]);
exit(0);
} else if (cPid > 0) { // parent
close(fd[1]);
read(fd[0], &testResult, sizeof(testResult));
close(fd[0]);
wait(NULL);
} else {
printf("fork() failed\n");
HIP_ASSERT(false);
}
return testResult;
}
/**
 * Scenario: hipDeviceGetAttribute must not succeed for devices that have been
 * masked out. The check only runs when at least two GPUs are present.
 */
TEST_CASE("Unit_hipDeviceGetAttribute_MaskedDevices") {
  // One device stays visible, so at least one more is needed to be masked.
  constexpr int ReqGPUs = 2;
  int count = -1;
  getDeviceCount(&count);
  if (count < ReqGPUs) {
    SUCCEED("Not enough GPUs to run the masked GPU tests");
    return;
  }
  const bool ret = validateGetAttributeOfMaskedDevices(count);
  REQUIRE(ret == true);
}
#endif
@@ -0,0 +1,54 @@
/*
Copyright (c) 2020-2021 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/**
* hipGetDeviceCount tests
* Scenario: Validates the value of numDevices when devices are hidden.
*/
#include <hip_test_common.hh>
#ifdef __linux__
#include <unistd.h>
#include <sys/wait.h>
#define MAX_SIZE 30
#define VISIBLE_DEVICE 0
/**
 * Restricts device visibility to VISIBLE_DEVICE through the platform's
 * *_VISIBLE_DEVICES environment variables and verifies that
 * hipGetDeviceCount then reports exactly one device.
 */
TEST_CASE("Unit_hipGetDeviceCount_MaskedDevices") {
  // Decimal ordinal of the single device that stays visible.
  char maskValue[MAX_SIZE] = {};
  snprintf(maskValue, MAX_SIZE, "%d", VISIBLE_DEVICE);
#ifdef __HIP_PLATFORM_NVIDIA__
  const char* const maskVars[] = {"CUDA_VISIBLE_DEVICES"};
#else
  const char* const maskVars[] = {"ROCR_VISIBLE_DEVICES",
                                  "HIP_VISIBLE_DEVICES"};
#endif
  // Drop whatever the shell exported, then install the single-device mask.
  for (const char* var : maskVars) {
    unsetenv(var);
    setenv(var, maskValue, 1);
  }
  int numDevices = 0;
  HIP_CHECK(hipGetDeviceCount(&numDevices));
  REQUIRE(numDevices == 1);
}
#endif
@@ -0,0 +1,165 @@
/*
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/**
* Scenario: Validate behavior of hipGetDeviceProperties for masked devices.
*/
#include <hip_test_common.hh>
#ifdef __linux__
#include <unistd.h>
#include <sys/wait.h>
#define MAX_SIZE 30
#define VISIBLE_DEVICE 0
/**
 * Fetches the GPU device count.
 *
 * Clears the device-visibility environment variables, then calls
 * hipGetDeviceCount() in a forked child and passes the value back to the
 * caller through a pipe (presumably to keep the HIP runtime uninitialized in
 * the calling process - confirm against the test design).
 *
 * @param pdevCnt [out] Receives the device count reported by the child, or 0
 *                if the pipe/fork could not be created or the child did not
 *                report a value.
 */
static void getDeviceCount(int *pdevCnt) {
  int fd[2], val = 0;
  // Default result for every failure path below.
  *pdevCnt = 0;
  // create pipe descriptors
  if (pipe(fd) == -1) {
    return;
  }
  // disable visible_devices env from shell
#ifdef __HIP_PLATFORM_NVIDIA__
  unsetenv("CUDA_VISIBLE_DEVICES");
#else
  unsetenv("ROCR_VISIBLE_DEVICES");
  unsetenv("HIP_VISIBLE_DEVICES");
#endif
  const pid_t childpid = fork();
  if (childpid > 0) {  // Parent
    close(fd[1]);
    // Blocks until the child writes the count or closes its end of the pipe.
    const ssize_t got = read(fd[0], &val, sizeof(val));
    close(fd[0]);
    // Reap exactly the child created above.
    waitpid(childpid, NULL, 0);
    // Only trust the value when a complete int was received.
    if (got == static_cast<ssize_t>(sizeof(val))) {
      *pdevCnt = val;
    }
  } else if (childpid == 0) {  // Child
    int devCnt = 1;
    // writing only, no need for read-descriptor
    close(fd[0]);
    HIP_CHECK(hipGetDeviceCount(&devCnt));
    // send the value on the write-descriptor
    const ssize_t sent = write(fd[1], &devCnt, sizeof(devCnt));
    close(fd[1]);
    exit(sent == static_cast<ssize_t>(sizeof(devCnt)) ? 0 : 1);
  } else {  // fork() failed: release the pipe, result stays 0
    close(fd[0]);
    close(fd[1]);
  }
}
/**
* Tries to fetch device properties of masked devices and returns pass/fail.
*/
static bool validateGetPropsOfMaskedDevices(int actualNumGPUs) {
bool testResult = true;
int fd[2];
pipe(fd);
pid_t cPid;
cPid = fork();
if (cPid == 0) { // child
hipError_t err;
char visibleDeviceString[MAX_SIZE] = {};
snprintf(visibleDeviceString, MAX_SIZE, "%d", VISIBLE_DEVICE);
// disable visible_devices env from shell
#ifdef __HIP_PLATFORM_NVIDIA__
unsetenv("CUDA_VISIBLE_DEVICES");
setenv("CUDA_VISIBLE_DEVICES", visibleDeviceString, 1);
HIP_CHECK(hipInit(0));
#else
unsetenv("ROCR_VISIBLE_DEVICES");
unsetenv("HIP_VISIBLE_DEVICES");
setenv("ROCR_VISIBLE_DEVICES", visibleDeviceString, 1);
setenv("HIP_VISIBLE_DEVICES", visibleDeviceString, 1);
#endif
for (int count = 1;
count < actualNumGPUs; count++) {
hipDeviceProp_t prop;
err = hipGetDeviceProperties(&prop, count);
if (err == hipSuccess) {
testResult &= false;
} else {
printf("hipGetDeviceProperties: Error Code Returned: '%s'(%d)\n",
hipGetErrorString(err), err);
}
}
close(fd[0]);
printf("testResult = %d \n", testResult);
write(fd[1], &testResult, sizeof(testResult));
close(fd[1]);
exit(0);
} else if (cPid > 0) { // parent
close(fd[1]);
read(fd[0], &testResult, sizeof(testResult));
close(fd[0]);
wait(NULL);
} else {
printf("fork() failed\n");
HIP_ASSERT(false);
}
return testResult;
}
/**
 * Scenario: hipGetDeviceProperties must not succeed for devices that have
 * been masked out. The check only runs when at least two GPUs are present.
 */
TEST_CASE("Unit_hipGetDeviceProperties_MaskedDevices") {
  // One device stays visible, so at least one more is needed to be masked.
  constexpr int ReqGPUs = 2;
  int count = -1;
  getDeviceCount(&count);
  if (count < ReqGPUs) {
    SUCCEED("Not enough GPUs to run the masked GPU tests");
    return;
  }
  const bool ret = validateGetPropsOfMaskedDevices(count);
  REQUIRE(ret == true);
}
#endif
@@ -0,0 +1,443 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_common.hh>
#include <hip_test_checkers.hh>
#ifdef __linux__
#include <sys/mman.h>
#include <sys/wait.h>
#include <unistd.h>
/**
* @addtogroup hipIpcGetEventHandle hipIpcGetEventHandle
* @{
* @ingroup DeviceTest
* `hipIpcGetEventHandle(hipIpcEventHandle_t* handle, hipEvent_t event)` -
* Gets an opaque interprocess handle for an event.
* This opaque handle may be copied into other processes and opened with hipIpcOpenEventHandle.
*/
#define BUF_SIZE 4096
#define MAX_DEVICES 16
// Per-process record kept in memory shared between the test processes;
// entry [i] is used by the process whose index is i.
typedef struct ipcEventInfo {
// Device ordinal the owning process passes to hipSetDevice().
int device;
// Pid of the forked process, stored by the parent so it can waitpid() on it.
pid_t pid;
// IPC event handle exported by a process with index > 0 and opened by
// process 0.
hipIpcEventHandle_t eventHandle;
// IPC memory handle of the shared device buffer; only entry [0] is written
// (by process 0) and read (by the other processes).
hipIpcMemHandle_t memHandle;
} ipcEventInfo_t;
// Device list filled in by getDevices() and read back by the test case.
typedef struct ipcDevices {
// Number of usable devices found (device 0 plus its two-way P2P peers);
// 0 when fewer than two devices exist.
int count;
// Device ordinals; holds at most MAX_DEVICES entries.
int ordinals[MAX_DEVICES];
} ipcDevices_t;
// State of the cross-process barrier implemented by processBarrier().
typedef struct ipcBarrier {
// Number of processes that have arrived at the current barrier round.
int count;
// Shared sense flag; flipped by the last process to arrive in a round.
bool sense;
// Set when a process hits a validation error; waiters that observe it exit
// with EXIT_FAILURE instead of spinning.
bool allExit;
} ipcBarrier_t;
/*
  Get device count and list the devices that have two-way P2P access with
  Device 0.

  The HIP queries run in a forked child, which writes its findings into
  `devices`; the caller is therefore expected to pass memory that is shared
  between parent and child (the test case passes a MAP_SHARED mapping).

  On return devices->count is 0 when fewer than two devices exist, otherwise
  it is the number of valid entries in devices->ordinals. The entries are
  stored densely: ordinals[0] is device 0 and ordinals[1..count-1] are its
  peers, so callers can index the list by process index. At most MAX_DEVICES
  entries are recorded.
*/
void getDevices(ipcDevices_t *devices) {
  pid_t pid = fork();
  if (pid < 0) {
    // No child was created, so there is nothing to wait for or to report.
    devices->count = 0;
    HIP_ASSERT(false);
    return;
  }
  if (!pid) {
    // HIP APIs are called in child process,
    // to avoid HIP Initialization in main process.
    int devCnt{};
    HIP_CHECK(hipGetDeviceCount(&devCnt));
    if (devCnt < 2) {
      devices->count = 0;
      WARN("Count less than expected number of devices");
      exit(EXIT_SUCCESS);
    }
    // Device 0
    devices->ordinals[0] = 0;
    devices->count = 1;
    // Check possibility for peer accesses, relevant to our tests
    INFO("Checking GPU(s) for support of p2p memory access ");
    INFO("Between GPU0 and other GPU(s)");
    // Stop once the fixed-size ordinals array is full.
    for (int i = 1; i < devCnt && devices->count < MAX_DEVICES; i++) {
      int canPeerAccess_0i = 0, canPeerAccess_i0 = 0;
      HIP_CHECK(hipDeviceCanAccessPeer(&canPeerAccess_0i, 0, i));
      HIP_CHECK(hipDeviceCanAccessPeer(&canPeerAccess_i0, i, 0));
      if (canPeerAccess_0i && canPeerAccess_i0) {
        // Append at the next free slot so the list has no gaps when some
        // devices are not peers of device 0.
        devices->ordinals[devices->count] = i;
        INFO("Two-way peer access is available between GPU"
             << devices->ordinals[0] <<" and GPU"
             << devices->ordinals[devices->count]);
        devices->count += 1;
      }
    }
    exit(EXIT_SUCCESS);
  } else {
    // Parent: the child must have terminated with status 0.
    int status = 0;
    waitpid(pid, &status, 0);
    HIP_ASSERT(!status);
  }
}
// Barrier state shared by all test processes; the test case points this at a
// MAP_SHARED mapping before forking.
static ipcBarrier_t *g_Barrier{};
// This process's private sense flag, toggled on every processBarrier() call.
static bool g_procSense;
// Number of processes that take part in each barrier round.
static int g_processCnt;
/*
Calling process waits for other processes to signal/complete.

Sense-reversing barrier over g_Barrier: every caller atomically increments
the arrival counter. The caller that brings it to g_processCnt resets the
counter and flips the shared sense flag, which releases the others; every
other caller spins (yielding the CPU) until the shared sense differs from its
own g_procSense. If g_Barrier->allExit is observed while spinning, the
process terminates with EXIT_FAILURE instead of waiting further.
*/
void processBarrier() {
// Atomic increment; returns the number of arrivals including this one.
int newCount = __sync_add_and_fetch(&g_Barrier->count, 1);
if (newCount == g_processCnt) {
// Last arrival: re-arm the counter for the next round, then release waiters.
g_Barrier->count = 0;
g_Barrier->sense = !g_procSense;
} else {
// Not the last arrival: wait until the shared sense has been flipped.
while (g_Barrier->sense == g_procSense) {
if (!g_Barrier->allExit) {
sched_yield();
} else {
// Another process reported a failure; do not wait for a release.
exit(EXIT_FAILURE);
}
}
}
// Flip the local sense so the next barrier round waits for the next flip.
g_procSense = !g_procSense;
}
/*
 * Element-wise integer division: dst[k] = src[k] / num, where k is the
 * calling thread's global 1-D index. There is no bounds check, so the launch
 * must cover exactly the number of elements to process.
 */
__global__ void computeKernel(int *dst, int *src, int num) {
  const int globalIdx = threadIdx.x + blockDim.x * blockIdx.x;
  const int quotient = src[globalIdx] / num;
  dst[globalIdx] = quotient;
}
/*
* Runs one participant of the multi-process IPC event/memory scenario.
* 1) Process 0 allocates buffer in GPU0 memory and exports the memory handle.
* 2) Other processes opens memory handle of GPU0 memory, performs computation
* and records event.
* 3) Process 0 synchronizes event and validates the resulting buffer.
*
* shmEventInfo: per-process records shared between all processes
*               (device ordinal, exported event handle, memory handle).
* index:        index of the calling process; 0 is the coordinating process,
*               1..g_processCnt-1 are the worker processes.
*
* The device buffer holds g_processCnt slices of BUF_SIZE ints: slice 0 is
* the input written by process 0, slice i receives the output of process i.
* All processes meet at three processBarrier() calls, in the same order.
*/
void runMultiProcKernel(ipcEventInfo_t *shmEventInfo, int index) {
int *d_ptr;
int hData[BUF_SIZE]{};
unsigned int seed = time(nullptr);
// Randomize data before computation
// (only process 0 goes on to use hData, as kernel input and as reference).
for (int i = 0; i < BUF_SIZE; i++) {
hData[i] = rand_r(&seed);
}
HIP_CHECK(hipSetDevice(shmEventInfo[index].device));
if (index == 0) {
// Coordinating process.
// Host copy of the result slices produced by processes 1..g_processCnt-1.
int h_results[BUF_SIZE * MAX_DEVICES];
hipEvent_t event[MAX_DEVICES];
HIP_CHECK(hipMalloc(&d_ptr, BUF_SIZE * g_processCnt * sizeof(int)));
// Publish the buffer's IPC handle through shared memory.
HIP_CHECK(hipIpcGetMemHandle(&shmEventInfo[0].memHandle, d_ptr));
// Fill slice 0 with the input data.
HIP_CHECK(hipMemcpy(d_ptr, hData,
BUF_SIZE * sizeof(int), hipMemcpyHostToDevice));
// Barrier 1: Process0 will wait for all processes to create event handles,
// signals device memory creation.
processBarrier();
for (int i = 1; i < g_processCnt; i++) {
HIP_CHECK(hipIpcOpenEventHandle(&event[i], shmEventInfo[i].eventHandle));
}
// Barrier 2: Process0 waits for kernels to be launched
// and the events to be recorded.
processBarrier();
for (int i = 1; i < g_processCnt; i++) {
HIP_CHECK(hipEventSynchronize(event[i]));
}
// Copy back slices 1..g_processCnt-1 (slice 0 is the input).
HIP_CHECK(hipMemcpy(h_results, d_ptr + BUF_SIZE,
BUF_SIZE * (g_processCnt - 1) * sizeof(int), hipMemcpyDeviceToHost));
// Barrier 3: Process0 signals event usage is done.
processBarrier();
HIP_CHECK(hipFree(d_ptr));
// Process n launched the kernel with num = n + 1, so slice n must hold
// hData[i] / (n + 1); slice n sits at offset (n - 1) * BUF_SIZE in h_results.
for (int n = 1; n < g_processCnt; n++) {
for (int i = 0; i < BUF_SIZE; i++) {
if (hData[i]/(n + 1) != h_results[(n-1) * BUF_SIZE + i]) {
WARN("Data validation error at index " << i << " n" << n);
// Make any process still spinning in processBarrier() exit too.
g_Barrier->allExit = true;
exit(EXIT_FAILURE);
}
}
}
for (int i = 1; i < g_processCnt; i++) {
HIP_CHECK(hipEventDestroy(event[i]));
}
} else {
// Worker process.
hipEvent_t event;
HIP_CHECK(hipEventCreateWithFlags(&event,
hipEventDisableTiming | hipEventInterprocess));
// Publish this process's event handle through its shared-memory record.
HIP_CHECK(hipIpcGetEventHandle(&shmEventInfo[index].eventHandle, event));
// Barrier 1 : wait until proc 0 initializes device memory,
// signals event creation.
processBarrier();
HIP_CHECK(hipIpcOpenMemHandle(reinterpret_cast<void **>(&d_ptr),
shmEventInfo[0].memHandle,
hipIpcMemLazyEnablePeerAccess));
// BUF_SIZE / 512 blocks of 512 threads: one thread per buffer element.
const dim3 threads(512, 1);
const dim3 blocks(BUF_SIZE / threads.x, 1);
// Read slice 0, write slice `index`, dividing by index + 1.
hipLaunchKernelGGL(computeKernel, dim3(blocks), dim3(threads), 0, 0,
d_ptr + index *BUF_SIZE, d_ptr, index + 1);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipEventRecord(event));
// Barrier 2 : Signals that event is recorded
processBarrier();
HIP_CHECK(hipIpcCloseMemHandle(d_ptr));
// Barrier 3 : wait for all the events to be used up by processes
processBarrier();
HIP_CHECK(hipEventDestroy(event));
}
}
/**
* Test Description
* ------------------------
* - Validate use case of event handle along with memory handle
* across multiple processes with complex scenario.
* - Utilizes synchronization of processes and events.
* - Launches kernels and validates computation results.
* Test source
* ------------------------
* - unit/multiproc/hipIpcEventHandle.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Unit_hipIpcEventHandle_Functional") {
  // All state that must be visible across fork() lives in anonymous shared
  // mappings. fd is -1 as recommended for portable MAP_ANONYMOUS usage.
  ipcDevices_t *shmDevices = reinterpret_cast<ipcDevices_t *>(
      mmap(NULL, sizeof(*shmDevices), PROT_READ | PROT_WRITE,
           MAP_SHARED | MAP_ANONYMOUS, -1, 0));
  REQUIRE(MAP_FAILED != shmDevices);
  getDevices(shmDevices);
  if (shmDevices->count < 2) {
    WARN("Test requires atleast two GPUs with P2P access. Skipping test.");
    munmap(shmDevices, sizeof(*shmDevices));
    return;
  }
  // One process per usable device, capped by the fixed-size arrays.
  g_processCnt = (shmDevices->count > MAX_DEVICES) ? MAX_DEVICES : shmDevices->count;
  // Barrier is used to synchronize processes created.
  g_Barrier = reinterpret_cast<ipcBarrier_t *>(
      mmap(NULL, sizeof(*g_Barrier), PROT_READ | PROT_WRITE,
           MAP_SHARED | MAP_ANONYMOUS, -1, 0));
  REQUIRE(MAP_FAILED != g_Barrier);
  memset(g_Barrier, 0, sizeof(*g_Barrier));
  // set local barrier sense flag
  g_procSense = 0;
  // shared memory for Event and memHandle Info
  const size_t infoBytes = g_processCnt * sizeof(ipcEventInfo_t);
  ipcEventInfo_t *shmEventInfo = reinterpret_cast<ipcEventInfo_t *>(
      mmap(NULL, infoBytes, PROT_READ | PROT_WRITE,
           MAP_SHARED | MAP_ANONYMOUS, -1, 0));
  REQUIRE(MAP_FAILED != shmEventInfo);
  // initialize shared memory
  memset(shmEventInfo, 0, infoBytes);
  // The parent keeps index 0; each forked child takes the next index.
  int index = 0;
  for (int i = 1; i < g_processCnt; i++) {
    const pid_t pid = fork();
    if (pid < 0) {
      // Cannot create the full set of processes. Children started so far
      // would wait in the barrier forever, so tell them to exit, reap them
      // and fail the test.
      g_Barrier->allExit = true;
      for (int j = 1; j < i; j++) {
        waitpid(shmEventInfo[j].pid, nullptr, 0);
      }
      INFO("fork() failed");
      REQUIRE(false);
    }
    if (!pid) {
      index = i;
      break;
    }
    shmEventInfo[i].pid = pid;
  }
  shmEventInfo[index].device = shmDevices->ordinals[index];
  // Run the test
  runMultiProcKernel(shmEventInfo, index);
  if (index != 0) {
    // Child: terminate here instead of returning into the test runner, which
    // would otherwise keep executing as a duplicate test process.
    exit(EXIT_SUCCESS);
  }
  // Cleanup: the parent reaps every child and requires a clean exit.
  for (int i = 1; i < g_processCnt; i++) {
    int status = 0;
    waitpid(shmEventInfo[i].pid, &status, 0);
    HIP_ASSERT(WIFEXITED(status));
    HIP_ASSERT(WEXITSTATUS(status) == EXIT_SUCCESS);
  }
  munmap(shmEventInfo, infoBytes);
  munmap(g_Barrier, sizeof(*g_Barrier));
  g_Barrier = nullptr;
  munmap(shmDevices, sizeof(*shmDevices));
}
/**
* Test Description
* ------------------------
* - Validates handling of invalid arguments for
* [hipIpcGetEventHandle](@ref hipIpcGetEventHandle):
* -# When pointer to the event handle is `nullptr`
* - Expected output: return `hipErrorInvalidValue`
* -# When pointer to the event is `nullptr`
* - Expected output: return `hipErrorInvalidValue`
* -# When both pointers are `nullptr`
* - Expected output: return `hipErrorInvalidValue`
* -# When event is not valid
* - Expected output: return `hipErrorInvalidValue`
* -# When event is created without interprocess flag
* - Expected output: return `hipErrorInvalidResourceHandle` or `hipErrorInvalidConfiguration`
* -# When event is created without flags
* - Expected output: return `hipErrorInvalidResourceHandle`
* - Validates handling of invalid arguments for
* [hipIpcOpenEventHandle](@ref hipIpcOpenEventHandle)
* -# When pointer to the event is `nullptr`
* - Expected output: return `hipErrorInvalidValue`
* -# When pointer to the event handle is `nullptr`
* - Expected output: return `hipErrorInvalidValue`
* -# When attempted to open handle in the process that created it
* - Expected output: return `hipErrorInvalidContext`
* Test source
* ------------------------
* - unit/multiproc/hipIpcEventHandle.cc
* Test requirements
* ------------------------
* - Host specific (LINUX)
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Unit_hipIpcEventHandle_ParameterValidation") {
  // Interprocess-capable event shared by the sections below; it is created
  // on every pass through the test case and destroyed at the end.
  hipEvent_t event;
  hipIpcEventHandle_t eventHandle;
  hipError_t ret;
  HIP_CHECK(hipEventCreateWithFlags(&event,
                                    hipEventDisableTiming | hipEventInterprocess));
#if HT_AMD
  // Test disabled for nvidia due to segfault with cuda api
  SECTION("Get event handle with eventHandle(nullptr)") {
    ret = hipIpcGetEventHandle(nullptr, event);
    REQUIRE(ret == hipErrorInvalidValue);
  }
#endif
  SECTION("Get event handle with event(nullptr)") {
    ret = hipIpcGetEventHandle(&eventHandle, nullptr);
    REQUIRE(ret == hipErrorInvalidValue);
  }
  SECTION("Get event handle with handle == nullptr and event == nullptr") {
    HIP_CHECK_ERROR(hipIpcGetEventHandle(nullptr, nullptr), hipErrorInvalidValue);
  }
  SECTION("Get event handle with invalid event object") {
    hipEvent_t eventUninit{};
    ret = hipIpcGetEventHandle(&eventHandle, eventUninit);
    REQUIRE(ret == hipErrorInvalidValue);
  }
  SECTION("Get event handle for event allocated without Interprocess flag") {
    hipEvent_t eventNoIpc;
    HIP_CHECK(hipEventCreateWithFlags(&eventNoIpc, hipEventDisableTiming));
    ret = hipIpcGetEventHandle(&eventHandle, eventNoIpc);
    // Either error code is accepted here.
    if ((ret != hipErrorInvalidResourceHandle) &&
        (ret != hipErrorInvalidConfiguration)) {
      INFO("Error returned : " << ret);
      REQUIRE(false);
    }
    HIP_CHECK(hipEventDestroy(eventNoIpc));
  }
  SECTION("Open event handle with event(nullptr)") {
    hipIpcEventHandle_t ipc_handle{};
    ret = hipIpcOpenEventHandle(nullptr, ipc_handle);
    REQUIRE(ret == hipErrorInvalidValue);
  }
  SECTION("Open event handle with eventHandle as invalid") {
    // A zero-initialized handle does not refer to any exported event.
    hipIpcEventHandle_t ipc_handle{};
    hipEvent_t eventOut;
    ret = hipIpcOpenEventHandle(&eventOut, ipc_handle);
    if ((ret != hipErrorInvalidValue) && (ret != hipErrorMapFailed)) {
      INFO("Error returned : " << ret);
      REQUIRE(false);
    }
  }
  SECTION("Open handle in process that created it") {
    hipIpcEventHandle_t event_handle;
    hipEvent_t event1, event2;
    HIP_CHECK(hipEventCreateWithFlags(&event1, hipEventDisableTiming | hipEventInterprocess));
    HIP_CHECK(hipIpcGetEventHandle(&event_handle, event1));
    HIP_CHECK_ERROR(hipIpcOpenEventHandle(&event2, event_handle), hipErrorInvalidContext);
    HIP_CHECK(hipEventDestroy(event1));
  }
// Disabled on AMD because of return value mismatch - EXSWHTEC-41
#if HT_NVIDIA
  SECTION("Event created with no flags") {
    // Named distinctly so it does not shadow the test-case level `event`.
    hipEvent_t eventNoFlags;
    hipIpcEventHandle_t event_handle;
    HIP_CHECK(hipEventCreate(&eventNoFlags));
    HIP_CHECK_ERROR(hipIpcGetEventHandle(&event_handle, eventNoFlags),
                    hipErrorInvalidResourceHandle);
    HIP_CHECK(hipEventDestroy(eventNoFlags));
  }
#endif
  // Release the event created at the top of the test case.
  HIP_CHECK(hipEventDestroy(event));
}
/**
* End doxygen group hipIpcGetEventHandle.
* @}
*/
/**
* @addtogroup hipIpcOpenEventHandle hipIpcOpenEventHandle
* @{
* @ingroup DeviceTest
* `hipIpcOpenEventHandle(hipEvent_t* event, hipIpcEventHandle_t handle)` -
* Opens an interprocess event handle.
* Opens an interprocess event handle exported from another process with hipIpcGetEventHandle.
* ________________________
* Test cases from other modules:
* - @ref Unit_hipIpcEventHandle_Functional
* - @ref Unit_hipIpcEventHandle_ParameterValidation
*/
/**
* End doxygen group hipIpcOpenEventHandle.
* @}
*/
#endif
@@ -0,0 +1,281 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_common.hh>
#include <hip_test_checkers.hh>
#ifdef __linux__
#include <sys/types.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <fcntl.h>
#include <semaphore.h>
#include <unistd.h>
/**
* @addtogroup hipIpcOpenMemHandle hipIpcOpenMemHandle
* @{
* @ingroup DeviceTest
* `hipIpcOpenMemHandle(void** devPtr, hipIpcMemHandle_t handle, unsigned int flags)` -
* Opens an interprocess memory handle exported from another process
* and returns a device pointer usable in the local process.
*/
#define NUM_ELMTS 1024
#define NUM_THREADS 10
// Record exchanged between the parent and child test processes through a
// shared mapping.
typedef struct mem_handle {
// Ordinal of the device on which the parent allocated the exported buffer.
int device;
// IPC handle of that buffer, written by the parent and opened by the child.
hipIpcMemHandle_t memHandle;
// Overall verdict; starts as true and is cleared when a semaphore call fails.
bool IfTestPassed;
} hip_ipc_t;
// This testcase verifies the hipIpcMemAccess APIs as follows
// The following program spawns a child process and does the following
// Parent iterate through each device, create memory -- create hipIpcMemhandle
// stores the mem handle in mmaped memory, release the child using sem_post()
// and wait for child to release itself(parent process)
// child process:
// Child process get the ipc mem handle using hipIpcOpenMemHandle
// Iterate through all the available gpus and do Device to Device copies
// and check for data consistencies and close the hipIpcCloseMemHandle
// release the parent and wait for parent to release itself(child)
/**
* Test Description
* ------------------------
* - Verifies that getting and opening mem handle works correctly
* in a specific scenario, and handles the case when the same device
* is used in both processes.
* - Creates memory from the parent process for each device.
* - Spawns child process and waits for it to finish.
* - Child process gets the handle and check data consistencies.
* Test source
* ------------------------
* - unit/multiproc/hipIpcMemAccessTest.cc
* Test requirements
* ------------------------
* - Host specific (LINUX)
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Unit_hipIpcMemAccess_Semaphores") {
  hip_ipc_t *shrd_mem = NULL;
  pid_t pid;
  size_t N = 1024;
  size_t Nbytes = N * sizeof(int);
  int *A_d{nullptr}, *B_d{nullptr}, *C_d{nullptr};
  int *A_h{nullptr}, *C_h{nullptr};
  sem_t *sem_ob1{nullptr}, *sem_ob2{nullptr};
  int Num_devices = 0, CanAccessPeer = 0;
  // Remove semaphores left behind by an earlier run so that the O_EXCL
  // creation below can succeed; a failure here just means none existed.
  sem_unlink("/my-sem-object1");
  sem_unlink("/my-sem-object2");
  // sem_ob1: parent -> child "handle is ready";
  // sem_ob2: child -> parent "done with the handle".
  sem_ob1 = sem_open("/my-sem-object1", O_CREAT|O_EXCL, 0660, 0);
  sem_ob2 = sem_open("/my-sem-object2", O_CREAT|O_EXCL, 0660, 0);
  REQUIRE(sem_ob1 != SEM_FAILED);
  REQUIRE(sem_ob2 != SEM_FAILED);
  // mmap() reports failure with MAP_FAILED, not NULL; fd is -1 as
  // recommended for portable MAP_ANONYMOUS usage.
  shrd_mem = reinterpret_cast<hip_ipc_t *>(mmap(NULL, sizeof(hip_ipc_t),
                                                PROT_READ | PROT_WRITE,
                                                MAP_SHARED | MAP_ANONYMOUS,
                                                -1, 0));
  REQUIRE(shrd_mem != MAP_FAILED);
  shrd_mem->IfTestPassed = true;
  HipTest::initArrays<int>(nullptr, nullptr, nullptr,
                           &A_h, nullptr, &C_h, N, false);
  pid = fork();
  if (pid < 0) {
    // Without a child nobody would ever post sem_ob2, so the parent loop
    // below would block forever. Clean up and fail instead.
    sem_unlink("/my-sem-object1");
    sem_unlink("/my-sem-object2");
    INFO("fork() failed");
    REQUIRE(false);
  }
  if (pid != 0) {
    // Parent process
    HIP_CHECK(hipGetDeviceCount(&Num_devices));
    for (int i = 0; i < Num_devices; ++i) {
      if (shrd_mem->IfTestPassed == true) {
        HIP_CHECK(hipSetDevice(i));
        HIP_CHECK(hipMalloc(&A_d, Nbytes));
        HIP_CHECK(hipIpcGetMemHandle(reinterpret_cast<hipIpcMemHandle_t *>
                                     (&shrd_mem->memHandle),
                                     A_d));
        HIP_CHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
        shrd_mem->device = i;
        // Hand the exported handle to the child ...
        if ((sem_post(sem_ob1)) == -1) {
          // Need to use inline function to release resources.
          shrd_mem->IfTestPassed = false;
          WARN("sem_post() call failed in parent process.");
        }
        // ... and wait until the child has finished with it.
        if ((sem_wait(sem_ob2)) == -1) {
          shrd_mem->IfTestPassed = false;
          WARN("sem_wait() call failed in parent process.");
        }
        HIP_CHECK(hipFree(A_d));
      }
    }
  } else {
    // Child process
    HIP_CHECK(hipGetDeviceCount(&Num_devices));
    // One round per buffer exported by the parent.
    for (int j = 0; j < Num_devices; ++j) {
      HIP_CHECK(hipSetDevice(j));
      if ((sem_wait(sem_ob1)) == -1) {
        shrd_mem->IfTestPassed = false;
        WARN("sem_wait() call failed in child process.");
        if ((sem_post(sem_ob2)) == -1) {
          shrd_mem->IfTestPassed = false;
          WARN("sem_post() call on sem_ob2 failed");
          exit(1);
        }
      }
      // Open the exported buffer from every device that can reach the
      // exporting device and verify its contents.
      for (int i = 0; i < Num_devices; ++i) {
        HIP_CHECK(hipSetDevice(i));
        HIP_CHECK(hipDeviceCanAccessPeer(&CanAccessPeer, i, shrd_mem->device));
        if (CanAccessPeer == 1) {
          HIP_CHECK(hipMalloc(&C_d, Nbytes));
          HIP_CHECK(hipIpcOpenMemHandle(reinterpret_cast<void **>(&B_d),
                                        shrd_mem->memHandle,
                                        hipIpcMemLazyEnablePeerAccess));
          HIP_CHECK(hipMemcpy(C_d, B_d, Nbytes, hipMemcpyDeviceToDevice));
          HIP_CHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
          HipTest::checkTest<int>(A_h, C_h, N);
          memset(reinterpret_cast<void*>(C_h), 0, Nbytes);
          // Checking if the data obtained from Ipc shared memory is consistent
          HIP_CHECK(hipMemcpy(C_h, B_d, Nbytes, hipMemcpyDeviceToHost));
          HipTest::checkTest<int>(A_h, C_h, N);
          HIP_CHECK(hipIpcCloseMemHandle(reinterpret_cast<void*>(B_d)));
          HIP_CHECK(hipFree(C_d));
        }
      }
      // Let the parent free this buffer and move on to the next device.
      if ((sem_post(sem_ob2)) == -1) {
        shrd_mem->IfTestPassed = false;
        WARN("sem_post() call on sem_ob2 failed");
        exit(1);
      }
    }
    exit(0);
  }
  // Only the parent reaches this point.
  if ((sem_unlink("/my-sem-object1")) == -1) {
    WARN("sem_unlink() call on /my-sem-object1 failed");
  }
  if ((sem_unlink("/my-sem-object2")) == -1) {
    WARN("sem_unlink() call on /my-sem-object2 failed");
  }
  int rFlag = 0;
  waitpid(pid, &rFlag, 0);
  sem_close(sem_ob1);
  sem_close(sem_ob2);
  REQUIRE(shrd_mem->IfTestPassed == true);
  munmap(shrd_mem, sizeof(hip_ipc_t));
  HipTest::freeArrays<int>(nullptr, nullptr, nullptr,
                           A_h, nullptr, C_h, false);
}
/**
* Test Description
* ------------------------
* - Validates handling of valid and invalid arguments for
* [hipIpcGetMemHandle](@ref hipIpcGetMemHandle):
* -# When memory handle pointer is `nullptr`
* - Expected output: return `hipErrorInvalidValue`
* -# When device pointer is `nullptr`
* - Expected output: return `hipErrorInvalidValue`
* -# When both pointers are `nullptr`
* - Expected output: return `hipErrorInvalidValue`
* -# When both pointers are valid
* - Expected output: return `hipSuccess`
* - Validates handling of valid and invalid arguments for
* [hipIpcOpenMemHandle](@ref hipIpcOpenMemHandle):
* -# When device pointer is `nullptr`
* - Expected output: return `hipErrorInvalidValue`
* -# When memory handle pointer uninitialized
* - Expected output: return `hipErrorInvalidValue` or `hipErrorInvalidDevicePointer`
* -# When memory handle has random flags
* - Expected output: return `hipErrorInvalidValue`
* - Validates handling of valid and invalid arguments for
* [hipIpcCloseMemHandle](@ref hipIpcCloseMemHandle):
* -# When device pointer is `nullptr`
* - Expected output: return `hipErrorInvalidValue`
* Test source
* ------------------------
* - unit/multiproc/hipIpcMemAccessTest.cc
* Test requirements
* ------------------------
* - Host specific (LINUX)
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Unit_hipIpcMemAccess_ParameterValidation") {
  // Handles are deliberately left uninitialized: MemHandle is filled in by
  // the sections that need a valid one, MemHandleUninit never is.
  hipIpcMemHandle_t MemHandle;
  hipIpcMemHandle_t MemHandleUninit;
  void *Ad{};
  void *Ad2{};
  // Device allocation used by the sections below; freed at the end.
  HIP_CHECK(hipMalloc(&Ad, 1024));

#if HT_AMD
  // Test is disabled for nvidia as api resulting in seg fault.
  SECTION("Get mem handle with handle as nullptr") {
    const hipError_t ret = hipIpcGetMemHandle(nullptr, Ad);
    REQUIRE(ret == hipErrorInvalidValue);
  }
#endif

  SECTION("Get mem handle with devptr as nullptr") {
    const hipError_t ret = hipIpcGetMemHandle(&MemHandle, nullptr);
    REQUIRE(ret == hipErrorInvalidValue);
  }

  SECTION("Get mem handle with handle/devptr as nullptr") {
    const hipError_t ret = hipIpcGetMemHandle(nullptr, nullptr);
    REQUIRE(ret == hipErrorInvalidValue);
  }

  SECTION("Get mem handle with valid devptr") {
    const hipError_t ret = hipIpcGetMemHandle(&MemHandle, Ad);
    REQUIRE(ret == hipSuccess);
  }

  SECTION("Open mem handle with devptr as nullptr") {
    const hipError_t ret =
        hipIpcOpenMemHandle(nullptr, MemHandle, hipIpcMemLazyEnablePeerAccess);
    REQUIRE(ret == hipErrorInvalidValue);
  }

  SECTION("Open mem handle with handle as un-initialized") {
    const hipError_t ret = hipIpcOpenMemHandle(&Ad2, MemHandleUninit,
                                               hipIpcMemLazyEnablePeerAccess);
    // Either error code is accepted for an uninitialized handle.
    REQUIRE((ret == hipErrorInvalidValue || ret == hipErrorInvalidDevicePointer));
  }

#if HT_AMD
  // Test is disabled for nvidia as api not returning expected value.
  SECTION("Open mem handle with flags as random value") {
    constexpr unsigned int flags = 123;
    HIP_CHECK(hipIpcGetMemHandle(&MemHandle, Ad));
    const hipError_t ret = hipIpcOpenMemHandle(&Ad2, MemHandle, flags);
    REQUIRE(ret == hipErrorInvalidValue);
  }
#endif

  SECTION("Close mem handle with devptr(nullptr)") {
    const hipError_t ret = hipIpcCloseMemHandle(nullptr);
    REQUIRE(ret == hipErrorInvalidValue);
  }

  HIP_CHECK(hipFree(Ad));
}
/**
* End doxygen group hipIpcOpenMemHandle.
* @}
*/
#endif
@@ -0,0 +1,244 @@
/*
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/**
Testcase Scenarios :
1) Run hipMalloc() api/kernel code on the same gpu in parallel from parent and child
processes, validate the results.
2) Execute hipMalloc() api simultaneously on all the gpus by spawning multiple
child processes. Validate buffers allocated after running kernel code.
*/
#include <hip_test_common.hh>
#include <hip_test_checkers.hh>
#include <hip_test_kernels.hh>
#ifdef __linux__
#include <sys/wait.h>
#include <sys/types.h>
#include <unistd.h>
/**
* Fetches Gpu device count
*/
static void getDeviceCount(int* pdevCnt) {
  // Queries the GPU device count from a short-lived child process and stores
  // it in *pdevCnt. The count travels back to the caller through a pipe.
  // *pdevCnt is set to 0 if the pipe cannot be created, fork() fails, or the
  // child does not deliver a complete value.
  *pdevCnt = 0;

  int fd[2];
  // create pipe descriptors
  if (pipe(fd) != 0) {
    return;
  }

  // disable visible_devices env from shell
#ifdef HT_NVIDIA
  unsetenv("CUDA_VISIBLE_DEVICES");
#else
  unsetenv("ROCR_VISIBLE_DEVICES");
  unsetenv("HIP_VISIBLE_DEVICES");
#endif

  const pid_t childpid = fork();
  if (childpid < 0) {  // failure: release both pipe ends before bailing out
    close(fd[0]);
    close(fd[1]);
    return;
  }

  if (childpid == 0) {  // Child
    int devCnt = 1;
    // writing only, no need for read-descriptor
    close(fd[0]);
    HIP_CHECK(hipGetDeviceCount(&devCnt));
    // send the value on the write-descriptor
    const ssize_t written = write(fd[1], &devCnt, sizeof(devCnt));
    // close the write descriptor
    close(fd[1]);
    exit(written == static_cast<ssize_t>(sizeof(devCnt)) ? 0 : 1);
  }

  // Parent
  int val = 0;
  close(fd[1]);
  // parent will wait to read the device cnt
  const ssize_t got = read(fd[0], &val, sizeof(val));
  // close the read-descriptor
  close(fd[0]);
  // wait for exactly this child so no unrelated child is reaped
  waitpid(childpid, nullptr, 0);
  // Only trust the value when the whole int arrived (EOF/short read => 0).
  if (got == static_cast<ssize_t>(sizeof(val))) {
    *pdevCnt = val;
  }
}
/**
* Validates data consistency on supplied gpu
*/
static bool validateMemoryOnGPU(int gpu, bool concurOnOneGPU = false) {
  // Runs a vector-add round trip on device `gpu` and reports whether it
  // succeeded.
  //   gpu            - device index passed to hipSetDevice().
  //   concurOnOneGPU - when true, the hipMemGetInfo()-based leak checks are
  //                    skipped, because another process using the same GPU
  //                    makes the free-memory numbers unreliable.
  // Returns true when no check failed.
  int *A_d, *B_d, *C_d;
  int *A_h, *B_h, *C_h;
  size_t prevAvl, prevTot, curAvl, curTot;
  bool TestPassed = true;
  constexpr auto N = 4 * 1024 * 1024;
  constexpr auto blocksPerCU = 6;  // to hide latency
  constexpr auto threadsPerBlock = 256;
  size_t Nbytes = N * sizeof(int);

  HIP_CHECK(hipSetDevice(gpu));

  // Snapshot memory before/after setting up the arrays: available memory
  // must not grow and the total must not change.
  HIP_CHECK(hipMemGetInfo(&prevAvl, &prevTot));
  HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false);
  HIP_CHECK(hipMemGetInfo(&curAvl, &curTot));
  if (!concurOnOneGPU && (prevAvl < curAvl || prevTot != curTot)) {
    // In concurrent calls on one GPU, we cannot verify leaking in this way
    printf("%s : Memory allocation mismatch observed."
           "Possible memory leak.\n", __func__);
    TestPassed = false;
  }

  unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N);
  HIP_CHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
  HIP_CHECK(hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice));
  hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock),
                     0, 0, static_cast<const int*>(A_d),
                     static_cast<const int*>(B_d), C_d, N);
  HIP_CHECK(hipGetLastError());
  HIP_CHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));

  // A zero/false result from checkVectorADD is treated as success here
  // (presumably it returns a mismatch count - confirm against the helper).
  if (!HipTest::checkVectorADD(A_h, B_h, C_h, N)) {
    printf("Validation PASSED for gpu %d from pid %d\n", gpu, getpid());
  } else {
    printf("Validation FAILED for gpu %d from pid %d\n", gpu, getpid());
    TestPassed = false;
  }

  // Snapshot memory before/after releasing the arrays: available memory must
  // not shrink and the total must not change. The former additional
  // `prevAvl != curAvl` check was removed: it contradicted this one by
  // flagging the expected increase in available memory as a failure.
  HIP_CHECK(hipMemGetInfo(&prevAvl, &prevTot));
  HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false);
  HIP_CHECK(hipMemGetInfo(&curAvl, &curTot));
  if (!concurOnOneGPU && (curAvl < prevAvl || prevTot != curTot)) {
    // In concurrent calls on one GPU, we cannot verify leaking in this way
    UNSCOPED_INFO("validateMemoryOnGPU : Memory allocation mismatch observed."
                  << "Possible memory leak.");
    TestPassed = false;
  }
  return TestPassed;
}
/**
* Parallel execution of parent and child on gpu0
*/
TEST_CASE("Unit_hipMalloc_ChildConcurrencyDefaultGpu") {
  // Parent and one forked child both run validateMemoryOnGPU() on device 0
  // at the same time; the test passes only if both report success.
  int devCnt = 0, pid = 0;
  constexpr auto resSuccess = 1, resFailure = 2;
  bool TestPassed = true;
  // Get GPU count
  getDeviceCount(&devCnt);
  REQUIRE(devCnt > 0);

  if ((pid = fork()) < 0) {
    INFO("Child_Concurrency_DefaultGpu : fork() returned error : " << pid);
    HIP_ASSERT(false);
  } else if (!pid) {  // Child process
    // Allocates and validates memory on Gpu0 simultaneously with parent
    const bool TestPassedChild = validateMemoryOnGPU(0, true);
    // The exit code carries the child's verdict back to the parent.
    exit(TestPassedChild ? resSuccess : resFailure);
  } else {  // Parent process
    int exitStatus = 0;
    // Allocates and validates memory on Gpu0 simultaneously with child
    TestPassed = validateMemoryOnGPU(0, true);
    // Wait and get result from child
    pid = wait(&exitStatus);
    // Anything other than a normal exit with the success code is a failure;
    // this also covers a child terminated by a signal.
    if (pid < 0 || !WIFEXITED(exitStatus) ||
        WEXITSTATUS(exitStatus) != resSuccess) {
      TestPassed = false;
    }
  }
  REQUIRE(TestPassed == true);
}
/**
* Parallel execution of api on multiple gpus from
* different child processes.
*/
TEST_CASE("Unit_hipMalloc_ChildConcurrencyMultiGpu") {
  // Forks one child per GPU; each child runs validateMemoryOnGPU() on its own
  // device. The test passes when every child exits with the success code.
  int devCnt = 0, pid = 0;
  constexpr auto resSuccess = 1, resFailure = 2;
  // Get GPU count
  getDeviceCount(&devCnt);
  REQUIRE(devCnt > 0);

  // Spawn child for each GPU
  for (int gpu = 0; gpu < devCnt; gpu++) {
    if ((pid = fork()) < 0) {
      INFO("Child_Concurrency_MultiGpu : fork() returned error : " << pid);
      REQUIRE(false);
    } else if (!pid) {  // Child process
      const bool TestPassedChild = validateMemoryOnGPU(gpu, true);
      // The exit code carries the child's verdict back to the parent.
      exit(TestPassedChild ? resSuccess : resFailure);
    }
  }

  // Parent shall wait for child to complete
  int passCnt = 0;
  for (int i = 0; i < devCnt; i++) {
    int exitStatus = 0;
    const int pidwait = wait(&exitStatus);
    if (pidwait < 0) {
      // No status was produced, so there is nothing valid to report.
      break;
    }
    // wait() returns children in completion order, so `i` is a sequence
    // number rather than necessarily the device index.
    printf("exitStatus for dev:%d is %d\n", i, WEXITSTATUS(exitStatus));
    // Count only normal exits; a signal-terminated child is not a pass.
    if (WIFEXITED(exitStatus) && WEXITSTATUS(exitStatus) == resSuccess) {
      passCnt++;
    }
  }
  REQUIRE(passCnt == devCnt);
}
#endif // __linux__
@@ -0,0 +1,519 @@
/*
Copyright (c) 2021 - 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/* Test Case Description:
Scenario 3: The test validates if fine grain
behavior is observed or not with memory allocated using malloc()
Scenario 4: The test validates if coarse grain memory
behavior is observed or not with memory allocated using malloc()
Scenario 5: The test validates if fine grain memory
behavior is observed or not with memory allocated using mmap()
Scenario 6: The test validates if coarse grain memory
behavior is observed or not with memory allocated using mmap()
Scenario:7 Test Case Description: The following test checks if the memory is
accessible when HIP_HOST_COHERENT is set to 0
Scenario:8 Test Case Description: The following test checks if the memory
exhibits fine grain behavior when HIP_HOST_COHERENT is set to 1
*/
#ifdef __linux__
#include <hip_test_common.hh>
#include <hip_test_features.hh>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <chrono>
#include "../unit/memory/hipSVMCommon.h"
// Device half of the host/device handshake driven by TstCoherency().
//   ptr     - shared counter that is updated with system-scope atomics.
//   expired - flag polled by the kernel; a non-zero value ends the wait loop.
// The kernel adds 1 to *ptr, then keeps trying to swap the value 3 for 4 and
// stops as soon as that swap succeeds or *expired becomes non-zero.
__global__ void CoherentTst(int *ptr, volatile unsigned int *expired) {
// Incrementing the value by 1
atomicAdd_system(ptr, 1);
// The following while loop checks the value until expiration.
while (*expired == 0) {
// atomicCAS_system returns the previous value; 3 means the 3 -> 4 swap happened.
if (atomicCAS_system(ptr, 3, 4) == 3) break;
}
}
__global__ void SquareKrnl(int *ptr) {
  // Replace the pointed-to integer with its square.
  const int value = *ptr;
  *ptr = value * value;
}
// The function tests the coherency of allocated memory
// Return false on failure, true on success.
// Host half of the coherency handshake with the CoherentTst kernel.
//   Ptr    - host-accessible int used as the shared counter.
//   HmmMem - when true, Ptr is handed to the kernel directly; when false, the
//            device pointer obtained from hipHostGetDevicePointer() is used.
// Sequence: host writes 1, kernel increments to 2, host increments to 3,
// kernel swaps 3 for 4. Returns true when the final value is 4, false
// otherwise (the observed value is printed to stderr).
bool static TstCoherency(int *Ptr, bool HmmMem) {
  using namespace std::chrono_literals;
  int *Dptr = nullptr;
  hipStream_t strm;
  HIP_CHECK(hipStreamCreate(&strm));
  // storing value 1 in the memory created above
  *Ptr = 1;

  // Flag the kernel polls so it cannot spin forever if the handshake stalls.
  unsigned int *expired = nullptr;
  HIP_CHECK(hipHostMalloc(&expired, sizeof(unsigned int)));  // hipHostMallocCoherent by default
  *expired = 0;

  if (!HmmMem) {
    HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void **>(&Dptr), Ptr, 0));
    CoherentTst<<<1, 1, 0, strm>>>(Dptr, expired);
  } else {
    CoherentTst<<<1, 1, 0, strm>>>(Ptr, expired);
  }

  // Poll for up to 3 seconds until the kernel's increment (value 2) is seen.
  std::chrono::steady_clock::time_point start =
      std::chrono::steady_clock::now();
  while (std::chrono::duration_cast<std::chrono::seconds>(
             std::chrono::steady_clock::now() - start).count() < 3) {
    if (*Ptr == 2) {
      *Ptr += 1;
      std::this_thread::sleep_for(200ms);  // Give the kernel time to observe the update
      break;
    }
  }

  *expired = 1;  // Notify kernel loop to exit
  HIP_CHECK(hipStreamSynchronize(strm));
  HIP_CHECK(hipStreamDestroy(strm));
  HIP_CHECK(hipHostFree(expired));

  if (*Ptr == 4) {
    return true;
  }
  // *Ptr is an int, and the message must end with a newline (the previous
  // "\b" emitted a backspace character instead).
  fprintf(stderr, "TstCoherency: *Ptr=%d\n", *Ptr);
  return false;
}
/* Test case description: The following test validates if fine grain
behavior is observed or not with memory allocated using malloc()*/
// The following test is failing on Nvidia platform hence disabled it for now
#if HT_AMD
TEST_CASE("Unit_malloc_CoherentTst") {
  // Runs the TstCoherency() handshake on a malloc()'d int. The test body only
  // executes on devices whose arch name contains "xnack+" and that report
  // managed-memory support.
  CHECK_PCIE_ATOMICS_SUPPORT
  hipDeviceProp_t prop;
  HIPCHECK(hipGetDeviceProperties(&prop, 0));

  if (strstr(prop.gcnArchName, "xnack+") == NULL) {
    HipTest::HIP_SKIP_TEST("GPU is not xnack enabled hence skipping the test...\n");
    return;
  }

  // Test Case execution begins from here
  int managed = 0;
  HIPCHECK(hipDeviceGetAttribute(&managed, hipDeviceAttributeManagedMemory,
                                 0));
  if (managed != 1) {
    return;
  }

  // Plain system allocation, passed to the kernel as-is (HmmMem == true).
  const int SIZE = sizeof(int);
  int *Ptr = reinterpret_cast<int*>(malloc(SIZE));
  auto ret = TstCoherency(Ptr, true);
  free(Ptr);
  REQUIRE(ret);
}
#endif
/* Test case description: The following test validates if coarse grain memory
behavior is observed or not with memory allocated using malloc()*/
// The following test is failing on Nvidia platform hence disabling it for now
#if HT_AMD
TEST_CASE("Unit_malloc_CoherentTstWthAdvise") {
  // Squares a value stored in malloc()'d memory that has been advised as
  // coarse grain, and checks the result on the host. Only runs on devices
  // whose arch name contains "xnack+" and that report managed-memory support.
  hipDeviceProp_t prop;
  HIPCHECK(hipGetDeviceProperties(&prop, 0));
  char *p = NULL;
  p = strstr(prop.gcnArchName, "xnack+");
  if (p) {
    int managed = 0;
    HIP_CHECK(hipDeviceGetAttribute(&managed, hipDeviceAttributeManagedMemory,
                                    0));
    if (managed == 1) {
      int *Ptr = nullptr, SIZE = sizeof(int);
      // Plain system allocation
      Ptr = reinterpret_cast<int*>(malloc(SIZE));
      REQUIRE(Ptr != nullptr);
      // Apply the coarse-grain advice this test is named for, matching
      // Unit_mmap_CoherentTstWthAdvise.
      HIP_CHECK(hipMemAdvise(Ptr, SIZE, hipMemAdviseSetCoarseGrain, 0));
      // Initializing the value with 4
      *Ptr = 4;
      hipStream_t strm;
      HIP_CHECK(hipStreamCreate(&strm));
      SquareKrnl<<<1, 1, 0, strm>>>(Ptr);
      HIP_CHECK(hipStreamSynchronize(strm));
      HIP_CHECK(hipStreamDestroy(strm));
      // Read the result before releasing the buffer so it is not leaked
      // regardless of the outcome.
      const bool IfTstPassed = (*Ptr == 16);
      free(Ptr);
      REQUIRE(IfTstPassed);
    }
  } else {
    HipTest::HIP_SKIP_TEST("GPU is not xnack enabled hence skipping the test...\n");
  }
}
#endif
/* Test case description: The following test validates if fine grain memory
behavior is observed or not with memory allocated using mmap()*/
// The following test is failing on Nvidia platform hence disabling it for now
#if HT_AMD
TEST_CASE("Unit_mmap_CoherentTst") {
  // Runs the TstCoherency() handshake on an anonymous private mmap()'d int.
  // The test body only executes on devices whose arch name contains "xnack+"
  // and that report managed-memory support.
  CHECK_PCIE_ATOMICS_SUPPORT
  hipDeviceProp_t prop;
  HIPCHECK(hipGetDeviceProperties(&prop, 0));

  if (strstr(prop.gcnArchName, "xnack+") == NULL) {
    HipTest::HIP_SKIP_TEST("GPU is not xnack enabled hence skipping the test...\n");
    return;
  }

  int managed = 0;
  HIP_CHECK(hipDeviceGetAttribute(&managed, hipDeviceAttributeManagedMemory,
                                  0));
  if (managed != 1) {
    return;
  }

  void *mapping = mmap(NULL, sizeof(int), PROT_READ | PROT_WRITE,
                       MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
  int *Ptr = reinterpret_cast<int*>(mapping);
  if (Ptr == MAP_FAILED) {
    WARN("Mapping Failed\n");
    REQUIRE(false);
  }

  // The mapping is passed to the kernel as-is (HmmMem == true).
  auto ret = TstCoherency(Ptr, true);

  // An unmap failure is reported but does not decide the test result.
  if (munmap(Ptr, sizeof(int)) != 0) {
    WARN("munmap failed\n");
  }
  REQUIRE(ret);
}
#endif
/* Test case description: The following test validates if coarse grain memory
behavior is observed or not with memory allocated using mmap()*/
// The following test is failing on Nvidia platform hence disabling it for now
#if HT_AMD
TEST_CASE("Unit_mmap_CoherentTstWthAdvise") {
  // Squares a value stored in anonymous mmap()'d memory that has been advised
  // as coarse grain, and checks the result on the host. Only runs on devices
  // whose arch name contains "xnack+" and that report managed-memory support.
  hipDeviceProp_t prop;
  HIPCHECK(hipGetDeviceProperties(&prop, 0));
  char *p = NULL;
  p = strstr(prop.gcnArchName, "xnack+");
  if (p) {
    int managed = 0;
    HIP_CHECK(hipDeviceGetAttribute(&managed, hipDeviceAttributeManagedMemory,
                                    0));
    if (managed == 1) {
      int SIZE = sizeof(int);
      int *Ptr = reinterpret_cast<int*>(mmap(NULL, SIZE,
                                             PROT_READ | PROT_WRITE,
                                             MAP_PRIVATE | MAP_ANONYMOUS, 0, 0));
      if (Ptr == MAP_FAILED) {
        WARN("Mapping Failed\n");
        REQUIRE(false);
      }
      HIP_CHECK(hipMemAdvise(Ptr, SIZE, hipMemAdviseSetCoarseGrain, 0));
      // Initializing the value with 9
      *Ptr = 9;
      hipStream_t strm;
      HIP_CHECK(hipStreamCreate(&strm));
      SquareKrnl<<<1, 1, 0, strm>>>(Ptr);
      HIP_CHECK(hipStreamSynchronize(strm));
      // Release the stream once the kernel has finished (it was previously
      // leaked), matching Unit_malloc_CoherentTstWthAdvise.
      HIP_CHECK(hipStreamDestroy(strm));
      // Capture the verdict before the mapping is torn down.
      bool IfTstPassed = false;
      if (*Ptr == 81) {
        IfTstPassed = true;
      }
      int err = munmap(Ptr, SIZE);
      if (err != 0) {
        WARN("munmap failed\n");
      }
      REQUIRE(IfTstPassed);
    }
  } else {
    HipTest::HIP_SKIP_TEST("GPU is not xnack enabled hence skipping the test...\n");
  }
}
#endif
/* Test Case Description: The following test checks if the memory is
accessible when HIP_HOST_COHERENT is set to 0*/
// The following test is AMD specific test hence skipping for Nvidia
#if HT_AMD
TEST_CASE("Unit_hipHostMalloc_WthEnv0Flg1") {
  // Sets HIP_HOST_COHERENT=0, then has a forked child square a value held in
  // hipHostMallocPortable memory. The child reports through its exit code:
  // 10 = pass, 9 = fail.
  const int envRc = setenv("HIP_HOST_COHERENT", "0", 1);
  if (envRc != 0) {
    WARN("Unable to turn on HIP_HOST_COHERENT, hence terminating the Test case!");
    REQUIRE(false);
  }

  int stat = 0;
  const pid_t childPid = fork();
  if (childPid != 0) {
    // Parent side (also taken when fork() itself fails).
    wait(&stat);
    const int Result = WEXITSTATUS(stat);
    if (Result != 10) {
      REQUIRE(false);
    }
    return;
  }

  // Child side.
  const int SIZE = sizeof(int);
  int *Ptr = nullptr;
  int *PtrD = nullptr;
  // Allocating hipHostMalloc() memory
  HIP_CHECK(hipHostMalloc(&Ptr, SIZE, hipHostMallocPortable));
  *Ptr = 4;
  hipStream_t strm;
  HIP_CHECK(hipStreamCreate(&strm));
  HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&PtrD), Ptr, 0));
  SquareKrnl<<<1, 1, 0, strm>>>(PtrD);
  HIP_CHECK(hipStreamSynchronize(strm));
  HIP_CHECK(hipStreamDestroy(strm));
  // Decide the verdict before the buffer is released.
  const bool squared = (*Ptr == 16);
  HIP_CHECK(hipHostFree(Ptr));
  exit(squared ? 10 : 9);
}
#endif
/* Test Case Description: The following test checks if the memory is
accessible when HIP_HOST_COHERENT is set to 0*/
// The following test is AMD specific test hence skipping for Nvidia
#if HT_AMD
TEST_CASE("Unit_hipHostMalloc_WthEnv0Flg2") {
  // Sets HIP_HOST_COHERENT=0, then has a forked child square a value held in
  // hipHostMallocWriteCombined memory. The child reports through its exit
  // code: 10 = pass, 9 = fail.
  const int envRc = setenv("HIP_HOST_COHERENT", "0", 1);
  if (envRc != 0) {
    WARN("Unable to turn on HIP_HOST_COHERENT, hence terminating the Test case!");
    REQUIRE(false);
  }

  int stat = 0;
  const pid_t childPid = fork();
  if (childPid != 0) {
    // Parent side (also taken when fork() itself fails).
    wait(&stat);
    const int Result = WEXITSTATUS(stat);
    if (Result != 10) {
      REQUIRE(false);
    }
    return;
  }

  // Child side.
  const int SIZE = sizeof(int);
  int *Ptr = nullptr;
  int *PtrD = nullptr;
  // Allocating hipHostMalloc() memory
  HIP_CHECK(hipHostMalloc(&Ptr, SIZE, hipHostMallocWriteCombined));
  *Ptr = 4;
  hipStream_t strm;
  HIP_CHECK(hipStreamCreate(&strm));
  HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&PtrD), Ptr, 0));
  SquareKrnl<<<1, 1, 0, strm>>>(PtrD);
  HIP_CHECK(hipStreamSynchronize(strm));
  HIP_CHECK(hipStreamDestroy(strm));
  // Decide the verdict before the buffer is released.
  const bool squared = (*Ptr == 16);
  HIP_CHECK(hipHostFree(Ptr));
  exit(squared ? 10 : 9);
}
#endif
/* Test Case Description: The following test checks if the memory is
accessible when HIP_HOST_COHERENT is set to 0*/
// The following test is AMD specific test hence skipping for Nvidia
#if HT_AMD
TEST_CASE("Unit_hipHostMalloc_WthEnv0Flg3") {
  // Sets HIP_HOST_COHERENT=0, then has a forked child square a value held in
  // hipHostMallocNumaUser memory. The child reports through its exit code:
  // 10 = pass, 9 = fail.
  const int envRc = setenv("HIP_HOST_COHERENT", "0", 1);
  if (envRc != 0) {
    WARN("Unable to turn on HIP_HOST_COHERENT, hence terminating the Test case!");
    REQUIRE(false);
  }

  int stat = 0;
  const pid_t childPid = fork();
  if (childPid != 0) {
    // Parent side (also taken when fork() itself fails).
    wait(&stat);
    const int Result = WEXITSTATUS(stat);
    if (Result != 10) {
      REQUIRE(false);
    }
    return;
  }

  // Child side.
  const int SIZE = sizeof(int);
  int *Ptr = nullptr;
  int *PtrD = nullptr;
  // Allocating hipHostMalloc() memory
  HIP_CHECK(hipHostMalloc(&Ptr, SIZE, hipHostMallocNumaUser));
  *Ptr = 4;
  hipStream_t strm;
  HIP_CHECK(hipStreamCreate(&strm));
  HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&PtrD), Ptr, 0));
  SquareKrnl<<<1, 1, 0, strm>>>(PtrD);
  HIP_CHECK(hipStreamSynchronize(strm));
  HIP_CHECK(hipStreamDestroy(strm));
  // Decide the verdict before the buffer is released.
  const bool squared = (*Ptr == 16);
  HIP_CHECK(hipHostFree(Ptr));
  exit(squared ? 10 : 9);
}
#endif
/* Test Case Description: The following test checks if the memory is
accessible when HIP_HOST_COHERENT is set to 0*/
// The following test is AMD specific test hence skipping for Nvidia
#if HT_AMD
TEST_CASE("Unit_hipHostMalloc_WthEnv0Flg4") {
  // Sets HIP_HOST_COHERENT=0, then has a forked child square a value held in
  // hipHostMallocNonCoherent memory. The child reports through its exit code:
  // 10 = pass, 9 = fail.
  const int envRc = setenv("HIP_HOST_COHERENT", "0", 1);
  if (envRc != 0) {
    WARN("Unable to turn on HIP_HOST_COHERENT, hence terminating the Test case!");
    REQUIRE(false);
  }

  int stat = 0;
  const pid_t childPid = fork();
  if (childPid != 0) {
    // Parent side (also taken when fork() itself fails).
    wait(&stat);
    const int Result = WEXITSTATUS(stat);
    if (Result != 10) {
      REQUIRE(false);
    }
    return;
  }

  // Child side.
  const int SIZE = sizeof(int);
  int *Ptr = nullptr;
  int *PtrD = nullptr;
  // Allocating hipHostMalloc() memory
  HIP_CHECK(hipHostMalloc(&Ptr, SIZE, hipHostMallocNonCoherent));
  *Ptr = 4;
  hipStream_t strm;
  HIP_CHECK(hipStreamCreate(&strm));
  HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&PtrD), Ptr, 0));
  SquareKrnl<<<1, 1, 0, strm>>>(PtrD);
  HIP_CHECK(hipStreamSynchronize(strm));
  HIP_CHECK(hipStreamDestroy(strm));
  // Decide the verdict before the buffer is released.
  const bool squared = (*Ptr == 16);
  HIP_CHECK(hipHostFree(Ptr));
  exit(squared ? 10 : 9);
}
#endif
/* Test Case Description: The following test checks if the memory exhibits
fine grain behavior when HIP_HOST_COHERENT is set to 1*/
// The following test is AMD specific test hence skipping for Nvidia
#if HT_AMD
TEST_CASE("Unit_hipHostMalloc_WthEnv1") {
  // Sets HIP_HOST_COHERENT=1, then has a forked child run the TstCoherency()
  // handshake on default-flag hipHostMalloc() memory. The child reports
  // through its exit code (EXIT_SUCCESS / EXIT_FAILURE).
  if ((setenv("HIP_HOST_COHERENT", "1", 1)) != 0) {
    WARN("Unable to turn on HIP_HOST_COHERENT, hence terminating the Test case!");
    REQUIRE(false);
  }
  const pid_t pid = fork();
  if (pid < 0) {
    // Without a child nothing was tested; do not let that count as a pass.
    WARN("fork() failed");
    REQUIRE(false);
  } else if (pid == 0) {  // child process
    // NOTE(review): if this macro returns instead of exiting, the child
    // continues running the test session - confirm against its definition.
    CHECK_PCIE_ATOMICS_SUPPORT
    int *Ptr = nullptr, SIZE = sizeof(int);
    bool HmmMem = false;
    // Allocating hipHostMalloc() memory
    HIP_CHECK(hipHostMalloc(&Ptr, SIZE));
    auto ret = TstCoherency(Ptr, HmmMem);
    HIP_CHECK(hipHostFree(Ptr));
    exit(ret ? EXIT_SUCCESS : EXIT_FAILURE);
  } else {  // parent process
    int stat = 0;
    // WEXITSTATUS is 0 (== EXIT_SUCCESS) for a signal-terminated child, so
    // require a normal exit before trusting the exit code.
    if (wait(&stat) < 0 || !WIFEXITED(stat) ||
        WEXITSTATUS(stat) != EXIT_SUCCESS) {
      REQUIRE(false);
    }
  }
}
#endif
/* Test Case Description: The following test checks if the memory exhibits
fine grain behavior when HIP_HOST_COHERENT is set to 1*/
// The following test is AMD specific test hence skipping for Nvidia
#if HT_AMD
TEST_CASE("Unit_hipHostMalloc_WthEnv1Flg1") {
  // Sets HIP_HOST_COHERENT=1, then has a forked child run the TstCoherency()
  // handshake on hipHostMallocPortable memory. The child reports through its
  // exit code (EXIT_SUCCESS / EXIT_FAILURE).
  if ((setenv("HIP_HOST_COHERENT", "1", 1)) != 0) {
    WARN("Unable to turn on HIP_HOST_COHERENT, hence terminating the Test case!");
    REQUIRE(false);
  }
  const pid_t pid = fork();
  if (pid < 0) {
    // Without a child nothing was tested; do not let that count as a pass.
    WARN("fork() failed");
    REQUIRE(false);
  } else if (pid == 0) {  // child process
    // NOTE(review): if this macro returns instead of exiting, the child
    // continues running the test session - confirm against its definition.
    CHECK_PCIE_ATOMICS_SUPPORT
    int *Ptr = nullptr, SIZE = sizeof(int);
    bool HmmMem = false;
    // Allocating hipHostMalloc() memory
    HIP_CHECK(hipHostMalloc(&Ptr, SIZE, hipHostMallocPortable));
    auto ret = TstCoherency(Ptr, HmmMem);
    HIP_CHECK(hipHostFree(Ptr));
    exit(ret ? EXIT_SUCCESS : EXIT_FAILURE);
  } else {  // parent process
    int stat = 0;
    // WEXITSTATUS is 0 (== EXIT_SUCCESS) for a signal-terminated child, so
    // require a normal exit before trusting the exit code.
    if (wait(&stat) < 0 || !WIFEXITED(stat) ||
        WEXITSTATUS(stat) != EXIT_SUCCESS) {
      REQUIRE(false);
    }
  }
}
#endif
/* Test Case Description: The following test checks if the memory exhibits
fine grain behavior when HIP_HOST_COHERENT is set to 1*/
// The following test is AMD specific test hence skipping for Nvidia
#if HT_AMD
TEST_CASE("Unit_hipHostMalloc_WthEnv1Flg2") {
  // Sets HIP_HOST_COHERENT=1, then has a forked child run the TstCoherency()
  // handshake on hipHostMallocWriteCombined memory. The child reports through
  // its exit code (EXIT_SUCCESS / EXIT_FAILURE).
  if ((setenv("HIP_HOST_COHERENT", "1", 1)) != 0) {
    WARN("Unable to turn on HIP_HOST_COHERENT, hence terminating the Test case!");
    REQUIRE(false);
  }
  const pid_t pid = fork();
  if (pid < 0) {
    // Without a child nothing was tested; do not let that count as a pass.
    WARN("fork() failed");
    REQUIRE(false);
  } else if (pid == 0) {  // child process
    // NOTE(review): if this macro returns instead of exiting, the child
    // continues running the test session - confirm against its definition.
    CHECK_PCIE_ATOMICS_SUPPORT
    int *Ptr = nullptr, SIZE = sizeof(int);
    bool HmmMem = false;
    // Allocating hipHostMalloc() memory
    HIP_CHECK(hipHostMalloc(&Ptr, SIZE, hipHostMallocWriteCombined));
    auto ret = TstCoherency(Ptr, HmmMem);
    HIP_CHECK(hipHostFree(Ptr));
    exit(ret ? EXIT_SUCCESS : EXIT_FAILURE);
  } else {  // parent process
    int stat = 0;
    // WEXITSTATUS is 0 (== EXIT_SUCCESS) for a signal-terminated child, so
    // require a normal exit before trusting the exit code.
    if (wait(&stat) < 0 || !WIFEXITED(stat) ||
        WEXITSTATUS(stat) != EXIT_SUCCESS) {
      REQUIRE(false);
    }
  }
}
#endif
/* Test Case Description: The following test checks if the memory exhibits
fine grain behavior when HIP_HOST_COHERENT is set to 1*/
// The following test is AMD specific test hence skipping for Nvidia
#if HT_AMD
TEST_CASE("Unit_hipHostMalloc_WthEnv1Flg3") {
  // Sets HIP_HOST_COHERENT=1, then has a forked child run the TstCoherency()
  // handshake on hipHostMallocNumaUser memory. The child reports through its
  // exit code (EXIT_SUCCESS / EXIT_FAILURE).
  if ((setenv("HIP_HOST_COHERENT", "1", 1)) != 0) {
    WARN("Unable to turn on HIP_HOST_COHERENT, hence terminating the Test case!");
    REQUIRE(false);
  }
  const pid_t pid = fork();
  if (pid < 0) {
    // Without a child nothing was tested; do not let that count as a pass.
    WARN("fork() failed");
    REQUIRE(false);
  } else if (pid == 0) {  // child process
    // NOTE(review): if this macro returns instead of exiting, the child
    // continues running the test session - confirm against its definition.
    CHECK_PCIE_ATOMICS_SUPPORT
    int *Ptr = nullptr, SIZE = sizeof(int);
    bool HmmMem = false;
    // Allocating hipHostMalloc() memory
    HIP_CHECK(hipHostMalloc(&Ptr, SIZE, hipHostMallocNumaUser));
    auto ret = TstCoherency(Ptr, HmmMem);
    HIP_CHECK(hipHostFree(Ptr));
    exit(ret ? EXIT_SUCCESS : EXIT_FAILURE);
  } else {  // parent process
    int stat = 0;
    // WEXITSTATUS is 0 (== EXIT_SUCCESS) for a signal-terminated child, so
    // require a normal exit before trusting the exit code.
    if (wait(&stat) < 0 || !WIFEXITED(stat) ||
        WEXITSTATUS(stat) != EXIT_SUCCESS) {
      REQUIRE(false);
    }
  }
}
#endif
#endif
@@ -0,0 +1,393 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_common.hh>
#include <stdlib.h>
#include <stdio.h>
#ifdef __linux__
#include <unistd.h>
#include <sys/wait.h>
#include <sys/types.h>
// Indices into the two-element descriptor array filled in by pipe().
#define ReadEnd 0
#define WriteEnd 1
// NOTE(review): MAX_SIZE is not referenced in the visible part of this file.
#define MAX_SIZE 32
// 4294967296 bytes = 4 GiB; converted to MB before being written to HIP_HIDDEN_FREE_MEM.
#define FREE_MEM_TO_HIDE 4294967296
// 2147483648 bytes = 2 GiB; size of the device allocation in the hidden-free-memory test.
#define SIZE_TO_ALLOCATE 2147483648
/*
* In main process allocate 2 GB of device memory.
* Fork() a child process and verify that 2 GB has been
* allocated in parent process.
*/
TEST_CASE("Unit_hipMemGetInfo_Functional_Scenario1") {
  // The parent allocates 2GB of device memory and signals the child over
  // `fd1`. The child then checks via hipMemGetInfo() that at least 2GB is in
  // use and sends its verdict (1 = ok, 0 = not ok) back over `fd`.
  constexpr size_t size = 2147483648;  // 2GB
  int fd[2], fd1[2], status;
  status = pipe(fd);
  REQUIRE(status == 0);
  status = pipe(fd1);
  REQUIRE(status == 0);

  pid_t child_pid;
  child_pid = fork();  // Create a new child process
  if (child_pid < 0) {
    WARN("Fork failed!!!!");
  } else if (child_pid == 0) {  // child
    close(fd1[WriteEnd]);
    close(fd[ReadEnd]);
    int result;
    size_t free = 0, total = 0;
    // Wait for signal from parent
    int check_child = 0;
    status = read(fd1[ReadEnd], &check_child, sizeof(check_child));
    REQUIRE(status != -1);
    close(fd1[ReadEnd]);
    // Check the total and free memory which is allocated in parent
    HIP_CHECK(hipMemGetInfo(&free, &total));
    if ((total - free) >= size) {
      result = 1;
    } else {
      result = 0;
    }
    // Write the result to parent
    status = write(fd[WriteEnd], &result, sizeof(result));
    REQUIRE(status != -1);
    close(fd[WriteEnd]);
    exit(0);
  } else {  // Parent
    close(fd1[ReadEnd]);
    close(fd[WriteEnd]);
    // Allocate memory
    char* A_d = nullptr;
    HIP_CHECK(hipMalloc(&A_d, size));
    // Signal the child
    int check = 0;
    status = write(fd1[WriteEnd], &check, sizeof(check));
    REQUIRE(status != -1);
    close(fd1[WriteEnd]);
    // Read the result from Child. Start from 0 and insist on a complete
    // value: read() returns 0 (not -1) when the child exits without writing,
    // which previously left read_result uninitialized.
    int read_result = 0;
    status = read(fd[ReadEnd], &read_result, sizeof(read_result));
    REQUIRE(status == static_cast<int>(sizeof(read_result)));
    close(fd[ReadEnd]);
    REQUIRE(read_result == 1);
    HIP_CHECK(hipFree(A_d));
    // wait for child exit
    wait(NULL);
  }
}
/**
* From main process Fork() a child process. In the child process allocate
* 2 GB of device memory. Signal the parent process. Verify from the parent
* process that 2 GB is allocated in the child process.
*/
TEST_CASE("Unit_hipMemGetInfo_Functional_Scenario2") {
// The child allocates 2GB of device memory and signals the parent over `fd`.
// The parent then checks via hipMemGetInfo() that at least 2GB is in use and
// releases the child over `fd2` so it can free the memory and exit.
constexpr size_t size = 2147483648; // 2GB
int fd[2], fd2[2], status;
// fd: child -> parent ("memory allocated"); fd2: parent -> child ("validation done").
status = pipe(fd);
REQUIRE(status == 0);
status = pipe(fd2);
REQUIRE(status == 0);
pid_t child_pid;
child_pid = fork(); // Create a new child process
if (child_pid < 0) {
// NOTE(review): a fork failure only warns, so the test passes without
// checking anything and the pipe descriptors stay open.
WARN("Fork failed!!!!");
} else if (child_pid == 0) { // Child
// Close the pipe ends this side does not use.
close(fd[ReadEnd]);
close(fd2[WriteEnd]);
// Allocate memory
float* A_d = nullptr;
HIP_CHECK(hipMalloc(&A_d, size));
// Signal the parent
int data = 0;
status = write(fd[WriteEnd], &data, sizeof(data));
REQUIRE(status != -1);
close(fd[WriteEnd]);
int valid = 0;
// Wait for Signal from parent before freeing memory and exiting
status = read(fd2[ReadEnd], &valid, sizeof(valid));
REQUIRE(status != -1);
close(fd2[ReadEnd]);
// Free allocated device memory
HIP_CHECK(hipFree(A_d));
exit(0);
} else { // Parent
size_t free = 0, total = 0;
// Close the pipe ends this side does not use.
close(fd[WriteEnd]);
close(fd2[ReadEnd]);
// Wait for child signal
int data = 0;
status = read(fd[ReadEnd], &data, sizeof(data));
REQUIRE(status != -1);
close(fd[ReadEnd]);
// Verify the memory
HIP_CHECK(hipMemGetInfo(&free , &total));
// The child is still blocked on fd2 here, so its allocation is still live.
REQUIRE((total - free) >= size);
// Signal child that validation is over and child can free memory
int valid = 0;
status = write(fd2[WriteEnd], &valid, sizeof(valid));
REQUIRE(status != -1);
close(fd2[WriteEnd]);
// wait for child exit
wait(NULL);
}
}
/*
* From main process Fork() a child process. In the child process
* allocate 2 GB of device memory. Free the memory and exit from
* child process. Verify from the parent process that 2 GB is
* freed in the child process.
*/
TEST_CASE("Unit_hipMemGetInfo_Functional_Scenario3") {
  // The child allocates and immediately frees 2GB of device memory, then
  // signals the parent over `fd`. The parent queries hipMemGetInfo() once the
  // signal has arrived.
  constexpr size_t size = 2147483648;  // 2GB
  int fd[2], status;
  status = pipe(fd);
  REQUIRE(status == 0);

  pid_t child_pid;
  child_pid = fork();  // Create a new child process
  if (child_pid < 0) {
    WARN("Fork failed!!!!");
  } else if (child_pid == 0) {  // Child
    close(fd[ReadEnd]);
    // Allocate the memory
    void* A_d = nullptr;
    HIP_CHECK(hipMalloc(&A_d, size));
    // Free the allocated memory
    HIP_CHECK(hipFree(A_d));
    // Signal the parent about memory free
    int check = 0;
    status = write(fd[WriteEnd], &check, sizeof(check));
    REQUIRE(status != -1);
    close(fd[WriteEnd]);
    exit(0);
  } else {  // Parent
    close(fd[WriteEnd]);
    // Wait for the signal from child about memory free
    int check_parent = 0;
    status = read(fd[ReadEnd], &check_parent, sizeof(check_parent));
    REQUIRE(status != -1);
    close(fd[ReadEnd]);
    size_t free = 0, total = 0;
    // Verify the memory
    HIP_CHECK(hipMemGetInfo(&free, &total));
    // "Used memory is non-negative", written so it is meaningful for
    // unsigned values: `(total - free) >= 0` is always true for size_t.
    REQUIRE(free <= total);
    // wait for child exit
    wait(NULL);
  }
}
/*
* From main process Fork() a child process. In the child process allocate
* 2 GB of device memory. Exit from child process. Verify from the parent
* process that 2 GB is freed in the child process.
*/
TEST_CASE("Unit_hipMemGetInfo_Functional_scenario4") {
  // The child allocates 2GB of device memory and exits without freeing it.
  // The parent waits for the child to exit and then queries hipMemGetInfo().
  constexpr size_t size = 2147483648;  // 2GB
  pid_t child_pid;
  child_pid = fork();  // Create a new child process
  if (child_pid < 0) {
    WARN("Fork failed!!!!");
  } else if (child_pid == 0) {  // Child
    // Allocate the memory; it is intentionally not freed before exiting.
    void* A_d = nullptr;
    HIP_CHECK(hipMalloc(&A_d, size));
    exit(0);
  } else {  // Parent
    // wait for child exit
    wait(NULL);
    size_t free = 0, total = 0;
    // Verify the memory
    HIP_CHECK(hipMemGetInfo(&free, &total));
    // "Used memory is non-negative", written so it is meaningful for
    // unsigned values: `(total - free) >= 0` is always true for size_t.
    REQUIRE(free <= total);
  }
}
/*
* Multidevice Scenario: In main process allocate 2 GB of device memory
* in every device. Verify that 2 GB is allocated using hipMemGetInfo.
* Fork() a child process and verify that 2 GB has been allocated from
* parent process in every device.
*/
TEST_CASE("Unit_hipMemGetInfo_Functional_MultiDevice_Scenario5") {
  // The parent allocates 2GB on every device and signals the child over
  // `fd1`. The child checks via hipMemGetInfo() that every device shows at
  // least 2GB in use and sends its verdict (1 = ok, 0 = not ok) over `fd2`.
  constexpr size_t size = 2147483648;  // 2GB
  size_t free = 0, total = 0;
  int fd1[2], fd2[2], status;
  status = pipe(fd1);
  REQUIRE(status == 0);
  status = pipe(fd2);
  REQUIRE(status == 0);

  pid_t child_pid;
  child_pid = fork();  // Create a new child process
  if (child_pid < 0) {
    WARN("Fork failed!!!!");
  } else if (child_pid == 0) {  // Child
    close(fd1[WriteEnd]);
    close(fd2[ReadEnd]);
    // Wait for the signal from parent after memory allocation
    int check_child = 0;
    status = read(fd1[ReadEnd], &check_child, sizeof(check_child));
    REQUIRE(status != -1);
    close(fd1[ReadEnd]);
    int num_devices, result, count = 0;
    // Get the device count
    HIP_CHECK(hipGetDeviceCount(&num_devices));
    for (int i = 0; i < num_devices; i++) {
      HIP_CHECK(hipSetDevice(i));
      // Check the memory
      HIP_CHECK(hipMemGetInfo(&free, &total));
      if ((total - free) >= size) {
        count += 1;
      }
    }
    if (count == num_devices) {
      result = 1;
    } else {
      result = 0;
    }
    // Write the result to Parent
    status = write(fd2[WriteEnd], &result, sizeof(result));
    REQUIRE(status != -1);
    close(fd2[WriteEnd]);
    exit(0);
  } else {  // Parent
    close(fd1[ReadEnd]);
    close(fd2[WriteEnd]);
    int num_devices;
    // Get the device count
    HIP_CHECK(hipGetDeviceCount(&num_devices));
    std::vector<void*> v(num_devices, nullptr);
    for (int i = 0; i < num_devices; i++) {
      HIP_CHECK(hipSetDevice(i));
      // Query the memory before the allocation
      HIP_CHECK(hipMemGetInfo(&free, &total));
      // Allocate memory
      HIP_CHECK(hipMalloc(&v[i], size));
      // Query the memory after the allocation
      HIP_CHECK(hipMemGetInfo(&free, &total));
    }
    // Signal the child about memory allocation
    int check = 0;
    status = write(fd1[WriteEnd], &check, sizeof(check));
    REQUIRE(status != -1);
    close(fd1[WriteEnd]);
    // Read result from child. Start from 0 and insist on a complete value:
    // read() returns 0 (not -1) when the child exits without writing, which
    // previously left result_parent uninitialized.
    int result_parent = 0;
    status = read(fd2[ReadEnd], &result_parent, sizeof(result_parent));
    REQUIRE(status == static_cast<int>(sizeof(result_parent)));
    REQUIRE(result_parent == 1);
    close(fd2[ReadEnd]);
    // Free the allocated memory on each device
    for (int i = 0; i < num_devices; i++) {
      HIP_CHECK(hipSetDevice(i));
      HIP_CHECK(hipFree(v[i]));
    }
    // wait for child exit
    wait(NULL);
  }
}
#if HT_AMD
static bool testHiddenFreeMemFromChild() {
  // Forks a child that sets HIP_HIDDEN_FREE_MEM, allocates SIZE_TO_ALLOCATE
  // bytes and checks its own hipMemGetInfo() numbers; the parent combines the
  // child's verdict with its own hipMemGetInfo() check.
  // Returns true when both checks hold.
  bool result = true;
  int testResult = 0, result_dummy = 0;
  int fd_c2p[2], fd_p2c[2];  // child -> parent and parent -> child pipes
  pipe(fd_c2p);
  pipe(fd_p2c);

  const pid_t cPid = fork();
  if (cPid < 0) {
    WARN("fork() failed");
    HIP_ASSERT(false);
    return result;
  }

  if (cPid == 0) {  // child
    size_t free = 0, total = 0;
    close(fd_c2p[ReadEnd]);
    close(fd_p2c[WriteEnd]);
    // The environment variable takes the amount in MB.
    const int64_t hideMb = (FREE_MEM_TO_HIDE / (1024 * 1024));
    unsetenv("HIP_HIDDEN_FREE_MEM");
    setenv("HIP_HIDDEN_FREE_MEM", std::to_string(hideMb).c_str(), 1);
    // allocate memory in device
    char* d_ptr{nullptr};
    HIP_CHECK(hipMalloc(&d_ptr, SIZE_TO_ALLOCATE));
    HIP_CHECK(hipMemGetInfo(&free, &total));
    // Used memory must cover the hidden amount plus the allocation.
    const size_t min_size = (FREE_MEM_TO_HIDE + SIZE_TO_ALLOCATE);
    if ((total - free) >= min_size) {
      testResult = 1;
    }
    // Write to and signal parent
    write(fd_c2p[WriteEnd], &testResult, sizeof(testResult));
    close(fd_c2p[WriteEnd]);
    // Stay alive (allocation intact) until the parent has done its check.
    read(fd_p2c[ReadEnd], &result_dummy, sizeof(result_dummy));
    close(fd_p2c[ReadEnd]);
    exit(0);
  }

  // parent
  close(fd_c2p[WriteEnd]);
  close(fd_p2c[ReadEnd]);
  // wait for result from child
  read(fd_c2p[ReadEnd], &testResult, sizeof(testResult));
  close(fd_c2p[ReadEnd]);
  result &= (testResult != 0);

  // From the parent's view at least the child's allocation must be in use.
  size_t free = 0, total = 0;
  const size_t min_size = SIZE_TO_ALLOCATE;
  HIP_CHECK(hipMemGetInfo(&free, &total));
  result &= ((total - free) >= min_size);

  // Write to and signal child
  write(fd_p2c[WriteEnd], &result_dummy, sizeof(result_dummy));
  close(fd_p2c[WriteEnd]);
  wait(NULL);
  return result;
}
/**
* Scenario: Fork() a child process. In child, get free and total memory.
* Set the HIP_HIDDEN_FREE_MEM to 4GB. Allocate 2 GB of device memory.
* Get the free and total memory. Free memory available should be
* (actual free - 6 GB). Signal parent process. Wait for signal from child
* in parent. Get free and total memory. Free memory available should be
* actual (actual free - 4 GB).
*/
TEST_CASE("Unit_hipMemGetInfo_SetHiddenFreeMemFromChild") {
  // The whole scenario lives in the helper; this case only asserts its verdict.
  const bool passed = testHiddenFreeMemFromChild();
  REQUIRE(true == passed);
}
/**
* Scenario: Set the HIP_HIDDEN_FREE_MEM to 4GB. Invoke hipMemGetInfo to
* verify that 4GB free memory is hidden for all available GPUs.
*/
TEST_CASE("Unit_hipMemGetInfo_VerifyHiddenFreeMemForAllGpu") {
  // Sets HIP_HIDDEN_FREE_MEM (in MB) and then requires, for every device,
  // that hipMemGetInfo() reports at least FREE_MEM_TO_HIDE bytes as not free.
  const int64_t hideMb = (FREE_MEM_TO_HIDE / (1024 * 1024));  // in MB
  const std::string hideMbText = std::to_string(hideMb);
  unsetenv("HIP_HIDDEN_FREE_MEM");
  setenv("HIP_HIDDEN_FREE_MEM", hideMbText.c_str(), 1);

  int numDevices = 0;
  HIP_CHECK(hipGetDeviceCount(&numDevices));
  for (int gpu = 0; gpu < numDevices; ++gpu) {
    HIP_CHECK(hipSetDevice(gpu));
    size_t free = 0, total = 0;
    HIP_CHECK(hipMemGetInfo(&free, &total));
    REQUIRE((total - free) >= FREE_MEM_TO_HIDE);
  }
}
#endif
#endif
تفاوت فایلی نمایش داده نمی شود زیرا این فایل بسیار بزرگ است Diff را بارگزاری کن
@@ -0,0 +1,561 @@
/*
* Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
/*
* Test designed to run on Linux based platforms
* Verifies functionality of
* -- hipSetDevice and hipGetDevice with different ROCR_VISIBLE_DEVICES and
* HIP_VISIBLE_DEVICES values set
*/
#include <hip_test_common.hh>
#ifdef __linux__
#include <sys/wait.h>
#include <unistd.h>
#define MAX_SIZE 30
/**
 * Fetches the GPU device count.
 *
 * ROCR_VISIBLE_DEVICES and HIP_VISIBLE_DEVICES are removed from the
 * environment, then the count is queried with hipGetDeviceCount() in a forked
 * child process and sent back to the caller over a pipe.
 *
 * @param pdevCnt Receives the device count reported by the child. Set to 0
 *                when the pipe cannot be created, fork() fails, or the child
 *                does not deliver a complete value.
 */
static void getDeviceCount(int *pdevCnt) {
  int fd[2], val = 0;
  pid_t childpid;
  // disable visible_devices env from shell (done before fork, so it applies
  // to both the parent and the child)
  unsetenv("ROCR_VISIBLE_DEVICES");
  unsetenv("HIP_VISIBLE_DEVICES");
  // create pipe descriptors
  if (pipe(fd) != 0) {
    *pdevCnt = 0;
    return;
  }
  childpid = fork();
  if (childpid > 0) {  // Parent
    close(fd[1]);
    // parent will wait to read the device cnt
    const ssize_t bytesRead = read(fd[0], &val, sizeof(val));
    // close the read-descriptor
    close(fd[0]);
    // wait for child exit
    wait(NULL);
    // A short read means the child never reported a count; do not hand back
    // a partially written value.
    if (bytesRead == static_cast<ssize_t>(sizeof(val))) {
      *pdevCnt = val;
    } else {
      *pdevCnt = 0;
    }
  } else if (!childpid) {  // Child
    int devCnt = 1;
    // writing only, no need for read-descriptor
    close(fd[0]);
    HIP_CHECK(hipGetDeviceCount(&devCnt));
    // send the value on the write-descriptor:
    write(fd[1], &devCnt, sizeof(devCnt));
    // close the write descriptor:
    close(fd[1]);
    exit(0);
  } else {  // failure
    // fork() failed: release both pipe ends so they are not leaked.
    close(fd[0]);
    close(fd[1]);
    *pdevCnt = 0;
  }
}
/**
 * Checks HIP behavior when the visible-device environment variable holds an
 * invalid device number.
 *
 * A forked child sets the variable (CUDA_VISIBLE_DEVICES on NVIDIA builds,
 * otherwise ROCR_VISIBLE_DEVICES or HIP_VISIBLE_DEVICES) to deviceNumber. It
 * then expects hipGetDeviceCount() to fail, and hipSetDevice()/hipGetDevice()
 * to fail for every index in [0, numDevices). The verdict is sent to the
 * parent over a pipe and asserted there.
 *
 * @param numDevices   Number of device indices to probe in the child.
 * @param useRocrEnv   true selects ROCR_VISIBLE_DEVICES, false selects
 *                     HIP_VISIBLE_DEVICES (not consulted on NVIDIA builds).
 * @param deviceNumber Value written to the variable; pass either -1 or an
 *                     invalid device number.
 */
static void testInvalidDevice(int numDevices, bool useRocrEnv,
                              int deviceNumber) {
  bool testResult = true;
  int device;
  int tempCount = 0;
  int setDeviceErrorCheck = 0;
  int getDeviceErrorCheck = 0;
  int getDeviceCountErrorCheck = 0;
  char visibleDeviceString[MAX_SIZE] = {};
  snprintf(visibleDeviceString, MAX_SIZE, "%d", deviceNumber);
  int fd[2];
  if (pipe(fd) != 0) {
    printf("pipe() failed\n");
    HIP_ASSERT(false);
    return;
  }
  pid_t cPid;
  cPid = fork();
  if (cPid == 0) {  // child
    hipError_t err;
#ifdef __HIP_PLATFORM_NVIDIA__
    setenv("CUDA_VISIBLE_DEVICES", visibleDeviceString, 1);
#else
    if (true == useRocrEnv) {
      setenv("ROCR_VISIBLE_DEVICES", visibleDeviceString, 1);
    } else {
      setenv("HIP_VISIBLE_DEVICES", visibleDeviceString, 1);
    }
#endif
    err = hipGetDeviceCount(&tempCount);
    if (err != hipSuccess) {
      getDeviceCountErrorCheck = 1;
    }
    for (int i = 0; i < numDevices; i++) {
      err = hipSetDevice(i);
      if (err != hipSuccess) {
        setDeviceErrorCheck+= 1;
      }
      err = hipGetDevice(&device);
      if (err != hipSuccess) {
        getDeviceErrorCheck+= 1;
      }
    }
    // Every call must have failed for the scenario to pass.
    if ((getDeviceCountErrorCheck == 1) && (setDeviceErrorCheck == numDevices)
        && (getDeviceErrorCheck == numDevices)) {
      testResult = true;
    } else {
      printf("Test failed for invalid device, getDeviceCountErrorCheck %d,"
             "setDeviceErrorCheck %d, getDeviceErrorCheck %d\n",
             getDeviceCountErrorCheck, setDeviceErrorCheck,
             getDeviceErrorCheck);
      testResult = false;
    }
    close(fd[0]);
    write(fd[1], &testResult, sizeof(testResult));
    close(fd[1]);
    exit(0);
  } else if (cPid > 0) {  // parent
    close(fd[1]);
    const ssize_t bytesRead = read(fd[0], &testResult, sizeof(testResult));
    close(fd[0]);
    wait(NULL);
    // A child that died before reporting must not count as a pass.
    if (bytesRead != static_cast<ssize_t>(sizeof(testResult))) {
      printf("Failed to read test result from child\n");
      testResult = false;
    }
  } else {
    // fork() failed: release both pipe ends so they are not leaked.
    close(fd[0]);
    close(fd[1]);
    printf("fork() failed\n");
    HIP_ASSERT(false);
  }
  REQUIRE(testResult == true);
}
/**
 * Checks HIP behavior when the visible-device environment variable holds a
 * list of valid devices.
 *
 * A forked child sets the variable (CUDA_VISIBLE_DEVICES on NVIDIA builds,
 * otherwise ROCR_VISIBLE_DEVICES or HIP_VISIBLE_DEVICES) to the given list.
 * It then expects hipGetDeviceCount() to report deviceListLength devices,
 * hipSetDevice() to fail for exactly (numDevices - deviceListLength) of the
 * indices in [0, numDevices), and hipGetDevice() never to fail. The verdict
 * is sent to the parent over a pipe and asserted there.
 *
 * @param numDevices       Number of device indices to probe in the child.
 * @param useRocrEnv       true selects ROCR_VISIBLE_DEVICES, false selects
 *                         HIP_VISIBLE_DEVICES (not consulted on NVIDIA).
 * @param deviceList       Device ids to expose; must not be NULL.
 * @param deviceListLength Number of entries in deviceList; must be in
 *                         [1, numDevices].
 */
static void testValidDevices(int numDevices, bool useRocrEnv, int *deviceList,
                             int deviceListLength) {
  bool testResult = true;
  int tempCount = 0;
  int device;
  int setDeviceErrorCheck = 0;
  int getDeviceErrorCheck = 0;
  int getDeviceCountErrorCheck = 0;
  int *deviceListPtr = deviceList;
  std::string visibleDeviceString;
  if ((NULL == deviceList) || ((deviceListLength < 1) ||
      deviceListLength > numDevices)) {
    INFO("Invalid argument for number of devices. Skipping current test");
    REQUIRE(false);
  }
  // Each id is followed by a comma, so the list ends with a trailing comma.
  for (int i = 0; i < deviceListLength; i++) {
    visibleDeviceString.append(std::to_string(*deviceListPtr++) + ",");
  }
  int fd[2];
  if (pipe(fd) != 0) {
    printf("pipe() failed\n");
    HIP_ASSERT(false);
    return;
  }
  pid_t cPid;
  cPid = fork();
  if (cPid == 0) {  // child
#ifdef __HIP_PLATFORM_NVIDIA__
    unsetenv("CUDA_VISIBLE_DEVICES");
    setenv("CUDA_VISIBLE_DEVICES", visibleDeviceString.c_str(), 1);
#else
    unsetenv("ROCR_VISIBLE_DEVICES");
    unsetenv("HIP_VISIBLE_DEVICES");
    if (true == useRocrEnv) {
      setenv("ROCR_VISIBLE_DEVICES", visibleDeviceString.c_str(), 1);
    } else {
      setenv("HIP_VISIBLE_DEVICES", visibleDeviceString.c_str(), 1);
    }
#endif
    hipError_t err;
    err = hipGetDeviceCount(&tempCount);
    // Despite its name, this flag is set to 1 when the count is as expected.
    if (tempCount == deviceListLength) {
      getDeviceCountErrorCheck = 1;
    } else {
      printf("hipGetDeviceCount failed. return value: %u\n", hipError_t(err));
    }
    for (int i = 0; i < numDevices; i++) {
      err = hipSetDevice(i);
      if (err != hipSuccess) {
        setDeviceErrorCheck+= 1;
      }
      err = hipGetDevice(&device);
      if (err != hipSuccess) {
        getDeviceErrorCheck+= 1;
      }
    }
    if ((getDeviceCountErrorCheck == 1) && (setDeviceErrorCheck ==
        (numDevices-deviceListLength)) && (getDeviceErrorCheck == 0)) {
      testResult = true;
    } else {
      printf("Test failed for device count %d\n", deviceListLength);
      testResult = false;
    }
    close(fd[0]);
    write(fd[1], &testResult, sizeof(testResult));
    close(fd[1]);
    exit(0);
  } else if (cPid > 0) {  // parent
    close(fd[1]);
    const ssize_t bytesRead = read(fd[0], &testResult, sizeof(testResult));
    close(fd[0]);
    wait(NULL);
    // A child that died before reporting must not count as a pass.
    if (bytesRead != static_cast<ssize_t>(sizeof(testResult))) {
      printf("Failed to read test result from child\n");
      testResult = false;
    }
  } else {
    // fork() failed: release both pipe ends so they are not leaked.
    close(fd[0]);
    close(fd[1]);
    printf("fork() failed\n");
    HIP_ASSERT(false);
  }
  REQUIRE(testResult == true);
}
/**
 * Builds the two comma-separated device lists used by the RVD/HVD tests.
 *
 * Both strings are appended to, not cleared, and neither gets a trailing
 * comma.
 *
 * @param deviceList              Device ids for the "min" list.
 * @param numDevices              The "max" list becomes 0,1,...,numDevices-1.
 * @param count                   Number of entries of deviceList to use.
 * @param min_visibleDeviceString Receives the first count ids of deviceList.
 * @param max_visibleDeviceString Receives every index below numDevices.
 */
static void Initialize(int *deviceList, int numDevices, int count,
                       std::string& min_visibleDeviceString,
                       std::string& max_visibleDeviceString) {
  // "min" list: the caller-selected ids, separator placed before every id
  // except the first.
  for (int idx = 0; idx < count; ++idx) {
    if (idx != 0) {
      min_visibleDeviceString += ",";
    }
    min_visibleDeviceString += std::to_string(deviceList[idx]);
  }
  // "max" list: every device index in ascending order.
  for (int dev = 0; dev < numDevices; ++dev) {
    if (dev != 0) {
      max_visibleDeviceString += ",";
    }
    max_visibleDeviceString += std::to_string(dev);
  }
}
/**
 * Runs the "maximal ROCR_VISIBLE_DEVICES, minimal HIP_VISIBLE_DEVICES" check.
 *
 * A forked child sets ROCR_VISIBLE_DEVICES to every index below numDevices
 * and HIP_VISIBLE_DEVICES to the first count entries of deviceList. It then
 * re-queries the device count and verifies that hipGetDevice() returns the
 * index just passed to hipSetDevice() for exactly count devices. The verdict
 * is sent to the parent over a pipe and asserted there.
 *
 * @param numDevices Number of devices present with no visibility filter.
 * @param deviceList Device ids placed in HIP_VISIBLE_DEVICES.
 * @param count      Number of entries of deviceList to use; also the number
 *                   of devices expected to be visible in the child.
 */
static void testMaxRvdMinHvd(int numDevices, int *deviceList, int count) {
  bool testResult = true;
  int device;
  int validateCount = 0;
  std::string min_visibleDeviceString;
  std::string max_visibleDeviceString;
  int fd[2];
  if (pipe(fd) != 0) {
    printf("pipe() failed\n");
    HIP_ASSERT(false);
    return;
  }
  pid_t cPid;
  cPid = fork();
  if (cPid == 0) {  // child
    Initialize(deviceList, numDevices,
               count, min_visibleDeviceString, max_visibleDeviceString);
    unsetenv("ROCR_VISIBLE_DEVICES");
    unsetenv("HIP_VISIBLE_DEVICES");
    setenv("ROCR_VISIBLE_DEVICES", max_visibleDeviceString.c_str(), 1);
    setenv("HIP_VISIBLE_DEVICES", min_visibleDeviceString.c_str(), 1);
    // numDevices is reused for the count as seen through the filters.
    HIP_CHECK(hipGetDeviceCount(&numDevices));
    for (int i = 0; i < numDevices; i++) {
      HIP_CHECK(hipSetDevice(i));
      HIP_CHECK(hipGetDevice(&device));
      if (device == i) {
        validateCount+= 1;
      }
    }
    if (count != validateCount) {
      testResult = false;
    }
    // Report the verdict and terminate; without this the child would fall
    // through into the parent's assertion and the parent would never receive
    // a result.
    close(fd[0]);
    write(fd[1], &testResult, sizeof(testResult));
    close(fd[1]);
    exit(0);
  } else if (cPid > 0) {  // parent
    close(fd[1]);
    const ssize_t bytesRead = read(fd[0], &testResult, sizeof(testResult));
    close(fd[0]);
    wait(NULL);
    // A child that died before reporting must not count as a pass.
    if (bytesRead != static_cast<ssize_t>(sizeof(testResult))) {
      printf("Failed to read test result from child\n");
      testResult = false;
    }
  } else {
    // fork() failed: release both pipe ends so they are not leaked.
    close(fd[0]);
    close(fd[1]);
    printf("fork() failed\n");
    HIP_ASSERT(false);
  }
  REQUIRE(testResult == true);
}
/**
 * Runs the combined ROCR_VISIBLE_DEVICES / CUDA_VISIBLE_DEVICES check.
 *
 * A forked child sets ROCR_VISIBLE_DEVICES to every index below numDevices
 * and CUDA_VISIBLE_DEVICES to the first count entries of deviceList. It then
 * re-queries the device count and verifies that hipGetDevice() returns the
 * index just passed to hipSetDevice() for exactly count devices. The verdict
 * is sent to the parent over a pipe and asserted there.
 *
 * @param numDevices Number of devices present with no visibility filter.
 * @param deviceList Device ids placed in CUDA_VISIBLE_DEVICES.
 * @param count      Number of entries of deviceList to use; also the number
 *                   of devices expected to be visible in the child.
 */
static void testRvdCvd(int numDevices, int *deviceList, int count) {
  bool testResult = true;
  int device;
  int validateCount = 0;
  std::string min_visibleDeviceString;
  std::string max_visibleDeviceString;
  int fd[2];
  if (pipe(fd) != 0) {
    printf("pipe() failed\n");
    HIP_ASSERT(false);
    return;
  }
  pid_t cPid;
  cPid = fork();
  if (cPid == 0) {  // child
    Initialize(deviceList, numDevices, count,
               min_visibleDeviceString, max_visibleDeviceString);
    unsetenv("ROCR_VISIBLE_DEVICES");
    unsetenv("HIP_VISIBLE_DEVICES");
    setenv("ROCR_VISIBLE_DEVICES", max_visibleDeviceString.c_str(), 1);
    setenv("CUDA_VISIBLE_DEVICES", min_visibleDeviceString.c_str(), 1);
    // numDevices is reused for the count as seen through the filters.
    HIP_CHECK(hipGetDeviceCount(&numDevices));
    for (int i = 0; i < numDevices; i++) {
      HIP_CHECK(hipSetDevice(i));
      HIP_CHECK(hipGetDevice(&device));
      if (device == i) {
        validateCount+= 1;
      }
    }
    if (count != validateCount) {
      testResult = false;
    }
    // Report the verdict and terminate; without this the child would fall
    // through into the parent's assertion and the parent would never receive
    // a result.
    close(fd[0]);
    write(fd[1], &testResult, sizeof(testResult));
    close(fd[1]);
    exit(0);
  } else if (cPid > 0) {  // parent
    close(fd[1]);
    const ssize_t bytesRead = read(fd[0], &testResult, sizeof(testResult));
    close(fd[0]);
    wait(NULL);
    // A child that died before reporting must not count as a pass.
    if (bytesRead != static_cast<ssize_t>(sizeof(testResult))) {
      printf("Failed to read test result from child\n");
      testResult = false;
    }
  } else {
    // fork() failed: release both pipe ends so they are not leaked.
    close(fd[0]);
    close(fd[1]);
    printf("fork() failed\n");
    HIP_ASSERT(false);
  }
  REQUIRE(testResult == true);
}
/**
 * Runs the "minimal ROCR_VISIBLE_DEVICES, maximal HIP_VISIBLE_DEVICES" check.
 *
 * A forked child sets ROCR_VISIBLE_DEVICES to the first count entries of
 * deviceList and HIP_VISIBLE_DEVICES to every index below numDevices. It then
 * re-queries the device count and verifies that hipGetDevice() returns the
 * index just passed to hipSetDevice() for exactly count devices. The verdict
 * is sent to the parent over a pipe and asserted there.
 *
 * @param numDevices Number of devices present with no visibility filter.
 * @param deviceList Device ids placed in ROCR_VISIBLE_DEVICES.
 * @param count      Number of entries of deviceList to use; also the number
 *                   of devices expected to be visible in the child.
 */
static void testMinRvdMaxHvd(int numDevices, int *deviceList, int count) {
  bool testResult = true;
  int device;
  int validateCount = 0;
  std::string min_visibleDeviceString;
  std::string max_visibleDeviceString;
  int fd[2];
  if (pipe(fd) != 0) {
    printf("pipe() failed\n");
    HIP_ASSERT(false);
    return;
  }
  pid_t cPid;
  cPid = fork();
  if (cPid == 0) {  // child
    Initialize(deviceList, numDevices, count,
               min_visibleDeviceString, max_visibleDeviceString);
    unsetenv("ROCR_VISIBLE_DEVICES");
    unsetenv("HIP_VISIBLE_DEVICES");
    setenv("ROCR_VISIBLE_DEVICES", min_visibleDeviceString.c_str(), 1);
    setenv("HIP_VISIBLE_DEVICES", max_visibleDeviceString.c_str(), 1);
    // numDevices is reused for the count as seen through the filters.
    HIP_CHECK(hipGetDeviceCount(&numDevices));
    for (int i = 0; i < numDevices; i++) {
      HIP_CHECK(hipSetDevice(i));
      HIP_CHECK(hipGetDevice(&device));
      if (device == i) {
        validateCount+= 1;
      }
    }
    if (count != validateCount) {
      testResult = false;
    }
    close(fd[0]);
    write(fd[1], &testResult, sizeof(testResult));
    close(fd[1]);
    exit(0);
  } else if (cPid > 0) {  // parent
    close(fd[1]);
    const ssize_t bytesRead = read(fd[0], &testResult, sizeof(testResult));
    close(fd[0]);
    wait(NULL);
    // A child that died before reporting must not count as a pass.
    if (bytesRead != static_cast<ssize_t>(sizeof(testResult))) {
      printf("Failed to read test result from child\n");
      testResult = false;
    }
  } else {
    // fork() failed: release both pipe ends so they are not leaked.
    close(fd[0]);
    close(fd[1]);
    printf("fork() failed\n");
    HIP_ASSERT(false);
  }
  REQUIRE(testResult == true);
}
/**
* Scenario sets an invalid visible device list and checks the behavior.
* Two invalid values are tried for each environment variable: -1, and
* numDevices (one past the last valid index).
*/
TEST_CASE("Unit_hipSetDevice_InvalidVisibleDeviceList") {
int numDevices = 0;
// Device count with no visibility filter applied.
getDeviceCount(&numDevices);
REQUIRE(numDevices != 0);
// useRocrEnv == false: testInvalidDevice sets HIP_VISIBLE_DEVICES, or
// CUDA_VISIBLE_DEVICES when built for the NVIDIA platform.
SECTION("Test setting -1 to HIP_VISIBLE_DEVICES") {
testInvalidDevice(numDevices, false, -1);
}
SECTION("Test setting invalid device to HIP_VISIBLE_DEVICES") {
testInvalidDevice(numDevices, false, numDevices);
}
#ifndef __HIP_PLATFORM_NVIDIA__
// ROCR_VISIBLE_DEVICES variants are only exercised on non-NVIDIA builds.
SECTION("Test setting -1 to ROCR_VISIBLE_DEVICES") {
testInvalidDevice(numDevices, true, -1);
}
SECTION("Test setting invalid device to ROCR_VISIBLE_DEVICES") {
testInvalidDevice(numDevices, true, numDevices);
}
#endif
}
/**
* Scenario sets valid visible device list and checks behavior.
*/
TEST_CASE("Unit_hipSetDevice_ValidVisibleDeviceList") {
  int numDevices = 0;
  getDeviceCount(&numDevices);
  REQUIRE(numDevices != 0);

  // Visible list naming every available device: 0, 1, ..., numDevices-1.
  std::vector<int> allDevices;
  int nextId = 0;
  while (nextId < numDevices) {
    allDevices.push_back(nextId);
    ++nextId;
  }

  SECTION("Test setting valid hip visible device list") {
    testValidDevices(numDevices, false, allDevices.data(), numDevices);
  }
#ifndef __HIP_PLATFORM_NVIDIA__
  SECTION("Test setting valid rocr visible device list") {
    testValidDevices(numDevices, true, allDevices.data(), numDevices);
  }
#endif
}
/**
* Scenario sets subset of available devices and checks behavior.
*/
TEST_CASE("Unit_hipSetDevice_SubsetOfAvailableDevices") {
  const int subsetLength = 1;
  int subset[MAX_SIZE];
  int numDevices = 0;
  getDeviceCount(&numDevices);
  REQUIRE(numDevices != 0);

  // Subset of the available gpus, listed in descending order:
  // subsetLength-1, ..., 1, 0.
  int slot = 0;
  for (int id = subsetLength - 1; id >= 0; --id) {
    subset[slot] = id;
    ++slot;
  }

#ifndef __HIP_PLATFORM_NVIDIA__
  testValidDevices(numDevices, true, subset, subsetLength);
#endif
  testValidDevices(numDevices, false, subset, subsetLength);
}
#ifndef __HIP_PLATFORM_NVIDIA__
/* Following tests apply only for AMD Platforms */
/**
* Scenario tests getDevice behavior with Minimal Len of RVD
* and Maximal Len of HVD
*/
TEST_CASE("Unit_hipSetDevice_MinRvdMaxHvdDevicesList") {
  int numDevices = 0;
  getDeviceCount(&numDevices);
  REQUIRE(numDevices != 0);

  // Pick every even-numbered device; with a single device this is just {0}.
  std::vector<int> evenDevices;
  int picked = 0;
  for (int id = 0; id < numDevices; id += 2) {
    evenDevices.push_back(id);
    ++picked;
  }

  testMinRvdMaxHvd(numDevices, evenDevices.data(), picked);
}
/**
* Scenario tests getDevice behavior with Maximal Len of RVD
* and Minimal Len of HVD
*/
TEST_CASE("Unit_hipSetDevice_MaxRvdMinHvdDevicesList") {
  int numDevices = 0;
  getDeviceCount(&numDevices);
  REQUIRE(numDevices != 0);

  // Pick every even-numbered device; with a single device this is just {0}.
  std::vector<int> evenDevices;
  for (int id = 0; id < numDevices; id += 2) {
    evenDevices.push_back(id);
  }

  testMaxRvdMinHvd(numDevices, evenDevices.data(), evenDevices.size());
}
/**
* Scenario tests getDevice behavior with combination of RVD and CVD
*/
TEST_CASE("Unit_hipSetDevice_RvdCvdDevicesList") {
  int numDevices = 0;
  getDeviceCount(&numDevices);
  REQUIRE(numDevices != 0);

  // Every even-numbered device goes into the CUDA_VISIBLE_DEVICES list
  // (a single device yields {0}). A std::vector is used, as in the sibling
  // RVD/HVD tests, so the list grows with the device count instead of
  // relying on a fixed MAX_SIZE buffer with no bound check.
  std::vector<int> deviceList;
  for (int i = 0; i < numDevices; i += 2) {
    deviceList.push_back(i);
  }

  testRvdCvd(numDevices, deviceList.data(),
             static_cast<int>(deviceList.size()));
}
#endif // __HIP_PLATFORM_NVIDIA__
#endif // __linux__
@@ -0,0 +1,175 @@
# Copyright (c) 2016 - 2023 Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
cmake_minimum_required(VERSION 3.16.8)
# vc 19.31.31107.0 has an issue when packaging with the makefile generator
# (NMAKE : fatal error U1065: invalid option 'w').
# Windows therefore uses the Ninja generator like other projects, and the
# two settings below skip CMake's simple compiler test.
set(CMAKE_C_COMPILER_WORKS 1)
set(CMAKE_CXX_COMPILER_WORKS 1)
# Standard install directory variables (CMAKE_INSTALL_DATADIR is used below).
include(GNUInstallDirs)
#############################
# Packaging steps
#############################
# Default package install prefix, unless the caller already chose one.
if(NOT DEFINED CPACK_INSTALL_PREFIX)
set(CPACK_INSTALL_PREFIX "/opt/rocm/")
endif()
# Package name carries the HIP platform, plus a -static suffix for static builds.
if(BUILD_SHARED_LIBS)
set(PKG_NAME hip-catch-${HIP_PLATFORM})
else()
set(PKG_NAME hip-catch-${HIP_PLATFORM}-static)
endif()
set(CPACK_PACKAGE_NAME ${PKG_NAME})
set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "HIP: Heterogenous-computing Interface for Portability [CATCH TESTS]")
set(CPACK_PACKAGE_DESCRIPTION "HIP:
Heterogenous-computing Interface for Portability [CATCH TESTS]")
set(CPACK_PACKAGE_VENDOR "Advanced Micro Devices, Inc.")
set(CPACK_PACKAGE_CONTACT "HIP Support <hip.support@amd.com>")
# NOTE(review): the HIP_VERSION_* variables are not set in this file; they are
# presumably provided by the including project.
set(CPACK_PACKAGE_VERSION ${HIP_VERSION_MAJOR}.${HIP_VERSION_MINOR}.${HIP_PACKAGING_VERSION_PATCH})
# By default rpm tries to reduce the file size by stripping the executables.
# Skip stripping to avoid build/packaging failures (SWDEV-375603).
set(CPACK_RPM_SPEC_MORE_DEFINE "%global __os_install_post %{nil}")
set (CPACK_RPM_PACKAGE_AUTOREQ 0)
# Declare dependencies so that the hip-catch-* package is removed when rocm
# is uninstalled.
if(BUILD_SHARED_LIBS)
set (CPACK_DEBIAN_PACKAGE_DEPENDS "rocm-core, hip-runtime-amd")
set (CPACK_RPM_PACKAGE_REQUIRES "rocm-core, hip-runtime-amd")
else()
set (CPACK_DEBIAN_PACKAGE_DEPENDS "rocm-core, hip-static-dev")
set (CPACK_RPM_PACKAGE_REQUIRES "rocm-core, hip-static-devel")
endif()
# Get all the files that need to be installed from the global properties
# (NOTE(review): presumably filled in by the test-registration helpers; they
# are not set in this file).
get_property(INSTALL_CTEST_INCLUDE_FILES GLOBAL PROPERTY G_INSTALL_CTEST_INCLUDE_FILES)
get_property(INSTALL_EXE_TARGETS GLOBAL PROPERTY G_INSTALL_EXE_TARGETS)
get_property(INSTALL_CUSTOM_TARGETS GLOBAL PROPERTY G_INSTALL_CUSTOM_TARGETS)
get_property(INSTALL_SRC_FILES GLOBAL PROPERTY G_INSTALL_SRC_FILES)
get_property(INSTALL_HEADER_FILES GLOBAL PROPERTY G_INSTALL_HEADER_FILES)
# Create the top level CTestTestfile.cmake: start from ${_autogen}, then add
# one include() line per collected ctest include file.
file(WRITE ${PROJECT_BINARY_DIR}/${CATCH_BUILD_DIR}/script/CTestTestfile.cmake ${_autogen})
foreach(CTEST_INCLUDE_FILE ${INSTALL_CTEST_INCLUDE_FILES})
get_filename_component(INCLUDE_FILE_NAME ${CTEST_INCLUDE_FILE} NAME)
file(APPEND ${PROJECT_BINARY_DIR}/${CATCH_BUILD_DIR}/script/CTestTestfile.cmake
"include(script/${INCLUDE_FILE_NAME})\n")
endforeach()
# Second CTestTestfile.cmake, written under script/windows, that only
# redirects into the ${CATCH_BUILD_DIR} subdirectory. It is installed at the
# package root further below.
file(WRITE ${PROJECT_BINARY_DIR}/${CATCH_BUILD_DIR}/script/windows/CTestTestfile.cmake
"${_autogen}\nsubdirs(${CATCH_BUILD_DIR})")
# Platform-specific package settings: TGZ/DEB/RPM on non-Windows hosts,
# ZIP on Windows.
if(NOT WIN32)
set(INSTALL_DIR ${CMAKE_INSTALL_DATADIR}/hip)
set(CPACK_GENERATOR "TGZ;DEB;RPM" CACHE STRING "Linux package types for catch tests")
set(CPACK_BINARY_DEB "ON")
set(CPACK_DEBIAN_FILE_NAME "DEB-DEFAULT")
set(CPACK_DEBIAN_PACKAGE_PROVIDES "catch")
set(CPACK_BINARY_RPM "ON")
set(CPACK_RPM_FILE_NAME "RPM-DEFAULT")
set(CPACK_RPM_EXCLUDE_FROM_AUTO_FILELIST_ADDITION "/opt")
if (CPACK_PACKAGE_VERSION MATCHES "local" )
# If building locally, the default value will cause a build failure:
# debug symbol packaging requires SOURCE_DIR to be small.
set(CPACK_RPM_BUILD_SOURCE_DIRS_PREFIX ${CPACK_INSTALL_PREFIX})
endif()
# Package release fields come from the build environment when provided,
# otherwise they default to "local".
if (DEFINED ENV{CPACK_DEBIAN_PACKAGE_RELEASE})
set(CPACK_DEBIAN_PACKAGE_RELEASE $ENV{CPACK_DEBIAN_PACKAGE_RELEASE})
else()
set(CPACK_DEBIAN_PACKAGE_RELEASE "local")
endif()
if(DEFINED ENV{CPACK_RPM_PACKAGE_RELEASE})
set(CPACK_RPM_PACKAGE_RELEASE $ENV{CPACK_RPM_PACKAGE_RELEASE})
else()
set(CPACK_RPM_PACKAGE_RELEASE "local")
endif()
# Ask rpm whether this host defines a distribution tag.
execute_process( COMMAND rpm --eval %{?dist}
RESULT_VARIABLE PROC_RESULT
OUTPUT_VARIABLE EVAL_RESULT
OUTPUT_STRIP_TRAILING_WHITESPACE )
# Add the OS distribution tag to the rpm package release. For deb packages
# it is set from the build env.
if ( PROC_RESULT EQUAL "0" AND NOT EVAL_RESULT STREQUAL "" )
string(APPEND CPACK_RPM_PACKAGE_RELEASE "%{?dist}")
endif()
set(CPACK_SOURCE_GENERATOR "TGZ")
# Install license file
set ( CPACK_RESOURCE_FILE_LICENSE "${CMAKE_SOURCE_DIR}/../LICENSE.txt" )
install(FILES ${CPACK_RESOURCE_FILE_LICENSE} DESTINATION ${INSTALL_DIR})
set(CPACK_RPM_PACKAGE_LICENSE "MIT")
else()
# windows packaging
set(INSTALL_DIR .)
set(CPACK_INSTALL_PREFIX "")
set(CPACK_SYSTEM_NAME "")
set(CPACK_GENERATOR "ZIP" CACHE STRING "Windows package types for catch tests")
set(CPACK_TEST_ZIP "ON")
set(CPACK_ZIP_TEST_PACKAGE_NAME "catch")
endif()
# Install layout, all below ${INSTALL_DIR}/${CATCH_BUILD_DIR}:
#   .        test executables and custom targets
#   script/  catch scripts and ctest include files
#   src/     test sources
#   headers/ test headers
set(INSTALL_DIR_TESTS ${INSTALL_DIR}/${CATCH_BUILD_DIR})
set(INSTALL_DIR_SCRIPT ${INSTALL_DIR}/${CATCH_BUILD_DIR}/script)
set(INSTALL_DIR_SRC ${INSTALL_DIR}/${CATCH_BUILD_DIR}/src)
set(INSTALL_DIR_HEADERS ${INSTALL_DIR}/${CATCH_BUILD_DIR}/headers)
# install catch scripts
install(FILES
${ADD_SCRIPT_PATH}
${CATCH_INCLUDE_PATH}
DESTINATION ${INSTALL_DIR_SCRIPT})
# install cmake include files
foreach(CTEST_INCLUDE_FILE ${INSTALL_CTEST_INCLUDE_FILES})
install(FILES ${CTEST_INCLUDE_FILE} DESTINATION ${INSTALL_DIR_SCRIPT})
endforeach()
# install source files
foreach(SRC_FILE ${INSTALL_SRC_FILES})
install(FILES ${SRC_FILE} DESTINATION ${INSTALL_DIR_SRC})
endforeach()
# install header files
foreach(HEADER_FILES ${INSTALL_HEADER_FILES})
install(FILES ${HEADER_FILES} DESTINATION ${INSTALL_DIR_HEADERS})
endforeach()
# install executables (OPTIONAL: a target that was not built is not an error)
foreach(EXE_TARGET ${INSTALL_EXE_TARGETS})
install(TARGETS ${EXE_TARGET}
RUNTIME DESTINATION ${INSTALL_DIR_TESTS} OPTIONAL)
endforeach()
# install custom targets
foreach(CUSTOM_TARGET ${INSTALL_CUSTOM_TARGETS})
install(FILES ${CUSTOM_TARGET} DESTINATION ${INSTALL_DIR_TESTS})
endforeach()
# install ctest files: the generated top-level file goes next to the tests,
# the redirecting one from script/windows goes to the package root
install(FILES
${PROJECT_BINARY_DIR}/${CATCH_BUILD_DIR}/script/CTestTestfile.cmake
DESTINATION ${INSTALL_DIR_TESTS})
install(FILES
${PROJECT_BINARY_DIR}/${CATCH_BUILD_DIR}/script/windows/CTestTestfile.cmake
DESTINATION ${INSTALL_DIR})
# install catchinfo
install(FILES ${PROJECT_BINARY_DIR}/${CATCH_BUILD_DIR}/catchInfo.txt DESTINATION ${INSTALL_DIR})
# install hip test config
install(DIRECTORY
${HIP_TEST_CONFIG_BINARY_DIR}
DESTINATION ${INSTALL_DIR_TESTS})
# Must come last: CPack reads the CPACK_* variables set above.
include(CPack)
@@ -0,0 +1,27 @@
# Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
# Add each test-suite subdirectory to the build, in this order.
foreach(_suite_dir memset memcpy kernelLaunch stream event warpSync example)
  add_subdirectory(${_suite_dir})
endforeach()
@@ -0,0 +1,34 @@
# Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
# Sources of the event benchmarks, one file per hipEvent* API.
set(TEST_SRC
hipEventCreate.cc
hipEventCreateWithFlags.cc
hipEventRecord.cc
hipEventDestroy.cc
hipEventSynchronize.cc
hipEventElapsedTime.cc
hipEventQuery.cc
)
# Build them into the EventPerformance executable and attach it to the
# build_tests target; compiled as C++17.
# NOTE(review): hip_add_exe_to_target is a project helper defined elsewhere;
# confirm its exact semantics there.
hip_add_exe_to_target(NAME EventPerformance
TEST_SRC ${TEST_SRC}
TEST_TARGET_NAME build_tests
COMPILE_OPTIONS -std=c++17)
@@ -0,0 +1,61 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_common.hh>
#include <performance_common.hh>
#include <resource_guards.hh>
/**
* @addtogroup event event
* @{
* @ingroup PerformanceTest
* Contains performance tests for all hipEvent related HIP APIs.
*/
// Benchmark body for hipEventCreate. Only the create call sits inside the
// timed section; the event is destroyed afterwards, outside of it.
// NOTE(review): Benchmark<> and TIMED_SECTION come from
// performance_common.hh; presumably Run() invokes operator() repeatedly --
// confirm there.
class HipEventCreateBenchmark : public Benchmark<HipEventCreateBenchmark> {
public:
void operator()() {
hipEvent_t event;
TIMED_SECTION(kTimerTypeCpu) { HIP_CHECK(hipEventCreate(&event)); }
// Cleanup, not part of the measurement.
HIP_CHECK(hipEventDestroy(event));
}
};
/**
* Test Description
* ------------------------
* - Executes `hipEventCreate`
* Test source
* ------------------------
* - performance/event/hipEventCreate.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipEventCreate") {
// Runs the hipEventCreate benchmark with its default settings.
HipEventCreateBenchmark benchmark;
benchmark.Run();
}
/**
* End doxygen group PerformanceTest.
* @}
*/
@@ -0,0 +1,88 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_common.hh>
#include <performance_common.hh>
#include <resource_guards.hh>
/**
* @addtogroup event event
* @{
* @ingroup PerformanceTest
*/
// Benchmark body for hipEventCreateWithFlags. Only the create call sits
// inside the timed section; the event is destroyed afterwards, outside of it.
// NOTE(review): Benchmark<> and TIMED_SECTION come from
// performance_common.hh; presumably Run(flag) forwards flag to operator() --
// confirm there.
class HipEventCreateWithFlagsBenchmark : public Benchmark<HipEventCreateWithFlagsBenchmark> {
public:
// flag: event creation flag passed straight to hipEventCreateWithFlags.
void operator()(unsigned flag) {
hipEvent_t event;
TIMED_SECTION(kTimerTypeCpu) { HIP_CHECK(hipEventCreateWithFlags(&event, flag)); }
// Cleanup, not part of the measurement.
HIP_CHECK(hipEventDestroy(event));
}
};
// Maps an event creation flag to its name. Any value that is not exactly one
// of the four known flags is reported as "flag combination".
static std::string GetEventCreateFlagName(unsigned flag) {
  if (flag == hipEventDefault) {
    return "hipEventDefault";
  }
  if (flag == hipEventBlockingSync) {
    return "hipEventBlockingSync";
  }
  if (flag == hipEventDisableTiming) {
    return "hipEventDisableTiming";
  }
  if (flag == hipEventInterprocess) {
    return "hipEventInterprocess";
  }
  return "flag combination";
}
// Runs the hipEventCreateWithFlags benchmark for one flag value. The flag's
// name is added as the section name before the run.
static void RunBenchmark(unsigned flag) {
HipEventCreateWithFlagsBenchmark benchmark;
benchmark.AddSectionName(GetEventCreateFlagName(flag));
benchmark.Run(flag);
}
/**
* Test Description
* ------------------------
* - Executes `hipEventCreateWithFlags` with all flags:
* -# Flags
* - hipEventDefault
* - hipEventBlockingSync
* - hipEventDisableTiming
* - hipEventInterprocess (currently disabled)
* Test source
* ------------------------
* - performance/event/hipEventCreateWithFlags.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipEventCreateWithFlags") {
// GENERATE re-runs the test case once per listed flag.
const auto flag = GENERATE(
hipEventDefault, hipEventBlockingSync,
hipEventDisableTiming /*, hipEventInterprocess disabled until fixed (EXSWHTEC-25) */);
RunBenchmark(flag);
}
/**
* End doxygen group PerformanceTest.
* @}
*/

برخی از فایل ها نشان داده نشدند زیرا تعداد زیادی فایل در این تفاوت تغییر کرده اند نمایش بیشتر