Add 'projects/hip-tests/' from commit '5ce508401e1934053b127de5bf756dcd56a326a2'
git-subtree-dir: projects/hip-tests git-subtree-mainline:cc7a96c46fgit-subtree-split:5ce508401e
This commit is contained in:
@@ -0,0 +1,48 @@
|
||||
resources:
|
||||
repositories:
|
||||
- repository: pipelines_repo
|
||||
type: github
|
||||
endpoint: ROCm
|
||||
name: ROCm/ROCm
|
||||
pipelines:
|
||||
- pipeline: hip_pipeline
|
||||
source: \HIP
|
||||
trigger:
|
||||
branches:
|
||||
include:
|
||||
- amd-staging
|
||||
- amd-mainline
|
||||
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml@pipelines_repo
|
||||
|
||||
trigger:
|
||||
batch: true
|
||||
branches:
|
||||
include:
|
||||
- amd-staging
|
||||
- amd-mainline
|
||||
paths:
|
||||
exclude:
|
||||
- '.jenkins'
|
||||
- CODEOWNERS
|
||||
- LICENSE.txt
|
||||
- '**/*.md'
|
||||
|
||||
pr:
|
||||
autoCancel: true
|
||||
branches:
|
||||
include:
|
||||
- amd-staging
|
||||
- amd-mainline
|
||||
paths:
|
||||
exclude:
|
||||
- '.jenkins'
|
||||
- CODEOWNERS
|
||||
- LICENSE.txt
|
||||
- '**/*.md'
|
||||
drafts: false
|
||||
|
||||
jobs:
|
||||
- template: ${{ variables.CI_COMPONENT_PATH }}/hip-tests.yml@pipelines_repo
|
||||
@@ -0,0 +1,10 @@
|
||||
Language: Cpp
|
||||
BasedOnStyle: Google
|
||||
AlignEscapedNewlinesLeft: false
|
||||
AlignOperands: false
|
||||
ColumnLimit: 100
|
||||
AlwaysBreakTemplateDeclarations: false
|
||||
DerivePointerAlignment: false
|
||||
IndentFunctionDeclarationAfterType: false
|
||||
MaxEmptyLinesToKeep: 2
|
||||
SortIncludes: false
|
||||
@@ -0,0 +1,20 @@
|
||||
# Set the default behavior, in case people don't have core.autocrlf set.
|
||||
* text=auto
|
||||
|
||||
# Explicitly declare text files you want to always be normalized and converted
|
||||
# to have LF line endings on checkout.
|
||||
*.c text eol=lf
|
||||
*.cpp text eol=lf
|
||||
*.cc text eol=lf
|
||||
*.h text eol=lf
|
||||
*.hpp text eol=lf
|
||||
*.txt text eol=lf
|
||||
|
||||
# Define files to support auto-remove trailing white space
|
||||
# Need to run the command below, before add modified file(s) to the staging area
|
||||
# git config filter.trimspace.clean 'sed -e "s/[[:space:]]*$//g"'
|
||||
# Every pattern must name the same "trimspace" filter that the git config
# command above defines; an undefined filter name is silently ignored by git.
*.cpp filter=trimspace
*.c filter=trimspace
*.h filter=trimspace
*.hpp filter=trimspace
*.md filter=trimspace
|
||||
@@ -0,0 +1,56 @@
|
||||
#!/usr/bin/env bash
#
# Check that changed lines of C/C++ files conform to the repository's
# .clang-format style.
#
# Usage: clang-format-check.sh [--range <rev-range>]
#   --range <rev-range>  Check lines changed in the given git revision range
#                        (for example "base..head"). Without it, the staged
#                        changes (git diff --cached) are checked.
#
# Environment:
#   CLANG_FORMAT       clang-format binary to use (default: clang-format)
#   CLANG_FORMAT_DIFF  clang-format-diff script to use
#                      (default: clang-format-diff)
#
# Exit status: 0 when nothing needs reformatting, 1 when at least one file
# does, 64 on a usage error.

set -euo pipefail

RANGE=""

while [[ $# -gt 0 ]]; do
  case "$1" in
    --range)
      # Under `set -u` a bare "$2" aborts with "unbound variable" when the
      # value is missing; report the usage error explicitly instead.
      if [[ $# -lt 2 ]]; then
        echo "Missing value for --range" >&2
        exit 64
      fi
      RANGE="$2"
      shift 2
      ;;
    *)
      echo "Unknown arg $1" >&2
      exit 64
      ;;
  esac
done

regex='\.(c|cc|cpp|cxx|h|hh|hpp|hxx)$'

# Only added/copied/modified/renamed files are of interest in both modes.
if [[ -n $RANGE ]]; then
  diff_args=(--name-only --diff-filter=ACMR "$RANGE")
else
  diff_args=(--cached --name-only --diff-filter=ACMR)
fi

# Read one path per line into an array so that paths containing spaces are
# not split into several words. `|| true` keeps a no-match grep from failing.
files=()
while IFS= read -r path; do
  if [[ -n $path ]]; then
    files+=("$path")
  fi
done < <(git diff "${diff_args[@]}" | grep -E "$regex" || true)

if [[ ${#files[@]} -eq 0 ]]; then
  echo "No C/C++ files to check"
  exit 0
fi
echo "Checking ${files[*]}"

# Fall back to the default LLVM install location on Windows (Git Bash paths).
clang_bin="${CLANG_FORMAT:-clang-format}"
if ! command -v "$clang_bin" >/dev/null 2>&1; then
  if [[ -x "/c/Program Files/LLVM/bin/clang-format.exe" ]]; then
    clang_bin="/c/Program Files/LLVM/bin/clang-format.exe"
  fi
fi

clang_format_diff="${CLANG_FORMAT_DIFF:-clang-format-diff}"
if ! command -v "$clang_format_diff" >/dev/null 2>&1; then
  if [[ -x "/c/Program Files/LLVM/share/clang/clang-format-diff.py" ]]; then
    clang_format_diff="/c/Program Files/LLVM/share/clang/clang-format-diff.py"
  fi
fi

status=0
for file in "${files[@]}"; do
  echo "Checking lines of $file"

  if [[ -n $RANGE ]]; then
    diff_output=$(git diff -U0 "$RANGE" -- "$file")
  else
    diff_output=$(git diff -U0 --cached -- "$file")
  fi

  # clang-format-diff prints a patch for every changed line that needs
  # reformatting. Treat either a non-zero exit or any printed patch as a
  # failure so the result does not depend on the tool's exit-code behavior,
  # and keep going so that all offending files are reported in one run.
  if ! format_diff=$(printf '%s\n' "$diff_output" |
      "$clang_format_diff" -style=file -fallback-style=none \
        -binary "$clang_bin" -p1) || [[ -n $format_diff ]]; then
    printf '%s\n' "$format_diff"
    status=1
  fi
done

exit "$status"
|
||||
فروخته شده
@@ -0,0 +1,2 @@
|
||||
#!/usr/bin/env bash
# Git hook entry point: hand control over to the clang-format check script
# that lives in the repository's .github/hooks directory.
repo_root="$(git rev-parse --show-toplevel)"
exec "${repo_root}/.github/hooks/clang-format-check.sh"
|
||||
فروخته شده
@@ -0,0 +1,5 @@
|
||||
disabled: false
|
||||
scmId: gh-emu-rocm
|
||||
branchesToScan:
|
||||
- amd-staging
|
||||
- amd-mainline
|
||||
+36
@@ -0,0 +1,36 @@
|
||||
## Associated JIRA ticket number/Github issue number
|
||||
<!-- For example: "Closes #1234" or "Fixes SWDEV-123456" -->
|
||||
|
||||
## What type of PR is this? (check all applicable)
|
||||
|
||||
- [ ] Refactor
|
||||
- [ ] Feature
|
||||
- [ ] Bug Fix
|
||||
- [ ] Optimization
|
||||
- [ ] Documentation Update
|
||||
- [ ] Continuous Integration
|
||||
|
||||
## What were the changes?
|
||||
|
||||
<!-- Please give a short summary of the change. -->
|
||||
|
||||
## Why are these changes needed?
|
||||
|
||||
<!-- Please explain the motivation behind the change and why this solves the given problem. -->
|
||||
|
||||
## Updated CHANGELOG?
|
||||
|
||||
<!-- Needed for Release updates for a ROCm release. -->
|
||||
|
||||
- [ ] Yes
|
||||
- [ ] No, Does not apply to this PR.
|
||||
|
||||
## Added/Updated documentation?
|
||||
|
||||
- [ ] Yes
|
||||
- [ ] No, Does not apply to this PR.
|
||||
|
||||
## Additional Checks
|
||||
|
||||
- [ ] I have added tests relevant to the introduced functionality, and the unit tests are passing locally.
|
||||
- [ ] Any dependent changes have been merged.
|
||||
@@ -0,0 +1,76 @@
|
||||
import os, re, sys
|
||||
from typing import List, Optional
|
||||
|
||||
|
||||
def is_checkbox(line: str) -> bool:
    """Return True when *line* is a Markdown task-list item.

    Both unchecked (``- [ ] text``) and checked (``- [x] text`` /
    ``- [X] text``) items count; at least one character must follow the
    closing bracket.
    """
    checkbox_pattern = r"^\s*-\s*\[[ xX]\]\s*.+"
    return re.match(checkbox_pattern, line) is not None
|
||||
|
||||
|
||||
def is_checked(line: str) -> bool:
    """Return True when *line* is a ticked Markdown task-list item.

    Only ``[x]`` and ``[X]`` count as ticked; at least one character must
    follow the closing bracket.
    """
    checked_pattern = r"^\s*-\s*\[[xX]\]\s*.+"
    return re.match(checked_pattern, line) is not None
|
||||
|
||||
|
||||
def is_comment(line: str) -> bool:
    """Return True when *line* consists solely of an HTML comment.

    The comment must open and close on the same line; only surrounding
    whitespace is tolerated around ``<!-- ... -->``.
    """
    comment_pattern = r"^\s*<!--.*-->\s*$"
    return re.match(comment_pattern, line) is not None
|
||||
|
||||
|
||||
def text_clean(lines: List[str]) -> str:
    """Return the visible text of *lines* with all whitespace removed.

    HTML comments (``<!-- ... -->``) are stripped before the text is
    collapsed, including comments that span several lines or share a line
    with real text. A section that contains nothing but template comments
    therefore yields an empty string.

    Args:
        lines: The lines of one section, without trailing newlines.

    Returns:
        The remaining text with every whitespace character removed; empty
        when the section has no visible content.
    """
    # Join with newlines so a comment opened on one line and closed on a
    # later one can be matched as a single span (DOTALL lets "." cross lines).
    joined = "\n".join(lines)
    visible = re.sub(r"<!--.*?-->", "", joined, flags=re.DOTALL)
    return "".join(visible.split())
|
||||
|
||||
|
||||
def validate_section(section_name: str, lines: List[str]) -> Optional[str]:
    """Validate one section of the PR description.

    A section that contains at least one checkbox is treated as a
    checklist and needs at least one ticked box. Any other section is
    treated as free text and must contain some non-comment,
    non-whitespace content.

    Args:
        section_name: Heading text of the section, used in the message.
        lines: The lines that belong to the section.

    Returns:
        An error message when the section is invalid, otherwise ``None``.
    """
    if not any(is_checkbox(entry) for entry in lines):
        # Free-text section: something must remain after cleaning.
        if text_clean(lines):
            return None
        return f"Section {section_name} is empty text section"

    # Checklist section: a single ticked box is enough.
    for entry in lines:
        if is_checked(entry):
            return None
    return f"Section {section_name} is a checklist without selections"
|
||||
|
||||
|
||||
def check_description(description: str) -> List[str]:
    """Split a PR description into ``##`` sections and validate each one.

    Lines that appear before the first heading are ignored.

    Args:
        description: The raw PR description text.

    Returns:
        A list of error messages in section order; empty when the
        description is valid.
    """
    if not description:
        # pull_request_template is not merged yet, so an empty description
        # is treated as valid for now.
        # TODO: report "PR description is empty" once the template is merged.
        return []

    heading = re.compile(r"^\s*##\s*(.+?)\s*$")

    # Each entry is (section title, lines collected under that title).
    sections = []
    for raw_line in description.splitlines():
        found = heading.match(raw_line)
        if found is not None:
            sections.append((found.group(1), []))
        elif sections:
            sections[-1][1].append(raw_line)

    if not sections:
        return ["No sections available, template is empty"]

    outcomes = [validate_section(title, body) for title, body in sections]
    return [message for message in outcomes if message]
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # The PR body is read from the PR_DESCRIPTION environment variable;
    # a missing variable is treated like an empty description.
    pr_description = os.getenv("PR_DESCRIPTION", "")

    errors = check_description(pr_description)
    if errors:
        print("\n".join(errors))
        # sys.exit is used instead of the bare exit() helper, which is only
        # injected by the site module and meant for interactive sessions.
        sys.exit(1)

    print("All good")
    sys.exit(0)
|
||||
@@ -0,0 +1,22 @@
|
||||
name: Clang format check
|
||||
on:
|
||||
pull_request:
|
||||
types: [synchronize, opened]
|
||||
|
||||
jobs:
|
||||
format:
|
||||
runs-on: AMD-ROCm-Internal-dev1
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Install clang-format
|
||||
run: |
|
||||
sudo apt update && sudo apt install -y clang-format
|
||||
|
||||
- name: Run clang-format-check
|
||||
id: clang-format
|
||||
run: |
|
||||
chmod +x .github/hooks/clang-format-check.sh
|
||||
./.github/hooks/clang-format-check.sh --range "${{ github.event.pull_request.base.sha }}..${{ github.event.pull_request.head.sha }}"
|
||||
@@ -0,0 +1,73 @@
|
||||
name: Keywords checker
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
types: [opened, synchronize, reopened, edited]
|
||||
branches:
|
||||
- amd-staging
|
||||
workflow_dispatch:
|
||||
|
||||
jobs:
|
||||
check-keywords:
|
||||
runs-on: AMD-ROCm-Internal-dev1
|
||||
env:
|
||||
KEYWORDS: ${{ vars.KEYWORDS }}
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v3
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Check keywords
  env:
    # Event data is passed through the environment instead of being
    # interpolated into the script text, so a crafted PR title or branch
    # name cannot inject shell commands.
    BASE_BRANCH: ${{ github.event.pull_request.base.ref }}
    HEAD_BRANCH: ${{ github.event.pull_request.head.ref }}
    PR_TITLE: ${{ github.event.pull_request.title }}
  run: |
    set -e

    if [ -z "$KEYWORDS" ]; then
      echo "No keywords set. Skipping check"
      exit 0
    fi

    # KEYWORDS is a comma-separated list.
    IFS=',' read -ra KEYWORDS_ARRAY <<< "$KEYWORDS"
    echo "Checking against list of keywords: ${KEYWORDS_ARRAY[*]}"

    MATCHED=0

    # 1) Changed files. The loops read from process substitutions rather
    #    than pipelines so that MATCHED is updated in this shell, not in a
    #    subshell. "${KEYWORDS_ARRAY[@]}" yields one word per keyword.
    while IFS= read -r file; do
      if [ -f "$file" ]; then
        for keyword in "${KEYWORDS_ARRAY[@]}"; do
          while IFS= read -r line; do
            echo "Matched in '$file': $line"
            MATCHED=1
          done < <(grep -in -E -e "$keyword" "$file" || true)
        done
      fi
    done < <(git diff --name-only "origin/$BASE_BRANCH..origin/$HEAD_BRANCH")

    # 2) Commit messages of the PR.
    while IFS= read -r commit; do
      msg=$(git log -1 --format=%B "$commit")
      for keyword in "${KEYWORDS_ARRAY[@]}"; do
        if grep -i -q -e "$keyword" <<< "$msg"; then
          echo "Match in commit $commit: $msg"
          MATCHED=1
        fi
      done
    done < <(git log --format=%H "origin/$BASE_BRANCH..origin/$HEAD_BRANCH")

    # 3) PR title.
    for keyword in "${KEYWORDS_ARRAY[@]}"; do
      if grep -i -q -e "$keyword" <<< "$PR_TITLE"; then
        echo "Match in PR title"
        MATCHED=1
      fi
    done

    if [ "$MATCHED" -eq 1 ]; then
      echo "Keywords found, please see diagnostics higher"
      exit 1
    else
      echo "No keywords found"
      exit 0
    fi
|
||||
+15
@@ -0,0 +1,15 @@
|
||||
name: Rocm Validation Suite KWS
|
||||
on:
|
||||
push:
|
||||
branches: [amd-staging, amd-mainline]
|
||||
pull_request:
|
||||
types: [opened, synchronize, reopened]
|
||||
workflow_dispatch:
|
||||
jobs:
|
||||
kws:
|
||||
if: ${{ github.event_name == 'pull_request' }}
|
||||
uses: AMD-ROCm-Internal/rocm_ci_infra/.github/workflows/kws.yml@mainline
|
||||
secrets: inherit
|
||||
with:
|
||||
pr_number: ${{github.event.pull_request.number}}
|
||||
base_branch: ${{github.base_ref}}
|
||||
@@ -0,0 +1,46 @@
|
||||
name: Validate PR Title
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
types: [opened, edited, synchronize, reopened]
|
||||
|
||||
jobs:
|
||||
validate-pr-title:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Check PR Title
  id: check-pr-title
  env:
    # The title is attacker-controlled text; pass it through the
    # environment instead of interpolating it into the script, so quotes
    # or $(...) in a title cannot run as shell code.
    PR_TITLE: ${{ github.event.pull_request.title }}
  run: |
    if [[ ! "$PR_TITLE" =~ ^SWDEV-[0-9]+ ]]; then
      echo "::error::PR title must start with a Jira ticket ID, SWDEV-<num>"
      exit 1
    else
      echo "PR title is valid"
    fi
|
||||
|
||||
validate-commit-messages:
|
||||
runs-on: AMD-ROCm-Internal-dev1
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v3
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Check all commit messages
  id: validate-commit-messags
  env:
    # Branch names come from the pull request and are attacker-controlled;
    # pass them through the environment instead of interpolating them into
    # the script text.
    BASE_REF: ${{ github.event.pull_request.base.ref }}
    HEAD_REF: ${{ github.event.pull_request.head.ref }}
  run: |
    # One "<hash> <subject>" line per commit that is on the head branch
    # but not on the base branch.
    COMMITS=$(git log --format="%H %s" "origin/$BASE_REF..origin/$HEAD_REF")
    echo "$COMMITS"
    echo "$COMMITS" | while read -r hash message; do
      echo -e "$hash $message\n "
      if [[ "$message" =~ ^SWDEV-[0-9]+ ]]; then
        echo "Valid JIRA ticket format"
      elif [[ "$message" =~ ^Merge\ branch ]]; then
        echo "Merge commits are allowed"
      else
        echo "::error:: $hash commit should start with Jira ticket ID, SWDEV-<num> or be a merge commit"
        exit 1
      fi
    done
|
||||
@@ -0,0 +1,25 @@
|
||||
name: ROCm CI Caller
|
||||
on:
|
||||
pull_request:
|
||||
branches: [amd-staging, amd-npi, release/rocm-rel-*, amd-mainline]
|
||||
types: [opened, reopened, synchronize]
|
||||
push:
|
||||
branches: [amd-mainline]
|
||||
workflow_dispatch:
|
||||
issue_comment:
|
||||
types: [created]
|
||||
|
||||
jobs:
|
||||
call-workflow:
|
||||
if: github.event_name != 'issue_comment' ||(github.event_name == 'issue_comment' && github.event.issue.pull_request && (startsWith(github.event.comment.body, '!verify') || startsWith(github.event.comment.body, '!linux-hip-psdb') || startsWith(github.event.comment.body, '!verify release') || startsWith(github.event.comment.body, '!verify retest')))
|
||||
uses: AMD-ROCm-Internal/rocm_ci_infra/.github/workflows/rocm_ci.yml@mainline
|
||||
secrets: inherit
|
||||
with:
|
||||
input_sha: ${{github.event_name == 'pull_request' && github.event.pull_request.head.sha || (github.event_name == 'push' && github.sha) || (github.event_name == 'issue_comment' && github.event.issue.pull_request.head.sha) || github.sha}}
|
||||
input_pr_num: ${{github.event_name == 'pull_request' && github.event.pull_request.number || (github.event_name == 'issue_comment' && github.event.issue.number) || 0}}
|
||||
input_pr_url: ${{github.event_name == 'pull_request' && github.event.pull_request.html_url || (github.event_name == 'issue_comment' && github.event.issue.pull_request.html_url) || ''}}
|
||||
input_pr_title: ${{github.event_name == 'pull_request' && github.event.pull_request.title || (github.event_name == 'issue_comment' && github.event.issue.pull_request.title) || ''}}
|
||||
repository_name: ${{ github.repository }}
|
||||
base_ref: ${{github.event_name == 'pull_request' && github.event.pull_request.base.ref || (github.event_name == 'issue_comment' && github.event.issue.pull_request.base.ref) || github.ref}}
|
||||
trigger_event_type: ${{ github.event_name }}
|
||||
comment_text: ${{ github.event_name == 'issue_comment' && github.event.comment.body || '' }}
|
||||
@@ -0,0 +1,22 @@
|
||||
name: Validate PR description
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
types: [opened, edited, synchronize]
|
||||
|
||||
jobs:
|
||||
validate-pr-description:
|
||||
runs-on: AMD-ROCm-Internal-dev1
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.13"
|
||||
|
||||
- name: Validate PR description
|
||||
env:
|
||||
PR_DESCRIPTION: ${{ github.event.pull_request.body }}
|
||||
run: python .github/scripts/validate_pr_description.py
|
||||
@@ -0,0 +1,22 @@
|
||||
.*
|
||||
!.gitignore
|
||||
*.o
|
||||
*.exe
|
||||
*.swp
|
||||
lib
|
||||
packages
|
||||
build
|
||||
tags
|
||||
samples/0_Intro/module_api/runKernel.hip.out
|
||||
samples/0_Intro/module_api/vcpy_isa.code
|
||||
samples/0_Intro/module_api/vcpy_isa.hsaco
|
||||
samples/0_Intro/module_api/vcpy_kernel.co
|
||||
samples/0_Intro/module_api/vcpy_kernel.code
|
||||
samples/1_Utils/hipInfo/hipInfo
|
||||
samples/1_Utils/hipDispatchLatency/hipDispatchLatency
|
||||
|
||||
utils/coverage/generateHipAPICoverage
|
||||
utils/coverage/CoverageReport.xml
|
||||
utils/coverage/coverageReportHTML/CoverageReport.html
|
||||
utils/coverage/coverageReportHTML/testAPIs
|
||||
utils/coverage/coverageReportHTML/testModules
|
||||
@@ -0,0 +1,110 @@
|
||||
def hipBuildTest(String backendLabel) {
|
||||
node(backendLabel) {
|
||||
stage("SYNC - ${backendLabel}") {
|
||||
|
||||
// Checkout hip-tests repository with the PR patch
|
||||
dir("${WORKSPACE}/hip-tests") {
|
||||
checkout scm
|
||||
env.HIP_TESTS_DIR = "${WORKSPACE}" + "/hip-tests"
|
||||
}
|
||||
|
||||
// Clone hip repository
|
||||
dir("${WORKSPACE}/hip") {
|
||||
git branch: 'develop',
|
||||
url: 'https://github.com/ROCm-Developer-Tools/hip'
|
||||
env.HIP_DIR = "${WORKSPACE}" + "/hip"
|
||||
}
|
||||
|
||||
// Clone clr repository
|
||||
dir("${WORKSPACE}/clr") {
|
||||
git branch: 'develop',
|
||||
credentialsId: 'branch-credentials',
|
||||
url: 'https://github.com/ROCm-Developer-Tools/clr'
|
||||
env.CLR_DIR = "${WORKSPACE}" + "/clr"
|
||||
}
|
||||
|
||||
// Clone hipcc repository
|
||||
dir("${WORKSPACE}/hipcc") {
|
||||
git branch: 'develop',
|
||||
credentialsId: 'branch-credentials',
|
||||
url: 'https://github.com/ROCm-Developer-Tools/hipcc'
|
||||
env.HIPCC_DIR = "${WORKSPACE}" + "/hipcc"
|
||||
}
|
||||
}
|
||||
|
||||
stage("BUILD HIP - ${backendLabel}") {
|
||||
// Running the build on clr workspace
|
||||
dir("${WORKSPACE}/clr") {
|
||||
sh """#!/usr/bin/env bash
|
||||
set -x
|
||||
rm -rf build
|
||||
mkdir -p build
|
||||
cd build
|
||||
# Check if backend label contains string "amd" or backend host is a server with amd gpu
|
||||
if [[ $backendLabel =~ amd ]]; then
|
||||
cmake -DCLR_BUILD_HIP=ON -DHIP_PATH=\$PWD/install -DHIPCC_BIN_DIR=\$HIPCC_DIR/bin -DHIP_COMMON_DIR=\$HIP_DIR -DCMAKE_PREFIX_PATH="/opt/rocm/" -DCMAKE_INSTALL_PREFIX=\$PWD/install ..
|
||||
else
|
||||
cmake -DCLR_BUILD_HIP=ON -DHIP_PLATFORM=nvidia -DHIPCC_BIN_DIR=\$HIPCC_DIR/bin -DHIP_COMMON_DIR=\$HIP_DIR -DCMAKE_INSTALL_PREFIX=\$PWD/install ..
|
||||
fi
|
||||
make -j\$(nproc)
|
||||
make install -j\$(nproc)
|
||||
"""
|
||||
}
|
||||
}
|
||||
|
||||
stage("BUILD HIP TESTS - ${backendLabel}") {
|
||||
// Running the build on HIP TESTS workspace
|
||||
dir("${WORKSPACE}/hip-tests") {
|
||||
env.HIP_PATH = "${CLR_DIR}" + "/build/install"
|
||||
sh """#!/usr/bin/env bash
|
||||
set -x
|
||||
rm -rf build
|
||||
mkdir -p build
|
||||
cd build
|
||||
echo "testing $HIP_PATH"
|
||||
# Check if backend label contains string "amd" or backend host is a server with amd gpu
|
||||
if [[ $backendLabel =~ amd ]]; then
|
||||
cmake -DHIP_PLATFORM=amd -DHIP_PATH=\$CLR_DIR/build/install ../catch
|
||||
else
|
||||
export HIP_PLATFORM=nvidia
|
||||
cmake -DHIP_PLATFORM=nvidia -DHIP_PATH=\$CLR_DIR/build/install ../catch
|
||||
fi
|
||||
make -j\$(nproc) build_tests
|
||||
"""
|
||||
}
|
||||
}
|
||||
|
||||
timeout(time: 1, unit: 'HOURS') {
|
||||
stage("TEST - ${backendLabel}") {
|
||||
dir("${WORKSPACE}/hip-tests") {
|
||||
sh """#!/usr/bin/env bash
|
||||
set -x
|
||||
cd build
|
||||
if [[ $backendLabel =~ amd ]]; then
|
||||
ctest --overwrite BuildDirectory=. --output-junit hiptest_output_catch_amd.xml
|
||||
else
|
||||
ctest --overwrite BuildDirectory=. --output-junit hiptest_output_catch_nvidia.xml -E 'Unit_hipMemcpyHtoD_Positive_Synchronization_Behavior|Unit_hipMemcpy_Positive_Synchronization_Behavior|Unit_hipFreeNegativeHost'
|
||||
fi
|
||||
"""
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
timestamps {
|
||||
node('external-bootstrap') {
|
||||
skipDefaultCheckout()
|
||||
|
||||
// labels belonging to each backend - AMD, NVIDIA
|
||||
String[] labels = ['hip-amd-gfx908-ubu2004', 'hip-nvidia-rtx5000-ubu2004']
|
||||
buildMap = [:]
|
||||
|
||||
labels.each { backendLabel ->
|
||||
echo "backendLabel: ${backendLabel}"
|
||||
buildMap[backendLabel] = { hipBuildTest(backendLabel) }
|
||||
}
|
||||
buildMap['failFast'] = false
|
||||
parallel buildMap
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,2 @@
|
||||
* @cpaquot_amdeng @gandryey_amdeng @skudchad_amdeng @lmoriche_amdeng
|
||||
|
||||
@@ -0,0 +1,116 @@
|
||||
# Contributing to hip-tests #
|
||||
|
||||
We welcome contributions to the hip-tests project. Please follow these details to help ensure your contributions will be successfully accepted.
|
||||
If you want to contribute to our documentation, refer to {doc}`Contribute to ROCm docs <rocm:contribute/contributing>`.
|
||||
|
||||
## Issue Discussion ##
|
||||
|
||||
Please use the [GitHub Issue](https://github.com/ROCm/hip-tests/issues) tab to notify us of issues.
|
||||
|
||||
* Use your best judgement for issue creation. If your issue is already listed, upvote the issue and
|
||||
comment or post to provide additional details, such as how you reproduced this issue.
|
||||
* If you're not sure if your issue is the same, err on the side of caution and file your issue.
|
||||
You can add a comment to include the issue number (and link) for the similar issue. If we evaluate
|
||||
your issue as being the same as the existing issue, we'll close the duplicate.
|
||||
* If your issue doesn't exist, use the issue template to file a new issue.
|
||||
* When filing an issue, be sure to provide as much information as possible, including script output so
|
||||
we can collect information about your configuration. This helps reduce the time required to
|
||||
reproduce your issue.
|
||||
* Check your issue regularly, as we may require additional information to successfully reproduce the
|
||||
issue.
|
||||
* You may also open an issue to ask questions to the maintainers about whether a proposed change
|
||||
meets the acceptance criteria, or to discuss an idea pertaining to the library.
|
||||
|
||||
## Acceptance Criteria ##
|
||||
|
||||
HIP-tests is a C++ based Catch2 unit test framework that allows developers to test HIP API functionalities for AMD and NVIDIA GPUs from single source code.
|
||||
Contributors wishing to make bug fixes or unit test enhancements should test their changes on both AMD and NVIDIA platforms.

### Doxygen Editing Guidelines ###
|
||||
|
||||
- Bugs should be marked with @bug near the code where the bug might be fixed. The @bug message will appear in the API description and also in the
|
||||
doxygen bug list.
|
||||
|
||||
## Code Structure ##
|
||||
|
||||
hip-tests contains mainly three types of application/test codes,
|
||||
|
||||
- `catch/unit`
|
||||
|
||||
This contains catch2 unit tests for different kinds of feature functionalities in HIP, for example,
|
||||
- `atomic` is to test all types of atomic operations,
|
||||
- `compiler` contains tests to launch kernels,
|
||||
- `memory` includes applications to test and use different kinds of memory related HIP APIs.
|
||||
etc.
|
||||
|
||||
- `perftest`
|
||||
|
||||
This directory has some perftest examples.
|
||||
|
||||
- `samples`
|
||||
|
||||
This directory has many hip samples, including codes and instructions for building and executing different kinds of hip features and operations.
|
||||
|
||||
## Coding Style ##
|
||||
|
||||
Catch2 is a unit testing framework for C++ with CMake integration. For more information and a tutorial, refer to the [Catch2 documentation](https://github.com/catchorg/Catch2/blob/devel/docs/tutorial.md).
|
||||
|
||||
## Pull Request Guidelines ##
|
||||
|
||||
By creating a pull request, you agree to the statements made in the code license section. Your pull request should target the default branch. Our current default branch is the develop branch, which serves as our integration branch.
|
||||
|
||||
Follow existing best practice for writing a good Git commit message.
|
||||
|
||||
Some tips:
|
||||
http://chris.beams.io/posts/git-commit/
|
||||
https://robots.thoughtbot.com/5-useful-tips-for-a-better-commit-message
|
||||
|
||||
In particular :
|
||||
- Use imperative voice, ie "Fix this bug", "Refactor the XYZ routine", "Update the doc".
|
||||
Not : "Fixing the bug", "Fixed the bug", "Bug fix", etc.
|
||||
- Subject should summarize the commit. Do not end subject with a period. Use a blank line
|
||||
after the subject.
|
||||
|
||||
### Deliverables ###
|
||||
|
||||
hip-tests is an open source repository. Because of this, we include the following license description at the top of every source file.
|
||||
If you create new source files in the repository, please include this text in them as well (replacing "xx" with the digits for the current year):
|
||||
```
|
||||
// Copyright (c) 20xx Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
```
|
||||
|
||||
### Process ###
|
||||
|
||||
After you create a PR, you can take a look at a diff of the changes you made using the PR's "Files" tab.
|
||||
|
||||
PRs must pass through the checks and the code review described in the [Acceptance Criteria](#acceptance-criteria) section before they can be merged.
|
||||
|
||||
Checks may take some time to complete. You can view their progress in the table near the bottom of the pull request page. You may also be able to use the links in the table
|
||||
to view logs associated with a check if it fails.
|
||||
|
||||
During code reviews, another developer will take a look through your proposed change. If any modifications are requested (or further discussion about anything is
|
||||
needed), they may leave a comment. You can follow up and respond to the comment, and/or create comments of your own if you have questions or ideas.
|
||||
When a modification request has been completed, the conversation thread about it will be marked as resolved.
|
||||
|
||||
To update the code in your PR (eg. in response to a code review discussion), you can simply push another commit to the branch used in your pull request.
|
||||
|
||||
## References ##
|
||||
* For hip-test repository information, refer to [README](https://github.com/ROCm/hip-tests).
|
||||
* For how to build and run the HIP runtime and HIP Catch2 unit tests from source, on either the AMD or NVIDIA platform, refer to [build HIP](https://github.com/ROCm/HIP/blob/develop/docs/install/build.rst).
|
||||
* The HIP samples give detailed instructions on how to build and run HIP applications; refer to [HIP samples](https://github.com/ROCm/hip-tests/tree/develop/samples).
|
||||
@@ -0,0 +1,20 @@
|
||||
Copyright (c) 2008 - 2024 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
|
||||
@@ -0,0 +1,60 @@
|
||||
## What is this repository for? ###
|
||||
|
||||
This repository provides unit tests for [HIP](https://github.com/ROCm/HIP) implementation.
|
||||
|
||||
## DISCLAIMER
|
||||
|
||||
The information presented in this document is for informational purposes only and may contain technical inaccuracies, omissions, and typographical errors. The information contained herein is subject to change and may be rendered inaccurate for many reasons, including but not limited to product and roadmap changes, component and motherboard version changes, new model and/or product releases, product differences between differing manufacturers, software changes, BIOS flashes, firmware upgrades, or the like. Any computer system has risks of security vulnerabilities that cannot be completely prevented or mitigated. AMD assumes no obligation to update or otherwise correct or revise this information. However, AMD reserves the right to revise this information and to make changes from time to time to the content hereof without obligation of AMD to notify any person of such revisions or changes. THIS INFORMATION IS PROVIDED “AS IS.” AMD MAKES NO REPRESENTATIONS OR WARRANTIES WITH RESPECT TO THE CONTENTS HEREOF AND ASSUMES NO RESPONSIBILITY FOR ANY INACCURACIES, ERRORS, OR OMISSIONS THAT MAY APPEAR IN THIS INFORMATION. AMD SPECIFICALLY DISCLAIMS ANY IMPLIED WARRANTIES OF NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR ANY PARTICULAR PURPOSE. IN NO EVENT WILL AMD BE LIABLE TO ANY PERSON FOR ANY RELIANCE, DIRECT, INDIRECT, SPECIAL, OR OTHER CONSEQUENTIAL DAMAGES ARISING FROM THE USE OF ANY INFORMATION CONTAINED HEREIN, EVEN IF AMD IS EXPRESSLY ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. AMD, the AMD Arrow logo, and combinations thereof are trademarks of Advanced Micro Devices, Inc. Other product names used in this publication are for identification purposes only and may be trademarks of their respective companies.
|
||||
|
||||
©2025 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
|
||||
## Repository branches
|
||||
|
||||
The hip-tests repository maintains several branches. The branches that are of importance are:
|
||||
|
||||
* Main branch: This is the stable branch. It is up to date with the latest release branch, for example, if the latest release is rocm-5.4, main branch will be the repository based on this release.
|
||||
* Develop branch: This is the default branch, on which new features are still under development and visible. While this may be of interest to many, it should be noted that this branch and the features under development might not be stable.
|
||||
* Release branches. These are branches corresponding to each ROCM release, listed with release tags, such as rocm-5.4, etc.
|
||||
|
||||
## Release tagging
|
||||
|
||||
hip-tests releases typically follow the naming convention of the corresponding ROCM release to help differentiate them.
|
||||
|
||||
* rocm x.yy: These are the stable releases based on the ROCM release.
|
||||
This type of release is typically made once a month.
|
||||
|
||||
## Build HIP Catch tests
|
||||
|
||||
For building HIP from source, please check instructions on the [HIP page](https://rocm.docs.amd.com/projects/HIP/en/latest/install/build.html).
|
||||
|
||||
HIP catch tests can be built via the following instructions:
|
||||
|
||||
1. Clone the hip-tests source code from the repository, specifying the branch. The default branch is `develop`; for example,
|
||||
```bash
|
||||
$ git clone -b develop https://github.com/ROCm/hip-tests.git
|
||||
$ export HIP_TESTS_DIR="$(readlink -f hip-tests)"
|
||||
```
|
||||
|
||||
2. Build the catch tests
|
||||
```bash
|
||||
$ cd "$HIP_TESTS_DIR"
|
||||
$ mkdir -p build; cd build
|
||||
$ cmake ../catch/ -DHIP_PLATFORM=amd
|
||||
$ make -j$(nproc) build_tests
|
||||
$ ctest # run tests
|
||||
```
|
||||
|
||||
HIP catch tests are built under the folder `$HIP_TESTS_DIR/build`.
|
||||
|
||||
### Build HIP Catch2 standalone test
|
||||
|
||||
HIP Catch2 supports building standalone tests, for example,
|
||||
|
||||
```bash
|
||||
$ hipcc $HIP_TESTS_DIR/catch/unit/memory/hipPointerGetAttributes.cc -I ./catch/include ./catch/hipTestMain/standalone_main.cc -I ./catch/external/Catch2 -o hipPointerGetAttributes
|
||||
$ ./hipPointerGetAttributes
|
||||
```
|
||||
|
||||
### Building with address sanitizer
|
||||
|
||||
To build catch tests with Address Sanitizer options, use the cmake option `-DENABLE_ADDRESS_SANITIZER=ON`.
|
||||
@@ -0,0 +1,8 @@
|
||||
# Common Tests - Test independent of all platforms
|
||||
set(TEST_SRC
|
||||
add.cc
|
||||
)
|
||||
|
||||
hip_add_exe_to_target(NAME ABMAddKernels
|
||||
TEST_SRC ${TEST_SRC}
|
||||
TEST_TARGET_NAME build_tests)
|
||||
@@ -0,0 +1,41 @@
|
||||
#include <hip_test_common.hh>
|
||||
#include <iostream>
|
||||
|
||||
/**
 * Element-wise addition kernel: stores a[i] + b[i] into c[i].
 *
 * Each thread handles the element selected by its threadIdx.x; a thread whose
 * index is not below `size` does nothing.
 */
template <typename T> __global__ void add(T* a, T* b, T* c, size_t size) {
  const size_t idx = threadIdx.x;
  if (idx >= size) {
    return;
  }
  c[idx] = a[idx] + b[idx];
}
|
||||
|
||||
/**
 * Runs the templated `add` kernel for several element types and sizes and
 * compares the device output with a reference computed on the host.
 *
 * All HIP calls go through HIP_CHECK so that every failure is reported the
 * same way, and the device result is copied into its own vector instead of
 * overwriting one of the inputs.
 */
TEMPLATE_TEST_CASE("ABM_AddKernel_MultiTypeMultiSize", "", int, long, float, long long, double) {
  auto size = GENERATE(as<size_t>{}, 100, 500, 1000);
  const size_t bytes = sizeof(TestType) * size;

  TestType* d_a = nullptr;
  TestType* d_b = nullptr;
  TestType* d_c = nullptr;
  HIP_CHECK(hipMalloc(&d_a, bytes));
  HIP_CHECK(hipMalloc(&d_b, bytes));
  HIP_CHECK(hipMalloc(&d_c, bytes));

  // Host inputs and the expected sum: a[i] = b[i] = i + 1, expected[i] = 2 * (i + 1).
  std::vector<TestType> a, b, expected;
  a.reserve(size);
  b.reserve(size);
  expected.reserve(size);
  for (size_t i = 0; i < size; i++) {
    a.push_back(i + 1);
    b.push_back(i + 1);
    expected.push_back(2 * (i + 1));
  }

  HIP_CHECK(hipMemcpy(d_a, a.data(), bytes, hipMemcpyHostToDevice));
  HIP_CHECK(hipMemcpy(d_b, b.data(), bytes, hipMemcpyHostToDevice));

  // The kernel indexes by threadIdx.x only, so it is launched as one block of `size` threads.
  hipLaunchKernelGGL(add<TestType>, 1, size, 0, 0, d_a, d_b, d_c, size);
  HIP_CHECK(hipGetLastError());

  std::vector<TestType> result(size);
  HIP_CHECK(hipMemcpy(result.data(), d_c, bytes, hipMemcpyDeviceToHost));

  HIP_CHECK(hipFree(d_a));
  HIP_CHECK(hipFree(d_b));
  HIP_CHECK(hipFree(d_c));

  REQUIRE(result == expected);
}
|
||||
@@ -0,0 +1 @@
|
||||
add_subdirectory(AddKernels)
|
||||
@@ -0,0 +1,378 @@
|
||||
cmake_minimum_required(VERSION 3.16.8)
|
||||
|
||||
# to skip the simple compiler test
|
||||
set(CMAKE_C_COMPILER_WORKS 1)
|
||||
set(CMAKE_CXX_COMPILER_WORKS 1)
|
||||
|
||||
project(hiptests)
|
||||
|
||||
option(ENABLE_ADDRESS_SANITIZER "Option to enable ASAN build" OFF)
|
||||
option(BUILD_SHARED_LIBS "Option for testing shared libraries" ON)
|
||||
|
||||
option(TEST_CLOCK_CYCLE "Option to use clock64" OFF)
|
||||
if (TEST_CLOCK_CYCLE)
|
||||
add_definitions(-DTEST_CLOCK_CYCLE)
|
||||
endif()
|
||||
|
||||
# flag to generate standalone exe per src file.
|
||||
message(STATUS "STANDALONE_TESTS : ${STANDALONE_TESTS}")
|
||||
|
||||
if(NOT WIN32)
|
||||
set(CPACK_SET_DESTDIR ON CACHE BOOL "Installer package will install hip catch to CMAKE_INSTALL_PREFIX instead of CPACK_PACKAGING_INSTALL_PREFIX")
|
||||
endif()
|
||||
|
||||
# Check if platform is set
|
||||
if(NOT HIP_PLATFORM STREQUAL "amd" AND NOT HIP_PLATFORM STREQUAL "nvidia")
|
||||
message(FATAL_ERROR "Unexpected HIP_PLATFORM: " ${HIP_PLATFORM})
|
||||
endif()
|
||||
|
||||
if(HIP_PLATFORM STREQUAL "amd")
|
||||
if(UNIX AND DEFINED ROCM_PATH)
|
||||
# Read -DROCM_PATH and set CXX_FLAGS for amd platform only
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --rocm-path=${ROCM_PATH}")
|
||||
endif()
|
||||
|
||||
if(DEFINED HIP_PATH)
|
||||
# Read -DHIP_PATH and set CXX_FLAGS for amd platform only
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --hip-path=${HIP_PATH}")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# Read -DHIP_PATH
|
||||
# If not set read env{HIP_PATH} only on Windows
|
||||
if(WIN32)
|
||||
if(NOT DEFINED HIP_PATH)
|
||||
if(DEFINED ENV{HIP_PATH})
|
||||
set(HIP_PATH $ENV{HIP_PATH} CACHE STRING "HIP Path")
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(NOT DEFINED HIP_PATH)
|
||||
if(DEFINED ROCM_PATH)
|
||||
set(HIP_PATH ${ROCM_PATH})
|
||||
else()
|
||||
set(HIP_PATH "/opt/rocm")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(NOT DEFINED ROCM_PATH)
|
||||
set(ROCM_PATH "/opt/rocm")
|
||||
endif()
|
||||
|
||||
message(STATUS "HIP_PATH: ${HIP_PATH}")
|
||||
message(STATUS "ROCM_PATH: ${ROCM_PATH}")
|
||||
|
||||
if (WIN32)
|
||||
set(HIPCC_EXEC "hipcc.exe")
|
||||
set(HIPCONFIG_EXEC "hipconfig.exe")
|
||||
else()
|
||||
set(HIPCC_EXEC "hipcc")
|
||||
set(HIPCONFIG_EXEC "hipconfig")
|
||||
endif()
|
||||
|
||||
set(CMAKE_C_COMPILER "${HIP_PATH}/bin/${HIPCC_EXEC}")
|
||||
set(CMAKE_CXX_COMPILER "${HIP_PATH}/bin/${HIPCC_EXEC}")
|
||||
execute_process(COMMAND ${HIP_PATH}/bin/${HIPCONFIG_EXEC} --version
|
||||
OUTPUT_VARIABLE HIP_VERSION
|
||||
OUTPUT_STRIP_TRAILING_WHITESPACE)
|
||||
|
||||
if(NOT WIN32)
|
||||
if(NOT BUILD_SHARED_LIBS)
|
||||
list(APPEND CMAKE_PREFIX_PATH ${ROCM_PATH} ${ROCM_PATH}/llvm)
|
||||
find_package(amd_comgr)
|
||||
find_package(hsa-runtime64)
|
||||
link_libraries(hsa-runtime64::hsa-runtime64)
|
||||
link_libraries(amd_comgr)
|
||||
link_libraries(hiprtc-builtins)
|
||||
endif() # end BUILD_SHARED_LIBS
|
||||
endif() # end win32
|
||||
|
||||
# enforce c++17
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --std=c++17")
|
||||
|
||||
# Address sanitizer options
|
||||
if(ENABLE_ADDRESS_SANITIZER)
|
||||
message(STATUS "Building catch tests with Address Sanitizer options")
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=address -shared-libasan -g -gz")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address -shared-libasan -g -gz")
|
||||
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--enable-new-dtags -fuse-ld=lld -fsanitize=address -shared-libasan -g -gz -Wl,--build-id=sha1 -L${ROCM_PATH}/lib/asan")
|
||||
endif()
|
||||
|
||||
string(REPLACE "." ";" VERSION_LIST ${HIP_VERSION})
|
||||
list(GET VERSION_LIST 0 HIP_VERSION_MAJOR)
|
||||
list(GET VERSION_LIST 1 HIP_VERSION_MINOR)
|
||||
list(GET VERSION_LIST 2 HIP_VERSION_PATCH_GITHASH)
|
||||
string(REPLACE "-" ";" VERSION_LIST ${HIP_VERSION_PATCH_GITHASH})
|
||||
list(GET VERSION_LIST 0 HIP_VERSION_PATCH)
|
||||
list(GET VERSION_LIST 1 HIP_VERSION_GITHASH)
|
||||
|
||||
if(DEFINED ENV{ROCM_LIBPATCH_VERSION})
|
||||
set(HIP_PACKAGING_VERSION_PATCH ${HIP_VERSION_PATCH}.$ENV{ROCM_LIBPATCH_VERSION})
|
||||
else()
|
||||
set(HIP_PACKAGING_VERSION_PATCH ${HIP_VERSION_PATCH}-${HIP_VERSION_GITHASH})
|
||||
endif()
|
||||
|
||||
if(NOT DEFINED CATCH2_PATH)
|
||||
if(DEFINED ENV{CATCH2_PATH})
|
||||
set(CATCH2_PATH $ENV{CATCH2_PATH} CACHE STRING "Catch2 Path")
|
||||
else()
|
||||
set(CATCH2_PATH "${CMAKE_CURRENT_LIST_DIR}/external/Catch2")
|
||||
endif()
|
||||
endif()
|
||||
message(STATUS "Catch2 Path: ${CATCH2_PATH}")
|
||||
|
||||
# Set JSON Parser path
|
||||
if(NOT DEFINED JSON_PARSER)
|
||||
if(DEFINED ENV{JSON_PARSER})
|
||||
set(JSON_PARSER $ENV{JSON_PARSER} CACHE STRING "JSON Parser Path")
|
||||
else()
|
||||
set(JSON_PARSER "${CMAKE_CURRENT_LIST_DIR}/external/picojson")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
message(STATUS "Searching Catch2 in: ${CMAKE_CURRENT_LIST_DIR}/external")
|
||||
find_package(Catch2 REQUIRED
|
||||
PATHS
|
||||
${CMAKE_CURRENT_LIST_DIR}/external
|
||||
PATH_SUFFIXES
|
||||
Catch2/cmake/Catch2
|
||||
)
|
||||
include(Catch)
|
||||
include(CTest)
|
||||
|
||||
# path used for generating the *_include.cmake file
|
||||
set(CATCH2_INCLUDE ${CATCH2_PATH}/cmake/Catch2/catch_include.cmake.in)
|
||||
|
||||
include_directories(
|
||||
${CATCH2_PATH}
|
||||
"./include"
|
||||
"./kernels"
|
||||
${HIP_PATH}/include
|
||||
${JSON_PARSER}
|
||||
)
|
||||
|
||||
option(RTC_TESTING "Run tests using HIP RTC to compile the kernels" OFF)
|
||||
if (RTC_TESTING)
|
||||
add_definitions(-DRTC_TESTING=ON)
|
||||
endif()
|
||||
add_definitions(-DKERNELS_PATH="${CMAKE_CURRENT_SOURCE_DIR}/kernels/")
|
||||
|
||||
set(CATCH_BUILD_DIR catch_tests)
|
||||
set(HIP_TEST_CONFIG_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/${CATCH_BUILD_DIR}/config)
|
||||
file(MAKE_DIRECTORY ${HIP_TEST_CONFIG_BINARY_DIR})
|
||||
file(GLOB JSON_FILES "./hipTestMain/config/*.json")
|
||||
foreach(json IN LISTS JSON_FILES)
|
||||
file(COPY ${json}
|
||||
DESTINATION ${HIP_TEST_CONFIG_BINARY_DIR})
|
||||
endforeach()
|
||||
set(CATCH_SCRIPT_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/${CATCH_BUILD_DIR}/script)
|
||||
file(COPY ./external/Catch2/cmake/Catch2/CatchAddTests.cmake
|
||||
DESTINATION ${CATCH_SCRIPT_BINARY_DIR})
|
||||
file(COPY ./external/Catch2/cmake/Catch2/catch_include.cmake
|
||||
DESTINATION ${CATCH_SCRIPT_BINARY_DIR})
|
||||
set(ADD_SCRIPT_PATH ${CATCH_SCRIPT_BINARY_DIR}/CatchAddTests.cmake)
|
||||
set(CATCH_INCLUDE_PATH ${CATCH_SCRIPT_BINARY_DIR}/catch_include.cmake)
|
||||
|
||||
if (WIN32)
|
||||
configure_file(catchProp_in_rc.in ${CMAKE_CURRENT_BINARY_DIR}/catchProp.rc @ONLY)
|
||||
cmake_path(SET LLVM_RC_PATH "${HIP_PATH}/../lc/bin/llvm-rc.exe")
|
||||
cmake_path(SET LLVM_RC_PATH NORMALIZE "${LLVM_RC_PATH}")
|
||||
|
||||
# generates the .res files to be used by executables to populate the properties
|
||||
# expects LC folder with clang, llvm-rc to be present one level up of HIP
|
||||
execute_process(COMMAND ${LLVM_RC_PATH} ${CMAKE_CURRENT_BINARY_DIR}/catchProp.rc
|
||||
OUTPUT_VARIABLE RC_OUTPUT)
|
||||
set(PROP_RC ${CMAKE_CURRENT_BINARY_DIR})
|
||||
# When args to linker exceeds max chars.
|
||||
# msbuild writes args to a rsp file.
|
||||
# This is used to reference the obj file correctly
|
||||
SET(CMAKE_C_RESPONSE_FILE_LINK_FLAG "")
|
||||
SET(CMAKE_CXX_RESPONSE_FILE_LINK_FLAG "")
|
||||
endif()
|
||||
|
||||
if(HIP_PLATFORM STREQUAL "amd")
|
||||
add_compile_options(-Wall -Wextra -Wvla -Werror -Wno-deprecated -Wno-option-ignored)
|
||||
endif()
|
||||
|
||||
cmake_policy(PUSH)
|
||||
if(POLICY CMP0037)
|
||||
cmake_policy(SET CMP0037 OLD)
|
||||
endif()
|
||||
|
||||
# Turn off CMAKE_HIP_ARCHITECTURES Feature if cmake version is 3.21+
|
||||
if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.21.0)
|
||||
set(CMAKE_HIP_ARCHITECTURES OFF)
|
||||
endif()
|
||||
message(STATUS "CMAKE HIP ARCHITECTURES: ${CMAKE_HIP_ARCHITECTURES}")
|
||||
|
||||
# Note to pass arch use format like -DOFFLOAD_ARCH_STR="--offload-arch=gfx900 --offload-arch=gfx906"
|
||||
# having space at the start/end of OFFLOAD_ARCH_STR can cause build failures
|
||||
# Identify the GPU Targets.
|
||||
# This is done due to limitation of rocm_agent_enumerator
|
||||
# While building test parallelly, rocm_agent_enumerator can fail and give out an empty target
|
||||
# That results in hipcc building the test for gfx803 (the default target)
|
||||
# preference to pass arch -
|
||||
# OFFLOAD_ARCH_STR
|
||||
# rocm_agent_enumerator
|
||||
# Resolve the GPU architectures to build for.
#   - OFFLOAD_ARCH_STR not given: query rocm_agent_enumerator (amd platform on UNIX only)
#     and build OFFLOAD_ARCH_STR / HIP_GPU_ARCH_LIST from its output.
#   - OFFLOAD_ARCH_STR given: derive HIP_GPU_ARCH_LIST from it.
if(NOT DEFINED OFFLOAD_ARCH_STR
   AND EXISTS "${ROCM_PATH}/bin/rocm_agent_enumerator"
   AND HIP_PLATFORM STREQUAL "amd" AND UNIX)
  execute_process(COMMAND "${ROCM_PATH}/bin/rocm_agent_enumerator"
                  OUTPUT_VARIABLE HIP_GPU_ARCH
                  RESULT_VARIABLE ROCM_AGENT_ENUM_RESULT
                  OUTPUT_STRIP_TRAILING_WHITESPACE)
  # Trim out gfx000.
  # The input is quoted on purpose: if the enumerator printed nothing, an unquoted
  # empty expansion would drop the argument and string(REPLACE) would abort the
  # configure step before the "no valid architectures" branch below is reached.
  string(REPLACE "gfx000\n" "" HIP_GPU_ARCH "${HIP_GPU_ARCH}")
  if (NOT HIP_GPU_ARCH STREQUAL "")
    # One architecture per output line -> CMake list, without duplicates.
    string(REGEX REPLACE "\n" ";" HIP_GPU_ARCH_LIST "${HIP_GPU_ARCH}")
    list(REMOVE_DUPLICATES HIP_GPU_ARCH_LIST)
    list(LENGTH HIP_GPU_ARCH_LIST HIP_GPU_ARCH_LEN)
    set(OFFLOAD_ARCH_STR "")
    foreach(_hip_gpu_arch ${HIP_GPU_ARCH_LIST})
      set(OFFLOAD_ARCH_STR "--offload-arch=${_hip_gpu_arch} ${OFFLOAD_ARCH_STR}")
    endforeach()
  else()
    message(STATUS "ROCm Agent Enumerator found no valid architectures")
  endif()
elseif(DEFINED OFFLOAD_ARCH_STR)
  # Quoted for the same reason: an OFFLOAD_ARCH_STR that is defined but empty must not
  # make string(REPLACE) fail with a missing-argument error.
  string(REPLACE "--offload-arch=" "" HIP_GPU_ARCH_LIST "${OFFLOAD_ARCH_STR}")
endif()
|
||||
|
||||
if(DEFINED OFFLOAD_ARCH_STR)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OFFLOAD_ARCH_STR} ")
|
||||
endif()
|
||||
message(STATUS "Using offload arch string: ${OFFLOAD_ARCH_STR}")
|
||||
|
||||
find_package(Git)
|
||||
# get hip-tests commit short hash
|
||||
execute_process(COMMAND ${GIT_EXECUTABLE} rev-parse --short HEAD
|
||||
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
|
||||
RESULT_VARIABLE git_result
|
||||
OUTPUT_VARIABLE git_output
|
||||
OUTPUT_STRIP_TRAILING_WHITESPACE)
|
||||
if(git_result EQUAL 0)
|
||||
set(HIP_TESTS_GITHASH ${git_output})
|
||||
endif()
|
||||
|
||||
set(OFFLOAD_ARCH_LIST ${OFFLOAD_ARCH_STR})
|
||||
separate_arguments(OFFLOAD_ARCH_LIST)
|
||||
list(REMOVE_DUPLICATES OFFLOAD_ARCH_LIST)
|
||||
set(hip_gpu_arch_list "")
|
||||
foreach(__offload_arch ${OFFLOAD_ARCH_LIST})
|
||||
string(REPLACE "--offload-arch=" "" targetid ${__offload_arch})
|
||||
string(REPLACE ":" ";" target_id_list ${targetid})
|
||||
list(GET target_id_list 0 arch)
|
||||
string(APPEND hip_gpu_arch_list "${arch};")
|
||||
list(REMOVE_DUPLICATES hip_gpu_arch_list)
|
||||
endforeach()
|
||||
|
||||
if(WIN32)
|
||||
set(configToUse "config_amd_windows")
|
||||
set(config_file ${CMAKE_SOURCE_DIR}/hipTestMain/config/${configToUse})
|
||||
set(json_file ${HIP_TEST_CONFIG_BINARY_DIR}/${configToUse}.json)
|
||||
set(CLANG_CPP "${HIP_PATH}/../lc/bin/clang-cpp.exe")
|
||||
|
||||
set(cmd "${CLANG_CPP} -P -DGITHASH=\"${HIP_VERSION_GITHASH}\" ${config_file}>${json_file}")
|
||||
message(${cmd})
|
||||
execute_process(COMMAND cmd.exe /C ${cmd}
|
||||
RESULT_VARIABLE json_result)
|
||||
message(STATUS "${configToUse}.json file generation result: ${json_result}")
|
||||
else()
|
||||
set(configToUse "config_amd_linux")
|
||||
foreach(arch ${hip_gpu_arch_list})
|
||||
set(config_file ${CMAKE_SOURCE_DIR}/hipTestMain/config/${configToUse})
|
||||
set(json_file ${HIP_TEST_CONFIG_BINARY_DIR}/${configToUse}_${arch}.json)
|
||||
set(cmd "${ROCM_PATH}/llvm/bin/clang-cpp -P -D${arch} -DGITHASH=\\\"${HIP_VERSION_GITHASH}\\\" ${config_file}>${json_file}")
|
||||
message(${cmd})
|
||||
execute_process(COMMAND bash -c ${cmd}
|
||||
RESULT_VARIABLE json_result)
|
||||
message(STATUS "${configToUse}_${arch}.json file generation result: ${json_result}")
|
||||
endforeach()
|
||||
endif()
|
||||
# prints the catch info to a file
|
||||
string(TIMESTAMP _timestamp UTC)
|
||||
set(_autogen "# Auto-generated by cmake on ${_timestamp} UTC\n")
|
||||
set(_catchInfo ${_autogen} "HIP_VERSION=${HIP_VERSION}\n")
|
||||
set(_catchInfo ${_catchInfo} "HIP_PLATFORM=${HIP_PLATFORM}\n")
|
||||
set(_catchInfo ${_catchInfo} "HIP_TESTS_GITHASH=${HIP_TESTS_GITHASH}\n")
|
||||
set(_catchInfo ${_catchInfo} "ARCHS=${HIP_GPU_ARCH_LIST}\n")
|
||||
file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/${CATCH_BUILD_DIR}/catchInfo.txt ${_catchInfo})
|
||||
# allows user to run ctest from catch_tests level
|
||||
set(_subdirs ${_autogen} "subdirs(..)\n")
|
||||
file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/${CATCH_BUILD_DIR}/CTestTestfile.cmake ${_subdirs})
|
||||
find_package(Python3 COMPONENTS Interpreter REQUIRED)
|
||||
|
||||
# copy python script and headers to catch test package
|
||||
set(CATCH_INCLUDE_DIR include)
|
||||
execute_process(COMMAND ${CMAKE_COMMAND} -E
|
||||
make_directory ${CMAKE_CURRENT_BINARY_DIR}/${CATCH_BUILD_DIR}/${CATCH_INCLUDE_DIR})
|
||||
|
||||
file(COPY ./unit/compileAndCaptureOutput.py
|
||||
DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/${CATCH_BUILD_DIR}/unit )
|
||||
|
||||
file(COPY ./include/hip_test_common.hh DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/${CATCH_BUILD_DIR}/${CATCH_INCLUDE_DIR})
|
||||
file(COPY ./include/hip_test_context.hh DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/${CATCH_BUILD_DIR}/${CATCH_INCLUDE_DIR})
|
||||
file(COPY ./external/Catch2/catch.hpp DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/${CATCH_BUILD_DIR}/${CATCH_INCLUDE_DIR})
|
||||
|
||||
# Enable device lambda on nvidia platforms
|
||||
if(HIP_PLATFORM STREQUAL "nvidia")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --extended-lambda")
|
||||
endif()
|
||||
|
||||
# Disable CXX extensions (gnu++11 etc)
|
||||
set(CMAKE_CXX_EXTENSIONS OFF)
|
||||
|
||||
add_custom_target(build_tests)
|
||||
|
||||
# Tests folder
|
||||
add_subdirectory(unit ${CATCH_BUILD_DIR}/unit)
|
||||
add_subdirectory(ABM ${CATCH_BUILD_DIR}/ABM)
|
||||
add_subdirectory(kernels ${CATCH_BUILD_DIR}/kernels)
|
||||
add_subdirectory(hipTestMain ${CATCH_BUILD_DIR}/hipTestMain)
|
||||
add_subdirectory(stress ${CATCH_BUILD_DIR}/stress)
|
||||
add_subdirectory(TypeQualifiers ${CATCH_BUILD_DIR}/TypeQualifiers)
|
||||
add_subdirectory(perftests ${CATCH_BUILD_DIR}/perftests)
|
||||
add_subdirectory(multiproc ${CATCH_BUILD_DIR}/multiproc)
|
||||
add_subdirectory(performance ${CATCH_BUILD_DIR}/performance)
|
||||
|
||||
add_custom_target(gen_coverage
|
||||
COMMAND ${CMAKE_COMMAND} -B build/
|
||||
COMMAND ${CMAKE_COMMAND} --build build/
|
||||
COMMAND ./build/generateHipAPICoverage ${HIP_PATH}/include
|
||||
WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR}/../utils/coverage
|
||||
COMMENT "Generating Test Coverage Report")
|
||||
|
||||
cmake_policy(POP)
|
||||
|
||||
# packaging the tests
|
||||
# make package_test to generate packages for test
|
||||
set(BUILD_DIR ${CMAKE_CURRENT_BINARY_DIR}/packages/)
|
||||
add_subdirectory(packaging)
|
||||
if(UNIX)
|
||||
add_custom_target(package_test COMMAND ${CMAKE_COMMAND} .
|
||||
COMMAND rm -rf *.deb *.rpm *.tar.gz
|
||||
COMMAND make package
|
||||
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
|
||||
else()
|
||||
file(TO_NATIVE_PATH ${PROJECT_BINARY_DIR} CATCH_BINARY_DIR)
|
||||
add_custom_target(package_test COMMAND ${CMAKE_COMMAND} .
|
||||
COMMAND cpack
|
||||
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
|
||||
endif()
|
||||
|
||||
# Doxygen documentation
|
||||
# check if doxygen is installed
|
||||
find_package(Doxygen)
|
||||
if(DOXYGEN_FOUND)
|
||||
# Configure Doxyfile with proper paths
|
||||
set(DOXYGEN_IN ${CMAKE_CURRENT_SOURCE_DIR}/DoxyfileTests)
|
||||
set(DOXYGEN_OUT ${CMAKE_CURRENT_BINARY_DIR}/DoxyfileTests.out)
|
||||
configure_file(${DOXYGEN_IN} ${DOXYGEN_OUT} @ONLY)
|
||||
|
||||
add_custom_target(build_docs
|
||||
COMMAND ${DOXYGEN_EXECUTABLE} ${DOXYGEN_OUT}
|
||||
COMMENT "Generating test plan documentation with Doxygen")
|
||||
else()
|
||||
message(STATUS "Doxygen was not found. Building test plan documentation will not be available")
|
||||
endif()
|
||||
تفاوت فایلی نمایش داده نمی شود زیرا این فایل بسیار بزرگ است
Diff را بارگزاری کن
@@ -0,0 +1,204 @@
|
||||
# HIP Tests - with Catch2
|
||||
|
||||
## Intro and Motivation
|
||||
HIP Tests were using HIT framework (a custom framework tailored for HIP) to add, build and run tests. As time progressed the framework got big and took a substantial amount of effort to maintain and extend. It also took a substantial amount of time to configure. We took this opportunity to rewrite HIP's testing framework and port the test infra to the Catch2 format.
|
||||
|
||||
## How to write tests
|
||||
Tests in Catch2 are declared via ```TEST_CASE```.
|
||||
|
||||
[Please read the Catch2 documentation on how to write test cases](https://github.com/catchorg/Catch2/blob/v2.13.6/docs/tutorial.md#top)
|
||||
|
||||
[Catch2 Detailed Reference](https://github.com/catchorg/Catch2/blob/v2.13.6/docs/Readme.md#top)
|
||||
|
||||
## Taking care of existing features
|
||||
- Don’t build on platform: EXCLUDE_HIP_PLATFORM, can be done via CMAKE. Adding source in if(HIP_PLATFORM == amd/nvidia).
|
||||
- HIPCC_OPTIONS/CLANG Options: Can be done via: set_source_files_properties(src.cc PROPERTIES COMPILE_FLAGS “…”).
|
||||
- Additional libraries: Can be done via target_link_libraries()
|
||||
- Multiple runs with different args: This can be done by Catch’s Feature: GENERATE(…)
|
||||
Running Subtest: ctest -R "..." (Regex to match the subtest name)
|
||||
|
||||
## New Features
|
||||
- Skip test without recompiling tests, by addition of a json file. Default name is ```config.json``` , this can be overridden by using the variable ```HIP_CATCH_EXCLUDE_FILE=some_config.json```.
|
||||
- Json file supports regex. Ex: All tests which have the word ‘Memset’ in their name can be skipped using ‘*Memset*’
|
||||
- Support multiple skip test list which can be set via environment variable, so you can have multiple files containing different skip test lists and can pick and choose among them depending on your platform and os.
|
||||
- Better CI integration via xunit compatible output
|
||||
|
||||
## Testing Context
|
||||
HIP testing framework gives you a context for each test. This context will have useful information about the environment your test is running.
|
||||
|
||||
Some useful functions are:
|
||||
- `bool isWindows()` : true if os is windows
|
||||
- `bool isLinux()` : true if os is linux
|
||||
- `bool isAmd()` : true if platform is AMD
|
||||
- `bool isNvidia()` : true if platform is NVIDIA
|
||||
|
||||
This information can be accessed in any test via using: `TestContext::get().isAmd()`.
|
||||
|
||||
## Adding test for a specific platform
|
||||
There might be some functionality which is not present on some platforms. Those tests can be hidden inside following macros.
|
||||
|
||||
- ```HT_AMD``` is 1 when tests are running on AMD platform and 0 on NVIDIA.
|
||||
- ```HT_NVIDIA``` is 1 when tests are running on NVIDIA platform and 0 on AMD
|
||||
|
||||
Usage:
|
||||
|
||||
```cpp
|
||||
#if HT_AMD
|
||||
TEST_CASE("hipExtAPIs") {
|
||||
// ...
|
||||
}
|
||||
#endif
|
||||
```
|
||||
|
||||
## Config file schema
|
||||
Some tests can be skipped using a config file placed in hipTestMain/config folder. Multiple config files can be defined for different configurations.
|
||||
The naming convention for the file needs to be "config_platform_os_archname.json"
|
||||
Platform and os are mandatory.
|
||||
Arch name is optional and takes precedence while loading the json file.
|
||||
Currently the json files need to be manually chosen by the executor for the architecture of choice.
|
||||
|
||||
example:
|
||||
config_amd_windows.json
|
||||
config_nvidia_windows.json
|
||||
|
||||
The schema of the json file is as follows:
|
||||
```json
|
||||
{
|
||||
"DisabledTests":
|
||||
[
|
||||
"TestName1",
|
||||
"TestName2",
|
||||
...
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
## Environment Variables
|
||||
- `HIP_CATCH_EXCLUDE_FILE` : This variable can be set to the config file name or full path. Disabled tests will be read from this.
|
||||
- `HT_LOG_ENABLE` : This is for debugging the HIP Test Framework itself. Setting it to 1, all `LogPrintf` will be printed on screen
|
||||
|
||||
## Test Macros
|
||||
### Single Thread Macros
|
||||
These macros are to be used when your test is calling HIP APIs via the main thread.
|
||||
|
||||
- `HIP_CHECK` : This macro takes in a HIP API and tests for its result to be either ```hipSuccess``` or ```hipErrorPeerAccessAlreadyEnabled```.
|
||||
|
||||
- Usage: ```HIP_CHECK(hipMalloc(&dPtr, 10));```
|
||||
|
||||
- ```HIP_CHECK_ERROR``` : This macro takes in a HIP API and tests its result against a provided result. This can be used when the API is expected to fail with a particular result.
|
||||
|
||||
- Usage: ```HIP_CHECK_ERROR(hipMalloc(&dPtr, 0), hipErrorInvalidValue);```
|
||||
|
||||
- ```HIPRTC_CHECK``` : This macro takes in a HIPRTC API and tests its result against HIPRTC_SUCCESS.
|
||||
|
||||
- Usage: ```HIPRTC_CHECK(hiprtcCompileProgram(prog, count, options));```
|
||||
|
||||
- ```HIP_ASSERT``` : This macro takes in a bool condition as input and does a ```REQUIRE``` on the condition.
|
||||
|
||||
- Usage: ```HIP_ASSERT(result == 10);```
|
||||
|
||||
### Multi Thread Macros
|
||||
These macros are to be used when you call HIP APIs in a multi threaded way. They exist because Catch2 ```REQUIRE``` and ```CHECK``` macros can not handle multi threaded calls. To solve this problem, two macros are added, ```HIP_CHECK_THREAD``` and ```REQUIRE_THREAD```, which can be used to check the result of HIP APIs and test assertions respectively. The results can be validated after the threads join via ```HIP_CHECK_THREAD_FINALIZE```.
|
||||
|
||||
Note: These should be used in ```std::thread``` only. For multi proc guidelines look at [MultiProc Macros](#multi-process-macros) and [SpawnProc Class](#multiproc-management-class)
|
||||
|
||||
- ```HIP_CHECK_THREAD``` : This macro takes in a HIP API and tests for its result to be either ```hipSuccess``` or ```hipErrorPeerAccessAlreadyEnabled```. It can also tell other threads if an error has occurred in one of the HIP APIs and can prematurely stop the threads.
|
||||
|
||||
- ```REQUIRE_THREAD``` : This macro takes in a bool condition and tests for its result to be true. If this check fails, it can signal other threads to terminate early.
|
||||
|
||||
- ```HIP_CHECK_THREAD_FINALIZE``` : This macro checks for the results logged by ```HIP_CHECK_THREAD```. This needs to be called after the threads have joined.
|
||||
|
||||
Please also note that you can not return values in functions calling ```HIP_CHECK_THREAD``` or ```REQUIRE_THREAD``` macro.
|
||||
|
||||
Usage:
|
||||
|
||||
```cpp
|
||||
auto threadFunc = []() {
|
||||
int *dPtr{nullptr};
|
||||
HIP_CHECK_THREAD(hipMalloc(&dPtr, 10));
|
||||
REQUIRE_THREAD(dPtr != nullptr);
|
||||
// Some other work
|
||||
};
|
||||
|
||||
// Launch threads
|
||||
std::vector<std::thread> threadPool;
|
||||
for(...) {
|
||||
threadPool.emplace_back(std::thread(threadFunc));
|
||||
}
|
||||
|
||||
// Join threads
|
||||
for(auto &i : threadPool) {
|
||||
i.join();
|
||||
}
|
||||
|
||||
// Validate all results
|
||||
HIP_CHECK_THREAD_FINALIZE();
|
||||
```
|
||||
|
||||
### Skipping Tests if certain criteria is not met
|
||||
If there arises a condition where certain flag is disabled and due to which a test can not run at that time, the following macro can be of use. It will highlight the test in ctest report as well.
|
||||
|
||||
- ```HIP_SKIP_TEST``` : The api takes in an input of the reason as well and prints out the line HIP_SKIP_THIS_TEST. This causes ctest to mark the test as skipped and the test shows up in the report as skipped prompting proper response from the team.
|
||||
|
||||
Usage:
|
||||
|
||||
```cpp
|
||||
TEST_CASE("TestOnlyOnXnack") {
|
||||
if(!XNACKEnabled) {
|
||||
HipTest::HIP_SKIP_TEST("Test only runs on system with XNACK enabled");
|
||||
return;
|
||||
}
|
||||
// Rest of test functionality
|
||||
}
|
||||
```
|
||||
|
||||
### Multi Process Macros
|
||||
These macros are to be called in multi process tests, inside a process which gets spawned. The reasoning is the same, Catch2 does not support multi process checks.
|
||||
|
||||
- ```HIPCHECK``` : Same as ```HIP_CHECK``` but will not call Catch2's ```REQUIRE``` on the HIP API. It will print if there is a mismatch and exit the process.
|
||||
|
||||
- ```HIPASSERT``` : Same as ```HIP_ASSERT``` but will not call Catch2's ```REQUIRE``` on the HIP API. It will print if there is a mismatch and exit the process.
|
||||
|
||||
## MultiProc Management Class
|
||||
There is a special interface available for process isolation. ```hip::SpawnProc``` in ```hip_test_process.hh```. Using this interface test can spawn a process and place passing conditions on its return value or its output to stdout. This can be useful for testing printf output.
|
||||
Sample Usage:
|
||||
```cpp
|
||||
hip::SpawnProc proc(<name of exe>, <optional bool value, if output is to be recorded>);
|
||||
REQUIRE(0 == proc.run()); // Test of return value of the proc
|
||||
REQUIRE(expectedOutput == proc.getOutput()); // Test on expected output of the process
|
||||
```
|
||||
The process must be a standalone exe inside the same folder as other tests.
|
||||
|
||||
## Enabling New Tests
|
||||
Initially, the new tests can be enabled via using ```-DHIP_CATCH_TEST=1```. After porting existing tests, this will be turned on by default.
|
||||
|
||||
## Building a single test
|
||||
```bash
|
||||
hipcc <path_to_test.cpp> -I<HIP_SRC_DIR>/tests/catch/include <HIP_SRC_DIR>/tests/catch/hipTestMain/standalone_main.cc -I<HIP_SRC_DIR>/tests/catch/external/Catch2 -g -o <out_file_name>
|
||||
```
|
||||
|
||||
## Debugging support
|
||||
Catch2 allows multiple ways in which you can debug the test case.
|
||||
- `-b` options breaks into a debugger as soon as there is a failure encountered [Catch2 Options Reference](https://github.com/catchorg/Catch2/blob/devel/docs/command-line.md#breaking-into-the-debugger)
|
||||
- Catch2 provided [logging macro](https://github.com/catchorg/Catch2/blob/v2.13.6/docs/logging.md#top) that print useful information on test case failure
|
||||
- User can also call [CATCH_BREAK_INTO_DEBUGGER](https://github.com/catchorg/Catch2/blob/devel/docs/configuration.md#overriding-catchs-debug-break--b) macro to break at a certain point in a test case.
|
||||
- User can also mention filename.cc:__LineNumber__ to break into a test case via gdb.
|
||||
|
||||
## External Libs being used
|
||||
- [Catch2](https://github.com/catchorg/Catch2) - Testing framework
|
||||
- [picojson](https://github.com/kazuho/picojson) - For config file parsing
|
||||
|
||||
# Testing Guidelines
|
||||
Tests fall into 5 categories, and their file name prefixes are as follows:
|
||||
- Unit tests (Prefix: Unit_\*API\*_\*Optional Scenario\*, example : Unit_hipMalloc_Negative or Unit_hipMalloc): Unit Tests are simplest test for an API, the target here is to test the API with different types of input and different ways of calling.
|
||||
- Application Behavior Modelling tests (Prefix: ABM_\*Intent\*_\*Optional Scenario\*, example: ABM_ModuleLoadAndRun): ABM tests are used to model a specific use case of HIP APIs, either seen in a customer app or a general purpose app. It mimics the calling behavior seen in aforementioned app.
|
||||
- Stress/Scale tests (Prefix: Stress_\*API\*_\*Intent\*_\*Optional Scenario\*, example: Stress_hipMemset_ExhaustVRAM): These tests are used to see the behavior of HIP APIs in edge scenarios, for example what happens when we have exhausted vram and do a hipMalloc or run many instances of same API in parallel.
|
||||
- Multi Process tests (Prefix: MultiProc_\*API\*_\*Optional Scenario\*, example: MultiProc_hipIPCMemHandle_GetDataFromProc): These tests are multi process tests and will only run on linux. They are used to test HIP APIs in multi process environment
|
||||
- Performance tests (Prefix: Perf_\*Intent\*_\*Optional Scenario\*, example: Perf_DispatchLatency): Performance tests are used to get results of HIP APIs.
|
||||
|
||||
# General Guidelines:
|
||||
- Do not use the catch2 tags. Tags won't be used for filtering
|
||||
- Add as many INFO() as you can in tests which print the state of the test; this will help the debugger when the test fails (INFO macro only prints when the test fails)
|
||||
- Check return of each HIP API and fail whenever there is a mismatch with hipSuccess or HIPRTC_SUCCESS.
|
||||
- Each Category of test will have its own exe and catch_discover_test macro will be called on it to discover its tests
|
||||
- Optional Scenario in test names are optional. For example you can test all Scenarios of hipMalloc API in one file, you can name the file Unit_hipMalloc, if you are having a file just for negative scenarios you can name it as Unit_hipMalloc_Negative.
|
||||
@@ -0,0 +1,8 @@
|
||||
# Common Tests
|
||||
set(TEST_SRC
|
||||
hipManagedKeyword.cc
|
||||
)
|
||||
|
||||
hip_add_exe_to_target(NAME TypeQualifiers
|
||||
TEST_SRC ${TEST_SRC}
|
||||
TEST_TARGET_NAME build_tests)
|
||||
@@ -0,0 +1,87 @@
|
||||
/*
|
||||
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
This testcase verifies the hipManagedKeyword basic scenario
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
|
||||
#define N 1048576
|
||||
__managed__ float A[N]; // Accessible by ALL CPU and GPU functions !!!
|
||||
__managed__ float B[N];
|
||||
__managed__ int x = 0;
|
||||
|
||||
// Element-wise accumulate: B[i] = A[i] + B[i] for every i in [0, N).
// Each thread starts at its global index and then advances by the total
// number of launched threads until it runs past N.
__global__ void add(const float *A, float *B) {
  const int totalThreads = blockDim.x * gridDim.x;
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  while (i < N) {
    B[i] = A[i] + B[i];
    i += totalThreads;
  }
}
|
||||
|
||||
// Bumps the __managed__ counter `x` by one for each thread that executes it.
// The increment is a plain (non-atomic) read-modify-write.
__global__ void GPU_func() {
  x += 1;
}
|
||||
|
||||
TEST_CASE("Unit_hipManagedKeyword_SingleGpu") {
  // Seed the managed arrays from the host: after add(), every B[i] should
  // hold 1.0f + 2.0f.
  for (int idx = 0; idx < N; ++idx) {
    A[idx] = 1.0f;
    B[idx] = 2.0f;
  }

  // Launch enough 256-thread blocks to cover all N elements (rounded up).
  const int threadsPerBlock = 256;
  const int blockCount = (N + threadsPerBlock - 1) / threadsPerBlock;
  const dim3 grid(blockCount, 1, 1);
  const dim3 block(threadsPerBlock, 1, 1);
  const float* src = static_cast<const float*>(A);
  float* dst = static_cast<float*>(B);
  hipLaunchKernelGGL(add, grid, block, 0, 0, src, dst);

  // Surface launch errors first, then wait for the kernel before the host
  // reads B again.
  HIP_CHECK(hipGetLastError());
  HIP_CHECK(hipDeviceSynchronize());

  // Largest absolute deviation from the expected 3.0f across all elements.
  float maxError = 0.0f;
  for (int idx = 0; idx < N; ++idx) {
    maxError = fmax(maxError, fabs(B[idx]-3.0f));
  }

  REQUIRE(maxError == 0.0f);
}
|
||||
|
||||
// Launches GPU_func once on every visible device and verifies that the
// __managed__ counter `x` was incremented once per device.
// The test is skipped if any device reports no managed-memory support.
TEST_CASE("Unit_hipManagedKeyword_MultiGpu") {
  int numDevices = 0;
  HIP_CHECK(hipGetDeviceCount(&numDevices));

  // Bail out before touching managed memory if any device lacks support.
  for (int i = 0; i < numDevices; i++) {
    int managed_memory = 0;
    // Use HIP_CHECK like the rest of this file (was HIPCHECK).
    HIP_CHECK(hipDeviceGetAttribute(&managed_memory,
                                    hipDeviceAttributeManagedMemory,
                                    i));
    if (!managed_memory) {
      HipTest::HIP_SKIP_TEST("managed memory access not supported on device");
      return;
    }
  }

  for (int i = 0; i < numDevices; i++) {
    HIP_CHECK(hipSetDevice(i));
    GPU_func<<< 1, 1 >>>();
    // A failed launch is only reported through hipGetLastError(); check it
    // here so the failure is attributed to the launch on device i rather
    // than showing up later as a wrong counter value.
    HIP_CHECK(hipGetLastError());
    HIP_CHECK(hipDeviceSynchronize());
  }
  REQUIRE(x == numDevices);
}
|
||||
@@ -0,0 +1,40 @@
|
||||
#include <windows.h>
|
||||
|
||||
#define HIP_VERSION "@HIP_VERSION@"
|
||||
#define HIP_VERSION_MAJOR @HIP_VERSION_MAJOR@
|
||||
#define HIP_VERSION_MINOR @HIP_VERSION_MINOR@
|
||||
#define HIP_VERSION_PATCH @HIP_VERSION_PATCH@
|
||||
|
||||
VS_VERSION_INFO VERSIONINFO
|
||||
FILEVERSION HIP_VERSION_MAJOR, HIP_VERSION_MINOR , HIP_VERSION_PATCH
|
||||
PRODUCTVERSION 10,1
|
||||
FILEFLAGSMASK 0x3fL
|
||||
#ifdef _DEBUG
|
||||
FILEFLAGS VS_FF_DEBUG
|
||||
#else
|
||||
FILEFLAGS 0x0L
|
||||
#endif
|
||||
FILEOS VOS_NT_WINDOWS32
|
||||
FILETYPE VFT_APP
|
||||
FILESUBTYPE VFT2_UNKNOWN
|
||||
BEGIN
|
||||
BLOCK "StringFileInfo"
|
||||
BEGIN
|
||||
BLOCK "040904b0"
|
||||
BEGIN
|
||||
VALUE "CompanyName", "Advanced Micro Devices Inc.\0"
|
||||
VALUE "FileDescription", "HIP unit tests"
|
||||
VALUE "FileVersion", "amdhip64.dll" HIP_VERSION
|
||||
VALUE "LegalCopyright", "Copyright (C) 2022 Advanced Micro Devices Inc.\0"
|
||||
VALUE "ProductName", "HIP unit tests"
|
||||
VALUE "ProductVersion", HIP_VERSION
|
||||
VALUE "Comments", "\0"
|
||||
VALUE "InternalName", "HIP unit tests"
|
||||
END
|
||||
END
|
||||
BLOCK "VarFileInfo"
|
||||
BEGIN
|
||||
VALUE "Translation", 0x0409, 1200
|
||||
END
|
||||
END
|
||||
/* End of Version info */
|
||||
@@ -0,0 +1,23 @@
|
||||
Boost Software License - Version 1.0 - August 17th, 2003
|
||||
|
||||
Permission is hereby granted, free of charge, to any person or organization
|
||||
obtaining a copy of the software and accompanying documentation covered by
|
||||
this license (the "Software") to use, reproduce, display, distribute,
|
||||
execute, and transmit the Software, and to prepare derivative works of the
|
||||
Software, and to permit third-parties to whom the Software is furnished to
|
||||
do so, all subject to the following:
|
||||
|
||||
The copyright notices in the Software and this entire statement, including
|
||||
the above license grant, this restriction and the following disclaimer,
|
||||
must be included in all copies of the Software, in whole or in part, and
|
||||
all derivative works of the Software, unless such copies or derivative
|
||||
works are solely in the form of machine-executable object code generated by
|
||||
a source language processor.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
|
||||
SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
|
||||
FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
|
||||
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
||||
+17881
تفاوت فایلی نمایش داده نمی شود زیرا این فایل بسیار بزرگ است
Diff را بارگزاری کن
@@ -0,0 +1,438 @@
|
||||
# Distributed under the OSI-approved BSD 3-Clause License. See accompanying
|
||||
# file Copyright.txt or https://cmake.org/licensing for details.
|
||||
|
||||
#[=======================================================================[.rst:
|
||||
Catch
|
||||
-----
|
||||
|
||||
This module defines a function to help use the Catch test framework.
|
||||
|
||||
The :command:`catch_discover_tests` discovers tests by asking the compiled test
|
||||
executable to enumerate its tests. This does not require CMake to be re-run
|
||||
when tests change. However, it may not work in a cross-compiling environment,
|
||||
and setting test properties is less convenient.
|
||||
|
||||
This command is intended to replace use of :command:`add_test` to register
|
||||
tests, and will create a separate CTest test for each Catch test case. Note
|
||||
that this is in some cases less efficient, as common set-up and tear-down logic
|
||||
cannot be shared by multiple test cases executing in the same instance.
|
||||
However, it provides more fine-grained pass/fail information to CTest, which is
|
||||
usually considered as more beneficial. By default, the CTest test name is the
|
||||
same as the Catch name; see also ``TEST_PREFIX`` and ``TEST_SUFFIX``.
|
||||
|
||||
.. command:: catch_discover_tests
|
||||
|
||||
Automatically add tests with CTest by querying the compiled test executable
|
||||
for available tests::
|
||||
|
||||
catch_discover_tests(target
|
||||
[TEST_SPEC arg1...]
|
||||
[EXTRA_ARGS arg1...]
|
||||
[WORKING_DIRECTORY dir]
|
||||
[TEST_PREFIX prefix]
|
||||
[TEST_SUFFIX suffix]
|
||||
[PROPERTIES name1 value1...]
|
||||
[TEST_LIST var]
|
||||
[REPORTER reporter]
|
||||
[OUTPUT_DIR dir]
|
||||
[OUTPUT_PREFIX prefix]
|
||||
[OUTPUT_SUFFIX suffix]
|
||||
)
|
||||
|
||||
``catch_discover_tests`` sets up a post-build command on the test executable
|
||||
that generates the list of tests by parsing the output from running the test
|
||||
with the ``--list-test-names-only`` argument. This ensures that the full
|
||||
list of tests is obtained. Since test discovery occurs at build time, it is
|
||||
not necessary to re-run CMake when the list of tests changes.
|
||||
However, it requires that :prop_tgt:`CROSSCOMPILING_EMULATOR` is properly set
|
||||
in order to function in a cross-compiling environment.
|
||||
|
||||
Additionally, setting properties on tests is somewhat less convenient, since
|
||||
the tests are not available at CMake time. Additional test properties may be
|
||||
assigned to the set of tests as a whole using the ``PROPERTIES`` option. If
|
||||
more fine-grained test control is needed, custom content may be provided
|
||||
through an external CTest script using the :prop_dir:`TEST_INCLUDE_FILES`
|
||||
directory property. The set of discovered tests is made accessible to such a
|
||||
script via the ``<target>_TESTS`` variable.
|
||||
|
||||
The options are:
|
||||
|
||||
``target``
|
||||
Specifies the Catch executable, which must be a known CMake executable
|
||||
target. CMake will substitute the location of the built executable when
|
||||
running the test.
|
||||
|
||||
``TEST_SPEC arg1...``
|
||||
Specifies test cases, wildcarded test cases, tags and tag expressions to
|
||||
pass to the Catch executable with the ``--list-test-names-only`` argument.
|
||||
|
||||
``EXTRA_ARGS arg1...``
|
||||
Any extra arguments to pass on the command line to each test case.
|
||||
|
||||
``WORKING_DIRECTORY dir``
|
||||
Specifies the directory in which to run the discovered test cases. If this
|
||||
option is not provided, the current binary directory is used.
|
||||
|
||||
``TEST_PREFIX prefix``
|
||||
Specifies a ``prefix`` to be prepended to the name of each discovered test
|
||||
case. This can be useful when the same test executable is being used in
|
||||
multiple calls to ``catch_discover_tests()`` but with different
|
||||
``TEST_SPEC`` or ``EXTRA_ARGS``.
|
||||
|
||||
``TEST_SUFFIX suffix``
|
||||
Similar to ``TEST_PREFIX`` except the ``suffix`` is appended to the name of
|
||||
every discovered test case. Both ``TEST_PREFIX`` and ``TEST_SUFFIX`` may
|
||||
be specified.
|
||||
|
||||
``PROPERTIES name1 value1...``
|
||||
Specifies additional properties to be set on all tests discovered by this
|
||||
invocation of ``catch_discover_tests``.
|
||||
|
||||
``TEST_LIST var``
|
||||
Make the list of tests available in the variable ``var``, rather than the
|
||||
default ``<target>_TESTS``. This can be useful when the same test
|
||||
executable is being used in multiple calls to ``catch_discover_tests()``.
|
||||
Note that this variable is only available in CTest.
|
||||
|
||||
``REPORTER reporter``
|
||||
Use the specified reporter when running the test case. The reporter will
|
||||
be passed to the Catch executable as ``--reporter reporter``.
|
||||
|
||||
``OUTPUT_DIR dir``
|
||||
If specified, the parameter is passed along as
|
||||
``--out dir/<test_name>`` to Catch executable. The actual file name is the
|
||||
same as the test name. This should be used instead of
|
||||
``EXTRA_ARGS --out foo`` to avoid race conditions writing the result output
|
||||
when using parallel test execution.
|
||||
|
||||
``OUTPUT_PREFIX prefix``
|
||||
May be used in conjunction with ``OUTPUT_DIR``.
|
||||
If specified, ``prefix`` is added to each output file name, like so
|
||||
``--out dir/prefix<test_name>``.
|
||||
|
||||
``OUTPUT_SUFFIX suffix``
|
||||
May be used in conjunction with ``OUTPUT_DIR``.
|
||||
If specified, ``suffix`` is added to each output file name, like so
|
||||
``--out dir/<test_name>suffix``. This can be used to add a file extension to
|
||||
the output e.g. ".xml".
|
||||
|
||||
#]=======================================================================]
|
||||
|
||||
|
||||
#------------------------------------------------------------------------------
|
||||
# TARGET_LIST TEST_SET
|
||||
# Registers build-time Catch test discovery for a set of test executables.
#
#   TARGET    list of executable targets; each one gets a POST_BUILD step that
#             runs the helper script stored in _CATCH_DISCOVER_TESTS_SCRIPT
#   TEST_SET  name used to form the generated include/tests file names
#
# The remaining arguments are parsed as single-value keywords (TEST_PREFIX,
# TEST_SUFFIX, WORKING_DIRECTORY, TEST_LIST, REPORTER, OUTPUT_DIR,
# OUTPUT_PREFIX, OUTPUT_SUFFIX) and list keywords (TEST_SPEC, EXTRA_ARGS,
# PROPERTIES).
#
# NOTE(review): ${crosscompiling_emulator} is read but never set in this
# function; it is presumably expected to be visible from the calling scope --
# confirm.
function(catch_discover_tests_compile_time_detection TARGET TEST_SET)
  cmake_parse_arguments(
    ""
    ""
    "TEST_PREFIX;TEST_SUFFIX;WORKING_DIRECTORY;TEST_LIST;REPORTER;OUTPUT_DIR;OUTPUT_PREFIX;OUTPUT_SUFFIX"
    "TEST_SPEC;EXTRA_ARGS;PROPERTIES"
    ${ARGN}
  )

  # Defaults for optional arguments.
  if(NOT _TEST_LIST)
    set(_TEST_LIST ${TARGET}_TESTS)
  endif()
  if(NOT _WORKING_DIRECTORY)
    set(_WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}")
  endif()

  # Short digest of the discovery arguments, used to form the generated
  # file names.
  string(SHA1 name_digest "${_TEST_SPEC} ${_EXTRA_ARGS} ${_REPORTER} ${_OUTPUT_DIR} ${_OUTPUT_PREFIX} ${_OUTPUT_SUFFIX}")
  string(SUBSTRING ${name_digest} 0 7 name_digest)

  # Generated files: a small include stub written now, and the tests file that
  # the POST_BUILD step is asked to produce (passed as CTEST_FILE).
  set(include_script "${CMAKE_CURRENT_BINARY_DIR}/${TEST_SET}_include-${name_digest}.cmake")
  set(tests_script "${CMAKE_CURRENT_BINARY_DIR}/${TEST_SET}_tests-${name_digest}.cmake")
  file(RELATIVE_PATH include_script_rel ${CMAKE_CURRENT_BINARY_DIR} ${include_script})
  file(RELATIVE_PATH tests_script_rel ${CMAKE_CURRENT_BINARY_DIR} ${tests_script})

  # The stub includes the tests file when it exists and warns otherwise.
  file(WRITE "${include_script}"
    "if(EXISTS \"${tests_script_rel}\")\n"
    "  include(\"${tests_script_rel}\")\n"
    "else()\n"
    "  message(WARNING \"Test ${TARGET} not built yet.\")\n"
    "endif()\n"
  )

  # One discovery step per executable, run right after it is built.
  foreach(EXE_NAME ${TARGET})
    add_custom_command(
      TARGET ${EXE_NAME} POST_BUILD
      COMMAND "${CMAKE_COMMAND}"
              -D "TEST_TARGET=${EXE_NAME}"
              -D "TEST_EXECUTABLE=$<TARGET_FILE:${EXE_NAME}>"
              -D "TEST_EXECUTOR=${crosscompiling_emulator}"
              -D "TEST_WORKING_DIR=${_WORKING_DIRECTORY}"
              -D "TEST_SPEC=${_TEST_SPEC}"
              -D "TEST_EXTRA_ARGS=${_EXTRA_ARGS}"
              -D "TEST_PROPERTIES=${_PROPERTIES}"
              -D "TEST_PREFIX=${_TEST_PREFIX}"
              -D "TEST_SUFFIX=${_TEST_SUFFIX}"
              -D "TEST_LIST=${_TEST_LIST}"
              -D "TEST_REPORTER=${_REPORTER}"
              -D "TEST_OUTPUT_DIR=${_OUTPUT_DIR}"
              -D "TEST_OUTPUT_PREFIX=${_OUTPUT_PREFIX}"
              -D "TEST_OUTPUT_SUFFIX=${_OUTPUT_SUFFIX}"
              -D "CTEST_FILE=${tests_script}"
              -P "${_CATCH_DISCOVER_TESTS_SCRIPT}"
      VERBATIM
    )
  endforeach()

  if(${CMAKE_VERSION} VERSION_LESS "3.10.0")
    # Older CMake: use the single-valued TEST_INCLUDE_FILE property, and
    # refuse to overwrite it if it is already set.
    get_property(include_file_already_set DIRECTORY PROPERTY TEST_INCLUDE_FILE SET)
    if(${include_file_already_set})
      message(FATAL_ERROR
        "Cannot set more than one TEST_INCLUDE_FILE"
      )
    else()
      set_property(DIRECTORY
        PROPERTY TEST_INCLUDE_FILE "${include_script_rel}"
      )
    endif()
  else()
    # CMake >= 3.10: append to the TEST_INCLUDE_FILES directory property.
    set_property(DIRECTORY
      APPEND PROPERTY TEST_INCLUDE_FILES "${include_script_rel}"
    )
  endif()
endfunction()
|
||||
|
||||
###############################################################################
|
||||
|
||||
|
||||
|
||||
|
||||
#------------------------------------------------------------------------------
|
||||
# current staging
|
||||
# Generates the CTest include scripts for one test executable.
#
#   TARGET  executable target whose tests are to be discovered
#
# Two scripts are written into the current binary directory: a "build"
# variant, which is appended to the TEST_INCLUDE_FILES directory property, and
# an "install" variant that refers to files under script/ and is recorded in
# the global G_INSTALL_CTEST_INCLUDE_FILES property. Nothing is written when
# CMake is older than 3.10.
#
# Reads ADD_SCRIPT_PATH and CATCH_INCLUDE_PATH, which are not set in this
# function.
function(catch_discover_tests TARGET)
  cmake_parse_arguments(
    ""
    ""
    "TEST_PREFIX;TEST_SUFFIX;WORKING_DIRECTORY;TEST_LIST;REPORTER;OUTPUT_DIR;OUTPUT_PREFIX;OUTPUT_SUFFIX"
    "TEST_SPEC;EXTRA_ARGS;PROPERTIES"
    ${ARGN}
  )

  if(NOT _WORKING_DIRECTORY)
    set(_WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}")
  endif()
  get_property(crosscompiling_emulator
    TARGET ${TARGET}
    PROPERTY CROSSCOMPILING_EMULATOR
  )

  # Short digest of the discovery arguments, used to form the generated
  # file names.
  string(SHA1 name_digest "${_TEST_SPEC} ${_EXTRA_ARGS} ${_REPORTER} ${_OUTPUT_DIR} ${_OUTPUT_PREFIX} ${_OUTPUT_SUFFIX}")
  string(SUBSTRING ${name_digest} 0 7 name_digest)

  # Paths of the generated scripts, plus paths relative to the binary dir.
  set(build_include "${CMAKE_CURRENT_BINARY_DIR}/${TARGET}_include_build-${name_digest}.cmake")
  set(install_include "${CMAKE_CURRENT_BINARY_DIR}/${TARGET}_include_install-${name_digest}.cmake")
  set(tests_file_name "${TARGET}_tests-${name_digest}.cmake")
  set(tests_file "${CMAKE_CURRENT_BINARY_DIR}/${tests_file_name}")
  file(RELATIVE_PATH build_include_rel ${CMAKE_CURRENT_BINARY_DIR} ${build_include})
  file(RELATIVE_PATH tests_file_rel ${CMAKE_CURRENT_BINARY_DIR} ${tests_file})
  file(RELATIVE_PATH add_script_rel ${CMAKE_CURRENT_BINARY_DIR} ${ADD_SCRIPT_PATH})
  file(RELATIVE_PATH catch_include_rel ${CMAKE_CURRENT_BINARY_DIR} ${CATCH_INCLUDE_PATH})

  # Everything below is only done for CMake >= 3.10.
  if(${CMAKE_VERSION} VERSION_LESS "3.10.0")
    return()
  endif()

  # Build-time include file: paths are relative to the binary directory.
  file(WRITE ${build_include}
    "set(_TARGET_EXECUTABLE ${TARGET})\n"
    "set(TARGET ${TARGET})\n"
    "set(_TEST_LIST ${TARGET}_TESTS)\n"
    "set(ctestfilepath ${tests_file_rel})\n"
    "set(_CATCH_ADD_TEST_SCRIPT ${add_script_rel})\n"
    "set(crosscompiling_emulator ${crosscompiling_emulator})\n"
    "set(_PROPERTIES ${_PROPERTIES})\n"
    "include(${catch_include_rel})\n"
  )
  # Add discovered tests to directory TEST_INCLUDE_FILES
  set_property(DIRECTORY
    APPEND PROPERTY TEST_INCLUDE_FILES "${build_include_rel}"
  )

  # Install-time include file: same variables, but pointing at script/.
  file(WRITE ${install_include}
    "set(_TARGET_EXECUTABLE ${TARGET})\n"
    "set(TARGET ${TARGET})\n"
    "set(_TEST_LIST ${TARGET}_TESTS)\n"
    "set(ctestfilepath script/${tests_file_name})\n"
    "set(_CATCH_ADD_TEST_SCRIPT script/CatchAddTests.cmake)\n"
    "set(crosscompiling_emulator ${crosscompiling_emulator})\n"
    "set(_PROPERTIES ${_PROPERTIES})\n"
    "include(script/catch_include.cmake)\n"
  )
  set_property(GLOBAL
    APPEND PROPERTY G_INSTALL_CTEST_INCLUDE_FILES "${install_include}"
  )
endfunction()
|
||||
|
||||
###############################################################################
|
||||
|
||||
set(_CATCH_DISCOVER_TESTS_SCRIPT
|
||||
${CMAKE_CURRENT_LIST_DIR}/CatchAddTests.cmake
|
||||
CACHE INTERNAL "Catch2 full path to CatchAddTests.cmake helper file"
|
||||
)
|
||||
|
||||
|
||||
###############################################################################
|
||||
# function to be called by all tests
|
||||
# Builds the test executable(s) for one test category and registers them for
# build-time Catch test discovery.
#
# Keyword arguments:
#   NAME              executable name when tests are not built standalone;
#                     also passed on as the discovery test-set name
#   TEST_TARGET_NAME  target (e.g. build_tests) that is made to depend on
#                     every executable created here
#   PLATFORM          accepted by the parser but not referenced here
#   COMPILE_OPTIONS   passed to target_compile_options(... PUBLIC ...)
#   LINKER_LIBS       extra libraries to link
#   PROPERTY          forwarded to set_property(TARGET ... PROPERTY ...)
#   TEST_SRC, COMMON_SHARED_SRC
#                     accepted by the parser; NOTE(review): the body reads
#                     ${TEST_SRC} / ${COMMON_SHARED_SRC} rather than the parsed
#                     ${_TEST_SRC} / ${_COMMON_SHARED_SRC}, so it presumably
#                     relies on the caller's variables of the same name --
#                     confirm.
#
# When STANDALONE_TESTS equals 1, one executable is created per source file;
# otherwise a single executable named NAME is built from all sources.
function(hip_add_exe_to_target_compile_time_detection)
  set(options)
  # NAME EventTest, TEST_SRC src, TEST_TARGET_NAME build_tests
  set(args NAME TEST_TARGET_NAME PLATFORM COMPILE_OPTIONS)
  set(list_args TEST_SRC LINKER_LIBS COMMON_SHARED_SRC PROPERTY)
  cmake_parse_arguments(
    PARSE_ARGV 0
    "" # variable prefix
    "${options}"
    "${args}"
    "${list_args}"
  )

  foreach(SRC_NAME ${TEST_SRC})
    if(NOT STANDALONE_TESTS EQUAL "1")
      set(_EXE_NAME ${_NAME})
      # take the entire source set for building the executable
      set(SRC_NAME ${TEST_SRC})
    else()
      # strip extension of src and use exe name as src name
      get_filename_component(_EXE_NAME ${SRC_NAME} NAME_WLE)
    endif()

    if(NOT RTC_TESTING)
      add_executable(${_EXE_NAME} EXCLUDE_FROM_ALL ${SRC_NAME} ${COMMON_SHARED_SRC} $<TARGET_OBJECTS:Main_Object> $<TARGET_OBJECTS:KERNELS>)
    else()
      # RTC builds leave out the KERNELS objects and link the runtime
      # compilation library for the selected platform instead.
      add_executable(${_EXE_NAME} EXCLUDE_FROM_ALL ${SRC_NAME} ${COMMON_SHARED_SRC} $<TARGET_OBJECTS:Main_Object>)
      if(HIP_PLATFORM STREQUAL "amd")
        target_link_libraries(${_EXE_NAME} hiprtc)
      else()
        target_link_libraries(${_EXE_NAME} nvrtc)
      endif()
    endif()

    if(UNIX)
      set(_LINKER_LIBS ${_LINKER_LIBS} stdc++fs)
      set(_LINKER_LIBS ${_LINKER_LIBS} -ldl)
    else()
      # res files are built resource files using rc files.
      # use llvm-rc exe to build the res files
      # These are used to populate the properties of the built executables
      if(EXISTS "${PROP_RC}/catchProp.res")
        set(_LINKER_LIBS ${_LINKER_LIBS} "${PROP_RC}/catchProp.res")
      endif()
      #set(_LINKER_LIBS ${_LINKER_LIBS} -noAutoResponse)
    endif()

    if(DEFINED _LINKER_LIBS)
      target_link_libraries(${_EXE_NAME} ${_LINKER_LIBS})
    endif()

    # Add dependency on build_tests to build it on this custom target
    add_dependencies(${_TEST_TARGET_NAME} ${_EXE_NAME})

    if(DEFINED _PROPERTY)
      set_property(TARGET ${_EXE_NAME} PROPERTY ${_PROPERTY})
    endif()

    if(DEFINED _COMPILE_OPTIONS)
      target_compile_options(${_EXE_NAME} PUBLIC ${_COMPILE_OPTIONS})
    endif()
    foreach(arg IN LISTS _UNPARSED_ARGUMENTS)
      message(WARNING "Unparsed arguments: ${arg}")
    endforeach()
    # Left in this scope for the discovery function called below, which reads
    # ${crosscompiling_emulator} without setting it.
    get_property(crosscompiling_emulator
      TARGET ${_EXE_NAME}
      PROPERTY CROSSCOMPILING_EMULATOR
    )
    set(_EXE_NAME_LIST ${_EXE_NAME_LIST} ${_EXE_NAME})
    if(NOT STANDALONE_TESTS EQUAL "1")
      # All sources went into the single executable; nothing left to do.
      break()
    endif()
  endforeach()

  # Hand the collected executables to the compile-time discovery variant:
  # its (TARGET TEST_SET) signature takes a target list plus a set name.
  # catch_discover_tests() accepts exactly one target and would treat
  # "${_NAME}" as an unparsed argument.
  catch_discover_tests_compile_time_detection("${_EXE_NAME_LIST}" "${_NAME}" PROPERTIES SKIP_REGULAR_EXPRESSION "HIP_SKIP_THIS_TEST")
endfunction()
|
||||
|
||||
###############################################################################
|
||||
# current staging
|
||||
# function to be called by all tests
|
||||
# Builds the test executable(s) for one test category, records each one in the
# global G_INSTALL_EXE_TARGETS property and registers it through
# catch_discover_tests().
#
# Single-value keywords: NAME, TEST_TARGET_NAME, PLATFORM, COMPILE_OPTIONS.
# List keywords: TEST_SRC, LINKER_LIBS, COMMON_SHARED_SRC, PROPERTY.
#
# When STANDALONE_TESTS equals 1, one executable is created per source file;
# otherwise a single executable named NAME is built from all sources.
function(hip_add_exe_to_target)
  set(single_value_keywords NAME TEST_TARGET_NAME PLATFORM COMPILE_OPTIONS)
  set(multi_value_keywords TEST_SRC LINKER_LIBS COMMON_SHARED_SRC PROPERTY)
  cmake_parse_arguments(
    PARSE_ARGV 0
    "" # variable prefix
    "" # no option keywords
    "${single_value_keywords}"
    "${multi_value_keywords}"
  )

  foreach(SRC_NAME ${TEST_SRC})
    if(STANDALONE_TESTS EQUAL "1")
      # strip extension of src and use exe name as src name
      get_filename_component(_EXE_NAME ${SRC_NAME} NAME_WLE)
    else()
      # one executable built from the entire source set
      set(_EXE_NAME ${_NAME})
      set(SRC_NAME ${TEST_SRC})
    endif()

    if(RTC_TESTING)
      # RTC builds leave out the KERNELS objects and link the runtime
      # compilation library for the selected platform instead.
      add_executable(${_EXE_NAME} EXCLUDE_FROM_ALL ${SRC_NAME} ${COMMON_SHARED_SRC} $<TARGET_OBJECTS:Main_Object>)
      if(HIP_PLATFORM STREQUAL "amd")
        target_link_libraries(${_EXE_NAME} hiprtc)
      else()
        target_link_libraries(${_EXE_NAME} nvrtc)
      endif()
    else()
      add_executable(${_EXE_NAME} EXCLUDE_FROM_ALL ${SRC_NAME} ${COMMON_SHARED_SRC} $<TARGET_OBJECTS:Main_Object> $<TARGET_OBJECTS:KERNELS>)
    endif()

    if(DEFINED _PROPERTY)
      set_property(TARGET ${_EXE_NAME} PROPERTY ${_PROPERTY})
    endif()

    if(UNIX)
      set(_LINKER_LIBS ${_LINKER_LIBS} stdc++fs -ldl pthread rt)
    else()
      # .res files are resource files compiled from .rc files (llvm-rc is
      # used to build them); they populate the properties of the built
      # executables.
      if(EXISTS "${PROP_RC}/catchProp.res")
        set(_LINKER_LIBS ${_LINKER_LIBS} "${PROP_RC}/catchProp.res")
      endif()
    endif()

    if(DEFINED _LINKER_LIBS)
      target_link_libraries(${_EXE_NAME} ${_LINKER_LIBS})
    endif()

    # Building the umbrella target (e.g. build_tests) builds this executable.
    add_dependencies(${_TEST_TARGET_NAME} ${_EXE_NAME})

    if(DEFINED _COMPILE_OPTIONS)
      target_compile_options(${_EXE_NAME} PUBLIC ${_COMPILE_OPTIONS})
    endif()

    foreach(leftover IN LISTS _UNPARSED_ARGUMENTS)
      message(WARNING "Unparsed arguments: ${leftover}")
    endforeach()

    # add binary to global list of binaries to install
    set_property(GLOBAL APPEND PROPERTY G_INSTALL_EXE_TARGETS ${_EXE_NAME})
    catch_discover_tests("${_EXE_NAME}" PROPERTIES SKIP_REGULAR_EXPRESSION "HIP_SKIP_THIS_TEST")

    if(NOT STANDALONE_TESTS EQUAL "1")
      # All sources went into the single executable; nothing left to do.
      break()
    endif()
  endforeach()
endfunction()
|
||||
|
||||
+34
@@ -0,0 +1,34 @@
|
||||
|
||||
####### Expanded from @PACKAGE_INIT@ by configure_package_config_file() #######
|
||||
####### Any changes to this file will be overwritten by the next CMake run ####
|
||||
####### The input file was Catch2Config.cmake.in ########
|
||||
|
||||
get_filename_component(PACKAGE_PREFIX_DIR "${CMAKE_CURRENT_LIST_DIR}/../../../" ABSOLUTE)
|
||||
|
||||
macro(set_and_check _var _file)
|
||||
set(${_var} "${_file}")
|
||||
if(NOT EXISTS "${_file}")
|
||||
message(FATAL_ERROR "File or directory ${_file} referenced by variable ${_var} does not exist !")
|
||||
endif()
|
||||
endmacro()
|
||||
|
||||
macro(check_required_components _NAME)
|
||||
foreach(comp ${${_NAME}_FIND_COMPONENTS})
|
||||
if(NOT ${_NAME}_${comp}_FOUND)
|
||||
if(${_NAME}_FIND_REQUIRED_${comp})
|
||||
set(${_NAME}_FOUND FALSE)
|
||||
endif()
|
||||
endif()
|
||||
endforeach()
|
||||
endmacro()
|
||||
|
||||
####################################################################################
|
||||
|
||||
|
||||
# Avoid repeatedly including the targets
|
||||
if(NOT TARGET Catch2::Catch2)
|
||||
# Provide path for scripts
|
||||
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}")
|
||||
|
||||
include(${CMAKE_CURRENT_LIST_DIR}/Catch2Targets.cmake)
|
||||
endif()
|
||||
+51
@@ -0,0 +1,51 @@
|
||||
# This is a basic version file for the Config-mode of find_package().
|
||||
# It is used by write_basic_package_version_file() as input file for configure_file()
|
||||
# to create a version-file which can be installed along a config.cmake file.
|
||||
#
|
||||
# The created file sets PACKAGE_VERSION_EXACT if the current version string and
|
||||
# the requested version string are exactly the same and it sets
|
||||
# PACKAGE_VERSION_COMPATIBLE if the current version is >= requested version,
|
||||
# but only if the requested major version is the same as the current one.
|
||||
# The variable CVF_VERSION must be set before calling configure_file().
|
||||
|
||||
|
||||
set(PACKAGE_VERSION "2.13.6")
|
||||
|
||||
if(PACKAGE_VERSION VERSION_LESS PACKAGE_FIND_VERSION)
|
||||
set(PACKAGE_VERSION_COMPATIBLE FALSE)
|
||||
else()
|
||||
|
||||
if("2.13.6" MATCHES "^([0-9]+)\\.")
|
||||
set(CVF_VERSION_MAJOR "${CMAKE_MATCH_1}")
|
||||
else()
|
||||
set(CVF_VERSION_MAJOR "2.13.6")
|
||||
endif()
|
||||
|
||||
if(PACKAGE_FIND_VERSION_MAJOR STREQUAL CVF_VERSION_MAJOR)
|
||||
set(PACKAGE_VERSION_COMPATIBLE TRUE)
|
||||
else()
|
||||
set(PACKAGE_VERSION_COMPATIBLE FALSE)
|
||||
endif()
|
||||
|
||||
if(PACKAGE_FIND_VERSION STREQUAL PACKAGE_VERSION)
|
||||
set(PACKAGE_VERSION_EXACT TRUE)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
|
||||
# if the installed project requested no architecture check, don't perform the check
|
||||
if("FALSE")
|
||||
return()
|
||||
endif()
|
||||
|
||||
# if the installed or the using project don't have CMAKE_SIZEOF_VOID_P set, ignore it:
|
||||
if("${CMAKE_SIZEOF_VOID_P}" STREQUAL "" OR "" STREQUAL "")
|
||||
return()
|
||||
endif()
|
||||
|
||||
# check that the installed version has the same 32/64bit-ness as the one which is currently searching:
|
||||
if(NOT CMAKE_SIZEOF_VOID_P STREQUAL "")
|
||||
math(EXPR installedBits " * 8")
|
||||
set(PACKAGE_VERSION "${PACKAGE_VERSION} (${installedBits}bit)")
|
||||
set(PACKAGE_VERSION_UNSUITABLE TRUE)
|
||||
endif()
|
||||
+99
@@ -0,0 +1,99 @@
|
||||
# Generated by CMake
|
||||
|
||||
if("${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION}" LESS 2.5)
|
||||
message(FATAL_ERROR "CMake >= 2.6.0 required")
|
||||
endif()
|
||||
cmake_policy(PUSH)
|
||||
cmake_policy(VERSION 2.6...3.17)
|
||||
#----------------------------------------------------------------
|
||||
# Generated CMake target import file.
|
||||
#----------------------------------------------------------------
|
||||
|
||||
# Commands may need to know the format version.
|
||||
set(CMAKE_IMPORT_FILE_VERSION 1)
|
||||
|
||||
# Protect against multiple inclusion, which would fail when already imported targets are added once more.
|
||||
set(_targetsDefined)
|
||||
set(_targetsNotDefined)
|
||||
set(_expectedTargets)
|
||||
foreach(_expectedTarget Catch2::Catch2)
|
||||
list(APPEND _expectedTargets ${_expectedTarget})
|
||||
if(NOT TARGET ${_expectedTarget})
|
||||
list(APPEND _targetsNotDefined ${_expectedTarget})
|
||||
endif()
|
||||
if(TARGET ${_expectedTarget})
|
||||
list(APPEND _targetsDefined ${_expectedTarget})
|
||||
endif()
|
||||
endforeach()
|
||||
if("${_targetsDefined}" STREQUAL "${_expectedTargets}")
|
||||
unset(_targetsDefined)
|
||||
unset(_targetsNotDefined)
|
||||
unset(_expectedTargets)
|
||||
set(CMAKE_IMPORT_FILE_VERSION)
|
||||
cmake_policy(POP)
|
||||
return()
|
||||
endif()
|
||||
if(NOT "${_targetsDefined}" STREQUAL "")
|
||||
message(FATAL_ERROR "Some (but not all) targets in this export set were already defined.\nTargets Defined: ${_targetsDefined}\nTargets not yet defined: ${_targetsNotDefined}\n")
|
||||
endif()
|
||||
unset(_targetsDefined)
|
||||
unset(_targetsNotDefined)
|
||||
unset(_expectedTargets)
|
||||
|
||||
|
||||
# Compute the installation prefix relative to this file.
|
||||
get_filename_component(_IMPORT_PREFIX "${CMAKE_CURRENT_LIST_FILE}" PATH)
|
||||
get_filename_component(_IMPORT_PREFIX "${_IMPORT_PREFIX}" PATH)
|
||||
get_filename_component(_IMPORT_PREFIX "${_IMPORT_PREFIX}" PATH)
|
||||
get_filename_component(_IMPORT_PREFIX "${_IMPORT_PREFIX}" PATH)
|
||||
if(_IMPORT_PREFIX STREQUAL "/")
|
||||
set(_IMPORT_PREFIX "")
|
||||
endif()
|
||||
|
||||
# Create imported target Catch2::Catch2
|
||||
add_library(Catch2::Catch2 INTERFACE IMPORTED)
|
||||
|
||||
set_target_properties(Catch2::Catch2 PROPERTIES
|
||||
INTERFACE_COMPILE_FEATURES "cxx_alignas;cxx_alignof;cxx_attributes;cxx_auto_type;cxx_constexpr;cxx_defaulted_functions;cxx_deleted_functions;cxx_final;cxx_lambdas;cxx_noexcept;cxx_override;cxx_range_for;cxx_rvalue_references;cxx_static_assert;cxx_strong_enums;cxx_trailing_return_types;cxx_unicode_literals;cxx_user_literals;cxx_variadic_macros"
|
||||
INTERFACE_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/include"
|
||||
)
|
||||
|
||||
if(CMAKE_VERSION VERSION_LESS 3.0.0)
|
||||
message(FATAL_ERROR "This file relies on consumers using CMake 3.0.0 or greater.")
|
||||
endif()
|
||||
|
||||
# Load information for each installed configuration.
|
||||
get_filename_component(_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH)
|
||||
file(GLOB CONFIG_FILES "${_DIR}/Catch2Targets-*.cmake")
|
||||
foreach(f ${CONFIG_FILES})
|
||||
include(${f})
|
||||
endforeach()
|
||||
|
||||
# Cleanup temporary variables.
|
||||
set(_IMPORT_PREFIX)
|
||||
|
||||
# Loop over all imported files and verify that they actually exist
|
||||
foreach(target ${_IMPORT_CHECK_TARGETS} )
|
||||
foreach(file ${_IMPORT_CHECK_FILES_FOR_${target}} )
|
||||
if(NOT EXISTS "${file}" )
|
||||
message(FATAL_ERROR "The imported target \"${target}\" references the file
|
||||
\"${file}\"
|
||||
but this file does not exist. Possible reasons include:
|
||||
* The file was deleted, renamed, or moved to another location.
|
||||
* An install or uninstall procedure did not complete successfully.
|
||||
* The installation package was faulty and contained
|
||||
\"${CMAKE_CURRENT_LIST_FILE}\"
|
||||
but not all the files it references.
|
||||
")
|
||||
endif()
|
||||
endforeach()
|
||||
unset(_IMPORT_CHECK_FILES_FOR_${target})
|
||||
endforeach()
|
||||
unset(_IMPORT_CHECK_TARGETS)
|
||||
|
||||
# This file does not depend on other imported targets which have
|
||||
# been exported from the same project but in a separate export set.
|
||||
|
||||
# Commands beyond this point should not need to know the version.
|
||||
set(CMAKE_IMPORT_FILE_VERSION)
|
||||
cmake_policy(POP)
|
||||
+134
@@ -0,0 +1,134 @@
|
||||
# Distributed under the OSI-approved BSD 3-Clause License. See accompanying
|
||||
# file Copyright.txt or https://cmake.org/licensing for details.
|
||||
|
||||
# Copy the TEST_* inputs (expected to be defined by whoever invokes this
# script) into short local names used by the rest of the script.

# Decoration applied to every generated CTest test name.
set(prefix "${TEST_PREFIX}")
set(suffix "${TEST_SUFFIX}")

# Arguments forwarded to the test executable and properties for the tests.
set(spec ${TEST_SPEC})
set(extra_args ${TEST_EXTRA_ARGS})
set(properties ${TEST_PROPERTIES})

# Reporter selection and per-test output file naming.
set(reporter ${TEST_REPORTER})
set(output_dir ${TEST_OUTPUT_DIR})
set(output_prefix ${TEST_OUTPUT_PREFIX})
set(output_suffix ${TEST_OUTPUT_SUFFIX})

# Start with clean accumulators: the generated script text, the suite name and
# the list of discovered tests.
unset(script)
unset(suite)
unset(tests)
|
||||
|
||||
# Append one "<NAME>(<args>)" line to the `script` variable of the calling scope.
#
#   NAME           command name to emit (for example add_test)
#   ARGV1..ARGVn   arguments of the emitted command; any argument containing a
#                  character outside [-./:a-zA-Z0-9_] is wrapped as a bracket
#                  argument so it survives re-parsing.
function(add_command NAME)
  set(_rendered "")
  # use ARGV* instead of ARGN, because ARGN splits arrays into multiple arguments
  math(EXPR _final_index ${ARGC}-1)
  foreach(_index RANGE 1 ${_final_index})
    set(_value "${ARGV${_index}}")
    if(NOT _value MATCHES "[^-./:a-zA-Z0-9_]")
      # Only plain characters: emit the argument verbatim.
      set(_rendered "${_rendered} ${_value}")
    else()
      set(_rendered "${_rendered} [==[${_value}]==]") # form a bracket_argument
    endif()
  endforeach()
  set(script "${script}${NAME}(${_rendered})\n" PARENT_SCOPE)
endfunction()
|
||||
|
||||
|
||||
# Resolve the test executable to an absolute path (with .exe on Windows).
if(WIN32)
  set(TEST_EXECUTABLE ${TEST_EXECUTABLE}.exe)
endif()
get_filename_component(TEST_EXECUTABLE ${TEST_EXECUTABLE} ABSOLUTE)

# Ask the test executable for the names of its test cases.
execute_process(
  COMMAND ${TEST_EXECUTOR} "${TEST_EXECUTABLE}" ${spec} --list-test-names-only
  OUTPUT_VARIABLE output
  RESULT_VARIABLE result
  WORKING_DIRECTORY "${TEST_WORKING_DIR}"
)
# RESULT_VARIABLE is either an integer exit code or a string describing why the
# process could not be run / terminated abnormally. A non-numeric value would
# slip through the numeric comparisons below, so reject it explicitly.
if(NOT "${result}" MATCHES "^-?[0-9]+$")
  message(FATAL_ERROR
    "Error running test executable '${TEST_EXECUTABLE}':\n"
    " Result: ${result}\n"
    " Output: ${output}\n"
  )
# Catch --list-test-names-only reports the number of tests, so 0 is... surprising
elseif(${result} EQUAL 0)
  message(WARNING
    "Test executable '${TEST_EXECUTABLE}' contains no tests!\n"
  )
elseif(${result} LESS 0)
  message(FATAL_ERROR
    "Error running test executable '${TEST_EXECUTABLE}':\n"
    " Result: ${result}\n"
    " Output: ${output}\n"
  )
endif()

# One list element per line of output.
string(REPLACE "\n" ";" output "${output}")

# Run test executable to get list of available reporters
execute_process(
  COMMAND ${TEST_EXECUTOR} "${TEST_EXECUTABLE}" ${spec} --list-reporters
  OUTPUT_VARIABLE reporters_output
  RESULT_VARIABLE reporters_result
  WORKING_DIRECTORY "${TEST_WORKING_DIR}"
)
# Same non-numeric guard as for the test listing above.
if(NOT "${reporters_result}" MATCHES "^-?[0-9]+$")
  message(FATAL_ERROR
    "Error running test executable '${TEST_EXECUTABLE}':\n"
    " Result: ${reporters_result}\n"
    " Output: ${reporters_output}\n"
  )
elseif(${reporters_result} EQUAL 0)
  message(WARNING
    "Test executable '${TEST_EXECUTABLE}' contains no reporters!\n"
  )
elseif(${reporters_result} LESS 0)
  message(FATAL_ERROR
    "Error running test executable '${TEST_EXECUTABLE}':\n"
    " Result: ${reporters_result}\n"
    " Output: ${reporters_output}\n"
  )
endif()
# The requested reporter must appear somewhere in the reporter listing.
string(FIND "${reporters_output}" "${reporter}" reporter_is_valid)
if(reporter AND ${reporter_is_valid} EQUAL -1)
  message(FATAL_ERROR
    "\"${reporter}\" is not a valid reporter!\n"
  )
endif()

# Prepare reporter
if(reporter)
  set(reporter_arg "--reporter ${reporter}")
endif()

# Prepare output dir
# A relative output dir is anchored at the test working directory and created
# if it does not exist yet.
if(output_dir AND NOT IS_ABSOLUTE ${output_dir})
  set(output_dir "${TEST_WORKING_DIR}/${output_dir}")
  if(NOT EXISTS ${output_dir})
    file(MAKE_DIRECTORY ${output_dir})
  endif()
endif()

# Parse output
foreach(line ${output})
  set(test ${line})
  # Escape characters in test case names that would be parsed by Catch2
  set(test_name ${test})
  foreach(char , [ ])
    string(REPLACE ${char} "\\${char}" test_name ${test_name})
  endforeach(char)
  # ...add output dir
  if(output_dir)
    string(REGEX REPLACE "[^A-Za-z0-9_]" "_" test_name_clean ${test_name})
    set(output_dir_arg "--out ${output_dir}/${output_prefix}${test_name_clean}${output_suffix}")
  endif()

  # ...and add to script
  add_command(add_test
    "${prefix}${test}${suffix}"
    ${TEST_EXECUTOR}
    "${TEST_EXECUTABLE}"
    "${test_name}"
    ${extra_args}
    "${reporter_arg}"
    "${output_dir_arg}"
  )
  add_command(set_tests_properties
    "${prefix}${test}${suffix}"
    PROPERTIES
    ${properties}
  )
  list(APPEND tests "${prefix}${test}${suffix}")
endforeach()

# Create a list of all discovered tests, which users may use to e.g. set
# properties on the tests
add_command(set ${TEST_LIST} ${tests})

# Write CTest script
file(WRITE "${CTEST_FILE}" "${script}")
|
||||
+252
@@ -0,0 +1,252 @@
|
||||
#==================================================================================================#
|
||||
# supported macros #
|
||||
# - TEST_CASE, #
|
||||
# - TEMPLATE_TEST_CASE #
|
||||
# - SCENARIO, #
|
||||
# - TEST_CASE_METHOD, #
|
||||
# - CATCH_TEST_CASE, #
|
||||
# - CATCH_TEMPLATE_TEST_CASE #
|
||||
# - CATCH_SCENARIO, #
|
||||
# - CATCH_TEST_CASE_METHOD. #
|
||||
# #
|
||||
# Usage #
|
||||
# 1. make sure this module is in the path or add this otherwise: #
|
||||
# set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake.modules/") #
|
||||
# 2. make sure that you've enabled testing option for the project by the call: #
|
||||
# enable_testing() #
|
||||
# 3. add the lines to the script for testing target (sample CMakeLists.txt): #
|
||||
# project(testing_target) #
|
||||
# set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake.modules/") #
|
||||
# enable_testing() #
|
||||
# #
|
||||
# find_path(CATCH_INCLUDE_DIR "catch.hpp") #
|
||||
# include_directories(${INCLUDE_DIRECTORIES} ${CATCH_INCLUDE_DIR}) #
|
||||
# #
|
||||
# file(GLOB SOURCE_FILES "*.cpp") #
|
||||
# add_executable(${PROJECT_NAME} ${SOURCE_FILES}) #
|
||||
# #
|
||||
# include(ParseAndAddCatchTests) #
|
||||
# ParseAndAddCatchTests(${PROJECT_NAME}) #
|
||||
# #
|
||||
# The following variables affect the behavior of the script: #
|
||||
# #
|
||||
# PARSE_CATCH_TESTS_VERBOSE (Default OFF) #
|
||||
# -- enables debug messages #
|
||||
# PARSE_CATCH_TESTS_NO_HIDDEN_TESTS (Default OFF) #
|
||||
# -- excludes tests marked with [!hide], [.] or [.foo] tags #
|
||||
# PARSE_CATCH_TESTS_ADD_FIXTURE_IN_TEST_NAME (Default ON) #
|
||||
# -- adds fixture class name to the test name #
|
||||
# PARSE_CATCH_TESTS_ADD_TARGET_IN_TEST_NAME (Default ON) #
|
||||
# -- adds cmake target name to the test name #
|
||||
# PARSE_CATCH_TESTS_ADD_TO_CONFIGURE_DEPENDS (Default OFF) #
|
||||
# -- causes CMake to rerun when file with tests changes so that new tests will be discovered #
|
||||
# #
|
||||
# One can also set (locally) the optional variable OptionalCatchTestLauncher to specify the way #
|
||||
# a test should be run. For instance to use test MPI, one can write #
|
||||
# set(OptionalCatchTestLauncher ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} ${NUMPROC}) #
|
||||
# just before calling this ParseAndAddCatchTests function #
|
||||
# #
|
||||
# The AdditionalCatchParameters optional variable can be used to pass extra argument to the test #
|
||||
# command. For example, to include successful tests in the output, one can write #
|
||||
# set(AdditionalCatchParameters --success) #
|
||||
# #
|
||||
# After the script, the ParseAndAddCatchTests_TESTS property for the target, and for each source #
|
||||
# file in the target is set, and contains the list of the tests extracted from that target, or #
|
||||
# from that file. This is useful, for example to add further labels or properties to the tests. #
|
||||
# #
|
||||
#==================================================================================================#
|
||||
|
||||
# Refuse to load when the project's declared minimum CMake version is below 2.8.8.
if(CMAKE_MINIMUM_REQUIRED_VERSION VERSION_LESS 2.8.8)
  message(FATAL_ERROR "ParseAndAddCatchTests requires CMake 2.8.8 or newer")
endif()

# User-configurable switches that control how tests are parsed and named.
option(PARSE_CATCH_TESTS_VERBOSE
  "Print Catch to CTest parser debug messages"
  OFF)
option(PARSE_CATCH_TESTS_NO_HIDDEN_TESTS
  "Exclude tests with [!hide], [.] or [.foo] tags"
  OFF)
option(PARSE_CATCH_TESTS_ADD_FIXTURE_IN_TEST_NAME
  "Add fixture class name to the test name"
  ON)
option(PARSE_CATCH_TESTS_ADD_TARGET_IN_TEST_NAME
  "Add target name to the test name"
  ON)
option(PARSE_CATCH_TESTS_ADD_TO_CONFIGURE_DEPENDS
  "Add test file to CMAKE_CONFIGURE_DEPENDS property"
  OFF)
|
||||
|
||||
# Print all arguments as one STATUS message, but only when
# PARSE_CATCH_TESTS_VERBOSE is enabled.
function(ParseAndAddCatchTests_PrintDebugMessage)
  if(NOT PARSE_CATCH_TESTS_VERBOSE)
    return()
  endif()
  message(STATUS "ParseAndAddCatchTests: ${ARGV}")
endfunction()
|
||||
|
||||
# Strip comments from C++ source text held in the variable named by CppCode
# and write the result back to that variable in the caller's scope.
#   - block comments (/* ... */) are removed
#   - full-line comments (// ... at the start of a line) are removed
#   - partial-line (trailing) comments are kept
function(ParseAndAddCatchTests_RemoveComments CppCode)
  # Work on a local copy of the caller's text.
  set(_stripped "${${CppCode}}")

  # Swap the two-character comment delimiters for single control characters so
  # that a negated character class can find the end of a block comment.
  string(ASCII 2 CMakeBeginBlockComment)
  string(ASCII 3 CMakeEndBlockComment)
  string(REGEX REPLACE "/\\*" "${CMakeBeginBlockComment}" _stripped "${_stripped}")
  string(REGEX REPLACE "\\*/" "${CMakeEndBlockComment}" _stripped "${_stripped}")

  # Drop everything between a begin marker and the next end marker.
  string(REGEX REPLACE "${CMakeBeginBlockComment}[^${CMakeEndBlockComment}]*${CMakeEndBlockComment}" "" _stripped "${_stripped}")

  # Drop lines that consist only of a // comment.
  string(REGEX REPLACE "\n[ \t]*//+[^\n]+" "\n" _stripped "${_stripped}")

  set(${CppCode} "${_stripped}" PARENT_SCOPE)
endfunction()
|
||||
|
||||
# Worker: scan one source file of TestTarget for Catch test-case macros and
# register a CTest test for every match.
#
#   SourceFile  path of the source file (or a $<TARGET_OBJECTS:...> expression,
#               which is skipped)
#   TestTarget  CMake target whose executable runs the tests
function(ParseAndAddCatchTests_ParseFile SourceFile TestTarget)
  # If SourceFile is an object library, do not scan it (as it is not a file).
  # Exit without giving a warning about a missing file.
  if(SourceFile MATCHES "\\\$<TARGET_OBJECTS:.+>")
    ParseAndAddCatchTests_PrintDebugMessage("Detected OBJECT library: ${SourceFile} this will not be scanned for tests.")
    return()
  endif()

  # According to CMake docs EXISTS behavior is well-defined only for full paths.
  get_filename_component(SourceFile ${SourceFile} ABSOLUTE)
  if(NOT EXISTS ${SourceFile})
    message(WARNING "Cannot find source file: ${SourceFile}")
    return()
  endif()

  ParseAndAddCatchTests_PrintDebugMessage("parsing ${SourceFile}")
  file(STRINGS ${SourceFile} Contents NEWLINE_CONSUME)

  # Remove block and fullline comments
  ParseAndAddCatchTests_RemoveComments(Contents)

  # Find definition of test names
  # https://regex101.com/r/JygOND/1
  string(REGEX MATCHALL "[ \t]*(CATCH_)?(TEMPLATE_)?(TEST_CASE_METHOD|SCENARIO|TEST_CASE)[ \t]*\\([ \t\n]*\"[^\"]*\"[ \t\n]*(,[ \t\n]*\"[^\"]*\")?(,[ \t\n]*[^\,\)]*)*\\)[ \t\n]*\{+[ \t]*(//[^\n]*[Tt][Ii][Mm][Ee][Oo][Uu][Tt][ \t]*[0-9]+)*" Tests "${Contents}")

  # Optionally make CMake re-run when a file that contains tests changes.
  if(PARSE_CATCH_TESTS_ADD_TO_CONFIGURE_DEPENDS AND Tests)
    ParseAndAddCatchTests_PrintDebugMessage("Adding ${SourceFile} to CMAKE_CONFIGURE_DEPENDS property")
    set_property(
      DIRECTORY
      APPEND
      PROPERTY CMAKE_CONFIGURE_DEPENDS ${SourceFile}
    )
  endif()

  # check CMP0110 policy for new add_test() behavior
  # (the variable is explicitly cleared first so it is empty when the policy is unknown)
  set(_cmp0110_value)
  if(POLICY CMP0110)
    cmake_policy(GET CMP0110 _cmp0110_value) # new add_test() behavior
  endif()

  foreach(TestName ${Tests})
    # Strip newlines
    string(REGEX REPLACE "\\\\\n|\n" "" TestName "${TestName}")

    # Get test type and fixture if applicable
    string(REGEX MATCH "(CATCH_)?(TEMPLATE_)?(TEST_CASE_METHOD|SCENARIO|TEST_CASE)[ \t]*\\([^,^\"]*" TestTypeAndFixture "${TestName}")
    string(REGEX MATCH "(CATCH_)?(TEMPLATE_)?(TEST_CASE_METHOD|SCENARIO|TEST_CASE)" TestType "${TestTypeAndFixture}")
    string(REGEX REPLACE "${TestType}\\([ \t]*" "" TestFixture "${TestTypeAndFixture}")

    # Get string parts of test definition
    string(REGEX MATCHALL "\"+([^\\^\"]|\\\\\")+\"+" TestStrings "${TestName}")

    # Strip wrapping quotation marks
    string(REGEX REPLACE "^\"(.*)\"$" "\\1" TestStrings "${TestStrings}")
    string(REPLACE "\";\"" ";" TestStrings "${TestStrings}")

    # Validate that a test name and tags have been provided
    list(LENGTH TestStrings TestStringsLength)
    if(TestStringsLength GREATER 2 OR TestStringsLength LESS 1)
      message(FATAL_ERROR "You must provide a valid test name and tags for all tests in ${SourceFile}")
    endif()

    # Assign name and tags
    list(GET TestStrings 0 Name)
    if("${TestType}" STREQUAL "SCENARIO")
      set(Name "Scenario: ${Name}")
    endif()

    # CTest name: plain name, optionally prefixed with fixture and/or target.
    set(CTestName "${Name}")
    if(PARSE_CATCH_TESTS_ADD_FIXTURE_IN_TEST_NAME AND "${TestType}" MATCHES "(CATCH_)?TEST_CASE_METHOD" AND TestFixture )
      set(CTestName "${TestFixture}:${Name}")
    endif()
    if(PARSE_CATCH_TESTS_ADD_TARGET_IN_TEST_NAME)
      set(CTestName "${TestTarget}:${CTestName}")
    endif()

    # add target to labels to enable running all tests added from this target
    set(Labels ${TestTarget})

    # Start without tags so nothing leaks in from the previous loop iteration.
    unset(Tags)
    if(TestStringsLength EQUAL 2)
      list(GET TestStrings 1 Tags)
      string(TOLOWER "${Tags}" Tags)
      # remove target from labels if the test is hidden
      if("${Tags}" MATCHES ".*\\[!?(hide|\\.)\\].*")
        list(REMOVE_ITEM Labels ${TestTarget})
      endif()
      # Turn "[a][b]" into the list "a;b;".
      string(REPLACE "]" ";" Tags "${Tags}")
      string(REPLACE "[" "" Tags "${Tags}")
    endif()

    list(APPEND Labels ${Tags})

    # A label starting with "!hide" or "." marks the test as hidden.
    set(HiddenTagFound OFF)
    foreach(label ${Labels})
      string(REGEX MATCH "^!hide|^\\." result ${label})
      if(result)
        set(HiddenTagFound ON)
        break()
      endif()
    endforeach()

    if(PARSE_CATCH_TESTS_NO_HIDDEN_TESTS AND ${HiddenTagFound} AND ${CMAKE_VERSION} VERSION_LESS "3.9")
      ParseAndAddCatchTests_PrintDebugMessage("Skipping test \"${CTestName}\" as it has [!hide], [.] or [.foo] label")
    else()
      ParseAndAddCatchTests_PrintDebugMessage("Adding test \"${CTestName}\"")
      if(Labels)
        ParseAndAddCatchTests_PrintDebugMessage("Setting labels to ${Labels}")
      endif()

      # Escape commas in the test spec
      string(REPLACE "," "\\," Name ${Name})

      # Work around CMake 3.18.0 change in `add_test()`, before the escaped quotes were necessary,
      # only with CMake 3.18.0 the escaped double quotes confuse the call. This change is reverted in 3.18.1
      # And properly introduced in 3.19 with the CMP0110 policy
      if(_cmp0110_value STREQUAL "NEW" OR ${CMAKE_VERSION} VERSION_EQUAL "3.18")
        ParseAndAddCatchTests_PrintDebugMessage("CMP0110 set to NEW, no need for add_test(\"\") workaround")
      else()
        ParseAndAddCatchTests_PrintDebugMessage("CMP0110 set to OLD adding \"\" for add_test() workaround")
        set(CTestName "\"${CTestName}\"")
      endif()

      # Handle template test cases
      if("${TestTypeAndFixture}" MATCHES ".*TEMPLATE_.*")
        set(Name "${Name} - *")
      endif()

      # Add the test and set its properties
      add_test(NAME "${CTestName}" COMMAND ${OptionalCatchTestLauncher} $<TARGET_FILE:${TestTarget}> ${Name} ${AdditionalCatchParameters})
      # Old CMake versions do not document VERSION_GREATER_EQUAL, so we use VERSION_GREATER with 3.8 instead
      if(PARSE_CATCH_TESTS_NO_HIDDEN_TESTS AND ${HiddenTagFound} AND ${CMAKE_VERSION} VERSION_GREATER "3.8")
        ParseAndAddCatchTests_PrintDebugMessage("Setting DISABLED test property")
        set_tests_properties("${CTestName}" PROPERTIES DISABLED ON)
      else()
        set_tests_properties("${CTestName}" PROPERTIES FAIL_REGULAR_EXPRESSION "No tests ran"
                                                LABELS "${Labels}")
      endif()

      # Record the test name on both the target and the source file.
      set_property(
        TARGET ${TestTarget}
        APPEND
        PROPERTY ParseAndAddCatchTests_TESTS "${CTestName}")
      set_property(
        SOURCE ${SourceFile}
        APPEND
        PROPERTY ParseAndAddCatchTests_TESTS "${CTestName}")
    endif()
  endforeach()
endfunction()
|
||||
|
||||
# Entry point: parse every source file of TestTarget and register the Catch
# tests found in them. Emits a DEPRECATION message on every call.
function(ParseAndAddCatchTests TestTarget)
  message(DEPRECATION "ParseAndAddCatchTest: function deprecated because of possibility of missed test cases. Consider using 'catch_discover_tests' from 'Catch.cmake'")
  ParseAndAddCatchTests_PrintDebugMessage("Started parsing ${TestTarget}")

  # The SOURCES property of the target lists the files to scan.
  get_target_property(_target_sources ${TestTarget} SOURCES)
  ParseAndAddCatchTests_PrintDebugMessage("Found the following sources: ${_target_sources}")

  foreach(_source ${_target_sources})
    ParseAndAddCatchTests_ParseFile(${_source} ${TestTarget})
  endforeach()

  ParseAndAddCatchTests_PrintDebugMessage("Finished parsing ${TestTarget}")
endfunction()
|
||||
+32
@@ -0,0 +1,32 @@
|
||||
# When ctest is run, each submodule includes this file to generate the <submodule>_tests.cmake file.
|
||||
# <submodule>_tests.cmake contains the add_test macro which runs the individual test.
|
||||
|
||||
# NOTE(review): this turns the literal name "cmake" into an absolute path
# rather than using a CMake-provided command variable - confirm that a cmake
# executable is expected at that location.
get_filename_component(_cmake_path cmake ABSOLUTE)

# Run the Catch test-discovery script in a child cmake process. All inputs are
# forwarded as -D definitions; the script path is given last with -P.
# NOTE(review): TEST_WORKING_DIR is not set in this file (the working directory
# is forwarded from _WORKING_DIRECTORY) - confirm the includer defines it.
# The captured `output` and `result` are not inspected here.
execute_process(
  COMMAND "${_cmake_path}"
    # what to run
    -D "TEST_TARGET=${TARGET}"
    -D "TEST_EXECUTABLE=${_TARGET_EXECUTABLE}"
    -D "TEST_EXECUTOR=${crosscompiling_emulator}"
    -D "TEST_WORKING_DIR=${_WORKING_DIRECTORY}"
    # test selection, extra arguments and properties
    -D "TEST_SPEC=${_TEST_SPEC}"
    -D "TEST_EXTRA_ARGS=${_EXTRA_ARGS}"
    -D "TEST_PROPERTIES=${_PROPERTIES}"
    # naming of the generated tests
    -D "TEST_PREFIX=${_TEST_PREFIX}"
    -D "TEST_SUFFIX=${_TEST_SUFFIX}"
    -D "TEST_LIST=${_TEST_LIST}"
    # reporter and per-test output files
    -D "TEST_REPORTER=${_REPORTER}"
    -D "TEST_OUTPUT_DIR=${_OUTPUT_DIR}"
    -D "TEST_OUTPUT_PREFIX=${_OUTPUT_PREFIX}"
    -D "TEST_OUTPUT_SUFFIX=${_OUTPUT_SUFFIX}"
    # file the discovery script writes
    -D "CTEST_FILE=${ctestfilepath}"
    -P "${_CATCH_ADD_TEST_SCRIPT}"
  OUTPUT_VARIABLE output
  RESULT_VARIABLE result
  WORKING_DIRECTORY "${TEST_WORKING_DIR}"
)

# Include the generated ctest file for execution, if it exists.
if(EXISTS "${ctestfilepath}")
  include(${ctestfilepath})
endif()
|
||||
+25
@@ -0,0 +1,25 @@
|
||||
Copyright 2009-2010 Cybozu Labs, Inc.
|
||||
Copyright 2011-2014 Kazuho Oku
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
+1200
تفاوت فایلی نمایش داده نمی شود زیرا این فایل بسیار بزرگ است
Diff را بارگزاری کن
@@ -0,0 +1,30 @@
|
||||
# Copyright (c) 2021 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
# THE SOFTWARE.
|
||||
|
||||
# Define HT_LOG_ENABLE when the build type is exactly "Debug".
if(CMAKE_BUILD_TYPE MATCHES "^Debug$")
  add_definitions(-DHT_LOG_ENABLE)
endif()

# Object library with the shared test entry point and context sources; it is
# excluded from the default "all" target.
add_library(Main_Object EXCLUDE_FROM_ALL OBJECT
  main.cc
  hip_test_context.cc
  hip_test_features.cc)

# Request C++17: via the CXX_STANDARD property when HIP_PLATFORM matches "amd",
# otherwise via an explicit compiler flag.
if(NOT HIP_PLATFORM MATCHES "amd")
  target_compile_options(Main_Object PUBLIC -std=c++17)
else()
  set_target_properties(Main_Object PROPERTIES CXX_STANDARD 17)
endif()
|
||||
@@ -0,0 +1,794 @@
|
||||
#define COMMON
|
||||
|
||||
{
|
||||
"Info": [
|
||||
"File generated for commit on below mentioned date and time",
|
||||
__DATE__,
|
||||
__TIME__,
|
||||
GITHASH
|
||||
],
|
||||
"DisabledTests": [
|
||||
#if defined COMMON
|
||||
"Unit_hipMallocFromPoolAsync_MThread_MaxThresh",
|
||||
"Unit_hipMallocFromPoolAsync_MThread_CommonMpool_DefaultMempool",
|
||||
"Unit_hipMemPoolTrimTo_Multithreaded",
|
||||
"Unit_hipMemPoolSetGetAccess_Positive_MultipleGPU",
|
||||
"Unit_hipStreamPerThread_DeviceReset_1",
|
||||
"Unit_hipDeviceGetSharedMemConfig_Positive_Basic",
|
||||
"Unit_hipDeviceGetSharedMemConfig_Positive_Threaded",
|
||||
"Unit_hipGetDeviceFlags_Positive_Context",
|
||||
"Unit_hipInit_Negative",
|
||||
"Unit_hipDeviceReset_Positive_Basic",
|
||||
"Unit_hipDeviceReset_Positive_Threaded",
|
||||
"Unit_hipFuncSetCacheConfig_Positive_Basic",
|
||||
"Unit_hipFuncSetCacheConfig_Negative_Parameters",
|
||||
"Unit_hipFuncSetSharedMemConfig_Positive_Basic",
|
||||
"Unit_hipFuncSetAttribute_Positive_PreferredSharedMemoryCarveout",
|
||||
"NOTE: The following test is disabled due to defect - EXSWHTEC-241",
|
||||
"NOTE: The following test is disabled due to defect - EXSWHTEC-242",
|
||||
"Unit_hipFuncGetAttributes_Positive_Basic",
|
||||
"NOTE: The following test is disabled due to defect - EXSWHTEC-244",
|
||||
"Unit_hipExtLaunchMultiKernelMultiDevice_Negative_Parameters",
|
||||
"Unit_hipOccupancyMaxActiveBlocksPerMultiprocessor_Negative_Parameters",
|
||||
"Unit_hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags_Negative_Parameters",
|
||||
"Unit_hipModuleOccupancyMaxPotentialBlockSizeWithFlags_Negative_Parameters",
|
||||
"Unit_hipGraphMemcpyNodeSetParamsToSymbol_Positive_Basic",
|
||||
"Unit_hipKernelNameRef_Negative_Parameters",
|
||||
"Unit_hipMemAdvise_No_Flag_Interference",
|
||||
"NOTE: The following 2 tests are disabled due to defect - EXSWHTEC-238",
|
||||
"Unit_hipDrvMemcpy3D_Positive_Array",
|
||||
"Unit_hipDrvMemcpy3DAsync_Positive_Array",
|
||||
"Unit_hipMemRangeGetAttribute_Positive_AccessedBy_Basic",
|
||||
"Unit_hipMemRangeGetAttribute_Positive_AccessedBy_Partial_Range",
|
||||
"Unit_hipMemGetAddressRange_Positive",
|
||||
"Unit_hipGraphAddMemcpyNode1D_Negative_Basic",
|
||||
"Unit_ChannelDescriptor_Positive_16BitFloatingPoint",
|
||||
"intermittent issue: failure expected but sucess returned",
|
||||
"Unit_hipMemAdvise_NegtveTsts",
|
||||
"Note: Following four tests disabled due to defect - EXSWHTEC-203",
|
||||
"Unit_hipStreamSetCaptureDependencies_Positive_Functional",
|
||||
"Note: Test disabled due to defect - EXSWHTEC-207",
|
||||
"Unit_hipStreamCreateWithFlags_DefaultStreamInteraction",
|
||||
"Unit_hipMemset3DSync",
|
||||
"Unit_hipStreamAddCallback_StrmSyncTiming",
|
||||
"Disabling test tracked SWDEV-394199",
|
||||
"Unit_hipStreamCreateWithPriority_MulthreadNonblockingflag",
|
||||
"Disabling test tracked SWDEV-395683",
|
||||
"Unit_hipStreamPerThread_MultiThread",
|
||||
"Disabling tests tracked with SWDEV-389647..",
|
||||
"Unit_hipMemcpy2DToArrayAsync_Positive_Synchronization_Behavior",
|
||||
"Disabling test tracked SWDEV-391555",
|
||||
"Unit_hipMemcpyPeer_Positive_ZeroSize",
|
||||
"Unit_hipMemcpyPeerAsync_Positive_ZeroSize",
|
||||
"Fails in Stress test SWDEV-398971",
|
||||
"SWDEV-398977 fails in stress tests",
|
||||
"Unit_hipMemset2DSync",
|
||||
"SWDEV-398981 fails in stress test",
|
||||
"Unit_hipStreamCreateWithPriority_MulthreadDefaultflag",
|
||||
"SWDEV-402054 fails in external github build",
|
||||
"Unit_hipEventDestroy_WithWaitingStream",
|
||||
"=== Below tests fail in stress test on 30/06/23 ===",
|
||||
"Unit_hipMemcpyParam2DAsync_multiDevice-StreamOnDiffDevice",
|
||||
"=== Below tests fail in external CI for PR https://github.com/ROCm-Developer-Tools/hip-tests/pull/96 ===",
|
||||
"Unit_hipHostGetDevicePointer_Negative",
|
||||
"Unit_hipExtModuleLaunchKernel_NonUniformWorkGroup",
|
||||
"=== Below tests fail in external CI for PR https://github.com/ROCm-Developer-Tools/hip-tests/pull/18 ===",
|
||||
"Unit_hipMemcpyAsync_Negative_Parameters",
|
||||
"Unit_hipMemcpyDtoHAsync_Negative_Parameters",
|
||||
"Unit_hipMemcpyHtoDAsync_Negative_Parameters",
|
||||
"Unit_hipMemcpyDtoDAsync_Negative_Parameters",
|
||||
"Unit_hipStreamValue_Wait32_Blocking_Mask_Eq_1",
|
||||
"=== Below tests fail in external CI for PR https://github.com/ROCm-Developer-Tools/hip-tests/pull/327 ===",
|
||||
"Unit_hiprtcDisabledSlpVectorizeComplrOptnTst",
|
||||
"Unit_hiprtcCombiComplrOptnTst",
|
||||
"=== Below tests fail in external CI for PR https://github.com/ROCm-Developer-Tools/hip-tests/pull/92 ===",
|
||||
"Unit_hipGetChannelDesc_Negative_Parameters",
|
||||
"Unit_hipGraphAddChildGraphNode_CmplxNstGrph_UpdKerFun_Clone",
|
||||
"=== Below tests fail in stress test on 24/07/23 ===",
|
||||
"Unit_hipStreamCreateWithPriority_ValidateWithEvents",
|
||||
"Unit_hipEventIpc",
|
||||
"=== SWDEV-427101:Below test fails randomly in PSDB ===",
|
||||
"Unit_deviceAllocation_InOneThread_AccessInAllThreads",
|
||||
"=== Below 2 tests are disable due to defect EXSWHTEC-356 ===",
|
||||
"Unit_Device___hisinf2_Accuracy_Positive",
|
||||
"Unit_Device___hisnan2_Accuracy_Positive",
|
||||
"Unit_Device___hbequ2_Accuracy_Positive",
|
||||
"Unit_Device___hne_Accuracy_Positive",
|
||||
"Unit_Device___hne2_Accuracy_Positive",
|
||||
"Unit_Device___hbne2_Accuracy_Positive",
|
||||
"Unit_Device___hbgeu2_Accuracy_Positive",
|
||||
"Unit_Device___hbgtu2_Accuracy_Positive",
|
||||
"Unit_Device___hbleu2_Accuracy_Positive",
|
||||
"Unit_Device___hbltu2_Accuracy_Positive",
|
||||
"=== Below 4 tests are disable due to defect EXSWHTEC-355 ===",
|
||||
"Unit_Device___hadd_Sanity_Positive",
|
||||
"Unit_Device___uhadd_Sanity_Positive",
|
||||
"Unit_Device___rhadd_Sanity_Positive",
|
||||
"Unit_Device___urhadd_Sanity_Positive",
|
||||
"Unit_hipGraphAddMemcpyNode_Negative_Parameters",
|
||||
"=== Below 2 tests are disable due to defect EXSWHTEC-369 ===",
|
||||
"Unit_Device_ilogbf_Accuracy_Positive",
|
||||
"Unit_Device_ilogb_Accuracy_Positive",
|
||||
"NOTE: The following test is disabled due to defect - EXSWHTEC-245",
|
||||
"Unit_hipMemCreate_MapNonContiguousChunks",
|
||||
"Unit_hipMemMap_PhysicalMemoryReuse_MultiDev",
|
||||
"Unit_hipMemMap_VMMMemoryReuse_MultiGPU",
|
||||
"Unit_hipMemSetAccess_FuncTstOnMultDev",
|
||||
"Unit_hipMemSetAccess_Vmm2PeerDevMemCpy",
|
||||
"Unit_hipMemSetAccess_Vmm2VMMInterDevMemCpy",
|
||||
"Unit_hipMemSetAccess_GrowVMM",
|
||||
"Unit_hipMemMap_PhysicalMemory_Map2MultVMMs",
|
||||
"Unit_hipMemSetAccess_MultiProc",
|
||||
"=== SWDEV-434171: Below tests took long time to complete in stress test on 17/11/23 ===",
|
||||
"Unit_Warp_Shfl_Positive_Basic - int",
|
||||
"Unit_Warp_Shfl_Positive_Basic - unsigned int",
|
||||
"Unit_Warp_Shfl_Positive_Basic - long",
|
||||
"Unit_Warp_Shfl_Positive_Basic - unsigned long",
|
||||
"Unit_Warp_Shfl_Positive_Basic - long long",
|
||||
"Unit_Warp_Shfl_Positive_Basic - unsigned long long",
|
||||
"Unit_Warp_Shfl_Positive_Basic - float",
|
||||
"Unit_Warp_Shfl_Positive_Basic - double",
|
||||
"Unit_Warp_Shfl_Positive_Basic - __half",
|
||||
"Unit_Warp_Shfl_Positive_Basic - __half2",
|
||||
"Unit_Warp_Shfl_XOR_Positive_Basic - int",
|
||||
"Unit_Warp_Shfl_XOR_Positive_Basic - unsigned int",
|
||||
"Unit_Warp_Shfl_XOR_Positive_Basic - long",
|
||||
"Unit_Warp_Shfl_XOR_Positive_Basic - unsigned long",
|
||||
"Unit_Warp_Shfl_XOR_Positive_Basic - long long",
|
||||
"Unit_Warp_Shfl_XOR_Positive_Basic - unsigned long long",
|
||||
"Unit_Warp_Shfl_XOR_Positive_Basic - float",
|
||||
"Unit_Warp_Shfl_XOR_Positive_Basic - double",
|
||||
"Unit_Warp_Shfl_XOR_Positive_Basic - __half",
|
||||
"Unit_Warp_Shfl_XOR_Positive_Basic - __half2",
|
||||
"=== SWDEV-434878: Below tests failed in stress test on 24/11/23 ===",
|
||||
"Unit_hipGraphUpload_Negative_Parameters",
|
||||
"Unit_hipModuleOccupancyMaxPotentialBlockSize_Negative_Parameters",
|
||||
"Unit_hipModuleOccupancyMaxPotentialBlockSize_Positive_RangeValidation",
|
||||
"Unit_hipModuleOccupancyMaxPotentialBlockSizeWithFlags_Positive_RangeValidation",
|
||||
"Unit_hipModuleOccupancyMaxActiveBlocksPerMultiprocessor_Negative_Parameters",
|
||||
"Unit_hipModuleOccupancyMaxActiveBlocksPerMultiprocessor_Positive_RangeValidation",
|
||||
"Unit_hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags_Positive_RangeValidation",
|
||||
"=== SWDEV-435667: Below tests failing randomly in stress test on 01/12/23 ===",
|
||||
"Unit_atomicExch_system_Positive_Peer_GPUs - int",
|
||||
"Unit_atomicExch_system_Positive_Peer_GPUs - unsigned int",
|
||||
"Unit_atomicExch_system_Positive_Peer_GPUs - unsigned long",
|
||||
"Unit_atomicExch_system_Positive_Peer_GPUs - unsigned long long",
|
||||
"Unit_atomicExch_system_Positive_Peer_GPUs - float",
|
||||
"Unit_atomicExch_system_Positive_Peer_GPUs - double",
|
||||
"Unit_atomicExch_system_Positive_Host_And_Peer_GPUs - int",
|
||||
"Unit_atomicExch_system_Positive_Host_And_Peer_GPUs - unsigned int",
|
||||
"Unit_atomicExch_system_Positive_Host_And_Peer_GPUs - unsigned long",
|
||||
"Unit_atomicExch_system_Positive_Host_And_Peer_GPUs - unsigned long long",
|
||||
"Unit_atomicExch_system_Positive_Host_And_Peer_GPUs - float",
|
||||
"Unit_atomicExch_system_Positive_Host_And_Peer_GPUs - double",
|
||||
"=== SWDEV-439004: Below tests failing randomly in CQE staging ===",
|
||||
"Unit_hipGraphicsMapResources_Negative_Parameters",
|
||||
"Unit_hipGraphicsSubResourceGetMappedArray_Negative_Parameters",
|
||||
"Unit_hipGraphicsResourceGetMappedPointer_Positive_Parameters",
|
||||
"Unit_hipGraphicsResourceGetMappedPointer_Negative_Parameters",
|
||||
"Unit_hipGraphicsUnmapResources_Negative_Parameters",
|
||||
"Unit_hipGraphicsUnregisterResource_Negative_Parameters",
|
||||
"SWDEV-443760: This test fails when device memory is used for kernel args",
|
||||
"=== Below tests fail in external CI for PR https://github.com/ROCm-Developer-Tools/hip-tests/pull/356 ===",
|
||||
"Note: Test disabled due to defect - EXSWHTEC-151",
|
||||
"Unit_hipModuleLoad_Negative_Load_From_A_File_That_Is_Not_A_Module",
|
||||
"Note: Following two tests disabled due to defect - EXSWHTEC-153",
|
||||
"Unit_hipModuleLoadData_Negative_Image_Is_An_Empty_String",
|
||||
"Unit_hipModuleLoadDataEx_Negative_Image_Is_An_Empty_String",
|
||||
"Note: Test disabled due to defect - EXSWHTEC-163",
|
||||
"Unit_hipModuleGetGlobal_Negative_Hmod_Is_Nullptr",
|
||||
"Note: Test disabled due to defect - EXSWHTEC-164",
|
||||
"Unit_hipModuleGetGlobal_Negative_Name_Is_Empty_String",
|
||||
"Note: Test disabled due to defect - EXSWHTEC-165",
|
||||
"Unit_hipModuleGetGlobal_Negative_Dptr_And_Bytes_Are_Nullptr",
|
||||
"Note: Test disabled due to defect - EXSWHTEC-166",
|
||||
"Unit_hipModuleGetTexRef_Negative_Hmod_Is_Nullptr",
|
||||
"Note: Test disabled due to defect - EXSWHTEC-167",
|
||||
"Unit_hipModuleGetTexRef_Negative_Name_Is_Empty_String",
|
||||
"SWDEV-441785: Below tests failing in stress test on 05/01/24 ===",
|
||||
"Unit_hipMemcpyParam2DAsync_Positive_Basic",
|
||||
"SWDEV-442583: Below tests failing in stress test on 12/01/24 ===",
|
||||
"Unit_hipLaunchCooperativeKernelMultiDevice_Negative_Parameters",
|
||||
"Unit_hipLaunchCooperativeKernelMultiDevice_Negative_MultiKernelSameDevice",
|
||||
"Unit_hipExtLaunchMultiKernelMultiDevice_Negative_MultiKernelSameDevice",
|
||||
"=== Below tests are failing PSDB ===",
|
||||
"Unit_hipMemcpy3D_Positive_Synchronization_Behavior",
|
||||
"Unit_hipMemcpyParam2D_Positive_Synchronization_Behavior",
|
||||
"Unit_hipDrvMemcpy3D_Positive_Synchronization_Behavior",
|
||||
"Unit_hipLaunchCooperativeKernel_Negative_Parameters",
|
||||
"Unit_hipDrvGraphAddMemsetNode_hipMalloc3D_2D",
|
||||
"Unit_hipDrvGraphAddMemsetNode_hipMalloc3D_1D",
|
||||
"Unit_hipDrvGraphAddMemsetNode_hipMalloc_1D",
|
||||
"Unit_hipDrvGraphAddMemsetNode_hipMallocManaged",
|
||||
"Unit_hipExtModuleLaunchKernel_Negative_Parameters",
|
||||
"Unit_hipLaunchKernel_Negative_Parameters",
|
||||
"Unit_hipModuleLaunchCooperativeKernel_Negative_Parameters",
|
||||
"Unit_Device_modf_modff_Negative_RTC",
|
||||
"SWDEV-446588 - Disable graph multi gpu testcases until graph has support for it",
|
||||
"Unit_hipGraphExecUpdate_Negative_MultiDevice_Context_Changed",
|
||||
"Unit_hipGraphMem_Alloc_Free_NodeGetParams_Functional_MultiDevice",
|
||||
"Unit_hipGraphUpload_Functional_multidevice_test",
|
||||
"=== Below tests fail in external CI for PR https://github.com/ROCm-Developer-Tools/hip-tests/pull/210 ===",
|
||||
"Unit_Assert_Positive_Basic_KernelFail",
|
||||
"Unit_Coalesced_Group_Tiled_Partition_Sync_Positive_Basic - uint8_t",
|
||||
"Unit_Coalesced_Group_Tiled_Partition_Sync_Positive_Basic - uint16_t",
|
||||
"Unit_Coalesced_Group_Tiled_Partition_Sync_Positive_Basic - uint32_t",
|
||||
"=== SWDEV-444987 - Below tests fail in stress testing on 25/01/2023 ===",
|
||||
"Unit_floatTM",
|
||||
"Unit_TestMathFuncComplex",
|
||||
"Unit_AtomicsWithRandomActiveLanesInWavefront_UniformInteger",
|
||||
"Unit_AtomicsWithRandomActiveLanesInWavefront_DivergentInteger",
|
||||
"Unit_hipGraphAddMemcpyNodeToSymbol_Positive_Basic",
|
||||
"Unit_hipStreamBeginCapture_Positive_Functional",
|
||||
"Unit_atomicAnd_Negative_Parameters_RTC",
|
||||
"Unit_atomicOr_Negative_Parameters_RTC",
|
||||
"Unit_atomicXor_Negative_Parameters_RTC",
|
||||
"Unit_atomicMin_Negative_Parameters_RTC",
|
||||
"Unit_atomicMax_Negative_Parameters_RTC",
|
||||
"Unit_Kernel_Launch_bounds_Negative_OutOfBounds",
|
||||
"Unit_Kernel_Launch_bounds_Negative_Parameters_RTC",
|
||||
"Unit_Device_sin_Accuracy_Positive - float",
|
||||
"Unit_Device_sin_Accuracy_Positive - double",
|
||||
"Unit_Device_cos_Accuracy_Positive - float",
|
||||
"Unit_Device_cos_Accuracy_Positive - double",
|
||||
"Unit_Device_tan_Accuracy_Positive - float",
|
||||
"Unit_Device_tan_Accuracy_Positive - double",
|
||||
"Unit_Device_asin_Accuracy_Positive - float",
|
||||
"Unit_Device_asin_Accuracy_Positive - double",
|
||||
"Unit_Device_acos_Accuracy_Positive - float",
|
||||
"Unit_Device_acos_Accuracy_Positive - double",
|
||||
"Unit_Device_atan_Accuracy_Positive - float",
|
||||
"Unit_Device_atan_Accuracy_Positive - double",
|
||||
"Unit_Device_sinh_Accuracy_Positive - float",
|
||||
"Unit_Device_sinh_Accuracy_Positive - double",
|
||||
"Unit_Device_cosh_Accuracy_Positive - float",
|
||||
"Unit_Device_cosh_Accuracy_Positive - double",
|
||||
"Unit_Device_tanh_Accuracy_Positive - float",
|
||||
"Unit_Device_tanh_Accuracy_Positive - double",
|
||||
"Unit_Device_asinh_Accuracy_Positive - float",
|
||||
"Unit_Device_asinh_Accuracy_Positive - double",
|
||||
"Unit_Device_acosh_Accuracy_Positive - float",
|
||||
"Unit_Device_acosh_Accuracy_Positive - double",
|
||||
"Unit_Device_atanh_Accuracy_Positive - float",
|
||||
"Unit_Device_atanh_Accuracy_Positive - double",
|
||||
"Unit_Device_sinpi_Accuracy_Positive - float",
|
||||
"Unit_Device_sinpi_Accuracy_Positive - double",
|
||||
"Unit_Device_cospi_Accuracy_Positive - float",
|
||||
"Unit_Device_cospi_Accuracy_Positive - double",
|
||||
"Unit_Device_tanpi_Accuracy_Positive - float",
|
||||
"Unit_Device_tanpi_Accuracy_Positive - double",
|
||||
"Unit_Device_atan2_Accuracy_Positive - float",
|
||||
"Unit_Device_atan2_Accuracy_Positive - double",
|
||||
"Unit_Device_sincos_Accuracy_Positive - float",
|
||||
"Unit_Device_sincos_Accuracy_Positive - double",
|
||||
"Unit_Device_sincospi_Accuracy_Positive - float",
|
||||
"Unit_Device_sincospi_Accuracy_Positive - double",
|
||||
"Unit_Device_fabs_Accuracy_Positive - float",
|
||||
"Unit_Device_fabs_Accuracy_Positive - double",
|
||||
"Unit_Device_copysign_Accuracy_Positive - float",
|
||||
"Unit_Device_copysign_Accuracy_Positive - double",
|
||||
"Unit_Device_fmax_Accuracy_Positive - float",
|
||||
"Unit_Device_fmax_Accuracy_Positive - double",
|
||||
"Unit_Device_fmin_Accuracy_Positive - float",
|
||||
"Unit_Device_fmin_Accuracy_Positive - double",
|
||||
"Unit_Device_nextafter_Accuracy_Positive - float",
|
||||
"Unit_Device_nextafter_Accuracy_Positive - double",
|
||||
"Unit_Device_fma_Accuracy_Positive - float",
|
||||
"Unit_Device_fma_Accuracy_Positive - double",
|
||||
"Unit_Device_fdividef_Accuracy_Positive",
|
||||
"Unit_Device_isfinite_Accuracy_Positive - float",
|
||||
"Unit_Device_isfinite_Accuracy_Positive - double",
|
||||
"Unit_Device_isinf_Accuracy_Positive - float",
|
||||
"Unit_Device_isinf_Accuracy_Positive - double",
|
||||
"Unit_Device_isnan_Accuracy_Positive - float",
|
||||
"Unit_Device_isnan_Accuracy_Positive - double",
|
||||
"Unit_Device_signbit_Accuracy_Positive - float",
|
||||
"Unit_Device_signbit_Accuracy_Positive - double",
|
||||
"Unit_Device_fmod_Accuracy_Positive - float",
|
||||
"Unit_Device_fmod_Accuracy_Positive - double",
|
||||
"Unit_Device_remainder_Accuracy_Positive - float",
|
||||
"Unit_Device_remainder_Accuracy_Positive - double",
|
||||
"Unit_Device_fdim_Accuracy_Positive - float",
|
||||
"Unit_Device_fdim_Accuracy_Positive - double",
|
||||
"Unit_Device_trunc_Accuracy_Positive - float",
|
||||
"Unit_Device_trunc_Accuracy_Positive - double",
|
||||
"Unit_Device_round_Accuracy_Positive - float",
|
||||
"Unit_Device_round_Accuracy_Positive - double",
|
||||
"Unit_Device_rint_Accuracy_Positive - float",
|
||||
"Unit_Device_rint_Accuracy_Positive - double",
|
||||
"Unit_Device_nearbyint_Accuracy_Positive - float",
|
||||
"Unit_Device_nearbyint_Accuracy_Positive - double",
|
||||
"Unit_Device_ceil_Accuracy_Positive - float",
|
||||
"Unit_Device_ceil_Accuracy_Positive - double",
|
||||
"Unit_Device_floor_Accuracy_Positive - float",
|
||||
"Unit_Device_floor_Accuracy_Positive - double",
|
||||
"Unit_Device_lrint_Accuracy_Positive - float",
|
||||
"Unit_Device_lrint_Accuracy_Positive - double",
|
||||
"Unit_Device_lround_Accuracy_Positive - float",
|
||||
"Unit_Device_lround_Accuracy_Positive - double",
|
||||
"Unit_Device_llrint_Accuracy_Positive - float",
|
||||
"Unit_Device_llrint_Accuracy_Positive - double",
|
||||
"Unit_Device_llround_Accuracy_Positive - float",
|
||||
"Unit_Device_llround_Accuracy_Positive - double",
|
||||
"Unit_Device_remquo_Accuracy_Positive - float",
|
||||
"Unit_Device_remquo_Accuracy_Positive - double",
|
||||
"Unit_Device_modf_Accuracy_Positive - float",
|
||||
"Unit_Device_modf_Accuracy_Positive - double",
|
||||
"=== Below tests cause timeout in stress test of 09/02/24 ===",
|
||||
"Unit_Device___half2half2_Accuracy_Positive",
|
||||
"Unit_Device_make_half2_Accuracy_Positive",
|
||||
"Unit_Device___halves2half2_Accuracy_Positive",
|
||||
"Unit_Device___low2half_Accuracy_Positive",
|
||||
"Unit_Device___high2half_Accuracy_Positive",
|
||||
"Unit_Device___low2half2_Accuracy_Positive",
|
||||
"Unit_Device___high2half2_Accuracy_Positive",
|
||||
"Unit_Device___lowhigh2highlow_Accuracy_Positive",
|
||||
"Unit_Device___lows2half2_Accuracy_Positive",
|
||||
"Unit_Device___highs2half2_Accuracy_Positive",
|
||||
"Unit_Device___float2half2_rn_Accuracy_Positive",
|
||||
"Unit_Device___floats2half2_rn_Accuracy_Positive",
|
||||
"Unit_Device___float22half2_rn_Accuracy_Positive",
|
||||
"Unit_Device___low2float_Accuracy_Positive",
|
||||
"Unit_Device___high2float_Accuracy_Positive",
|
||||
"Unit_Device___half22float2_Accuracy_Positive",
|
||||
"Unit_Device_hcos_Accuracy_Positive",
|
||||
"Unit_Device_h2cos_Accuracy_Positive",
|
||||
"Unit_Device_hsin_Accuracy_Positive",
|
||||
"Unit_Device_h2sin_Accuracy_Positive",
|
||||
"Unit_Device_hexp_Accuracy_Positive",
|
||||
"Unit_Device_h2exp_Accuracy_Positive",
|
||||
"Unit_Device_hexp10_Accuracy_Positive",
|
||||
"Unit_Device_h2exp10_Accuracy_Positive",
|
||||
"Unit_Device_hexp2_Accuracy_Positive",
|
||||
"Unit_Device_h2exp2_Accuracy_Positive",
|
||||
"Unit_Device_hlog_Accuracy_Positive",
|
||||
"Unit_Device_h2log_Accuracy_Positive",
|
||||
"Unit_Device_hlog10_Accuracy_Positive",
|
||||
"Unit_Device_h2log10_Accuracy_Positive",
|
||||
"Unit_Device_hlog2_Accuracy_Positive",
|
||||
"Unit_Device_h2log2_Accuracy_Positive",
|
||||
"Unit_Device_hsqrt_Accuracy_Positive",
|
||||
"Unit_Device_h2sqrt_Accuracy_Positive",
|
||||
"Unit_Device_hceil_Accuracy_Positive",
|
||||
"Unit_Device_h2ceil_Accuracy_Positive",
|
||||
"Unit_Device_hfloor_Accuracy_Positive",
|
||||
"Unit_Device_h2floor_Accuracy_Positive",
|
||||
"Unit_Device_htrunc_Accuracy_Positive",
|
||||
"Unit_Device_h2trunc_Accuracy_Positive",
|
||||
"Unit_Device_hrcp_Accuracy_Positive",
|
||||
"Unit_Device_h2rcp_Accuracy_Positive",
|
||||
"Unit_Device_hrsqrt_Accuracy_Positive",
|
||||
"Unit_Device_h2rsqrt_Accuracy_Positive",
|
||||
"Unit_Device_hrint_Accuracy_Positive",
|
||||
"Unit_Device_h2rint_Accuracy_Positive",
|
||||
"Unit_Device___habs_Accuracy_Positive",
|
||||
"Unit_Device___habs2_Accuracy_Positive",
|
||||
"Unit_Device___hneg_Accuracy_Positive",
|
||||
"Unit_Device___hneg2_Accuracy_Positive",
|
||||
"Unit_Device___hadd_wrapper_Accuracy_Positive",
|
||||
"Unit_Device___hadd2_Accuracy_Positive",
|
||||
"Unit_Device___hadd_sat_Accuracy_Positive",
|
||||
"Unit_Device___hadd2_sat_Accuracy_Positive",
|
||||
"Unit_Device___hsub_Accuracy_Positive",
|
||||
"Unit_Device___hsub2_Accuracy_Positive",
|
||||
"Unit_Device___hsub_sat_Accuracy_Positive",
|
||||
"Unit_Device___hsub2_sat_Accuracy_Positive",
|
||||
"Unit_Device___hmul_Accuracy_Positive",
|
||||
"Unit_Device___hmul2_Accuracy_Positive",
|
||||
"Unit_Device___hmul_sat_Accuracy_Positive",
|
||||
"Unit_Device___hmul2_sat_Accuracy_Positive",
|
||||
"Unit_Device___hdiv_Accuracy_Positive",
|
||||
"Unit_Device___h2div_Accuracy_Positive",
|
||||
"Unit_Device___hfma_Accuracy_Positive",
|
||||
"Unit_Device___hfma2_Accuracy_Positive",
|
||||
"Unit_Device___hfma_sat_Accuracy_Positive",
|
||||
"Unit_Device___hfma2_sat_Accuracy_Positive",
|
||||
"Unit_Device___hisinf_Accuracy_Positive",
|
||||
"Unit_Device___hisinf2_Accuracy_Positive",
|
||||
"Unit_Device___hisnan_Accuracy_Positive",
|
||||
"Unit_Device___hisnan2_Accuracy_Positive",
|
||||
"Unit_Device___heq_Accuracy_Positive",
|
||||
"Unit_Device___hbeq2_Accuracy_Positive",
|
||||
"Unit_Device___hequ_Accuracy_Positive",
|
||||
"Unit_Device___hbequ2_Accuracy_Positive",
|
||||
"Unit_Device___heq2_Accuracy_Positive",
|
||||
"Unit_Device___hequ2_Accuracy_Positive",
|
||||
"Unit_Device___hne_Accuracy_Positive",
|
||||
"Unit_Device___hbne2_Accuracy_Positive",
|
||||
"Unit_Device___hneu_Accuracy_Positive",
|
||||
"Unit_Device___hbneu2_Accuracy_Positive",
|
||||
"Unit_Device___hne2_Accuracy_Positive",
|
||||
"Unit_Device___hneu2_Accuracy_Positive",
|
||||
"Unit_Device___hge_Accuracy_Positive",
|
||||
"Unit_Device___hbge2_Accuracy_Positive",
|
||||
"Unit_Device___hgeu_Accuracy_Positive",
|
||||
"Unit_Device___hbgeu2_Accuracy_Positive",
|
||||
"Unit_Device___hge2_Accuracy_Positive",
|
||||
"Unit_Device___hgeu2_Accuracy_Positive",
|
||||
"Unit_Device___hgt_Accuracy_Positive",
|
||||
"Unit_Device___hbgt2_Accuracy_Positive",
|
||||
"Unit_Device___hgtu_Accuracy_Positive",
|
||||
"Unit_Device___hbgtu2_Accuracy_Positive",
|
||||
"Unit_Device___hgt2_Accuracy_Positive",
|
||||
"Unit_Device___hgtu2_Accuracy_Positive",
|
||||
"Unit_Device___hle_Accuracy_Positive",
|
||||
"Unit_Device___hble2_Accuracy_Positive",
|
||||
"Unit_Device___hleu_Accuracy_Positive",
|
||||
"Unit_Device___hbleu2_Accuracy_Positive",
|
||||
"Unit_Device___hle2_Accuracy_Positive",
|
||||
"Unit_Device___hleu2_Accuracy_Positive",
|
||||
"Unit_Device___hlt_Accuracy_Positive",
|
||||
"Unit_Device___hblt2_Accuracy_Positive",
|
||||
"Unit_Device___hltu_Accuracy_Positive",
|
||||
"Unit_Device___hbltu2_Accuracy_Positive",
|
||||
"Unit_Device___hlt2_Accuracy_Positive",
|
||||
"Unit_Device___hltu2_Accuracy_Positive",
|
||||
"Unit_Device___hmax_Accuracy_Positive",
|
||||
"Unit_Device___hmin_Accuracy_Positive",
|
||||
"Unit_Device___hmax_nan_Accuracy_Positive",
|
||||
"Unit_Device___hmin_nan_Accuracy_Positive",
|
||||
"Unit_Device___half2int_rn_Accuracy_Positive",
|
||||
"Unit_Device___half2int_rz_Accuracy_Positive",
|
||||
"Unit_Device___half2int_rd_Accuracy_Positive",
|
||||
"Unit_Device___half2int_ru_Accuracy_Positive",
|
||||
"Unit_Device___half2uint_rn_Accuracy_Positive",
|
||||
"Unit_Device___half2uint_rz_Accuracy_Positive",
|
||||
"Unit_Device___half2uint_rd_Accuracy_Positive",
|
||||
"Unit_Device___half2uint_ru_Accuracy_Positive",
|
||||
"Unit_Device___half2short_rn_Accuracy_Positive",
|
||||
"Unit_Device___half2short_rz_Accuracy_Positive",
|
||||
"Unit_Device___half2short_rd_Accuracy_Positive",
|
||||
"Unit_Device___half2short_ru_Accuracy_Positive",
|
||||
"Unit_Device___half2ushort_rn_Accuracy_Positive",
|
||||
"Unit_Device___half2ushort_rz_Accuracy_Positive",
|
||||
"Unit_Device___half2ushort_rd_Accuracy_Positive",
|
||||
"Unit_Device___half2ushort_ru_Accuracy_Positive",
|
||||
"Unit_Device___half2ll_rn_Accuracy_Positive",
|
||||
"Unit_Device___half2ll_rz_Accuracy_Positive",
|
||||
"Unit_Device___half2ll_rd_Accuracy_Positive",
|
||||
"Unit_Device___half2ll_ru_Accuracy_Positive",
|
||||
"Unit_Device___half2ull_rn_Accuracy_Positive",
|
||||
"Unit_Device___half2ull_rz_Accuracy_Positive",
|
||||
"Unit_Device___half2ull_rd_Accuracy_Positive",
|
||||
"Unit_Device___half2ull_ru_Accuracy_Positive",
|
||||
"Unit_Device___half_as_short_Accuracy_Positive",
|
||||
"Unit_Device___half_as_ushort_Accuracy_Positive",
|
||||
"Unit_Device___int2half_rn_Accuracy_Positive",
|
||||
"Unit_Device___int2half_rz_Accuracy_Positive",
|
||||
"Unit_Device___int2half_rd_Accuracy_Positive",
|
||||
"Unit_Device___int2half_ru_Accuracy_Positive",
|
||||
"Unit_Device___uint2half_rn_Accuracy_Positive",
|
||||
"Unit_Device___uint2half_rz_Accuracy_Positive",
|
||||
"Unit_Device___uint2half_rd_Accuracy_Positive",
|
||||
"Unit_Device___uint2half_ru_Accuracy_Positive",
|
||||
"Unit_Device___short2half_rn_Accuracy_Positive",
|
||||
"Unit_Device___short2half_rz_Accuracy_Positive",
|
||||
"Unit_Device___short2half_rd_Accuracy_Positive",
|
||||
"Unit_Device___short2half_ru_Accuracy_Positive",
|
||||
"Unit_Device___ushort2half_rn_Accuracy_Positive",
|
||||
"Unit_Device___ushort2half_rz_Accuracy_Positive",
|
||||
"Unit_Device___ushort2half_rd_Accuracy_Positive",
|
||||
"Unit_Device___ushort2half_ru_Accuracy_Positive",
|
||||
"Unit_Device___ll2half_rn_Accuracy_Positive",
|
||||
"Unit_Device___ll2half_rz_Accuracy_Positive",
|
||||
"Unit_Device___ll2half_rd_Accuracy_Positive",
|
||||
"Unit_Device___ll2half_ru_Accuracy_Positive",
|
||||
"Unit_Device___ull2half_rn_Accuracy_Positive",
|
||||
"Unit_Device___ull2half_rz_Accuracy_Positive",
|
||||
"Unit_Device___ull2half_rd_Accuracy_Positive",
|
||||
"Unit_Device___ull2half_ru_Accuracy_Positive",
|
||||
"Unit_Device___short_as_half_Accuracy_Positive",
|
||||
"Unit_Device___ushort_as_half_Accuracy_Positive",
|
||||
"Unit_Device___float2half_rn_Accuracy_Positive",
|
||||
"Unit_Device___float2half_Accuracy_Positive",
|
||||
"Unit_Device___half2float_Accuracy_Positive",
|
||||
"Unit_Device___frcp_rn_Accuracy_Positive",
|
||||
"Unit_Device___fsqrt_rn_Accuracy_Positive",
|
||||
"Unit_Device___frsqrt_rn_Accuracy_Positive",
|
||||
"Unit_Device___expf_Accuracy_Positive",
|
||||
"Unit_Device___exp10f_Accuracy_Positive",
|
||||
"Unit_Device___logf_Accuracy_Positive",
|
||||
"Unit_Device___log2f_Accuracy_Positive",
|
||||
"Unit_Device___log10f_Accuracy_Positive",
|
||||
"Unit_Device___sinf_Accuracy_Positive",
|
||||
"Unit_Device___sincosf_sin_Accuracy_Positive",
|
||||
"Unit_Device___cosf_Accuracy_Positive",
|
||||
"Unit_Device___sincosf_cos_Accuracy_Positive",
|
||||
"Unit_Device___fadd_rn_Accuracy_Positive",
|
||||
"Unit_Device___fsub_rn_Accuracy_Positive",
|
||||
"Unit_Device___fmul_rn_Accuracy_Positive",
|
||||
"Unit_Device___fdiv_rn_Accuracy_Positive",
|
||||
"Unit_Device___fdividef_Accuracy_Positive",
|
||||
"Unit_Device___fmaf_rn_Accuracy_Positive",
|
||||
"Unit_Device___drcp_rn_Accuracy_Positive",
|
||||
"Unit_Device___dsqrt_rn_Accuracy_Positive",
|
||||
"Unit_Device___dadd_rn_Accuracy_Positive",
|
||||
"Unit_Device___dsub_rn_Accuracy_Positive",
|
||||
"Unit_Device___dmul_rn_Accuracy_Positive",
|
||||
"Unit_Device___ddiv_rn_Accuracy_Positive",
|
||||
"Unit_Device___fma_rn_Accuracy_Positive",
|
||||
"Unit_Device_sqrtf_Accuracy_Positive",
|
||||
"Unit_Device_sqrt_Accuracy_Positive",
|
||||
"Unit_Device_rsqrtf_Accuracy_Positive",
|
||||
"Unit_Device_rsqrt_Accuracy_Positive",
|
||||
"Unit_Device_cbrt_Accuracy_Positive - float",
|
||||
"Unit_Device_cbrt_Accuracy_Positive - double",
|
||||
"Unit_Device_rcbrtf_Accuracy_Positive",
|
||||
"Unit_Device_rcbrt_Accuracy_Positive",
|
||||
"Unit_Device_hypot_Accuracy_Positive - float",
|
||||
"Unit_Device_hypot_Accuracy_Positive - double",
|
||||
"Unit_Device_rhypot_Accuracy_Positive - float",
|
||||
"Unit_Device_rhypot_Accuracy_Positive - double",
|
||||
"Unit_Device_norm3d_Accuracy_Positive - float",
|
||||
"Unit_Device_norm3d_Accuracy_Positive - double",
|
||||
"Unit_Device_rnorm3d_Accuracy_Positive - float",
|
||||
"Unit_Device_rnorm3d_Accuracy_Positive - double",
|
||||
"Unit_Device_norm4d_Accuracy_Positive - float",
|
||||
"Unit_Device_norm4d_Accuracy_Positive - double",
|
||||
"Unit_Device_rnorm4d_Accuracy_Positive - float",
|
||||
"Unit_Device_rnorm4d_Accuracy_Positive - double",
|
||||
"Unit_Device_exp_Accuracy_Positive - float",
|
||||
"Unit_Device_exp_Accuracy_Positive - double",
|
||||
"Unit_Device_exp2_Accuracy_Positive - float",
|
||||
"Unit_Device_exp2_Accuracy_Positive - double",
|
||||
"Unit_Device_expm1_Accuracy_Positive - float",
|
||||
"Unit_Device_expm1_Accuracy_Positive - double",
|
||||
"Unit_Device_exp10f_Accuracy_Positive",
|
||||
"Unit_Device_exp10_Accuracy_Positive",
|
||||
"Unit_Device_frexpf_Accuracy_Positive",
|
||||
"Unit_Device_frexp_Accuracy_Positive",
|
||||
"Unit_Device_pow_Accuracy_Positive - float",
|
||||
"Unit_Device_pow_Accuracy_Positive - double",
|
||||
"Unit_Device_ldexp_Accuracy_Positive - float",
|
||||
"Unit_Device_ldexp_Accuracy_Positive - double",
|
||||
"Unit_Device_powi_Accuracy_Positive - float",
|
||||
"Unit_Device_powi_Accuracy_Positive - double",
|
||||
"Unit_Device_scalbn_Accuracy_Positive - float",
|
||||
"Unit_Device_scalbn_Accuracy_Positive - double",
|
||||
"Unit_Device_scalbln_Accuracy_Positive - float",
|
||||
"Unit_Device_scalbln_Accuracy_Positive - double",
|
||||
"Unit_Device_log_Accuracy_Positive - float",
|
||||
"Unit_Device_log_Accuracy_Positive - double",
|
||||
"Unit_Device_log2_Accuracy_Positive - float",
|
||||
"Unit_Device_log2_Accuracy_Positive - double",
|
||||
"Unit_Device_log10_Accuracy_Positive - float",
|
||||
"Unit_Device_log10_Accuracy_Positive - double",
|
||||
"Unit_Device_log1p_Accuracy_Positive - float",
|
||||
"Unit_Device_log1p_Accuracy_Positive - double",
|
||||
"Unit_Device_logb_Accuracy_Positive - float",
|
||||
"Unit_Device_logb_Accuracy_Positive - double",
|
||||
"Unit_Device_ilogbf_Accuracy_Positive",
|
||||
"Unit_Device_ilogb_Accuracy_Positive",
|
||||
"Unit_Device_erf_Accuracy_Positive - float",
|
||||
"Unit_Device_erf_Accuracy_Positive - double",
|
||||
"Unit_Device_erfc_Accuracy_Positive - float",
|
||||
"Unit_Device_erfc_Accuracy_Positive - double",
|
||||
"Unit_Device_erfinvf_Accuracy_Positive",
|
||||
"Unit_Device_erfinv_Accuracy_Positive",
|
||||
"Unit_Device_erfcinvf_Accuracy_Positive",
|
||||
"Unit_Device_erfcinv_Accuracy_Positive",
|
||||
"Unit_Device_normcdff_Accuracy_Positive",
|
||||
"Unit_Device_normcdf_Accuracy_Positive",
|
||||
"Unit_Device_tgammaf_Accuracy_Limited_Positive",
|
||||
"Unit_Device_tgamma_Accuracy_Limited_Positive",
|
||||
"Unit_Device_lgammaf_Accuracy_Limited_Positive",
|
||||
"Unit_Device_lgamma_Accuracy_Limited_Positive",
|
||||
"Unit_Device_cyl_bessel_i0f_Accuracy_Limited_Positive",
|
||||
"Unit_Device_cyl_bessel_i0_Accuracy_Limited_Positive",
|
||||
"Unit_Device_cyl_bessel_i1f_Accuracy_Limited_Positive",
|
||||
"Unit_Device_cyl_bessel_i1_Accuracy_Limited_Positive",
|
||||
"Unit_Device_y0f_Accuracy_Limited_Positive",
|
||||
"Unit_Device_y0_Accuracy_Limited_Positive",
|
||||
"Unit_Device_y1f_Accuracy_Limited_Positive",
|
||||
"Unit_Device_y1_Accuracy_Limited_Positive",
|
||||
"Unit_Device_ynf_Accuracy_Limited_Positive",
|
||||
"Unit_Device_yn_Accuracy_Limited_Positive",
|
||||
"Unit_Device_j0f_Accuracy_Limited_Positive",
|
||||
"Unit_Device_j0_Accuracy_Limited_Positive",
|
||||
"Unit_Device_j1f_Accuracy_Limited_Positive",
|
||||
"Unit_Device_j1_Accuracy_Limited_Positive",
|
||||
"Unit_Device_jnf_Accuracy_Limited_Positive",
|
||||
"Unit_Device_jn_Accuracy_Limited_Positive",
|
||||
"Unit_Device___double2int_rd_Positive",
|
||||
"Unit_Device___double2int_rn_Positive",
|
||||
"Unit_Device___double2int_ru_Positive",
|
||||
"Unit_Device___double2int_rz_Positive",
|
||||
"Unit_Device___double2int_Negative_RTC",
|
||||
"Unit_Device___double2uint_rd_Positive",
|
||||
"Unit_Device___double2uint_rn_Positive",
|
||||
"Unit_Device___double2uint_ru_Positive",
|
||||
"Unit_Device___double2uint_rz_Positive",
|
||||
"Unit_Device___double2uint_Negative_RTC",
|
||||
"Unit_Device___double2ll_rd_Positive",
|
||||
"Unit_Device___double2ll_rn_Positive",
|
||||
"Unit_Device___double2ll_ru_Positive",
|
||||
"Unit_Device___double2ll_rz_Positive",
|
||||
"Unit_Device___double2ll_Negative_RTC",
|
||||
"Unit_Device___double2ull_rd_Positive",
|
||||
"Unit_Device___double2ull_rn_Positive",
|
||||
"Unit_Device___double2ull_ru_Positive",
|
||||
"Unit_Device___double2ull_rz_Positive",
|
||||
"Unit_Device___double2ull_Negative_RTC",
|
||||
"Unit_Device___double2float_rd_Positive",
|
||||
"Unit_Device___double2float_rn_Positive",
|
||||
"Unit_Device___double2float_ru_Positive",
|
||||
"Unit_Device___double2float_rz_Positive",
|
||||
"Unit_Device___double2float_Negative_RTC",
|
||||
"Unit_Device___double2hiint_Positive",
|
||||
"Unit_Device___double2hiint_Negative_RTC",
|
||||
"Unit_Device___double2loint_Positive",
|
||||
"Unit_Device___double2loint_Negative_RTC",
|
||||
"Unit_Device___double_as_longlong_Positive",
|
||||
"Unit_Device___double_as_longlong_Negative_RTC",
|
||||
"Unit_Device___float2int_rd_Positive",
|
||||
"Unit_Device___float2int_rn_Positive",
|
||||
"Unit_Device___float2int_ru_Positive",
|
||||
"Unit_Device___float2int_rz_Positive",
|
||||
"Unit_Device___float2int_Negative_RTC",
|
||||
"Unit_Device___float2uint_rd_Positive",
|
||||
"Unit_Device___float2uint_rn_Positive",
|
||||
"Unit_Device___float2uint_ru_Positive",
|
||||
"Unit_Device___float2uint_rz_Positive",
|
||||
"Unit_Device___float2uint_Negative_RTC",
|
||||
"Unit_Device___float2ll_rd_Positive",
|
||||
"Unit_Device___float2ll_rn_Positive",
|
||||
"Unit_Device___float2ll_ru_Positive",
|
||||
"Unit_Device___float2ll_rz_Positive",
|
||||
"Unit_Device___float2ll_Negative_RTC",
|
||||
"Unit_Device___float2ull_rd_Positive",
|
||||
"Unit_Device___float2ull_rn_Positive",
|
||||
"Unit_Device___float2ull_ru_Positive",
|
||||
"Unit_Device___float2ull_rz_Positive",
|
||||
"Unit_Device___float2ull_Negative_RTC",
|
||||
"Unit_Device___float_as_int_Positive",
|
||||
"Unit_Device___float_as_int_Negative_RTC",
|
||||
"Unit_Device___float_as_uint_Positive",
|
||||
"Unit_Device___float_as_uint_Negative_RTC",
|
||||
"Unit_Device___int2float_rd_Positive",
|
||||
"Unit_Device___int2float_rn_Positive",
|
||||
"Unit_Device___int2float_ru_Positive",
|
||||
"Unit_Device___int2float_rz_Positive",
|
||||
"Unit_Device_int2float___Negative_RTC",
|
||||
"Unit_Device___uint2float_rd_Positive",
|
||||
"Unit_Device___uint2float_rn_Positive",
|
||||
"Unit_Device___uint2float_ru_Positive",
|
||||
"Unit_Device___uint2float_rz_Positive",
|
||||
"Unit_Device___uint2float_Negative_RTC",
|
||||
"Unit_Device___int2double_rn_Positive",
|
||||
"Unit_Device___int2double_Negative_RTC",
|
||||
"Unit_Device___uint2double_rn_Positive",
|
||||
"Unit_Device___uint2double_Negative_RTC",
|
||||
"Unit_Device___ll2float_rd_Positive",
|
||||
"Unit_Device___ll2float_rn_Positive",
|
||||
"Unit_Device___ll2float_ru_Positive",
|
||||
"Unit_Device___ll2float_rz_Positive",
|
||||
"Unit_Device___ll2float_Negative_RTC",
|
||||
"Unit_Device___ull2float_rd_Positive",
|
||||
"Unit_Device___ull2float_rn_Positive",
|
||||
"Unit_Device___ull2float_ru_Positive",
|
||||
"Unit_Device___ull2float_rz_Positive",
|
||||
"Unit_Device___ull2float_Negative_RTC",
|
||||
"Unit_Device___ll2double_rd_Positive",
|
||||
"Unit_Device___ll2double_rn_Positive",
|
||||
"Unit_Device___ll2double_ru_Positive",
|
||||
"Unit_Device___ll2double_rz_Positive",
|
||||
"Unit_Device___ll2double_Negative_RTC",
|
||||
"Unit_Device___ull2double_rd_Positive",
|
||||
"Unit_Device___ull2double_rn_Positive",
|
||||
"Unit_Device___ull2double_ru_Positive",
|
||||
"Unit_Device___ull2double_rz_Positive",
|
||||
"Unit_Device___ull2double_Negative_RTC",
|
||||
"Unit_Device___int_as_float_Positive",
|
||||
"Unit_Device___int_as_float_Negative_RTC",
|
||||
"Unit_Device___uint_as_float_Positive",
|
||||
"Unit_Device___uint_as_float_Negative_RTC",
|
||||
"Unit_Device___longlong_as_double_Positive",
|
||||
"Unit_Device___longlong_as_double_Negative_RTC",
|
||||
"Unit_Device___hiloint2double_Positive",
|
||||
"Unit_Device___hiloint2double_Negative_RTC",
|
||||
"Unit_atomicAdd_Negative_Parameters_RTC",
|
||||
"Unit_atomicSub_Negative_Parameters_RTC",
|
||||
"Unit_atomicInc_Negative_Parameters_RTC",
|
||||
"Unit_atomicDec_Negative_Parameters_RTC",
|
||||
"Unit_atomicCAS_Negative_Parameters_RTC",
|
||||
"SWDEV-447384, SWDEV-447932: These tests fail in gfx1100, gfx1101 & gfx1102",
|
||||
"Unit_hipFreeAsync_Negative_Parameters",
|
||||
"SWDEV-445928: These tests fail in PSDB stress test on 09/02/2024",
|
||||
"Unit_hipCreateSurfaceObject_Negative_Parameters",
|
||||
"Unit_hipDestroySurfaceObject_Negative_Parameters",
|
||||
"Unit_Device___float2half_rd_Accuracy_Limited_Positive",
|
||||
"Unit_Device___float2half_ru_Accuracy_Limited_Positive",
|
||||
"Unit_Device___float2half_rz_Accuracy_Limited_Positive",
|
||||
"Unit_hipGraphInstantiateWithFlags_StreamCaptureDeviceContextChg",
|
||||
"=== SWDEV-457316 Below tests are disabled temporarily to avoid combined PSDB ===",
|
||||
"Unit_hipGraphAddMemFreeNode_Negative_NotSupported",
|
||||
"=== These tests fail on linux PSDB 21/11/24 ===",
|
||||
"Unit_atomicMax_Positive_Multi_Kernel_Same_Address - double",
|
||||
"Unit_atomicMax_Positive_Multi_Kernel_Same_Address - float",
|
||||
"Unit_atomicMin_Positive_Multi_Kernel_Same_Address - double",
|
||||
"Unit_atomicMin_Positive_Multi_Kernel_Same_Address - float",
|
||||
"Unit_safeAtomicMax_Positive_Multi_Kernel_Same_Address - double",
|
||||
"Unit_safeAtomicMax_Positive_Multi_Kernel_Same_Address - float",
|
||||
"Unit_safeAtomicMin_Positive_Multi_Kernel_Same_Address - double",
|
||||
"Unit_safeAtomicMin_Positive_Multi_Kernel_Same_Address - float",
|
||||
"Unit_unsafeAtomicMax_Positive_Multi_Kernel_Same_Address - double",
|
||||
"Unit_unsafeAtomicMax_Positive_Multi_Kernel_Same_Address - float",
|
||||
"Unit_unsafeAtomicMin_Positive_Multi_Kernel_Same_Address - double",
|
||||
"Unit_unsafeAtomicMin_Positive_Multi_Kernel_Same_Address - float",
|
||||
"=== SWDEV-475482 - Disable tests to merge clr change ===",
|
||||
"Unit_hipCreateTextureObject_LinearResource",
|
||||
"Unit_hipCreateTextureObject_Pitch2DResource",
|
||||
"=== SWDEV-454316 : Below tests fail in stress test ===",
|
||||
"Unit_atomicMin_system_Positive_Peer_GPUs_Same_Address - float",
|
||||
"Unit_atomicMin_system_Positive_Peer_GPUs_Same_Address - double",
|
||||
"Unit_atomicMax_system_Positive_Peer_GPUs_Same_Address - float",
|
||||
"Unit_atomicMax_system_Positive_Peer_GPUs_Same_Address - double",
|
||||
"=== SWDEV-511679 : Below tests fail in stress test ===",
|
||||
"Unit_hipIpcOpenMemHandle_Negative_Open_In_Two_Contexts_Same_Device",
|
||||
"Unit_hipIpcCloseMemHandle_Positive_Reference_Counting",
|
||||
"=== SWDEV-517063 Below tests are temporarily disabled due to PSDB failure",
|
||||
"Unit_hipGraphInstantiateWithFlags_FlagAutoFreeOnLaunch_check",
|
||||
"Unit_hipGraphInstantiateWithFlags_AutoFreeOnLaunchInLoop",
|
||||
"Unit_hipGraphInstantiateWithFlags_AutoFreeOnLaunchFillKernel",
|
||||
"Unit_hipGraphInstantiateWithFlags_AutoFreeOnLaunchDoubleKernel",
|
||||
"Unit_hipGraphInstantiateWithFlags_AutoFreeOnLaunchMultiProcess",
|
||||
"Unit_hipGraphInstantiateWithFlags_WithDefaultAndAutoFreeOnLaunch",
|
||||
"=== SWDEV-457316 Below test is skipped due ref count logic (Discussed with German) ===",
|
||||
"Unit_hipGraphAddMemAllocNode_Negative_Free_Alloc_Memory_Again",
|
||||
"=== SWDEV-530762 : This test fails in Linux PSDB ===",
|
||||
"Unit_hipDeviceGetGraphMemAttribute_Positive_DoubleMemory",
|
||||
"=== SWDEV-538600 : This test fails in Linux PSDB ===",
|
||||
"Unit_hipMemPoolMaxAlloc",
|
||||
"Unit_hipStreamPerThread_ChildProc",
|
||||
"=== SWDEV-536226 : Below three tests were disabled due to hang issue ===",
|
||||
"Unit_hipGetLastError_KernelFailure_ValidAndInvalidOperations",
|
||||
"Unit_hipGetLastError_KernelFailure_TwoDevices",
|
||||
"Unit_hipGetLastError_KernelFailure_TwoStreams",
|
||||
"=== Enable the below test when multi-device graph launches are fully supported ===",
|
||||
"Unit_hipGraphInstantiateWithFlags_DependencyGraphDeviceCtxtChg",
|
||||
#endif
|
||||
#if defined gfx90a || defined gfx942 || defined gfx950
|
||||
"=== SWDEV-443630 : Below test failed in stress test on 19/01/24 ===",
|
||||
"Unit_Multi_Grid_Group_Positive_Sync",
|
||||
"Unit_Warp_Shfl_Up_Positive_Basic - int",
|
||||
"Unit_Warp_Shfl_Up_Positive_Basic - unsigned int",
|
||||
"Unit_Warp_Shfl_Up_Positive_Basic - long",
|
||||
"Unit_Warp_Shfl_Up_Positive_Basic - unsigned long",
|
||||
"Unit_Warp_Shfl_Up_Positive_Basic - long long",
|
||||
"Unit_Warp_Shfl_Up_Positive_Basic - unsigned long long",
|
||||
"Unit_Warp_Shfl_Up_Positive_Basic - float",
|
||||
"Unit_Warp_Shfl_Up_Positive_Basic - double",
|
||||
"Unit_Warp_Shfl_Up_Positive_Basic - __half",
|
||||
"Unit_Warp_Shfl_Up_Positive_Basic - __half2",
|
||||
"Unit_Warp_Shfl_Down_Positive_Basic - int",
|
||||
"Unit_Warp_Shfl_Down_Positive_Basic - unsigned int",
|
||||
"Unit_Warp_Shfl_Down_Positive_Basic - long",
|
||||
"Unit_Warp_Shfl_Down_Positive_Basic - unsigned long",
|
||||
"Unit_Warp_Shfl_Down_Positive_Basic - long long",
|
||||
"Unit_Warp_Shfl_Down_Positive_Basic - unsigned long long",
|
||||
"Unit_Warp_Shfl_Down_Positive_Basic - float",
|
||||
"Unit_Warp_Shfl_Down_Positive_Basic - double",
|
||||
"Unit_Warp_Shfl_Down_Positive_Basic - __half",
|
||||
"Unit_Warp_Shfl_Down_Positive_Basic - __half2",
|
||||
"Unit_Device_norm_Sanity_Positive - float",
|
||||
"Unit_Device_norm_Sanity_Positive - double",
|
||||
"Unit_Device_rnorm_Sanity_Positive - float",
|
||||
"Unit_Device_rnorm_Sanity_Positive - double",
|
||||
"Unit_Device___float2half_rd_SmallVals_Sanity_Positive",
|
||||
"Unit_Device___float2half_ru_SmallVals_Sanity_Positive",
|
||||
"Unit_Device___float2half_rz_SmallVals_Sanity_Positive",
|
||||
"Unit_safeAtomicMin_Positive_SameAddress - float",
|
||||
#endif
|
||||
#if defined gfx1030
|
||||
"=== SWDEV-445961: These tests hang in PSDB stress test on 09/02/2024 ===",
|
||||
"Unit_hipStreamBeginCapture_hipStreamPerThread",
|
||||
#endif
|
||||
#if defined gfx1200 || defined gfx1201
|
||||
"=== SWDEV-470751 : Fine Grain memory is MTYPE_NC due to HW bug.",
|
||||
"Unit_hipEventCreateWithFlags_DisableSystemFence_CohHstMem",
|
||||
#endif
|
||||
"=== Following tests disabled as it should be a local perf test",
|
||||
"Performance_hipExtLaunchKernelGGL_QueryGPUFrequency",
|
||||
"End of json"
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,998 @@
|
||||
#define COMMON
|
||||
{
|
||||
"Info": [
|
||||
"File generated for commit on below mentioned date and time",
|
||||
__DATE__,
|
||||
__TIME__,
|
||||
GITHASH
|
||||
],
|
||||
"DisabledTests": [
|
||||
#if defined COMMON
|
||||
"Unit_hipMallocFromPoolAsync_MThread_MaxThresh",
|
||||
"Unit_hipMallocFromPoolAsync_MThread_CommonMpool_DefaultMempool",
|
||||
"Unit_hipMemPoolTrimTo_Multithreaded",
|
||||
"Unit_hipMemPoolSetGetAccess_Positive_MultipleGPU",
|
||||
"Unit_hipMalloc_CoherentTst",
|
||||
"Unit_hipGraphAddHostNode_ClonedGraphwithHostNode",
|
||||
"Unit_hipEventIpc",
|
||||
"Unit_hipMalloc3D_Negative",
|
||||
"Unit_hipMemPoolApi_BasicAlloc",
|
||||
"Unit_hipMemPoolApi_BasicTrim",
|
||||
"Unit_hipMemPoolApi_BasicReuse",
|
||||
"Unit_hipMemPoolApi_Opportunistic",
|
||||
"Unit_hipMalloc3D_ValidatePitch",
|
||||
"Unit_hipMemAllocPitch_ValidatePitch",
|
||||
"Unit_hipGraphExecMemcpyNodeSetParamsToSymbol_Functional",
|
||||
"Unit_hipMallocManaged_CoherentTstWthAdvise",
|
||||
"Unit_hipMallocManaged_Advanced",
|
||||
"Unit_hipMemRangeGetAttribute_NegativeTests",
|
||||
"Unit_hipMemRangeGetAttribute_AccessedBy1",
|
||||
"Unit_hipMemRangeGetAttribte_3",
|
||||
"Unit_hipMemRangeGetAttribute_4",
|
||||
"Unit_hipMemRangeGetAttribute_PrefetchAndGtAttr",
|
||||
"Unit_hipMemAdvise_TstFlags",
|
||||
"Unit_hipMemAdvise_PrefrdLoc",
|
||||
"Unit_hipMemAdvise_ReadMostly",
|
||||
"Unit_hipMemAdvise_TstFlgOverrideEffect",
|
||||
"Unit_hipMemAdvise_TstAccessedByFlg",
|
||||
"Unit_hipMemAdvise_TstAccessedByFlg4",
|
||||
"Unit_hipMemAdvise_TstMemAdvisePrefrdLoc",
|
||||
"Unit_hipMemAdvise_TstMemAdviseMultiFlag",
|
||||
"Unit_hipMemAdvise_ReadMosltyMgpuTst",
|
||||
"Unit_hipMemAdvise_TstSetUnsetPrfrdLoc",
|
||||
"Unit_hipMallocManaged_DeviceContextChange - unsigned char",
|
||||
"Unit_hipMallocManaged_DeviceContextChange - int",
|
||||
"Unit_hipMallocManaged_DeviceContextChange - float",
|
||||
"Unit_hipMallocManaged_DeviceContextChange - double",
|
||||
"Unit_hipStreamCreateWithPriority_ValidateWithEvents",
|
||||
"Unit_hipStreamPerThread_StrmWaitEvt",
|
||||
"Unit_hipMemGetInfo_DifferentMallocSmall",
|
||||
"Unit_hipMemGetInfo_MallocArray - int",
|
||||
"Unit_hipMemGetInfo_MallocArray - int4",
|
||||
"Unit_hipMemGetInfo_MallocArray - char",
|
||||
"Unit_hipMemGetInfo_Malloc3D",
|
||||
"Unit_hipMemGetInfo_Malloc3DArray - char",
|
||||
"Unit_hipMemGetInfo_Malloc3DArray - int",
|
||||
"Unit_hipMemGetInfo_Malloc3DArray - int4",
|
||||
"Unit_hipMemGetInfo_ParaSmall",
|
||||
"Unit_hipMemGetInfo_ParaMultiSmall",
|
||||
"Unit_hipMultiThreadDevice_NearZero",
|
||||
"Unit_hipStreamPerThread_DeviceReset_1",
|
||||
"Unit_hipStreamCreateWithPriority_ValidateWithEvents",
|
||||
"Unit_hipStreamPerThread_StrmWaitEvt",
|
||||
"Unit_hipGraphMemcpyNodeSetParamsToSymbol_Functional",
|
||||
"Unit_hipStreamWaitEvent_DifferentStreams",
|
||||
"Unit_hipStreamQuery_WithFinishedWork",
|
||||
"SWDEV-347670 - blocking tests have TDR, causing hangs",
|
||||
"Unit_hipStreamValue_Wait32_Blocking_Mask_Gte",
|
||||
"Unit_hipStreamValue_Wait32_Blocking_Mask_Eq_1",
|
||||
"Unit_hipStreamValue_Wait32_Blocking_Mask_Eq_2",
|
||||
"Unit_hipStreamValue_Wait32_Blocking_Mask_And",
|
||||
"Unit_hipStreamValue_Wait32_Blocking_NoMask_Eq",
|
||||
"Unit_hipStreamValue_Wait32_Blocking_NoMask_Gte",
|
||||
"Unit_hipStreamValue_Wait32_Blocking_NoMask_And",
|
||||
"Unit_hipStreamValue_Wait64_Blocking_Mask_Gte_1",
|
||||
"Unit_hipStreamValue_Wait64_Blocking_Mask_Gte_2",
|
||||
"Unit_hipStreamValue_Wait64_Blocking_Mask_Eq_1",
|
||||
"Unit_hipStreamValue_Wait64_Blocking_Mask_Eq_2",
|
||||
"Unit_hipStreamValue_Wait64_Blocking_Mask_And",
|
||||
"Unit_hipStreamValue_Wait64_Blocking_NoMask_Gte",
|
||||
"Unit_hipStreamValue_Wait64_Blocking_NoMask_Eq",
|
||||
"Unit_hipStreamValue_Wait32_Blocking_NoMask_Nor",
|
||||
"Unit_hipStreamValue_Wait64_Blocking_NoMask_And",
|
||||
"Unit_hipGetDeviceFlags_Positive_Context",
|
||||
"Unit_hipDeviceGetPCIBusId_Negative_PartialFill",
|
||||
"Unit_hipDeviceGetSharedMemConfig_Positive_Basic",
|
||||
"Unit_hipDeviceGetSharedMemConfig_Positive_Threaded",
|
||||
"Unit_hipDeviceReset_Positive_Basic",
|
||||
"Unit_hipDeviceReset_Positive_Threaded",
|
||||
"Unit_hipInit_Negative",
|
||||
"Unit_hipGraphMemcpyNodeSetParams_Functional",
|
||||
"Unit_hipGraphNodeGetDependentNodes_Functional",
|
||||
"Unit_hipGraphNodeGetDependencies_Functional",
|
||||
"Unit_hipGraphExecChildGraphNodeSetParams_ChildTopology",
|
||||
"Unit_hipGraphAddEventRecordNode_MultipleRun",
|
||||
"Unit_hipGraphAddEventRecordNode_Functional_ElapsedTime",
|
||||
"Unit_hipStreamBeginCapture_captureComplexGraph",
|
||||
"Note: needs to be enabled when streamPerThread issues are fixed",
|
||||
"Unit_hipStreamSynchronize_NullStreamAndStreamPerThread",
|
||||
"Note: intermittent Seg fault failure ",
|
||||
"Unit_hipGraphAddEventRecordNode_Functional_WithoutFlags",
|
||||
"Unit_hipGraphAddChildGraphNode_MultGraphsAsSingleGraph",
|
||||
"Unit_hipFuncSetCacheConfig_Positive_Basic",
|
||||
"Unit_hipFuncSetCacheConfig_Negative_Parameters",
|
||||
"Unit_hipFuncSetSharedMemConfig_Positive_Basic",
|
||||
"Unit_hipFuncSetAttribute_Positive_PreferredSharedMemoryCarveout",
|
||||
"Unit_hipEventCreateWithFlags_DisableSystemFence_HstVisMem",
|
||||
"Unit_hipEventCreateWithFlags_DefaultFlg_HstVisMem",
|
||||
"Unit_hipEventCreateWithFlags_DisableSystemFence_NonCohHstMem",
|
||||
"Unit_hipEventCreateWithFlags_DefaultFlg_NonCohHstMem",
|
||||
"Unit_hipEventCreateWithFlags_DisableSystemFence_CohHstMem",
|
||||
"Unit_hipEventCreateWithFlags_DefaultFlg_CohHstMem",
|
||||
"Unit_hipOccupancyMaxActiveBlocksPerMultiprocessor_Negative_Parameters",
|
||||
"Unit_hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags_Negative_Parameters",
|
||||
"Unit_hipModuleOccupancyMaxPotentialBlockSizeWithFlags_Negative_Parameters",
|
||||
"Unit_hipGraphMemcpyNodeSetParamsToSymbol_Positive_Basic",
|
||||
"Unit_hipGraphExecMemcpyNodeSetParamsToSymbol_Positive_Basic",
|
||||
"Unit_hipGraphMemcpyNodeSetParamsFromSymbol_Positive_Basic",
|
||||
"Unit_hipGraphExecMemcpyNodeSetParamsFromSymbol_Positive_Basic",
|
||||
"Unit_hipKernelNameRef_Negative_Parameters",
|
||||
"Unit_hipKernelNameRef_Positive_Basic",
|
||||
"Unit_hipMemAdvise_No_Flag_Interference",
|
||||
"Unit_hipGraphAddEventRecordNode_Functional_WithoutFlags",
|
||||
"Unit_hipGraphDestroyNode_Complx_ChkNumOfNodesNDep",
|
||||
"Unit_hipGraphDestroyNode_Complx_ChkNumOfNodesNDep_ClonedGrph",
|
||||
"Unit_hipGraphDestroyNode_Complx_ChkNumOfNodesNDep_ChldNode",
|
||||
"NOTE: The following 2 tests are disabled due to defect - EXSWHTEC-238",
|
||||
"Unit_hipDrvMemcpy3D_Positive_Array",
|
||||
"Unit_hipDrvMemcpy3DAsync_Positive_Array",
|
||||
"Unit_hipMemGetAddressRange_Positive",
|
||||
"Note: devicelib hangs and failures",
|
||||
"Unit_deviceAllocation_Malloc_PerThread_PrimitiveDataType",
|
||||
"Unit_deviceAllocation_New_PerThread_PrimitiveDataType",
|
||||
"Unit_deviceAllocation_Malloc_PerThread_StructDataType",
|
||||
"Unit_deviceAllocation_New_PerThread_StructDataType",
|
||||
"Unit_deviceAllocation_Malloc_AcrossKernels",
|
||||
"Unit_deviceAllocation_New_AcrossKernels",
|
||||
"Unit_deviceAllocation_Malloc_SingleCodeObj",
|
||||
"Unit_deviceAllocation_New_SingleCodeObj",
|
||||
"Unit_deviceAllocation_Malloc_PerThread_Graph",
|
||||
"Unit_deviceAllocation_New_PerThread_Graph",
|
||||
"Unit_deviceAllocation_Malloc_DeviceFunc",
|
||||
"Unit_deviceAllocation_VirtualFunction",
|
||||
"Unit_deviceAllocation_Malloc_MulKernels_MulThreads",
|
||||
"Unit_deviceAllocation_New_MulKernels_MulThreads",
|
||||
"Unit_deviceAllocation_Malloc_MulCodeObj",
|
||||
"Unit_deviceAllocation_New_MulCodeObj",
|
||||
"Unit_deviceAllocation_New_DeviceFunc",
|
||||
"====================================================",
|
||||
"Note: these tests were disabled because some seemed to hang the machine on Windows with Navi32;",
|
||||
"all the ones calling TestMemoryAcrossMulKernels()/TestMemoryAcrossMulKernelsUsingGraph() were disabled",
|
||||
"Unit_deviceAllocation_InOneThread_AccessInAllThreads",
|
||||
"Unit_deviceAllocation_Malloc_AcrossKernels",
|
||||
"Unit_deviceAllocation_New_AcrossKernels",
|
||||
"Unit_deviceAllocation_Malloc_ComplexDataType",
|
||||
"Unit_deviceAllocation_New_ComplexDataType",
|
||||
"Unit_deviceAllocation_Malloc_UnionType",
|
||||
"Unit_deviceAllocation_New_UnionType",
|
||||
"Unit_deviceAllocation_Malloc_SingleCodeObj",
|
||||
"Unit_deviceAllocation_New_SingleCodeObj",
|
||||
"Unit_deviceAllocation_Malloc_PerThread_Graph",
|
||||
"Unit_deviceAllocation_New_PerThread_Graph",
|
||||
"====================================================",
|
||||
"Unit_hipGraphAddEventRecordNode_MultipleRun",
|
||||
"Unit_hipDeviceGetPCIBusId_Negative_PartialFill",
|
||||
"Unit_hipStreamValue_Wait64_Blocking_NoMask_Nor",
|
||||
"Unit_hipStreamQuery_WithFinishedWork",
|
||||
"Unit_hipLaunchHostFunc_Graph",
|
||||
"Unit_hipLaunchHostFunc_KernelHost",
|
||||
"Unit_ChannelDescriptor_Positive_16BitFloatingPoint",
|
||||
"Unit_hipStreamSetCaptureDependencies_Positive_Functional",
|
||||
"Note: Following four tests disabled due to defect - EXSWHTEC-203",
|
||||
"Unit_hipGraphMemsetNodeSetParams_Positive_Basic - uint16_t",
|
||||
"Unit_hipGraphMemsetNodeSetParams_Positive_Basic - uint32_t",
|
||||
"Note: Test disabled due to defect - EXSWHTEC-207",
|
||||
"Unit_hipGraphExecMemsetNodeSetParams_Negative_Updating_Non1D_Node",
|
||||
"Unit_hipGraphExecMemsetNodeSetParams_Positive_Basic - uint8_t",
|
||||
"Unit_hipGraphExecMemsetNodeSetParams_Positive_Basic - uint16_t",
|
||||
"Unit_hipGraphExecMemsetNodeSetParams_Positive_Basic - uint32_t",
|
||||
"Unit_hipGraphMemsetNodeSetParams_Positive_Basic - uint8_t",
|
||||
"Unit_hipDeviceGetUuid_Positive",
|
||||
"Disabling test tracked SWDEV-394199",
|
||||
"Unit_hipStreamCreateWithPriority_MulthreadNonblockingflag",
|
||||
"SWDEV-396617 ExecMemcpyNodeSetParamsFromSymbol fails in direction",
|
||||
"SWDEV-396616 hipMemMap returns invalid error",
|
||||
"Unit_hipMemVmm_Basic",
|
||||
"SWDEV-396615 mGPUs not considered correctly",
|
||||
"Unit_hipManagedKeyword_MultiGpu",
|
||||
"Disabling test tracked SWDEV-391555",
|
||||
"Unit_hipMemcpyPeer_Positive_ZeroSize",
|
||||
"Unit_hipMemcpyPeerAsync_Positive_ZeroSize",
|
||||
"SWDEV-400049 tdr intermittently",
|
||||
"Unit_hipMemsetDSync - int16_t",
|
||||
"Unit_hipStreamAddCallback_StrmSyncTiming",
|
||||
"SWDEV-402082 - PAL Backend fails to reserve address on GPU except first one",
|
||||
"Unit_hipGraphInstantiateWithFlags_FlagAutoFreeOnLaunch_check",
|
||||
"SWDEV-398981 fails in stress test",
|
||||
"Unit_hipStreamCreateWithPriority_MulthreadDefaultflag",
|
||||
"Disabling below tests temporarily due to change in API behavior",
|
||||
"Unit_hipMemPrefetchAsync_NonPageSz",
|
||||
"Unit_hipStreamCreateWithFlags_DefaultStreamInteraction",
|
||||
"SWDEV-402054 fails in external github build",
|
||||
"Unit_hipEventDestroy_WithWaitingStream",
|
||||
"Note: UUID returned empty on some windows nodes",
|
||||
"Unit_hipDeviceGetUuid_Positive",
|
||||
"=== Below tests fail in external CI for PR https://github.com/ROCm-Developer-Tools/hip-tests/pull/96 ===",
|
||||
"Unit_hipHostGetDevicePointer_Negative",
|
||||
"Unit_hipExtModuleLaunchKernel_NonUniformWorkGroup",
|
||||
"=== Below tests fail in external CI for PR https://github.com/ROCm-Developer-Tools/hip-tests/pull/18 ===",
|
||||
"Unit_hipMemcpyAsync_Negative_Parameters",
|
||||
"Unit_hipMemcpyDtoHAsync_Negative_Parameters",
|
||||
"Unit_hipMemcpyHtoDAsync_Negative_Parameters",
|
||||
"Unit_hipMemcpyDtoDAsync_Negative_Parameters",
|
||||
"=== Below tests fail in external CI for PR https://github.com/ROCm-Developer-Tools/hip-tests/pull/92 ===",
|
||||
"Unit_hipGetChannelDesc_Negative_Parameters",
|
||||
"=== SWDEV-431191:Below tests failed in stress test on 03/11/23 ===",
|
||||
"Unit_hipHostMalloc_AllocateMoreThanAvailGPUMemory",
|
||||
"Unit_hipHostMalloc_AllocateUseMoreThanAvailGPUMemory",
|
||||
"=== SWDEV-432250:Below tests failed in stress test on 10/11/23 ===",
|
||||
"Unit_hipVectorTypes_test_on_device",
|
||||
"=== Below test is disabled due to defect EXSWHTEC-347 ===",
|
||||
"Unit_hipPointerSetAttribute_Positive_SyncMemops",
|
||||
"NOTE: The following test is disabled due to defect - EXSWHTEC-241",
|
||||
"NOTE: The following test is disabled due to defect - EXSWHTEC-242",
|
||||
"Unit_hipFuncGetAttributes_Positive_Basic",
|
||||
"NOTE: The following test is disabled due to defect - EXSWHTEC-243",
|
||||
"NOTE: The following test is disabled due to defect - EXSWHTEC-244",
|
||||
"Unit_hipExtLaunchMultiKernelMultiDevice_Negative_Parameters",
|
||||
"Unit_hipMemAddressFree_negative",
|
||||
"=== Below 2 tests are disabled due to defect EXSWHTEC-369 ===",
|
||||
"Unit_Device_ilogbf_Accuracy_Positive",
|
||||
"Unit_Device_ilogb_Accuracy_Positive",
|
||||
"NOTE: The following test is disabled due to defect - EXSWHTEC-245",
|
||||
"Unit_hipMemAddressFree_negative",
|
||||
"Unit_hipMemAddressReserve_AlignmentTest",
|
||||
"Unit_hipGraphAddMemcpyNode_Negative_Parameters",
|
||||
"Unit_hipMemCreate_ChkWithKerLaunch",
|
||||
"Unit_hipMemCreate_MapNonContiguousChunks",
|
||||
"Unit_hipMemMap_MapPartialPhysicalMem",
|
||||
"Unit_hipMemMap_VMMMemoryReuse_MultiGPU",
|
||||
"Unit_hipMemSetAccess_SetGet",
|
||||
"Unit_hipMemSetAccess_MultDevSetGet",
|
||||
"Unit_hipMemSetAccess_EntireVMMRangeSetGet",
|
||||
"Unit_hipMemGetAccess_NegTst",
|
||||
"Unit_hipMemSetAccess_FuncTstOnMultDev",
|
||||
"Unit_hipMemSetAccess_Vmm2UnifiedMemCpy",
|
||||
"Unit_hipMemSetAccess_Vmm2DevMemCpy",
|
||||
"Unit_hipMemSetAccess_Vmm2VMMInterDevMemCpy",
|
||||
"Unit_hipMemSetAccess_GrowVMM",
|
||||
"Unit_hipMemSetAccess_negative",
|
||||
"=== SWDEV-434171: Below tests took long time to complete in stress test on 17/11/23 ===",
|
||||
"Unit_Warp_Shfl_Positive_Basic - int",
|
||||
"Unit_Warp_Shfl_Positive_Basic - unsigned int",
|
||||
"Unit_Warp_Shfl_Positive_Basic - long",
|
||||
"Unit_Warp_Shfl_Positive_Basic - unsigned long",
|
||||
"Unit_Warp_Shfl_Positive_Basic - long long",
|
||||
"Unit_Warp_Shfl_Positive_Basic - unsigned long long",
|
||||
"Unit_Warp_Shfl_Positive_Basic - float",
|
||||
"Unit_Warp_Shfl_Positive_Basic - double",
|
||||
"Unit_Warp_Shfl_XOR_Positive_Basic - int",
|
||||
"Unit_Warp_Shfl_XOR_Positive_Basic - unsigned int",
|
||||
"Unit_Warp_Shfl_XOR_Positive_Basic - long",
|
||||
"Unit_Warp_Shfl_XOR_Positive_Basic - unsigned long",
|
||||
"Unit_Warp_Shfl_XOR_Positive_Basic - long long",
|
||||
"Unit_Warp_Shfl_XOR_Positive_Basic - unsigned long long",
|
||||
"Unit_Warp_Shfl_XOR_Positive_Basic - float",
|
||||
"Unit_Warp_Shfl_XOR_Positive_Basic - double",
|
||||
"Unit_Warp_Shfl_XOR_Positive_Basic - __half",
|
||||
"Unit_Warp_Shfl_XOR_Positive_Basic - __half2",
|
||||
"Unit_Coalesced_Group_Sync_Positive_Basic - uint16_t",
|
||||
"Unit_Coalesced_Group_Sync_Positive_Basic - uint32_t",
|
||||
"=== SWDEV-434878: Below tests failed in stress test on 24/11/23 ===",
|
||||
"Unit_hipGraphUpload_Negative_Parameters",
|
||||
"Unit_hipModuleOccupancyMaxPotentialBlockSize_Negative_Parameters",
|
||||
"Unit_hipModuleOccupancyMaxPotentialBlockSize_Positive_RangeValidation",
|
||||
"Unit_hipModuleOccupancyMaxPotentialBlockSizeWithFlags_Positive_RangeValidation",
|
||||
"Unit_hipModuleOccupancyMaxActiveBlocksPerMultiprocessor_Negative_Parameters",
|
||||
"Unit_hipModuleOccupancyMaxActiveBlocksPerMultiprocessor_Positive_RangeValidation",
|
||||
"Unit_hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags_Positive_RangeValidation",
|
||||
"=== SWDEV-435667: Below tests failing randomly in stress test on 01/12/23 ===",
|
||||
"Unit_atomicExch_system_Positive_Peer_GPUs - int",
|
||||
"Unit_atomicExch_system_Positive_Peer_GPUs - unsigned int",
|
||||
"Unit_atomicExch_system_Positive_Peer_GPUs - unsigned long",
|
||||
"Unit_atomicExch_system_Positive_Peer_GPUs - unsigned long long",
|
||||
"Unit_atomicExch_system_Positive_Peer_GPUs - float",
|
||||
"Unit_atomicExch_system_Positive_Peer_GPUs - double",
|
||||
"Unit_atomicExch_system_Positive_Host_And_Peer_GPUs - int",
|
||||
"Unit_atomicExch_system_Positive_Host_And_Peer_GPUs - unsigned int",
|
||||
"Unit_atomicExch_system_Positive_Host_And_Peer_GPUs - unsigned long",
|
||||
"Unit_atomicExch_system_Positive_Host_And_Peer_GPUs - unsigned long long",
|
||||
"Unit_atomicExch_system_Positive_Host_And_Peer_GPUs - float",
|
||||
"Unit_atomicExch_system_Positive_Host_And_Peer_GPUs - double",
|
||||
"=== SWDEV-435667: Below tests failing randomly in stress test on 08/12/23 ===",
|
||||
"Unit_hipMemPoolSetAccess_Negative_Parameters",
|
||||
"SWDEV-438524: Below tests taking long time to run in stress test on 15/12/23 ===",
|
||||
"Unit_Coalesced_Group_Shfl_Up_Positive_Basic - int",
|
||||
"Unit_Coalesced_Group_Shfl_Up_Positive_Basic - unsigned int",
|
||||
"Unit_Coalesced_Group_Shfl_Up_Positive_Basic - long",
|
||||
"Unit_Coalesced_Group_Shfl_Up_Positive_Basic - unsigned long",
|
||||
"Unit_Coalesced_Group_Shfl_Up_Positive_Basic - long long",
|
||||
"Unit_Coalesced_Group_Shfl_Up_Positive_Basic - unsigned long long",
|
||||
"Unit_Coalesced_Group_Shfl_Up_Positive_Basic - float",
|
||||
"Unit_Coalesced_Group_Shfl_Up_Positive_Basic - double",
|
||||
"Unit_Coalesced_Group_Shfl_Down_Positive_Basic - int",
|
||||
"Unit_Coalesced_Group_Shfl_Down_Positive_Basic - unsigned int",
|
||||
"Unit_Coalesced_Group_Shfl_Down_Positive_Basic - long",
|
||||
"Unit_Coalesced_Group_Shfl_Down_Positive_Basic - unsigned long",
|
||||
"Unit_Coalesced_Group_Shfl_Down_Positive_Basic - long long",
|
||||
"Unit_Coalesced_Group_Shfl_Down_Positive_Basic - unsigned long long",
|
||||
"Unit_Coalesced_Group_Shfl_Down_Positive_Basic - float",
|
||||
"Unit_Coalesced_Group_Shfl_Down_Positive_Basic - double",
|
||||
"Unit_Coalesced_Group_Shfl_Positive_Basic - int",
|
||||
"Unit_Coalesced_Group_Shfl_Positive_Basic - unsigned int",
|
||||
"Unit_Coalesced_Group_Shfl_Positive_Basic - long",
|
||||
"Unit_Coalesced_Group_Shfl_Positive_Basic - unsigned long",
|
||||
"Unit_Coalesced_Group_Shfl_Positive_Basic - long long",
|
||||
"Unit_Coalesced_Group_Shfl_Positive_Basic - unsigned long long",
|
||||
"Unit_Coalesced_Group_Shfl_Positive_Basic - float",
|
||||
"Unit_Coalesced_Group_Shfl_Positive_Basic - double",
|
||||
"SWDEV-438524: Below tests causing TDR & machine down in stress test on 15/12/23 ===",
|
||||
"Unit_hipExtModuleLaunchKernel_Functional",
|
||||
"Unit_hipExtLaunchKernelGGL_Functional",
|
||||
"SWDEV-413997: VMM test still failing in windows",
|
||||
"Unit_hipMemSetAccess_ChangeAccessProp",
|
||||
"SWDEV-444041: These tests fail randomly in gfx1030 MGPU",
|
||||
"Unit_hipMemMap_SameMemoryReuse",
|
||||
"Unit_hipMemMap_negative",
|
||||
"Unit_hipMemSetAccess_Vmm2PeerDevMemCpy",
|
||||
"SWDEV-444041: These tests fail in gfx1100 MGPU",
|
||||
"Unit_hipMemMap_PhysicalMemoryReuse_SingleGPU",
|
||||
"Unit_hipMemMap_PhysicalMemory_Map2MultVMMs",
|
||||
"Unit_hipMemMap_PhysicalMemoryReuse_MultiDev",
|
||||
"Unit_hipMemSetAccess_Vmm2VMMMemCpy",
|
||||
"SWDEV-444031: This test fails in gfx1101 MGPU",
|
||||
"Unit_hipMemSetAccess_Multithreaded",
|
||||
"=== Below test fails in external CI for PR https://github.com/ROCm-Developer-Tools/hip-tests/pull/274 ===",
|
||||
"Unit_Printf_flags_Sanity_Positive",
|
||||
"Unit_Printf_length_Sanity_Positive",
|
||||
"=== Below tests fail in external CI for PR https://github.com/ROCm-Developer-Tools/hip-tests/pull/356 ===",
|
||||
"=== Below tests are failing PSDB ===",
|
||||
"Unit_hipGraphMem_Alloc_Free_NodeGetParams_Functional_3",
|
||||
"Unit_hipGraphAddMemAllocNode_Positive_FreeInGraph",
|
||||
"Unit_hipFreeAsync_Negative_Parameters",
|
||||
"Unit_hipMallocMipmappedArray_DiffSizes",
|
||||
"Unit_hipMallocMipmappedArray_MultiThread",
|
||||
"Unit_hipMallocMipmappedArray_Negative_InvalidFlags",
|
||||
"Unit_hipGetMipmappedArrayLevel_Negative",
|
||||
"Unit_hipFreeMipmappedArray_Negative_DoubleFree",
|
||||
"Unit_hipFreeMipmappedArrayMultiTArray - int",
|
||||
"Unit_hipMemcpy3D_Positive_Synchronization_Behavior",
|
||||
"Unit_hipMemcpyParam2D_Positive_Synchronization_Behavior",
|
||||
"Unit_hipDrvMemcpy3D_Positive_Synchronization_Behavior",
|
||||
"Unit_Thread_Block_Tile_Dynamic_Getters_Positive_Basic",
|
||||
"Performance_hipMemcpy2D_HostToHost",
|
||||
"Performance_hipMemcpy2DAsync_HostToHost",
|
||||
"Unit_hipDrvGraphAddMemsetNode_hipMallocPitch_2D",
|
||||
"Unit_hipDrvGraphAddMemsetNode_hipMallocPitch_1D",
|
||||
"Unit_hipDrvGraphAddMemsetNode_hipMalloc3D_2D",
|
||||
"Unit_hipDrvGraphAddMemsetNode_hipMalloc3D_1D",
|
||||
"Unit_hipDrvGraphAddMemsetNode_hipMalloc_1D",
|
||||
"Unit_hipDrvGraphAddMemsetNode_hipMallocManaged",
|
||||
"Unit_hipModuleLaunchKernel_Negative_Parameters",
|
||||
"Unit_hipExtModuleLaunchKernel_Negative_Parameters",
|
||||
"Unit_hipLaunchKernel_Negative_Parameters",
|
||||
"Unit_tex1Dfetch_Positive_ReadModeElementType - char",
|
||||
"Unit_tex1Dfetch_Positive_ReadModeElementType - unsigned char",
|
||||
"Unit_tex1Dfetch_Positive_ReadModeElementType - short",
|
||||
"Unit_tex1Dfetch_Positive_ReadModeElementType - unsigned short",
|
||||
"Unit_tex1Dfetch_Positive_ReadModeElementType - int",
|
||||
"Unit_tex1Dfetch_Positive_ReadModeElementType - unsigned int",
|
||||
"Unit_tex1Dfetch_Positive_ReadModeElementType - float",
|
||||
"Unit_tex1Dfetch_Positive_ReadModeNormalizedFloat - char",
|
||||
"Unit_tex1Dfetch_Positive_ReadModeNormalizedFloat - unsigned char",
|
||||
"Unit_tex1Dfetch_Positive_ReadModeNormalizedFloat - short",
|
||||
"Unit_tex1Dfetch_Positive_ReadModeNormalizedFloat - unsigned short",
|
||||
"Unit_tex1D_Positive_ReadModeNormalizedFloat - char",
|
||||
"Unit_tex1D_Positive_ReadModeNormalizedFloat - unsigned char",
|
||||
"Unit_tex1D_Positive_ReadModeNormalizedFloat - short",
|
||||
"Unit_tex1D_Positive_ReadModeNormalizedFloat - unsigned short",
|
||||
"Unit_tex1DLayered_Positive_ReadModeNormalizedFloat - char",
|
||||
"Unit_tex1DLayered_Positive_ReadModeNormalizedFloat - unsigned char",
|
||||
"Unit_tex1DLayered_Positive_ReadModeNormalizedFloat - short",
|
||||
"Unit_tex1DLayered_Positive_ReadModeNormalizedFloat - unsigned short",
|
||||
"Unit_tex1DGrad_Positive_ReadModeElementType - char",
|
||||
"Unit_tex1DGrad_Positive_ReadModeElementType - unsigned char",
|
||||
"Unit_tex1DGrad_Positive_ReadModeElementType - short",
|
||||
"Unit_tex1DGrad_Positive_ReadModeElementType - unsigned short",
|
||||
"Unit_tex1DGrad_Positive_ReadModeElementType - int",
|
||||
"Unit_tex1DGrad_Positive_ReadModeElementType - unsigned int",
|
||||
"Unit_tex1DGrad_Positive_ReadModeElementType - float",
|
||||
"Unit_tex1DGrad_Positive_ReadModeNormalizedFloat - char",
|
||||
"Unit_tex1DGrad_Positive_ReadModeNormalizedFloat - unsigned char",
|
||||
"Unit_tex1DGrad_Positive_ReadModeNormalizedFloat - short",
|
||||
"Unit_tex1DGrad_Positive_ReadModeNormalizedFloat - unsigned short",
|
||||
"Unit_tex1DLayeredGrad_Positive_ReadModeElementType - char",
|
||||
"Unit_tex1DLayeredGrad_Positive_ReadModeElementType - unsigned char",
|
||||
"Unit_tex1DLayeredGrad_Positive_ReadModeElementType - short",
|
||||
"Unit_tex1DLayeredGrad_Positive_ReadModeElementType - unsigned short",
|
||||
"Unit_tex1DLayeredGrad_Positive_ReadModeElementType - int",
|
||||
"Unit_tex1DLayeredGrad_Positive_ReadModeElementType - unsigned int",
|
||||
"Unit_tex1DLayeredGrad_Positive_ReadModeElementType - float",
|
||||
"Unit_tex1DLayeredGrad_Positive_ReadModeNormalizedFloat - char",
|
||||
"Unit_tex1DLayeredGrad_Positive_ReadModeNormalizedFloat - unsigned char",
|
||||
"Unit_tex1DLayeredGrad_Positive_ReadModeNormalizedFloat - short",
|
||||
"Unit_tex1DLayeredGrad_Positive_ReadModeNormalizedFloat - unsigned short",
|
||||
"Unit_tex1DLayeredLod_Positive_ReadModeElementType - char",
|
||||
"Unit_tex1DLayeredLod_Positive_ReadModeElementType - unsigned char",
|
||||
"Unit_tex1DLayeredLod_Positive_ReadModeElementType - short",
|
||||
"Unit_tex1DLayeredLod_Positive_ReadModeElementType - unsigned short",
|
||||
"Unit_tex1DLayeredLod_Positive_ReadModeElementType - int",
|
||||
"Unit_tex1DLayeredLod_Positive_ReadModeElementType - unsigned int",
|
||||
"Unit_tex1DLayeredLod_Positive_ReadModeElementType - float",
|
||||
"Unit_tex1DLayeredLod_Positive_ReadModeNormalizedFloat - char",
|
||||
"Unit_tex1DLayeredLod_Positive_ReadModeNormalizedFloat - unsigned char",
|
||||
"Unit_tex1DLayeredLod_Positive_ReadModeNormalizedFloat - short",
|
||||
"Unit_tex1DLayeredLod_Positive_ReadModeNormalizedFloat - unsigned short",
|
||||
"Unit_tex1DLod_Positive_ReadModeElementType - char",
|
||||
"Unit_tex1DLod_Positive_ReadModeElementType - unsigned char",
|
||||
"Unit_tex1DLod_Positive_ReadModeElementType - short",
|
||||
"Unit_tex1DLod_Positive_ReadModeElementType - unsigned short",
|
||||
"Unit_tex1DLod_Positive_ReadModeElementType - int",
|
||||
"Unit_tex1DLod_Positive_ReadModeElementType - unsigned int",
|
||||
"Unit_tex1DLod_Positive_ReadModeElementType - float",
|
||||
"Unit_tex1DLod_Positive_ReadModeNormalizedFloat - char",
|
||||
"Unit_tex1DLod_Positive_ReadModeNormalizedFloat - unsigned char",
|
||||
"Unit_tex1DLod_Positive_ReadModeNormalizedFloat - short",
|
||||
"Unit_tex1DLod_Positive_ReadModeNormalizedFloat - unsigned short",
|
||||
"Unit_tex3D_Positive_ReadModeElementType - char",
|
||||
"Unit_tex3D_Positive_ReadModeElementType - unsigned char",
|
||||
"Unit_tex3D_Positive_ReadModeElementType - short",
|
||||
"Unit_tex3D_Positive_ReadModeElementType - unsigned short",
|
||||
"Unit_tex3D_Positive_ReadModeElementType - int",
|
||||
"Unit_tex3D_Positive_ReadModeElementType - unsigned int",
|
||||
"Unit_tex3D_Positive_ReadModeElementType - float",
|
||||
"Unit_tex3D_Positive_ReadModeNormalizedFloat - char",
|
||||
"Unit_tex3D_Positive_ReadModeNormalizedFloat - unsigned char",
|
||||
"Unit_tex3D_Positive_ReadModeNormalizedFloat - short",
|
||||
"Unit_tex3D_Positive_ReadModeNormalizedFloat - unsigned short",
|
||||
"Unit_tex3DLod_Positive_ReadModeElementType - char",
|
||||
"Unit_tex3DLod_Positive_ReadModeElementType - unsigned char",
|
||||
"Unit_tex3DLod_Positive_ReadModeElementType - short",
|
||||
"Unit_tex3DLod_Positive_ReadModeElementType - unsigned short",
|
||||
"Unit_tex3DLod_Positive_ReadModeElementType - int",
|
||||
"Unit_tex3DLod_Positive_ReadModeElementType - unsigned int",
|
||||
"Unit_tex3DLod_Positive_ReadModeElementType - float",
|
||||
"Unit_tex3DLod_Positive_ReadModeNormalizedFloat - char",
|
||||
"Unit_tex3DLod_Positive_ReadModeNormalizedFloat - unsigned char",
|
||||
"Unit_tex3DLod_Positive_ReadModeNormalizedFloat - short",
|
||||
"Unit_tex3DLod_Positive_ReadModeNormalizedFloat - unsigned short",
|
||||
"Unit_tex3DGrad_Positive_ReadModeElementType - char",
|
||||
"Unit_tex3DGrad_Positive_ReadModeElementType - unsigned char",
|
||||
"Unit_tex3DGrad_Positive_ReadModeElementType - short",
|
||||
"Unit_tex3DGrad_Positive_ReadModeElementType - unsigned short",
|
||||
"Unit_tex3DGrad_Positive_ReadModeElementType - int",
|
||||
"Unit_tex3DGrad_Positive_ReadModeElementType - unsigned int",
|
||||
"Unit_tex3DGrad_Positive_ReadModeElementType - float",
|
||||
"Unit_tex3DGrad_Positive_ReadModeNormalizedFloat - char",
|
||||
"Unit_tex3DGrad_Positive_ReadModeNormalizedFloat - unsigned char",
|
||||
"Unit_tex3DGrad_Positive_ReadModeNormalizedFloat - short",
|
||||
"Unit_tex3DGrad_Positive_ReadModeNormalizedFloat - unsigned short",
|
||||
"Unit_texCubemap_Positive_ReadModeElementType - char",
|
||||
"Unit_texCubemap_Positive_ReadModeElementType - unsigned char",
|
||||
"Unit_texCubemap_Positive_ReadModeElementType - short",
|
||||
"Unit_texCubemap_Positive_ReadModeElementType - unsigned short",
|
||||
"Unit_texCubemap_Positive_ReadModeElementType - int",
|
||||
"Unit_texCubemap_Positive_ReadModeElementType - unsigned int",
|
||||
"Unit_texCubemap_Positive_ReadModeElementType - float",
|
||||
"Unit_texCubemap_Positive_ReadModeNormalizedFloat - char",
|
||||
"Unit_texCubemap_Positive_ReadModeNormalizedFloat - unsigned char",
|
||||
"Unit_texCubemap_Positive_ReadModeNormalizedFloat - short",
|
||||
"Unit_texCubemap_Positive_ReadModeNormalizedFloat - unsigned short",
|
||||
"Unit_texCubemapLod_Positive_ReadModeElementType - char",
|
||||
"Unit_texCubemapLod_Positive_ReadModeElementType - unsigned char",
|
||||
"Unit_texCubemapLod_Positive_ReadModeElementType - short",
|
||||
"Unit_texCubemapLod_Positive_ReadModeElementType - unsigned short",
|
||||
"Unit_texCubemapLod_Positive_ReadModeElementType - int",
|
||||
"Unit_texCubemapLod_Positive_ReadModeElementType - unsigned int",
|
||||
"Unit_texCubemapLod_Positive_ReadModeElementType - float",
|
||||
"Unit_texCubemapLod_Positive_ReadModeNormalizedFloat - char",
|
||||
"Unit_texCubemapLod_Positive_ReadModeNormalizedFloat - unsigned char",
|
||||
"Unit_texCubemapLod_Positive_ReadModeNormalizedFloat - short",
|
||||
"Unit_texCubemapLod_Positive_ReadModeNormalizedFloat - unsigned short",
|
||||
"Unit_texCubemapGrad_Positive_ReadModeElementType - char",
|
||||
"Unit_texCubemapGrad_Positive_ReadModeElementType - unsigned char",
|
||||
"Unit_texCubemapGrad_Positive_ReadModeElementType - short",
|
||||
"Unit_texCubemapGrad_Positive_ReadModeElementType - unsigned short",
|
||||
"Unit_texCubemapGrad_Positive_ReadModeElementType - int",
|
||||
"Unit_texCubemapGrad_Positive_ReadModeElementType - unsigned int",
|
||||
"Unit_texCubemapGrad_Positive_ReadModeElementType - float",
|
||||
"Unit_texCubemapGrad_Positive_ReadModeNormalizedFloat - char",
|
||||
"Unit_texCubemapGrad_Positive_ReadModeNormalizedFloat - unsigned char",
|
||||
"Unit_texCubemapGrad_Positive_ReadModeNormalizedFloat - short",
|
||||
"Unit_texCubemapGrad_Positive_ReadModeNormalizedFloat - unsigned short",
|
||||
"Unit_texCubemapLayered_Positive_ReadModeElementType - char",
|
||||
"Unit_texCubemapLayered_Positive_ReadModeElementType - unsigned char",
|
||||
"Unit_texCubemapLayered_Positive_ReadModeElementType - short",
|
||||
"Unit_texCubemapLayered_Positive_ReadModeElementType - unsigned short",
|
||||
"Unit_texCubemapLayered_Positive_ReadModeElementType - int",
|
||||
"Unit_texCubemapLayered_Positive_ReadModeElementType - unsigned int",
|
||||
"Unit_texCubemapLayered_Positive_ReadModeElementType - float",
|
||||
"Unit_texCubemapLayered_Positive_ReadModeNormalizedFloat - char",
|
||||
"Unit_texCubemapLayered_Positive_ReadModeNormalizedFloat - unsigned char",
|
||||
"Unit_texCubemapLayered_Positive_ReadModeNormalizedFloat - short",
|
||||
"Unit_texCubemapLayered_Positive_ReadModeNormalizedFloat - unsigned short",
|
||||
"Unit_texCubemapLayeredLod_Positive_ReadModeElementType - char",
|
||||
"Unit_texCubemapLayeredLod_Positive_ReadModeElementType - unsigned char",
|
||||
"Unit_texCubemapLayeredLod_Positive_ReadModeElementType - short",
|
||||
"Unit_texCubemapLayeredLod_Positive_ReadModeElementType - unsigned short",
|
||||
"Unit_texCubemapLayeredLod_Positive_ReadModeElementType - int",
|
||||
"Unit_texCubemapLayeredLod_Positive_ReadModeElementType - unsigned int",
|
||||
"Unit_texCubemapLayeredLod_Positive_ReadModeElementType - float",
|
||||
"Unit_texCubemapLayeredLod_Positive_ReadModeNormalizedFloat - char",
|
||||
"Unit_texCubemapLayeredLod_Positive_ReadModeNormalizedFloat - unsigned char",
|
||||
"Unit_texCubemapLayeredLod_Positive_ReadModeNormalizedFloat - short",
|
||||
"Unit_texCubemapLayeredLod_Positive_ReadModeNormalizedFloat - unsigned short",
|
||||
"Unit_texCubemapLayeredGrad_Positive_ReadModeElementType - char",
|
||||
"Unit_texCubemapLayeredGrad_Positive_ReadModeElementType - unsigned char",
|
||||
"Unit_texCubemapLayeredGrad_Positive_ReadModeElementType - short",
|
||||
"Unit_texCubemapLayeredGrad_Positive_ReadModeElementType - unsigned short",
|
||||
"Unit_texCubemapLayeredGrad_Positive_ReadModeElementType - int",
|
||||
"Unit_texCubemapLayeredGrad_Positive_ReadModeElementType - unsigned int",
|
||||
"Unit_texCubemapLayeredGrad_Positive_ReadModeElementType - float",
|
||||
"Unit_texCubemapLayeredGrad_Positive_ReadModeNormalizedFloat - char",
|
||||
"Unit_texCubemapLayeredGrad_Positive_ReadModeNormalizedFloat - unsigned char",
|
||||
"Unit_texCubemapLayeredGrad_Positive_ReadModeNormalizedFloat - short",
|
||||
"Unit_texCubemapLayeredGrad_Positive_ReadModeNormalizedFloat - unsigned short",
|
||||
"Unit_tex2Dgather_Positive_ReadModeElementType - char",
|
||||
"Unit_tex2Dgather_Positive_ReadModeElementType - unsigned char",
|
||||
"Unit_tex2Dgather_Positive_ReadModeElementType - short",
|
||||
"Unit_tex2Dgather_Positive_ReadModeElementType - unsigned short",
|
||||
"Unit_tex2Dgather_Positive_ReadModeElementType - int",
|
||||
"Unit_tex2Dgather_Positive_ReadModeElementType - unsigned int",
|
||||
"Unit_tex2Dgather_Positive_ReadModeElementType - float",
|
||||
"Unit_tex2D_Positive_ReadModeElementType - char",
|
||||
"Unit_tex2D_Positive_ReadModeElementType - unsigned char",
|
||||
"Unit_tex2D_Positive_ReadModeElementType - short",
|
||||
"Unit_tex2D_Positive_ReadModeElementType - unsigned short",
|
||||
"Unit_tex2D_Positive_ReadModeElementType - int",
|
||||
"Unit_tex2D_Positive_ReadModeElementType - unsigned int",
|
||||
"Unit_tex2D_Positive_ReadModeElementType - float",
|
||||
"Unit_tex2D_Positive_ReadModeNormalizedFloat - char",
|
||||
"Unit_tex2D_Positive_ReadModeNormalizedFloat - unsigned char",
|
||||
"Unit_tex2D_Positive_ReadModeNormalizedFloat - short",
|
||||
"Unit_tex2D_Positive_ReadModeNormalizedFloat - unsigned short",
|
||||
"Unit_tex2DLayered_Positive_ReadModeElementType - char",
|
||||
"Unit_tex2DLayered_Positive_ReadModeElementType - unsigned char",
|
||||
"Unit_tex2DLayered_Positive_ReadModeElementType - short",
|
||||
"Unit_tex2DLayered_Positive_ReadModeElementType - unsigned short",
|
||||
"Unit_tex2DLayered_Positive_ReadModeElementType - int",
|
||||
"Unit_tex2DLayered_Positive_ReadModeElementType - unsigned int",
|
||||
"Unit_tex2DLayered_Positive_ReadModeElementType - float",
|
||||
"Unit_tex2DLayered_Positive_ReadModeNormalizedFloat - char",
|
||||
"Unit_tex2DLayered_Positive_ReadModeNormalizedFloat - unsigned char",
|
||||
"Unit_tex2DLayered_Positive_ReadModeNormalizedFloat - short",
|
||||
"Unit_tex2DLayered_Positive_ReadModeNormalizedFloat - unsigned short",
|
||||
"Unit_tex2DGrad_Positive_ReadModeElementType - char",
|
||||
"Unit_tex2DGrad_Positive_ReadModeElementType - unsigned char",
|
||||
"Unit_tex2DGrad_Positive_ReadModeElementType - short",
|
||||
"Unit_tex2DGrad_Positive_ReadModeElementType - unsigned short",
|
||||
"Unit_tex2DGrad_Positive_ReadModeElementType - int",
|
||||
"Unit_tex2DGrad_Positive_ReadModeElementType - unsigned int",
|
||||
"Unit_tex2DGrad_Positive_ReadModeElementType - float",
|
||||
"Unit_tex2DGrad_Positive_ReadModeNormalizedFloat - char",
|
||||
"Unit_tex2DGrad_Positive_ReadModeNormalizedFloat - unsigned char",
|
||||
"Unit_tex2DGrad_Positive_ReadModeNormalizedFloat - short",
|
||||
"Unit_tex2DGrad_Positive_ReadModeNormalizedFloat - unsigned short",
|
||||
"Unit_tex2DLayeredGrad_Positive_ReadModeElementType - char",
|
||||
"Unit_tex2DLayeredGrad_Positive_ReadModeElementType - unsigned char",
|
||||
"Unit_tex2DLayeredGrad_Positive_ReadModeElementType - short",
|
||||
"Unit_tex2DLayeredGrad_Positive_ReadModeElementType - unsigned short",
|
||||
"Unit_tex2DLayeredGrad_Positive_ReadModeElementType - int",
|
||||
"Unit_tex2DLayeredGrad_Positive_ReadModeElementType - unsigned int",
|
||||
"Unit_tex2DLayeredGrad_Positive_ReadModeElementType - float",
|
||||
"Unit_tex2DLayeredGrad_Positive_ReadModeNormalizedFloat - char",
|
||||
"Unit_tex2DLayeredGrad_Positive_ReadModeNormalizedFloat - unsigned char",
|
||||
"Unit_tex2DLayeredGrad_Positive_ReadModeNormalizedFloat - short",
|
||||
"Unit_tex2DLayeredGrad_Positive_ReadModeNormalizedFloat - unsigned short",
|
||||
"Unit_tex2DLod_Positive_ReadModeElementType - char",
|
||||
"Unit_tex2DLod_Positive_ReadModeElementType - unsigned char",
|
||||
"Unit_tex2DLod_Positive_ReadModeElementType - short",
|
||||
"Unit_tex2DLod_Positive_ReadModeElementType - unsigned short",
|
||||
"Unit_tex2DLod_Positive_ReadModeElementType - int",
|
||||
"Unit_tex2DLod_Positive_ReadModeElementType - unsigned int",
|
||||
"Unit_tex2DLod_Positive_ReadModeElementType - float",
|
||||
"Unit_tex2DLod_Positive_ReadModeNormalizedFloat - char",
|
||||
"Unit_tex2DLod_Positive_ReadModeNormalizedFloat - unsigned char",
|
||||
"Unit_tex2DLod_Positive_ReadModeNormalizedFloat - short",
|
||||
"Unit_tex2DLod_Positive_ReadModeNormalizedFloat - unsigned short",
|
||||
"Unit_tex2DLayeredLod_Positive_ReadModeElementType - char",
|
||||
"Unit_tex2DLayeredLod_Positive_ReadModeElementType - unsigned char",
|
||||
"Unit_tex2DLayeredLod_Positive_ReadModeElementType - short",
|
||||
"Unit_tex2DLayeredLod_Positive_ReadModeElementType - unsigned short",
|
||||
"Unit_tex2DLayeredLod_Positive_ReadModeElementType - int",
|
||||
"Unit_tex2DLayeredLod_Positive_ReadModeElementType - unsigned int",
|
||||
"Unit_tex2DLayeredLod_Positive_ReadModeElementType - float",
|
||||
"Unit_tex2DLayeredLod_Positive_ReadModeNormalizedFloat - char",
|
||||
"Unit_tex2DLayeredLod_Positive_ReadModeNormalizedFloat - unsigned char",
|
||||
"Unit_tex2DLayeredLod_Positive_ReadModeNormalizedFloat - short",
|
||||
"Unit_tex2DLayeredLod_Positive_ReadModeNormalizedFloat - unsigned short",
|
||||
"Unit_hipModuleLaunchKernel_Negative_Parameters",
|
||||
"Unit_hipModuleGetTexRef_Positive_Basic",
|
||||
"Unit_Kernel_Launch_bounds_Negative_OutOfBounds",
|
||||
"Unit_Kernel_Launch_bounds_Negative_Parameters_RTC",
|
||||
"Unit_AtomicBuiltins_Negative_Parameters_RTC",
|
||||
"Unit_hipMemcpy2D_H2D-D2D-D2H - int",
|
||||
"Unit_hipMemcpy2D_H2D-D2D-D2H - float",
|
||||
"Unit_hipMemcpy2D_H2D-D2D-D2H - double",
|
||||
"Unit_hipMemcpy2D_H2D-D2D-D2H_WithOffset - int",
|
||||
"Unit_hipMemcpy2D_H2D-D2D-D2H_WithOffset - float",
|
||||
"Unit_hipMemcpy2D_H2D-D2D-D2H_WithOffset - double",
|
||||
"Unit_hipMemcpy2D_H2D-D2D-D2H_Managed_WithOffset - int",
|
||||
"Unit_hipMemcpy2D_H2D-D2D-D2H_Managed_WithOffset - float",
|
||||
"Unit_hipMemcpy2D_H2D-D2D-D2H_Managed_WithOffset - double",
|
||||
"Unit_hipMemcpy2DAsync_Host&PinnedMem - int",
|
||||
"Unit_hipMemcpy2DAsync_Host&PinnedMem - float",
|
||||
"Unit_hipMemcpy2DAsync_Host&PinnedMem - double",
|
||||
"Unit_hipMemPoolGetAccess_Negative_Parameters",
|
||||
"Unit_hipMemPoolSetAttribute_Negative_Parameters",
|
||||
"Unit_hipMemPoolGetAttribute_Negative_Parameters",
|
||||
"Unit_Thread_Block_Getters_Positive_Basic",
|
||||
"Unit_hipMemset3DAsync_capturehipMemset3DAsync",
|
||||
"Unit_hipMemset2DAsync_capturehipMemset2DAsync",
|
||||
"Unit_hipOccupancyMaxPotBlkSizeVariableSMemWithFlags_Functional",
|
||||
"Unit_hipDynamicShared",
|
||||
"Unit___hip_atomic_fetch_min_Positive_Workgroup_Scattered_Addresses - double",
|
||||
"Unit_atomicExch_Positive - int",
|
||||
"Unit_atomicExch_Positive - unsigned int",
|
||||
"Unit_atomicExch_Positive - unsigned long",
|
||||
"Unit_atomicExch_Positive - unsigned long long",
|
||||
"Unit_atomicExch_Positive - float",
|
||||
"Unit_atomicExch_Positive - double",
|
||||
"Unit___hip_atomic_exchange_Positive_Wavefront - int",
|
||||
"Unit___hip_atomic_exchange_Positive_Wavefront - unsigned int",
|
||||
"Unit___hip_atomic_exchange_Positive_Wavefront - unsigned long",
|
||||
"Unit___hip_atomic_exchange_Positive_Wavefront - unsigned long long",
|
||||
"Unit___hip_atomic_exchange_Positive_Wavefront - float",
|
||||
"Unit___hip_atomic_exchange_Positive_Wavefront - double",
|
||||
"Unit___hip_atomic_exchange_Positive_Workgroup - int",
|
||||
"Unit___hip_atomic_exchange_Positive_Workgroup - unsigned int",
|
||||
"Unit___hip_atomic_exchange_Positive_Workgroup - unsigned long",
|
||||
"Unit___hip_atomic_exchange_Positive_Workgroup - unsigned long long",
|
||||
"Unit___hip_atomic_exchange_Positive_Workgroup - float",
|
||||
"Unit___hip_atomic_exchange_Positive_Workgroup - double",
|
||||
"Unit___syncthreads_Positive_Basic",
|
||||
"Unit___syncthreads_count_Positive_Basic",
|
||||
"Unit___syncthreads_and_Positive_Basic",
|
||||
"Unit___syncthreads_or_Positive_Basic",
|
||||
"Note: Test disabled due to defect - EXSWHTEC-151",
|
||||
"Unit_hipModuleLoad_Negative_Load_From_A_File_That_Is_Not_A_Module",
|
||||
"Note: Test disabled due to defect - EXSWHTEC-152",
|
||||
"Unit_hipModuleUnload_Negative_Module_Is_Nullptr",
|
||||
"Note: Following two tests disabled due to defect - EXSWHTEC-153",
|
||||
"Unit_hipModuleLoadData_Negative_Image_Is_An_Empty_String",
|
||||
"Unit_hipModuleLoadDataEx_Negative_Image_Is_An_Empty_String",
|
||||
"Note: Test disabled due to defect - EXSWHTEC-163",
|
||||
"Unit_hipModuleGetGlobal_Negative_Hmod_Is_Nullptr",
|
||||
"Note: Test disabled due to defect - EXSWHTEC-164",
|
||||
"Unit_hipModuleGetGlobal_Negative_Name_Is_Empty_String",
|
||||
"Note: Test disabled due to defect - EXSWHTEC-165",
|
||||
"Unit_hipModuleGetGlobal_Negative_Dptr_And_Bytes_Are_Nullptr",
|
||||
"Note: Test disabled due to defect - EXSWHTEC-166",
|
||||
"Unit_hipModuleGetTexRef_Negative_Hmod_Is_Nullptr",
|
||||
"Note: Test disabled due to defect - EXSWHTEC-167",
|
||||
"Unit_hipModuleGetTexRef_Negative_Name_Is_Empty_String",
|
||||
"Below tests hang in Jenkins PSDB",
|
||||
"Unit_Thread_Block_Tile_Sync_Positive_Basic - uint8_t",
|
||||
"Unit_Thread_Block_Tile_Sync_Positive_Basic - uint16_t",
|
||||
"Unit_Thread_Block_Tile_Sync_Positive_Basic - uint32_t",
|
||||
"Unit_coalesced_groups",
|
||||
"Unit_coalesced_groups_shfl_down",
|
||||
"Unit_coalesced_groups_shfl_up",
|
||||
"=== SWDEV-441604: Below tests take long time to run in stress test on 12/01/24 ===",
|
||||
"Unit_Thread_Block_Tile_Shfl_Up_Positive_Basic - int",
|
||||
"Unit_Thread_Block_Tile_Shfl_Up_Positive_Basic - unsigned int",
|
||||
"Unit_Thread_Block_Tile_Shfl_Up_Positive_Basic - long",
|
||||
"Unit_Thread_Block_Tile_Shfl_Up_Positive_Basic - unsigned long",
|
||||
"Unit_Thread_Block_Tile_Shfl_Up_Positive_Basic - long long",
|
||||
"Unit_Thread_Block_Tile_Shfl_Up_Positive_Basic - unsigned long long",
|
||||
"Unit_Thread_Block_Tile_Shfl_Up_Positive_Basic - float",
|
||||
"Unit_Thread_Block_Tile_Shfl_Up_Positive_Basic - double",
|
||||
"Unit_Thread_Block_Tile_Shfl_Down_Positive_Basic - int",
|
||||
"Unit_Thread_Block_Tile_Shfl_Down_Positive_Basic - unsigned int",
|
||||
"Unit_Thread_Block_Tile_Shfl_Down_Positive_Basic - long",
|
||||
"Unit_Thread_Block_Tile_Shfl_Down_Positive_Basic - unsigned long",
|
||||
"Unit_Thread_Block_Tile_Shfl_Down_Positive_Basic - long long",
|
||||
"Unit_Thread_Block_Tile_Shfl_Down_Positive_Basic - unsigned long long",
|
||||
"Unit_Thread_Block_Tile_Shfl_Down_Positive_Basic - float",
|
||||
"Unit_Thread_Block_Tile_Shfl_Down_Positive_Basic - double",
|
||||
"Unit_Thread_Block_Tile_Shfl_XOR_Positive_Basic - int",
|
||||
"Unit_Thread_Block_Tile_Shfl_XOR_Positive_Basic - unsigned int",
|
||||
"Unit_Thread_Block_Tile_Shfl_XOR_Positive_Basic - long",
|
||||
"Unit_Thread_Block_Tile_Shfl_XOR_Positive_Basic - unsigned long",
|
||||
"Unit_Thread_Block_Tile_Shfl_XOR_Positive_Basic - long long",
|
||||
"Unit_Thread_Block_Tile_Shfl_XOR_Positive_Basic - unsigned long long",
|
||||
"Unit_Thread_Block_Tile_Shfl_XOR_Positive_Basic - float",
|
||||
"Unit_Thread_Block_Tile_Shfl_XOR_Positive_Basic - double",
|
||||
"Unit_Thread_Block_Tile_Shfl_Positive_Basic - int",
|
||||
"Unit_Thread_Block_Tile_Shfl_Positive_Basic - unsigned int",
|
||||
"Unit_Thread_Block_Tile_Shfl_Positive_Basic - long",
|
||||
"Unit_Thread_Block_Tile_Shfl_Positive_Basic - unsigned long",
|
||||
"Unit_Thread_Block_Tile_Shfl_Positive_Basic - long long",
|
||||
"Unit_Thread_Block_Tile_Shfl_Positive_Basic - unsigned long long",
|
||||
"Unit_Thread_Block_Tile_Shfl_Positive_Basic - float",
|
||||
"Unit_Thread_Block_Tile_Shfl_Positive_Basic - double",
|
||||
"Unit_Thread_Block_Tile_Getters_Positive_Basic",
|
||||
"SWDEV-446588 - Disable graph multi gpu testcases until graph has support for it",
|
||||
"Unit_hipGraphExecUpdate_Negative_MultiDevice_Context_Changed",
|
||||
"Unit_hipGraphMem_Alloc_Free_NodeGetParams_Functional_MultiDevice",
|
||||
"Unit_hipGraphUpload_Functional_multidevice_test",
|
||||
"=== Below tests fail in external CI for PR https://github.com/ROCm-Developer-Tools/hip-tests/pull/210 ===",
|
||||
"Unit_StaticAssert_Positive_Basic_RTC",
|
||||
"Unit_Assert_Positive_Basic_KernelFail",
|
||||
"=== Below tests are disabled due to defect EXSWHTEC-356 ===",
|
||||
"Unit_Device___hisinf2_Accuracy_Positive",
|
||||
"Unit_Device___hisnan2_Accuracy_Positive",
|
||||
"Unit_Device___hbequ2_Accuracy_Positive",
|
||||
"Unit_Device___hne_Accuracy_Positive",
|
||||
"Unit_Device___hne2_Accuracy_Positive",
|
||||
"Unit_Device___hbne2_Accuracy_Positive",
|
||||
"Unit_Device___hbgeu2_Accuracy_Positive",
|
||||
"Unit_Device___hbgtu2_Accuracy_Positive",
|
||||
"Unit_Device___hbleu2_Accuracy_Positive",
|
||||
"Unit_Device___hbltu2_Accuracy_Positive",
|
||||
"=== Below 4 tests are disabled due to defect EXSWHTEC-355 ===",
|
||||
"Unit_Device___hadd_Sanity_Positive",
|
||||
"Unit_Device___uhadd_Sanity_Positive",
|
||||
"Unit_Device___rhadd_Sanity_Positive",
|
||||
"Unit_Device___urhadd_Sanity_Positive",
|
||||
"Unit_Coalesced_Group_Tiled_Partition_Sync_Positive_Basic - uint8_t",
|
||||
"Unit_Coalesced_Group_Tiled_Partition_Sync_Positive_Basic - uint16_t",
|
||||
"Unit_Coalesced_Group_Tiled_Partition_Sync_Positive_Basic - uint32_t",
|
||||
"=== Below tests failed in stress test of 25/01/24 ===",
|
||||
"Unit_atomicAnd_Negative_Parameters_RTC",
|
||||
"Unit_atomicOr_Negative_Parameters_RTC",
|
||||
"Unit_atomicXor_Negative_Parameters_RTC",
|
||||
"Unit_atomicMin_Negative_Parameters_RTC",
|
||||
"Unit_atomicMax_Negative_Parameters_RTC",
|
||||
"=== Below tests cause timeout in stress test of 09/02/24 ===",
|
||||
"Unit_Device___half2half2_Accuracy_Positive",
|
||||
"Unit_Device_make_half2_Accuracy_Positive",
|
||||
"Unit_Device___halves2half2_Accuracy_Positive",
|
||||
"Unit_Device___low2half_Accuracy_Positive",
|
||||
"Unit_Device___high2half_Accuracy_Positive",
|
||||
"Unit_Device___low2half2_Accuracy_Positive",
|
||||
"Unit_Device___high2half2_Accuracy_Positive",
|
||||
"Unit_Device___lowhigh2highlow_Accuracy_Positive",
|
||||
"Unit_Device___lows2half2_Accuracy_Positive",
|
||||
"Unit_Device___highs2half2_Accuracy_Positive",
|
||||
"Unit_Device___float2half2_rn_Accuracy_Positive",
|
||||
"Unit_Device___floats2half2_rn_Accuracy_Positive",
|
||||
"Unit_Device___float22half2_rn_Accuracy_Positive",
|
||||
"Unit_Device___low2float_Accuracy_Positive",
|
||||
"Unit_Device___high2float_Accuracy_Positive",
|
||||
"Unit_Device___half22float2_Accuracy_Positive",
|
||||
"Unit_Device_hcos_Accuracy_Positive",
|
||||
"Unit_Device_h2cos_Accuracy_Positive",
|
||||
"Unit_Device_hsin_Accuracy_Positive",
|
||||
"Unit_Device_h2sin_Accuracy_Positive",
|
||||
"Unit_Device_hexp_Accuracy_Positive",
|
||||
"Unit_Device_h2exp_Accuracy_Positive",
|
||||
"Unit_Device_hexp10_Accuracy_Positive",
|
||||
"Unit_Device_h2exp10_Accuracy_Positive",
|
||||
"Unit_Device_hexp2_Accuracy_Positive",
|
||||
"Unit_Device_h2exp2_Accuracy_Positive",
|
||||
"Unit_Device_hlog_Accuracy_Positive",
|
||||
"Unit_Device_h2log_Accuracy_Positive",
|
||||
"Unit_Device_hlog10_Accuracy_Positive",
|
||||
"Unit_Device_h2log10_Accuracy_Positive",
|
||||
"Unit_Device_hlog2_Accuracy_Positive",
|
||||
"Unit_Device_h2log2_Accuracy_Positive",
|
||||
"Unit_Device_hsqrt_Accuracy_Positive",
|
||||
"Unit_Device_h2sqrt_Accuracy_Positive",
|
||||
"Unit_Device_hceil_Accuracy_Positive",
|
||||
"Unit_Device_h2ceil_Accuracy_Positive",
|
||||
"Unit_Device_hfloor_Accuracy_Positive",
|
||||
"Unit_Device_h2floor_Accuracy_Positive",
|
||||
"Unit_Device_htrunc_Accuracy_Positive",
|
||||
"Unit_Device_h2trunc_Accuracy_Positive",
|
||||
"Unit_Device_hrcp_Accuracy_Positive",
|
||||
"Unit_Device_h2rcp_Accuracy_Positive",
|
||||
"Unit_Device_hrsqrt_Accuracy_Positive",
|
||||
"Unit_Device_h2rsqrt_Accuracy_Positive",
|
||||
"Unit_Device_hrint_Accuracy_Positive",
|
||||
"Unit_Device_h2rint_Accuracy_Positive",
|
||||
"Unit_Device___habs_Accuracy_Positive",
|
||||
"Unit_Device___habs2_Accuracy_Positive",
|
||||
"Unit_Device___hneg_Accuracy_Positive",
|
||||
"Unit_Device___hneg2_Accuracy_Positive",
|
||||
"Unit_Device___hadd_wrapper_Accuracy_Positive",
|
||||
"Unit_Device___hadd2_Accuracy_Positive",
|
||||
"Unit_Device___hadd_sat_Accuracy_Positive",
|
||||
"Unit_Device___hadd2_sat_Accuracy_Positive",
|
||||
"Unit_Device___hsub_Accuracy_Positive",
|
||||
"Unit_Device___hsub2_Accuracy_Positive",
|
||||
"Unit_Device___hsub_sat_Accuracy_Positive",
|
||||
"Unit_Device___hsub2_sat_Accuracy_Positive",
|
||||
"Unit_Device___hmul_Accuracy_Positive",
|
||||
"Unit_Device___hmul2_Accuracy_Positive",
|
||||
"Unit_Device___hmul_sat_Accuracy_Positive",
|
||||
"Unit_Device___hmul2_sat_Accuracy_Positive",
|
||||
"Unit_Device___hdiv_Accuracy_Positive",
|
||||
"Unit_Device___h2div_Accuracy_Positive",
|
||||
"Unit_Device___hfma_Accuracy_Positive",
|
||||
"Unit_Device___hfma2_Accuracy_Positive",
|
||||
"Unit_Device___hfma_sat_Accuracy_Positive",
|
||||
"Unit_Device___hfma2_sat_Accuracy_Positive",
|
||||
"Unit_Device___hisinf_Accuracy_Positive",
|
||||
"Unit_Device___hisinf2_Accuracy_Positive",
|
||||
"Unit_Device___hisnan_Accuracy_Positive",
|
||||
"Unit_Device___hisnan2_Accuracy_Positive",
|
||||
"Unit_Device___heq_Accuracy_Positive",
|
||||
"Unit_Device___hbeq2_Accuracy_Positive",
|
||||
"Unit_Device___hequ_Accuracy_Positive",
|
||||
"Unit_Device___hbequ2_Accuracy_Positive",
|
||||
"Unit_Device___heq2_Accuracy_Positive",
|
||||
"Unit_Device___hequ2_Accuracy_Positive",
|
||||
"Unit_Device___hne_Accuracy_Positive",
|
||||
"Unit_Device___hbne2_Accuracy_Positive",
|
||||
"Unit_Device___hneu_Accuracy_Positive",
|
||||
"Unit_Device___hbneu2_Accuracy_Positive",
|
||||
"Unit_Device___hne2_Accuracy_Positive",
|
||||
"Unit_Device___hneu2_Accuracy_Positive",
|
||||
"Unit_Device___hge_Accuracy_Positive",
|
||||
"Unit_Device___hbge2_Accuracy_Positive",
|
||||
"Unit_Device___hgeu_Accuracy_Positive",
|
||||
"Unit_Device___hbgeu2_Accuracy_Positive",
|
||||
"Unit_Device___hge2_Accuracy_Positive",
|
||||
"Unit_Device___hgeu2_Accuracy_Positive",
|
||||
"Unit_Device___hgt_Accuracy_Positive",
|
||||
"Unit_Device___hbgt2_Accuracy_Positive",
|
||||
"Unit_Device___hgtu_Accuracy_Positive",
|
||||
"Unit_Device___hbgtu2_Accuracy_Positive",
|
||||
"Unit_Device___hgt2_Accuracy_Positive",
|
||||
"Unit_Device___hgtu2_Accuracy_Positive",
|
||||
"Unit_Device___hle_Accuracy_Positive",
|
||||
"Unit_Device___hble2_Accuracy_Positive",
|
||||
"Unit_Device___hleu_Accuracy_Positive",
|
||||
"Unit_Device___hbleu2_Accuracy_Positive",
|
||||
"Unit_Device___hle2_Accuracy_Positive",
|
||||
"Unit_Device___hleu2_Accuracy_Positive",
|
||||
"Unit_Device___hlt_Accuracy_Positive",
|
||||
"Unit_Device___hblt2_Accuracy_Positive",
|
||||
"Unit_Device___hltu_Accuracy_Positive",
|
||||
"Unit_Device___hbltu2_Accuracy_Positive",
|
||||
"Unit_Device___hlt2_Accuracy_Positive",
|
||||
"Unit_Device___hltu2_Accuracy_Positive",
|
||||
"Unit_Device___hmax_Accuracy_Positive",
|
||||
"Unit_Device___hmin_Accuracy_Positive",
|
||||
"Unit_Device___hmax_nan_Accuracy_Positive",
|
||||
"Unit_Device___hmin_nan_Accuracy_Positive",
|
||||
"Unit_Device___half2int_rn_Accuracy_Positive",
|
||||
"Unit_Device___half2int_rz_Accuracy_Positive",
|
||||
"Unit_Device___half2int_rd_Accuracy_Positive",
|
||||
"Unit_Device___half2int_ru_Accuracy_Positive",
|
||||
"Unit_Device___half2uint_rn_Accuracy_Positive",
|
||||
"Unit_Device___half2uint_rz_Accuracy_Positive",
|
||||
"Unit_Device___half2uint_rd_Accuracy_Positive",
|
||||
"Unit_Device___half2uint_ru_Accuracy_Positive",
|
||||
"Unit_Device___half2short_rn_Accuracy_Positive",
|
||||
"Unit_Device___half2short_rz_Accuracy_Positive",
|
||||
"Unit_Device___half2short_rd_Accuracy_Positive",
|
||||
"Unit_Device___half2short_ru_Accuracy_Positive",
|
||||
"Unit_Device___half2ushort_rn_Accuracy_Positive",
|
||||
"Unit_Device___half2ushort_rz_Accuracy_Positive",
|
||||
"Unit_Device___half2ushort_rd_Accuracy_Positive",
|
||||
"Unit_Device___half2ushort_ru_Accuracy_Positive",
|
||||
"Unit_Device___half2ll_rn_Accuracy_Positive",
|
||||
"Unit_Device___half2ll_rz_Accuracy_Positive",
|
||||
"Unit_Device___half2ll_rd_Accuracy_Positive",
|
||||
"Unit_Device___half2ll_ru_Accuracy_Positive",
|
||||
"Unit_Device___half2ull_rn_Accuracy_Positive",
|
||||
"Unit_Device___half2ull_rz_Accuracy_Positive",
|
||||
"Unit_Device___half2ull_rd_Accuracy_Positive",
|
||||
"Unit_Device___half2ull_ru_Accuracy_Positive",
|
||||
"Unit_Device___half_as_short_Accuracy_Positive",
|
||||
"Unit_Device___half_as_ushort_Accuracy_Positive",
|
||||
"Unit_Device___int2half_rn_Accuracy_Positive",
|
||||
"Unit_Device___int2half_rz_Accuracy_Positive",
|
||||
"Unit_Device___int2half_rd_Accuracy_Positive",
|
||||
"Unit_Device___int2half_ru_Accuracy_Positive",
|
||||
"Unit_Device___uint2half_rn_Accuracy_Positive",
|
||||
"Unit_Device___uint2half_rz_Accuracy_Positive",
|
||||
"Unit_Device___uint2half_rd_Accuracy_Positive",
|
||||
"Unit_Device___uint2half_ru_Accuracy_Positive",
|
||||
"Unit_Device___short2half_rn_Accuracy_Positive",
|
||||
"Unit_Device___short2half_rz_Accuracy_Positive",
|
||||
"Unit_Device___short2half_rd_Accuracy_Positive",
|
||||
"Unit_Device___short2half_ru_Accuracy_Positive",
|
||||
"Unit_Device___ushort2half_rn_Accuracy_Positive",
|
||||
"Unit_Device___ushort2half_rz_Accuracy_Positive",
|
||||
"Unit_Device___ushort2half_rd_Accuracy_Positive",
|
||||
"Unit_Device___ushort2half_ru_Accuracy_Positive",
|
||||
"Unit_Device___ll2half_rn_Accuracy_Positive",
|
||||
"Unit_Device___ll2half_rz_Accuracy_Positive",
|
||||
"Unit_Device___ll2half_rd_Accuracy_Positive",
|
||||
"Unit_Device___ll2half_ru_Accuracy_Positive",
|
||||
"Unit_Device___ull2half_rn_Accuracy_Positive",
|
||||
"Unit_Device___ull2half_rz_Accuracy_Positive",
|
||||
"Unit_Device___ull2half_rd_Accuracy_Positive",
|
||||
"Unit_Device___ull2half_ru_Accuracy_Positive",
|
||||
"Unit_Device___short_as_half_Accuracy_Positive",
|
||||
"Unit_Device___ushort_as_half_Accuracy_Positive",
|
||||
"Unit_Device___float2half_rn_Accuracy_Positive",
|
||||
"Unit_Device___float2half_Accuracy_Positive",
|
||||
"Unit_Device___half2float_Accuracy_Positive",
|
||||
"Unit_Device___frcp_rn_Accuracy_Positive",
|
||||
"Unit_Device___fsqrt_rn_Accuracy_Positive",
|
||||
"Unit_Device___frsqrt_rn_Accuracy_Positive",
|
||||
"Unit_Device___expf_Accuracy_Positive",
|
||||
"Unit_Device___exp10f_Accuracy_Positive",
|
||||
"Unit_Device___logf_Accuracy_Positive",
|
||||
"Unit_Device___log2f_Accuracy_Positive",
|
||||
"Unit_Device___log10f_Accuracy_Positive",
|
||||
"Unit_Device___sinf_Accuracy_Positive",
|
||||
"Unit_Device___sincosf_sin_Accuracy_Positive",
|
||||
"Unit_Device___cosf_Accuracy_Positive",
|
||||
"Unit_Device___sincosf_cos_Accuracy_Positive",
|
||||
"Unit_Device___fadd_rn_Accuracy_Positive",
|
||||
"Unit_Device___fsub_rn_Accuracy_Positive",
|
||||
"Unit_Device___fmul_rn_Accuracy_Positive",
|
||||
"Unit_Device___fdiv_rn_Accuracy_Positive",
|
||||
"Unit_Device___fdividef_Accuracy_Positive",
|
||||
"Unit_Device___fmaf_rn_Accuracy_Positive",
|
||||
"Unit_Device___drcp_rn_Accuracy_Positive",
|
||||
"Unit_Device___dsqrt_rn_Accuracy_Positive",
|
||||
"Unit_Device___dadd_rn_Accuracy_Positive",
|
||||
"Unit_Device___dsub_rn_Accuracy_Positive",
|
||||
"Unit_Device___dmul_rn_Accuracy_Positive",
|
||||
"Unit_Device___ddiv_rn_Accuracy_Positive",
|
||||
"Unit_Device___fma_rn_Accuracy_Positive",
|
||||
"Unit_atomicAdd_Negative_Parameters_RTC",
|
||||
"Unit_atomicSub_Negative_Parameters_RTC",
|
||||
"Unit_atomicInc_Negative_Parameters_RTC",
|
||||
"Unit_atomicDec_Negative_Parameters_RTC",
|
||||
"Unit_atomicCAS_Negative_Parameters_RTC",
|
||||
"SWDEV-450909: Test failed in stress testing",
|
||||
"Unit_RTC_LinkDestroy_Default",
|
||||
"=== SWDEV-457316 Below tests are disabled temporarily to avoid combined PSDB ===",
|
||||
"Unit_hipGraphAddMemFreeNode_Negative_NotSupported",
|
||||
"=== SWDEV-454245, SWDEV-454247 : Below tests fail on 29/03/24 ===",
|
||||
"Unit_hipStreamBeginCaptureToGraph_IndepGraphsThreads",
|
||||
"Unit_hipStreamBeginCaptureToGraph_CaptureDepGraph",
|
||||
"=== SWDEV-486448 - Following tests disabled due to taking too much time to execute, ~700s per test",
|
||||
"Unit_Coalesced_Group_Tiled_Partition_Getters_Positive_Basic",
|
||||
"Unit_Coalesced_Group_Tiled_Partition_Shfl_Up_Positive_Basic - int",
|
||||
"Unit_Coalesced_Group_Tiled_Partition_Shfl_Up_Positive_Basic - unsigned int",
|
||||
"Unit_Coalesced_Group_Tiled_Partition_Shfl_Up_Positive_Basic - long",
|
||||
"Unit_Coalesced_Group_Tiled_Partition_Shfl_Up_Positive_Basic - unsigned long",
|
||||
"Unit_Coalesced_Group_Tiled_Partition_Shfl_Up_Positive_Basic - long long",
|
||||
"Unit_Coalesced_Group_Tiled_Partition_Shfl_Up_Positive_Basic - unsigned long long",
|
||||
"Unit_Coalesced_Group_Tiled_Partition_Shfl_Up_Positive_Basic - float",
|
||||
"Unit_Coalesced_Group_Tiled_Partition_Shfl_Up_Positive_Basic - double",
|
||||
"Unit_Coalesced_Group_Tiled_Partition_Shfl_Down_Positive_Basic - int",
|
||||
"Unit_Coalesced_Group_Tiled_Partition_Shfl_Down_Positive_Basic - unsigned int",
|
||||
"Unit_Coalesced_Group_Tiled_Partition_Shfl_Down_Positive_Basic - long",
|
||||
"Unit_Coalesced_Group_Tiled_Partition_Shfl_Down_Positive_Basic - unsigned long",
|
||||
"Unit_Coalesced_Group_Tiled_Partition_Shfl_Down_Positive_Basic - long long",
|
||||
"Unit_Coalesced_Group_Tiled_Partition_Shfl_Down_Positive_Basic - unsigned long long",
|
||||
"Unit_Coalesced_Group_Tiled_Partition_Shfl_Down_Positive_Basic - float",
|
||||
"Unit_Coalesced_Group_Tiled_Partition_Shfl_Down_Positive_Basic - double",
|
||||
"=== SWDEV-454316 : Below tests fail in stress test ===",
|
||||
"Unit_atomicMin_system_Positive_Peer_GPUs_Same_Address - float",
|
||||
"Unit_atomicMin_system_Positive_Peer_GPUs_Same_Address - double",
|
||||
"Unit_safeAtomicMax_Positive_Multi_Kernel_Same_Address - float",
|
||||
"Unit_atomicMax_system_Positive_Peer_GPUs_Same_Address - float",
|
||||
"Unit_atomicMax_system_Positive_Peer_GPUs_Same_Address - double",
|
||||
"=== SWDEV-475482 - Disable tests to merge clr change",
|
||||
"Unit_hipCreateTextureObject_LinearResource",
|
||||
"Unit_hipCreateTextureObject_Pitch2DResource",
|
||||
"========================================================================================",
|
||||
"=== SWDEV-468258 Below tests are temporarily disabled - windows PSDB failed",
|
||||
"Unit_hipHostAlloc_Basic",
|
||||
"Unit_hipHostAlloc_Default",
|
||||
"Unit_hipHostAlloc_Negative_NonCoherent",
|
||||
"Unit_hipHostAlloc_Negative_Coherent",
|
||||
"Unit_hipHostAlloc_Negative_NumaUser",
|
||||
"=== Following tests disabled due to SWDEV-486363",
|
||||
"Unit_hipStreamQuery_spt_WithFinishedWork",
|
||||
"Unit_hipStreamQuery_spt_NegativeCases",
|
||||
"Unit_hipStreamQuery_spt_WithPendingWork",
|
||||
"Unit_hipStreamSynchronize_spt_FinishWork",
|
||||
"Unit_hipStreamSynchronize_spt_SynchronizeStreamAndQueryNullStream",
|
||||
"====================================================",
|
||||
"Test Unit_hipGraphUserObj_ClonedGraph disabled due to SWDEV-483112",
|
||||
"Unit_hipGraphUserObj_ClonedGraph",
|
||||
"====================================================",
|
||||
"=== SWDEV-517063 Below tests are temporarily disabled due to PSDB failure",
|
||||
"Unit_hipGraphInstantiateWithFlags_AutoFreeOnLaunchInLoop",
|
||||
"Unit_hipGraphInstantiateWithFlags_AutoFreeOnLaunchFillKernel",
|
||||
"Unit_hipGraphInstantiateWithFlags_AutoFreeOnLaunchDoubleKernel",
|
||||
"Unit_hipGraphInstantiateWithFlags_WithDefaultAndAutoFreeOnLaunch",
|
||||
"=== SWDEV-457316 Below test is skipped due to ref count logic (Discussed with German) ===",
|
||||
"Unit_hipGraphAddMemAllocNode_Negative_Free_Alloc_Memory_Again",
|
||||
#endif
|
||||
"=== Following tests disabled as it should be a local perf test",
|
||||
"Performance_hipExtLaunchKernelGGL_QueryGPUFrequency",
|
||||
"End of json"
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,637 @@
|
||||
{
|
||||
"DisabledTests":
|
||||
[
|
||||
"Note: Windows disabled",
|
||||
"Unit_hipMalloc_CoherentTst",
|
||||
"Unit_hipTextureMipmapObj2D_Check",
|
||||
"Unit_hipGraphAddHostNode_ClonedGraphwithHostNode",
|
||||
"Unit_hipEventIpc",
|
||||
"Unit_hipMalloc3D_Negative",
|
||||
"Unit_hipMemPoolApi_BasicAlloc",
|
||||
"Unit_hipMemPoolApi_BasicTrim",
|
||||
"Unit_hipMemPoolApi_BasicReuse",
|
||||
"Unit_hipMemPoolApi_Opportunistic",
|
||||
"Unit_hipMalloc3D_ValidatePitch",
|
||||
"Unit_hipMemAllocPitch_ValidatePitch",
|
||||
"Unit_hipGraphExecMemcpyNodeSetParamsToSymbol_Functional",
|
||||
"Unit_hipMallocManaged_CoherentTstWthAdvise",
|
||||
"Unit_hipMallocManaged_Advanced",
|
||||
"Unit_hipMemRangeGetAttribute_NegativeTests",
|
||||
"Unit_hipMemRangeGetAttribute_AccessedBy1",
|
||||
"Unit_hipMemRangeGetAttribte_3",
|
||||
"Unit_hipMemRangeGetAttribute_4",
|
||||
"Unit_hipMemRangeGetAttribute_PrefetchAndGtAttr",
|
||||
"Unit_hipMemAdvise_TstFlags",
|
||||
"Unit_hipMemAdvise_PrefrdLoc",
|
||||
"Unit_hipMemAdvise_ReadMostly",
|
||||
"Unit_hipMemAdvise_TstFlgOverrideEffect",
|
||||
"Unit_hipMemAdvise_TstAccessedByFlg",
|
||||
"Unit_hipMemAdvise_TstAccessedByFlg4",
|
||||
"Unit_hipMemAdvise_TstMemAdvisePrefrdLoc",
|
||||
"Unit_hipMemAdvise_TstMemAdviseMultiFlag",
|
||||
"Unit_hipMemAdvise_ReadMosltyMgpuTst",
|
||||
"Unit_hipMemAdvise_TstSetUnsetPrfrdLoc",
|
||||
"Unit_hipMallocManaged_DeviceContextChange - unsigned char",
|
||||
"Unit_hipMallocManaged_DeviceContextChange - int",
|
||||
"Unit_hipMallocManaged_DeviceContextChange - float",
|
||||
"Unit_hipMallocManaged_DeviceContextChange - double",
|
||||
"Unit_hipStreamCreateWithPriority_ValidateWithEvents",
|
||||
"Unit_hipStreamPerThread_StrmWaitEvt",
|
||||
"Unit_hipMemGetInfo_DifferentMallocSmall",
|
||||
"Unit_hipMemGetInfo_MallocArray - int",
|
||||
"Unit_hipMemGetInfo_MallocArray - int4",
|
||||
"Unit_hipMemGetInfo_MallocArray - char",
|
||||
"Unit_hipMemGetInfo_Malloc3D",
|
||||
"Unit_hipMemGetInfo_Malloc3DArray - char",
|
||||
"Unit_hipMemGetInfo_Malloc3DArray - int",
|
||||
"Unit_hipMemGetInfo_Malloc3DArray - int4",
|
||||
"Unit_hipMemGetInfo_ParaSmall",
|
||||
"Unit_hipMemGetInfo_ParaMultiSmall",
|
||||
"Unit_hipMultiThreadDevice_NearZero",
|
||||
"Unit_hipStreamPerThread_DeviceReset_1",
|
||||
"Unit_hipStreamCreateWithPriority_ValidateWithEvents",
|
||||
"Unit_hipStreamPerThread_StrmWaitEvt",
|
||||
"Unit_hipGraphMemcpyNodeSetParamsToSymbol_Functional",
|
||||
"Unit_hipStreamWaitEvent_DifferentStreams",
|
||||
"Unit_hipStreamQuery_WithFinishedWork",
|
||||
"Unit_hipStreamValue_Wait32_Blocking_Mask_Gte",
|
||||
"Unit_hipStreamValue_Wait32_Blocking_Mask_Eq_1",
|
||||
"Unit_hipStreamValue_Wait32_Blocking_Mask_Eq_2",
|
||||
"Unit_hipStreamValue_Wait32_Blocking_Mask_And",
|
||||
"Unit_hipStreamValue_Wait32_Blocking_NoMask_Eq",
|
||||
"Unit_hipStreamValue_Wait32_Blocking_NoMask_Gte",
|
||||
"Unit_hipStreamValue_Wait32_Blocking_NoMask_And",
|
||||
"Unit_hipStreamValue_Wait64_Blocking_Mask_Gte_1",
|
||||
"Unit_hipStreamValue_Wait64_Blocking_Mask_Gte_2",
|
||||
"Unit_hipStreamValue_Wait64_Blocking_Mask_Eq_1",
|
||||
"Unit_hipStreamValue_Wait64_Blocking_Mask_Eq_2",
|
||||
"Unit_hipStreamValue_Wait64_Blocking_Mask_And",
|
||||
"Unit_hipStreamValue_Wait64_Blocking_NoMask_Gte",
|
||||
"Unit_hipStreamValue_Wait64_Blocking_NoMask_Eq",
|
||||
"Unit_hipGetDeviceFlags_Positive_Context",
|
||||
"Unit_hipIpcCloseMemHandle_Negative_Close_In_Originating_Process",
|
||||
"Unit_hipIpcOpenMemHandle_Negative_Open_In_Creating_Process",
|
||||
"Unit_hipDeviceGetPCIBusId_Negative_PartialFill",
|
||||
"Unit_hipDeviceGetSharedMemConfig_Positive_Basic",
|
||||
"Unit_hipDeviceGetSharedMemConfig_Positive_Threaded",
|
||||
"Unit_hipDeviceReset_Positive_Basic",
|
||||
"Unit_hipDeviceReset_Positive_Threaded",
|
||||
"Unit_hipInit_Negative",
|
||||
"Unit_hipGraphMemcpyNodeSetParams_Functional",
|
||||
"Unit_hipGraphNodeGetDependentNodes_Functional",
|
||||
"Unit_hipGraphNodeGetDependencies_Functional",
|
||||
"Unit_hipGraphExecChildGraphNodeSetParams_ChildTopology",
|
||||
"Unit_hipGraphAddEventRecordNode_MultipleRun",
|
||||
"Unit_hipGraphAddEventRecordNode_Functional_ElapsedTime",
|
||||
"Unit_hipStreamBeginCapture_captureComplexGraph",
|
||||
"Note: needs to be enabled when streamPerThread issues are fixed",
|
||||
"Unit_hipStreamSynchronize_NullStreamAndStreamPerThread",
|
||||
"Note: intermittent Seg fault failure ",
|
||||
"Unit_hipGraphAddEventRecordNode_Functional_WithoutFlags",
|
||||
"Unit_hipGraphAddChildGraphNode_MultGraphsAsSingleGraph",
|
||||
"Unit_hipFuncSetCacheConfig_Positive_Basic",
|
||||
"Unit_hipFuncSetCacheConfig_Negative_Parameters",
|
||||
"Unit_hipFuncSetSharedMemConfig_Positive_Basic",
|
||||
"Unit_hipFuncSetAttribute_Positive_PreferredSharedMemoryCarveout",
|
||||
"Unit_hipEventCreateWithFlags_DisableSystemFence_HstVisMem",
|
||||
"Unit_hipEventCreateWithFlags_DefaultFlg_HstVisMem",
|
||||
"Unit_hipEventCreateWithFlags_DisableSystemFence_NonCohHstMem",
|
||||
"Unit_hipEventCreateWithFlags_DefaultFlg_NonCohHstMem",
|
||||
"Unit_hipEventCreateWithFlags_DisableSystemFence_CohHstMem",
|
||||
"Unit_hipEventCreateWithFlags_DefaultFlg_CohHstMem",
|
||||
"Unit_hipOccupancyMaxActiveBlocksPerMultiprocessor_Negative_Parameters",
|
||||
"Unit_hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags_Negative_Parameters",
|
||||
"Unit_hipModuleOccupancyMaxPotentialBlockSizeWithFlags_Negative_Parameters",
|
||||
"Unit_hipGraphMemcpyNodeSetParamsToSymbol_Positive_Basic",
|
||||
"Unit_hipGraphExecMemcpyNodeSetParamsToSymbol_Positive_Basic",
|
||||
"Unit_hipGraphMemcpyNodeSetParamsFromSymbol_Positive_Basic",
|
||||
"Unit_hipGraphExecMemcpyNodeSetParamsFromSymbol_Positive_Basic",
|
||||
"Unit_hipKernelNameRef_Negative_Parameters",
|
||||
"Unit_hipKernelNameRef_Positive_Basic",
|
||||
"Unit_hipMemAdvise_AccessedBy_All_Devices",
|
||||
"Unit_hipMemAdvise_No_Flag_Interference",
|
||||
"Unit_hipGraphAddEventRecordNode_Functional_WithoutFlags",
|
||||
"Unit_hipGraphDestroyNode_Complx_ChkNumOfNodesNDep",
|
||||
"Unit_hipGraphDestroyNode_Complx_ChkNumOfNodesNDep_ClonedGrph",
|
||||
"Unit_hipGraphDestroyNode_Complx_ChkNumOfNodesNDep_ChldNode",
|
||||
"Unit_hipMemGetAddressRange_Negative",
|
||||
"NOTE: The following 2 tests are disabled due to defect - EXSWHTEC-238",
|
||||
"Unit_hipDrvMemcpy3D_Positive_Array",
|
||||
"Unit_hipDrvMemcpy3DAsync_Positive_Array",
|
||||
"Unit_hipMemGetAddressRange_Positive",
|
||||
"Note: devicelib hangs and failures",
|
||||
"Unit_deviceAllocation_Malloc_PerThread_PrimitiveDataType",
|
||||
"Unit_deviceAllocation_New_PerThread_PrimitiveDataType",
|
||||
"Unit_deviceAllocation_Malloc_PerThread_StructDataType",
|
||||
"Unit_deviceAllocation_New_PerThread_StructDataType",
|
||||
"Unit_deviceAllocation_Malloc_AcrossKernels",
|
||||
"Unit_deviceAllocation_New_AcrossKernels",
|
||||
"Unit_deviceAllocation_Malloc_SingleCodeObj",
|
||||
"Unit_deviceAllocation_New_SingleCodeObj",
|
||||
"Unit_deviceAllocation_Malloc_PerThread_Graph",
|
||||
"Unit_deviceAllocation_New_PerThread_Graph",
|
||||
"Unit_deviceAllocation_Malloc_DeviceFunc",
|
||||
"Unit_deviceAllocation_VirtualFunction",
|
||||
"Unit_deviceAllocation_Malloc_MulKernels_MulThreads",
|
||||
"Unit_deviceAllocation_New_MulKernels_MulThreads",
|
||||
"Unit_deviceAllocation_Malloc_MulCodeObj",
|
||||
"Unit_deviceAllocation_New_MulCodeObj",
|
||||
"Unit_deviceAllocation_New_DeviceFunc",
|
||||
"Unit_hipGraphAddEventRecordNode_MultipleRun",
|
||||
"Unit_hipDeviceGetPCIBusId_Negative_PartialFill",
|
||||
"Unit_hipStreamValue_Wait64_Blocking_NoMask_Nor",
|
||||
"Unit_hipStreamQuery_WithFinishedWork",
|
||||
"Unit_hipLaunchHostFunc_Graph",
|
||||
"Unit_hipLaunchHostFunc_KernelHost",
|
||||
"Unit_ChannelDescriptor_Positive_16BitFloatingPoint",
|
||||
"Unit_hipStreamSetCaptureDependencies_Positive_Functional",
|
||||
"Note: Following four tests disabled due to defect - EXSWHTEC-203",
|
||||
"Unit_hipGraphMemsetNodeSetParams_Positive_Basic - uint16_t",
|
||||
"Unit_hipGraphMemsetNodeSetParams_Positive_Basic - uint32_t",
|
||||
"Note: Test disabled due to defect - EXSWHTEC-207",
|
||||
"Unit_hipGraphExecMemsetNodeSetParams_Negative_Updating_Non1D_Node",
|
||||
"Unit_hipGraphExecMemsetNodeSetParams_Positive_Basic - uint8_t",
|
||||
"Unit_hipGraphExecMemsetNodeSetParams_Positive_Basic - uint16_t",
|
||||
"Unit_hipGraphExecMemsetNodeSetParams_Positive_Basic - uint32_t",
|
||||
"Unit_hipGraphMemsetNodeSetParams_Positive_Basic - uint8_t",
|
||||
"Unit_hipDeviceGetUuid_Positive",
|
||||
"Disabling test tracked SWDEV-394199",
|
||||
"Unit_hipStreamCreateWithPriority_MulthreadNonblockingflag",
|
||||
"SWDEV-396617 ExecMemcpyNodeSetParamsFromSymbol fails in direction",
|
||||
"Unit_hipGraphExecMemcpyNodeSetParamsFromSymbol_Negative_Parameters",
|
||||
"SWDEV-396616 hipMemMap returns invalid error",
|
||||
"Unit_hipMemVmm_Basic",
|
||||
"SWDEV-396615 mGPUs not considered correctly",
|
||||
"Unit_hipManagedKeyword_MultiGpu",
|
||||
"Disabling test tracked SWDEV-391555",
|
||||
"Unit_hipMemcpyPeer_Positive_ZeroSize",
|
||||
"Unit_hipMemcpyPeerAsync_Positive_ZeroSize",
|
||||
"SWDEV-400049 tdr intermittently",
|
||||
"Unit_hipMemsetDSync – init16_t",
|
||||
"Unit_hipStreamAddCallback_StrmSyncTiming",
|
||||
"SWDEV-402082 - PAL Backend fails to reserve address on GPU except first one",
|
||||
"Unit_hipGraphInstantiateWithFlags_FlagAutoFreeOnLaunch_check",
|
||||
"SWDEV-398981 fails in stress test",
|
||||
"Unit_hipStreamCreateWithPriority_MulthreadDefaultflag",
|
||||
"Disabling below tests temporarily due to change in API behavior",
|
||||
"Unit_hipMemPrefetchAsync_NonPageSz",
|
||||
"Unit_hipStreamCreateWithFlags_DefaultStreamInteraction",
|
||||
"SWDEV-402054 fails in external github build",
|
||||
"Unit_hipEventDestroy_WithWaitingStream",
|
||||
"Note: UUID returned empty on some windows nodes",
|
||||
"Unit_hipDeviceGetUuid_Positive",
|
||||
"=== Below tests fail in external CI for PR https://github.com/ROCm-Developer-Tools/hip-tests/pull/96 ===",
|
||||
"Unit_hipHostGetDevicePointer_Negative",
|
||||
"Unit_hipExtModuleLaunchKernel_NonUniformWorkGroup",
|
||||
"=== Below tests fail in external CI for PR https://github.com/ROCm-Developer-Tools/hip-tests/pull/18 ===",
|
||||
"Unit_hipMemcpyAsync_Negative_Parameters",
|
||||
"Unit_hipMemcpyDtoHAsync_Negative_Parameters",
|
||||
"Unit_hipMemcpyHtoDAsync_Negative_Parameters",
|
||||
"Unit_hipMemcpyDtoDAsync_Negative_Parameters",
|
||||
"=== Below tests fail in external CI for PR https://github.com/ROCm-Developer-Tools/hip-tests/pull/92 ===",
|
||||
"Unit_hipGetChannelDesc_Negative_Parameters",
|
||||
"Unit_hipTextureMipmapRef2D_Positive_Check",
|
||||
"Unit_hipTextureMipmapRef2D_Negative_Parameters",
|
||||
"=== SWDEV-430116:Below tests failed in stress test on 27/10/23 ===",
|
||||
"Unit_hipFreeAsync_negative",
|
||||
"Unit_hipLaunchHostFunc_multistreams",
|
||||
"Unit_hipGraphMem_Alloc_Free_NodeGetParams_Negative",
|
||||
"=== SWDEV-431191:Below tests failed in stress test on 03/11/23 ===",
|
||||
"Unit_hipHostMalloc_AllocateMoreThanAvailGPUMemory",
|
||||
"Unit_hipHostMalloc_AllocateUseMoreThanAvailGPUMemory",
|
||||
"Unit_Layered1DTexture_Check_DeviceBufferToFromLayered1DArray - char1",
|
||||
"Unit_Layered2DTexture_Check_DeviceBufferToFromLayered2DArray - char2",
|
||||
"Unit_Layered1DTexture_Check_HostBufferToFromLayered1DArray - uint",
|
||||
"Unit_Layered1DTexture_Check_DeviceBufferToFromLayered1DArray - uchar4",
|
||||
"Unit_Layered2DTexture_Check_DeviceBufferToFromLayered2DArray - unsigned char",
|
||||
"Unit_Layered2DTexture_Check_DeviceBufferToFromLayered2DArray - float",
|
||||
"Unit_Layered2DTexture_Check_DeviceBufferToFromLayered2DArray - uint2",
|
||||
"=== SWDEV-432250:Below tests failed in stress test on 10/11/23 ===",
|
||||
"Unit_Layered1DTexture_Check_DeviceBufferToFromLayered1DArray - ushort1",
|
||||
"Unit_Layered2DTexture_Check_DeviceBufferToFromLayered2DArray - int2",
|
||||
"Unit_hipVectorTypes_test_on_device",
|
||||
"Unit_Layered1DTexture_Check_DeviceBufferToFromLayered1DArray - ushort4",
|
||||
"Unit_Layered2DTexture_Check_DeviceBufferToFromLayered2DArray - float4",
|
||||
"=== Patch which removes the typetraits implementation from std namespace in hiprtc is reverted ===",
|
||||
"Unit_hiprtc_stdheaders",
|
||||
"Unit_hipMemAddressFree_negative",
|
||||
"Unit_hipMemAddressReserve_AlignmentTest",
|
||||
"Unit_hipMemAddressReserve_Negative",
|
||||
"Unit_hipMemCreate_BasicAllocateDeAlloc_MultGranularity",
|
||||
"Unit_hipMemCreate_ChkDev2HstMemcpy_ReleaseHdlPostUnmap",
|
||||
"Unit_hipMemCreate_ChkDev2HstMemcpy_ReleaseHdlPreUse",
|
||||
"Unit_hipMemCreate_ChkWithKerLaunch",
|
||||
"Unit_hipMemCreate_MapNonContiguousChunks",
|
||||
"Unit_hipMemCreate_ChkWithMemset",
|
||||
"Unit_hipMemCreate_Negative",
|
||||
"Unit_hipMemGetAllocationGranularity_MinGranularity",
|
||||
"Unit_hipMemGetAllocationGranularity_RecommendedGranularity",
|
||||
"Unit_hipMemGetAllocationGranularity_AllGPUs",
|
||||
"Unit_hipMemGetAllocationGranularity_NegativeTests",
|
||||
"Unit_hipMemGetAllocationPropertiesFromHandle_functional",
|
||||
"Unit_hipMemGetAllocationPropertiesFromHandle_Negative",
|
||||
"Unit_hipMemMap_SameMemoryReuse",
|
||||
"Unit_hipMemMap_PhysicalMemoryReuse_SingleGPU",
|
||||
"Unit_hipMemMap_PhysicalMemory_Map2MultVMMs",
|
||||
"Unit_hipMemMap_PhysicalMemoryReuse_MultiDev",
|
||||
"Unit_hipMemMap_VMMMemoryReuse_SingleGPU",
|
||||
"Unit_hipMemMap_VMMMemoryReuse_MultiGPU",
|
||||
"Unit_hipMemMap_MapPartialPhysicalMem",
|
||||
"Unit_hipMemMap_MapPartialVMMMem",
|
||||
"Unit_hipMemMap_negative",
|
||||
"Unit_hipMemRelease_negative",
|
||||
"Unit_hipMemRetainAllocationHandle_SetGet",
|
||||
"Unit_hipMemRetainAllocationHandle_NegTst",
|
||||
"Unit_hipMemSetAccess_SetGet",
|
||||
"Unit_hipMemSetAccess_MultDevSetGet",
|
||||
"Unit_hipMemSetAccess_EntireVMMRangeSetGet",
|
||||
"Unit_hipMemGetAccess_NegTst",
|
||||
"Unit_hipMemSetAccess_FuncTstOnMultDev",
|
||||
"Unit_hipMemSetAccess_ChangeAccessProp",
|
||||
"Unit_hipMemSetAccess_Vmm2UnifiedMemCpy",
|
||||
"Unit_hipMemSetAccess_Vmm2DevMemCpy",
|
||||
"Unit_hipMemSetAccess_Vmm2PeerDevMemCpy",
|
||||
"Unit_hipMemSetAccess_Vmm2PeerPeerMemCpy",
|
||||
"Unit_hipMemSetAccess_Vmm2VMMMemCpy",
|
||||
"Unit_hipMemSetAccess_Vmm2VMMInterDevMemCpy",
|
||||
"Unit_hipMemSetAccess_GrowVMM",
|
||||
"Unit_hipMemSetAccess_Multithreaded",
|
||||
"Unit_hipMemSetAccess_MultiProc",
|
||||
"Unit_hipMemSetAccess_negative",
|
||||
"Unit_hipMemUnmap_negative",
|
||||
"=== SWDEV-434171: Below tests took long time to complete in stress test on 17/11/23 ===",
|
||||
"Unit_Warp_Shfl_Positive_Basic - int",
|
||||
"Unit_Warp_Shfl_Positive_Basic - unsigned int",
|
||||
"Unit_Warp_Shfl_Positive_Basic - long",
|
||||
"Unit_Warp_Shfl_Positive_Basic - unsigned long",
|
||||
"Unit_Warp_Shfl_Positive_Basic - long long",
|
||||
"Unit_Warp_Shfl_Positive_Basic - unsigned long long",
|
||||
"Unit_Warp_Shfl_Positive_Basic - float",
|
||||
"Unit_Warp_Shfl_Positive_Basic - double",
|
||||
"Unit_Warp_Shfl_XOR_Positive_Basic - int",
|
||||
"Unit_Warp_Shfl_XOR_Positive_Basic - unsigned int",
|
||||
"Unit_Warp_Shfl_XOR_Positive_Basic - long",
|
||||
"Unit_Warp_Shfl_XOR_Positive_Basic - unsigned long",
|
||||
"Unit_Warp_Shfl_XOR_Positive_Basic - long long",
|
||||
"Unit_Warp_Shfl_XOR_Positive_Basic - unsigned long long",
|
||||
"Unit_Warp_Shfl_XOR_Positive_Basic - float",
|
||||
"Unit_Warp_Shfl_XOR_Positive_Basic - double",
|
||||
"=== SWDEV-434878: Below tests failed in stress test on 24/11/23 ===",
|
||||
"Unit_hipGraphUpload_Negative_Parameters",
|
||||
"Unit_hipModuleOccupancyMaxPotentialBlockSize_Negative_Parameters",
|
||||
"Unit_hipModuleOccupancyMaxPotentialBlockSize_Positive_RangeValidation",
|
||||
"Unit_hipModuleOccupancyMaxPotentialBlockSizeWithFlags_Positive_RangeValidation",
|
||||
"Unit_hipModuleOccupancyMaxActiveBlocksPerMultiprocessor_Negative_Parameters",
|
||||
"Unit_hipModuleOccupancyMaxActiveBlocksPerMultiprocessor_Positive_RangeValidation",
|
||||
"Unit_hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags_Positive_RangeValidation",
|
||||
"=== SWDEV-435667: Below tests failing randomly in stress test on 01/12/23 ===",
|
||||
"Unit_atomicExch_Positive_Same_Address_Compile_Time - int",
|
||||
"Unit_atomicExch_Positive_Same_Address_Compile_Time - unsigned int",
|
||||
"Unit_atomicExch_Positive_Same_Address_Compile_Time - unsigned long",
|
||||
"Unit_atomicExch_Positive_Same_Address_Compile_Time - unsigned long long",
|
||||
"Unit_atomicExch_Positive_Same_Address_Compile_Time - float",
|
||||
"Unit_atomicExch_Positive_Same_Address_Compile_Time - double",
|
||||
"Unit_atomicExch_Positive_Multi_Kernel - int",
|
||||
"Unit_atomicExch_Positive_Multi_Kernel - unsigned int",
|
||||
"Unit_atomicExch_Positive_Multi_Kernel - unsigned long",
|
||||
"Unit_atomicExch_Positive_Multi_Kernel - unsigned long long",
|
||||
"Unit_atomicExch_Positive_Multi_Kernel - float",
|
||||
"Unit_atomicExch_Positive_Multi_Kernel - double",
|
||||
"Unit_atomicExch_system_Positive_Peer_GPUs - int",
|
||||
"Unit_atomicExch_system_Positive_Peer_GPUs - unsigned int",
|
||||
"Unit_atomicExch_system_Positive_Peer_GPUs - unsigned long",
|
||||
"Unit_atomicExch_system_Positive_Peer_GPUs - unsigned long long",
|
||||
"Unit_atomicExch_system_Positive_Peer_GPUs - float",
|
||||
"Unit_atomicExch_system_Positive_Peer_GPUs - double",
|
||||
"Unit_atomicExch_system_Positive_Host_And_GPU - int",
|
||||
"Unit_atomicExch_system_Positive_Host_And_GPU - unsigned int",
|
||||
"Unit_atomicExch_system_Positive_Host_And_GPU - unsigned long",
|
||||
"Unit_atomicExch_system_Positive_Host_And_GPU - unsigned long long",
|
||||
"Unit_atomicExch_system_Positive_Host_And_GPU - float",
|
||||
"Unit_atomicExch_system_Positive_Host_And_GPU - double",
|
||||
"Unit_atomicExch_system_Positive_Host_And_Peer_GPUs - int",
|
||||
"Unit_atomicExch_system_Positive_Host_And_Peer_GPUs - unsigned int",
|
||||
"Unit_atomicExch_system_Positive_Host_And_Peer_GPUs - unsigned long",
|
||||
"Unit_atomicExch_system_Positive_Host_And_Peer_GPUs - unsigned long long",
|
||||
"Unit_atomicExch_system_Positive_Host_And_Peer_GPUs - float",
|
||||
"Unit_atomicExch_system_Positive_Host_And_Peer_GPUs - double",
|
||||
"SWDEV-438524: Below tests taking long time to run in stress test on 15/12/23 ===",
|
||||
"Unit_Coalesced_Group_Shfl_Up_Positive_Basic - int",
|
||||
"Unit_Coalesced_Group_Shfl_Up_Positive_Basic - unsigned int",
|
||||
"Unit_Coalesced_Group_Shfl_Up_Positive_Basic - long",
|
||||
"Unit_Coalesced_Group_Shfl_Up_Positive_Basic - unsigned long",
|
||||
"Unit_Coalesced_Group_Shfl_Up_Positive_Basic - long long",
|
||||
"Unit_Coalesced_Group_Shfl_Up_Positive_Basic - unsigned long long",
|
||||
"Unit_Coalesced_Group_Shfl_Up_Positive_Basic - float",
|
||||
"Unit_Coalesced_Group_Shfl_Up_Positive_Basic - double",
|
||||
"Unit_Coalesced_Group_Shfl_Down_Positive_Basic - int",
|
||||
"Unit_Coalesced_Group_Shfl_Down_Positive_Basic - unsigned int",
|
||||
"Unit_Coalesced_Group_Shfl_Down_Positive_Basic - long",
|
||||
"Unit_Coalesced_Group_Shfl_Down_Positive_Basic - unsigned long",
|
||||
"Unit_Coalesced_Group_Shfl_Down_Positive_Basic - long long",
|
||||
"Unit_Coalesced_Group_Shfl_Down_Positive_Basic - unsigned long long",
|
||||
"Unit_Coalesced_Group_Shfl_Down_Positive_Basic - float",
|
||||
"Unit_Coalesced_Group_Shfl_Down_Positive_Basic - double",
|
||||
"Unit_Coalesced_Group_Shfl_Positive_Basic - int",
|
||||
"Unit_Coalesced_Group_Shfl_Positive_Basic - unsigned int",
|
||||
"Unit_Coalesced_Group_Shfl_Positive_Basic - long",
|
||||
"Unit_Coalesced_Group_Shfl_Positive_Basic - unsigned long",
|
||||
"Unit_Coalesced_Group_Shfl_Positive_Basic - long long",
|
||||
"Unit_Coalesced_Group_Shfl_Positive_Basic - unsigned long long",
|
||||
"Unit_Coalesced_Group_Shfl_Positive_Basic - float",
|
||||
"Unit_Coalesced_Group_Shfl_Positive_Basic - double",
|
||||
"SWDEV-438524: Below tests causing TDR & machine down in stress test on 15/12/23 ===",
|
||||
"Unit_hipExtModuleLaunchKernel_Functional",
|
||||
"Unit_hipExtLaunchKernelGGL_Functional",
|
||||
"SWDEV-438524:Below tests failed in stress test on 15/12/23 ===",
|
||||
"Unit_Device_memcpy_Negative",
|
||||
"Unit_Device_memset_Negative",
|
||||
"Unit_Device_Complex_make_Negative",
|
||||
"Unit_Device_Complex_Cast_Negative",
|
||||
"Unit_Device_Complex_Unary_float_Negative",
|
||||
"Unit_Device_Complex_Unary_double_Negative",
|
||||
"Unit_Device_Complex_Binary_float_Negative",
|
||||
"Unit_Device_Complex_Binary_double_Negative",
|
||||
"Unit_Device_Complex_hipCfma_Negative",
|
||||
"Unit_Device__hip_hc_8pk_Negative",
|
||||
"Note: Linux disabled",
|
||||
"Unit_hipStreamPerThread_DeviceReset_1",
|
||||
"Unit_hipDeviceGetSharedMemConfig_Positive_Basic",
|
||||
"Unit_hipDeviceGetSharedMemConfig_Positive_Threaded",
|
||||
"Unit_hipGetDeviceFlags_Positive_Context",
|
||||
"Unit_hipIpcCloseMemHandle_Negative_Close_In_Originating_Process",
|
||||
"Unit_hipIpcOpenMemHandle_Negative_Open_In_Creating_Process",
|
||||
"Unit_hipInit_Negative",
|
||||
"Unit_hipDeviceReset_Positive_Basic",
|
||||
"Unit_hipDeviceReset_Positive_Threaded",
|
||||
"Unit_hipFuncSetCacheConfig_Positive_Basic",
|
||||
"Unit_hipFuncSetCacheConfig_Negative_Parameters",
|
||||
"Unit_hipFuncSetSharedMemConfig_Positive_Basic",
|
||||
"Unit_hipFuncSetAttribute_Positive_PreferredSharedMemoryCarveout",
|
||||
"Unit_hipOccupancyMaxActiveBlocksPerMultiprocessor_Negative_Parameters",
|
||||
"Unit_hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags_Negative_Parameters",
|
||||
"Unit_hipModuleOccupancyMaxPotentialBlockSizeWithFlags_Negative_Parameters",
|
||||
"Unit_hipGraphMemcpyNodeSetParamsToSymbol_Positive_Basic",
|
||||
"Unit_hipGraphExecMemcpyNodeSetParamsToSymbol_Positive_Basic",
|
||||
"Unit_hipGraphExecMemcpyNodeSetParamsFromSymbol_Positive_Basic",
|
||||
"Unit_hipKernelNameRef_Negative_Parameters",
|
||||
"Unit_hipMemAdvise_AccessedBy_All_Devices",
|
||||
"Unit_hipMemAdvise_No_Flag_Interference",
|
||||
"Unit_hipMemGetAddressRange_Negative",
|
||||
"NOTE: The following 2 tests are disabled due to defect - EXSWHTEC-238",
|
||||
"Unit_hipDrvMemcpy3D_Positive_Array",
|
||||
"Unit_hipDrvMemcpy3DAsync_Positive_Array",
|
||||
"Unit_hipMemRangeGetAttribute_Positive_AccessedBy_Basic",
|
||||
"Unit_hipMemRangeGetAttribute_Positive_AccessedBy_Partial_Range",
|
||||
"Unit_hipMemGetAddressRange_Positive",
|
||||
"Unit_hipGraphAddMemcpyNode1D_Negative_Basic",
|
||||
"Unit_ChannelDescriptor_Positive_16BitFloatingPoint",
|
||||
"intermittent issue: failure expected but success returned",
|
||||
"Unit_hipMemAdvise_NegtveTsts",
|
||||
"Note: Following four tests disabled due to defect - EXSWHTEC-203",
|
||||
"Unit_hipStreamSetCaptureDependencies_Positive_Functional",
|
||||
"Note: Test disabled due to defect - EXSWHTEC-207",
|
||||
"Unit_hipIpcGetMemHandle_Positive_Unique_Handles_Separate_Allocations",
|
||||
"Unit_hipStreamCreateWithFlags_DefaultStreamInteraction",
|
||||
"Unit_hipMemset3DSync",
|
||||
"Unit_hipStreamAddCallback_StrmSyncTiming",
|
||||
"Disabling test tracked SWDEV-394199",
|
||||
"Unit_hipStreamCreateWithPriority_MulthreadNonblockingflag",
|
||||
"Disabling test tracked SWDEV-395683",
|
||||
"Unit_hipStreamPerThread_MultiThread",
|
||||
"SWDEV-396963",
|
||||
"Unit_hipMemcpy2DFromArrayAsync_Positive_Synchronization_Behavior",
|
||||
"Disabling tests tracked with SWDEV-389647..",
|
||||
"Unit_hipMemcpy2DToArrayAsync_Positive_Synchronization_Behavior",
|
||||
"Disabling test tracked SWDEV-391555",
|
||||
"Unit_hipMemcpyPeer_Positive_ZeroSize",
|
||||
"Unit_hipMemcpyPeerAsync_Positive_ZeroSize",
|
||||
"Fails in Stress test SWDEV-398971",
|
||||
"SWDEV-398977 fails in stress tests",
|
||||
"Unit_hipMemset2DSync",
|
||||
"SWDEV-398981 fails in stress test",
|
||||
"Unit_hipStreamCreateWithPriority_MulthreadDefaultflag",
|
||||
"SWDEV-402054 fails in external github build",
|
||||
"Unit_hipEventDestroy_WithWaitingStream",
|
||||
"=== Below tests fail in stress test on 23/06/23 ===",
|
||||
"Unit_hipIpcMemAccess_ParameterValidation",
|
||||
"Unit_hipMemcpy2DFromArrayAsync_Positive_Synchronization_Behavior",
|
||||
"Unit_hipGraphClone_Test_hipGraphExecMemcpyNodeSetParams",
|
||||
"Unit_hipGraphClone_Test_hipGraphMemcpyNodeSetParams1D_and_exec",
|
||||
"=== Below tests fail in stress test on 30/06/23 ===",
|
||||
"Unit_hipStreamValue_Write - TestParams<uint32_t, PtrType::HostPtr>",
|
||||
"Unit_hipMemcpyParam2DAsync_multiDevice-StreamOnDiffDevice",
|
||||
"=== Below tests fail in external CI for PR https://github.com/ROCm-Developer-Tools/hip-tests/pull/96 ===",
|
||||
"Unit_hipHostGetDevicePointer_Negative",
|
||||
"Unit_hipExtModuleLaunchKernel_NonUniformWorkGroup",
|
||||
"=== Below tests fail in external CI for PR https://github.com/ROCm-Developer-Tools/hip-tests/pull/18 ===",
|
||||
"Unit_hipMemcpyAsync_Negative_Parameters",
|
||||
"Unit_hipMemcpyDtoHAsync_Negative_Parameters",
|
||||
"Unit_hipMemcpyHtoDAsync_Negative_Parameters",
|
||||
"Unit_hipMemcpyDtoDAsync_Negative_Parameters",
|
||||
"Unit_hipStreamValue_Wait32_Blocking_Mask_Eq_1",
|
||||
"=== Below tests fail in external CI for PR https://github.com/ROCm-Developer-Tools/hip-tests/pull/327 ===",
|
||||
"Unit_hiprtcGpuRdcComplrOptnTst",
|
||||
"Unit_hiprtcDisabledSlpVectorizeComplrOptnTst",
|
||||
"Unit_hiprtcRpassInlineComplrOptnTst",
|
||||
"Unit_hiprtcCombiComplrOptnTst",
|
||||
"=== Below tests fail in external CI for PR https://github.com/ROCm-Developer-Tools/hip-tests/pull/92 ===",
|
||||
"Unit_hipGetChannelDesc_Negative_Parameters",
|
||||
"Unit_hipGraphAddChildGraphNode_CmplxNstGrph_UpdKerFun_Clone",
|
||||
"=== Below tests fail in stress test on 24/07/23 ===",
|
||||
"Unit_hipStreamCreateWithPriority_ValidateWithEvents",
|
||||
"Unit_hipEventIpc",
|
||||
"=== SWDEV-427101:Below test fails randomly in PSDB ===",
|
||||
"Unit_deviceAllocation_InOneThread_AccessInAllThreads",
|
||||
"=== Patch which removes the typetraits implementation from std namespace in hiprtc is reverted ===",
|
||||
"Unit_hiprtc_stdheaders",
|
||||
"Unit_hipMemAddressFree_negative",
|
||||
"Unit_hipMemAddressReserve_AlignmentTest",
|
||||
"Unit_hipMemAddressReserve_Negative",
|
||||
"Unit_hipMemCreate_BasicAllocateDeAlloc_MultGranularity",
|
||||
"Unit_hipMemCreate_ChkDev2HstMemcpy_ReleaseHdlPostUnmap",
|
||||
"Unit_hipMemCreate_ChkDev2HstMemcpy_ReleaseHdlPreUse",
|
||||
"Unit_hipMemCreate_ChkWithKerLaunch",
|
||||
"Unit_hipMemCreate_MapNonContiguousChunks",
|
||||
"Unit_hipMemCreate_ChkWithMemset",
|
||||
"Unit_hipMemCreate_Negative",
|
||||
"Unit_hipMemGetAllocationGranularity_MinGranularity",
|
||||
"Unit_hipMemGetAllocationGranularity_RecommendedGranularity",
|
||||
"Unit_hipMemGetAllocationGranularity_AllGPUs",
|
||||
"Unit_hipMemGetAllocationGranularity_NegativeTests",
|
||||
"Unit_hipMemGetAllocationPropertiesFromHandle_functional",
|
||||
"Unit_hipMemGetAllocationPropertiesFromHandle_Negative",
|
||||
"Unit_hipMemMap_SameMemoryReuse",
|
||||
"Unit_hipMemMap_PhysicalMemoryReuse_SingleGPU",
|
||||
"Unit_hipMemMap_PhysicalMemory_Map2MultVMMs",
|
||||
"Unit_hipMemMap_PhysicalMemoryReuse_MultiDev",
|
||||
"Unit_hipMemMap_VMMMemoryReuse_SingleGPU",
|
||||
"Unit_hipMemMap_VMMMemoryReuse_MultiGPU",
|
||||
"Unit_hipMemMap_MapPartialPhysicalMem",
|
||||
"Unit_hipMemMap_MapPartialVMMMem",
|
||||
"Unit_hipMemMap_negative",
|
||||
"Unit_hipMemRelease_negative",
|
||||
"Unit_hipMemRetainAllocationHandle_SetGet",
|
||||
"Unit_hipMemRetainAllocationHandle_NegTst",
|
||||
"Unit_hipMemSetAccess_SetGet",
|
||||
"Unit_hipMemSetAccess_MultDevSetGet",
|
||||
"Unit_hipMemSetAccess_EntireVMMRangeSetGet",
|
||||
"Unit_hipMemGetAccess_NegTst",
|
||||
"Unit_hipMemSetAccess_FuncTstOnMultDev",
|
||||
"Unit_hipMemSetAccess_ChangeAccessProp",
|
||||
"Unit_hipMemSetAccess_Vmm2UnifiedMemCpy",
|
||||
"Unit_hipMemSetAccess_Vmm2DevMemCpy",
|
||||
"Unit_hipMemSetAccess_Vmm2PeerDevMemCpy",
|
||||
"Unit_hipMemSetAccess_Vmm2PeerPeerMemCpy",
|
||||
"Unit_hipMemSetAccess_Vmm2VMMMemCpy",
|
||||
"Unit_hipMemSetAccess_Vmm2VMMInterDevMemCpy",
|
||||
"Unit_hipMemSetAccess_GrowVMM",
|
||||
"Unit_hipMemSetAccess_Multithreaded",
|
||||
"Unit_hipMemSetAccess_MultiProc",
|
||||
"Unit_hipMemSetAccess_negative",
|
||||
"Unit_hipMemUnmap_negative",
|
||||
"=== SWDEV-434171: Below tests took long time to complete in stress test on 17/11/23 ===",
|
||||
"Unit_Warp_Shfl_Positive_Basic - int",
|
||||
"Unit_Warp_Shfl_Positive_Basic - unsigned int",
|
||||
"Unit_Warp_Shfl_Positive_Basic - long",
|
||||
"Unit_Warp_Shfl_Positive_Basic - unsigned long",
|
||||
"Unit_Warp_Shfl_Positive_Basic - long long",
|
||||
"Unit_Warp_Shfl_Positive_Basic - unsigned long long",
|
||||
"Unit_Warp_Shfl_Positive_Basic - float",
|
||||
"Unit_Warp_Shfl_Positive_Basic - double",
|
||||
"Unit_Warp_Shfl_XOR_Positive_Basic - int",
|
||||
"Unit_Warp_Shfl_XOR_Positive_Basic - unsigned int",
|
||||
"Unit_Warp_Shfl_XOR_Positive_Basic - long",
|
||||
"Unit_Warp_Shfl_XOR_Positive_Basic - unsigned long",
|
||||
"Unit_Warp_Shfl_XOR_Positive_Basic - long long",
|
||||
"Unit_Warp_Shfl_XOR_Positive_Basic - unsigned long long",
|
||||
"Unit_Warp_Shfl_XOR_Positive_Basic - float",
|
||||
"Unit_Warp_Shfl_XOR_Positive_Basic - double",
|
||||
"=== SWDEV-434878: Below tests failed in stress test on 24/11/23 ===",
|
||||
"Unit_hipGraphUpload_Negative_Parameters",
|
||||
"Unit_hipModuleOccupancyMaxPotentialBlockSize_Negative_Parameters",
|
||||
"Unit_hipModuleOccupancyMaxPotentialBlockSize_Positive_RangeValidation",
|
||||
"Unit_hipModuleOccupancyMaxPotentialBlockSizeWithFlags_Positive_RangeValidation",
|
||||
"Unit_hipModuleOccupancyMaxActiveBlocksPerMultiprocessor_Negative_Parameters",
|
||||
"Unit_hipModuleOccupancyMaxActiveBlocksPerMultiprocessor_Positive_RangeValidation",
|
||||
"Unit_hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags_Positive_RangeValidation",
|
||||
"=== SWDEV-435667: Below tests failing randomly in stress test on 01/12/23 ===",
|
||||
"Unit_atomicExch_Positive_Same_Address_Compile_Time - int",
|
||||
"Unit_atomicExch_Positive_Same_Address_Compile_Time - unsigned int",
|
||||
"Unit_atomicExch_Positive_Same_Address_Compile_Time - unsigned long",
|
||||
"Unit_atomicExch_Positive_Same_Address_Compile_Time - unsigned long long",
|
||||
"Unit_atomicExch_Positive_Same_Address_Compile_Time - float",
|
||||
"Unit_atomicExch_Positive_Same_Address_Compile_Time - double",
|
||||
"Unit_atomicExch_Positive_Multi_Kernel - int",
|
||||
"Unit_atomicExch_Positive_Multi_Kernel - unsigned int",
|
||||
"Unit_atomicExch_Positive_Multi_Kernel - unsigned long",
|
||||
"Unit_atomicExch_Positive_Multi_Kernel - unsigned long long",
|
||||
"Unit_atomicExch_Positive_Multi_Kernel - float",
|
||||
"Unit_atomicExch_Positive_Multi_Kernel - double",
|
||||
"Unit_atomicExch_system_Positive_Peer_GPUs - int",
|
||||
"Unit_atomicExch_system_Positive_Peer_GPUs - unsigned int",
|
||||
"Unit_atomicExch_system_Positive_Peer_GPUs - unsigned long",
|
||||
"Unit_atomicExch_system_Positive_Peer_GPUs - unsigned long long",
|
||||
"Unit_atomicExch_system_Positive_Peer_GPUs - float",
|
||||
"Unit_atomicExch_system_Positive_Peer_GPUs - double",
|
||||
"Unit_atomicExch_system_Positive_Host_And_GPU - int",
|
||||
"Unit_atomicExch_system_Positive_Host_And_GPU - unsigned int",
|
||||
"Unit_atomicExch_system_Positive_Host_And_GPU - unsigned long",
|
||||
"Unit_atomicExch_system_Positive_Host_And_GPU - unsigned long long",
|
||||
"Unit_atomicExch_system_Positive_Host_And_GPU - float",
|
||||
"Unit_atomicExch_system_Positive_Host_And_GPU - double",
|
||||
"Unit_atomicExch_system_Positive_Host_And_Peer_GPUs - int",
|
||||
"Unit_atomicExch_system_Positive_Host_And_Peer_GPUs - unsigned int",
|
||||
"Unit_atomicExch_system_Positive_Host_And_Peer_GPUs - unsigned long",
|
||||
"Unit_atomicExch_system_Positive_Host_And_Peer_GPUs - unsigned long long",
|
||||
"Unit_atomicExch_system_Positive_Host_And_Peer_GPUs - float",
|
||||
"Unit_atomicExch_system_Positive_Host_And_Peer_GPUs - double",
|
||||
"=== SWDEV-439004: Below tests failing randomly in CQE staging ===",
|
||||
"Unit_hipLaunchCooperativeKernel_Streams",
|
||||
"Unit_hipGLGetDevices_Positive_Basic",
|
||||
"Unit_hipGLGetDevices_Positive_Parameters",
|
||||
"Unit_hipGLGetDevices_Negative_Parameters",
|
||||
"Unit_hipGraphicsGLRegisterBuffer_Positive_Basic",
|
||||
"Unit_hipGraphicsGLRegisterBuffer_Positive_Register_Twice",
|
||||
"Unit_hipGraphicsGLRegisterBuffer_Negative_Parameters",
|
||||
"Unit_hipGraphicsGLRegisterImage_Positive_Basic",
|
||||
"Unit_hipGraphicsGLRegisterImage_Positive_Register_Twice",
|
||||
"Unit_hipGraphicsGLRegisterImage_Negative_Parameters",
|
||||
"Unit_hipGraphicsMapResources_Positive_Basic",
|
||||
"Unit_hipGraphicsMapResources_Negative_Parameters",
|
||||
"Unit_hipGraphicsSubResourceGetMappedArray_Positive_Basic",
|
||||
"Unit_hipGraphicsSubResourceGetMappedArray_Negative_Parameters",
|
||||
"Unit_hipGraphicsResourceGetMappedPointer_Positive_Basic",
|
||||
"Unit_hipGraphicsResourceGetMappedPointer_Positive_Parameters",
|
||||
"Unit_hipGraphicsResourceGetMappedPointer_Negative_Parameters",
|
||||
"Unit_hipGraphicsUnmapResources_Negative_Parameters",
|
||||
"Unit_hipGraphicsUnregisterResource_Negative_Parameters",
|
||||
"Unit_hipGraphExecMemcpyNodeSetParams1D_Negative",
|
||||
"Note: gfx1100, gfx1101, gfx1102",
|
||||
"=== Below tests soft hang in stress test on 13/09/23 ===",
|
||||
"Unit_hipMemsetFunctional_ZeroValue_hipMemsetD16",
|
||||
"Unit_hipIpcMemAccess_Semaphores",
|
||||
"Unit_hipStreamAttachMemAsync_Negative_Parameters",
|
||||
"hipStreamPerThread_CoopLaunch",
|
||||
"hipCGMultiGridGroupType",
|
||||
"Grid_Group_Getters_Positive_Basic",
|
||||
"Grid_Group_Getters_Via_Non_Member_Functions_Positive_Basic",
|
||||
"Grid_Group_Sync_Positive_Basic",
|
||||
"dynamic_loading_device_kernels_from_library",
|
||||
"Note: Image extension disabled",
|
||||
"Unit_hipMemset2DSync",
|
||||
"Unit_hipMemset3DSync",
|
||||
"Note: CONFIG_NUMA disabled",
|
||||
"Unit_hipHostMalloc_WthEnv0Flg3",
|
||||
"Unit_hipHostGetFlags_flagCombos",
|
||||
"Unit_hipHostGetFlags_DifferentThreads",
|
||||
"Unit_hipHostMalloc_WthEnv1Flg3",
|
||||
"Note: no valid pci bdf in wsl",
|
||||
"Unit_hipDeviceGetPCIBusId_CheckPciBusIDWithLspci",
|
||||
"Note: TDR",
|
||||
"Unit_deviceAllocation_InOneThread_AccessInAllThreads",
|
||||
"Unit_deviceAllocation_Malloc_UnionType",
|
||||
"Unit_deviceAllocation_New_ComplexDataType",
|
||||
"Unit_deviceAllocation_New_UnionType",
|
||||
"Unit_hipFreeImplicitSyncDev - char",
|
||||
"Unit_hipFreeImplicitSyncDev - float",
|
||||
"Unit_hipFreeImplicitSyncDev - float2",
|
||||
"Unit_hipFreeImplicitSyncDev - float4",
|
||||
"Unit_hipFreeImplicitSyncHost - char",
|
||||
"Unit_hipFreeImplicitSyncHost - float",
|
||||
"Unit_hipFreeImplicitSyncHost - float2",
|
||||
"Unit_hipFreeImplicitSyncHost - float4",
|
||||
"Unit_hipStreamDestroy_WithPendingWork",
|
||||
"Unit_printf_specifier",
|
||||
"Unit_tiled_partition",
|
||||
"Note: TDR (pass)",
|
||||
"Unit_hipStreamSynchronize_FinishWork",
|
||||
"Unit_hipStreamSynchronize_NullStreamSynchronization",
|
||||
"Unit_hipStreamQuery_NullStreamQuery",
|
||||
"Unit_hipStreamQuery_SubmitWorkOnStreamAndQueryNullStream",
|
||||
"Unit_hipStreamQuery_WithPendingWork",
|
||||
"SWDEV-411303 fails in WSL. Profiling not supported in WSL",
|
||||
"Unit_hipEvent",
|
||||
"Unit_hipEventDestroy_Unfinished",
|
||||
"Unit_hipEventElapsedTime_NotReady_Negative",
|
||||
"Note: TDR (random fail)",
|
||||
"Unit_hipEventDestroy_WithWaitingStream",
|
||||
"Unit_hipMemsetSync",
|
||||
"Unit_hipMemsetDSync - int8_t",
|
||||
"Unit_hipMemsetDSync - int16_t",
|
||||
"Unit_hipMemsetDSync - uint32_t",
|
||||
"Unit_hipStreamValue_Wait32_NonBlacking_Mask_Gte",
|
||||
"Note: hsa_amd_ipc_ is dummy",
|
||||
"Unit_hipIpcOpenMemHandle_Negative_Open_In_Two_Contexts_Same_Device",
|
||||
"Unit_hipIpcGetMemHandle_Positive_Unique_Handles_Reused_Memory",
|
||||
"Unit_hipIpcCloseMemHandle_Positive_Reference_Counting",
|
||||
"Unit_hipIpcMemAccess_Semaphores",
|
||||
"Unit_hipIpcMemAccess_ParameterValidation",
|
||||
"Note: test dropped in latest gerritgit",
|
||||
"Unit_hipStreamCreate_WithPriorityPerformance_Default_high",
|
||||
"Unit_hipStreamCreate_WithPriorityPerformance_Nonblocking_high",
|
||||
"Unit_hipStreamCreate_WithPriorityPerformance_Default_low",
|
||||
"=== Following tests disabled as it should be a local perf test",
|
||||
"Performance_hipExtLaunchKernelGGL_QueryGPUFrequency"
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,239 @@
|
||||
{
|
||||
"DisabledTests": [
|
||||
"=== Below test fails in external CI for PR https://github.com/ROCm-Developer-Tools/hip-tests/pull/85 ===",
|
||||
"=== Below test fails in external CI for PR https://github.com/ROCm-Developer-Tools/hip-tests/pull/215 ===",
|
||||
"Unit_ChannelDescriptor_Positive_Basic_1D - long",
|
||||
"Unit_ChannelDescriptor_Positive_Basic_1D - unsigned long",
|
||||
"Unit_ChannelDescriptor_Positive_Basic_1D - ulong1",
|
||||
"Unit_ChannelDescriptor_Positive_Basic_1D - signed long",
|
||||
"Unit_ChannelDescriptor_Positive_Basic_1D - long1",
|
||||
"Unit_ChannelDescriptor_Positive_Basic_2D - ulong2",
|
||||
"Unit_ChannelDescriptor_Positive_Basic_2D - long2",
|
||||
"Unit_ChannelDescriptor_Positive_Basic_3D - ulong3",
|
||||
"Unit_ChannelDescriptor_Positive_Basic_3D - long3",
|
||||
"Unit_ChannelDescriptor_Positive_Basic_4D - ulong4",
|
||||
"Unit_ChannelDescriptor_Positive_Basic_4D - long4",
|
||||
"=== Below test fails in external CI for PR https://github.com/ROCm-Developer-Tools/hip-tests/pull/38 ===",
|
||||
"Unit_hipFreeAsync_Negative_Parameters",
|
||||
"=== Below test fails in external CI for PR https://github.com/ROCm-Developer-Tools/hip-tests/pull/92 ===",
|
||||
"Unit_hipGetTexObjectResourceDesc_positive",
|
||||
"Unit_hipGetTexObjectResourceDesc_Negative_Parameters",
|
||||
"Unit_hipGetTexObjectTextureDesc_positive",
|
||||
"Unit_hipGetTexObjectTextureDesc_Negative_Parameters",
|
||||
"Unit_hipTexObjectDestroy_positive",
|
||||
"=== Below tests fail randomly in PSDB ===",
|
||||
"Unit_hipGraphInstantiateWithFlags_DependencyGraphDeviceCtxtChg",
|
||||
"Unit_hipGraphUpload_Functional_multidevice_test",
|
||||
"Unit_hipMemcpyParam2D_multiDevice-D2D - char",
|
||||
"Unit_hipMemcpyParam2D_multiDevice-D2D - float",
|
||||
"Unit_hipMemcpyParam2D_multiDevice-D2D - int",
|
||||
"Unit_hipMemcpyParam2D_multiDevice-D2D - double",
|
||||
"Unit_hipMemcpyParam2D_multiDevice-D2D - long double",
|
||||
"Unit_hipMemcpyParam2DAsync_multiDevice-StreamOnDiffDevice - char",
|
||||
"Unit_hipMemsetFunctional_ZeroValue_hipMemsetD16",
|
||||
"Unit_hipStreamAttachMemAsync_Negative_Parameters",
|
||||
"hipStreamPerThread_CoopLaunch",
|
||||
"hipCGMultiGridGroupType",
|
||||
"Grid_Group_Getters_Positive_Basic",
|
||||
"Grid_Group_Getters_Via_Non_Member_Functions_Positive_Basic",
|
||||
"Grid_Group_Sync_Positive_Basic",
|
||||
"dynamic_loading_device_kernels_from_library",
|
||||
"Unit_tiled_partition",
|
||||
"Unit_atomicExch_Positive_Same_Address_Compile_Time - int",
|
||||
"Unit_atomicExch_Positive_Same_Address_Compile_Time - unsigned int",
|
||||
"Unit_atomicExch_Positive_Same_Address_Compile_Time - unsigned long long",
|
||||
"Unit_atomicExch_Positive_Same_Address_Compile_Time - float",
|
||||
"Unit_atomicExch_system_Positive_Host_And_GPU - int",
|
||||
"Unit_atomicExch_system_Positive_Host_And_GPU - unsigned int",
|
||||
"Unit_atomicExch_system_Positive_Host_And_GPU - unsigned long long",
|
||||
"Unit_atomicExch_system_Positive_Host_And_GPU - float",
|
||||
"Unit_hipModuleUnload_Negative_Double_Unload",
|
||||
"=== Below tests fail in external CI for PR https://github.com/ROCm-Developer-Tools/hip-tests/pull/356 ===",
|
||||
"Unit_Device_Complex_Unary_Negative_Parameters_RTC",
|
||||
"Unit_Device_Complex_Binary_Negative_Parameters_RTC",
|
||||
"Unit_Device_Complex_hipCfma_Negative_Parameters_RTC",
|
||||
"Unit_Device_make_Complex_Negative_Parameters_RTC",
|
||||
"Unit_Device_Complex_Cast_Negative_Parameters_RTC",
|
||||
"=== Below 2 tests are disabled due to defect EXSWHTEC-342 ===",
|
||||
"Unit_hipDeviceSetLimit_Negative_Parameters",
|
||||
"Unit_hipDeviceGetLimit_Negative_Parameters",
|
||||
"=== Below tests are failing PSDB ===",
|
||||
"Unit_hipGraphMem_Alloc_Free_NodeGetParams_Functional_3",
|
||||
"Unit_hipMemPoolSetAccess_Negative_Parameters",
|
||||
"Unit_hipMallocMipmappedArray_Negative_NumLevels",
|
||||
"Unit_hipFreeMipmappedArray_Negative_Nullptr",
|
||||
"Unit_hipFreeMipmappedArrayMultiTArray - int",
|
||||
"Unit_hipFreeMipmappedArray_Negative_Parameters",
|
||||
"Unit_hipCreateSurfaceObject_Negative_Parameters",
|
||||
"Unit_hipDestroySurfaceObject_Negative_Parameters",
|
||||
"Unit_hipMemcpy3D_Positive_Synchronization_Behavior",
|
||||
"Unit_hipMemcpy2D_Positive_Synchronization_Behavior",
|
||||
"Unit_hipDrvMemcpy3D_Positive_Synchronization_Behavior",
|
||||
"Unit_hipFreeMipmappedArray_Negative_DoubleFree",
|
||||
"Unit_hipModuleLoad_Positive_Basic",
|
||||
"Unit_hipModuleLoad_Negative_Load_From_A_File_That_Is_Not_A_Module",
|
||||
"Unit_hipModuleLoadData_Positive_Basic",
|
||||
"Unit_hipModuleLoadData_Negative_Parameters",
|
||||
"Unit_hipModuleLoadDataEx_Positive_Basic",
|
||||
"Unit_hipModuleLoadDataEx_Negative_Parameters",
|
||||
"Performance_hipMemsetD16",
|
||||
"Performance_hipMemsetD16Async",
|
||||
"Performance_hipMemsetD32",
|
||||
"Performance_hipMemsetD32Async",
|
||||
"Unit_hipMemcpyParam2D_Positive_Synchronization_Behavior",
|
||||
"Unit_hipMemcpy_Positive_Synchronization_Behavior",
|
||||
"Unit_tex1Dfetch_Positive_ReadModeElementType - char",
|
||||
"Unit_tex1Dfetch_Positive_ReadModeElementType - unsigned char",
|
||||
"Unit_tex1Dfetch_Positive_ReadModeElementType - short",
|
||||
"Unit_tex1Dfetch_Positive_ReadModeElementType - unsigned short",
|
||||
"Unit_tex1Dfetch_Positive_ReadModeElementType - int",
|
||||
"Unit_tex1Dfetch_Positive_ReadModeElementType - unsigned int",
|
||||
"Unit_tex1Dfetch_Positive_ReadModeElementType - float",
|
||||
"Unit_tex1Dfetch_Positive_ReadModeNormalizedFloat - char",
|
||||
"Unit_tex1Dfetch_Positive_ReadModeNormalizedFloat - unsigned char",
|
||||
"Unit_tex1Dfetch_Positive_ReadModeNormalizedFloat - short",
|
||||
"Unit_tex1Dfetch_Positive_ReadModeNormalizedFloat - unsigned short",
|
||||
"Unit_tex1D_Positive_ReadModeNormalizedFloat - char",
|
||||
"Unit_tex1D_Positive_ReadModeNormalizedFloat - unsigned char",
|
||||
"Unit_tex1D_Positive_ReadModeNormalizedFloat - short",
|
||||
"Unit_tex1D_Positive_ReadModeNormalizedFloat - unsigned short",
|
||||
"Unit_tex1DLayered_Positive_ReadModeNormalizedFloat - char",
|
||||
"Unit_tex1DLayered_Positive_ReadModeNormalizedFloat - unsigned char",
|
||||
"Unit_tex1DLayered_Positive_ReadModeNormalizedFloat - short",
|
||||
"Unit_tex1DLayered_Positive_ReadModeNormalizedFloat - unsigned short",
|
||||
"Unit_tex1DGrad_Positive_ReadModeNormalizedFloat - char",
|
||||
"Unit_tex1DGrad_Positive_ReadModeNormalizedFloat - unsigned char",
|
||||
"Unit_tex1DGrad_Positive_ReadModeNormalizedFloat - short",
|
||||
"Unit_tex1DGrad_Positive_ReadModeNormalizedFloat - unsigned short",
|
||||
"Unit_tex1DLayeredGrad_Positive_ReadModeNormalizedFloat - char",
|
||||
"Unit_tex1DLayeredGrad_Positive_ReadModeNormalizedFloat - unsigned char",
|
||||
"Unit_tex1DLayeredGrad_Positive_ReadModeNormalizedFloat - short",
|
||||
"Unit_tex1DLayeredGrad_Positive_ReadModeNormalizedFloat - unsigned short",
|
||||
"Unit_tex1DLayeredLod_Positive_ReadModeNormalizedFloat - char",
|
||||
"Unit_tex1DLayeredLod_Positive_ReadModeNormalizedFloat - unsigned char",
|
||||
"Unit_tex1DLayeredLod_Positive_ReadModeNormalizedFloat - short",
|
||||
"Unit_tex1DLayeredLod_Positive_ReadModeNormalizedFloat - unsigned short",
|
||||
"Unit_tex1DLod_Positive_ReadModeNormalizedFloat - char",
|
||||
"Unit_tex1DLod_Positive_ReadModeNormalizedFloat - unsigned char",
|
||||
"Unit_tex1DLod_Positive_ReadModeNormalizedFloat - short",
|
||||
"Unit_tex1DLod_Positive_ReadModeNormalizedFloat - unsigned short",
|
||||
"Unit_tex3D_Positive_ReadModeNormalizedFloat - char",
|
||||
"Unit_tex3D_Positive_ReadModeNormalizedFloat - unsigned char",
|
||||
"Unit_tex3D_Positive_ReadModeNormalizedFloat - short",
|
||||
"Unit_tex3D_Positive_ReadModeNormalizedFloat - unsigned short",
|
||||
"Unit_tex3DLod_Positive_ReadModeNormalizedFloat - char",
|
||||
"Unit_tex3DLod_Positive_ReadModeNormalizedFloat - unsigned char",
|
||||
"Unit_tex3DLod_Positive_ReadModeNormalizedFloat - short",
|
||||
"Unit_tex3DLod_Positive_ReadModeNormalizedFloat - unsigned short",
|
||||
"Unit_tex3DGrad_Positive_ReadModeNormalizedFloat - char",
|
||||
"Unit_tex3DGrad_Positive_ReadModeNormalizedFloat - unsigned char",
|
||||
"Unit_tex3DGrad_Positive_ReadModeNormalizedFloat - short",
|
||||
"Unit_tex3DGrad_Positive_ReadModeNormalizedFloat - unsigned short",
|
||||
"Unit_texCubemap_Positive_ReadModeElementType - char",
|
||||
"Unit_texCubemap_Positive_ReadModeElementType - unsigned char",
|
||||
"Unit_texCubemap_Positive_ReadModeElementType - short",
|
||||
"Unit_texCubemap_Positive_ReadModeElementType - unsigned short",
|
||||
"Unit_texCubemap_Positive_ReadModeElementType - int",
|
||||
"Unit_texCubemap_Positive_ReadModeElementType - unsigned int",
|
||||
"Unit_texCubemap_Positive_ReadModeElementType - float",
|
||||
"Unit_texCubemap_Positive_ReadModeNormalizedFloat - char",
|
||||
"Unit_texCubemap_Positive_ReadModeNormalizedFloat - unsigned char",
|
||||
"Unit_texCubemap_Positive_ReadModeNormalizedFloat - short",
|
||||
"Unit_texCubemap_Positive_ReadModeNormalizedFloat - unsigned short",
|
||||
"Unit_texCubemapLod_Positive_ReadModeElementType - char",
|
||||
"Unit_texCubemapLod_Positive_ReadModeElementType - unsigned char",
|
||||
"Unit_texCubemapLod_Positive_ReadModeElementType - short",
|
||||
"Unit_texCubemapLod_Positive_ReadModeElementType - unsigned short",
|
||||
"Unit_texCubemapLod_Positive_ReadModeElementType - int",
|
||||
"Unit_texCubemapLod_Positive_ReadModeElementType - unsigned int",
|
||||
"Unit_texCubemapLod_Positive_ReadModeElementType - float",
|
||||
"Unit_texCubemapLod_Positive_ReadModeNormalizedFloat - char",
|
||||
"Unit_texCubemapLod_Positive_ReadModeNormalizedFloat - unsigned char",
|
||||
"Unit_texCubemapLod_Positive_ReadModeNormalizedFloat - short",
|
||||
"Unit_texCubemapLod_Positive_ReadModeNormalizedFloat - unsigned short",
|
||||
"Unit_texCubemapGrad_Positive_ReadModeElementType - char",
|
||||
"Unit_texCubemapGrad_Positive_ReadModeElementType - unsigned char",
|
||||
"Unit_texCubemapGrad_Positive_ReadModeElementType - short",
|
||||
"Unit_texCubemapGrad_Positive_ReadModeElementType - unsigned short",
|
||||
"Unit_texCubemapGrad_Positive_ReadModeElementType - int",
|
||||
"Unit_texCubemapGrad_Positive_ReadModeElementType - unsigned int",
|
||||
"Unit_texCubemapGrad_Positive_ReadModeElementType - float",
|
||||
"Unit_texCubemapGrad_Positive_ReadModeNormalizedFloat - char",
|
||||
"Unit_texCubemapGrad_Positive_ReadModeNormalizedFloat - unsigned char",
|
||||
"Unit_texCubemapGrad_Positive_ReadModeNormalizedFloat - short",
|
||||
"Unit_texCubemapGrad_Positive_ReadModeNormalizedFloat - unsigned short",
|
||||
"Unit_texCubemapLayered_Positive_ReadModeElementType - char",
|
||||
"Unit_texCubemapLayered_Positive_ReadModeElementType - unsigned char",
|
||||
"Unit_texCubemapLayered_Positive_ReadModeElementType - short",
|
||||
"Unit_texCubemapLayered_Positive_ReadModeElementType - unsigned short",
|
||||
"Unit_texCubemapLayered_Positive_ReadModeElementType - int",
|
||||
"Unit_texCubemapLayered_Positive_ReadModeElementType - unsigned int",
|
||||
"Unit_texCubemapLayered_Positive_ReadModeElementType - float",
|
||||
"Unit_texCubemapLayered_Positive_ReadModeNormalizedFloat - char",
|
||||
"Unit_texCubemapLayered_Positive_ReadModeNormalizedFloat - unsigned char",
|
||||
"Unit_texCubemapLayered_Positive_ReadModeNormalizedFloat - short",
|
||||
"Unit_texCubemapLayered_Positive_ReadModeNormalizedFloat - unsigned short",
|
||||
"Unit_texCubemapLayeredLod_Positive_ReadModeElementType - char",
|
||||
"Unit_texCubemapLayeredLod_Positive_ReadModeElementType - unsigned char",
|
||||
"Unit_texCubemapLayeredLod_Positive_ReadModeElementType - short",
|
||||
"Unit_texCubemapLayeredLod_Positive_ReadModeElementType - unsigned short",
|
||||
"Unit_texCubemapLayeredLod_Positive_ReadModeElementType - int",
|
||||
"Unit_texCubemapLayeredLod_Positive_ReadModeElementType - unsigned int",
|
||||
"Unit_texCubemapLayeredLod_Positive_ReadModeElementType - float",
|
||||
"Unit_texCubemapLayeredLod_Positive_ReadModeNormalizedFloat - char",
|
||||
"Unit_texCubemapLayeredLod_Positive_ReadModeNormalizedFloat - unsigned char",
|
||||
"Unit_texCubemapLayeredLod_Positive_ReadModeNormalizedFloat - short",
|
||||
"Unit_texCubemapLayeredLod_Positive_ReadModeNormalizedFloat - unsigned short",
|
||||
"Unit_texCubemapLayeredGrad_Positive_ReadModeElementType - char",
|
||||
"Unit_texCubemapLayeredGrad_Positive_ReadModeElementType - unsigned char",
|
||||
"Unit_texCubemapLayeredGrad_Positive_ReadModeElementType - short",
|
||||
"Unit_texCubemapLayeredGrad_Positive_ReadModeElementType - unsigned short",
|
||||
"Unit_texCubemapLayeredGrad_Positive_ReadModeElementType - int",
|
||||
"Unit_texCubemapLayeredGrad_Positive_ReadModeElementType - unsigned int",
|
||||
"Unit_texCubemapLayeredGrad_Positive_ReadModeElementType - float",
|
||||
"Unit_texCubemapLayeredGrad_Positive_ReadModeNormalizedFloat - char",
|
||||
"Unit_texCubemapLayeredGrad_Positive_ReadModeNormalizedFloat - unsigned char",
|
||||
"Unit_texCubemapLayeredGrad_Positive_ReadModeNormalizedFloat - short",
|
||||
"Unit_texCubemapLayeredGrad_Positive_ReadModeNormalizedFloat - unsigned short",
|
||||
"Unit_tex2D_Positive_ReadModeNormalizedFloat - char",
|
||||
"Unit_tex2D_Positive_ReadModeNormalizedFloat - unsigned char",
|
||||
"Unit_tex2D_Positive_ReadModeNormalizedFloat - short",
|
||||
"Unit_tex2D_Positive_ReadModeNormalizedFloat - unsigned short",
|
||||
"Unit_tex2DLayered_Positive_ReadModeNormalizedFloat - char",
|
||||
"Unit_tex2DLayered_Positive_ReadModeNormalizedFloat - unsigned char",
|
||||
"Unit_tex2DLayered_Positive_ReadModeNormalizedFloat - short",
|
||||
"Unit_tex2DLayered_Positive_ReadModeNormalizedFloat - unsigned short",
|
||||
"Unit_tex2DGrad_Positive_ReadModeNormalizedFloat - char",
|
||||
"Unit_tex2DGrad_Positive_ReadModeNormalizedFloat - unsigned char",
|
||||
"Unit_tex2DGrad_Positive_ReadModeNormalizedFloat - short",
|
||||
"Unit_tex2DGrad_Positive_ReadModeNormalizedFloat - unsigned short",
|
||||
"Unit_tex2DLayeredGrad_Positive_ReadModeNormalizedFloat - char",
|
||||
"Unit_tex2DLayeredGrad_Positive_ReadModeNormalizedFloat - unsigned char",
|
||||
"Unit_tex2DLayeredGrad_Positive_ReadModeNormalizedFloat - short",
|
||||
"Unit_tex2DLayeredGrad_Positive_ReadModeNormalizedFloat - unsigned short",
|
||||
"Unit_tex2DLod_Positive_ReadModeNormalizedFloat - char",
|
||||
"Unit_tex2DLod_Positive_ReadModeNormalizedFloat - unsigned char",
|
||||
"Unit_tex2DLod_Positive_ReadModeNormalizedFloat - short",
|
||||
"Unit_tex2DLod_Positive_ReadModeNormalizedFloat - unsigned short",
|
||||
"Unit_tex2DLayeredLod_Positive_ReadModeNormalizedFloat - char",
|
||||
"Unit_tex2DLayeredLod_Positive_ReadModeNormalizedFloat - unsigned char",
|
||||
"Unit_tex2DLayeredLod_Positive_ReadModeNormalizedFloat - short",
|
||||
"Unit_tex2DLayeredLod_Positive_ReadModeNormalizedFloat - unsigned short",
|
||||
"Unit_hipLaunchKernel_Negative_Parameters",
|
||||
"Unit_Assert_Positive_Basic_KernelFail",
|
||||
"=== Below tests fail in external CI for PR https://github.com/ROCm-Developer-Tools/hip-tests/pull/210 ===",
|
||||
"Unit_hipMemImportFromShareableHandle_Positive_MultiProc",
|
||||
"Unit_hipMemMapArrayAsync_Positive_Basic",
|
||||
"=== Disabling failing nvidia tests ===",
|
||||
"Unit_hipDeviceSetLimit_Negative_PrintfFifoSize",
|
||||
"Unit_hipDeviceSetLimit_Negative_MallocHeapSize",
|
||||
"=== Disabling tests which no longer behave the same on nvidia platform ===",
|
||||
"Unit_hipGraphInstantiateWithParams_Negative",
|
||||
"Unit_hipGraphAddChildGraphNode_OrgGraphAsChildGraph",
|
||||
"Unit_hipDeviceSynchronize_Positive_Nullstream",
|
||||
"Unit_hipDeviceSynchronize_Functional",
|
||||
"Unit_hipDeviceReset_Positive_Basic",
|
||||
"Unit_hipDeviceReset_Positive_Threaded",
|
||||
"Unit_hipModuleGetTexRef_Positive_Basic"
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,71 @@
|
||||
{
|
||||
"DisabledTests": [
|
||||
"=== Below test fails in external CI for PR https://github.com/ROCm-Developer-Tools/hip-tests/pull/215 ===",
|
||||
"Unit_ChannelDescriptor_Positive_Basic_1D - long",
|
||||
"Unit_ChannelDescriptor_Positive_Basic_1D - unsigned long",
|
||||
"Unit_ChannelDescriptor_Positive_Basic_1D - ulong1",
|
||||
"Unit_ChannelDescriptor_Positive_Basic_1D - signed long",
|
||||
"Unit_ChannelDescriptor_Positive_Basic_1D - long1",
|
||||
"Unit_ChannelDescriptor_Positive_Basic_2D - ulong2",
|
||||
"Unit_ChannelDescriptor_Positive_Basic_2D - long2",
|
||||
"Unit_ChannelDescriptor_Positive_Basic_3D - ulong3",
|
||||
"Unit_ChannelDescriptor_Positive_Basic_3D - long3",
|
||||
"Unit_ChannelDescriptor_Positive_Basic_4D - ulong4",
|
||||
"Unit_ChannelDescriptor_Positive_Basic_4D - long4",
|
||||
"Unit_hipModuleUnload_Negative_Double_Unload",
|
||||
"=== Below tests fail in external CI for PR https://github.com/ROCm-Developer-Tools/hip-tests/pull/356 ===",
|
||||
"Unit_Device_Complex_Unary_Negative_Parameters_RTC",
|
||||
"Unit_Device_Complex_Binary_Negative_Parameters_RTC",
|
||||
"Unit_Device_Complex_hipCfma_Negative_Parameters_RTC",
|
||||
"Unit_Device_make_Complex_Negative_Parameters_RTC",
|
||||
"Unit_Device_Complex_Cast_Negative_Parameters_RTC",
|
||||
"=== Below 2 tests are disabled due to defect EXSWHTEC-342 ===",
|
||||
"Unit_hipDeviceSetLimit_Negative_Parameters",
|
||||
"Unit_hipDeviceGetLimit_Negative_Parameters",
|
||||
"=== Below tests fail in PSDB ===",
|
||||
"Unit_hipMemcpy3D_Positive_Synchronization_Behavior",
|
||||
"Unit_hipMemcpy2D_Positive_Synchronization_Behavior",
|
||||
"Unit_hipDrvMemcpy3D_Positive_Synchronization_Behavior",
|
||||
"Unit_hipFreeMipmappedArray_Negative_DoubleFree",
|
||||
"Unit_hipModuleLoad_Positive_Basic",
|
||||
"Unit_hipModuleLoad_Negative_Load_From_A_File_That_Is_Not_A_Module",
|
||||
"Unit_hipModuleLoadData_Positive_Basic",
|
||||
"Unit_hipModuleLoadData_Negative_Parameters",
|
||||
"Unit_hipModuleLoadDataEx_Positive_Basic",
|
||||
"Unit_hipModuleLoadDataEx_Negative_Parameters",
|
||||
"Performance_hipMemsetD16",
|
||||
"Performance_hipMemsetD16Async",
|
||||
"Performance_hipMemsetD32",
|
||||
"Performance_hipMemsetD32Async",
|
||||
"Unit_hipMemcpyParam2D_Positive_Synchronization_Behavior",
|
||||
"Unit_hipMemcpy_Positive_Synchronization_Behavior",
|
||||
"Unit_hipMemMapArrayAsync_Positive_Basic",
|
||||
"=== SWDEV-475987 : Disable tests to merge hipother change 12/08/2024 ===",
|
||||
"Unit_hipMalloc3DArray_Negative_InvalidFormat",
|
||||
"Unit_hipMalloc3DArray_Negative_BadChannelLayout",
|
||||
"Unit_hipMalloc3DArray_Negative_8BitFloat",
|
||||
"Unit_hipMalloc3DArray_Negative_DifferentChannelSizes",
|
||||
"Unit_hipMalloc3DArray_Negative_BadChannelSize",
|
||||
"Unit_hipMallocMipmappedArray_Negative_InvalidFormat",
|
||||
"Unit_hipMallocMipmappedArray_Negative_BadChannelLayout",
|
||||
"Unit_hipMallocMipmappedArray_Negative_8BitFloat",
|
||||
"Unit_hipMallocMipmappedArray_Negative_DifferentChannelSizes",
|
||||
"Unit_hipMallocMipmappedArray_Negative_BadChannelSize",
|
||||
"Unit_hipMallocArray_Negative_DifferentChannelSizes",
|
||||
"Unit_hipMallocArray_Negative_8bitFloat - float",
|
||||
"Unit_hipMallocArray_Negative_8bitFloat - float2",
|
||||
"Unit_hipMallocArray_Negative_8bitFloat - float4",
|
||||
"Unit_hipMallocArray_Negative_BadNumberOfBits",
|
||||
"Unit_hipMallocArray_Negative_3ChannelElement",
|
||||
"Unit_hipMallocArray_Negative_ChannelAfterZeroChannel",
|
||||
"Unit_hipMallocArray_Negative_InvalidChannelFormat",
|
||||
"=== Disabling tests which no longer behave the same on nvidia platform ===",
|
||||
"Unit_hipGraphInstantiateWithParams_Negative",
|
||||
"Unit_hipGraphAddChildGraphNode_OrgGraphAsChildGraph",
|
||||
"Unit_hipDeviceSynchronize_Positive_Nullstream",
|
||||
"Unit_hipDeviceSynchronize_Functional",
|
||||
"Unit_hipDeviceReset_Positive_Basic",
|
||||
"Unit_hipDeviceReset_Positive_Threaded",
|
||||
"Unit_hipModuleGetTexRef_Positive_Basic"
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,341 @@
|
||||
#include <cstdlib>
|
||||
#include <hip_test_common.hh>
|
||||
#include <picojson.h>
|
||||
#include <fstream>
|
||||
#include <sstream>
|
||||
#include <regex>
|
||||
#include "hip_test_context.hh"
|
||||
#include "hip_test_filesystem.hh"
|
||||
#include "hip_test_features.hh"
|
||||
|
||||
// Record the operating system this binary was built for, as selected by the
// HT_WIN / HT_LINUX build macros. When neither macro equals 1 both flags are
// left untouched.
void TestContext::detectOS() {
#if HT_WIN == 1
  p_windows = true;
#elif HT_LINUX == 1
  p_linux = true;
#endif
}
|
||||
|
||||
// Record the GPU platform this binary was built for, as selected by the
// HT_AMD / HT_NVIDIA build macros. When neither macro equals 1 both flags are
// left untouched.
void TestContext::detectPlatform() {
#if HT_AMD == 1
  amd = true;
#elif HT_NVIDIA == 1
  nvidia = true;
#endif
}
|
||||
|
||||
// Return the first entry of `list` that occurs as a substring of `filename`,
// or an empty string when no entry matches.
std::string TestContext::substringFound(std::vector<std::string> list, std::string filename) {
  for (const auto& candidate : list) {
    if (filename.find(candidate) != std::string::npos) {
      return candidate;
    }
  }
  return "";
}
|
||||
|
||||
// Determine the single GPU architecture name the tests will run against.
//
// On Linux builds the agent list is read from rocm_agent_enumerator. When all
// agents report the same architecture that name is returned. Otherwise the
// list is narrowed to the indices named in HIP_VISIBLE_DEVICES; if the
// remaining agents still differ the process aborts, because one config file
// cannot describe a heterogeneous set.
//
// Returns an empty string when the enumerator cannot be run, produces no
// output, or when HIP_VISIBLE_DEVICES selects no valid agent. Always returns
// an empty string on non-Linux builds.
std::string TestContext::getCurrentArch() {
#if HT_LINUX
  // The pipeline drops "gfx000" lines, then joins the remaining agent names
  // with ';' on one line without a trailing newline.
  const char* cmd =
      "/opt/rocm/bin/rocm_agent_enumerator | awk '$0 != \"gfx000\"' | xargs | sed -e 's/ /;/g' | "
      "tr -d '\n'";
  std::array<char, 1024> buffer;
  std::string result;
  std::unique_ptr<FILE, decltype(&pclose)> pipe(popen(cmd, "r"), pclose);
  if (!pipe) {
    printf("popen() failed!");
    return "";
  }
  // Append every chunk: output longer than the buffer arrives in several
  // fgets() calls and all of them belong to the same line.
  while (fgets(buffer.data(), buffer.size(), pipe.get()) != nullptr) {
    result += buffer.data();
  }

  // Split `input` on `c`; a trailing empty token is dropped.
  auto parser = [](std::string input, char c) -> std::vector<std::string> {
    std::vector<std::string> ret;
    auto loc = input.find(c);
    while (loc != std::string::npos) {
      auto t_str = input.substr(0, loc);
      ret.push_back(t_str);
      input.erase(0, loc + 1);
      loc = input.find(c);
    }
    if (input.size() > 0) {
      ret.push_back(input);
    }
    return ret;
  };

  std::vector<std::string> archs = parser(result, ';');
  if (archs.empty()) {
    return "";  // rocm_agent_enum gave us garbage
  }

  const std::string first_arch = archs[0];
  if (std::all_of(archs.begin(), archs.end(),
                  [&](const std::string& in) { return in == first_arch; })) {
    return first_arch;
  }

  // We have multiple archs in rocm_agent_enum.
  // Check if they are same or not by applying HIP_VISIBLE_DEVICES filter.
  std::string s_visible_devices = TestContext::getEnvVar("HIP_VISIBLE_DEVICES");
  std::vector<std::string> v_visible_devices = parser(s_visible_devices, ',');

  std::vector<std::string> filtered_archs;
  if (v_visible_devices.empty()) {
    filtered_archs = archs;
  } else {
    for (const auto& entry : v_visible_devices) {
      // strtol reports "no digits" through `end` instead of throwing, so a
      // non-numeric entry cannot terminate the test binary.
      char* end = nullptr;
      const long idx = std::strtol(entry.c_str(), &end, 10);
      const bool parsed = (end != entry.c_str());
      if (!parsed || idx < 0 || static_cast<size_t>(idx) >= archs.size()) {
        // Never index `archs` with a value outside the enumerated agents.
        LogPrintf("[WARNING] Ignoring invalid HIP_VISIBLE_DEVICES entry: %s", entry.c_str());
        continue;
      }
      filtered_archs.push_back(archs[static_cast<size_t>(idx)]);
    }
  }

  if (filtered_archs.empty()) {
    LogPrintf("%s", "[WARNING] HIP_VISIBLE_DEVICES does not select any enumerated device");
    return "";
  }

  const std::string first_filtered_arch = filtered_archs[0];
  if (!std::all_of(filtered_archs.begin(), filtered_archs.end(),
                   [&](const std::string& in) { return in == first_filtered_arch; })) {
    LogPrintf("%s",
              "[ERROR] Cannot run tests on Hetrogenous Architecture. Please set "
              "HIP_VISIBLE_DEVICES with devices of same arch");
    std::abort();
  }
  return first_filtered_arch;
#else
  return "";
#endif
}
|
||||
|
||||
std::string TestContext::getMatchingConfigFile(std::string config_dir) {
|
||||
std::string configFileToUse = "";
|
||||
if (isLinux() && isAmd()) {
|
||||
std::string cur_arch = getCurrentArch();
|
||||
LogPrintf("The arch present: %s", cur_arch.c_str());
|
||||
configFileToUse = config_dir + "/config_" + getConfig().platform + "_" + getConfig().os + "_" +
|
||||
cur_arch + ".json";
|
||||
} else {
|
||||
configFileToUse =
|
||||
config_dir + "/config_" + getConfig().platform + "_" + getConfig().os + ".json";
|
||||
}
|
||||
if (fs::exists(configFileToUse)) {
|
||||
return configFileToUse;
|
||||
}
|
||||
return "";
|
||||
}
|
||||
|
||||
std::string& TestContext::getCommonJsonFile() {
|
||||
fs::path config_dir = exe_path;
|
||||
config_dir = config_dir.parent_path();
|
||||
int levels = 0;
|
||||
bool configFolderFound = false;
|
||||
std::vector<std::string> configList;
|
||||
std::string configFile;
|
||||
// check a max of 5 levels down the executable path
|
||||
while (levels < 5) {
|
||||
fs::path temp_path = config_dir;
|
||||
temp_path /= "config";
|
||||
if (fs::exists(temp_path)) {
|
||||
config_dir = fs::absolute(temp_path);
|
||||
configFolderFound = true;
|
||||
break;
|
||||
} else {
|
||||
config_dir = config_dir.parent_path();
|
||||
levels++;
|
||||
}
|
||||
}
|
||||
|
||||
// get config.json files if config folder.
|
||||
if (configFolderFound) {
|
||||
json_file_ = getMatchingConfigFile(config_dir.string());
|
||||
}
|
||||
return json_file_;
|
||||
}
|
||||
|
||||
|
||||
void TestContext::getConfigFiles() {
|
||||
config_.platform = (amd ? "amd" : (nvidia ? "nvidia" : "unknown"));
|
||||
config_.os = (p_windows ? "windows" : (p_linux ? "linux" : "unknown"));
|
||||
|
||||
if (config_.os == "unknown" || config_.platform == "unknown") {
|
||||
LogPrintf("%s", "Either Config or Os is unknown, this wont end well");
|
||||
abort();
|
||||
}
|
||||
|
||||
std::string env_config = TestContext::getEnvVar("HIP_CATCH_EXCLUDE_FILE");
|
||||
LogPrintf("Env Config file: %s",
|
||||
(!env_config.empty()) ? env_config.c_str() : "Not found");
|
||||
// HIP_CATCH_EXCLUDE_FILE is set for custom file path
|
||||
if (!env_config.empty()) {
|
||||
if(fs::exists(env_config)) {
|
||||
config_.json_files.push_back(env_config);
|
||||
}
|
||||
} else {
|
||||
std::string jsonFile = getCommonJsonFile();
|
||||
// get common json file
|
||||
if (jsonFile != "") {
|
||||
config_.json_files.push_back(getCommonJsonFile());
|
||||
}
|
||||
}
|
||||
|
||||
for (const auto& fl : config_.json_files) {
|
||||
LogPrintf("Config file path: %s", fl.c_str());
|
||||
}
|
||||
}
|
||||
|
||||
// Build the test context from the command line. The steps run in a fixed
// order because later steps read state written by earlier ones.
TestContext::TestContext(int argc, char** argv) {
  // Set the OS / platform flags that getConfigFiles() turns into names.
  detectOS();
  detectPlatform();

  // Remember the executable directory used by the config folder search.
  setExePath(argc, argv);

  // Collect the config files, then load the disabled tests from them.
  getConfigFiles();
  parseJsonFiles();

  // Pick up the test name passed on the command line, if any.
  parseOptions(argc, argv);
}
|
||||
|
||||
void TestContext::setExePath(int argc, char** argv) {
|
||||
if (argc == 0) return;
|
||||
fs::path p = std::string(argv[0]);
|
||||
if (p.has_filename()) p.remove_filename();
|
||||
exe_path = p.string();
|
||||
}
|
||||
|
||||
bool TestContext::isWindows() const { return p_windows; }
|
||||
bool TestContext::isLinux() const { return p_linux; }
|
||||
|
||||
bool TestContext::isNvidia() const { return nvidia; }
|
||||
bool TestContext::isAmd() const { return amd; }
|
||||
|
||||
// Remember the test name given on the command line. The name is only taken
// when exactly one argument follows the program name (argv[1]).
void TestContext::parseOptions(int argc, char** argv) {
  if (argc == 2) {
    current_test = std::string(argv[1]);
  }
}
|
||||
|
||||
bool TestContext::skipTest() const {
|
||||
// Direct Match
|
||||
auto flags = std::regex::ECMAScript;
|
||||
for (const auto& i : skip_test) {
|
||||
auto regex = std::regex(i.c_str(), flags);
|
||||
if (std::regex_match(current_test, regex)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
// TODO add test case skip as well
|
||||
return false;
|
||||
}
|
||||
|
||||
std::string TestContext::currentPath() const { return fs::current_path().string(); }
|
||||
|
||||
bool TestContext::parseJsonFiles() {
|
||||
// Check if file exists
|
||||
for (const auto& fl : config_.json_files) {
|
||||
if (!fs::exists(fl)) {
|
||||
LogPrintf("Unable to find the file: %s", fl.c_str());
|
||||
return true;
|
||||
}
|
||||
// Open the file
|
||||
std::ifstream js_file(fl);
|
||||
std::string json_str((std::istreambuf_iterator<char>(js_file)), std::istreambuf_iterator<char>());
|
||||
LogPrintf("Json contents:: %s", json_str.data());
|
||||
|
||||
picojson::value v;
|
||||
std::string err = picojson::parse(v, json_str);
|
||||
if (err.size() > 1) {
|
||||
LogPrintf("Error from PicoJson: %s", err.data());
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!v.is<picojson::object>()) {
|
||||
LogPrintf("%s", "Data in json is not in correct format, it should be an object");
|
||||
return false;
|
||||
}
|
||||
|
||||
const picojson::object& o = v.get<picojson::object>();
|
||||
for (picojson::object::const_iterator i = o.begin(); i != o.end(); ++i) {
|
||||
// Processing for DisabledTests
|
||||
if (i->first == "DisabledTests") {
|
||||
// Value should contain list of values
|
||||
if (!i->second.is<picojson::array>()) return false;
|
||||
|
||||
auto& val = i->second.get<picojson::array>();
|
||||
for (auto ai = val.begin(); ai != val.end(); ai++) {
|
||||
std::string tmp = ai->get<std::string>();
|
||||
std::string newRegexName;
|
||||
for (const auto& c : tmp) {
|
||||
if (c == '*')
|
||||
newRegexName += ".*";
|
||||
else
|
||||
newRegexName += c;
|
||||
}
|
||||
skip_test.insert(newRegexName);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void TestContext::cleanContext() {
|
||||
for (auto& pair : compiledKernels) {
|
||||
hipError_t error = hipModuleUnload(pair.second.module);
|
||||
if (error != hipSuccess) {
|
||||
throw std::runtime_error("Unable to unload rtc module");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void TestContext::trackRtcState(std::string kernelNameExpression, hipModule_t loadedModule,
|
||||
hipFunction_t kernelFunction) {
|
||||
rtcState state{loadedModule, kernelFunction};
|
||||
compiledKernels[kernelNameExpression] = state;
|
||||
}
|
||||
|
||||
// Looks up the kernel function tracked under kernelNameExpression.
// Returns nullptr when no entry with that name exists in compiledKernels.
hipFunction_t TestContext::getFunction(const std::string kernelNameExpression) {
  const auto found = compiledKernels.find(kernelNameExpression);
  if (found == compiledKernels.end()) {
    return nullptr;
  }
  return found->second.kernelFunction;
}
|
||||
|
||||
// Appends r to results while holding resultMutex.
// The error flag is raised when the condition check of r failed or when its API
// result is neither hipSuccess nor hipErrorPeerAccessAlreadyEnabled.
void TestContext::addResults(HCResult r) {
  std::unique_lock<std::mutex> lock(resultMutex);
  results.push_back(r);

  const bool apiCallOk =
      (r.result == hipSuccess) || (r.result == hipErrorPeerAccessAlreadyEnabled);
  if (!(r.conditionsResult && apiCallOk)) {
    hasErrorOccured_.store(true);
  }
}
|
||||
|
||||
void TestContext::finalizeResults() {
|
||||
std::unique_lock<std::mutex> lock(resultMutex);
|
||||
// clear the results whatever happens
|
||||
std::shared_ptr<void> emptyVec(nullptr, [this](auto) { results.clear(); });
|
||||
|
||||
for (const auto& i : results) {
|
||||
INFO("HIP API Result check\n File:: "
|
||||
<< i.file << "\n Line:: " << i.line << "\n API:: " << i.call
|
||||
<< "\n Result:: " << i.result << "\n Result Str:: " << hipGetErrorString(i.result));
|
||||
REQUIRE(((i.result == hipSuccess) || (i.result == hipErrorPeerAccessAlreadyEnabled) ||
|
||||
(i.result == hipErrorNotSupported)));
|
||||
REQUIRE(i.conditionsResult);
|
||||
}
|
||||
hasErrorOccured_.store(false); // Clear the flag
|
||||
}
|
||||
|
||||
// Returns the current value of the atomic error flag.
bool TestContext::hasErrorOccured() {
  return hasErrorOccured_.load();
}
|
||||
|
||||
// Aborts the process when results collected from threads were never checked.
TestContext::~TestContext() {
  if (results.size() == 0) {
    return;
  }

  // Show this message when there are unchecked results
  std::cerr << "HIP_CHECK_THREAD_FINALIZE() has not been called after HIP_CHECK_THREAD\n"
            << "Please call HIP_CHECK_THREAD_FINALIZE after joining threads\n"
            << "There is/are " << results.size() << " unchecked results from threads."
            << std::endl;
  // Crash to bring users attention to this message and avoid accidental passing
  // of tests without checking for errors
  std::abort();
}
|
||||
@@ -0,0 +1,138 @@
|
||||
#include "hip_test_features.hh"
|
||||
|
||||
#include <iostream>
|
||||
#include <assert.h>
|
||||
#include <map>
|
||||
#include "hip_test_context.hh"
|
||||
|
||||
// Architecture names per feature. The position of a set in the vector is the
// numeric value of the feature it belongs to (it is indexed with a CTFeatures
// value), so the order of the entries must not change.
std::vector<std::unordered_set<std::string>> GCNArchFeatMap = {
    // CT_FEATURE_FINEGRAIN_HWSUPPORT
    {"gfx90a", "gfx942", "gfx950"},
    // CT_FEATURE_HMM
    {"gfx90a", "gfx942", "gfx950"},
    // CT_FEATURE_TEXTURES_NOT_SUPPORTED
    {"gfx90a", "gfx942", "gfx950"},
};
|
||||
|
||||
#if HT_AMD
|
||||
// Returns the part of full_gfx_name that precedes the first ':' or the whole
// name when it contains no ':'. Asserts that the result starts with "gfx".
std::string TrimAndGetGFXName(const std::string& full_gfx_name) {
  // substr(0, npos) yields the complete string, which covers the "no ':'" case.
  std::string gfx_name = full_gfx_name.substr(0, full_gfx_name.find(":"));

  assert(gfx_name.compare(0, 3, "gfx") == 0);
  return gfx_name;
}
|
||||
#endif
|
||||
|
||||
// Check if the GCN Maps
|
||||
bool CheckIfFeatSupported(enum CTFeatures test_feat, std::string gcn_arch) {
|
||||
#if HT_NVIDIA
|
||||
return true; // returning true since feature check does not exist for NV.
|
||||
#elif HT_AMD
|
||||
assert(test_feat >= 0 && test_feat < CTFeatures::CT_FEATURE_LAST);
|
||||
gcn_arch = TrimAndGetGFXName(gcn_arch);
|
||||
assert(gcn_arch != "");
|
||||
return (GCNArchFeatMap[test_feat].find(gcn_arch) != GCNArchFeatMap[test_feat].cend());
|
||||
#else
|
||||
std::cout<<"Platform has to be either AMD or NVIDIA, asserting..."<<std::endl;
|
||||
assert(false);
|
||||
#endif
|
||||
}
|
||||
|
||||
// Looks up the generic target that corresponds to agentTarget.
// Returns true and stores the generic target name in genericTarget when
// agentTarget is listed in the table below; returns false and leaves
// genericTarget untouched otherwise.
// Note: names that are not AMD gfx targets are not listed, so the lookup
// naturally returns false for an Nvidia device.
bool getGenericTarget(const std::string& agentTarget, std::string& genericTarget) {
  // The map is subject to change per removing policy
  static const std::map<std::string, std::string> genericTargetMap{
      // "gfx9-generic"
      {"gfx900", "gfx9-generic"},
      {"gfx902", "gfx9-generic"},
      {"gfx904", "gfx9-generic"},
      {"gfx906", "gfx9-generic"},
      {"gfx909", "gfx9-generic"},
      {"gfx90c", "gfx9-generic"},
      // "gfx9-4-generic"
      {"gfx940", "gfx9-4-generic"},
      {"gfx941", "gfx9-4-generic"},
      {"gfx942", "gfx9-4-generic"},
      {"gfx950", "gfx9-4-generic"},
      // "gfx10-1-generic"
      {"gfx1010", "gfx10-1-generic"},
      {"gfx1011", "gfx10-1-generic"},
      {"gfx1012", "gfx10-1-generic"},
      {"gfx1013", "gfx10-1-generic"},
      // "gfx10-3-generic"
      {"gfx1030", "gfx10-3-generic"},
      {"gfx1031", "gfx10-3-generic"},
      {"gfx1032", "gfx10-3-generic"},
      {"gfx1033", "gfx10-3-generic"},
      {"gfx1034", "gfx10-3-generic"},
      {"gfx1035", "gfx10-3-generic"},
      {"gfx1036", "gfx10-3-generic"},
      // "gfx11-generic"
      {"gfx1100", "gfx11-generic"},
      {"gfx1101", "gfx11-generic"},
      {"gfx1102", "gfx11-generic"},
      {"gfx1103", "gfx11-generic"},
      {"gfx1150", "gfx11-generic"},
      {"gfx1151", "gfx11-generic"},
      // "gfx12-generic"
      {"gfx1200", "gfx12-generic"},
      {"gfx1201", "gfx12-generic"},
  };

  const auto entry = genericTargetMap.find(agentTarget);
  const bool found = (entry != genericTargetMap.end());
  if (found) {
    genericTarget = entry->second;
  }
  return found;
}
|
||||
|
||||
/*
|
||||
Return true, if gcnArchName has corresponding generic target;
|
||||
false, otherwise.
|
||||
If gcnArchName is nullptr, it will be queried from deviceId;
|
||||
otherwise, deviceId will be ignored.
|
||||
|
||||
The specific arches have the following mapping to generic targets,
|
||||
|
||||
Generic GFX11
|
||||
|
||||
--offload-arch=gfx11-generic - includes [gfx1100-gfx1103], gfx1150, gfx1151
|
||||
|
||||
Generic GFX10.3
|
||||
|
||||
--offload-arch=gfx10-3-generic - includes [gfx1030-gfx1036]
|
||||
|
||||
Generic GFX10.1
|
||||
|
||||
--offload-arch=gfx10-1-generic - includes [gfx1010-gfx1013]
|
||||
|
||||
Generic GFX9 / Consumer
|
||||
|
||||
--offload-arch=gfx9-generic - includes gfx900, gfx902, gfx904, gfx906, gfx909, gfx90c
|
||||
|
||||
Generic GFX9.4 / Data center
|
||||
|
||||
--offload-arch=gfx9-4-generic - includes gfx940, gfx941, gfx942, gfx950
|
||||
*/
|
||||
bool isGenericTargetSupported(char* gcnArchName, int deviceId) {
|
||||
hipDeviceProp_t props{};
|
||||
if (gcnArchName == nullptr) {
|
||||
if (hipGetDeviceProperties(&props, deviceId) != hipSuccess) return false;
|
||||
gcnArchName = props.gcnArchName;
|
||||
}
|
||||
std::string target{gcnArchName};
|
||||
std::string genericTarget{};
|
||||
auto pos = target.find(':');
|
||||
if (pos != std::string::npos) {
|
||||
target[pos] = 0;
|
||||
target.resize(pos);
|
||||
}
|
||||
return getGenericTarget(target, genericTarget);
|
||||
}
|
||||
@@ -0,0 +1,56 @@
|
||||
#define CATCH_CONFIG_RUNNER
|
||||
#include <cmd_options.hh>
|
||||
#include <hip_test_common.hh>
|
||||
#include <iostream>
|
||||
|
||||
// Definition of the global option values; main() below binds the extra
// command-line options to its members.
CmdOptions cmd_options;
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
auto& context = TestContext::get(argc, argv);
|
||||
if (context.skipTest()) {
|
||||
// CTest uses this regex to figure out if the test has been skipped
|
||||
std::cout << "HIP_SKIP_THIS_TEST" << std::endl;
|
||||
return 0;
|
||||
}
|
||||
|
||||
Catch::Session session;
|
||||
|
||||
using namespace Catch::clara;
|
||||
// clang-format off
|
||||
auto cli = session.cli()
|
||||
| Opt(cmd_options.iterations, "iterations")
|
||||
["-I"]["--iterations"]
|
||||
("Number of iterations used for performance tests (default: 1000)")
|
||||
| Opt(cmd_options.warmups, "warmups")
|
||||
["-W"]["--warmups"]
|
||||
("Number of warmup iterations used for performance tests (default: 100)")
|
||||
| Opt(cmd_options.no_display)
|
||||
["-S"]["--no-display"]
|
||||
("Do not display the output of performance tests")
|
||||
| Opt(cmd_options.progress)
|
||||
["-P"]["--progress"]
|
||||
("Show progress bar when running performance tests")
|
||||
| Opt(cmd_options.cg_iterations, "cg_iterations")
|
||||
["-C"]["--cg-iterations"]
|
||||
("Number of iterations used for cooperative groups sync tests (default: 5)")
|
||||
| Opt(cmd_options.accuracy_iterations, "accuracy_iterations")
|
||||
["-A"]["--accuracy-iterations"]
|
||||
("Number of iterations used for math accuracy tests with randomly generated inputs (default: 2^32)")
|
||||
| Opt(cmd_options.accuracy_max_memory, "accuracy_max_memory")
|
||||
["-M"]["--accuracy-max-memory"]
|
||||
("Percentage of global device memory allowed for math accuracy tests (default: 80%)")
|
||||
| Opt(cmd_options.reduce_iterations, "reduce_iterations")
|
||||
["-R"]["--reduce-iterations"]
|
||||
("Number of iterations for fuzzing reduce operations (default: 1)")
|
||||
| Opt(cmd_options.reduce_input_size, "reduce_input_size")
|
||||
["-Z"]["--reduce-input-size"]
|
||||
("Size of the input for the reduce sync operations performance test (megabytes) (default: 50)")
|
||||
;
|
||||
// clang-format on
|
||||
|
||||
session.cli(cli);
|
||||
|
||||
int out = session.run(argc, argv);
|
||||
TestContext::get().cleanContext();
|
||||
return out;
|
||||
}
|
||||
@@ -0,0 +1,5 @@
|
||||
#define CATCH_CONFIG_MAIN
|
||||
#include <catch.hpp>
|
||||
#include <cmd_options.hh>
|
||||
|
||||
// Definition of the global option values for this translation unit, which lets
// Catch generate main(); all members keep their default values.
CmdOptions cmd_options;
|
||||
@@ -0,0 +1,40 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <limits>
|
||||
|
||||
// Values of the extra command-line options of the test executables. Each member
// starts with the value that is used when its option is not given.
struct CmdOptions {
  int iterations{10};          // iterations for performance tests
  int warmups{100};            // warmup iterations for performance tests
  int cg_iterations{5};        // iterations for cooperative groups sync tests
  bool no_display{false};      // suppress the output of performance tests
  bool progress{false};        // show a progress bar for performance tests
  // 2^32 iterations for math accuracy tests with randomly generated inputs.
  uint64_t accuracy_iterations{std::numeric_limits<uint32_t>::max() + 1ull};
  uint64_t reduce_iterations{1};   // iterations for fuzzing reduce operations
  uint64_t reduce_input_size{50};  // input size of the reduce performance test
  int accuracy_max_memory{80};     // percentage of device memory for accuracy tests
};
|
||||
|
||||
// Declaration of the global option values; the object itself is defined in a
// source file.
extern CmdOptions cmd_options;
|
||||
@@ -0,0 +1,184 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <optional>
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip/hip_runtime_api.h>
|
||||
|
||||
struct CPUGrid {
|
||||
CPUGrid() = default;
|
||||
|
||||
CPUGrid(const dim3 grid_dim, const dim3 block_dim)
|
||||
: grid_dim_{grid_dim},
|
||||
block_dim_{block_dim},
|
||||
block_count_{grid_dim.x * grid_dim.y * grid_dim.z},
|
||||
threads_in_block_count_{block_dim.x * block_dim.y * block_dim.z},
|
||||
thread_count_{block_count_ * threads_in_block_count_} {}
|
||||
|
||||
inline std::optional<unsigned int> thread_rank_in_block(
|
||||
const unsigned int thread_rank_in_grid) const {
|
||||
if (thread_rank_in_grid > thread_count_) {
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
return thread_rank_in_grid % threads_in_block_count_;
|
||||
}
|
||||
|
||||
inline std::optional<dim3> block_idx(const unsigned int thread_rank_in_grid) const {
|
||||
if (thread_rank_in_grid > thread_count_) {
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
dim3 block_idx;
|
||||
const auto block_rank_in_grid = thread_rank_in_grid / threads_in_block_count_;
|
||||
block_idx.x = block_rank_in_grid % grid_dim_.x;
|
||||
block_idx.y = (block_rank_in_grid / grid_dim_.x) % grid_dim_.y;
|
||||
block_idx.z = block_rank_in_grid / (grid_dim_.x * grid_dim_.y);
|
||||
|
||||
return block_idx;
|
||||
}
|
||||
|
||||
inline std::optional<dim3> thread_idx(const unsigned int thread_rank_in_grid) const {
|
||||
if (thread_rank_in_grid > thread_count_) {
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
dim3 thread_idx;
|
||||
const auto thread_rank_in_block = thread_rank_in_grid % threads_in_block_count_;
|
||||
thread_idx.x = thread_rank_in_block % block_dim_.x;
|
||||
thread_idx.y = (thread_rank_in_block / block_dim_.x) % block_dim_.y;
|
||||
thread_idx.z = thread_rank_in_block / (block_dim_.x * block_dim_.y);
|
||||
|
||||
return thread_idx;
|
||||
}
|
||||
|
||||
dim3 grid_dim_;
|
||||
dim3 block_dim_;
|
||||
unsigned int block_count_;
|
||||
unsigned int threads_in_block_count_;
|
||||
unsigned int thread_count_;
|
||||
};
|
||||
|
||||
struct CPUMultiGrid {
|
||||
CPUMultiGrid(const unsigned int num_grids, const dim3 grid_dims[], const dim3 block_dims[]) {
|
||||
thread_count_ = 0;
|
||||
grid_count_ = num_grids;
|
||||
grids_.reserve(grid_count_);
|
||||
for (int i = 0; i < grid_count_; i++) {
|
||||
grids_.emplace_back(grid_dims[i], block_dims[i]);
|
||||
thread_count_ += grids_[i].thread_count_;
|
||||
}
|
||||
}
|
||||
|
||||
inline unsigned int thread0_rank_in_multi_grid(const unsigned int grid_rank) const {
|
||||
unsigned int multi_grid_thread_rank_0 = 0;
|
||||
unsigned int multi_grid_thread_count = 0;
|
||||
for (int i = 0; i <= grid_rank; i++) {
|
||||
multi_grid_thread_rank_0 = multi_grid_thread_count;
|
||||
multi_grid_thread_count += grids_[i].thread_count_;
|
||||
}
|
||||
return multi_grid_thread_rank_0;
|
||||
}
|
||||
|
||||
std::vector<CPUGrid> grids_;
|
||||
unsigned int grid_count_;
|
||||
unsigned int thread_count_;
|
||||
};
|
||||
|
||||
/* Generate dimensions for 1D, 2D and 3D blocks of threads */
|
||||
// Catch generator for block dimensions, based on the properties of device 0.
// Produces the unit block, the maximum extent along each axis, warp-size
// multiples (scaled by the factors below and capped at the axis maximum) along
// each axis, and a fixed set of 2D and 3D shapes.
inline dim3 GenerateThreadDimensions() {
  hipDeviceProp_t device_props;
  HIP_CHECK(hipGetDeviceProperties(&device_props, 0));
  const auto warp_factors = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2, 1.3,
                             1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2.0, 2.1, 2.2, 2.3, 2.4, 2.5};
  // The lambdas are passed as temporaries on purpose; do not hoist them.
  return GENERATE_COPY(
      dim3(1, 1, 1),
      dim3(device_props.maxThreadsDim[0], 1, 1),
      dim3(1, device_props.maxThreadsDim[1], 1),
      dim3(1, 1, device_props.maxThreadsDim[2]),
      map([limit = device_props.maxThreadsDim[0], warp = device_props.warpSize](double factor) {
            return dim3(std::min(static_cast<int>(factor * warp), limit), 1, 1);
          },
          values(warp_factors)),
      map([limit = device_props.maxThreadsDim[1], warp = device_props.warpSize](double factor) {
            return dim3(1, std::min(static_cast<int>(factor * warp), limit), 1);
          },
          values(warp_factors)),
      map([limit = device_props.maxThreadsDim[2], warp = device_props.warpSize](double factor) {
            return dim3(1, 1, std::min(static_cast<int>(factor * warp), limit));
          },
          values(warp_factors)),
      dim3(16, 8, 8),
      dim3(32, 32, 1),
      dim3(64, 8, 2),
      dim3(16, 16, 3),
      dim3(device_props.warpSize - 1, 3, 3),
      dim3(device_props.warpSize + 1, 3, 3));
}
|
||||
|
||||
/* Generate dimensions for 1D, 2D and 3D grids of blocks */
|
||||
// Catch generator for grid dimensions, based on the properties of device 0.
// Produces the unit grid, multiples of the multiprocessor count (scaled by the
// factors below) along each axis, and one fixed 5x5x5 grid.
inline dim3 GenerateBlockDimensions() {
  hipDeviceProp_t device_props;
  HIP_CHECK(hipGetDeviceProperties(&device_props, 0));
  const auto sm_factors = {0.5, 0.9, 1.0, 1.1, 1.5, 1.9, 2.0, 3.0, 4.0};
  // The lambdas are passed as temporaries on purpose; do not hoist them.
  return GENERATE_COPY(
      dim3(1, 1, 1),
      map([sm_count = device_props.multiProcessorCount](double factor) {
            return dim3(static_cast<int>(factor * sm_count), 1, 1);
          },
          values(sm_factors)),
      map([sm_count = device_props.multiProcessorCount](double factor) {
            return dim3(1, static_cast<int>(factor * sm_count), 1);
          },
          values(sm_factors)),
      map([sm_count = device_props.multiProcessorCount](double factor) {
            return dim3(1, 1, static_cast<int>(factor * sm_count));
          },
          values(sm_factors)),
      dim3(5, 5, 5));
}
|
||||
|
||||
/* Generate dimensions for 1D, 2D and 3D blocks of threads - reduced set */
|
||||
// Catch generator for block dimensions with a reduced set of scale factors,
// based on the properties of device 0. Produces the unit block, the maximum
// extent along each axis, warp-size multiples (capped at the axis maximum)
// along each axis, and a fixed set of 2D and 3D shapes.
inline dim3 GenerateThreadDimensionsForShuffle() {
  hipDeviceProp_t device_props;
  HIP_CHECK(hipGetDeviceProperties(&device_props, 0));
  const auto warp_factors = {0.5, 0.9, 1.0, 1.5, 2.0};
  // The lambdas are passed as temporaries on purpose; do not hoist them.
  return GENERATE_COPY(
      dim3(1, 1, 1),
      dim3(device_props.maxThreadsDim[0], 1, 1),
      dim3(1, device_props.maxThreadsDim[1], 1),
      dim3(1, 1, device_props.maxThreadsDim[2]),
      map([limit = device_props.maxThreadsDim[0], warp = device_props.warpSize](double factor) {
            return dim3(std::min(static_cast<int>(factor * warp), limit), 1, 1);
          },
          values(warp_factors)),
      map([limit = device_props.maxThreadsDim[1], warp = device_props.warpSize](double factor) {
            return dim3(1, std::min(static_cast<int>(factor * warp), limit), 1);
          },
          values(warp_factors)),
      map([limit = device_props.maxThreadsDim[2], warp = device_props.warpSize](double factor) {
            return dim3(1, 1, std::min(static_cast<int>(factor * warp), limit));
          },
          values(warp_factors)),
      dim3(16, 8, 8),
      dim3(32, 32, 1),
      dim3(64, 8, 2),
      dim3(16, 16, 3),
      dim3(device_props.warpSize - 1, 3, 3),
      dim3(device_props.warpSize + 1, 3, 3));
}
|
||||
|
||||
/* Generate dimensions for 1D, 2D and 3D grids of blocks - reduced set */
|
||||
// Catch generator for grid dimensions with a reduced set of scale factors,
// based on the properties of device 0. Produces the unit grid, multiples of the
// multiprocessor count along each axis, and one fixed 5x5x5 grid.
inline dim3 GenerateBlockDimensionsForShuffle() {
  hipDeviceProp_t device_props;
  HIP_CHECK(hipGetDeviceProperties(&device_props, 0));
  const auto sm_factors = {0.5, 1.0};
  // The lambdas are passed as temporaries on purpose; do not hoist them.
  return GENERATE_COPY(
      dim3(1, 1, 1),
      map([sm_count = device_props.multiProcessorCount](double factor) {
            return dim3(static_cast<int>(factor * sm_count), 1, 1);
          },
          values(sm_factors)),
      map([sm_count = device_props.multiProcessorCount](double factor) {
            return dim3(1, static_cast<int>(factor * sm_count), 1);
          },
          values(sm_factors)),
      map([sm_count = device_props.multiProcessorCount](double factor) {
            return dim3(1, 1, static_cast<int>(factor * sm_count));
          },
          values(sm_factors)),
      dim3(5, 5, 5));
}
|
||||
@@ -0,0 +1,26 @@
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
static int HmmAttrPrint() {
|
||||
int managed = 0;
|
||||
INFO(
|
||||
"The following are the attribute values related to HMM for"
|
||||
" device 0:\n");
|
||||
HIP_CHECK(hipDeviceGetAttribute(&managed, hipDeviceAttributeDirectManagedMemAccessFromHost, 0));
|
||||
INFO("hipDeviceAttributeDirectManagedMemAccessFromHost: " << managed);
|
||||
HIP_CHECK(hipDeviceGetAttribute(&managed, hipDeviceAttributeConcurrentManagedAccess, 0));
|
||||
INFO("hipDeviceAttributeConcurrentManagedAccess: " << managed);
|
||||
HIP_CHECK(hipDeviceGetAttribute(&managed, hipDeviceAttributePageableMemoryAccess, 0));
|
||||
INFO("hipDeviceAttributePageableMemoryAccess: " << managed);
|
||||
HIP_CHECK(
|
||||
hipDeviceGetAttribute(&managed, hipDeviceAttributePageableMemoryAccessUsesHostPageTables, 0));
|
||||
INFO("hipDeviceAttributePageableMemoryAccessUsesHostPageTables:" << managed);
|
||||
|
||||
HIP_CHECK(hipDeviceGetAttribute(&managed, hipDeviceAttributeManagedMemory, 0));
|
||||
INFO("hipDeviceAttributeManagedMemory: " << managed);
|
||||
if (managed != 1) {
|
||||
WARN(
|
||||
"GPU 0 doesn't support hipDeviceAttributeManagedMemory attribute so defaulting to system "
|
||||
"memory.");
|
||||
}
|
||||
return managed;
|
||||
}
|
||||
@@ -0,0 +1,334 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
template <class T, size_t N, hipArray_Format Format> struct type_and_size_and_format {
|
||||
using type = T;
|
||||
static constexpr size_t size = N;
|
||||
static constexpr hipArray_Format format = Format;
|
||||
};
|
||||
|
||||
// Create a map of type to scalar type, vector size and scalar type format enum.
|
||||
// This is useful for creating simpler function that depend on the vector size.
|
||||
template <typename T> struct vector_info;
|
||||
template <>
|
||||
struct vector_info<int> : type_and_size_and_format<int, 1, HIP_AD_FORMAT_SIGNED_INT32> {};
|
||||
template <> struct vector_info<float> : type_and_size_and_format<float, 1, HIP_AD_FORMAT_FLOAT> {};
|
||||
template <>
|
||||
struct vector_info<short> : type_and_size_and_format<short, 1, HIP_AD_FORMAT_SIGNED_INT16> {};
|
||||
template <>
|
||||
struct vector_info<char> : type_and_size_and_format<char, 1, HIP_AD_FORMAT_SIGNED_INT8> {};
|
||||
template <>
|
||||
struct vector_info<unsigned int>
|
||||
: type_and_size_and_format<unsigned int, 1, HIP_AD_FORMAT_UNSIGNED_INT32> {};
|
||||
template <>
|
||||
struct vector_info<unsigned short>
|
||||
: type_and_size_and_format<unsigned short, 1, HIP_AD_FORMAT_UNSIGNED_INT16> {};
|
||||
template <>
|
||||
struct vector_info<unsigned char>
|
||||
: type_and_size_and_format<unsigned char, 1, HIP_AD_FORMAT_UNSIGNED_INT8> {};
|
||||
|
||||
template <>
|
||||
struct vector_info<int2> : type_and_size_and_format<int, 2, HIP_AD_FORMAT_SIGNED_INT32> {};
|
||||
template <> struct vector_info<float2> : type_and_size_and_format<float, 2, HIP_AD_FORMAT_FLOAT> {};
|
||||
template <>
|
||||
struct vector_info<short2> : type_and_size_and_format<short, 2, HIP_AD_FORMAT_SIGNED_INT16> {};
|
||||
template <>
|
||||
struct vector_info<char2> : type_and_size_and_format<char, 2, HIP_AD_FORMAT_SIGNED_INT8> {};
|
||||
template <>
|
||||
struct vector_info<uint2>
|
||||
: type_and_size_and_format<unsigned int, 2, HIP_AD_FORMAT_UNSIGNED_INT32> {};
|
||||
template <>
|
||||
struct vector_info<ushort2>
|
||||
: type_and_size_and_format<unsigned short, 2, HIP_AD_FORMAT_UNSIGNED_INT16> {};
|
||||
template <>
|
||||
struct vector_info<uchar2>
|
||||
: type_and_size_and_format<unsigned char, 2, HIP_AD_FORMAT_UNSIGNED_INT8> {};
|
||||
|
||||
template <>
|
||||
struct vector_info<int4> : type_and_size_and_format<int, 4, HIP_AD_FORMAT_SIGNED_INT32> {};
|
||||
template <> struct vector_info<float4> : type_and_size_and_format<float, 4, HIP_AD_FORMAT_FLOAT> {};
|
||||
template <>
|
||||
struct vector_info<short4> : type_and_size_and_format<short, 4, HIP_AD_FORMAT_SIGNED_INT16> {};
|
||||
template <>
|
||||
struct vector_info<char4> : type_and_size_and_format<char, 4, HIP_AD_FORMAT_SIGNED_INT8> {};
|
||||
template <>
|
||||
struct vector_info<uint4>
|
||||
: type_and_size_and_format<unsigned int, 4, HIP_AD_FORMAT_UNSIGNED_INT32> {};
|
||||
template <>
|
||||
struct vector_info<ushort4>
|
||||
: type_and_size_and_format<unsigned short, 4, HIP_AD_FORMAT_UNSIGNED_INT16> {};
|
||||
template <>
|
||||
struct vector_info<uchar4>
|
||||
: type_and_size_and_format<unsigned char, 4, HIP_AD_FORMAT_UNSIGNED_INT8> {};
|
||||
|
||||
template <
|
||||
typename T,
|
||||
typename std::enable_if<std::is_scalar<T>::value == false>::type* = nullptr>
|
||||
static inline __host__ __device__ constexpr int rank() {
|
||||
return sizeof(T) / sizeof(decltype(T::x));
|
||||
}
|
||||
|
||||
template<
|
||||
typename T,
|
||||
typename std::enable_if<rank<T>() == 1>::type* = nullptr>
|
||||
static inline bool isEqual(const T &val0, const T &val1) {
|
||||
return val0.x == val1.x;
|
||||
}
|
||||
|
||||
template<
|
||||
typename T,
|
||||
typename std::enable_if<rank<T>() == 2>::type* = nullptr>
|
||||
static inline bool isEqual(const T &val0, const T &val1) {
|
||||
return val0.x == val1.x &&
|
||||
val0.y == val1.y;
|
||||
}
|
||||
|
||||
template<
|
||||
typename T,
|
||||
typename std::enable_if<rank<T>() == 4>::type* = nullptr>
|
||||
static inline bool isEqual(const T &val0, const T &val1) {
|
||||
return val0.x == val1.x &&
|
||||
val0.y == val1.y &&
|
||||
val0.z == val1.z &&
|
||||
val0.w == val1.w;
|
||||
}
|
||||
|
||||
// Equality of two scalar values, using operator==.
template <typename T, std::enable_if_t<std::is_scalar<T>::value>* = nullptr>
static inline bool isEqual(const T& val0, const T& val1) {
  const bool same = (val0 == val1);
  return same;
}
|
||||
|
||||
template<
|
||||
typename T,
|
||||
typename std::enable_if<rank<T>() == 1>::type* = nullptr>
|
||||
const std::string getString(const T& t)
|
||||
{
|
||||
std::ostringstream os;
|
||||
if constexpr (std::is_same<decltype(T::x), char>::value ||
|
||||
std::is_same<decltype(T::x), unsigned char>::value) {
|
||||
os << "(" << static_cast<int>(t.x) << ")";
|
||||
} else {
|
||||
os << "(" << t.x << ")";
|
||||
}
|
||||
return os.str();
|
||||
}
|
||||
|
||||
template<
|
||||
typename T,
|
||||
typename std::enable_if<rank<T>() == 2>::type* = nullptr>
|
||||
const std::string getString(const T& t)
|
||||
{
|
||||
std::ostringstream os;
|
||||
if constexpr (std::is_same<decltype(T::x), char>::value ||
|
||||
std::is_same<decltype(T::x), unsigned char>::value) {
|
||||
os << "(" << static_cast<int>(t.x) << ", " << static_cast<int>(t.y) << ")";
|
||||
} else {
|
||||
os << "(" << t.x << ", " << t.y << ")";
|
||||
}
|
||||
return os.str();
|
||||
}
|
||||
|
||||
template<
|
||||
typename T,
|
||||
typename std::enable_if<rank<T>() == 3>::type* = nullptr>
|
||||
const std::string getString(const T& t)
|
||||
{
|
||||
std::ostringstream os;
|
||||
if constexpr (std::is_same<decltype(T::x), char>::value ||
|
||||
std::is_same<decltype(T::x), unsigned char>::value) {
|
||||
os << "(" << static_cast<int>(t.x) << ", " << static_cast<int>(t.y) << ", " <<
|
||||
static_cast<int>(t.z) << ")";
|
||||
} else {
|
||||
os << "(" << t.x << ", " << t.y << ", " << t.z << ")";
|
||||
}
|
||||
return os.str();
|
||||
}
|
||||
|
||||
template<
|
||||
typename T,
|
||||
typename std::enable_if<rank<T>() == 4>::type* = nullptr>
|
||||
const std::string getString(const T& t)
|
||||
{
|
||||
std::ostringstream os;
|
||||
if constexpr (std::is_same<decltype(T::x), char>::value ||
|
||||
std::is_same<decltype(T::x), unsigned char>::value) {
|
||||
os << "(" << static_cast<int>(t.x) << ", " << static_cast<int>(t.y) << ", " <<
|
||||
static_cast<int>(t.z) << ", " << static_cast<int>(t.w) << ")";
|
||||
} else {
|
||||
os << "(" << t.x << ", " << t.y << ", " << t.z << ", " << t.w << ")";
|
||||
}
|
||||
return os.str();
|
||||
}
|
||||
|
||||
// Formats a scalar value without any decoration. Values of type char or
// unsigned char are printed as integers instead of characters.
template <typename T, std::enable_if_t<std::is_scalar<T>::value>* = nullptr>
std::string getString(const T& t) {
  constexpr bool kPrintAsInt =
      std::is_same<T, char>::value || std::is_same<T, unsigned char>::value;

  std::ostringstream os;
  if constexpr (kPrintAsInt) {
    os << static_cast<int>(t);
  } else {
    os << t;
  }
  return os.str();
}
|
||||
|
||||
// Draws one value of type T from a single std::rand() call.
// The sample is first scaled to [-1, 1) for signed T or to [0, 1) for unsigned
// T. Floating point results are that fraction times 1000, integer results are
// that fraction times the largest value of T.
template <typename T> static inline T getRandom() {
  const int sample = std::rand();
  const double fraction = std::is_signed<T>::value
                              ? (sample - RAND_MAX / 2.0) / (RAND_MAX / 2.0 + 1.)
                              : sample / (RAND_MAX + 1.);

  if constexpr (std::is_floating_point<T>::value) {
    // Keep floating point values within (-1000, 1000) so that later
    // calculations do not have to deal with very large magnitudes.
    return static_cast<T>(fraction * 1000.);
  } else {
    return static_cast<T>(std::numeric_limits<T>::max() * fraction);
  }
}
|
||||
|
||||
template<
|
||||
typename T,
|
||||
typename std::enable_if<rank<T>() == 1>::type* = nullptr>
|
||||
static inline void initVal(T &val) {
|
||||
val.x = getRandom<decltype(T::x)>();
|
||||
}
|
||||
|
||||
template<
|
||||
typename T,
|
||||
typename std::enable_if<rank<T>() == 2>::type* = nullptr>
|
||||
static inline void initVal(T &val) {
|
||||
val.x = getRandom<decltype(T::x)>();
|
||||
val.y = getRandom<decltype(T::x)>();
|
||||
}
|
||||
|
||||
template<
|
||||
typename T,
|
||||
typename std::enable_if<rank<T>() == 4>::type* = nullptr>
|
||||
static inline void initVal(T &val) {
|
||||
val.x = getRandom<decltype(T::x)>();
|
||||
val.y = getRandom<decltype(T::x)>();
|
||||
val.z = getRandom<decltype(T::x)>();
|
||||
val.w = getRandom<decltype(T::x)>();
|
||||
}
|
||||
|
||||
template<
|
||||
typename T,
|
||||
typename std::enable_if<std::is_scalar<T>::value>::type* = nullptr>
|
||||
static inline void initVal(T &val) {
|
||||
val = getRandom<T>();
|
||||
}
|
||||
|
||||
/*Convert normalized floatx to typex*/
|
||||
// Scales each component of the normalized float value `f` by the maximum of
// the matching component type of T and casts the product to that type.
// For scalar T only f.x is used; for vector T the first rank<T>() components
// (x, y, z, w) are converted.
template <typename T, typename F> inline __device__ T getTypeFromNormalizedFloat(const F &f) {
  T result;
  if constexpr (!std::is_scalar<T>::value) {
    if constexpr (rank<T>() > 0) {
      using X = decltype(T::x);
      result.x = static_cast<X>(f.x * std::numeric_limits<X>::max());
    }
    if constexpr (rank<T>() > 1) {
      using Y = decltype(T::y);
      result.y = static_cast<Y>(f.y * std::numeric_limits<Y>::max());
    }
    if constexpr (rank<T>() > 2) {
      using Z = decltype(T::z);
      result.z = static_cast<Z>(f.z * std::numeric_limits<Z>::max());
    }
    if constexpr (rank<T>() > 3) {
      using W = decltype(T::w);
      result.w = static_cast<W>(f.w * std::numeric_limits<W>::max());
    }
  } else {
    result = static_cast<T>(f.x * std::numeric_limits<T>::max());
  }
  return result;
}
|
||||
|
||||
/*Convert typex to normalized floatx*/
|
||||
template <class T>
|
||||
inline auto getNormalizedFloatType(const T &t) {
|
||||
if constexpr (std::is_scalar<T>::value)
|
||||
return static_cast<float>(t) / std::numeric_limits<T>::max();
|
||||
else {
|
||||
if constexpr (rank<T>() == 1) {
|
||||
float1 f{static_cast<float>(t.x) / std::numeric_limits<decltype(T::x)>::max()};
|
||||
return f;
|
||||
}
|
||||
if constexpr (rank<T>() == 2) {
|
||||
float2 f{static_cast<float>(t.x) / std::numeric_limits<decltype(T::x)>::max(),
|
||||
static_cast<float>(t.y) / std::numeric_limits<decltype(T::y)>::max()};
|
||||
return f;
|
||||
}
|
||||
if constexpr (rank<T>() == 3) {
|
||||
float3 f{static_cast<float>(t.x) / std::numeric_limits<decltype(T::x)>::max(),
|
||||
static_cast<float>(t.y) / std::numeric_limits<decltype(T::y)>::max(),
|
||||
static_cast<float>(t.z) / std::numeric_limits<decltype(T::z)>::max()};
|
||||
return f;
|
||||
}
|
||||
if constexpr (rank<T>() == 4) {
|
||||
float4 f{static_cast<float>(t.x) / std::numeric_limits<decltype(T::x)>::max(),
|
||||
static_cast<float>(t.y) / std::numeric_limits<decltype(T::y)>::max(),
|
||||
static_cast<float>(t.z) / std::numeric_limits<decltype(T::z)>::max(),
|
||||
static_cast<float>(t.w) / std::numeric_limits<decltype(T::w)>::max()};
|
||||
return f;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*Check if T is floatx*/
|
||||
// Compile-time check: true when T is a floating-point scalar, or a non-scalar
// type whose `x` member is floating point.
template <typename T> inline bool constexpr isFloat() {
  if constexpr (!std::is_scalar<T>::value) {
    using Component = decltype(T::x);
    return std::is_floating_point<Component>::value;
  } else {
    return std::is_floating_point<T>::value;
  }
  // Not reachable; kept so that every compiler sees a return on all paths.
  return false;
}
|
||||
|
||||
// Computes the arithmetic mean of data[0..N) and, when `deviation` is not
// null, the sample standard deviation (N - 1 in the denominator).
//
// Degenerate inputs are handled explicitly instead of producing NaN or
// wrapping N - 1:
//   - N == 0: mean is set to 0 (and *deviation to 0 when requested).
//   - N == 1: *deviation is set to 0, a single sample has no spread.
template <typename T>
void getStatics(T* data, size_t N, double& mean, double* deviation = nullptr) {
  if (N == 0) {
    mean = 0;
    if (deviation) *deviation = 0;
    return;
  }

  double sum = 0;
  for (size_t i = 0; i < N; i++) {
    sum += static_cast<double>(data[i]);
  }
  mean = sum / N;

  if (!deviation) return;

  if (N < 2) {
    // N - 1 would be 0 and the result 0/0.
    *deviation = 0;
    return;
  }

  double squares = 0;
  for (size_t i = 0; i < N; i++) {
    double delta = data[i] - mean;
    squares += delta * delta;
  }
  *deviation = sqrt(squares / (N - 1));
}
|
||||
|
||||
// Compares data[0..N) with data1[0..N) element by element using isEqual().
// On the first difference the index and both values are printed and false is
// returned; returns true when all N elements compare equal.
template <typename T> bool verify(T* data, T* data1, size_t N) {
  size_t idx = 0;
  while (idx < N && isEqual(data[idx], data1[idx])) {
    ++idx;
  }
  if (idx == N) {
    return true;
  }
  printf("Difference [ %zu ]:%s ----%s\n", idx, getString(data[idx]).c_str(),
         getString(data1[idx]).c_str());
  return false;
}
|
||||
@@ -0,0 +1,399 @@
|
||||
/*
|
||||
Copyright (c) 2021 - 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "hip_test_common.hh"
|
||||
#include "hip_array_common.hh"
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <regex>
|
||||
#include <type_traits>
|
||||
#define TOL 0.001
|
||||
#define guarantee(cond, str) \
|
||||
{ \
|
||||
if (!(cond)) { \
|
||||
INFO("guarantee failed: " << str); \
|
||||
abort(); \
|
||||
} \
|
||||
}
|
||||
|
||||
|
||||
namespace HipTest {
|
||||
template <typename T>
|
||||
size_t checkVectors(T* A, T* B, T* Out, size_t N, T (*F)(T a, T b), bool expectMatch = true,
|
||||
bool reportMismatch = true) {
|
||||
size_t mismatchCount = 0;
|
||||
size_t firstMismatch = 0;
|
||||
size_t mismatchesToPrint = 10;
|
||||
for (size_t i = 0; i < N; i++) {
|
||||
T expected = F(A[i], B[i]);
|
||||
if (std::fabs(Out[i] - expected) > TOL) {
|
||||
if (mismatchCount == 0) {
|
||||
firstMismatch = i;
|
||||
}
|
||||
mismatchCount++;
|
||||
if ((mismatchCount <= mismatchesToPrint) && expectMatch) {
|
||||
INFO("Mismatch at " << i << " Computed: " << Out[i] << " Expeted: " << expected);
|
||||
CHECK(false);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (reportMismatch) {
|
||||
if (expectMatch) {
|
||||
if (mismatchCount) {
|
||||
INFO(mismatchCount << " Mismatches First Mismatch at index : " << firstMismatch);
|
||||
REQUIRE(false);
|
||||
}
|
||||
} else {
|
||||
if (mismatchCount == 0) {
|
||||
INFO("Expected Mismatch but not found any");
|
||||
REQUIRE(false);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return mismatchCount;
|
||||
}
|
||||
template <typename T> // pointer type
|
||||
bool checkArray(T* hData, T* hOutputData, size_t width, size_t height, size_t depth = 1) {
|
||||
for (size_t i = 0; i < depth; i++) {
|
||||
for (size_t j = 0; j < height; j++) {
|
||||
for (size_t k = 0; k < width; k++) {
|
||||
int offset = i * width * height + j * width + k;
|
||||
if (!isEqual(hData[offset], hOutputData[offset])) {
|
||||
INFO("Mismatch at [" << i << "," << j << "," << k << "]:" << getString(hData[offset])
|
||||
<< "----" << getString(hOutputData[offset]));
|
||||
CHECK(false);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
size_t checkVectorADD(T* A_h, T* B_h, T* result_H, size_t N, bool expectMatch = true,
|
||||
bool reportMismatch = true) {
|
||||
return checkVectors<T>(
|
||||
A_h, B_h, result_H, N, [](T a, T b) { return a + b; }, expectMatch, reportMismatch);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
size_t checkVectorSUB(T* A_h, T* B_h, T* result_H, size_t N, bool expectMatch = true,
|
||||
bool reportMismatch = true) {
|
||||
return checkVectors<T>(
|
||||
A_h, B_h, result_H, N, [](T a, T b) { return a - b; }, expectMatch, reportMismatch);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void checkTest(T* expected_H, T* result_H, size_t N, bool expectMatch = true) {
|
||||
checkVectors<T>(
|
||||
expected_H, expected_H, result_H, N,
|
||||
[](T a, T b) {
|
||||
guarantee(a == b, "Both values should be equal");
|
||||
return a;
|
||||
},
|
||||
expectMatch);
|
||||
}
|
||||
|
||||
|
||||
// Setters and Memory Management
|
||||
|
||||
// Fills the host buffers with default test data; null buffers are skipped.
//   int / unsigned int    : A = 3,          B = 4,          C = 5
//   char / unsigned char  : A = 'a',        B = 'b',        C = 'c'
//   any other T           : A = 3.146f + i, B = 1.618f + i, C = 1.4f + i
template <typename T> void setDefaultData(size_t numElements, T* A_h, T* B_h, T* C_h) {
  constexpr bool integerData =
      std::is_same<T, int>::value || std::is_same<T, unsigned int>::value;
  constexpr bool charData =
      std::is_same<T, char>::value || std::is_same<T, unsigned char>::value;

  for (size_t idx = 0; idx != numElements; ++idx) {
    if (integerData) {
      if (A_h) A_h[idx] = 3;
      if (B_h) B_h[idx] = 4;
      if (C_h) C_h[idx] = 5;
    } else if (charData) {
      if (A_h) A_h[idx] = 'a';
      if (B_h) B_h[idx] = 'b';
      if (C_h) C_h[idx] = 'c';
    } else {
      if (A_h) A_h[idx] = 3.146f + idx;
      if (B_h) B_h[idx] = 1.618f + idx;
      if (C_h) C_h[idx] = 1.4f + idx;
    }
  }
}
|
||||
|
||||
template <typename T>
|
||||
bool initArraysForHost(T** A_h, T** B_h, T** C_h, size_t N, bool usePinnedHost = false) {
|
||||
size_t Nbytes = N * sizeof(T);
|
||||
|
||||
if (usePinnedHost) {
|
||||
if (A_h) {
|
||||
HIP_CHECK(hipHostMalloc((void**)A_h, Nbytes));
|
||||
}
|
||||
if (B_h) {
|
||||
HIP_CHECK(hipHostMalloc((void**)B_h, Nbytes));
|
||||
}
|
||||
if (C_h) {
|
||||
HIP_CHECK(hipHostMalloc((void**)C_h, Nbytes));
|
||||
}
|
||||
} else {
|
||||
if (A_h) {
|
||||
*A_h = (T*)malloc(Nbytes);
|
||||
REQUIRE(*A_h != nullptr);
|
||||
}
|
||||
|
||||
if (B_h) {
|
||||
*B_h = (T*)malloc(Nbytes);
|
||||
REQUIRE(*B_h != nullptr);
|
||||
}
|
||||
|
||||
if (C_h) {
|
||||
*C_h = (T*)malloc(Nbytes);
|
||||
REQUIRE(*C_h != nullptr);
|
||||
}
|
||||
}
|
||||
|
||||
setDefaultData(N, A_h ? *A_h : nullptr, B_h ? *B_h : nullptr, C_h ? *C_h : nullptr);
|
||||
return true;
|
||||
}
|
||||
|
||||
// Allocates N elements on the device for each non-null device pointer slot
// (hipMalloc, checked with HIP_CHECK), then allocates and initializes the host
// buffers through initArraysForHost(). Returns that call's result.
template <typename T>
bool initArrays(T** A_d, T** B_d, T** C_d, T** A_h, T** B_h, T** C_h, size_t N,
                bool usePinnedHost = false) {
  size_t Nbytes = N * sizeof(T);

  if (A_d != nullptr) {
    HIP_CHECK(hipMalloc(A_d, Nbytes));
  }
  if (B_d != nullptr) {
    HIP_CHECK(hipMalloc(B_d, Nbytes));
  }
  if (C_d != nullptr) {
    HIP_CHECK(hipMalloc(C_d, Nbytes));
  }

  const bool hostReady = initArraysForHost(A_h, B_h, C_h, N, usePinnedHost);
  return hostReady;
}
|
||||
|
||||
// Threaded version of setDefaultData to be called from multi thread tests
|
||||
// Call HIP_CHECK_THREAD_FINALIZE after joining
|
||||
// Fills the host buffers with the same default data as setDefaultData; null
// buffers are skipped.
//   int / unsigned int    : A = 3,          B = 4,          C = 5
//   char / unsigned char  : A = 'a',        B = 'b',        C = 'c'
//   any other T           : A = 3.146f + i, B = 1.618f + i, C = 1.4f + i
template <typename T> void setDefaultDataT(size_t numElements, T* A_h, T* B_h, T* C_h) {
  enum class Fill { Integer, Character, Real };
  const Fill fill =
      (std::is_same<T, int>::value || std::is_same<T, unsigned int>::value)
          ? Fill::Integer
          : (std::is_same<T, char>::value || std::is_same<T, unsigned char>::value)
                ? Fill::Character
                : Fill::Real;

  size_t i = 0;
  while (i < numElements) {
    switch (fill) {
      case Fill::Integer:
        if (A_h) A_h[i] = 3;
        if (B_h) B_h[i] = 4;
        if (C_h) C_h[i] = 5;
        break;
      case Fill::Character:
        if (A_h) A_h[i] = 'a';
        if (B_h) B_h[i] = 'b';
        if (C_h) C_h[i] = 'c';
        break;
      case Fill::Real:
        if (A_h) A_h[i] = 3.146f + i;
        if (B_h) B_h[i] = 1.618f + i;
        if (C_h) C_h[i] = 1.4f + i;
        break;
    }
    ++i;
  }
}
|
||||
|
||||
// Threaded version of initArraysForHost to be called from multi thread tests
|
||||
// Call HIP_CHECK_THREAD_FINALIZE after joining
|
||||
template <typename T>
|
||||
void initArraysForHostT(T** A_h, T** B_h, T** C_h, size_t N, bool usePinnedHost = false) {
|
||||
size_t Nbytes = N * sizeof(T);
|
||||
|
||||
if (usePinnedHost) {
|
||||
if (A_h) {
|
||||
HIP_CHECK_THREAD(hipHostMalloc((void**)A_h, Nbytes));
|
||||
}
|
||||
if (B_h) {
|
||||
HIP_CHECK_THREAD(hipHostMalloc((void**)B_h, Nbytes));
|
||||
}
|
||||
if (C_h) {
|
||||
HIP_CHECK_THREAD(hipHostMalloc((void**)C_h, Nbytes));
|
||||
}
|
||||
} else {
|
||||
if (A_h) {
|
||||
*A_h = (T*)malloc(Nbytes);
|
||||
REQUIRE_THREAD(*A_h != nullptr);
|
||||
}
|
||||
|
||||
if (B_h) {
|
||||
*B_h = (T*)malloc(Nbytes);
|
||||
REQUIRE_THREAD(*B_h != nullptr);
|
||||
}
|
||||
|
||||
if (C_h) {
|
||||
*C_h = (T*)malloc(Nbytes);
|
||||
REQUIRE_THREAD(*C_h != nullptr);
|
||||
}
|
||||
}
|
||||
|
||||
setDefaultDataT(N, A_h ? *A_h : nullptr, B_h ? *B_h : nullptr, C_h ? *C_h : nullptr);
|
||||
}
|
||||
|
||||
// Threaded version of initArrays to be called from multi thread tests
|
||||
// Call HIP_CHECK_THREAD_FINALIZE after joining
|
||||
// Counterpart of initArrays built on HIP_CHECK_THREAD.
// Allocates N elements on the device for each non-null device pointer slot and
// then sets up the host buffers through initArraysForHostT().
template <typename T>
void initArraysT(T** A_d, T** B_d, T** C_d, T** A_h, T** B_h, T** C_h, size_t N,
                 bool usePinnedHost = false) {
  size_t Nbytes = N * sizeof(T);

  if (A_d != nullptr) {
    HIP_CHECK_THREAD(hipMalloc(A_d, Nbytes));
  }
  if (B_d != nullptr) {
    HIP_CHECK_THREAD(hipMalloc(B_d, Nbytes));
  }
  if (C_d != nullptr) {
    HIP_CHECK_THREAD(hipMalloc(C_d, Nbytes));
  }

  initArraysForHostT(A_h, B_h, C_h, N, usePinnedHost);
}
|
||||
|
||||
// Threaded version of freeArraysForHost to be called from multi thread tests
|
||||
// Call HIP_CHECK_THREAD_FINALIZE after joining
|
||||
// Releases the host buffers: hipHostFree (checked with HIP_CHECK_THREAD) when
// usePinnedHost is set, free() otherwise. Null pointers are ignored.
template <typename T> void freeArraysForHostT(T* A_h, T* B_h, T* C_h, bool usePinnedHost) {
  if (!usePinnedHost) {
    // free() accepts null pointers, so no per-pointer guard is needed.
    free(A_h);
    free(B_h);
    free(C_h);
    return;
  }

  if (A_h != nullptr) {
    HIP_CHECK_THREAD(hipHostFree(A_h));
  }
  if (B_h != nullptr) {
    HIP_CHECK_THREAD(hipHostFree(B_h));
  }
  if (C_h != nullptr) {
    HIP_CHECK_THREAD(hipHostFree(C_h));
  }
}
|
||||
|
||||
// Releases the host buffers: hipHostFree (checked with HIP_CHECK) when
// usePinnedHost is set, free() otherwise. Null pointers are ignored.
// Always returns true; failures are raised through HIP_CHECK.
template <typename T> bool freeArraysForHost(T* A_h, T* B_h, T* C_h, bool usePinnedHost) {
  if (!usePinnedHost) {
    // free() accepts null pointers, so no per-pointer guard is needed.
    free(A_h);
    free(B_h);
    free(C_h);
    return true;
  }

  if (A_h != nullptr) {
    HIP_CHECK(hipHostFree(A_h));
  }
  if (B_h != nullptr) {
    HIP_CHECK(hipHostFree(B_h));
  }
  if (C_h != nullptr) {
    HIP_CHECK(hipHostFree(C_h));
  }
  return true;
}
|
||||
|
||||
// Counterpart of freeArrays built on HIP_CHECK_THREAD.
// Frees each non-null device buffer with hipFree, then releases the host
// buffers through freeArraysForHostT().
template <typename T>
void freeArraysT(T* A_d, T* B_d, T* C_d, T* A_h, T* B_h, T* C_h, bool usePinnedHost) {
  if (A_d != nullptr) {
    HIP_CHECK_THREAD(hipFree(A_d));
  }
  if (B_d != nullptr) {
    HIP_CHECK_THREAD(hipFree(B_d));
  }
  if (C_d != nullptr) {
    HIP_CHECK_THREAD(hipFree(C_d));
  }

  freeArraysForHostT(A_h, B_h, C_h, usePinnedHost);
}
|
||||
|
||||
// Frees each non-null device buffer with hipFree (checked with HIP_CHECK),
// then releases the host buffers through freeArraysForHost() and returns that
// call's result.
template <typename T>
bool freeArrays(T* A_d, T* B_d, T* C_d, T* A_h, T* B_h, T* C_h, bool usePinnedHost) {
  if (A_d != nullptr) {
    HIP_CHECK(hipFree(A_d));
  }
  if (B_d != nullptr) {
    HIP_CHECK(hipFree(B_d));
  }
  if (C_d != nullptr) {
    HIP_CHECK(hipFree(C_d));
  }

  const bool hostFreed = freeArraysForHost(A_h, B_h, C_h, usePinnedHost);
  return hostFreed;
}
|
||||
|
||||
template <typename T>
|
||||
static bool assemblyFile_Verification(std::string assemfilename, std::string inst) {
|
||||
std::string filePath = "./catch/unit/deviceLib/";
|
||||
bool result = false;
|
||||
std::string filename;
|
||||
filename = filePath + assemfilename;
|
||||
std::ifstream file(filename.c_str(), std::ios::out);
|
||||
if (file) {
|
||||
std::string line;
|
||||
int line_pos = 0, start_pos = 0;
|
||||
int last_pos = 0;
|
||||
int start_match = 0;
|
||||
while (getline(file, line)) {
|
||||
line_pos++;
|
||||
if ((std::is_same<T, float>::value)) {
|
||||
if (!start_pos && std::regex_search(line, std::regex("Begin function (.*)AtomicCheck"))) {
|
||||
start_pos = line_pos;
|
||||
}
|
||||
if (!last_pos && std::regex_search(line, std::regex(".Lfunc_end0-(.*)AtomicCheck"))) {
|
||||
last_pos = line_pos;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
if ((start_match != 2) &&
|
||||
std::regex_search(line, std::regex("Begin function (.*)AtomicCheck"))) {
|
||||
start_match++;
|
||||
if (start_match == 2) start_pos = line_pos;
|
||||
}
|
||||
if (!last_pos && std::regex_search(line, std::regex("func_end1-(.*)AtomicCheck"))) {
|
||||
last_pos = line_pos;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (start_pos) {
|
||||
result = std::regex_search(line, std::regex(inst));
|
||||
if (result) break;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
result = true;
|
||||
SUCCEED("Assembly file does not exist");
|
||||
}
|
||||
return result;
|
||||
}
|
||||
} // namespace HipTest
|
||||
@@ -0,0 +1,618 @@
|
||||
/*
|
||||
Copyright (c) 2021 - 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#pragma clang diagnostic ignored "-Wsign-compare"
|
||||
#include "hip_test_context.hh"
|
||||
|
||||
#include <catch.hpp>
|
||||
#include <atomic>
|
||||
#include <chrono>
|
||||
#include <cstring>
|
||||
#include <cstdlib>
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <mutex>
|
||||
#include <cstdlib>
|
||||
#include <thread>
|
||||
#include "hip_test_features.hh"
|
||||
|
||||
#ifdef TEST_CLOCK_CYCLE
|
||||
#define clock_function() clock64()
|
||||
#else
|
||||
#define clock_function() wall_clock64()
|
||||
#endif
|
||||
|
||||
#define HIP_PRINT_STATUS(status) INFO(hipGetErrorName(status) << " at line: " << __LINE__);
|
||||
|
||||
// Not thread-safe
|
||||
#define HIP_CHECK(error) \
|
||||
{ \
|
||||
hipError_t localError = error; \
|
||||
if ((localError != hipSuccess) && (localError != hipErrorPeerAccessAlreadyEnabled)) { \
|
||||
INFO("Error: " << hipGetErrorString(localError) << "\n Code: " << localError \
|
||||
<< "\n Str: " << #error << "\n In File: " << __FILE__ \
|
||||
<< "\n At line: " << __LINE__); \
|
||||
REQUIRE(false); \
|
||||
} \
|
||||
}
|
||||
|
||||
#define HIP_CHECK_IGNORED_RETURN(error, ignoredError) \
|
||||
{ \
|
||||
hipError_t localError = error; \
|
||||
if ((localError == ignoredError)) { \
|
||||
INFO("Skipped: " << hipGetErrorString(localError) << "\n Code: " << localError \
|
||||
<< "\n Str: " << #error << "\n In File: " << __FILE__ \
|
||||
<< "\n At line: " << __LINE__); \
|
||||
return; \
|
||||
} \
|
||||
if ((localError != hipSuccess) && (localError != hipErrorPeerAccessAlreadyEnabled)) { \
|
||||
INFO("Error: " << hipGetErrorString(localError) << "\n Code: " << localError \
|
||||
<< "\n Str: " << #error << "\n In File: " << __FILE__ \
|
||||
<< "\n At line: " << __LINE__); \
|
||||
REQUIRE(false); \
|
||||
} \
|
||||
}
|
||||
|
||||
// Threaded HIP_CHECKs
|
||||
#define HIP_CHECK_THREAD(error) \
|
||||
{ \
|
||||
/* If an error has already occurred in another thread, stop execution */                       \
|
||||
if (TestContext::get().hasErrorOccured() == true) { \
|
||||
return; /*This will only work with std::thread and not with std::async*/ \
|
||||
} \
|
||||
auto localError = error; \
|
||||
HCResult result(__LINE__, __FILE__, localError, #error); \
|
||||
TestContext::get().addResults(result); \
|
||||
}
|
||||
|
||||
#define REQUIRE_THREAD(condition) \
|
||||
{ \
|
||||
/* If an error has already occurred in another thread, stop execution */                       \
|
||||
if (TestContext::get().hasErrorOccured() == true) { \
|
||||
return; /*This will only work with std::thread and not with std::async*/ \
|
||||
} \
|
||||
auto localResult = (condition); \
|
||||
HCResult result(__LINE__, __FILE__, hipSuccess, #condition, localResult); \
|
||||
TestContext::get().addResults(result); \
|
||||
}
|
||||
|
||||
// Do not call before all threads have joined
|
||||
#define HIP_CHECK_THREAD_FINALIZE() \
|
||||
{ TestContext::get().finalizeResults(); }
|
||||
|
||||
|
||||
// Check that an expression, errorExpr, evaluates to the expected error_t, expectedError.
|
||||
#define HIP_CHECK_ERROR(errorExpr, expectedError) \
|
||||
{ \
|
||||
hipError_t localError = errorExpr; \
|
||||
INFO("Matching Errors: " \
|
||||
<< "\n Expected Error: " << hipGetErrorString(expectedError) \
|
||||
<< "\n Expected Code: " << expectedError << '\n' \
|
||||
<< " Actual Error: " << hipGetErrorString(localError) \
|
||||
<< "\n Actual Code: " << localError << "\nStr: " << #errorExpr \
|
||||
<< "\n In File: " << __FILE__ << "\n At line: " << __LINE__); \
|
||||
REQUIRE(localError == expectedError); \
|
||||
}
|
||||
|
||||
// Check that an expression, errorExpr, evaluates to the expected error_t, expectedError or
|
||||
// expectedError1.
|
||||
#define HIP_CHECK_ERRORS(errorExpr, expectedError, expectedError1)                                 \
  {                                                                                                \
    hipError_t localError = errorExpr;                                                             \
    /* Report both accepted codes, the second one being expectedError1. */                         \
    INFO("Matching Errors: "                                                                       \
         << "\n Expected Error: " << hipGetErrorString(expectedError)                              \
         << "\n Expected Code: " << expectedError << " or " << expectedError1 << '\n'              \
         << " Actual Error: " << hipGetErrorString(localError)                                     \
         << "\n Actual Code: " << localError << "\nStr: " << #errorExpr                            \
         << "\n In File: " << __FILE__ << "\n At line: " << __LINE__);                             \
    REQUIRE((localError == expectedError || localError == expectedError1));                        \
  }
|
||||
|
||||
// Not thread-safe
|
||||
#define HIPRTC_CHECK(error) \
|
||||
{ \
|
||||
auto localError = error; \
|
||||
if (localError != HIPRTC_SUCCESS) { \
|
||||
INFO("Error: " << hiprtcGetErrorString(localError) << "\n Code: " << localError \
|
||||
<< "\n Str: " << #error << "\n In File: " << __FILE__ \
|
||||
<< "\n At line: " << __LINE__); \
|
||||
REQUIRE(false); \
|
||||
} \
|
||||
}
|
||||
|
||||
// Check that an expression, errorExpr, evaluates to the expected error_t, expectedError.
|
||||
#define HIPRTC_CHECK_ERROR(errorExpr, expectedError) \
|
||||
{ \
|
||||
auto localError = errorExpr; \
|
||||
INFO("Matching Errors: " \
|
||||
<< "\n Expected Error: " << hiprtcGetErrorString(expectedError) \
|
||||
<< "\n Expected Code: " << expectedError << '\n' \
|
||||
<< " Actual Error: " << hiprtcGetErrorString(localError) \
|
||||
<< "\n Actual Code: " << localError << "\nStr: " << #errorExpr \
|
||||
<< "\n In File: " << __FILE__ << "\n At line: " << __LINE__); \
|
||||
REQUIRE(localError == expectedError); \
|
||||
}
|
||||
|
||||
// Although it is named like an assert, the condition is evaluated at runtime
|
||||
#define HIP_ASSERT(x) \
|
||||
{ REQUIRE((x)); }
|
||||
|
||||
#define HIPCHECK(error) \
|
||||
{ \
|
||||
hipError_t localError = error; \
|
||||
if ((localError != hipSuccess) && (localError != hipErrorPeerAccessAlreadyEnabled)) { \
|
||||
printf("error: '%s'(%d) from %s at %s:%d\n", hipGetErrorString(localError), localError, \
|
||||
#error, __FILE__, __LINE__); \
|
||||
abort(); \
|
||||
} \
|
||||
}
|
||||
|
||||
// HIPRTC_CHECK_ERROR is defined once, earlier in this header. The second,
// token-identical definition that used to follow here was redundant and has
// been removed.
|
||||
|
||||
// Prints the failed condition with its location and aborts the process when
// `condition` is false. Wrapped in a block, like HIPCHECK and HIP_ASSERT, so
// that the inner `if` cannot capture an `else` that follows the macro.
#define HIPASSERT(condition)                                                                       \
  {                                                                                                \
    if (!(condition)) {                                                                            \
      printf("assertion %s at %s:%d \n", #condition, __FILE__, __LINE__);                          \
      abort();                                                                                     \
    }                                                                                              \
  }
|
||||
|
||||
#if HT_NVIDIA
|
||||
#define CTX_CREATE() \
|
||||
hipCtx_t context; \
|
||||
initHipCtx(&context);
|
||||
#define CTX_DESTROY() HIPCHECK(hipCtxDestroy(context));
|
||||
#define ARRAY_DESTROY(array) HIPCHECK(hipArrayDestroy(array));
|
||||
#define HIP_TEX_REFERENCE hipTexRef
|
||||
#define HIP_ARRAY hipArray_t
|
||||
static void initHipCtx(hipCtx_t* pcontext) {
|
||||
HIPCHECK(hipInit(0));
|
||||
hipDevice_t device;
|
||||
HIPCHECK(hipDeviceGet(&device, 0));
|
||||
HIPCHECK(hipCtxCreate(pcontext, 0, device));
|
||||
}
|
||||
#else
|
||||
#define CTX_CREATE()
|
||||
#define CTX_DESTROY()
|
||||
#define ARRAY_DESTROY(array) HIPCHECK(hipFreeArray(array));
|
||||
#define HIP_TEX_REFERENCE textureReference*
|
||||
#define HIP_ARRAY hipArray_t
|
||||
#endif
|
||||
|
||||
// Returns the warp size of the current device: 32 when built for NVIDIA, the
// hipDeviceAttributeWarpSize attribute when built for AMD.
// On any other platform a message is printed and the function asserts; -1 is
// returned if assertions are compiled out.
static inline int getWarpSize() {
#if HT_NVIDIA
  return 32;
#elif HT_AMD
  int device = -1;
  int warpSize = -1;
  HIP_CHECK(hipGetDevice(&device));
  HIP_CHECK(hipDeviceGetAttribute(&warpSize, hipDeviceAttributeWarpSize, device));
  return warpSize;
#else
  std::cout << "Have to be either Nvidia or AMD platform, asserting" << std::endl;
  assert(false);
  return -1;  // keeps every path returning a value when NDEBUG disables assert
#endif
}
|
||||
|
||||
// Returns true when the current device's gcnArchName contains "gfx11".
// Always false when built for NVIDIA. On any other platform a message is
// printed and the function asserts; false is returned if assertions are
// compiled out.
static inline bool IsGfx11() {
#if HT_NVIDIA
  return false;
#elif HT_AMD
  int device = -1;
  hipDeviceProp_t props{};
  HIP_CHECK(hipGetDevice(&device));
  HIP_CHECK(hipGetDeviceProperties(&props, device));
  // Get GCN Arch Name and compare to check if it is gfx11
  const std::string arch(props.gcnArchName);
  return arch.find("gfx11") != std::string::npos;
#else
  std::cout << "Have to be either Nvidia or AMD platform, asserting" << std::endl;
  assert(false);
  return false;  // keeps every path returning a value when NDEBUG disables assert
#endif
}
|
||||
|
||||
// Utility Functions
|
||||
namespace HipTest {
|
||||
// Returns the value reported by hipGetDeviceCount (checked with HIP_CHECK).
static inline int getDeviceCount() {
  int dev{0};
  HIP_CHECK(hipGetDeviceCount(&dev));
  return dev;
}
|
||||
|
||||
// Returns the current system time in microseconds
|
||||
// Reads std::chrono::high_resolution_clock and returns the time elapsed since
// the clock's epoch as a whole number of microseconds.
static inline long long get_time() {
  const auto sinceEpoch = std::chrono::high_resolution_clock::now().time_since_epoch();
  return std::chrono::duration_cast<std::chrono::microseconds>(sinceEpoch).count();
}
|
||||
|
||||
// Returns the difference between two microsecond timestamps in milliseconds.
static inline double elapsed_time(long long startTimeUs, long long stopTimeUs) {
  const long long deltaUs = stopTimeUs - startTimeUs;
  return static_cast<double>(deltaUs) / static_cast<double>(1000);
}
|
||||
|
||||
// Chooses a grid size for a launch over N elements on the current device.
// Starts from multiProcessorCount * blocksPerCU blocks and, when that grid
// would provide more than N threads, shrinks it to ceil(N / threadsPerBlock).
// This matches setNumBlocksThread, the threaded variant of this function.
static inline unsigned setNumBlocks(unsigned blocksPerCU, unsigned threadsPerBlock, size_t N) {
  int device{0};
  HIP_CHECK(hipGetDevice(&device));
  hipDeviceProp_t props{};
  HIP_CHECK(hipGetDeviceProperties(&props, device));

  unsigned blocks = props.multiProcessorCount * blocksPerCU;
  if (blocks * threadsPerBlock > N) {
    // More threads than elements: cap the grid at what N needs.
    blocks = (N + threadsPerBlock - 1) / threadsPerBlock;
  }

  return blocks;
}
|
||||
|
||||
// Threaded version of setNumBlocks - to be used in multi threaded test
|
||||
// Why? because catch2 does not support multithreaded macro calls
|
||||
// Make sure you call HIP_CHECK_THREAD_FINALIZE after your threads join
|
||||
// Also you can not return in threaded functions, due to how HIP_CHECK_THREAD works
|
||||
static inline void setNumBlocksThread(unsigned blocksPerCU, unsigned threadsPerBlock, size_t N,
                                      unsigned& blocks) {
  // Pre-set the output to 0: HIP_CHECK_THREAD returns early when an error has
  // already occurred in some other thread, in which case the code below never
  // runs.
  blocks = 0;

  int device{0};
  HIP_CHECK_THREAD(hipGetDevice(&device));
  hipDeviceProp_t props{};
  HIP_CHECK_THREAD(hipGetDeviceProperties(&props, device));

  // Start from blocksPerCU blocks per multiprocessor and cap the grid at
  // ceil(N / threadsPerBlock) when that would provide more than N threads.
  const unsigned wanted = props.multiProcessorCount * blocksPerCU;
  const bool exceedsWork = wanted * threadsPerBlock > N;
  blocks = exceedsWork ? (N + threadsPerBlock - 1) / threadsPerBlock : wanted;
}
|
||||
|
||||
// Portable stand-in for rand_r(): uses rand_r where it exists and falls back
// to srand(*rand_seed) followed by rand() on Windows.
static inline int RAND_R(unsigned* rand_seed) {
#if !defined(_WIN32) && !defined(_WIN64)
  return rand_r(rand_seed);
#else
  srand(*rand_seed);
  return rand();
#endif
}
|
||||
|
||||
// Returns whether the current device reports image support.
// On AMD builds this reads hipDeviceAttributeImageSupport; on every other
// build it returns true without querying the device.
inline bool isImageSupported() {
  int imageSupport = 1;
#if HT_AMD
  int device;
  HIP_CHECK(hipGetDevice(&device));
  // Use HIP_CHECK for both calls so that a failure is reported through Catch
  // instead of aborting the whole process.
  HIP_CHECK(hipDeviceGetAttribute(&imageSupport, hipDeviceAttributeImageSupport, device));
#endif
  return imageSupport != 0;
}
|
||||
|
||||
// Returns whether the current device reports a non-zero
// hipDeviceAttributeHostNativeAtomicSupported attribute.
inline bool isPcieAtomicsSupported() {
  int pcieAtomics = 1;
  int device;
  HIP_CHECK(hipGetDevice(&device));
  // Use HIP_CHECK for both calls so that a failure is reported through Catch
  // instead of aborting the whole process.
  HIP_CHECK(hipDeviceGetAttribute(&pcieAtomics, hipDeviceAttributeHostNativeAtomicSupported,
                                  device));
  return pcieAtomics != 0;
}
|
||||
|
||||
inline bool isP2PSupported(int& d1, int& d2) {
|
||||
int num_devices = HipTest::getDeviceCount();
|
||||
int supported = 1;
|
||||
for (auto i = 0u; i < num_devices; ++i) {
|
||||
int canAccess = 0;
|
||||
for (auto j = 0u; j < num_devices; ++j) {
|
||||
if (i != j) {
|
||||
HIP_CHECK(hipDeviceCanAccessPeer(&canAccess, i, j));
|
||||
if (!canAccess) {
|
||||
supported = 0;
|
||||
d1 = i;
|
||||
d2 = j;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return supported;
|
||||
}
|
||||
|
||||
// Returns whether warp match functions can be used on the current device.
// On NVIDIA builds this requires a device whose `major` property is at least
// 7; on every other build it returns true without querying the device.
inline bool areWarpMatchFunctionsSupported() {
#if HT_NVIDIA
  int device;
  hipDeviceProp_t prop;
  HIP_CHECK(hipGetDevice(&device));
  HIP_CHECK(hipGetDeviceProperties(&prop, device));
  return !(prop.major < 7);
#else
  return true;
#endif
}
|
||||
|
||||
/**
|
||||
* Causes the test to stop and be skipped at runtime.
|
||||
* reason: Message describing the reason the test has been skipped.
|
||||
*/
|
||||
static inline void HIP_SKIP_TEST(char const* const reason) noexcept {
  std::cout << "Skipping test. Reason: " << reason << '\n';
  // ctest is set up to look for this marker in the output and skip the test.
  std::cout << "HIP_SKIP_THIS_TEST" << std::endl;
}
|
||||
|
||||
/**
|
||||
* @brief Helper template that returns the expected arguments of a kernel.
|
||||
*
|
||||
* @return constexpr std::tuple<FArgs...> the expected arguments of the kernel.
|
||||
*/
|
||||
// Declaration only: this helper exists to be used inside decltype(), where it
// maps a function type void(FArgs...) to std::tuple<FArgs...>. It is never
// meant to be called, so it has no body (an empty body in a function that
// returns a value would be undefined behaviour if ever executed).
template <typename... FArgs> std::tuple<FArgs...> getExpectedArgs(void(FArgs...));
|
||||
|
||||
/**
|
||||
* @brief Asserts that the types of the arguments of a function match exactly with the types in the
|
||||
* function signature.
|
||||
* This is necessary because HIP RTC does not do implicit casting of the kernel
|
||||
* parameters.
|
||||
* In order to get the kernel function signature, this function should only be called when
|
||||
* RTC is disabled.
|
||||
*
|
||||
* @tparam F the kernel function
|
||||
* @tparam Args the parameters that will be passed to the kernel.
|
||||
*/
|
||||
// Compile-time check only: fails to compile unless the argument types Args...
// are exactly the parameter types of the kernel `f`. Does nothing at runtime.
template <typename F, typename... Args> void validateArguments(F f, Args...) {
  using DeclaredArgs = decltype(getExpectedArgs(f));
  using SuppliedArgs = std::tuple<Args...>;
  static_assert(std::is_same<DeclaredArgs, SuppliedArgs>::value,
                "Kernel arguments types must match exactly!");
}
|
||||
|
||||
/**
|
||||
* @brief Launch a kernel using either HIP or HIP RTC.
|
||||
*
|
||||
* @tparam Typenames A list of typenames used by the kernel (unused if the kernel is not a
|
||||
* template).
|
||||
* @tparam K The kernel type. Expects a function or template when RTC is disabled. Expects a
|
||||
* function pointer instead when RTC is enabled.
|
||||
* @tparam Dim Can be either dim3 or int.
|
||||
* @tparam Args A list of kernel arguments to be forwarded.
|
||||
* @param kernel The kernel to be launched (defined in kernels.hh)
|
||||
* @param numBlocks
|
||||
* @param numThreads
|
||||
* @param memPerBlock
|
||||
* @param stream
|
||||
* @param packedArgs A list of kernel arguments to be forwarded.
|
||||
*/
|
||||
template <typename... Typenames, typename K, typename Dim, typename... Args>
|
||||
void launchKernel(K kernel, Dim numBlocks, Dim numThreads, std::uint32_t memPerBlock,
|
||||
hipStream_t stream, Args&&... packedArgs) {
|
||||
#ifndef RTC_TESTING
|
||||
validateArguments(kernel, packedArgs...);
|
||||
kernel<<<numBlocks, numThreads, memPerBlock, stream>>>(std::forward<Args>(packedArgs)...);
|
||||
#else
|
||||
launchRTCKernel<Typenames...>(kernel, numBlocks, numThreads, memPerBlock, stream,
|
||||
std::forward<Args>(packedArgs)...);
|
||||
#endif
|
||||
HIP_CHECK(hipGetLastError());
|
||||
}
|
||||
|
||||
//---
|
||||
struct Pinned {
|
||||
static const bool isPinned = true;
|
||||
static const char* str() { return "Pinned"; };
|
||||
|
||||
static void* Alloc(size_t sizeBytes) {
|
||||
void* p;
|
||||
HIPCHECK(hipHostMalloc((void**)&p, sizeBytes));
|
||||
return p;
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
//---
|
||||
struct Unpinned {
|
||||
static const bool isPinned = false;
|
||||
static const char* str() { return "Unpinned"; };
|
||||
|
||||
static void* Alloc(size_t sizeBytes) {
|
||||
void* p = malloc(sizeBytes);
|
||||
HIPASSERT(p);
|
||||
return p;
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
// Tag type selecting the MemTraits specialization that copies with hipMemcpy.
struct Memcpy {
  // Printable name of this copy kind.
  static const char* str() { return "Memcpy"; }
};
|
||||
|
||||
// Tag type selecting the MemTraits specialization that copies with hipMemcpyAsync.
struct MemcpyAsync {
  // Printable name of this copy kind.
  static const char* str() { return "MemcpyAsync"; }
};
|
||||
|
||||
|
||||
template <typename C> struct MemTraits;
|
||||
|
||||
|
||||
template <> struct MemTraits<Memcpy> {
|
||||
static void Copy(void* dest, const void* src, size_t sizeBytes, hipMemcpyKind kind,
|
||||
hipStream_t stream) {
|
||||
(void)stream;
|
||||
HIPCHECK(hipMemcpy(dest, src, sizeBytes, kind));
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
template <> struct MemTraits<MemcpyAsync> {
|
||||
static void Copy(void* dest, const void* src, size_t sizeBytes, hipMemcpyKind kind,
|
||||
hipStream_t stream) {
|
||||
HIPCHECK(hipMemcpyAsync(dest, src, sizeBytes, kind, stream));
|
||||
}
|
||||
};
|
||||
|
||||
class BlockingContext {
|
||||
std::atomic_bool blocked{true};
|
||||
hipStream_t stream;
|
||||
|
||||
public:
|
||||
BlockingContext(hipStream_t s) : blocked(true), stream(s) {}
|
||||
|
||||
BlockingContext(const BlockingContext& in) {
|
||||
blocked = in.blocked_val();
|
||||
stream = in.stream_val();
|
||||
}
|
||||
|
||||
BlockingContext(const BlockingContext&& in) {
|
||||
blocked = in.blocked_val();
|
||||
stream = in.stream_val();
|
||||
}
|
||||
|
||||
void reset() { blocked = true; }
|
||||
|
||||
BlockingContext& operator=(const BlockingContext& in) {
|
||||
blocked = in.blocked_val();
|
||||
stream = in.stream_val();
|
||||
return *this;
|
||||
}
|
||||
|
||||
void block_stream() {
|
||||
blocked = true;
|
||||
auto blocking_callback = [](hipStream_t, hipError_t, void* data) {
|
||||
auto blocked = reinterpret_cast<std::atomic_bool*>(data);
|
||||
while (blocked->load()) {
|
||||
// Yield this thread till we are waiting
|
||||
std::this_thread::yield();
|
||||
}
|
||||
};
|
||||
HIP_CHECK(hipStreamAddCallback(stream, blocking_callback, (void*)&blocked, 0));
|
||||
}
|
||||
|
||||
void unblock_stream() {
|
||||
blocked = false;
|
||||
}
|
||||
|
||||
bool is_blocked() const { return hipStreamQuery(stream) == hipErrorNotReady; }
|
||||
|
||||
bool blocked_val() const { return blocked.load(); }
|
||||
hipStream_t stream_val() const { return stream; }
|
||||
};
|
||||
} // namespace HipTest
|
||||
|
||||
// This must be called in the beginning of image test app's main() to indicate whether image
|
||||
// is supported.
|
||||
// Expands to an if-statement that logs via INFO and executes `return;` when
// HipTest::isImageSupported() is false, so it is only usable inside a function returning void.
#define CHECK_IMAGE_SUPPORT                                                                        \
  if (!HipTest::isImageSupported()) {                                                              \
    INFO("Texture is not support on the device. Skipped.");                                        \
    return;                                                                                        \
  }
|
||||
|
||||
// This must be called in host-device memory coherency tests
|
||||
// Expands to an if-statement that logs via INFO and executes `return;` when
// HipTest::isPcieAtomicsSupported() is false, so it is only usable inside a function returning
// void.
#define CHECK_PCIE_ATOMICS_SUPPORT                                                                 \
  if (!HipTest::isPcieAtomicsSupported()) {                                                        \
    INFO("Pcie atomics is not support on the device. Skipped.");                                   \
    return;                                                                                        \
  }
|
||||
|
||||
// Skips the calling test (HIP_SKIP_TEST + `return;`) when HipTest::isP2PSupported reports a
// device pair without peer access. Declares `d1` and `d2` in the enclosing scope; they are
// presumably filled in by isP2PSupported with the failing pair - confirm against its signature.
// The macro ends at the closing brace: no trailing line continuation, so the following source
// line is never spliced into the definition.
#define CHECK_P2P_SUPPORT                                                                          \
  int d1, d2;                                                                                      \
  if (!HipTest::isP2PSupported(d1, d2)) {                                                          \
    std::string msg = "P2P access check failed between dev1:" + std::to_string(d1) + ",dev2:" +    \
        std::to_string(d2);                                                                        \
    HipTest::HIP_SKIP_TEST(msg.c_str());                                                           \
    return;                                                                                        \
  }
|
||||
// This must be called in the beginning of warp test app's main() to indicate warp match functions
|
||||
// are supported.
|
||||
// Expands to an if-statement that logs via INFO and executes `return;` when
// HipTest::areWarpMatchFunctionsSupported() is false, so it is only usable inside a function
// returning void.
#define CHECK_WARP_MATCH_FUNCTIONS_SUPPORT                                                         \
  if (!HipTest::areWarpMatchFunctionsSupported()) {                                                \
    INFO("Warp Match Functions are not support on the device. Skipped.");                          \
    return;                                                                                        \
  }
|
||||
|
||||
// Call GENERATE_CAPTURE macro at the start of the test, before using BEGIN/END_CAPTURE.
|
||||
// Use BEGIN/END_CAPTURE macros to execute APIs in both stream capturing and non-capturing modes.
|
||||
// Place BEGIN_CAPTURE before the API call and END_CAPTURE after the call.
|
||||
// Declares the local `capture` flag read by the BEGIN/END_CAPTURE macros; GENERATE supplies
// both true and false. Note that the expansion already ends with a semicolon.
#define GENERATE_CAPTURE() bool capture = GENERATE(true, false);
|
||||
|
||||
// Starts stream capture on `stream` when the local `capture` flag is set and the stream is
// non-null; the capture mode is generated over all three hipStreamCaptureMode values.
// The macro argument is parenthesized in the comparison so expression arguments parse correctly.
#define BEGIN_CAPTURE(stream)                                                                      \
  if (capture && (stream) != nullptr) {                                                            \
    hipStreamCaptureMode flags = GENERATE(                                                         \
        hipStreamCaptureModeGlobal, hipStreamCaptureModeThreadLocal, hipStreamCaptureModeRelaxed); \
    HIP_CHECK(hipStreamBeginCapture(stream, flags));                                               \
  }
|
||||
|
||||
// Ends the capture started by BEGIN_CAPTURE, instantiates and launches the captured graph on
// the same stream, then destroys the executable graph and the graph.
// The macro argument is parenthesized in the comparison so expression arguments parse correctly.
#define END_CAPTURE(stream)                                                                        \
  if (capture && (stream) != nullptr) {                                                            \
    hipGraph_t graph = nullptr;                                                                    \
    hipGraphExec_t graph_exec = nullptr;                                                           \
    HIP_CHECK(hipStreamEndCapture(stream, &graph));                                                \
    HIP_CHECK(hipGraphInstantiate(&graph_exec, graph, nullptr, nullptr, 0));                       \
    HIP_CHECK(hipGraphLaunch(graph_exec, stream));                                                 \
    HIP_CHECK(hipGraphExecDestroy(graph_exec));                                                    \
    HIP_CHECK(hipGraphDestroy(graph));                                                             \
  }
|
||||
|
||||
// These macros are used for testing behaviour when sync APIs are being captured. Before
|
||||
// calling BEGIN_CAPTURE_SYNC, hipError_t variable (capture_err) should be initialized to hipSuccess
|
||||
// and passed to this macro. The scenario with using this macro should look like this:
|
||||
// 1. BEGIN_CAPTURE_SYNC(capture_err, rlx_mode_allowed)
|
||||
// 2. HIP_CHECK_ERROR(SyncAPI, capture_err)
|
||||
// 3. END_CAPTURE_SYNC(capture_err)
|
||||
// Some sync APIs are allowed in relaxed capture mode which is indicated with
|
||||
// rlx_mode_allowed variable. For other two modes, those APIs return
|
||||
// hipErrorStreamCaptureUnsupported. These macros shouldn't be used with hipStreamSync and
|
||||
// hipDeviceSync during capture.
|
||||
// Declares `stream` and `capture` in the enclosing scope. When capturing, creates the stream,
// begins capture in a generated mode and stores the error the captured sync API is expected to
// return into `capture_err`:
//   - hipErrorStreamCaptureImplicit    when the API is not allowed in relaxed mode,
//   - hipErrorStreamCaptureUnsupported when it is allowed but the mode is not relaxed.
// `stream` starts out as nullptr so it never holds an indeterminate value, and the macro
// arguments are parenthesized so expression arguments parse correctly.
#define BEGIN_CAPTURE_SYNC(capture_err, rlx_mode_allowed)                                          \
  hipStream_t stream = nullptr;                                                                    \
  GENERATE_CAPTURE();                                                                              \
  if (capture) {                                                                                   \
    HIP_CHECK(hipStreamCreate(&stream));                                                           \
    hipStreamCaptureMode mode = GENERATE(                                                          \
        hipStreamCaptureModeGlobal, hipStreamCaptureModeThreadLocal, hipStreamCaptureModeRelaxed); \
    HIP_CHECK(hipStreamBeginCapture(stream, mode));                                                \
    if (!(rlx_mode_allowed)) {                                                                     \
      (capture_err) = hipErrorStreamCaptureImplicit;                                               \
    } else if (mode != hipStreamCaptureModeRelaxed) {                                              \
      (capture_err) = hipErrorStreamCaptureUnsupported;                                            \
    }                                                                                              \
  }
|
||||
|
||||
// If test has other HIP API calls that depend on sync call that is captured and fails, the rest of
|
||||
// the test (except freeing the memory) should be skipped after calling END_CAPTURE_SYNC() by
|
||||
// testing if previously created hipError_t variable (capture_err) doesn't equal hipSuccess.
|
||||
// Ends the capture started by BEGIN_CAPTURE_SYNC. If the captured call was expected to fail
// (`capture_err` != hipSuccess), ending the capture is expected to report
// hipErrorStreamCaptureInvalidated. `graph` starts out as nullptr so the check below never reads
// an indeterminate handle when the end-capture call does not write one. The graph (if any) and
// the stream are destroyed afterwards.
#define END_CAPTURE_SYNC(capture_err)                                                              \
  if (capture) {                                                                                   \
    hipGraph_t graph = nullptr;                                                                    \
    hipError_t stream_err = hipSuccess;                                                            \
    if ((capture_err) != hipSuccess) {                                                             \
      stream_err = hipErrorStreamCaptureInvalidated;                                               \
    }                                                                                              \
    HIP_CHECK_ERROR(hipStreamEndCapture(stream, &graph), stream_err);                              \
    if (graph != nullptr) {                                                                        \
      HIP_CHECK(hipGraphDestroy(graph));                                                           \
    }                                                                                              \
    HIP_CHECK(hipStreamDestroy(stream));                                                           \
  }
|
||||
@@ -0,0 +1,197 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include <hip/hip_runtime.h>
|
||||
#include <hip/hiprtc.h>
|
||||
|
||||
#include <atomic>
|
||||
#include <mutex>
|
||||
#include <vector>
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <set>
|
||||
#include <unordered_map>
|
||||
|
||||
// OS Check
|
||||
// Exactly one of HT_WIN / HT_LINUX is defined to 1; any other OS is a hard error.
#ifdef _WIN32
#define HT_WIN 1
#define HT_LINUX 0
#elif defined(__linux__)
#define HT_WIN 0
#define HT_LINUX 1
#else
#error "OS not recognized"
#endif
|
||||
|
||||
// Platform check
|
||||
// Exactly one of HT_AMD / HT_NVIDIA is defined to 1; an unknown HIP platform is a hard error.
#ifdef __HIP_PLATFORM_AMD__
#define HT_AMD 1
#define HT_NVIDIA 0
#elif defined(__HIP_PLATFORM_NVIDIA__)
#define HT_AMD 0
#define HT_NVIDIA 1
#else
#error "Platform not recognized"
#endif
|
||||
|
||||
// Plain settings record: JSON config files plus platform and OS names.
struct Config_ {
  std::vector<std::string> json_files;  // Json files
  std::string platform;                 // amd/nvidia
  std::string os;                       // windows/linux
};
typedef Config_ Config;
|
||||
|
||||
// Store Multi threaded results
|
||||
struct HCResult {
|
||||
size_t line; // Line of check (HIP_CHECK_THREAD or REQUIRE_THREAD)
|
||||
std::string file; // File name of the check
|
||||
hipError_t result; // hipResult for HIP_CHECK_THREAD, for conditions its hipSuccess
|
||||
std::string call; // Call of HIP API or a bool condition
|
||||
bool conditionsResult; // If bool condition, result of call. For HIP Calls its true
|
||||
HCResult(size_t l, std::string f, hipError_t r, std::string c, bool b = true)
|
||||
: line(l), file(f), result(r), call(c), conditionsResult(b) {}
|
||||
};
|
||||
|
||||
|
||||
class TestContext {
|
||||
bool p_windows = false, p_linux = false; // OS
|
||||
bool amd = false, nvidia = false; // HIP Platform
|
||||
std::string exe_path;
|
||||
std::string current_test;
|
||||
std::set<std::string> skip_test;
|
||||
std::string json_file_;
|
||||
std::vector<std::string> platform_list_ = {"amd", "nvidia"};
|
||||
std::vector<std::string> os_list_ = {"windows", "linux", "all"};
|
||||
std::vector<std::string> amd_arch_list_ = {};
|
||||
|
||||
struct rtcState {
|
||||
hipModule_t module;
|
||||
hipFunction_t kernelFunction;
|
||||
};
|
||||
|
||||
std::unordered_map<std::string, rtcState> compiledKernels{};
|
||||
|
||||
Config config_;
|
||||
std::string& getCommonJsonFile();
|
||||
std::string substringFound(std::vector<std::string> list, std::string filename);
|
||||
void detectOS();
|
||||
void detectPlatform();
|
||||
void getConfigFiles();
|
||||
void setExePath(int, char**);
|
||||
void parseOptions(int, char**);
|
||||
bool parseJsonFiles();
|
||||
std::string getMatchingConfigFile(std::string config_dir);
|
||||
std::string getCurrentArch();
|
||||
const Config& getConfig() const { return config_; }
|
||||
|
||||
|
||||
TestContext(int argc, char** argv);
|
||||
|
||||
// Multi threaded checks helpers
|
||||
std::mutex resultMutex;
|
||||
std::vector<HCResult> results; // Multi threaded test results buffer
|
||||
std::atomic<bool> hasErrorOccured_{false};
|
||||
|
||||
public:
|
||||
static TestContext& get(int argc = 0, char** argv = nullptr) {
|
||||
static TestContext instance(argc, argv);
|
||||
return instance;
|
||||
}
|
||||
|
||||
static std::string getEnvVar(std::string var) {
|
||||
#if defined(_WIN32)
|
||||
constexpr rsize_t MAX_LEN = 4096;
|
||||
char dstBuf[MAX_LEN];
|
||||
size_t dstSize;
|
||||
if (!::getenv_s(&dstSize, dstBuf, MAX_LEN, var.c_str())) {
|
||||
return std::string(dstBuf);
|
||||
}
|
||||
#elif defined(__linux__)
|
||||
char* val = std::getenv(var.c_str());
|
||||
if (val != NULL) {
|
||||
return std::string(val);
|
||||
}
|
||||
#else
|
||||
#error "OS not recognized"
|
||||
#endif
|
||||
return std::string("");
|
||||
}
|
||||
|
||||
|
||||
bool isWindows() const;
|
||||
bool isLinux() const;
|
||||
bool isNvidia() const;
|
||||
bool isAmd() const;
|
||||
bool skipTest() const;
|
||||
|
||||
const std::string& getCurrentTest() const { return current_test; }
|
||||
std::string currentPath() const;
|
||||
|
||||
// Multi threaded results helpers
|
||||
void addResults(HCResult r); // Add multi threaded results
|
||||
void finalizeResults(); // Validate on all results
|
||||
bool hasErrorOccured(); // Query if error has occured
|
||||
|
||||
/**
|
||||
* @brief Unload all loaded modules.
|
||||
* Note: This function needs to be called at the end of each test that uses RTC.
|
||||
* It is not possible to unload the loaded modules without adding explicit code to the end
|
||||
* of each test. This function exists only to provide a clean way to exit a test when using RTC.
|
||||
* However, not unloading a module explicitly shouldn't have any effect on the outcome of
|
||||
* the test.
|
||||
*/
|
||||
void cleanContext();
|
||||
|
||||
/**
|
||||
* @brief Keeps track of all the already compiled rtc kernels.
|
||||
*
|
||||
* @param kernelNameExpression The name expression (e.g. hipTest::vectorADD<float>).
|
||||
* @param loadedModule The loaded module.
|
||||
* @param kernelFunction The hipFunction that will be used to run the kernel in the future.
|
||||
*/
|
||||
void trackRtcState(std::string kernelNameExpression, hipModule_t loadedModule,
|
||||
hipFunction_t kernelFunction);
|
||||
|
||||
/**
|
||||
* @brief Get the already compiled hip rtc kernel function if it exists.
|
||||
*
|
||||
* @param kernelNameExpression The name expression (e.g. hipTest::vectorADD<float>).
|
||||
* @return the hipFunction if it exists. nullptr otherwise
|
||||
*/
|
||||
hipFunction_t getFunction(const std::string kernelNameExpression);
|
||||
|
||||
TestContext(const TestContext&) = delete;
|
||||
void operator=(const TestContext&) = delete;
|
||||
|
||||
~TestContext();
|
||||
};
|
||||
|
||||
// Logging is on whenever the HT_LOG_ENABLE environment variable yields a non-empty value.
static bool _log_enable = !TestContext::getEnvVar("HT_LOG_ENABLE").empty();
|
||||
|
||||
// printing logs
|
||||
// Prints a printf-style message followed by a newline when `_log_enable` is true.
// The format string is taken as the first variadic argument so that calls without additional
// arguments, e.g. LogPrintf("done"), also compile; existing calls are unaffected.
#define LogPrintf(...)                                                                             \
  {                                                                                                \
    if (_log_enable) {                                                                             \
      printf(__VA_ARGS__);                                                                         \
      printf("%c", '\n');                                                                          \
    }                                                                                              \
  }
|
||||
@@ -0,0 +1,239 @@
|
||||
/*
|
||||
Copyright (c) 2021 - 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
// Test groups are named based on the group names from hip_runtime_api.h, with a "Test" suffix added
|
||||
|
||||
/**
|
||||
* @defgroup CallbackTest Callback Activity APIs
|
||||
* @{
|
||||
* This section describes tests for the callback/Activity of HIP runtime API.
|
||||
* @}
|
||||
*/
|
||||
|
||||
/**
|
||||
* @defgroup ContextTest Context Management
|
||||
* @{
|
||||
* This section describes tests for the context management functions of HIP runtime API.
|
||||
* @warning All Context Management APIs are **deprecated** and shall not be implemented.
|
||||
* @}
|
||||
*/
|
||||
|
||||
/**
|
||||
* @defgroup AtomicsTest Device Atomics
|
||||
* @{
|
||||
* This section describes tests for the Device Atomic APIs.
|
||||
* @}
|
||||
*/
|
||||
|
||||
/**
|
||||
* @defgroup DeviceLanguageTest Device Language
|
||||
* @{
|
||||
* This section describes tests for the Device Language API.
|
||||
* @}
|
||||
*/
|
||||
|
||||
/**
|
||||
* @defgroup DeviceTest Device Management
|
||||
* @{
|
||||
* This section describes tests for device management functions of HIP runtime API.
|
||||
* @}
|
||||
*/
|
||||
|
||||
/**
|
||||
* @defgroup DriverTest Initialization and Version
|
||||
* @{
|
||||
* This section describes tests for the initialization and version functions of HIP runtime API.
|
||||
* @}
|
||||
*/
|
||||
|
||||
/**
|
||||
* @defgroup DynamicLoadingTest Kernel Loading Management
|
||||
* @{
|
||||
* This section describes the different kernel launch approaches.
|
||||
* @}
|
||||
*/
|
||||
|
||||
/**
|
||||
* @defgroup ErrorTest Error Handling
|
||||
* @{
|
||||
* This section describes tests for the error handling functions of HIP runtime API.
|
||||
* @}
|
||||
*/
|
||||
|
||||
/**
|
||||
* @defgroup EventTest Event Management
|
||||
* @{
|
||||
* This section describes tests for the event management functions of HIP runtime API.
|
||||
* @}
|
||||
*/
|
||||
|
||||
/**
|
||||
* @defgroup ExecutionTest Execution Control
|
||||
* @{
|
||||
* This section describes tests for the execution control functions of HIP runtime API.
|
||||
* @}
|
||||
*/
|
||||
|
||||
/**
|
||||
* @defgroup GraphTest Graph Management
|
||||
* @{
|
||||
* This section describes tests for the graph management types & functions of HIP runtime API.
|
||||
* @}
|
||||
*/
|
||||
|
||||
/**
|
||||
* @defgroup KernelTest Kernel Functions Management
|
||||
* @{
|
||||
* This section describes the various kernel functions invocation.
|
||||
* @}
|
||||
*/
|
||||
|
||||
/**
|
||||
* @defgroup SyncthreadsTest Synchronization Functions
|
||||
* @{
|
||||
* This section describes tests for Synchronization Functions.
|
||||
* @}
|
||||
*/
|
||||
|
||||
/**
|
||||
* @defgroup ThreadfenceTest Memory Fence Functions
|
||||
* @{
|
||||
* This section describes tests for Memory Fence Functions.
|
||||
* @}
|
||||
*/
|
||||
|
||||
/**
|
||||
* @defgroup MemoryTest memory Management APIs
|
||||
* @{
|
||||
* This section describes the memory management types & functions of HIP runtime API.
|
||||
* @}
|
||||
*/
|
||||
|
||||
/**
|
||||
* @defgroup PeerToPeerTest PeerToPeer Device Memory Access
|
||||
* @{
|
||||
* This section describes tests for the PeerToPeer device memory access functions of HIP runtime
|
||||
* API.
|
||||
* @warning PeerToPeer support is experimental.
|
||||
* @}
|
||||
*/
|
||||
|
||||
/**
|
||||
* @defgroup PerformanceTest Performance tests
|
||||
* @{
|
||||
* This section describes performance tests for the target API groups and use-cases.
|
||||
* @}
|
||||
*/
|
||||
|
||||
/**
|
||||
* @defgroup ShflTest warp shuffle function Management
|
||||
* @{
|
||||
* This section describes the warp shuffle types & functions of HIP runtime API.
|
||||
* @}
|
||||
*/
|
||||
|
||||
/**
|
||||
* @defgroup p2pTest P2P Management
|
||||
* @{
|
||||
* This section describes the P2P management types & functions of HIP runtime API.
|
||||
* @}
|
||||
*/
|
||||
|
||||
/**
|
||||
* @defgroup StreamOTest Ordered Memory Allocator
|
||||
* @{
|
||||
* This section describes the tests for Stream Ordered Memory Allocator functions of HIP runtime
|
||||
* API.
|
||||
* @}
|
||||
*/
|
||||
|
||||
/**
|
||||
* @defgroup StreamTest Stream Management
|
||||
* @{
|
||||
* This section describes the stream management types & functions of HIP runtime API.
|
||||
* @}
|
||||
*/
|
||||
|
||||
/**
|
||||
* @defgroup ModuleTest Module Management
|
||||
* @{
|
||||
* This section describes the module management types & functions of HIP runtime API.
|
||||
* @}
|
||||
*/
|
||||
|
||||
/**
|
||||
* @defgroup TextureTest Texture Management
|
||||
* @{
|
||||
* This section describes tests for the texture management functions of HIP runtime API.
|
||||
* @}
|
||||
*/
|
||||
|
||||
/**
|
||||
* @defgroup VectorTypeTest Vector types
|
||||
* @{
|
||||
* This section describes tests for the Vector type functions and operators.
|
||||
* @}
|
||||
*/
|
||||
|
||||
/**
|
||||
* @defgroup MathTest Math Device Functions
|
||||
* @{
|
||||
* This section describes tests for device math functions of HIP runtime API.
|
||||
* @}
|
||||
*/
|
||||
|
||||
/**
|
||||
* @defgroup PrintfTest Printf API Management
|
||||
* @{
|
||||
* This section describes the various Printf use case Scenarios.
|
||||
* @}
|
||||
*/
|
||||
|
||||
/**
|
||||
* @defgroup SurfaceTest Surface Management
|
||||
* @{
|
||||
* This section describes tests for the surface management functions of HIP runtime API.
|
||||
* @}
|
||||
*/
|
||||
|
||||
/**
|
||||
* @defgroup ComplexTest Complex type
|
||||
* @{
|
||||
* This section describes tests for the Complex type functions.
|
||||
* @}
|
||||
*/
|
||||
|
||||
/**
|
||||
* @defgroup VirtualMemoryManagementTest Virtual Memory Management APIs
|
||||
* @{
|
||||
* This section describes the virtual memory management types & functions of HIP runtime API.
|
||||
* @}
|
||||
*/
|
||||
|
||||
/**
|
||||
* @defgroup ModuleTest Module Functions Management
|
||||
* @{
|
||||
* This section describes the loading of modules from code object files and invocation of different kernels.
|
||||
* @}
|
||||
*/
|
||||
@@ -0,0 +1,40 @@
|
||||
/*
|
||||
Copyright (c) 2021 - 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <assert.h>
|
||||
#include <unordered_set>
|
||||
|
||||
// Catch Test Features
|
||||
enum CTFeatures {
  CT_FEATURE_FINEGRAIN_HWSUPPORT = 0,     // FINEGRAIN Supported Hardware.
  CT_FEATURE_HMM = 1,                     // HMM Enabled
  CT_FEATURE_TEXTURES_NOT_SUPPORTED = 2,  // Textures not supported
  CT_FEATURE_LAST = 3                     // One past the last feature value listed above
};
|
||||
|
||||
bool CheckIfFeatSupported(enum CTFeatures test_feat, std::string gcn_arch);
|
||||
bool getGenericTarget(const std::string& agentTarget, std::string& genericTarget);
|
||||
bool isGenericTargetSupported(char* gcnArchName = nullptr, int deviceId = 0);
|
||||
@@ -0,0 +1,89 @@
|
||||
|
||||
/*
|
||||
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
// We haven't checked which filesystem to include yet
|
||||
// Selects between <filesystem> and <experimental/filesystem> and exposes the chosen
// implementation as namespace alias `fs`. INCLUDE_STD_FILESYSTEM_EXPERIMENTAL ends up as 1 when
// the experimental header is used and 0 otherwise; a pre-existing definition skips everything.
#ifndef INCLUDE_STD_FILESYSTEM_EXPERIMENTAL

// Check for feature test macro for <filesystem>
#if defined(__cpp_lib_filesystem)
#define INCLUDE_STD_FILESYSTEM_EXPERIMENTAL 0

// Check for feature test macro for <experimental/filesystem>
#elif defined(__cpp_lib_experimental_filesystem)
#define INCLUDE_STD_FILESYSTEM_EXPERIMENTAL 1

// We can't check if headers exist...
// Let's assume experimental to be safe
#elif !defined(__has_include)
#define INCLUDE_STD_FILESYSTEM_EXPERIMENTAL 1

// Check if the header "<filesystem>" exists
#elif __has_include(<filesystem>)

// If we're compiling on Visual Studio and are not compiling with C++17,
// we need to use experimental
#ifdef _MSC_VER

// Check and include header that defines "_HAS_CXX17"
#if __has_include(<yvals_core.h>)
#include <yvals_core.h>

// Check for enabled C++17 support
#if defined(_HAS_CXX17) && _HAS_CXX17
// We're using C++17, so let's use the normal version
#define INCLUDE_STD_FILESYSTEM_EXPERIMENTAL 0
#endif

#endif

// If the macro isn't defined yet, that means any of the other
// VS specific checks failed, so we need to use experimental
#ifndef INCLUDE_STD_FILESYSTEM_EXPERIMENTAL
#define INCLUDE_STD_FILESYSTEM_EXPERIMENTAL 1
#endif

// Not on Visual Studio. Let's use the normal version
#else  // #ifdef _MSC_VER
#define INCLUDE_STD_FILESYSTEM_EXPERIMENTAL 0
#endif

// Check if the header "<experimental/filesystem>" exists
#elif __has_include(<experimental/filesystem>)
#define INCLUDE_STD_FILESYSTEM_EXPERIMENTAL 1

// Fail if neither header is available with a nice error message.
// The whole message stays on the #error line; a directive ends at the end of its line.
#else
#error Could not find system header "<filesystem>" or "<experimental/filesystem>"
#endif

// We previously determined that we need the experimental version
#if INCLUDE_STD_FILESYSTEM_EXPERIMENTAL
// Include it. No trailing semicolon in the macro value.
#define _SILENCE_EXPERIMENTAL_FILESYSTEM_DEPRECATION_WARNING 1
#include <experimental/filesystem>
// We need the alias from std::experimental::filesystem to fs
namespace fs = std::experimental::filesystem;
// We have a decent compiler and can use the normal version
#else
// Include it
#include <filesystem>
namespace fs = std::filesystem;
#endif

#endif  // #ifndef INCLUDE_STD_FILESYSTEM_EXPERIMENTAL
|
||||
@@ -0,0 +1,70 @@
|
||||
/*
|
||||
Copyright (c) 2021 - 2021 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "hip_test_common.hh"
|
||||
|
||||
#ifdef __linux__
|
||||
#include <sys/sysinfo.h>
|
||||
#else
|
||||
#include <windows.h>
|
||||
#include <sysinfoapi.h>
|
||||
#endif
|
||||
|
||||
namespace HipTest {
|
||||
// Returns the device count reported by hipGetDeviceCount (the call is wrapped in HIP_CHECK).
static inline int getGeviceCount() {
  int deviceCount{0};
  HIP_CHECK(hipGetDeviceCount(&deviceCount));
  return deviceCount;
}
|
||||
|
||||
// Get Free Memory from the system
|
||||
// Returns the amount of free physical host memory in MB, or 0 when the query fails.
static inline size_t getMemoryAmount() {
  constexpr unsigned long long kBytesPerMB = 1024ULL * 1024ULL;
#ifdef __linux__
  struct sysinfo info{};
  if (sysinfo(&info) != 0) return 0;  // query failed; callers treat 0 as "unknown"
  // sysinfo reports memory sizes in multiples of mem_unit bytes, so scale before converting.
  const unsigned long long unit = (info.mem_unit != 0) ? info.mem_unit : 1ULL;
  const unsigned long long freeBytes = static_cast<unsigned long long>(info.freeram) * unit;
  return static_cast<size_t>(freeBytes / kBytesPerMB);  // MB
#elif defined(_WIN32)
  MEMORYSTATUSEX statex;
  statex.dwLength = sizeof(statex);
  if (!GlobalMemoryStatusEx(&statex)) return 0;  // query failed
  return static_cast<size_t>(statex.ullAvailPhys / kBytesPerMB);  // MB
#else
  return 0;  // unsupported OS: never fall off the end of a non-void function
#endif
}
|
||||
|
||||
static inline size_t getHostThreadCount(const size_t memPerThread, const size_t maxThreads) {
|
||||
if (memPerThread == 0) return 0;
|
||||
auto memAmount = getMemoryAmount();
|
||||
const auto processor_count = std::thread::hardware_concurrency();
|
||||
if (processor_count == 0 || memAmount == 0) return 0;
|
||||
size_t thread_count = 0;
|
||||
if ((processor_count * memPerThread) < memAmount)
|
||||
thread_count = processor_count;
|
||||
else
|
||||
thread_count = reinterpret_cast<size_t>(memAmount / memPerThread);
|
||||
if (maxThreads > 0) {
|
||||
return (thread_count > maxThreads) ? maxThreads : thread_count;
|
||||
}
|
||||
return thread_count;
|
||||
}
|
||||
|
||||
} // namespace HipTest
|
||||
@@ -0,0 +1,107 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <hip/hip_runtime.h>
|
||||
|
||||
namespace HipTest {
|
||||
// Element-wise addition: C_d[i] = A_d[i] + B_d[i] for every i in [0, NELEM).
// Each thread starts at its global x index and advances by the total number of threads launched
// along x, so any 1-D launch configuration covers the whole range.
template <typename T>
__global__ void vectorADD(const T* A_d, const T* B_d, T* C_d, size_t NELEM) {
  const size_t step = blockDim.x * gridDim.x;
  size_t idx = (blockIdx.x * blockDim.x + threadIdx.x);

  while (idx < NELEM) {
    C_d[idx] = A_d[idx] + B_d[idx];
    idx += step;
  }
}
|
||||
|
||||
// Element-wise subtraction: C_d[i] = A_d[i] - B_d[i] for every i in [0, NELEM).
// Each thread starts at its global x index and advances by the total number of threads launched
// along x.
template <typename T>
__global__ void vectorSUB(const T* A_d, const T* B_d, T* C_d, size_t NELEM) {
  const size_t step = blockDim.x * gridDim.x;
  size_t idx = (blockIdx.x * blockDim.x + threadIdx.x);

  while (idx < NELEM) {
    C_d[idx] = A_d[idx] - B_d[idx];
    idx += step;
  }
}
|
||||
|
||||
template <typename T>
|
||||
__global__ void vectorADDReverse(const T* A_d, const T* B_d, T* C_d, size_t NELEM) {
|
||||
size_t offset = (blockIdx.x * blockDim.x + threadIdx.x);
|
||||
size_t stride = blockDim.x * gridDim.x;
|
||||
|
||||
for (int64_t i = NELEM - stride + offset; i >= 0; i -= stride) {
|
||||
C_d[i] = A_d[i] + B_d[i];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Writes C_d[i] = A_d[i] + count for every i in [0, NELEM), repeating the whole pass `count`
// times. The repetition is intentional: it only exists to make the kernel run longer.
template <typename T>
__global__ void addCount(const T* A_d, T* C_d, size_t NELEM, int count) {
  const size_t first = (blockIdx.x * blockDim.x + threadIdx.x);
  const size_t step = blockDim.x * gridDim.x;

  // Deliberately inefficient: every pass stores the same values again.
  for (int pass = 0; pass < count; ++pass) {
    size_t idx = first;
    while (idx < NELEM) {
      C_d[idx] = A_d[idx] + (T)count;
      idx += step;
    }
  }
}
|
||||
|
||||
|
||||
template <typename T>
|
||||
__global__ void addCountReverse(const T* A_d, T* C_d, int64_t NELEM, int count) {
|
||||
size_t offset = (blockIdx.x * blockDim.x + threadIdx.x);
|
||||
size_t stride = blockDim.x * gridDim.x;
|
||||
|
||||
// Deliberately do this in an inefficient way to increase kernel runtime
|
||||
for (int i = 0; i < count; i++) {
|
||||
for (int64_t i = NELEM - stride + offset; i >= 0; i -= stride) {
|
||||
C_d[i] = A_d[i] + (T)count;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Stores `val` into C_d[i] for the NELEM elements of C_d, walking from the end towards the
// beginning. Each thread starts at NELEM - stride + offset and steps down by the total number of
// threads launched along x.
template <typename T>
__global__ void memsetReverse(T* C_d, T val, int64_t NELEM) {
  const size_t offset = (blockIdx.x * blockDim.x + threadIdx.x);
  const size_t stride = blockDim.x * gridDim.x;

  // The index is signed so that stepping below zero terminates the loop.
  int64_t idx = NELEM - stride + offset;
  while (idx >= 0) {
    C_d[idx] = val;
    idx -= stride;
  }
}
|
||||
|
||||
// Element-wise square: C_d[i] = A_d[i] * A_d[i] for every i in [0, N_ELMTS).
// Each thread starts at its global x index and advances by the total number of threads launched
// along x.
template <typename T>
__global__ void vector_square(const T* A_d, T* C_d, size_t N_ELMTS) {
  const size_t step = blockDim.x * gridDim.x;
  size_t idx = (blockIdx.x * blockDim.x + threadIdx.x);

  while (idx < N_ELMTS) {
    C_d[idx] = A_d[idx] * A_d[idx];
    idx += step;
  }
}
|
||||
|
||||
// Element-wise cube: C_d[i] = A_d[i] * A_d[i] * A_d[i] for every i in [0, N_ELMTS).
// Each thread starts at its global x index and advances by the total number of threads launched
// along x.
template <typename T>
__global__ void vector_cubic(const T* A_d, T* C_d, size_t N_ELMTS) {
  const size_t step = blockDim.x * gridDim.x;
  size_t idx = (blockIdx.x * blockDim.x + threadIdx.x);

  while (idx < N_ELMTS) {
    C_d[idx] = A_d[idx] * A_d[idx] * A_d[idx];
    idx += step;
  }
}
|
||||
} // namespace HipTest
|
||||
@@ -0,0 +1,136 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "hip_test_common.hh"
|
||||
#include "hip_test_filesystem.hh"
|
||||
|
||||
#include <string>
|
||||
#include <array>
|
||||
#include <cstdlib>
|
||||
#include <random>
|
||||
#include <fstream>
|
||||
#include <streambuf>
|
||||
#include <thread>
|
||||
#include <future>
|
||||
|
||||
namespace hip {
|
||||
/*
|
||||
Class to spawn a process in isolation and test its standard output and return status
|
||||
Good for printf tests and environment variable tests
|
||||
|
||||
How to use:
|
||||
Have the stand alone exe in the same folder
|
||||
Init a class using hip::SpawnProc proc("ExeName", yes_or_no_to_capture_output);
|
||||
proc.run("Optional command line args");
|
||||
*/
|
||||
class SpawnProc {
|
||||
std::string exeName;
|
||||
std::string resultStr;
|
||||
std::string tmpFileName;
|
||||
std::future<int> ret_from_run;
|
||||
bool captureOutput;
|
||||
|
||||
std::string getRandomString(size_t len = 6) {
|
||||
std::random_device dev;
|
||||
std::mt19937 rng(dev());
|
||||
std::uniform_int_distribution<std::mt19937::result_type> dist(0, 25);
|
||||
|
||||
std::string res;
|
||||
for (size_t i = 0; i < len; i++) {
|
||||
res += 'a' + dist(rng);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
public:
|
||||
SpawnProc(std::string exeName_, bool captureOutput_ = false)
|
||||
: exeName(exeName_), captureOutput(captureOutput_) {
|
||||
auto dir = fs::path(TestContext::get().currentPath());
|
||||
dir /= exeName;
|
||||
exeName = dir.string();
|
||||
// On Windows, fs::exists returns false without extension.
|
||||
if (TestContext::get().isWindows()) {
|
||||
if (fs::path(exeName).extension().empty()) {
|
||||
exeName += ".exe";
|
||||
}
|
||||
}
|
||||
INFO("Testing that exe exists: " << exeName);
|
||||
REQUIRE(fs::exists(exeName));
|
||||
|
||||
if (captureOutput) {
|
||||
auto path = fs::temp_directory_path();
|
||||
path /= getRandomString();
|
||||
tmpFileName = path.string();
|
||||
INFO("Testing that capture file does not exist already: " << tmpFileName);
|
||||
REQUIRE(!fs::exists(tmpFileName));
|
||||
}
|
||||
if (TestContext::get().isWindows()) {
|
||||
exeName = (exeName.find(" ", 0) == std::string::npos) ? exeName : ("\"" + exeName + "\"");
|
||||
tmpFileName = (tmpFileName.find(" ", 0) == std::string::npos) ? tmpFileName : ("\"" + tmpFileName + "\"");
|
||||
}
|
||||
}
|
||||
|
||||
int run(std::string commandLineArgs = "") {
|
||||
std::string execCmd = exeName;
|
||||
|
||||
// Append command line args
|
||||
if (commandLineArgs.size() > 0) {
|
||||
execCmd += " "; // Add space for command line args
|
||||
execCmd += commandLineArgs;
|
||||
}
|
||||
|
||||
if (captureOutput) {
|
||||
execCmd += " > ";
|
||||
execCmd += tmpFileName;
|
||||
}
|
||||
if (TestContext::get().isWindows()) {
|
||||
execCmd = (execCmd.find(" ", 0) == std::string::npos) ? execCmd : ("\"" + execCmd + "\"");
|
||||
}
|
||||
auto res = std::system(execCmd.c_str());
|
||||
|
||||
if (captureOutput) {
|
||||
std::ifstream t(tmpFileName.c_str());
|
||||
resultStr =
|
||||
std::string((std::istreambuf_iterator<char>(t)), std::istreambuf_iterator<char>());
|
||||
t.close();
|
||||
}
|
||||
#if HT_LINUX
|
||||
return WEXITSTATUS(res);
|
||||
#else
|
||||
return res;
|
||||
#endif
|
||||
}
|
||||
|
||||
void run_async(std::string commandLineArgs = "") {
|
||||
ret_from_run = std::async(std::launch::async, &hip::SpawnProc::run, this, commandLineArgs);
|
||||
}
|
||||
|
||||
int wait() {
|
||||
ret_from_run.wait();
|
||||
return ret_from_run.get();
|
||||
}
|
||||
|
||||
std::string getOutput() { return resultStr; }
|
||||
};
|
||||
} // namespace hip
|
||||
@@ -0,0 +1,280 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include <hip/hip_runtime.h>
|
||||
#include <hip/hiprtc.h>
|
||||
#include <kernel_mapping.hh>
|
||||
#include <catch.hpp>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <sstream>
|
||||
#include <set>
|
||||
#include <mutex>
|
||||
#include "hip/hip_runtime_api.h"
|
||||
#include "hip_test_context.hh"
|
||||
|
||||
#define STRINGIFY(x) #x
|
||||
namespace HipTest {
|
||||
|
||||
// Describes one kernel argument for alignArguments(): where its bytes live and how they have to
// be placed in the packed argument buffer.
struct KernelArgument {
  const void* ptr;              // address of the argument value to copy
  size_t sizeRequirement;       // number of bytes copied from ptr
  size_t alignmentRequirement;  // alignment of the argument inside the packed buffer
};
|
||||
|
||||
/**
 * @brief Reconstructs the name expression for the kernel.
 *
 * @param kernelName the name of the kernel (e.g. "HipTest::vectorADD")
 * @param typenames the typenames used by this kernel (e.g. "float").
 * @return std::string kernelName followed by the comma separated typenames in angle brackets
 * (e.g. "HipTest::vectorADD<float>"). Returns kernelName unchanged if typenames is empty, i.e.
 * the kernel is not a template.
 */
inline std::string reconstructExpression(std::string& kernelName,
                                         std::vector<std::string>& typenames) {
  if (typenames.empty()) {
    return kernelName;
  }

  std::string expression = kernelName;
  char separator = '<';  // opens the list for the first typename, ',' for the following ones
  for (const std::string& typeName : typenames) {
    expression += separator;
    expression += typeName;
    separator = ',';
  }
  expression += '>';

  return expression;
}
|
||||
|
||||
/**
|
||||
* @brief Packs the kernel arguments into the format expected by hipModuleLaunchKernel
|
||||
*
|
||||
* @param args list of arguments for the kernel and their alignemnt requirements.
|
||||
* @return std::vector<char> the packed arguments ready to be passed on to hipModuleLaunchKernel
|
||||
*/
|
||||
inline std::vector<char> alignArguments(std::vector<KernelArgument>& args) {
|
||||
std::vector<char> alignedArguments{};
|
||||
int count = 0;
|
||||
for (auto& arg : args) {
|
||||
const char* argPtr{reinterpret_cast<const char*>(arg.ptr)};
|
||||
|
||||
int paddingNeeded = (arg.alignmentRequirement - 1) & (~count + 1);
|
||||
alignedArguments.insert(std::end(alignedArguments), paddingNeeded, 0);
|
||||
count += paddingNeeded;
|
||||
|
||||
alignedArguments.insert(std::end(alignedArguments), argPtr, argPtr + arg.sizeRequirement);
|
||||
count += arg.sizeRequirement;
|
||||
}
|
||||
return alignedArguments;
|
||||
}
|
||||
|
||||
// Returns the code of a compiled RTC program as a byte vector.
// The size is queried with hiprtcGetCodeSize and the bytes are fetched with hiprtcGetCode; both
// calls are required to succeed.
inline std::vector<char> getKernelCode(hiprtcProgram& rtcProgram) {
  size_t codeSize;
  REQUIRE(HIPRTC_SUCCESS == hiprtcGetCodeSize(rtcProgram, &codeSize));

  std::vector<char> code;
  code.resize(codeSize);
  REQUIRE(HIPRTC_SUCCESS == hiprtcGetCode(rtcProgram, code.data()));

  return code;
}
|
||||
|
||||
/**
|
||||
* @brief Compiles a kernel using HIP RTC
|
||||
*
|
||||
* @param rtcKernel the name of the kernel to compile.
|
||||
* @param kernelNameExpression the name expression to be added to the RTC program (e.g.
|
||||
* HipTest::VectorADD<float>)
|
||||
* @return hiprtcProgram the compiled rtc program.
|
||||
*/
|
||||
inline hiprtcProgram compileRTC(std::string& rtcKernel, std::string& kernelNameExpression) {
|
||||
std::string fileName = mapKernelToFileName.at(rtcKernel);
|
||||
std::string filePath{STRINGIFY(KERNELS_PATH) + fileName};
|
||||
|
||||
INFO("Opening Kernel File: " << filePath);
|
||||
std::ifstream kernelFile{filePath};
|
||||
REQUIRE(kernelFile.is_open());
|
||||
|
||||
std::stringstream stringStream;
|
||||
std::string line;
|
||||
while (getline(kernelFile, line)) {
|
||||
/* Skip the include directive since it is not part of the kernel */
|
||||
if (line.find("#include") != std::string::npos) {
|
||||
continue;
|
||||
}
|
||||
stringStream << line << '\n';
|
||||
}
|
||||
kernelFile.close();
|
||||
|
||||
std::string kernelCode{stringStream.str()};
|
||||
INFO("RTC Kernel Code:\n" << kernelCode)
|
||||
|
||||
hiprtcProgram rtcProgram;
|
||||
hiprtcCreateProgram(&rtcProgram, kernelCode.c_str(), (fileName + ".cu").c_str(), 0, nullptr,
|
||||
nullptr);
|
||||
|
||||
std::vector<const char*> options{};
|
||||
#ifdef __HIP_PLATFORM_AMD__
|
||||
|
||||
int deviceCount;
|
||||
REQUIRE(hipSuccess == hipGetDeviceCount(&deviceCount));
|
||||
|
||||
std::set<std::string> architectures{};
|
||||
for (int i = 0; i < deviceCount; ++i) {
|
||||
hipDeviceProp_t props;
|
||||
REQUIRE(hipSuccess == hipGetDeviceProperties(&props, i));
|
||||
architectures.insert(std::string{"--gpu-architecture="} + props.gcnArchName);
|
||||
}
|
||||
|
||||
for (auto& architecture : architectures) {
|
||||
options.push_back(architecture.c_str());
|
||||
}
|
||||
#else
|
||||
options.push_back("--fmad=false");
|
||||
#endif
|
||||
|
||||
REQUIRE(HIPRTC_SUCCESS == hiprtcAddNameExpression(rtcProgram, kernelNameExpression.c_str()));
|
||||
REQUIRE(HIPRTC_SUCCESS == hiprtcCompileProgram(rtcProgram, 1, options.data()));
|
||||
|
||||
return rtcProgram;
|
||||
}
|
||||
|
||||
/**
 * @brief Get a typename as a string
 *
 * The name is cut out of the compiler generated signature of this function
 * (__PRETTY_FUNCTION__ for clang and GCC, __FUNCSIG__ for MSVC) by removing a compiler specific
 * prefix and suffix. The prefixes contain "HipTest::", so the function has to stay in that
 * namespace for the extraction to work.
 *
 * @tparam T The typename
 * @return std::string the string representation of T
 */
template <typename T> std::string getTypeName() {
#ifdef __clang__
  const std::string signature = __PRETTY_FUNCTION__;
  const std::string head = "std::string HipTest::getTypeName() [T = ";
  const std::string tail = "]";
#elif defined(__GNUC__)
  const std::string signature = __PRETTY_FUNCTION__;
  const std::string head = "std::string HipTest::getTypeName() [with T = ";
  const std::string tail = "; std::string = std::__cxx11::basic_string<char>]";
#elif defined(_MSC_VER)
  const std::string signature = __FUNCSIG__;
  const std::string head = "std::string __cdecl HipTest::getTypeName<";
  const std::string tail = ">(void)";
#else
  const std::string signature, head, tail;
#endif

  // The type name sits between the end of the prefix and the last occurrence of the suffix.
  const size_t begin = head.size();
  const size_t end = signature.rfind(tail);
  return signature.substr(begin, end - begin);
}
|
||||
|
||||
/**
 * @brief Tells the user that the kernels are using HIP RTC.
 *
 * The message is written to std::cout on the first call only; a function-local static flag
 * suppresses it on later calls.
 */
static inline void printInfo() {
  static bool alreadyPrinted{false};
  if (alreadyPrinted) {
    return;
  }

  std::cout << "INFO: This test is running using HIP RTC to compile and run the kernels."
            << std::endl;
  alreadyPrinted = true;
}
|
||||
|
||||
/**
|
||||
* @brief Compiles and launches a kernel using HIP RTC
|
||||
*
|
||||
* @tparam Typenames A list of typenames used by the kernel (unused if the kernel is not a
|
||||
* template).
|
||||
* @tparam Args A list of kernel arguments to be forwarded.
|
||||
* @param getKernelName A function wrapper that returns the name of the kernel to launch (check
|
||||
* kernels.hh for more info)
|
||||
* @param numBlocks
|
||||
* @param numThreads
|
||||
* @param memPerBlock
|
||||
* @param stream
|
||||
* @param packedArgs A list of kernel arguments to be forwarded.
|
||||
*/
|
||||
template <typename... Typenames, typename... Args>
|
||||
void launchRTCKernel(std::string (*getKernelName)(), dim3 numBlocks, dim3 numThreads,
|
||||
std::uint32_t memPerBlock, hipStream_t stream, Args&&... packedArgs) {
|
||||
printInfo();
|
||||
TestContext& testContext = TestContext::get();
|
||||
std::string kernelName = (*getKernelName)();
|
||||
|
||||
std::vector<std::string> kernelTypenames{std::string(HipTest::getTypeName<Typenames>())...};
|
||||
std::string kernelExpression = reconstructExpression(kernelName, kernelTypenames);
|
||||
|
||||
static std::mutex mutex{};
|
||||
{
|
||||
std::lock_guard<std::mutex> lockGuard(mutex);
|
||||
if (testContext.getFunction(kernelExpression) == nullptr) {
|
||||
hiprtcProgram rtcProgram{compileRTC(kernelName, kernelExpression)};
|
||||
std::vector<char> compiledCode{getKernelCode(rtcProgram)};
|
||||
|
||||
hipModule_t module;
|
||||
|
||||
REQUIRE(hipSuccess == hipModuleLoadData(&module, compiledCode.data()));
|
||||
|
||||
hipFunction_t kernelFunction;
|
||||
|
||||
const char* loweredName;
|
||||
REQUIRE(HIPRTC_SUCCESS ==
|
||||
hiprtcGetLoweredName(rtcProgram, kernelExpression.c_str(), &loweredName));
|
||||
REQUIRE(hipSuccess == hipModuleGetFunction(&kernelFunction, module, loweredName));
|
||||
|
||||
/* After obtaining the kernelFunction, the program is no longer needed. So it can be destroyed */
|
||||
REQUIRE(HIPRTC_SUCCESS == hiprtcDestroyProgram(&rtcProgram));
|
||||
|
||||
testContext.trackRtcState(kernelExpression, module, kernelFunction);
|
||||
}
|
||||
}
|
||||
|
||||
hipFunction_t kernelFunction = testContext.getFunction(kernelExpression);
|
||||
|
||||
std::vector<KernelArgument> args = {
|
||||
{reinterpret_cast<const void*>(&packedArgs), sizeof(Args), alignof(Args)}...};
|
||||
|
||||
std::vector<char> alignedArguments{alignArguments(args)};
|
||||
size_t argumentsSize{alignedArguments.size()};
|
||||
|
||||
void* config_array[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, alignedArguments.data(),
|
||||
HIP_LAUNCH_PARAM_BUFFER_SIZE, reinterpret_cast<void*>(&argumentsSize),
|
||||
HIP_LAUNCH_PARAM_END};
|
||||
|
||||
REQUIRE(hipSuccess ==
|
||||
hipModuleLaunchKernel(kernelFunction, numBlocks.x, numBlocks.y, numBlocks.z, numThreads.x,
|
||||
numThreads.y, numThreads.z, memPerBlock, stream, nullptr,
|
||||
config_array));
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Template overload for when numBlocks and numThreads is an integer.
|
||||
*
|
||||
*/
|
||||
template <typename... Typenames, typename... Args>
|
||||
void launchRTCKernel(std::string kernelName, int numBlocks, int numThreads,
|
||||
std::uint32_t memPerBlock, hipStream_t stream, Args&&... packedArgs) {
|
||||
launchRTCKernel<Typenames...>(kernelName, dim3(numBlocks), dim3(numThreads), memPerBlock, stream,
|
||||
std::forward<Args>(packedArgs)...);
|
||||
}
|
||||
|
||||
} // namespace HipTest
|
||||
@@ -0,0 +1,88 @@
|
||||
/*
|
||||
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
/**
 * @brief Error codes returned by rocm_smi_lib functions
 *
 * Every enumerator is listed with its explicit numeric value.
 */
typedef enum {
  //! Operation was successful
  RSMI_STATUS_SUCCESS = 0x0,
  //! Passed in arguments are not valid
  RSMI_STATUS_INVALID_ARGS = 0x1,
  //! The requested information or action is not available for the given input, on the given
  //! system
  RSMI_STATUS_NOT_SUPPORTED = 0x2,
  //! Problem accessing a file. This may be because the operation is not supported by the Linux
  //! kernel version running on the executing machine
  RSMI_STATUS_FILE_ERROR = 0x3,
  //! Permission denied/EACCESS file error. Many functions require root access to run.
  RSMI_STATUS_PERMISSION = 0x4,
  //! Unable to acquire memory or other resource
  RSMI_STATUS_OUT_OF_RESOURCES = 0x5,
  //! An internal exception was caught
  RSMI_STATUS_INTERNAL_EXCEPTION = 0x6,
  //! The provided input is out of allowable or safe range
  RSMI_STATUS_INPUT_OUT_OF_BOUNDS = 0x7,
  //! An error occurred when rsmi was initializing internal data structures
  RSMI_STATUS_INIT_ERROR = 0x8,
  //! Alias of RSMI_STATUS_INIT_ERROR
  RSMI_INITIALIZATION_ERROR = RSMI_STATUS_INIT_ERROR,
  //! The requested function has not yet been implemented in the current system for the current
  //! devices
  RSMI_STATUS_NOT_YET_IMPLEMENTED = 0x9,
  //! An item was searched for but not found
  RSMI_STATUS_NOT_FOUND = 0xA,
  //! Not enough resources were available for the operation
  RSMI_STATUS_INSUFFICIENT_SIZE = 0xB,
  //! An interrupt occurred during execution of function
  RSMI_STATUS_INTERRUPT = 0xC,
  //! An unexpected amount of data was read
  RSMI_STATUS_UNEXPECTED_SIZE = 0xD,
  //! No data was found for a given input
  RSMI_STATUS_NO_DATA = 0xE,
  //! The data read or provided to function is not what was expected
  RSMI_STATUS_UNEXPECTED_DATA = 0xF,
  //! A resource or mutex could not be acquired because it is already being used
  RSMI_STATUS_BUSY = 0x10,
  //! An internal reference counter exceeded INT32_MAX
  RSMI_STATUS_REFCOUNT_OVERFLOW = 0x11,

  //! An unknown error occurred
  RSMI_STATUS_UNKNOWN_ERROR = 0xFFFFFFFF,
} rsmi_status_t;
|
||||
|
||||
|
||||
/**
 * @brief Types of memory
 *
 * RSMI_MEM_TYPE_FIRST and RSMI_MEM_TYPE_LAST alias the first and the last real memory type.
 */
typedef enum {
  RSMI_MEM_TYPE_FIRST = 0,

  //! VRAM memory
  RSMI_MEM_TYPE_VRAM = RSMI_MEM_TYPE_FIRST,
  //! VRAM memory that is visible
  RSMI_MEM_TYPE_VIS_VRAM = 1,
  //! GTT memory
  RSMI_MEM_TYPE_GTT = 2,

  RSMI_MEM_TYPE_LAST = RSMI_MEM_TYPE_GTT
} rsmi_memory_type_t;
|
||||
@@ -0,0 +1,376 @@
|
||||
#pragma once
|
||||
#include <math.h>
|
||||
|
||||
#define HIP_SAMPLING_VERIFY_EPSILON 0.00001
|
||||
// The internal precision varies by the GPU family and sometimes within the family.
|
||||
// Thus the following threshold is subject to change.
|
||||
#define HIP_SAMPLING_VERIFY_RELATIVE_THRESHOLD 0.05 // 5% for filter mode
|
||||
#define HIP_SAMPLING_VERIFY_ABSOLUTE_THRESHOLD 0.1
|
||||
|
||||
#if HT_NVIDIA
|
||||
typedef unsigned char uchar;
|
||||
|
||||
template<typename T>
|
||||
typename std::enable_if<sizeof(T) / sizeof(decltype(T::x)) == 4, T>::type
|
||||
inline __host__ __device__ operator+(const T &a, const T &b)
|
||||
{
|
||||
return {a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w};
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
typename std::enable_if<sizeof(T) / sizeof(decltype(T::x)) == 4, T>::type
|
||||
inline __host__ __device__ operator-(const T &a, const T &b)
|
||||
{
|
||||
return {a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w};
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
typename std::enable_if<sizeof(T) / sizeof(decltype(T::x)) == 4, bool>::type
|
||||
inline __host__ __device__ operator==(const T &a, const T &b)
|
||||
{
|
||||
return a.x == b.x && a.y == b.y && a.z == b.z && a.w == b.w;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
typename std::enable_if<sizeof(T) / sizeof(decltype(T::x)) == 4, T>::type
|
||||
inline __host__ __device__ operator*(const decltype(T::x) &a, const T &b)
|
||||
{
|
||||
return {a * b.x, a * b.y, a * b.z, a * b.w};
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
typename std::enable_if<sizeof(T) / sizeof(decltype(T::x)) == 4, void>::type
|
||||
inline __host__ __device__ operator*=(T &a, const decltype(T::x) &b)
|
||||
{
|
||||
a.x *= b;
|
||||
a.y *= b;
|
||||
a.z *= b;
|
||||
a.w *= b;
|
||||
}
|
||||
#endif // HT_NVIDIA
|
||||
|
||||
template <typename T> struct mipmapLevelArray {
|
||||
T* data; // level array data
|
||||
hipExtent e; // level array size
|
||||
};
|
||||
|
||||
// Encodes a linear colour component as sRGB (original note: "From CIE 1931 color space to sRGB").
// NaN is treated as 0, values above 1 map to 1 and values below 0 map to 0. Inside [0, 1] the
// result is 12.92 * c below 0.0031308 and 1.055 * c^(5/12) - 0.055 otherwise.
// The computation is done in double and converted back to float.
inline float hipSRGBMap(float fc) {
  double c = static_cast<double>(fc);

#if !defined(_WIN32)
  const bool notANumber = std::isnan(c);
#else
  const bool notANumber = _isnan(c);
#endif
  if (notANumber) {
    c = 0.0;
  }

  if (c > 1.0) {
    return static_cast<float>(1.0);
  }
  if (c < 0.0) {
    return static_cast<float>(0.0);
  }
  if (c < 0.0031308) {
    return static_cast<float>(12.92 * c);
  }
  return static_cast<float>(1.055 * pow(c, 5.0 / 12.0) - 0.055);
}
|
||||
|
||||
// Decodes an sRGB colour component back to a linear value (original note: "From sRGB to CIE 1931
// color space"): c / 12.92 for c <= 0.04045 and ((c + 0.055) / 1.055)^2.4 otherwise.
// The input is not clamped; the computation is done in double and converted back to float.
inline float hipSRGBUnmap(float fc) {
  const double c = static_cast<double>(fc);
  const bool inLinearSegment = (c <= 0.04045);
  const double decoded = inLinearSegment ? (c / 12.92) : pow((c + 0.055) / 1.055, 2.4);
  return static_cast<float>(decoded);
}
|
||||
|
||||
// Applies the scalar hipSRGBMap() to the x, y and z components of fc.
// The w (alpha) component is returned unchanged.
inline float4 hipSRGBMap(float4 fc) {
  float4 mapped = fc;  // carries w over untouched
  mapped.x = hipSRGBMap(fc.x);
  mapped.y = hipSRGBMap(fc.y);
  mapped.z = hipSRGBMap(fc.z);
  return mapped;
}
|
||||
|
||||
// Applies the scalar hipSRGBUnmap() to the x, y and z components of fc.
// The w (alpha) component is returned unchanged.
inline float4 hipSRGBUnmap(float4 fc) {
  float4 unmapped = fc;  // carries w over untouched
  unmapped.x = hipSRGBUnmap(fc.x);
  unmapped.y = hipSRGBUnmap(fc.y);
  unmapped.z = hipSRGBUnmap(fc.z);
  return unmapped;
}
|
||||
|
||||
// Magnitude of a scalar: fabs(t), returned as double.
// Selected for every type that satisfies std::is_scalar.
template<typename T>
typename std::enable_if<std::is_scalar<T>::value == true, double>::type
hipFabs(const T &t) {
  const double magnitude = fabs(t);
  return magnitude;
}
|
||||
|
||||
// Magnitude of a 1-component vector: fabs of its x member, returned as double.
// Selected for types whose size equals the size of their x member.
template<typename T>
typename std::enable_if<sizeof(T) / sizeof(decltype(T::x)) == 1, double>::type
hipFabs(const T &t) {
  const double magnitude = fabs(t.x);
  return magnitude;
}
|
||||
|
||||
// Magnitude of a 2-component vector: sqrt(x^2 + y^2), computed in double.
// Selected for types whose size is twice the size of their x member.
template<typename T>
typename std::enable_if<sizeof(T) / sizeof(decltype(T::x)) == 2, double>::type
hipFabs(const T &t) {
  const double x = static_cast<double>(t.x);
  const double y = static_cast<double>(t.y);
  return sqrt(x * x + y * y);
}
|
||||
|
||||
// Magnitude of a 3-component vector: sqrt(x^2 + y^2 + z^2), computed in double.
// Selected for types whose size is three times the size of their x member.
template<typename T>
typename std::enable_if<sizeof(T) / sizeof(decltype(T::x)) == 3, double>::type
hipFabs(const T &t) {
  const double x = static_cast<double>(t.x);
  const double y = static_cast<double>(t.y);
  const double z = static_cast<double>(t.z);
  return sqrt(x * x + y * y + z * z);
}
|
||||
|
||||
// Magnitude of a 4-component vector: sqrt(x^2 + y^2 + z^2 + w^2), computed in double.
// Selected for types whose size is four times the size of their x member.
template<typename T>
typename std::enable_if<sizeof(T) / sizeof(decltype(T::x)) == 4, double>::type
hipFabs(const T &t) {
  const double x = static_cast<double>(t.x);
  const double y = static_cast<double>(t.y);
  const double z = static_cast<double>(t.z);
  const double w = static_cast<double>(t.w);
  return sqrt(x * x + y * y + z * z + w * w);
}
|
||||
|
||||
template<typename T, hipTextureFilterMode fMode = hipFilterModePoint, bool sRGB = false>
|
||||
bool hipTextureSamplingVerify(const T &outputData, const T &expected) {
|
||||
bool testResult = false;
|
||||
if (fMode == hipFilterModePoint && !sRGB) {
|
||||
testResult = outputData == expected;
|
||||
} else {
|
||||
double mean = (hipFabs(outputData) + hipFabs(expected)) / 2;
|
||||
double diff = hipFabs(outputData - expected);
|
||||
double ratio = diff / (mean + HIP_SAMPLING_VERIFY_EPSILON);
|
||||
if (ratio <= HIP_SAMPLING_VERIFY_RELATIVE_THRESHOLD) {
|
||||
testResult = true;
|
||||
} else if (diff <= HIP_SAMPLING_VERIFY_ABSOLUTE_THRESHOLD) {
|
||||
// Some small outputs have big ratio due to float operation difference of ALU and GPU
|
||||
testResult = true;
|
||||
}
|
||||
}
|
||||
return testResult;
|
||||
}
|
||||
|
||||
// Simulate CTS static AddressingTable sAddressingTable
|
||||
template<hipTextureAddressMode addressMode>
|
||||
void hipTextureGetAddress(int &value, const int maxValue)
|
||||
{
|
||||
switch(addressMode)
|
||||
{
|
||||
case hipAddressModeClamp:
|
||||
value = value < 0 ? 0
|
||||
: (value > maxValue - 1 ? maxValue - 1 : value);
|
||||
break;
|
||||
case hipAddressModeBorder:
|
||||
value = value < -1 ? -1
|
||||
: (value > maxValue ? maxValue : value);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Simulate logics in CTS read_image_pixel_float().
|
||||
// x, y and z must be returned by hipTextureGetAddress()
|
||||
template<typename T, hipTextureAddressMode addressMode, bool sRGB = false>
|
||||
T hipTextureGetValue(const T *data, const int x, const int width,
|
||||
const int y = 0, const int height = 0, const int z = 0, const int depth = 0) {
|
||||
T result;
|
||||
memset(&result, 0, sizeof(result));
|
||||
switch (addressMode) {
|
||||
case hipAddressModeClamp:
|
||||
if (width > 0) {
|
||||
if (height == 0 && depth == 0) {
|
||||
result = data[x]; // 1D
|
||||
} else if (depth == 0) {
|
||||
result = data[y * width + x]; // 2D
|
||||
} else {
|
||||
result = data[z * width * height + y * width + x]; // 3D
|
||||
}
|
||||
}
|
||||
break;
|
||||
case hipAddressModeBorder:
|
||||
if (width > 0) {
|
||||
if (height == 0 && depth == 0) {
|
||||
if (x >= 0 && x < width)
|
||||
result = data[x]; // 1D
|
||||
} else if (depth == 0) {
|
||||
if (x >= 0 && x < width && y >= 0 && y < height)
|
||||
result = data[y * width + x]; // 2D
|
||||
} else {
|
||||
if (x >= 0 && x < width && y >= 0 && y < height && z >= 0 && z < depth)
|
||||
result = data[z * width * height + y * width + x]; // 3D
|
||||
}
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
if constexpr (sRGB && std::is_same<T, float4>::value) {
|
||||
result = hipSRGBUnmap(result);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
template<typename T, hipTextureAddressMode addressMode, hipTextureFilterMode filterMode, bool sRGB = false>
|
||||
T getExpectedValue(const int width, float x, const T *data) {
|
||||
T result;
|
||||
memset(&result, 0, sizeof(result));
|
||||
switch (filterMode) {
|
||||
case hipFilterModePoint: {
|
||||
int i1 = static_cast<int>(floor(x));
|
||||
hipTextureGetAddress < addressMode > (i1, width);
|
||||
result = hipTextureGetValue < T, addressMode, sRGB > (data, i1, width);
|
||||
}
|
||||
break;
|
||||
case hipFilterModeLinear: {
|
||||
x -= 0.5;
|
||||
int i1 = static_cast<int>(floor(x));
|
||||
int i2 = i1 + 1;
|
||||
float a = x - i1;
|
||||
hipTextureGetAddress < addressMode > (i1, width);
|
||||
hipTextureGetAddress < addressMode > (i2, width);
|
||||
|
||||
T t1 = hipTextureGetValue < T, addressMode, sRGB> (data, i1, width);
|
||||
T t2 = hipTextureGetValue < T, addressMode, sRGB > (data, i2, width);
|
||||
|
||||
return (1 - a) * t1 + a * t2;
|
||||
}
|
||||
break;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
template<typename T, hipTextureAddressMode addressMode, hipTextureFilterMode filterMode, bool sRGB = false>
|
||||
T getExpectedValue(const int width, const int height, float x, float y, const T *data) {
|
||||
T result;
|
||||
memset(&result, 0, sizeof(result));
|
||||
switch (filterMode) {
|
||||
case hipFilterModePoint: {
|
||||
int i1 = static_cast<int>(floor(x));
|
||||
int j1 = static_cast<int>(floor(y));
|
||||
hipTextureGetAddress < addressMode > (i1, width);
|
||||
hipTextureGetAddress < addressMode > (j1, height);
|
||||
result = hipTextureGetValue < T, addressMode, sRGB > (data, i1, width, j1, height);
|
||||
}
|
||||
break;
|
||||
case hipFilterModeLinear: {
|
||||
x -= 0.5;
|
||||
y -= 0.5;
|
||||
|
||||
int i1 = static_cast<int>(floor(x));
|
||||
int j1 = static_cast<int>(floor(y));
|
||||
|
||||
int i2 = i1 + 1;
|
||||
int j2 = j1 + 1;
|
||||
|
||||
float a = x - i1;
|
||||
float b = y - j1;
|
||||
|
||||
hipTextureGetAddress < addressMode > (i1, width);
|
||||
hipTextureGetAddress < addressMode > (i2, width);
|
||||
hipTextureGetAddress < addressMode > (j1, height);
|
||||
hipTextureGetAddress < addressMode > (j2, height);
|
||||
|
||||
T t11 = hipTextureGetValue < T, addressMode, sRGB
|
||||
> (data, i1, width, j1, height);
|
||||
T t21 = hipTextureGetValue < T, addressMode, sRGB
|
||||
> (data, i2, width, j1, height);
|
||||
T t12 = hipTextureGetValue < T, addressMode, sRGB
|
||||
> (data, i1, width, j2, height);
|
||||
T t22 = hipTextureGetValue < T, addressMode, sRGB
|
||||
> (data, i2, width, j2, height);
|
||||
|
||||
result = (1 - a) * (1 - b) * t11 + a * (1 - b) * t21 + (1 - a) * b * t12
|
||||
+ a * b * t22;
|
||||
}
|
||||
break;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
template<class T, hipTextureAddressMode addressMode, hipTextureFilterMode filterMode, bool sRGB = false>
|
||||
T getExpectedValue(const int width, const int height, const int depth,
|
||||
float x, float y, float z, const T *data) {
|
||||
T result;
|
||||
memset(&result, 0, sizeof(result));
|
||||
switch (filterMode) {
|
||||
case hipFilterModePoint: {
|
||||
int i1 = static_cast<int>(floor(x));
|
||||
int j1 = static_cast<int>(floor(y));
|
||||
int k1 = static_cast<int>(floor(z));
|
||||
|
||||
hipTextureGetAddress < addressMode > (i1, width);
|
||||
hipTextureGetAddress < addressMode > (j1, height);
|
||||
hipTextureGetAddress < addressMode > (k1, depth);
|
||||
|
||||
result = hipTextureGetValue < T, addressMode, sRGB > (data, i1, width, j1, height, k1, depth);
|
||||
}
|
||||
break;
|
||||
case hipFilterModeLinear: {
|
||||
x -= 0.5;
|
||||
y -= 0.5;
|
||||
z -= 0.5;
|
||||
|
||||
int i1 = static_cast<int>(floor(x));
|
||||
int j1 = static_cast<int>(floor(y));
|
||||
int k1 = static_cast<int>(floor(z));
|
||||
|
||||
int i2 = i1 + 1;
|
||||
int j2 = j1 + 1;
|
||||
int k2 = k1 + 1;
|
||||
|
||||
float a = x - i1;
|
||||
float b = y - j1;
|
||||
float c = z - k1;
|
||||
|
||||
hipTextureGetAddress < addressMode > (i1, width);
|
||||
hipTextureGetAddress < addressMode > (i2, width);
|
||||
hipTextureGetAddress < addressMode > (j1, height);
|
||||
hipTextureGetAddress < addressMode > (j2, height);
|
||||
hipTextureGetAddress < addressMode > (k1, depth);
|
||||
hipTextureGetAddress < addressMode > (k2, depth);
|
||||
|
||||
T t111 = hipTextureGetValue < T, addressMode, sRGB
|
||||
> (data, i1, width, j1, height, k1, depth);
|
||||
T t211 = hipTextureGetValue < T, addressMode, sRGB
|
||||
> (data, i2, width, j1, height, k1, depth);
|
||||
T t121 = hipTextureGetValue < T, addressMode, sRGB
|
||||
> (data, i1, width, j2, height, k1, depth);
|
||||
T t112 = hipTextureGetValue < T, addressMode, sRGB
|
||||
> (data, i1, width, j1, height, k2, depth);
|
||||
T t122 = hipTextureGetValue < T, addressMode, sRGB
|
||||
> (data, i1, width, j2, height, k2, depth);
|
||||
T t212 = hipTextureGetValue < T, addressMode, sRGB
|
||||
> (data, i2, width, j1, height, k2, depth);
|
||||
T t221 = hipTextureGetValue < T, addressMode, sRGB
|
||||
> (data, i2, width, j2, height, k1, depth);
|
||||
T t222 = hipTextureGetValue < T, addressMode, sRGB
|
||||
> (data, i2, width, j2, height, k2, depth);
|
||||
|
||||
result =
|
||||
(1 - a) * (1 - b) * (1 - c) * t111 + a * (1 - b) * (1 - c) * t211 +
|
||||
(1 - a) * b * (1 - c) * t121 + a * b * (1 - c) * t221 +
|
||||
(1 - a) * (1 - b) * c * t112 + a * (1 - b) * c * t212 +
|
||||
(1 - a) * b * c * t122 + a * b * c * t222;
|
||||
|
||||
}
|
||||
break;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
@@ -0,0 +1,27 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <map>
|
||||
|
||||
// Lookup table keyed by kernel name (namespace-qualified where applicable);
// the mapped value is a file name, presumably the source that defines the
// kernel -- confirm against the code that consumes this table.
const std::map<std::string, std::string> mapKernelToFileName = {
    {"Set", "Set.cpp"},
    {"HipTest::vectorADD", "vectorADD.inl"},
};
|
||||
@@ -0,0 +1,55 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <map>
|
||||
|
||||
#ifndef RTC_TESTING
|
||||
|
||||
__global__ void Set(int* Ad, int val);
|
||||
|
||||
/* Kernel Templates */
|
||||
#include "vectorADD.inl"
|
||||
|
||||
#else
|
||||
|
||||
/*
 * Wrapper macros that define a function named after a kernel which returns the kernel's
 * name as a std::string.
 *
 * For kernel templates a variadic template is generated, so the wrapper can be named with
 * the same template-argument syntax as the real kernel; this keeps call sites compatible
 * with the launchKernel template when RTC is not enabled. If the kernel lives inside a
 * namespace, use the "_NS" version of the macro: it prefixes the returned name with
 * "<namespace>::".
 *
 * The non-template wrappers are declared `inline`: this header is guarded only by
 * #pragma once, so a plain function definition would be emitted in every translation unit
 * that includes it and collide at link time.
 */
#define FUNCTION_WRAPPER(param)                                                                    \
  inline std::string param() { return #param; }
#define TEMPLATE_WRAPPER(param)                                                                    \
  template <typename...> std::string param() { return #param; }
#define FUNCTION_WRAPPER_NS(param, ns)                                                             \
  inline std::string param() { return #ns "::" #param; }
#define TEMPLATE_WRAPPER_NS(param, ns)                                                             \
  template <typename...> std::string param() { return #ns "::" #param; }

FUNCTION_WRAPPER(Set);

namespace HipTest {
TEMPLATE_WRAPPER_NS(vectorADD, HipTest);
}
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,329 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <functional>
|
||||
|
||||
#include <hip/hip_runtime_api.h>
|
||||
#include <hip_test_common.hh>
|
||||
#include <resource_guards.hh>
|
||||
#include <utils.hh>
|
||||
|
||||
static inline unsigned int GenerateLinearAllocationFlagCombinations(
|
||||
const LinearAllocs allocation_type) {
|
||||
switch (allocation_type) {
|
||||
case LinearAllocs::hipHostMalloc:
|
||||
return GENERATE(hipHostMallocDefault, hipHostMallocPortable, hipHostMallocMapped,
|
||||
hipHostMallocWriteCombined);
|
||||
case LinearAllocs::mallocAndRegister:
|
||||
case LinearAllocs::hipMallocManaged:
|
||||
case LinearAllocs::malloc:
|
||||
case LinearAllocs::hipMalloc:
|
||||
return 0u;
|
||||
default:
|
||||
assert("Invalid LinearAllocs enumerator");
|
||||
throw std::invalid_argument("Invalid LinearAllocs enumerator");
|
||||
}
|
||||
}
|
||||
|
||||
template <bool should_synchronize, typename F>
|
||||
void MemcpyDeviceToHostShell(F memcpy_func, const hipStream_t kernel_stream = nullptr) {
|
||||
using LA = LinearAllocs;
|
||||
const auto allocation_size = GENERATE(kPageSize / 2, kPageSize, kPageSize * 2);
|
||||
const auto host_allocation_type = GENERATE(LA::malloc, LA::hipHostMalloc);
|
||||
const auto host_allocation_flags = GenerateLinearAllocationFlagCombinations(host_allocation_type);
|
||||
|
||||
LinearAllocGuard<int> host_allocation(host_allocation_type, allocation_size,
|
||||
host_allocation_flags);
|
||||
LinearAllocGuard<int> device_allocation(LA::hipMalloc, allocation_size);
|
||||
|
||||
const auto element_count = allocation_size / sizeof(*device_allocation.ptr());
|
||||
constexpr auto thread_count = 1024;
|
||||
const auto block_count = element_count / thread_count + 1;
|
||||
constexpr int expected_value = 42;
|
||||
VectorSet<<<block_count, thread_count>>>(device_allocation.ptr(), expected_value, element_count);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
|
||||
HIP_CHECK(memcpy_func(host_allocation.host_ptr(), device_allocation.ptr(), allocation_size));
|
||||
if constexpr (should_synchronize) {
|
||||
HIP_CHECK(hipStreamSynchronize(kernel_stream));
|
||||
}
|
||||
|
||||
ArrayFindIfNot(host_allocation.host_ptr(), expected_value, element_count);
|
||||
}
|
||||
|
||||
template <bool should_synchronize, typename F>
|
||||
void MemcpyHostToDeviceShell(F memcpy_func, const hipStream_t kernel_stream = nullptr) {
|
||||
using LA = LinearAllocs;
|
||||
const auto allocation_size = GENERATE(kPageSize / 2, kPageSize, kPageSize * 2);
|
||||
const auto host_allocation_type = GENERATE(LA::malloc, LA::hipHostMalloc);
|
||||
const auto host_allocation_flags = GenerateLinearAllocationFlagCombinations(host_allocation_type);
|
||||
|
||||
LinearAllocGuard<int> src_host_allocation(host_allocation_type, allocation_size,
|
||||
host_allocation_flags);
|
||||
LinearAllocGuard<int> dst_host_allocation(LA::hipHostMalloc, allocation_size);
|
||||
LinearAllocGuard<int> device_allocation(LA::hipMalloc, allocation_size);
|
||||
|
||||
const auto element_count = allocation_size / sizeof(*device_allocation.ptr());
|
||||
constexpr int fill_value = 42;
|
||||
std::fill_n(src_host_allocation.host_ptr(), element_count, fill_value);
|
||||
std::fill_n(dst_host_allocation.host_ptr(), element_count, 0);
|
||||
|
||||
HIP_CHECK(memcpy_func(device_allocation.ptr(), src_host_allocation.host_ptr(), allocation_size));
|
||||
if constexpr (should_synchronize) {
|
||||
HIP_CHECK(hipStreamSynchronize(kernel_stream));
|
||||
}
|
||||
|
||||
HIP_CHECK(hipMemcpy(dst_host_allocation.host_ptr(), device_allocation.ptr(), allocation_size,
|
||||
hipMemcpyDeviceToHost));
|
||||
|
||||
ArrayFindIfNot(dst_host_allocation.host_ptr(), fill_value, element_count);
|
||||
}
|
||||
|
||||
template <bool should_synchronize, typename F>
|
||||
void MemcpyHostToHostShell(F memcpy_func, const hipStream_t kernel_stream = nullptr) {
|
||||
using LA = LinearAllocs;
|
||||
const auto allocation_size = GENERATE(kPageSize / 2, kPageSize, kPageSize * 2);
|
||||
const auto src_allocation_type = GENERATE(LA::malloc, LA::hipHostMalloc);
|
||||
const auto dst_allocation_type = GENERATE(LA::malloc, LA::hipHostMalloc);
|
||||
const auto src_allocation_flags = GenerateLinearAllocationFlagCombinations(src_allocation_type);
|
||||
const auto dst_allocation_flags = GenerateLinearAllocationFlagCombinations(dst_allocation_type);
|
||||
|
||||
LinearAllocGuard<int> src_allocation(src_allocation_type, allocation_size, src_allocation_flags);
|
||||
LinearAllocGuard<int> dst_allocation(dst_allocation_type, allocation_size, dst_allocation_flags);
|
||||
|
||||
const auto element_count = allocation_size / sizeof(*src_allocation.host_ptr());
|
||||
constexpr auto expected_value = 42;
|
||||
std::fill_n(src_allocation.host_ptr(), element_count, expected_value);
|
||||
|
||||
HIP_CHECK(memcpy_func(dst_allocation.host_ptr(), src_allocation.host_ptr(), allocation_size));
|
||||
if constexpr (should_synchronize) {
|
||||
HIP_CHECK(hipStreamSynchronize(kernel_stream));
|
||||
}
|
||||
|
||||
ArrayFindIfNot(dst_allocation.host_ptr(), expected_value, element_count);
|
||||
}
|
||||
|
||||
template <bool should_synchronize, bool enable_peer_access, typename F>
|
||||
void MemcpyDeviceToDeviceShell(F memcpy_func, const hipStream_t kernel_stream = nullptr) {
|
||||
const auto allocation_size = GENERATE(kPageSize / 2, kPageSize, kPageSize * 2);
|
||||
const auto device_count = HipTest::getDeviceCount();
|
||||
const auto src_device = GENERATE_COPY(range(0, device_count));
|
||||
const auto dst_device = GENERATE_COPY(range(0, device_count));
|
||||
|
||||
INFO("Src device: " << src_device << ", Dst device: " << dst_device);
|
||||
|
||||
HIP_CHECK(hipSetDevice(src_device));
|
||||
if constexpr (enable_peer_access) {
|
||||
if (src_device == dst_device) {
|
||||
return;
|
||||
}
|
||||
int can_access_peer = 0;
|
||||
HIP_CHECK(hipDeviceCanAccessPeer(&can_access_peer, src_device, dst_device));
|
||||
if (!can_access_peer) {
|
||||
std::string msg = "Skipped as peer access cannot be enabled between devices " +
|
||||
std::to_string(src_device) + " " + std::to_string(dst_device);
|
||||
HipTest::HIP_SKIP_TEST(msg.c_str());
|
||||
return;
|
||||
}
|
||||
HIP_CHECK(hipDeviceEnablePeerAccess(dst_device, 0));
|
||||
}
|
||||
|
||||
LinearAllocGuard<int> src_allocation(LinearAllocs::hipMalloc, allocation_size);
|
||||
LinearAllocGuard<int> result(LinearAllocs::hipHostMalloc, allocation_size, hipHostMallocPortable);
|
||||
HIP_CHECK(hipSetDevice(dst_device));
|
||||
LinearAllocGuard<int> dst_allocation(LinearAllocs::hipMalloc, allocation_size);
|
||||
|
||||
const auto element_count = allocation_size / sizeof(*src_allocation.ptr());
|
||||
constexpr auto thread_count = 1024;
|
||||
const auto block_count = element_count / thread_count + 1;
|
||||
constexpr int expected_value = 42;
|
||||
HIP_CHECK(hipSetDevice(src_device));
|
||||
VectorSet<<<block_count, thread_count>>>(src_allocation.ptr(), expected_value, element_count);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
|
||||
HIP_CHECK(memcpy_func(dst_allocation.ptr(), src_allocation.ptr(), allocation_size));
|
||||
if constexpr (should_synchronize) {
|
||||
HIP_CHECK(hipStreamSynchronize(kernel_stream));
|
||||
}
|
||||
|
||||
HIP_CHECK(
|
||||
hipMemcpy(result.host_ptr(), dst_allocation.ptr(), allocation_size, hipMemcpyDeviceToHost));
|
||||
if constexpr (enable_peer_access) {
|
||||
// If we've gotten this far, EnablePeerAccess must have succeeded, so we
|
||||
// only need to check this condition
|
||||
HIP_CHECK(hipDeviceDisablePeerAccess(dst_device));
|
||||
}
|
||||
|
||||
ArrayFindIfNot(result.host_ptr(), expected_value, element_count);
|
||||
}
|
||||
|
||||
template <bool should_synchronize, typename F>
|
||||
void MemcpyWithDirectionCommonTests(F memcpy_func, const hipStream_t kernel_stream = nullptr) {
|
||||
using namespace std::placeholders;
|
||||
SECTION("Device to host") {
|
||||
MemcpyDeviceToHostShell<should_synchronize>(
|
||||
std::bind(memcpy_func, _1, _2, _3, hipMemcpyDeviceToHost), kernel_stream);
|
||||
}
|
||||
|
||||
SECTION("Device to host with default kind") {
|
||||
MemcpyDeviceToHostShell<should_synchronize>(
|
||||
std::bind(memcpy_func, _1, _2, _3, hipMemcpyDefault), kernel_stream);
|
||||
}
|
||||
|
||||
SECTION("Host to device") {
|
||||
MemcpyHostToDeviceShell<should_synchronize>(
|
||||
std::bind(memcpy_func, _1, _2, _3, hipMemcpyHostToDevice), kernel_stream);
|
||||
}
|
||||
|
||||
SECTION("Host to device with default kind") {
|
||||
MemcpyHostToDeviceShell<should_synchronize>(
|
||||
std::bind(memcpy_func, _1, _2, _3, hipMemcpyDefault), kernel_stream);
|
||||
}
|
||||
|
||||
SECTION("Host to host") {
|
||||
MemcpyHostToHostShell<should_synchronize>(
|
||||
std::bind(memcpy_func, _1, _2, _3, hipMemcpyHostToHost), kernel_stream);
|
||||
}
|
||||
|
||||
SECTION("Host to host with default kind") {
|
||||
MemcpyHostToHostShell<should_synchronize>(std::bind(memcpy_func, _1, _2, _3,
|
||||
hipMemcpyDefault), kernel_stream);
|
||||
}
|
||||
|
||||
SECTION("Device to device") {
|
||||
SECTION("Peer access enabled") {
|
||||
MemcpyDeviceToDeviceShell<should_synchronize, true>(
|
||||
std::bind(memcpy_func, _1, _2, _3, hipMemcpyDeviceToDevice), kernel_stream);
|
||||
}
|
||||
SECTION("Peer access disabled") {
|
||||
MemcpyDeviceToDeviceShell<should_synchronize, false>(
|
||||
std::bind(memcpy_func, _1, _2, _3, hipMemcpyDeviceToDevice), kernel_stream);
|
||||
}
|
||||
}
|
||||
|
||||
SECTION("Device to device with default kind") {
|
||||
SECTION("Peer access enabled") {
|
||||
MemcpyDeviceToDeviceShell<should_synchronize, true>(
|
||||
std::bind(memcpy_func, _1, _2, _3, hipMemcpyDefault), kernel_stream);
|
||||
}
|
||||
SECTION("Peer access disabled") {
|
||||
MemcpyDeviceToDeviceShell<should_synchronize, false>(
|
||||
std::bind(memcpy_func, _1, _2, _3, hipMemcpyDefault), kernel_stream);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Synchronization behavior checks
|
||||
template <typename F>
|
||||
void MemcpySyncBehaviorCheck(F memcpy_func, const bool should_sync,
|
||||
const hipStream_t kernel_stream) {
|
||||
LaunchDelayKernel(std::chrono::milliseconds{100}, kernel_stream);
|
||||
HIP_CHECK(memcpy_func());
|
||||
if (should_sync) {
|
||||
HIP_CHECK(hipStreamQuery(kernel_stream));
|
||||
} else {
|
||||
HIP_CHECK_ERROR(hipStreamQuery(kernel_stream), hipErrorNotReady);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename F>
|
||||
void MemcpyHPageabletoDSyncBehavior(F memcpy_func, const bool should_sync,
|
||||
const hipStream_t kernel_stream = nullptr) {
|
||||
LinearAllocGuard<int> host_alloc(LinearAllocs::malloc, kPageSize);
|
||||
LinearAllocGuard<int> device_alloc(LinearAllocs::hipMalloc, kPageSize);
|
||||
MemcpySyncBehaviorCheck(std::bind(memcpy_func, device_alloc.ptr(), host_alloc.ptr(), kPageSize),
|
||||
should_sync, kernel_stream);
|
||||
}
|
||||
|
||||
template <typename F>
|
||||
void MemcpyHPinnedtoDSyncBehavior(F memcpy_func, const bool should_sync,
|
||||
const hipStream_t kernel_stream = nullptr) {
|
||||
LinearAllocGuard<int> host_alloc(LinearAllocs::hipHostMalloc, kPageSize);
|
||||
LinearAllocGuard<int> device_alloc(LinearAllocs::hipMalloc, kPageSize);
|
||||
MemcpySyncBehaviorCheck(std::bind(memcpy_func, device_alloc.ptr(), host_alloc.ptr(), kPageSize),
|
||||
should_sync, kernel_stream);
|
||||
}
|
||||
|
||||
template <typename F>
|
||||
void MemcpyDtoHPageableSyncBehavior(F memcpy_func, const bool should_sync,
|
||||
const hipStream_t kernel_stream = nullptr) {
|
||||
LinearAllocGuard<int> host_alloc(LinearAllocs::malloc, kPageSize);
|
||||
LinearAllocGuard<int> device_alloc(LinearAllocs::hipMalloc, kPageSize);
|
||||
MemcpySyncBehaviorCheck(std::bind(memcpy_func, host_alloc.ptr(), device_alloc.ptr(), kPageSize),
|
||||
should_sync, kernel_stream);
|
||||
}
|
||||
|
||||
template <typename F>
|
||||
void MemcpyDtoHPinnedSyncBehavior(F memcpy_func, const bool should_sync,
|
||||
const hipStream_t kernel_stream = nullptr) {
|
||||
LinearAllocGuard<int> host_alloc(LinearAllocs::hipHostMalloc, kPageSize);
|
||||
LinearAllocGuard<int> device_alloc(LinearAllocs::hipMalloc, kPageSize);
|
||||
MemcpySyncBehaviorCheck(std::bind(memcpy_func, host_alloc.ptr(), device_alloc.ptr(), kPageSize),
|
||||
should_sync, kernel_stream);
|
||||
}
|
||||
|
||||
template <typename F>
|
||||
void MemcpyDtoDSyncBehavior(F memcpy_func, const bool should_sync,
|
||||
const hipStream_t kernel_stream = nullptr) {
|
||||
LinearAllocGuard<int> src_alloc(LinearAllocs::hipMalloc, kPageSize);
|
||||
LinearAllocGuard<int> dst_alloc(LinearAllocs::hipMalloc, kPageSize);
|
||||
MemcpySyncBehaviorCheck(std::bind(memcpy_func, dst_alloc.ptr(), src_alloc.ptr(), kPageSize),
|
||||
should_sync, kernel_stream);
|
||||
}
|
||||
|
||||
template <typename F>
|
||||
void MemcpyHtoHSyncBehavior(F memcpy_func, const bool should_sync,
|
||||
const hipStream_t kernel_stream = nullptr) {
|
||||
using LA = LinearAllocs;
|
||||
auto src_alloc_type = GENERATE(LA::malloc, LA::hipHostMalloc);
|
||||
auto dst_alloc_type = GENERATE(LA::malloc, LA::hipHostMalloc);
|
||||
|
||||
LinearAllocGuard<int> src_alloc(src_alloc_type, kPageSize);
|
||||
LinearAllocGuard<int> dst_alloc(dst_alloc_type, kPageSize);
|
||||
MemcpySyncBehaviorCheck(std::bind(memcpy_func, dst_alloc.ptr(), src_alloc.ptr(), kPageSize),
|
||||
should_sync, kernel_stream);
|
||||
}
|
||||
|
||||
// Common negative tests
|
||||
// Negative tests shared by the memcpy APIs: calling f(dst, src, count) with
// either pointer replaced by nullptr must return hipErrorInvalidValue.
template <typename F>
void MemcpyCommonNegativeTests(F f, void* dst, void* src, size_t count) {
  SECTION("dst == nullptr") {
    HIP_CHECK_ERROR(f(nullptr, src, count), hipErrorInvalidValue);
  }

  SECTION("src == nullptr") {
    HIP_CHECK_ERROR(f(dst, nullptr, count), hipErrorInvalidValue);
  }
}
|
||||
|
||||
template <typename F>
|
||||
void MemcpyWithDirectionCommonNegativeTests(F f, void* dst, void* src, size_t count,
|
||||
hipMemcpyKind kind) {
|
||||
using namespace std::placeholders;
|
||||
MemcpyCommonNegativeTests(std::bind(f, _1, _2, _3, kind), dst, src, count);
|
||||
|
||||
// Disabled on AMD due to defect - EXSWHTEC-128
|
||||
#if HT_NVIDIA
|
||||
SECTION("Invalid MemcpyKind") {
|
||||
HIP_CHECK_ERROR(f(dst, src, count, static_cast<hipMemcpyKind>(-1)),
|
||||
hipErrorInvalidMemcpyDirection);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
@@ -0,0 +1,893 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#pragma clang diagnostic ignored "-Wmissing-field-initializers"
|
||||
#pragma clang diagnostic ignored "-Wunused-lambda-capture"
|
||||
#pragma clang diagnostic ignored "-Wunused-parameter"
|
||||
#include <variant>
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip/hip_runtime_api.h>
|
||||
#include <utils.hh>
|
||||
#include <resource_guards.hh>
|
||||
|
||||
using PtrVariant = std::variant<hipPitchedPtr, hipArray_t>;
|
||||
|
||||
// Returns the opposite copy direction: host-to-device and device-to-host are
// swapped, every other kind is returned unchanged.
static inline hipMemcpyKind ReverseMemcpyDirection(const hipMemcpyKind direction) {
  if (direction == hipMemcpyHostToDevice) {
    return hipMemcpyDeviceToHost;
  }
  if (direction == hipMemcpyDeviceToHost) {
    return hipMemcpyHostToDevice;
  }
  return direction;
}
|
||||
|
||||
// Builds a hipMemcpy3DParms from the given endpoints.
//
// Each endpoint is a PtrVariant: a hipArray_t is stored in dstArray/srcArray,
// a hipPitchedPtr in dstPtr/srcPtr. Fields that are not set explicitly keep
// the value given by the `{0}` initializer.
static inline hipMemcpy3DParms GetMemcpy3DParms(PtrVariant dst_ptr, hipPos dst_pos,
                                                PtrVariant src_ptr, hipPos src_pos,
                                                hipExtent extent, hipMemcpyKind kind) {
  hipMemcpy3DParms parms = {0};

  if (const auto* dst_array = std::get_if<hipArray_t>(&dst_ptr)) {
    parms.dstArray = *dst_array;
  } else {
    parms.dstPtr = std::get<hipPitchedPtr>(dst_ptr);
  }
  parms.dstPos = dst_pos;

  if (const auto* src_array = std::get_if<hipArray_t>(&src_ptr)) {
    parms.srcArray = *src_array;
  } else {
    parms.srcPtr = std::get<hipPitchedPtr>(src_ptr);
  }
  parms.srcPos = src_pos;

  parms.extent = extent;
  parms.kind = kind;
  return parms;
}
|
||||
|
||||
// Equality for hipPitchedPtr over ptr, pitch and ysize.
// xsize is not compared for now, because hipGraphMemcpyNodeGetParams returns
// an incorrect value for it.
static bool operator==(const hipPitchedPtr& lhs, const hipPitchedPtr& rhs) {
  if (lhs.ptr != rhs.ptr) {
    return false;
  }
  if (lhs.pitch != rhs.pitch) {
    return false;
  }
  return lhs.ysize == rhs.ysize;
}
|
||||
|
||||
// Member-wise equality for hipPos (x, y, z).
static bool operator==(const hipPos& lhs, const hipPos& rhs) {
  if (lhs.x != rhs.x) {
    return false;
  }
  if (lhs.y != rhs.y) {
    return false;
  }
  return lhs.z == rhs.z;
}
|
||||
|
||||
// Member-wise equality for hipExtent (width, height, depth).
static bool operator==(const hipExtent& lhs, const hipExtent& rhs) {
  if (lhs.width != rhs.width) {
    return false;
  }
  if (lhs.height != rhs.height) {
    return false;
  }
  return lhs.depth == rhs.depth;
}
|
||||
|
||||
// Equality for hipMemcpy3DParms over the destination (array, pitched pointer,
// position), the source (array, pitched pointer, position), the extent and
// the kind. The nested comparisons use the operator== overloads declared
// earlier in this header.
static inline bool operator==(const hipMemcpy3DParms& lhs, const hipMemcpy3DParms& rhs) {
  const bool same_dst =
      lhs.dstArray == rhs.dstArray && lhs.dstPtr == rhs.dstPtr && lhs.dstPos == rhs.dstPos;
  const bool same_src =
      lhs.srcArray == rhs.srcArray && lhs.srcPtr == rhs.srcPtr && lhs.srcPos == rhs.srcPos;
  const bool same_shape = lhs.extent == rhs.extent && lhs.kind == rhs.kind;

  return same_dst && same_src && same_shape;
}
|
||||
|
||||
template <bool async = false, bool graph = false, bool set_params = false>
|
||||
hipError_t Memcpy3DWrapper(PtrVariant dst_ptr, hipPos dst_pos, PtrVariant src_ptr, hipPos src_pos,
|
||||
hipExtent extent, hipMemcpyKind kind, hipStream_t stream = nullptr) {
|
||||
auto parms = GetMemcpy3DParms(dst_ptr, dst_pos, src_ptr, src_pos, extent, kind);
|
||||
|
||||
if constexpr (graph) {
|
||||
hipGraph_t g = nullptr;
|
||||
HIP_CHECK(hipGraphCreate(&g, 0));
|
||||
hipGraphNode_t node = nullptr;
|
||||
|
||||
if constexpr (set_params) {
|
||||
auto reversed_parms = GetMemcpy3DParms(src_ptr, src_pos, dst_ptr, dst_pos, extent,
|
||||
ReverseMemcpyDirection(kind));
|
||||
HIP_CHECK(hipGraphAddMemcpyNode(&node, g, nullptr, 0, &reversed_parms));
|
||||
HIP_CHECK(hipGraphMemcpyNodeSetParams(node, &parms));
|
||||
} else {
|
||||
HIP_CHECK(hipGraphAddMemcpyNode(&node, g, nullptr, 0, &parms));
|
||||
}
|
||||
|
||||
hipMemcpy3DParms retrieved_params = {0};
|
||||
HIP_CHECK(hipGraphMemcpyNodeGetParams(node, &retrieved_params));
|
||||
REQUIRE(parms == retrieved_params);
|
||||
|
||||
hipGraphExec_t graph_exec = nullptr;
|
||||
HIP_CHECK(hipGraphInstantiate(&graph_exec, g, nullptr, nullptr, 0));
|
||||
HIP_CHECK(hipGraphLaunch(graph_exec, hipStreamPerThread));
|
||||
HIP_CHECK(hipStreamSynchronize(hipStreamPerThread));
|
||||
|
||||
HIP_CHECK(hipGraphExecDestroy(graph_exec));
|
||||
HIP_CHECK(hipGraphDestroy(g));
|
||||
|
||||
return hipSuccess;
|
||||
}
|
||||
|
||||
if constexpr (async) {
|
||||
return hipMemcpy3DAsync(&parms, stream);
|
||||
} else {
|
||||
return hipMemcpy3D(&parms);
|
||||
}
|
||||
}
|
||||
|
||||
template <bool should_synchronize, typename F>
|
||||
void Memcpy3DDeviceToHostShell(F memcpy_func, const hipStream_t kernel_stream = nullptr) {
|
||||
const auto kind = GENERATE(hipMemcpyDeviceToHost, hipMemcpyDefault);
|
||||
|
||||
constexpr hipExtent extent{127 * sizeof(int), 128, 8};
|
||||
|
||||
LinearAllocGuard3D<int> device_alloc(extent);
|
||||
|
||||
const size_t host_pitch = GENERATE_REF(device_alloc.width(), device_alloc.width() + 64);
|
||||
LinearAllocGuard<int> host_alloc(LinearAllocs::hipHostMalloc,
|
||||
host_pitch * device_alloc.height() * device_alloc.depth());
|
||||
|
||||
const dim3 threads_per_block(32, 32);
|
||||
const dim3 blocks(device_alloc.width_logical() / threads_per_block.x + 1,
|
||||
device_alloc.height() / threads_per_block.y + 1, device_alloc.depth());
|
||||
Iota<<<blocks, threads_per_block>>>(device_alloc.ptr(), device_alloc.pitch(),
|
||||
device_alloc.width_logical(), device_alloc.height(),
|
||||
device_alloc.depth());
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
|
||||
HIP_CHECK(memcpy_func(
|
||||
make_hipPitchedPtr(host_alloc.ptr(), host_pitch, device_alloc.width(), device_alloc.height()),
|
||||
make_hipPos(0, 0, 0), device_alloc.pitched_ptr(), make_hipPos(0, 0, 0), device_alloc.extent(),
|
||||
kind, kernel_stream));
|
||||
if constexpr (should_synchronize) {
|
||||
HIP_CHECK(hipStreamSynchronize(kernel_stream));
|
||||
}
|
||||
|
||||
const auto f = [extent](size_t x, size_t y, size_t z) {
|
||||
constexpr auto width_logical = extent.width / sizeof(int);
|
||||
return z * width_logical * extent.height + y * width_logical + x;
|
||||
};
|
||||
PitchedMemoryVerify(host_alloc.ptr(), host_pitch, device_alloc.width_logical(),
|
||||
device_alloc.height(), device_alloc.depth(), f);
|
||||
}
|
||||
|
||||
template <bool should_synchronize, bool enable_peer_access, typename F>
|
||||
void Memcpy3DDeviceToDeviceShell(F memcpy_func, hipStream_t kernel_stream = nullptr) {
|
||||
const auto kind = GENERATE(hipMemcpyDeviceToDevice, hipMemcpyDefault);
|
||||
|
||||
constexpr hipExtent extent{127 * sizeof(int), 128, 8};
|
||||
|
||||
const auto device_count = HipTest::getDeviceCount();
|
||||
const auto src_device = GENERATE_COPY(range(0, device_count));
|
||||
const auto dst_device = GENERATE_COPY(range(0, device_count));
|
||||
|
||||
INFO("Src device: " << src_device << ", Dst device: " << dst_device);
|
||||
|
||||
HIP_CHECK(hipSetDevice(src_device));
|
||||
if (device_count > 0 && kernel_stream != nullptr && kernel_stream != hipStreamPerThread) {
|
||||
HIP_CHECK(hipStreamCreate(&kernel_stream));
|
||||
}
|
||||
if constexpr (enable_peer_access) {
|
||||
if (src_device == dst_device) {
|
||||
if (device_count > 0 && kernel_stream != nullptr && kernel_stream != hipStreamPerThread) {
|
||||
HIP_CHECK(hipStreamDestroy(kernel_stream));
|
||||
}
|
||||
return;
|
||||
}
|
||||
int can_access_peer = 0;
|
||||
HIP_CHECK(hipDeviceCanAccessPeer(&can_access_peer, src_device, dst_device));
|
||||
if (!can_access_peer) {
|
||||
std::string msg = "Skipped as peer access cannot be enabled between devices " +
|
||||
std::to_string(src_device) + " " + std::to_string(dst_device);
|
||||
HipTest::HIP_SKIP_TEST(msg.c_str());
|
||||
if (device_count > 0 && kernel_stream != nullptr && kernel_stream != hipStreamPerThread) {
|
||||
HIP_CHECK(hipStreamDestroy(kernel_stream));
|
||||
}
|
||||
return;
|
||||
}
|
||||
HIP_CHECK(hipDeviceEnablePeerAccess(dst_device, 0));
|
||||
}
|
||||
|
||||
LinearAllocGuard3D<int> src_alloc(extent);
|
||||
HIP_CHECK(hipSetDevice(dst_device));
|
||||
LinearAllocGuard3D<int> dst_alloc(extent);
|
||||
HIP_CHECK(hipSetDevice(src_device));
|
||||
LinearAllocGuard<int> host_alloc(LinearAllocs::hipHostMalloc,
|
||||
dst_alloc.width() * dst_alloc.height() * dst_alloc.depth());
|
||||
|
||||
const dim3 threads_per_block(32, 32);
|
||||
const dim3 blocks(dst_alloc.width_logical() / threads_per_block.x + 1,
|
||||
dst_alloc.height() / threads_per_block.y + 1, dst_alloc.depth());
|
||||
// Using dst_alloc width and height to set only the elements that will be copied over to
|
||||
// dst_alloc
|
||||
Iota<<<blocks, threads_per_block>>>(src_alloc.ptr(), src_alloc.pitch(),
|
||||
dst_alloc.width_logical(),
|
||||
dst_alloc.height(), dst_alloc.depth());
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
|
||||
HIP_CHECK(memcpy_func(dst_alloc.pitched_ptr(), make_hipPos(0, 0, 0), src_alloc.pitched_ptr(),
|
||||
make_hipPos(0, 0, 0), dst_alloc.extent(), kind, kernel_stream));
|
||||
if constexpr (should_synchronize) {
|
||||
HIP_CHECK(hipStreamSynchronize(kernel_stream));
|
||||
}
|
||||
if (device_count > 0 && kernel_stream != nullptr && kernel_stream != hipStreamPerThread) {
|
||||
HIP_CHECK(hipStreamDestroy(kernel_stream));
|
||||
}
|
||||
HIP_CHECK(Memcpy3DWrapper(make_hipPitchedPtr(host_alloc.ptr(), dst_alloc.width(),
|
||||
dst_alloc.width(), dst_alloc.height()),
|
||||
make_hipPos(0, 0, 0), dst_alloc.pitched_ptr(), make_hipPos(0, 0, 0),
|
||||
dst_alloc.extent(), hipMemcpyDeviceToHost));
|
||||
|
||||
const auto f = [extent](size_t x, size_t y, size_t z) {
|
||||
constexpr auto width_logical = extent.width / sizeof(int);
|
||||
return z * width_logical * extent.height + y * width_logical + x;
|
||||
};
|
||||
PitchedMemoryVerify(host_alloc.ptr(), dst_alloc.width(), dst_alloc.width_logical(),
|
||||
dst_alloc.height(), dst_alloc.depth(), f);
|
||||
}
|
||||
|
||||
template <bool should_synchronize, typename F>
|
||||
void Memcpy3DHostToDeviceShell(F memcpy_func, const hipStream_t kernel_stream = nullptr) {
|
||||
const auto kind = GENERATE(hipMemcpyHostToDevice, hipMemcpyDefault);
|
||||
|
||||
constexpr hipExtent extent{127 * sizeof(int), 128, 8};
|
||||
|
||||
LinearAllocGuard3D<int> device_alloc(extent);
|
||||
|
||||
const size_t host_pitch = GENERATE_REF(device_alloc.pitch(), 2 * device_alloc.pitch());
|
||||
|
||||
LinearAllocGuard<int> src_host_alloc(LinearAllocs::hipHostMalloc,
|
||||
host_pitch * device_alloc.height() * device_alloc.depth());
|
||||
LinearAllocGuard<int> dst_host_alloc(
|
||||
LinearAllocs::hipHostMalloc,
|
||||
device_alloc.width() * device_alloc.height() * device_alloc.depth());
|
||||
|
||||
const auto f = [extent](size_t x, size_t y, size_t z) {
|
||||
constexpr auto width_logical = extent.width / sizeof(int);
|
||||
return z * width_logical * extent.height + y * width_logical + x;
|
||||
};
|
||||
PitchedMemorySet(src_host_alloc.ptr(), host_pitch, device_alloc.width_logical(),
|
||||
device_alloc.height(), device_alloc.depth(), f);
|
||||
|
||||
std::fill_n(dst_host_alloc.ptr(),
|
||||
device_alloc.width_logical() * device_alloc.height() * device_alloc.depth(), 0);
|
||||
|
||||
HIP_CHECK(memcpy_func(device_alloc.pitched_ptr(), make_hipPos(0, 0, 0),
|
||||
make_hipPitchedPtr(src_host_alloc.ptr(), host_pitch, device_alloc.width(),
|
||||
device_alloc.height()),
|
||||
make_hipPos(0, 0, 0), device_alloc.extent(), kind, kernel_stream));
|
||||
if constexpr (should_synchronize) {
|
||||
HIP_CHECK(hipStreamSynchronize(kernel_stream));
|
||||
}
|
||||
|
||||
HIP_CHECK(Memcpy3DWrapper(make_hipPitchedPtr(dst_host_alloc.ptr(), device_alloc.width(),
|
||||
device_alloc.width(), device_alloc.height()),
|
||||
make_hipPos(0, 0, 0), device_alloc.pitched_ptr(), make_hipPos(0, 0, 0),
|
||||
device_alloc.extent(), hipMemcpyDeviceToHost));
|
||||
|
||||
PitchedMemoryVerify(dst_host_alloc.ptr(), device_alloc.width(), device_alloc.width_logical(),
|
||||
device_alloc.height(), device_alloc.depth(), f);
|
||||
}
|
||||
|
||||
template <bool should_synchronize, typename F>
|
||||
void Memcpy3DHostToHostShell(F memcpy_func, const hipStream_t kernel_stream = nullptr) {
|
||||
const auto kind = GENERATE(hipMemcpyHostToHost, hipMemcpyDefault);
|
||||
|
||||
constexpr hipExtent extent{127 * sizeof(int), 128, 8};
|
||||
|
||||
const size_t padding = GENERATE_COPY(0, 64);
|
||||
const size_t src_pitch = extent.width + padding;
|
||||
|
||||
LinearAllocGuard<int> src_host(LinearAllocs::hipHostMalloc,
|
||||
src_pitch * extent.height * extent.depth);
|
||||
LinearAllocGuard<int> dst_host(LinearAllocs::hipHostMalloc,
|
||||
extent.width * extent.height * extent.depth);
|
||||
|
||||
const auto f = [extent](size_t x, size_t y, size_t z) {
|
||||
constexpr auto width_logical = extent.width / sizeof(int);
|
||||
return z * width_logical * extent.height + y * width_logical + x;
|
||||
};
|
||||
PitchedMemorySet(src_host.ptr(), src_pitch, extent.width / sizeof(int), extent.height,
|
||||
extent.depth, f);
|
||||
|
||||
HIP_CHECK(
|
||||
memcpy_func(make_hipPitchedPtr(dst_host.ptr(), extent.width, extent.width, extent.height),
|
||||
make_hipPos(0, 0, 0),
|
||||
make_hipPitchedPtr(src_host.ptr(), src_pitch, extent.width, extent.height),
|
||||
make_hipPos(0, 0, 0), extent, kind, kernel_stream));
|
||||
if constexpr (should_synchronize) {
|
||||
HIP_CHECK(hipStreamSynchronize(kernel_stream));
|
||||
}
|
||||
|
||||
PitchedMemoryVerify(dst_host.ptr(), extent.width, extent.width / sizeof(int), extent.height,
|
||||
extent.depth, f);
|
||||
}
|
||||
|
||||
template <bool should_synchronize, typename F>
|
||||
void Memcpy3DArrayHostShell(F memcpy_func, const hipStream_t kernel_stream = nullptr) {
|
||||
constexpr hipExtent extent{127, 128, 8};
|
||||
|
||||
LinearAllocGuard<int> src_host(LinearAllocs::hipHostMalloc,
|
||||
extent.width * sizeof(int) * extent.height * extent.depth);
|
||||
LinearAllocGuard<int> dst_host(LinearAllocs::hipHostMalloc,
|
||||
extent.width * sizeof(int) * extent.height * extent.depth);
|
||||
|
||||
ArrayAllocGuard<int> src_array(extent);
|
||||
ArrayAllocGuard<int> dst_array(extent);
|
||||
|
||||
const auto f = [extent](size_t x, size_t y, size_t z) {
|
||||
return z * extent.width * extent.height + y * extent.width + x;
|
||||
};
|
||||
PitchedMemorySet(src_host.ptr(), extent.width * sizeof(int), extent.width, extent.height,
|
||||
extent.depth, f);
|
||||
|
||||
// Host -> Array
|
||||
HIP_CHECK(memcpy_func(src_array.ptr(), make_hipPos(0, 0, 0),
|
||||
make_hipPitchedPtr(src_host.ptr(), extent.width * sizeof(int),
|
||||
extent.width * sizeof(int), extent.height),
|
||||
make_hipPos(0, 0, 0), extent, hipMemcpyHostToDevice, kernel_stream));
|
||||
if constexpr (should_synchronize) {
|
||||
HIP_CHECK(hipStreamSynchronize(kernel_stream));
|
||||
}
|
||||
|
||||
// Array -> Array
|
||||
HIP_CHECK(memcpy_func(dst_array.ptr(), make_hipPos(0, 0, 0), src_array.ptr(),
|
||||
make_hipPos(0, 0, 0), extent, hipMemcpyDeviceToDevice, kernel_stream));
|
||||
if constexpr (should_synchronize) {
|
||||
HIP_CHECK(hipStreamSynchronize(kernel_stream));
|
||||
}
|
||||
|
||||
// Array -> Host
|
||||
HIP_CHECK(memcpy_func(make_hipPitchedPtr(dst_host.ptr(), extent.width * sizeof(int),
|
||||
extent.width * sizeof(int), extent.height),
|
||||
make_hipPos(0, 0, 0), dst_array.ptr(), make_hipPos(0, 0, 0), extent,
|
||||
hipMemcpyDeviceToHost, kernel_stream));
|
||||
if constexpr (should_synchronize) {
|
||||
HIP_CHECK(hipStreamSynchronize(kernel_stream));
|
||||
}
|
||||
|
||||
PitchedMemoryVerify(dst_host.ptr(), extent.width * sizeof(int), extent.width, extent.height,
|
||||
extent.depth, f);
|
||||
}
|
||||
|
||||
template <bool should_synchronize, typename F>
|
||||
void Memcpy3DArrayDeviceShell(F memcpy_func, const hipStream_t kernel_stream = nullptr) {
|
||||
constexpr hipExtent extent{127, 128, 8};
|
||||
|
||||
LinearAllocGuard<int> host_alloc(LinearAllocs::hipHostMalloc,
|
||||
extent.width * sizeof(int) * extent.height * extent.depth);
|
||||
|
||||
ArrayAllocGuard<int> src_array(extent);
|
||||
ArrayAllocGuard<int> dst_array(extent);
|
||||
|
||||
LinearAllocGuard3D<int> src_device(extent.width, extent.height, extent.depth);
|
||||
LinearAllocGuard3D<int> dst_device(extent.width, extent.height, extent.depth);
|
||||
|
||||
const dim3 threads_per_block(32, 32);
|
||||
const dim3 blocks(src_device.width_logical() / threads_per_block.x + 1,
|
||||
src_device.height() / threads_per_block.y + 1, src_device.depth());
|
||||
Iota<<<blocks, threads_per_block>>>(src_device.ptr(), src_device.pitch(),
|
||||
src_device.width_logical(), src_device.height(),
|
||||
src_device.depth());
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
|
||||
// Device -> Array
|
||||
HIP_CHECK(memcpy_func(src_array.ptr(), make_hipPos(0, 0, 0), src_device.pitched_ptr(),
|
||||
make_hipPos(0, 0, 0), extent, hipMemcpyDeviceToDevice, kernel_stream));
|
||||
if constexpr (should_synchronize) {
|
||||
HIP_CHECK(hipStreamSynchronize(kernel_stream));
|
||||
}
|
||||
|
||||
// Array -> Array
|
||||
HIP_CHECK(memcpy_func(dst_array.ptr(), make_hipPos(0, 0, 0), src_array.ptr(),
|
||||
make_hipPos(0, 0, 0), extent, hipMemcpyDeviceToDevice, kernel_stream));
|
||||
if constexpr (should_synchronize) {
|
||||
HIP_CHECK(hipStreamSynchronize(kernel_stream));
|
||||
}
|
||||
|
||||
// Array -> Device
|
||||
HIP_CHECK(memcpy_func(dst_device.pitched_ptr(), make_hipPos(0, 0, 0), dst_array.ptr(),
|
||||
make_hipPos(0, 0, 0), extent, hipMemcpyDeviceToDevice, kernel_stream));
|
||||
if constexpr (should_synchronize) {
|
||||
HIP_CHECK(hipStreamSynchronize(kernel_stream));
|
||||
}
|
||||
|
||||
// Device -> Host
|
||||
HIP_CHECK(memcpy_func(make_hipPitchedPtr(host_alloc.ptr(), extent.width * sizeof(int),
|
||||
extent.width * sizeof(int), extent.height),
|
||||
make_hipPos(0, 0, 0), dst_device.pitched_ptr(), make_hipPos(0, 0, 0),
|
||||
dst_device.extent(), hipMemcpyDeviceToHost, kernel_stream));
|
||||
if constexpr (should_synchronize) {
|
||||
HIP_CHECK(hipStreamSynchronize(kernel_stream));
|
||||
}
|
||||
|
||||
const auto f = [extent](size_t x, size_t y, size_t z) {
|
||||
return z * extent.width * extent.height + y * extent.width + x;
|
||||
};
|
||||
PitchedMemoryVerify(host_alloc.ptr(), extent.width * sizeof(int), extent.width, extent.height,
|
||||
extent.depth, f);
|
||||
}
|
||||
|
||||
template <typename F>
|
||||
void Memcpy3DHtoDSyncBehavior(F memcpy_func, const bool should_sync,
|
||||
const hipStream_t kernel_stream = nullptr) {
|
||||
using LA = LinearAllocs;
|
||||
LinearAllocGuard3D<int> device_alloc(make_hipExtent(32 * sizeof(int), 32, 8));
|
||||
LinearAllocGuard<int> host_alloc(
|
||||
LA::hipHostMalloc, device_alloc.width() * device_alloc.height() * device_alloc.depth());
|
||||
MemcpySyncBehaviorCheck(
|
||||
std::bind(memcpy_func, device_alloc.pitched_ptr(), make_hipPos(0, 0, 0),
|
||||
make_hipPitchedPtr(host_alloc.ptr(), device_alloc.width(), device_alloc.width(),
|
||||
device_alloc.height()),
|
||||
make_hipPos(0, 0, 0), device_alloc.extent(), hipMemcpyHostToDevice, kernel_stream),
|
||||
should_sync, kernel_stream);
|
||||
}
|
||||
|
||||
template <typename F>
|
||||
void Memcpy3DDtoHPageableSyncBehavior(F memcpy_func, const bool should_sync,
|
||||
const hipStream_t kernel_stream = nullptr) {
|
||||
LinearAllocGuard3D<int> device_alloc(make_hipExtent(32 * sizeof(int), 32, 8));
|
||||
LinearAllocGuard<int> host_alloc(
|
||||
LinearAllocs::malloc, device_alloc.width() * device_alloc.height() * device_alloc.depth());
|
||||
MemcpySyncBehaviorCheck(
|
||||
std::bind(memcpy_func,
|
||||
make_hipPitchedPtr(host_alloc.ptr(), device_alloc.width(), device_alloc.width(),
|
||||
device_alloc.height()),
|
||||
make_hipPos(0, 0, 0), device_alloc.pitched_ptr(), make_hipPos(0, 0, 0),
|
||||
device_alloc.extent(), hipMemcpyDeviceToHost, kernel_stream),
|
||||
should_sync, kernel_stream);
|
||||
}
|
||||
|
||||
template <typename F>
|
||||
void Memcpy3DDtoHPinnedSyncBehavior(F memcpy_func, const bool should_sync,
|
||||
const hipStream_t kernel_stream = nullptr) {
|
||||
LinearAllocGuard3D<int> device_alloc(make_hipExtent(32 * sizeof(int), 32, 8));
|
||||
LinearAllocGuard<int> host_alloc(
|
||||
LinearAllocs::hipHostMalloc,
|
||||
device_alloc.width() * device_alloc.height() * device_alloc.depth());
|
||||
MemcpySyncBehaviorCheck(
|
||||
std::bind(memcpy_func,
|
||||
make_hipPitchedPtr(host_alloc.ptr(), device_alloc.width(), device_alloc.width(),
|
||||
device_alloc.height()),
|
||||
make_hipPos(0, 0, 0), device_alloc.pitched_ptr(), make_hipPos(0, 0, 0),
|
||||
device_alloc.extent(), hipMemcpyDeviceToHost, kernel_stream),
|
||||
should_sync, kernel_stream);
|
||||
}
|
||||
|
||||
template <typename F>
|
||||
void Memcpy3DDtoDSyncBehavior(F memcpy_func, const bool should_sync,
|
||||
const hipStream_t kernel_stream = nullptr) {
|
||||
LinearAllocGuard3D<int> src_alloc(make_hipExtent(32 * sizeof(int), 32, 8));
|
||||
LinearAllocGuard3D<int> dst_alloc(make_hipExtent(32 * sizeof(int), 32, 8));
|
||||
MemcpySyncBehaviorCheck(
|
||||
std::bind(memcpy_func, dst_alloc.pitched_ptr(), make_hipPos(0, 0, 0), src_alloc.pitched_ptr(),
|
||||
make_hipPos(0, 0, 0), dst_alloc.extent(), hipMemcpyDeviceToDevice, kernel_stream),
|
||||
should_sync, kernel_stream);
|
||||
}
|
||||
|
||||
template <typename F>
|
||||
void Memcpy3DHtoHSyncBehavior(F memcpy_func, const bool should_sync,
|
||||
const hipStream_t kernel_stream = nullptr) {
|
||||
using LA = LinearAllocs;
|
||||
const auto src_alloc_type = GENERATE(LA::malloc, LA::hipHostMalloc);
|
||||
const auto dst_alloc_type = GENERATE(LA::malloc, LA::hipHostMalloc);
|
||||
|
||||
LinearAllocGuard<int> src_alloc(src_alloc_type, 32 * sizeof(int) * 32 * 8);
|
||||
LinearAllocGuard<int> dst_alloc(dst_alloc_type, 32 * sizeof(int) * 32 * 8);
|
||||
MemcpySyncBehaviorCheck(
|
||||
std::bind(memcpy_func,
|
||||
make_hipPitchedPtr(dst_alloc.ptr(), 32 * sizeof(int), 32 * sizeof(int), 32),
|
||||
make_hipPos(0, 0, 0),
|
||||
make_hipPitchedPtr(src_alloc.ptr(), 32 * sizeof(int), 32 * sizeof(int), 32),
|
||||
make_hipPos(0, 0, 0), make_hipExtent(32 * sizeof(int), 32, 8), hipMemcpyHostToHost,
|
||||
kernel_stream),
|
||||
should_sync, kernel_stream);
|
||||
}
|
||||
|
||||
template <bool should_synchronize, typename F>
|
||||
void Memcpy3DZeroWidthHeightDepth(F memcpy_func, const hipStream_t stream = nullptr) {
|
||||
constexpr hipExtent extent{127 * sizeof(int), 128, 8};
|
||||
|
||||
const auto [width_mult, height_mult, depth_mult] =
|
||||
GENERATE(std::make_tuple(0, 1, 1), std::make_tuple(1, 0, 1), std::make_tuple(1, 1, 0));
|
||||
|
||||
SECTION("Device to Host") {
|
||||
LinearAllocGuard3D<uint8_t> device_alloc(extent);
|
||||
LinearAllocGuard<uint8_t> host_alloc(
|
||||
LinearAllocs::hipHostMalloc,
|
||||
device_alloc.width() * device_alloc.height() * device_alloc.depth());
|
||||
std::fill_n(host_alloc.ptr(),
|
||||
device_alloc.width_logical() * device_alloc.height() * device_alloc.depth(), 42);
|
||||
HIP_CHECK(hipMemset3D(device_alloc.pitched_ptr(), 1, device_alloc.extent()));
|
||||
HIP_CHECK(memcpy_func(
|
||||
make_hipPitchedPtr(host_alloc.ptr(), device_alloc.width(), device_alloc.width(),
|
||||
device_alloc.height()),
|
||||
make_hipPos(0, 0, 0), device_alloc.pitched_ptr(), make_hipPos(0, 0, 0),
|
||||
make_hipExtent(device_alloc.width() * width_mult, device_alloc.height() * height_mult,
|
||||
device_alloc.depth() * depth_mult),
|
||||
hipMemcpyDeviceToHost, stream));
|
||||
if constexpr (should_synchronize) {
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
}
|
||||
ArrayFindIfNot(host_alloc.ptr(), static_cast<uint8_t>(42),
|
||||
device_alloc.width_logical() * device_alloc.height() * device_alloc.depth());
|
||||
}
|
||||
|
||||
SECTION("Device to Device") {
|
||||
LinearAllocGuard3D<uint8_t> src_alloc(extent);
|
||||
LinearAllocGuard3D<uint8_t> dst_alloc(extent);
|
||||
LinearAllocGuard<uint8_t> host_alloc(
|
||||
LinearAllocs::hipHostMalloc, dst_alloc.width() * dst_alloc.height() * dst_alloc.depth());
|
||||
HIP_CHECK(hipMemset3D(src_alloc.pitched_ptr(), 1, src_alloc.extent()));
|
||||
HIP_CHECK(hipMemset3D(dst_alloc.pitched_ptr(), 42, dst_alloc.extent()));
|
||||
HIP_CHECK(
|
||||
memcpy_func(dst_alloc.pitched_ptr(), make_hipPos(0, 0, 0), src_alloc.pitched_ptr(),
|
||||
make_hipPos(0, 0, 0),
|
||||
make_hipExtent(dst_alloc.width() * width_mult, dst_alloc.height() * height_mult,
|
||||
dst_alloc.depth() * depth_mult),
|
||||
hipMemcpyDeviceToDevice, stream));
|
||||
if constexpr (should_synchronize) {
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
}
|
||||
HIP_CHECK(Memcpy3DWrapper(make_hipPitchedPtr(host_alloc.ptr(), dst_alloc.width(),
|
||||
dst_alloc.width(), dst_alloc.height()),
|
||||
make_hipPos(0, 0, 0), dst_alloc.pitched_ptr(), make_hipPos(0, 0, 0),
|
||||
dst_alloc.extent(), hipMemcpyDeviceToHost));
|
||||
ArrayFindIfNot(host_alloc.ptr(), static_cast<uint8_t>(42),
|
||||
dst_alloc.width_logical() * dst_alloc.height());
|
||||
}
|
||||
|
||||
SECTION("Host to Device") {
|
||||
LinearAllocGuard3D<uint8_t> device_alloc(extent);
|
||||
LinearAllocGuard<uint8_t> src_host_alloc(
|
||||
LinearAllocs::hipHostMalloc,
|
||||
device_alloc.width() * device_alloc.height() * device_alloc.depth());
|
||||
LinearAllocGuard<uint8_t> dst_host_alloc(
|
||||
LinearAllocs::hipHostMalloc,
|
||||
device_alloc.width() * device_alloc.height() * device_alloc.depth());
|
||||
std::fill_n(src_host_alloc.ptr(),
|
||||
device_alloc.width_logical() * device_alloc.height() * device_alloc.depth(), 1);
|
||||
HIP_CHECK(hipMemset3D(device_alloc.pitched_ptr(), 42, device_alloc.extent()));
|
||||
HIP_CHECK(memcpy_func(
|
||||
device_alloc.pitched_ptr(), make_hipPos(0, 0, 0),
|
||||
make_hipPitchedPtr(src_host_alloc.ptr(), device_alloc.width(), device_alloc.width(),
|
||||
device_alloc.height()),
|
||||
make_hipPos(0, 0, 0),
|
||||
make_hipExtent(device_alloc.width() * width_mult, device_alloc.height() * height_mult,
|
||||
device_alloc.depth() * depth_mult),
|
||||
hipMemcpyHostToDevice, stream));
|
||||
if constexpr (should_synchronize) {
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
}
|
||||
HIP_CHECK(Memcpy3DWrapper(make_hipPitchedPtr(dst_host_alloc.ptr(), device_alloc.width(),
|
||||
device_alloc.width(), device_alloc.height()),
|
||||
make_hipPos(0, 0, 0), device_alloc.pitched_ptr(),
|
||||
make_hipPos(0, 0, 0), device_alloc.extent(), hipMemcpyDeviceToHost));
|
||||
ArrayFindIfNot(dst_host_alloc.ptr(), static_cast<uint8_t>(42),
|
||||
device_alloc.width_logical() * device_alloc.height());
|
||||
}
|
||||
|
||||
SECTION("Host to Host") {
|
||||
const auto alloc_size = extent.width * extent.height * extent.depth;
|
||||
LinearAllocGuard<uint8_t> src_alloc(LinearAllocs::hipHostMalloc, alloc_size);
|
||||
LinearAllocGuard<uint8_t> dst_alloc(LinearAllocs::hipHostMalloc, alloc_size);
|
||||
std::fill_n(src_alloc.ptr(), alloc_size, 1);
|
||||
std::fill_n(dst_alloc.ptr(), alloc_size, 42);
|
||||
HIP_CHECK(
|
||||
memcpy_func(make_hipPitchedPtr(dst_alloc.ptr(), extent.width, extent.width, extent.height),
|
||||
make_hipPos(0, 0, 0),
|
||||
make_hipPitchedPtr(src_alloc.ptr(), extent.width, extent.width, extent.height),
|
||||
make_hipPos(0, 0, 0),
|
||||
make_hipExtent(extent.width * width_mult, extent.height * height_mult,
|
||||
extent.depth * depth_mult),
|
||||
hipMemcpyHostToHost, stream));
|
||||
if constexpr (should_synchronize) {
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
}
|
||||
ArrayFindIfNot(dst_alloc.ptr(), static_cast<uint8_t>(42), alloc_size);
|
||||
}
|
||||
}
|
||||
|
||||
constexpr auto MemTypeHost() {
|
||||
#if HT_AMD
|
||||
return hipMemoryTypeHost;
|
||||
#else
|
||||
return CU_MEMORYTYPE_HOST;
|
||||
#endif
|
||||
}
|
||||
|
||||
constexpr auto MemTypeDevice() {
|
||||
#if HT_AMD
|
||||
return hipMemoryTypeDevice;
|
||||
#else
|
||||
return CU_MEMORYTYPE_DEVICE;
|
||||
#endif
|
||||
}
|
||||
|
||||
constexpr auto MemTypeArray() {
|
||||
#if HT_AMD
|
||||
return hipMemoryTypeArray;
|
||||
#else
|
||||
return CU_MEMORYTYPE_ARRAY;
|
||||
#endif
|
||||
}
|
||||
|
||||
constexpr auto MemTypeUnified() {
|
||||
#if HT_AMD
|
||||
return hipMemoryTypeUnified;
|
||||
#else
|
||||
return CU_MEMORYTYPE_UNIFIED;
|
||||
#endif
|
||||
}
|
||||
|
||||
// One endpoint (source or destination) of a driver-API 3D copy: either linear memory described by
// a hipPitchedPtr or a hipArray_t handle.
using DrvPtrVariant = std::variant<hipPitchedPtr, hipArray_t>;
|
||||
|
||||
// Builds a HIP_MEMCPY3D descriptor for a driver-API 3D copy.
//
// An endpoint holding a hipArray_t is described as array memory. An endpoint holding a
// hipPitchedPtr contributes its pitch, and its memory type is derived from `kind`: host or device
// according to the copy direction, or unified for hipMemcpyDefault (the pointer is then stored in
// the device field). Any other `kind` trips an assert.
//
// dst_ptr / src_ptr - destination and source endpoints.
// dst_pos / src_pos - offsets copied into dstXInBytes/dstY/dstZ and srcXInBytes/srcY/srcZ.
// extent            - copied into WidthInBytes/Height/Depth.
// kind              - copy direction used to classify pitched endpoints.
// Returns the zero-initialized descriptor with the fields above filled in.
static inline HIP_MEMCPY3D GetDrvMemcpy3DParms(DrvPtrVariant dst_ptr, hipPos dst_pos,
                                               DrvPtrVariant src_ptr, hipPos src_pos,
                                               hipExtent extent, hipMemcpyKind kind) {
  HIP_MEMCPY3D parms = {0};

  // Geometry of the copy.
  parms.WidthInBytes = extent.width;
  parms.Height = extent.height;
  parms.Depth = extent.depth;
  parms.dstXInBytes = dst_pos.x;
  parms.dstY = dst_pos.y;
  parms.dstZ = dst_pos.z;
  parms.srcXInBytes = src_pos.x;
  parms.srcY = src_pos.y;
  parms.srcZ = src_pos.z;

  // Destination endpoint.
  if (const auto* dst_array = std::get_if<hipArray_t>(&dst_ptr)) {
    parms.dstMemoryType = hipMemoryTypeArray;
    parms.dstArray = *dst_array;
  } else {
    const auto pitched = std::get<hipPitchedPtr>(dst_ptr);
    parms.dstPitch = pitched.pitch;
    if (kind == hipMemcpyDeviceToHost || kind == hipMemcpyHostToHost) {
      parms.dstMemoryType = hipMemoryTypeHost;
      parms.dstHost = pitched.ptr;
    } else if (kind == hipMemcpyDeviceToDevice || kind == hipMemcpyHostToDevice) {
      parms.dstMemoryType = hipMemoryTypeDevice;
      parms.dstDevice = reinterpret_cast<hipDeviceptr_t>(pitched.ptr);
    } else if (kind == hipMemcpyDefault) {
      parms.dstMemoryType = hipMemoryTypeUnified;
      parms.dstDevice = reinterpret_cast<hipDeviceptr_t>(pitched.ptr);
    } else {
      assert(false);
    }
  }

  // Source endpoint.
  if (const auto* src_array = std::get_if<hipArray_t>(&src_ptr)) {
    parms.srcMemoryType = hipMemoryTypeArray;
    parms.srcArray = *src_array;
  } else {
    const auto pitched = std::get<hipPitchedPtr>(src_ptr);
    parms.srcPitch = pitched.pitch;
    if (kind == hipMemcpyDeviceToHost || kind == hipMemcpyDeviceToDevice) {
      parms.srcMemoryType = hipMemoryTypeDevice;
      parms.srcDevice = reinterpret_cast<hipDeviceptr_t>(pitched.ptr);
    } else if (kind == hipMemcpyHostToDevice || kind == hipMemcpyHostToHost) {
      parms.srcMemoryType = hipMemoryTypeHost;
      parms.srcHost = pitched.ptr;
    } else if (kind == hipMemcpyDefault) {
      parms.srcMemoryType = hipMemoryTypeUnified;
      parms.srcDevice = reinterpret_cast<hipDeviceptr_t>(pitched.ptr);
    } else {
      assert(false);
    }
  }

  return parms;
}
|
||||
|
||||
// Field-wise equality of two HIP_MEMCPY3D descriptors, driven by the fields set in `lhs`.
//
// Positions and extents must always match. For each endpoint the memory type must match, plus the
// array handle when lhs has one, or the pitch otherwise. The device and host pointers of an
// endpoint are compared only when they are non-null in lhs.
//
// Returns true when every compared field is equal.
static inline bool operator==(const HIP_MEMCPY3D& lhs, const HIP_MEMCPY3D& rhs) {
  const bool pos_eq = lhs.dstXInBytes == rhs.dstXInBytes && lhs.dstY == rhs.dstY &&
      lhs.dstZ == rhs.dstZ && lhs.srcXInBytes == rhs.srcXInBytes && lhs.srcY == rhs.srcY &&
      lhs.srcZ == rhs.srcZ;
  const bool extent_eq =
      lhs.WidthInBytes == rhs.WidthInBytes && lhs.Height == rhs.Height && lhs.Depth == rhs.Depth;

  // Destination endpoint. It is tracked separately so the source checks below cannot overwrite
  // its result.
  bool dst_eq = lhs.dstMemoryType == rhs.dstMemoryType;
  if (lhs.dstArray) {
    dst_eq = dst_eq && (lhs.dstArray == rhs.dstArray);
  } else {
    dst_eq = dst_eq && (lhs.dstPitch == rhs.dstPitch);
  }
  if (lhs.dstDevice) {
    dst_eq = dst_eq && (lhs.dstDevice == rhs.dstDevice);
  }
  if (lhs.dstHost) {
    // Compare the host pointers themselves, not the device pointers.
    dst_eq = dst_eq && (lhs.dstHost == rhs.dstHost);
  }

  // Source endpoint.
  bool src_eq = lhs.srcMemoryType == rhs.srcMemoryType;
  if (lhs.srcArray) {
    src_eq = src_eq && (lhs.srcArray == rhs.srcArray);
  } else {
    src_eq = src_eq && (lhs.srcPitch == rhs.srcPitch);
  }
  if (lhs.srcDevice) {
    src_eq = src_eq && (lhs.srcDevice == rhs.srcDevice);
  }
  if (lhs.srcHost) {
    src_eq = src_eq && (lhs.srcHost == rhs.srcHost);
  }

  return pos_eq && extent_eq && dst_eq && src_eq;
}
|
||||
|
||||
template <bool set_params = false>
|
||||
hipError_t DrvMemcpy3DGraphWrapper(DrvPtrVariant dst_ptr, hipPos dst_pos, DrvPtrVariant src_ptr,
|
||||
hipPos src_pos, hipExtent extent, hipMemcpyKind kind,
|
||||
hipCtx_t context, hipStream_t stream = nullptr) {
|
||||
auto parms = GetDrvMemcpy3DParms(dst_ptr, dst_pos, src_ptr, src_pos, extent, kind);
|
||||
|
||||
hipGraph_t g = nullptr;
|
||||
HIP_CHECK(hipGraphCreate(&g, 0));
|
||||
hipGraphNode_t node = nullptr;
|
||||
if constexpr (set_params) {
|
||||
auto reversed_parms = GetDrvMemcpy3DParms(src_ptr, src_pos, dst_ptr, dst_pos, extent,
|
||||
ReverseMemcpyDirection(kind));
|
||||
HIP_CHECK(hipDrvGraphAddMemcpyNode(&node, g, nullptr, 0, &reversed_parms, context));
|
||||
HIP_CHECK(hipDrvGraphMemcpyNodeSetParams(node, &parms));
|
||||
} else {
|
||||
HIP_CHECK(hipDrvGraphAddMemcpyNode(&node, g, nullptr, 0, &parms, context));
|
||||
}
|
||||
|
||||
HIP_MEMCPY3D retrieved_params = {0};
|
||||
HIP_CHECK(hipDrvGraphMemcpyNodeGetParams(node, &retrieved_params));
|
||||
REQUIRE(parms == retrieved_params);
|
||||
|
||||
hipGraphExec_t graph_exec = nullptr;
|
||||
HIP_CHECK(hipGraphInstantiate(&graph_exec, g, nullptr, nullptr, 0));
|
||||
HIP_CHECK(hipGraphLaunch(graph_exec, hipStreamPerThread));
|
||||
HIP_CHECK(hipStreamSynchronize(hipStreamPerThread));
|
||||
|
||||
HIP_CHECK(hipGraphExecDestroy(graph_exec));
|
||||
HIP_CHECK(hipGraphDestroy(g));
|
||||
|
||||
return hipSuccess;
|
||||
}
|
||||
|
||||
template <bool async = false>
|
||||
hipError_t DrvMemcpy3DWrapper(DrvPtrVariant dst_ptr, hipPos dst_pos, DrvPtrVariant src_ptr,
|
||||
hipPos src_pos, hipExtent extent, hipMemcpyKind kind,
|
||||
hipStream_t stream = nullptr) {
|
||||
auto parms = GetDrvMemcpy3DParms(dst_ptr, dst_pos, src_ptr, src_pos, extent, kind);
|
||||
|
||||
if constexpr (async) {
|
||||
return hipDrvMemcpy3DAsync(&parms, stream);
|
||||
} else {
|
||||
return hipDrvMemcpy3D(&parms);
|
||||
}
|
||||
}
|
||||
|
||||
template <bool should_synchronize, typename F>
|
||||
void DrvMemcpy3DArrayHostShell(F memcpy_func, const hipStream_t kernel_stream = nullptr) {
|
||||
constexpr hipExtent extent{127 * sizeof(int), 128, 8};
|
||||
|
||||
LinearAllocGuard<int> src_host(LinearAllocs::hipHostMalloc,
|
||||
extent.width * extent.height * extent.depth);
|
||||
LinearAllocGuard<int> dst_host(LinearAllocs::hipHostMalloc,
|
||||
extent.width * extent.height * extent.depth);
|
||||
|
||||
DrvArrayAllocGuard<int> src_array(extent);
|
||||
DrvArrayAllocGuard<int> dst_array(extent);
|
||||
|
||||
const auto f = [extent](size_t x, size_t y, size_t z) {
|
||||
constexpr auto width_logical = extent.width / sizeof(int);
|
||||
return z * width_logical * extent.height + y * width_logical + x;
|
||||
};
|
||||
PitchedMemorySet(src_host.ptr(), extent.width, extent.width / sizeof(int), extent.height,
|
||||
extent.depth, f);
|
||||
|
||||
// Host -> Array
|
||||
HIP_CHECK(
|
||||
memcpy_func(src_array.ptr(), make_hipPos(0, 0, 0),
|
||||
make_hipPitchedPtr(src_host.ptr(), extent.width, extent.width, extent.height),
|
||||
make_hipPos(0, 0, 0), extent, hipMemcpyHostToDevice, kernel_stream));
|
||||
if constexpr (should_synchronize) {
|
||||
HIP_CHECK(hipStreamSynchronize(kernel_stream));
|
||||
}
|
||||
|
||||
// Array -> Array
|
||||
HIP_CHECK(memcpy_func(dst_array.ptr(), make_hipPos(0, 0, 0), src_array.ptr(),
|
||||
make_hipPos(0, 0, 0), extent, hipMemcpyDeviceToDevice, kernel_stream));
|
||||
if constexpr (should_synchronize) {
|
||||
HIP_CHECK(hipStreamSynchronize(kernel_stream));
|
||||
}
|
||||
|
||||
// Array -> Host
|
||||
HIP_CHECK(
|
||||
memcpy_func(make_hipPitchedPtr(dst_host.ptr(), extent.width, extent.width, extent.height),
|
||||
make_hipPos(0, 0, 0), dst_array.ptr(), make_hipPos(0, 0, 0), extent,
|
||||
hipMemcpyDeviceToHost, kernel_stream));
|
||||
if constexpr (should_synchronize) {
|
||||
HIP_CHECK(hipStreamSynchronize(kernel_stream));
|
||||
}
|
||||
|
||||
PitchedMemoryVerify(dst_host.ptr(), extent.width, extent.width / sizeof(int), extent.height,
|
||||
extent.depth, f);
|
||||
}
|
||||
|
||||
template <bool should_synchronize, typename F>
|
||||
void DrvMemcpy3DArrayDeviceShell(F memcpy_func, const hipStream_t kernel_stream = nullptr) {
|
||||
constexpr hipExtent extent{127 * sizeof(int), 128, 8};
|
||||
|
||||
LinearAllocGuard<int> host_alloc(LinearAllocs::hipHostMalloc,
|
||||
extent.width * extent.height * extent.depth);
|
||||
|
||||
DrvArrayAllocGuard<int> src_array(extent);
|
||||
DrvArrayAllocGuard<int> dst_array(extent);
|
||||
|
||||
LinearAllocGuard3D<int> src_device(extent);
|
||||
LinearAllocGuard3D<int> dst_device(extent);
|
||||
|
||||
const dim3 threads_per_block(32, 32);
|
||||
const dim3 blocks(src_device.width_logical() / threads_per_block.x + 1,
|
||||
src_device.height() / threads_per_block.y + 1, src_device.depth());
|
||||
Iota<<<blocks, threads_per_block>>>(src_device.ptr(), src_device.pitch(),
|
||||
src_device.width_logical(), src_device.height(),
|
||||
src_device.depth());
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
|
||||
// Device -> Array
|
||||
HIP_CHECK(memcpy_func(src_array.ptr(), make_hipPos(0, 0, 0), src_device.pitched_ptr(),
|
||||
make_hipPos(0, 0, 0), extent, hipMemcpyDeviceToDevice, kernel_stream));
|
||||
if constexpr (should_synchronize) {
|
||||
HIP_CHECK(hipStreamSynchronize(kernel_stream));
|
||||
}
|
||||
|
||||
// Array -> Array
|
||||
HIP_CHECK(memcpy_func(dst_array.ptr(), make_hipPos(0, 0, 0), src_array.ptr(),
|
||||
make_hipPos(0, 0, 0), extent, hipMemcpyDeviceToDevice, kernel_stream));
|
||||
if constexpr (should_synchronize) {
|
||||
HIP_CHECK(hipStreamSynchronize(kernel_stream));
|
||||
}
|
||||
|
||||
// Array -> Device
|
||||
HIP_CHECK(memcpy_func(dst_device.pitched_ptr(), make_hipPos(0, 0, 0), dst_array.ptr(),
|
||||
make_hipPos(0, 0, 0), extent, hipMemcpyDeviceToDevice, kernel_stream));
|
||||
if constexpr (should_synchronize) {
|
||||
HIP_CHECK(hipStreamSynchronize(kernel_stream));
|
||||
}
|
||||
|
||||
HIP_CHECK(
|
||||
memcpy_func(make_hipPitchedPtr(host_alloc.ptr(), extent.width, extent.width, extent.height),
|
||||
make_hipPos(0, 0, 0), dst_device.pitched_ptr(), make_hipPos(0, 0, 0),
|
||||
dst_device.extent(), hipMemcpyDeviceToHost, kernel_stream));
|
||||
if constexpr (should_synchronize) {
|
||||
HIP_CHECK(hipStreamSynchronize(kernel_stream));
|
||||
}
|
||||
|
||||
const auto f = [extent](size_t x, size_t y, size_t z) {
|
||||
constexpr auto width_logical = extent.width / sizeof(int);
|
||||
return z * width_logical * extent.height + y * width_logical + x;
|
||||
};
|
||||
PitchedMemoryVerify(host_alloc.ptr(), extent.width, extent.width / sizeof(int), extent.height,
|
||||
extent.depth, f);
|
||||
}
|
||||
@@ -0,0 +1,251 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <algorithm>
|
||||
#include <chrono>
|
||||
#include <memory>
|
||||
#include <numeric>
|
||||
#include <type_traits>
|
||||
#include <vector>
|
||||
|
||||
#include <cmd_options.hh>
|
||||
#include <hip_test_common.hh>
|
||||
#include <resource_guards.hh>
|
||||
|
||||
#pragma clang diagnostic ignored "-Wunused-but-set-variable"
|
||||
#pragma clang diagnostic ignored "-Wunused-parameter"
|
||||
#pragma clang diagnostic ignored "-Wunused-function"
|
||||
|
||||
// Windows builds get a local ssize_t: 64-bit signed when _WIN64 is defined, 32-bit otherwise.
#if defined(_WIN32)
#if defined(_WIN64)
using ssize_t = __int64;
#else   // !_WIN64
using ssize_t = __int32;
#endif  // !_WIN64
#endif  // _WIN32
|
||||
|
||||
class Timer {
|
||||
public:
|
||||
Timer(const Timer&) = delete;
|
||||
Timer& operator=(const Timer&) = delete;
|
||||
|
||||
protected:
|
||||
Timer(float& time, hipStream_t stream) : time_(time), stream_(stream) {}
|
||||
|
||||
void Record(float time) { time_ += time; }
|
||||
|
||||
hipStream_t GetStream() const { return stream_; }
|
||||
|
||||
private:
|
||||
float& time_;
|
||||
hipStream_t stream_;
|
||||
};
|
||||
|
||||
class EventTimer : public Timer {
|
||||
public:
|
||||
EventTimer(float& time, hipStream_t stream = nullptr) : Timer(time, stream) {
|
||||
HIP_CHECK(hipEventCreate(&start_));
|
||||
HIP_CHECK(hipEventCreate(&stop_));
|
||||
HIP_CHECK(hipEventRecord(start_, GetStream()));
|
||||
}
|
||||
|
||||
~EventTimer() {
|
||||
hipError_t error; // to avoid compiler warnings
|
||||
|
||||
error = hipEventRecord(stop_, GetStream());
|
||||
error = hipEventSynchronize(stop_);
|
||||
|
||||
float ms;
|
||||
error = hipEventElapsedTime(&ms, start_, stop_);
|
||||
Record(ms);
|
||||
|
||||
error = hipEventDestroy(start_);
|
||||
error = hipEventDestroy(stop_);
|
||||
}
|
||||
|
||||
private:
|
||||
hipEvent_t start_;
|
||||
hipEvent_t stop_;
|
||||
};
|
||||
|
||||
class CpuTimer : public Timer {
|
||||
public:
|
||||
CpuTimer(float& time, hipStream_t stream = nullptr) : Timer(time, stream) {
|
||||
start_ = std::chrono::steady_clock::now();
|
||||
}
|
||||
|
||||
~CpuTimer() {
|
||||
hipError_t error; // to avoid compiler warnings
|
||||
error = hipStreamSynchronize(GetStream());
|
||||
|
||||
stop_ = std::chrono::steady_clock::now();
|
||||
|
||||
std::chrono::duration<float, std::milli> ms = stop_ - start_;
|
||||
Record(ms.count());
|
||||
}
|
||||
|
||||
private:
|
||||
std::chrono::time_point<std::chrono::steady_clock> start_;
|
||||
std::chrono::time_point<std::chrono::steady_clock> stop_;
|
||||
};
|
||||
|
||||
template <typename Derived> class Benchmark {
|
||||
public:
|
||||
Benchmark()
|
||||
: iterations_(cmd_options.iterations),
|
||||
warmups_(cmd_options.warmups),
|
||||
display_output_(!cmd_options.no_display),
|
||||
progress_bar_(cmd_options.progress) {
|
||||
benchmark_name_ = Catch::getResultCapture().getCurrentTestName();
|
||||
}
|
||||
|
||||
Benchmark(const Benchmark&) = delete;
|
||||
Benchmark& operator=(const Benchmark&) = delete;
|
||||
|
||||
static constexpr ssize_t kWarmup = -1;
|
||||
|
||||
void Configure(size_t iterations, size_t warmups) {
|
||||
iterations_ = iterations;
|
||||
warmups_ = warmups;
|
||||
}
|
||||
|
||||
void AddSectionName(const std::string& section_name) { benchmark_name_ += "/" + section_name; }
|
||||
|
||||
using ModifierSignature = std::function<float(float)>;
|
||||
void RegisterModifier(const ModifierSignature& modifier) { modifier_ = modifier; }
|
||||
|
||||
template <typename... Args> std::tuple<float, float, float, float> Run(Args&&... args) {
|
||||
AddSectionName(std::to_string(iterations_));
|
||||
AddSectionName(std::to_string(warmups_));
|
||||
|
||||
auto& derived = static_cast<Derived&>(*this);
|
||||
|
||||
current_ = kWarmup;
|
||||
for (size_t i = 0u; i < warmups_; ++i) {
|
||||
PrintProgress("warmup", static_cast<int>(100.f * (i + 1) / warmups_));
|
||||
derived(args...);
|
||||
}
|
||||
time_ = .0;
|
||||
|
||||
std::vector<float> samples;
|
||||
samples.reserve(iterations_);
|
||||
|
||||
for (current_ = 0; current_ < iterations_; ++current_) {
|
||||
PrintProgress("measurement", static_cast<int>(100.f * (current_ + 1) / iterations_));
|
||||
derived(args...);
|
||||
if (modifier_) time_ = modifier_(time_);
|
||||
samples.push_back(time_);
|
||||
time_ = .0;
|
||||
}
|
||||
|
||||
float sum = std::accumulate(cbegin(samples), cend(samples), .0);
|
||||
float mean = sum / samples.size();
|
||||
|
||||
float deviation =
|
||||
std::accumulate(cbegin(samples), cend(samples), .0,
|
||||
[mean](float sum, float next) { return sum + std::pow(next - mean, 2); });
|
||||
deviation = sqrt(deviation / samples.size());
|
||||
|
||||
float best = *std::min_element(cbegin(samples), cend(samples));
|
||||
float worst = *std::max_element(cbegin(samples), cend(samples));
|
||||
|
||||
PrintStats(mean, deviation, best, worst);
|
||||
|
||||
return {mean, deviation, best, worst};
|
||||
}
|
||||
|
||||
protected:
|
||||
template <bool event_based>
|
||||
using TimerType = std::conditional_t<event_based, EventTimer, CpuTimer>;
|
||||
|
||||
template <bool event_based = false>
|
||||
std::unique_ptr<TimerType<event_based>> GetTimer(hipStream_t stream = nullptr) {
|
||||
return std::make_unique<TimerType<event_based>>(time_, stream);
|
||||
}
|
||||
|
||||
float time() const { return time_; }
|
||||
|
||||
size_t iterations() const { return iterations_; }
|
||||
|
||||
size_t warmups() const { return warmups_; }
|
||||
|
||||
ssize_t current() const { return current_; }
|
||||
|
||||
private:
|
||||
std::string benchmark_name_;
|
||||
float time_;
|
||||
size_t iterations_;
|
||||
size_t warmups_;
|
||||
ssize_t current_;
|
||||
bool display_output_;
|
||||
bool progress_bar_;
|
||||
|
||||
ModifierSignature modifier_;
|
||||
|
||||
void Print(const std::string& out = "") {
|
||||
if (!display_output_) return;
|
||||
std::cout << "\r" << std::setw(110) << std::left << benchmark_name_ << "\t|\t" << out
|
||||
<< std::flush;
|
||||
}
|
||||
|
||||
void PrintProgress(const std::string& name, int progress) {
|
||||
if (!(display_output_ && progress_bar_)) return;
|
||||
Print(name + ": [" + std::to_string(progress) + "%]");
|
||||
}
|
||||
|
||||
void PrintStats(float mean, float deviation, float best, float worst) {
|
||||
if (!display_output_) return;
|
||||
Print("Average time: " + std::to_string(mean) + " ms, Standard deviation: " +
|
||||
std::to_string(deviation) + " ms, Fastest: " + std::to_string(best) +
|
||||
" ms, Slowest: " + std::to_string(worst) + " ms\n");
|
||||
}
|
||||
};
|
||||
|
||||
// Values for the TIMER_TYPE argument of the macros below; they become the `event_based`
// template argument of GetTimer (false -> CpuTimer, true -> EventTimer).
constexpr bool kTimerTypeCpu = false;
constexpr bool kTimerTypeEvent = true;

// Prefix for a statement or block inside a class that provides GetTimer: the timer lives in the
// if-initializer, so it is destroyed (and records its time) when the controlled statement ends.
#define TIMED_SECTION_STREAM(TIMER_TYPE, STREAM)                                                   \
  if (auto _ = this->template GetTimer<TIMER_TYPE>(STREAM); true)

// Same as TIMED_SECTION_STREAM with a null stream argument.
#define TIMED_SECTION(TIMER_TYPE) TIMED_SECTION_STREAM(TIMER_TYPE, nullptr)
|
||||
|
||||
// Binary size literals: 4_KB == 4 * 1024, 2_MB == 2 * 1024 * 1024, 1_GB == 1024 * 1024 * 1024.
// Declared without whitespace between "" and the suffix: with whitespace the suffix is parsed as
// an ordinary identifier, and identifiers starting with an underscore followed by an uppercase
// letter are reserved.
constexpr size_t operator""_KB(unsigned long long int kb) { return kb << 10; }

constexpr size_t operator""_MB(unsigned long long int mb) { return mb << 20; }

constexpr size_t operator""_GB(unsigned long long int gb) { return gb << 30; }
|
||||
|
||||
// Maps an allocation type to the label used in benchmark section names. Types without a
// dedicated label map to "unknown alloc type".
static std::string GetAllocationSectionName(LinearAllocs allocation_type) {
  const char* label = "unknown alloc type";
  switch (allocation_type) {
    case LinearAllocs::malloc:
      label = "host pageable";
      break;
    case LinearAllocs::hipHostMalloc:
      label = "host pinned";
      break;
    case LinearAllocs::hipMalloc:
      label = "device malloc";
      break;
    case LinearAllocs::hipMallocManaged:
      label = "managed";
      break;
    default:
      break;
  }
  return label;
}
|
||||
@@ -0,0 +1,454 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <hip_array_common.hh>
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip/hip_runtime_api.h>
|
||||
|
||||
// Allocation strategies understood by LinearAllocGuard.
enum class LinearAllocs {
  malloc,             // plain malloc
  mallocAndRegister,  // malloc followed by hipHostRegister
  hipHostMalloc,
  hipMalloc,
  hipMallocManaged,
  noAlloc  // guard owns nothing
};

// Human-readable name of an allocation strategy; noAlloc (and any unlisted value) yields
// "unknown alloc type".
inline std::string to_string(const LinearAllocs allocation_type) {
  const char* name = "unknown alloc type";
  switch (allocation_type) {
    case LinearAllocs::malloc:
      name = "malloc";
      break;
    case LinearAllocs::mallocAndRegister:
      name = "malloc + hipHostRegister";
      break;
    case LinearAllocs::hipHostMalloc:
      name = "hipHostMalloc";
      break;
    case LinearAllocs::hipMalloc:
      name = "hipMalloc";
      break;
    case LinearAllocs::hipMallocManaged:
      name = "hipMallocManaged";
      break;
    default:
      break;
  }
  return name;
}
|
||||
|
||||
template <typename T> class LinearAllocGuard {
|
||||
public:
|
||||
LinearAllocGuard() = default;
|
||||
|
||||
LinearAllocGuard(const LinearAllocs allocation_type, const size_t size,
|
||||
const unsigned int flags = 0u)
|
||||
: allocation_type_{allocation_type},
|
||||
size_{size} {
|
||||
switch (allocation_type_) {
|
||||
case LinearAllocs::malloc:
|
||||
ptr_ = host_ptr_ = reinterpret_cast<T*>(malloc(size));
|
||||
break;
|
||||
case LinearAllocs::mallocAndRegister:
|
||||
host_ptr_ = reinterpret_cast<T*>(malloc(size));
|
||||
HIP_CHECK(hipHostRegister(host_ptr_, size, flags));
|
||||
HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&ptr_), host_ptr_, 0u));
|
||||
break;
|
||||
case LinearAllocs::hipHostMalloc:
|
||||
HIP_CHECK(hipHostMalloc(reinterpret_cast<void**>(&ptr_), size, flags));
|
||||
host_ptr_ = ptr_;
|
||||
break;
|
||||
case LinearAllocs::hipMalloc:
|
||||
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&ptr_), size));
|
||||
break;
|
||||
case LinearAllocs::hipMallocManaged:
|
||||
HIP_CHECK(hipMallocManaged(reinterpret_cast<void**>(&ptr_), size, flags ? flags : 1u));
|
||||
host_ptr_ = ptr_;
|
||||
break;
|
||||
case LinearAllocs::noAlloc:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
LinearAllocGuard(const LinearAllocGuard&) = delete;
|
||||
|
||||
LinearAllocGuard(LinearAllocGuard&& o) { *this = std::move(o); }
|
||||
|
||||
LinearAllocGuard& operator=(LinearAllocGuard&& o) {
|
||||
if (this != &o) {
|
||||
dealloc();
|
||||
|
||||
allocation_type_ = o.allocation_type_;
|
||||
ptr_ = o.ptr_;
|
||||
host_ptr_ = o.host_ptr_;
|
||||
size_ = o.size_;
|
||||
|
||||
o.allocation_type_ = LinearAllocs::noAlloc;
|
||||
o.ptr_ = nullptr;
|
||||
o.host_ptr_ = nullptr;
|
||||
o.size_ = 0;
|
||||
}
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
~LinearAllocGuard() { dealloc(); }
|
||||
|
||||
T* ptr() const { return ptr_; };
|
||||
T* host_ptr() const { return host_ptr_; }
|
||||
size_t size_bytes() const { return size_; }
|
||||
|
||||
private:
|
||||
LinearAllocs allocation_type_ = LinearAllocs::noAlloc;
|
||||
T* ptr_ = nullptr;
|
||||
T* host_ptr_ = nullptr;
|
||||
size_t size_ = 0;
|
||||
|
||||
void dealloc() {
|
||||
if (ptr_ == nullptr) {
|
||||
return;
|
||||
}
|
||||
// No Catch macros, don't want to possibly throw in the destructor
|
||||
if (ptr_ != nullptr) {
|
||||
switch (allocation_type_) {
|
||||
case LinearAllocs::noAlloc:
|
||||
break;
|
||||
case LinearAllocs::malloc:
|
||||
free(ptr_);
|
||||
break;
|
||||
case LinearAllocs::mallocAndRegister:
|
||||
// Cast to void to suppress nodiscard warnings
|
||||
static_cast<void>(hipHostUnregister(host_ptr_));
|
||||
free(host_ptr_);
|
||||
break;
|
||||
case LinearAllocs::hipHostMalloc:
|
||||
static_cast<void>(hipHostFree(ptr_));
|
||||
break;
|
||||
case LinearAllocs::hipMalloc:
|
||||
case LinearAllocs::hipMallocManaged:
|
||||
static_cast<void>(hipFree(ptr_));
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T> class LinearAllocGuardMultiDim {
|
||||
protected:
|
||||
LinearAllocGuardMultiDim(hipExtent extent) : extent_{extent} {}
|
||||
|
||||
~LinearAllocGuardMultiDim() { static_cast<void>(hipFree(pitched_ptr_.ptr)); }
|
||||
|
||||
public:
|
||||
T* ptr() const { return reinterpret_cast<T*>(pitched_ptr_.ptr); };
|
||||
|
||||
size_t pitch() const { return pitched_ptr_.pitch; }
|
||||
|
||||
hipExtent extent() const { return extent_; }
|
||||
|
||||
hipPitchedPtr pitched_ptr() const { return pitched_ptr_; }
|
||||
|
||||
size_t width() const { return extent_.width; }
|
||||
|
||||
size_t width_logical() const { return extent_.width / sizeof(T); }
|
||||
|
||||
size_t height() const { return extent_.height; }
|
||||
|
||||
public:
|
||||
hipPitchedPtr pitched_ptr_;
|
||||
const hipExtent extent_;
|
||||
};
|
||||
|
||||
template <typename T, bool unaligned = false>
|
||||
class LinearAllocGuard2D : public LinearAllocGuardMultiDim<T> {
|
||||
public:
|
||||
LinearAllocGuard2D(const size_t width_logical, const size_t height)
|
||||
: LinearAllocGuardMultiDim<T>{make_hipExtent(width_logical * sizeof(T), height, 1)} {
|
||||
if (unaligned) {
|
||||
this->pitched_ptr_.pitch = width_logical * sizeof(T);
|
||||
HIP_CHECK(hipMalloc(&this->pitched_ptr_.ptr, this->pitched_ptr_.pitch * height));
|
||||
} else {
|
||||
HIP_CHECK(hipMallocPitch(&this->pitched_ptr_.ptr, &this->pitched_ptr_.pitch,
|
||||
this->extent_.width, this->extent_.height));
|
||||
}
|
||||
}
|
||||
|
||||
LinearAllocGuard2D(const LinearAllocGuard2D&) = delete;
|
||||
LinearAllocGuard2D(LinearAllocGuard2D&&) = delete;
|
||||
};
|
||||
|
||||
// 3D pitched device allocation made with hipMalloc3D. Constructible either from logical
// dimensions (width in elements of T) or from a hipExtent whose width is already in bytes.
template <typename T> class LinearAllocGuard3D : public LinearAllocGuardMultiDim<T> {
 public:
  LinearAllocGuard3D(const hipExtent extent) : LinearAllocGuardMultiDim<T>(extent) {
    HIP_CHECK(hipMalloc3D(&this->pitched_ptr_, this->extent_));
  }

  // Converts the logical width to bytes and forwards to the extent constructor.
  LinearAllocGuard3D(const size_t width_logical, const size_t height, const size_t depth)
      : LinearAllocGuard3D(make_hipExtent(width_logical * sizeof(T), height, depth)) {}

  LinearAllocGuard3D(const LinearAllocGuard3D&) = delete;
  LinearAllocGuard3D(LinearAllocGuard3D&&) = delete;

  size_t depth() const { return this->extent_.depth; }
};
|
||||
|
||||
template <typename T> class ArrayAllocGuard {
|
||||
public:
|
||||
// extent should contain logical width
|
||||
ArrayAllocGuard(const hipExtent extent, const unsigned int flags = 0u) : extent_{extent} {
|
||||
hipChannelFormatDesc desc = hipCreateChannelDesc<T>();
|
||||
HIP_CHECK(hipMalloc3DArray(&ptr_, &desc, extent_, flags));
|
||||
}
|
||||
|
||||
~ArrayAllocGuard() { static_cast<void>(hipFreeArray(ptr_)); }
|
||||
|
||||
ArrayAllocGuard(const ArrayAllocGuard&) = delete;
|
||||
ArrayAllocGuard(ArrayAllocGuard&&) = delete;
|
||||
|
||||
hipArray_t ptr() const { return ptr_; }
|
||||
|
||||
hipExtent extent() const { return extent_; }
|
||||
|
||||
private:
|
||||
hipArray_t ptr_ = nullptr;
|
||||
const hipExtent extent_;
|
||||
};
|
||||
|
||||
template <typename T> class MipmappedArrayAllocGuard {
|
||||
public:
|
||||
// extent should contain logical width
|
||||
MipmappedArrayAllocGuard(const hipExtent extent, const unsigned int levels,
|
||||
const unsigned int flags)
|
||||
: extent_{extent}, levels_{levels} {
|
||||
hipChannelFormatDesc desc = hipCreateChannelDesc<T>();
|
||||
HIP_CHECK(hipMallocMipmappedArray(&ptr_, &desc, extent_, levels_, flags));
|
||||
}
|
||||
|
||||
MipmappedArrayAllocGuard(const hipExtent extent, const unsigned int flags = 0u)
|
||||
: MipmappedArrayAllocGuard{extent, 1, flags} {}
|
||||
|
||||
~MipmappedArrayAllocGuard() { static_cast<void>(hipFreeMipmappedArray(ptr_)); }
|
||||
|
||||
MipmappedArrayAllocGuard(const MipmappedArrayAllocGuard&) = delete;
|
||||
MipmappedArrayAllocGuard(MipmappedArrayAllocGuard&&) = delete;
|
||||
|
||||
hipMipmappedArray_t ptr() const { return ptr_; }
|
||||
|
||||
hipArray_t GetLevel(unsigned int level) {
|
||||
hipArray_t ret;
|
||||
HIP_CHECK(hipGetMipmappedArrayLevel(&ret, ptr_, level));
|
||||
return ret;
|
||||
}
|
||||
|
||||
hipExtent extent() const { return extent_; }
|
||||
|
||||
unsigned int levels() const { return levels_; }
|
||||
|
||||
private:
|
||||
hipMipmappedArray_t ptr_ = nullptr;
|
||||
const hipExtent extent_;
|
||||
const unsigned int levels_;
|
||||
};
|
||||
|
||||
template <typename T> class DrvArrayAllocGuard {
|
||||
public:
|
||||
// extent should contain width in bytes
|
||||
DrvArrayAllocGuard(const hipExtent extent, const unsigned int flags = 0u) : extent_{extent} {
|
||||
HIP_ARRAY3D_DESCRIPTOR desc{};
|
||||
using vec_info = vector_info<T>;
|
||||
desc.Format = vec_info::format;
|
||||
desc.NumChannels = vec_info::size;
|
||||
desc.Width = extent_.width / sizeof(T);
|
||||
desc.Height = extent_.height;
|
||||
desc.Depth = extent_.depth;
|
||||
desc.Flags = flags;
|
||||
HIP_CHECK(hipArray3DCreate(&ptr_, &desc));
|
||||
}
|
||||
|
||||
~DrvArrayAllocGuard() { static_cast<void>(hipArrayDestroy(ptr_)); }
|
||||
|
||||
DrvArrayAllocGuard(const DrvArrayAllocGuard&) = delete;
|
||||
DrvArrayAllocGuard(DrvArrayAllocGuard&&) = delete;
|
||||
|
||||
hipArray_t ptr() const { return ptr_; }
|
||||
|
||||
hipExtent extent() const { return extent_; }
|
||||
|
||||
private:
|
||||
hipArray_t ptr_ = nullptr;
|
||||
const hipExtent extent_;
|
||||
};
|
||||
|
||||
enum class Streams { nullstream, perThread, created, withFlags, withPriority };
|
||||
|
||||
class StreamGuard {
|
||||
public:
|
||||
StreamGuard() = default;
|
||||
|
||||
StreamGuard(const Streams stream_type, unsigned int flags = hipStreamDefault, int priority = 0)
|
||||
: stream_type_{stream_type}, flags_{flags}, priority_{priority} {
|
||||
switch (stream_type_) {
|
||||
case Streams::nullstream:
|
||||
stream_ = nullptr;
|
||||
break;
|
||||
case Streams::perThread:
|
||||
stream_ = hipStreamPerThread;
|
||||
break;
|
||||
case Streams::created:
|
||||
HIP_CHECK(hipStreamCreate(&stream_));
|
||||
break;
|
||||
case Streams::withFlags:
|
||||
HIP_CHECK(hipStreamCreateWithFlags(&stream_, flags_));
|
||||
break;
|
||||
case Streams::withPriority:
|
||||
HIP_CHECK(hipStreamCreateWithPriority(&stream_, flags_, priority_));
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
StreamGuard(const StreamGuard&) = delete;
|
||||
|
||||
StreamGuard(StreamGuard&& o) { *this = std::move(o); }
|
||||
|
||||
StreamGuard& operator=(StreamGuard&& o) {
|
||||
if (this != &o) {
|
||||
if (stream_type_ >= Streams::created) {
|
||||
static_cast<void>(hipStreamDestroy(stream_));
|
||||
}
|
||||
|
||||
stream_type_ = o.stream_type_;
|
||||
flags_ = o.flags_;
|
||||
priority_ = o.priority_;
|
||||
stream_ = o.stream_;
|
||||
|
||||
o.stream_type_ = Streams::nullstream;
|
||||
o.flags_ = 0u;
|
||||
o.priority_ = 0;
|
||||
o.stream_ = nullptr;
|
||||
}
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
~StreamGuard() {
|
||||
if (stream_type_ >= Streams::created && stream_ != nullptr) {
|
||||
static_cast<void>(hipStreamDestroy(stream_));
|
||||
}
|
||||
}
|
||||
|
||||
hipStream_t stream() const { return stream_; }
|
||||
|
||||
private:
|
||||
Streams stream_type_ = Streams::nullstream;
|
||||
unsigned int flags_ = 0u;
|
||||
int priority_ = 0;
|
||||
hipStream_t stream_ = nullptr;
|
||||
};
|
||||
|
||||
class EventsGuard {
|
||||
public:
|
||||
EventsGuard(size_t N) : events_(N) {
|
||||
for (auto& e : events_) HIP_CHECK(hipEventCreate(&e));
|
||||
}
|
||||
|
||||
EventsGuard(const EventsGuard&) = delete;
|
||||
EventsGuard(EventsGuard&&) = delete;
|
||||
|
||||
~EventsGuard() {
|
||||
for (auto& e : events_) {
|
||||
static_cast<void>(hipEventDestroy(e));
|
||||
}
|
||||
}
|
||||
|
||||
hipEvent_t& operator[](int index) { return events_[index]; }
|
||||
|
||||
operator hipEvent_t() const { return events_.at(0); }
|
||||
|
||||
std::vector<hipEvent_t>& event_list() { return events_; }
|
||||
|
||||
private:
|
||||
std::vector<hipEvent_t> events_;
|
||||
};
|
||||
|
||||
class StreamsGuard {
|
||||
public:
|
||||
StreamsGuard(size_t N) : streams_(N) {
|
||||
for (auto& s : streams_) HIP_CHECK(hipStreamCreate(&s));
|
||||
}
|
||||
|
||||
StreamsGuard(const StreamsGuard&) = delete;
|
||||
StreamsGuard(StreamsGuard&&) = delete;
|
||||
|
||||
~StreamsGuard() {
|
||||
for (auto& s : streams_) static_cast<void>(hipStreamDestroy(s));
|
||||
}
|
||||
|
||||
hipStream_t& operator[](int index) { return streams_[index]; }
|
||||
|
||||
operator hipStream_t() const { return streams_.at(0); }
|
||||
|
||||
std::vector<hipStream_t>& stream_list() { return streams_; }
|
||||
|
||||
private:
|
||||
std::vector<hipStream_t> streams_;
|
||||
};
|
||||
|
||||
enum class MemPools { dev_default, created };
|
||||
|
||||
class MemPoolGuard {
|
||||
public:
|
||||
MemPoolGuard(const MemPools mempool_type, int device,
|
||||
hipMemAllocationHandleType handle_type = hipMemHandleTypeNone)
|
||||
: mempool_type_{mempool_type}, device_{device}, handle_type_{handle_type} {
|
||||
switch (mempool_type_) {
|
||||
case MemPools::dev_default:
|
||||
HIP_CHECK(hipDeviceGetDefaultMemPool(&mempool_, device_));
|
||||
break;
|
||||
case MemPools::created:
|
||||
hipMemPoolProps pool_props;
|
||||
memset(&pool_props, 0, sizeof(pool_props));
|
||||
pool_props.allocType = hipMemAllocationTypePinned;
|
||||
pool_props.handleTypes = handle_type_;
|
||||
pool_props.location.type = hipMemLocationTypeDevice;
|
||||
pool_props.location.id = device_;
|
||||
pool_props.win32SecurityAttributes = nullptr;
|
||||
|
||||
HIP_CHECK(hipMemPoolCreate(&mempool_, &pool_props));
|
||||
}
|
||||
}
|
||||
|
||||
MemPoolGuard(const MemPoolGuard&) = delete;
|
||||
MemPoolGuard(MemPoolGuard&&) = delete;
|
||||
|
||||
~MemPoolGuard() {
|
||||
if (mempool_type_ == MemPools::created) {
|
||||
static_cast<void>(hipMemPoolDestroy(mempool_));
|
||||
} else {
|
||||
// Reset max states for default mem pool, so subtests won't fail
|
||||
uint64_t value = 0;
|
||||
HIP_CHECK(hipMemPoolSetAttribute(mempool_, hipMemPoolAttrUsedMemHigh, &value));
|
||||
HIP_CHECK(hipMemPoolSetAttribute(mempool_, hipMemPoolAttrReservedMemHigh, &value));
|
||||
}
|
||||
}
|
||||
|
||||
hipMemPool_t mempool() const { return mempool_; }
|
||||
|
||||
private:
|
||||
const MemPools mempool_type_;
|
||||
int device_;
|
||||
hipMemAllocationHandleType handle_type_;
|
||||
hipMemPool_t mempool_;
|
||||
};
|
||||
@@ -0,0 +1,110 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <condition_variable>
|
||||
#include <mutex>
|
||||
#include <thread>
|
||||
|
||||
/*
|
||||
Guarantees total ordering between parent and child thread
|
||||
PARENT CHILD
|
||||
THREAD THREAD
|
||||
TestPart1
|
||||
\
|
||||
\
|
||||
\
|
||||
TestPart2
|
||||
/
|
||||
/
|
||||
/
|
||||
TestPart3
|
||||
\
|
||||
\
|
||||
\
|
||||
TestPart4
|
||||
Usage:
|
||||
Define a derived class which inherits from ThreadedZigZagTest instantiated with that selfsame class,
|
||||
which implements the appropriate test methods
|
||||
class DerivedTestClass : public ThreadedZigZagTest<DerivedTestClass> {
|
||||
void TestPart1() {...}
|
||||
void TestPart2() {...}
|
||||
void TestPart3() {...}
|
||||
void TestPart4() {...}
|
||||
};
|
||||
The derived class can contain state that the test requires.
|
||||
*/
|
||||
|
||||
template <typename T> class ThreadedZigZagTest {
|
||||
public:
|
||||
void run() {
|
||||
// 1.
|
||||
static_cast<T*>(this)->TestPart1();
|
||||
|
||||
auto t = std::thread([this] {
|
||||
// 2.
|
||||
static_cast<T*>(this)->TestPart2();
|
||||
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(mtx_);
|
||||
ready_ = true;
|
||||
}
|
||||
cv_.notify_one();
|
||||
|
||||
{
|
||||
std::unique_lock<std::mutex> lock(mtx_);
|
||||
cv_.wait(lock, [this] { return !ready_; });
|
||||
}
|
||||
|
||||
// 4.
|
||||
static_cast<T*>(this)->TestPart4();
|
||||
});
|
||||
|
||||
{
|
||||
std::unique_lock<std::mutex> lock(mtx_);
|
||||
cv_.wait(lock, [this] { return ready_; });
|
||||
}
|
||||
|
||||
// 3.
|
||||
static_cast<T*>(this)->TestPart3();
|
||||
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(mtx_);
|
||||
ready_ = false;
|
||||
}
|
||||
cv_.notify_one();
|
||||
|
||||
// Finalize
|
||||
t.join();
|
||||
HIP_CHECK_THREAD_FINALIZE();
|
||||
}
|
||||
|
||||
void TestPart1() const {}
|
||||
void TestPart2() const {}
|
||||
void TestPart3() const {}
|
||||
void TestPart4() const {}
|
||||
|
||||
private:
|
||||
std::mutex mtx_;
|
||||
std::condition_variable cv_;
|
||||
bool ready_ = false;
|
||||
};
|
||||
@@ -0,0 +1,194 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <chrono>
|
||||
#include <optional>
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip/hip_runtime_api.h>
|
||||
|
||||
namespace {

// Page size in bytes assumed by the tests.
inline constexpr size_t kPageSize = 4096;

}  // anonymous namespace
|
||||
|
||||
// Compares two arrays of num_elements elements and fails the test at the first index where they
// differ, reporting that index.
template <typename T>
void ArrayMismatch(T* const expected, T* const actual, const size_t num_elements) {
  T* const expected_end = expected + num_elements;
  const auto first_diff = std::mismatch(expected, expected_end, actual);
  if (first_diff.first == expected_end) {
    return;  // arrays are identical
  }

  const auto idx = std::distance(expected, first_diff.first);
  INFO("Value mismatch at index: " << idx);
  REQUIRE(expected[idx] == actual[idx]);
}
|
||||
|
||||
// Fails the test at the first element of [begin, end) that differs from expected_value,
// reporting its offset from begin.
template <typename It, typename T> void ArrayFindIfNot(It begin, It end, const T expected_value) {
  const auto equals_expected = [expected_value](const T elem) { return expected_value == elem; };
  const auto it = std::find_if_not(begin, end, equals_expected);
  if (it == end) {
    return;  // every element matched
  }

  const auto idx = std::distance(begin, it);
  INFO("Value mismatch at index " << idx);
  REQUIRE(expected_value == *it);
}
|
||||
|
||||
// Pointer + length convenience overload: checks that the first num_elements elements of array
// all equal expected_value.
template <typename T>
void ArrayFindIfNot(T* const array, const T expected_value, const size_t num_elements) {
  T* const array_end = array + num_elements;
  ArrayFindIfNot(array, array_end, expected_value);
}
|
||||
|
||||
template <typename T, typename F>
|
||||
static inline void ArrayAllOf(const T* arr, uint32_t count, F value_gen) {
|
||||
for (auto i = 0u; i < count; ++i) {
|
||||
const std::optional<T> expected_val = value_gen(i);
|
||||
if (!expected_val.has_value()) continue;
|
||||
// Using require on every iteration leads to a noticeable performance loss on large arrays,
|
||||
// even when the require passes.
|
||||
if (arr[i] != expected_val.value()) {
|
||||
INFO("Mismatch at index: " << i);
|
||||
REQUIRE(arr[i] == expected_val.value());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static inline void ArrayInRange(const T* arr, uint32_t count,const T minval,const T maxval) {
|
||||
for (auto i = 0u; i < count; ++i) {
|
||||
if(arr[i] < minval)
|
||||
{
|
||||
INFO("Mismatch at index: " << i);
|
||||
REQUIRE(arr[i] > minval);
|
||||
}
|
||||
else if(arr[i] > maxval)
|
||||
{
|
||||
INFO("Mismatch at index: " << i);
|
||||
REQUIRE(arr[i] < maxval);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template <typename T, typename F>
|
||||
void PitchedMemoryVerify(T* const ptr, const size_t pitch, const size_t width, const size_t height,
|
||||
const size_t depth, F expected_value_generator) {
|
||||
for (size_t z = 0; z < depth; ++z) {
|
||||
for (size_t y = 0; y < height; ++y) {
|
||||
for (size_t x = 0; x < width; ++x) {
|
||||
const auto slice = reinterpret_cast<uint8_t*>(ptr) + pitch * height * z;
|
||||
const auto row = slice + pitch * y;
|
||||
if (reinterpret_cast<T*>(row)[x] != expected_value_generator(x, y, z)) {
|
||||
INFO("Mismatch at indices: " << x << ", " << y << ", " << z);
|
||||
REQUIRE(reinterpret_cast<T*>(row)[x] == expected_value_generator(x, y, z));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Fills a pitched 3D allocation: element (x, y, z) receives expected_value_generator(x, y, z).
// Rows are `pitch` bytes apart and slices `pitch * height` bytes apart; bytes between `width`
// elements and the end of a row are left untouched.
template <typename T, typename F>
void PitchedMemorySet(T* const ptr, const size_t pitch, const size_t width, const size_t height,
                      const size_t depth, F expected_value_generator) {
  uint8_t* const base = reinterpret_cast<uint8_t*>(ptr);
  for (size_t z = 0; z < depth; ++z) {
    uint8_t* const slice = base + pitch * height * z;
    for (size_t y = 0; y < height; ++y) {
      T* const row = reinterpret_cast<T*>(slice + pitch * y);
      for (size_t x = 0; x < width; ++x) {
        row[x] = expected_value_generator(x, y, z);
      }
    }
  }
}
|
||||
|
||||
template <typename T>
|
||||
__global__ void VectorIncrement(T* const vec, const T increment_value, size_t N) {
|
||||
size_t offset = (blockIdx.x * blockDim.x + threadIdx.x);
|
||||
size_t stride = blockDim.x * gridDim.x;
|
||||
|
||||
for (size_t i = offset; i < N; i += stride) {
|
||||
vec[i] += increment_value;
|
||||
}
|
||||
}
|
||||
|
||||
// Writes `value` into each of the first N elements of vec. Each thread starts at its global
// x index and advances by the total number of threads in the grid (x dimension).
template <typename T> __global__ void VectorSet(T* const vec, const T value, size_t N) {
  const size_t step = blockDim.x * gridDim.x;
  size_t i = (blockIdx.x * blockDim.x + threadIdx.x);

  while (i < N) {
    vec[i] = value;
    i += step;
  }
}
|
||||
|
||||
// Will execute for atleast interval milliseconds
|
||||
static __global__ void Delay(uint32_t interval, const uint32_t ticks_per_ms) {
|
||||
while (interval--) {
|
||||
#if HT_AMD
|
||||
uint64_t start = clock_function();
|
||||
while (clock_function() - start < ticks_per_ms) {
|
||||
__builtin_amdgcn_s_sleep(10);
|
||||
}
|
||||
#endif
|
||||
#if HT_NVIDIA
|
||||
uint64_t start = clock64();
|
||||
while (clock64() - start < ticks_per_ms) {
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__global__ void Iota(T* const out, size_t pitch, size_t w, size_t h, size_t d) {
|
||||
const auto x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
const auto y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
const auto z = blockIdx.z * blockDim.z + threadIdx.z;
|
||||
if (x < w && y < h && z < d) {
|
||||
char* const slice = reinterpret_cast<char*>(out) + pitch * h * z;
|
||||
char* const row = slice + pitch * y;
|
||||
reinterpret_cast<T*>(row)[x] = z * w * h + y * w + x;
|
||||
}
|
||||
}
|
||||
|
||||
// Launches a single-thread Delay kernel on `stream` that runs for at least `interval`.
// The tick rate handed to the kernel is read from a device attribute (wall-clock rate on AMD,
// clock rate on NVIDIA).
// NOTE(review): the attribute is always queried for device 0 -- confirm this is intended when
// `stream` belongs to another device.
inline void LaunchDelayKernel(const std::chrono::milliseconds interval, const hipStream_t stream = nullptr) {
  int ticks_per_ms = 0;
#if HT_AMD
  HIPCHECK(hipDeviceGetAttribute(&ticks_per_ms, hipDeviceAttributeWallClockRate, 0));
#endif
#if HT_NVIDIA
  HIPCHECK(hipDeviceGetAttribute(&ticks_per_ms, hipDeviceAttributeClockRate, 0));
#endif
  const auto interval_ms = interval.count();
  Delay<<<1, 1, 0, stream>>>(interval_ms, ticks_per_ms);
}
|
||||
|
||||
template <typename... Attributes>
|
||||
inline bool DeviceAttributesSupport(const int device, Attributes... attributes) {
|
||||
constexpr auto DeviceAttributeSupport = [](const int device,
|
||||
const hipDeviceAttribute_t attribute) {
|
||||
int value = 0;
|
||||
HIP_CHECK(hipDeviceGetAttribute(&value, attribute, device));
|
||||
return value;
|
||||
};
|
||||
return (... && DeviceAttributeSupport(device, attributes));
|
||||
}
|
||||
|
||||
// Returns the value of attribute `attr` for `device`; a failing HIP call is reported through
// HIP_CHECK.
inline int GetDeviceAttribute(const hipDeviceAttribute_t attr, int device) {
  int queried = 0;
  HIP_CHECK(hipDeviceGetAttribute(&queried, attr, device));
  return queried;
}
|
||||
@@ -0,0 +1,519 @@
|
||||
/*
|
||||
Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#define HIP_ENABLE_WARP_SYNC_BUILTINS
|
||||
#define HIP_ENABLE_EXTRA_WARP_SYNC_TYPES
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <resource_guards.hh>
|
||||
#include <hip/hip_cooperative_groups.h>
|
||||
#include <hip/hip_fp16.h>
|
||||
#include <limits>
|
||||
#include <cmath>
|
||||
#include <iostream>
|
||||
#include <ios>
|
||||
|
||||
// Extracts bit `n` of `x` as 0 or 1. Both arguments and the whole expansion are parenthesized so
// the macro composes safely with surrounding operators and with compound argument expressions.
#define MASK_SHIFT(x, n) \
  (((x) & (static_cast<uint64_t>(1) << (n))) >> (n))
|
||||
|
||||
// 64-bit lane masks used by the warp tests.
const unsigned long long Every5thBit = 0x1084210842108421ull;     // bits 0, 5, 10, ...
const unsigned long long Every9thBit = 0x8040201008040201ull;     // bits 0, 9, 18, ...
const unsigned long long Every5thBut9th = Every5thBit & ~Every9thBit;
const unsigned long long AllThreads = ~0ull;                      // all 64 bits set
// Number of reductions performed by runTestReduce.
static constexpr int kNumReduces = 5000;
|
||||
|
||||
// Returns true when the calling thread's bit is clear in its warp's entry of `active_masks`.
// The entry index is (linear block rank) * (warps per block) + (warp index within the block);
// the bit index is the thread's rank within its warp-sized tile.
inline __device__ bool deactivate_thread(const uint64_t* const active_masks) {
  namespace cg = cooperative_groups;
  const auto block = cg::this_thread_block();
  const auto warp = cg::tiled_partition(block, warpSize);

  const auto warps_per_block = (block.size() + warpSize - 1) / warpSize;
  const auto block_rank = (blockIdx.z * gridDim.y + blockIdx.y) * gridDim.x + blockIdx.x;
  const auto warp_in_block = block.thread_rank() / warpSize;
  const auto idx = block_rank * warps_per_block + warp_in_block;

  const uint64_t lane_bit = static_cast<uint64_t>(1) << warp.thread_rank();
  return (active_masks[idx] & lane_bit) == 0;
}
|
||||
|
||||
// Returns the shared Mersenne Twister engine, created on first use and seeded once from
// std::random_device.
inline std::mt19937& GetRandomGenerator() {
  static std::mt19937 engine(std::random_device{}());
  return engine;
}
|
||||
|
||||
template <typename T> inline T GenerateRandomInteger(const T min, const T max) {
|
||||
std::uniform_int_distribution<T> dist(min, max);
|
||||
return dist(GetRandomGenerator());
|
||||
}
|
||||
|
||||
template <typename T> inline T GenerateRandomReal(const T min, const T max) {
|
||||
std::uniform_real_distribution<T> dist(min, max);
|
||||
return dist(GetRandomGenerator());
|
||||
}
|
||||
|
||||
inline int generate_width(int warp_size) {
|
||||
int exponent = 0;
|
||||
while (warp_size >>= 1) {
|
||||
++exponent;
|
||||
}
|
||||
|
||||
return GENERATE_COPY(map([](int e) { return 1 << e; }, range(1, exponent + 1)));
|
||||
}
|
||||
|
||||
// Returns one of five lane-mask patterns, selected by `warp_id % 5`:
//   0: odd-numbered lanes  (bits 1, 3, 5, ...), all 64 bits of the pattern
//   1: even-numbered lanes (bits 0, 2, 4, ...), all 64 bits of the pattern
//   2: lanes [0, warp_size / 2)
//   3: lanes [warp_size / 2, warp_size)
//   4: all 64 bits set
// Patterns 0, 1 and 4 are not truncated to `warp_size` bits.
inline uint64_t get_active_mask(unsigned int warp_id, unsigned int warp_size) {
  uint64_t active_mask = 0;
  switch (warp_id % 5) {
    case 0:  // odd-numbered lanes in the warp
      active_mask = 0xAAAAAAAAAAAAAAAA;
      break;
    case 1:  // even-numbered lanes in the warp
      active_mask = 0x5555555555555555;
      break;
    case 2:  // first half of the warp
      // Unsigned index: avoids comparing a signed counter against the unsigned warp size.
      for (unsigned int i = 0; i < warp_size / 2; i++) {
        active_mask = active_mask | (static_cast<uint64_t>(1) << i);
      }
      break;
    case 3:  // second half of the warp
      for (unsigned int i = warp_size / 2; i < warp_size; i++) {
        active_mask = active_mask | (static_cast<uint64_t>(1) << i);
      }
      break;
    case 4:  // all threads
      active_mask = 0xFFFFFFFFFFFFFFFF;
      break;
  }
  return active_mask;
}
|
||||
|
||||
// Integral overload: the value is converted to T unchanged.
template <typename T, std::enable_if_t<std::is_integral<T>::value, bool> = true>
inline T expandPrecision(int X) {
  return static_cast<T>(X);
}
|
||||
|
||||
// Floating-point overload: scales the integer by the double closest to pi so the result
// exercises the fractional bits of T.
template <typename T, std::enable_if_t<std::is_floating_point<T>::value, bool> = true>
inline T expandPrecision(int X) {
  constexpr double kScale = 3.141592653589793115997963468544185161590576171875;
  return static_cast<T>(X * kScale);
}
|
||||
|
||||
template <typename T, std::enable_if_t<std::is_same<T, __half>::value, bool> = true>
|
||||
inline __half expandPrecision(int X) {
|
||||
return (__half)expandPrecision<float>(X);
|
||||
}
|
||||
|
||||
template <typename T, std::enable_if_t<std::is_same<T, __half2>::value, bool> = true>
|
||||
inline __half2 expandPrecision(int X) {
|
||||
__half H = expandPrecision<float>(X);
|
||||
return {H, H};
|
||||
}
|
||||
|
||||
// Integral array overload: intentionally a no-op, integral values are left as they are.
template <typename T, std::enable_if_t<std::is_integral<T>::value, bool> = true>
inline void expandPrecision(T* Array, int size) {
  static_cast<void>(Array);
  static_cast<void>(size);
}
|
||||
|
||||
// Floating-point array overload: scales each of the first `size` elements in place by the
// double closest to pi.
template <typename T, std::enable_if_t<std::is_floating_point<T>::value, bool> = true>
inline void expandPrecision(T *Array, int size) {
  constexpr double kScale = 3.141592653589793115997963468544185161590576171875;
  int index = 0;
  while (index != size) {
    Array[index] *= kScale;
    ++index;
  }
}
|
||||
|
||||
template <typename T>
|
||||
inline void initializeInput(T *Input, int size) {
|
||||
int Values[] = {0, -1, 2, 3, 4, 5, -6, 7,
|
||||
8, -9, 10, 11, 12, 13, -14, 15,
|
||||
16, 17, -18, 19, 20, -21, 22, 23,
|
||||
24, 25, 26, -27, 28, 29, 30, 31,
|
||||
-32, 33, 34, 35, -36, 37, 38, -39,
|
||||
40, 41, 42, 43, -44, -45, 46, 47,
|
||||
48, 49, 50, -51, 52, 53, -54, 55,
|
||||
56, 57, -58, 59, 60, 61, 62, -63};
|
||||
|
||||
for (int i = 0; i != size; ++i) {
|
||||
Input[i] = expandPrecision<T>(Values[i]);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline void initializeExpected(T *Expected, int *Values, int size) {
|
||||
for (int i = 0; i != size; ++i) {
|
||||
Expected[i] = expandPrecision<T>(Values[i]);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline bool compareEqual(T X, T Y) { return X == Y; }
|
||||
|
||||
template <>
|
||||
inline bool compareEqual(__half X, __half Y) {
|
||||
return __half2float(X) == __half2float(Y);
|
||||
}
|
||||
|
||||
template <>
|
||||
inline bool compareEqual(__half2 X, __half2 Y) {
|
||||
return compareEqual(X.x, Y.x) && compareEqual(X.y, Y.y);
|
||||
}
|
||||
|
||||
// Compares entry `i` of two mask arrays. For a warp size of 32 only the low bits that survive
// a conversion to `unsigned` are compared; otherwise the full 64-bit values are compared.
inline bool compareMaskEqual(unsigned long long *Actual, unsigned long long *Expected,
                             int i, int warpSize) {
  const unsigned long long actual = Actual[i];
  const unsigned long long expected = Expected[i];
  return (warpSize == 32) ? ((unsigned)actual == (unsigned)expected) : (actual == expected);
}
|
||||
|
||||
// Rounds `num` up to the next multiple of `n`; values that are already multiples are returned
// unchanged.
template <typename T>
inline T alignUp(T num, size_t n) {
  if (num % n != 0) {
    return ((num + n - 1) / n) * n;
  }

  return num;
}
|
||||
|
||||
template <class T>
|
||||
struct DistributionType {
|
||||
using type = std::uniform_int_distribution<T>;
|
||||
};
|
||||
|
||||
// there is no std::uniform_real_distribution for 'half' type, so we cast from
|
||||
// unsigned short, avoiding Nan and Infinity
|
||||
template <>
|
||||
struct DistributionType<__half> {
|
||||
using type = std::uniform_int_distribution<unsigned short>;
|
||||
};
|
||||
|
||||
template <>
|
||||
struct DistributionType<float> {
|
||||
using type = std::uniform_real_distribution<float>;
|
||||
};
|
||||
|
||||
template <>
|
||||
struct DistributionType<double> {
|
||||
using type = std::uniform_real_distribution<double>;
|
||||
};
|
||||
|
||||
|
||||
// Binary functor returning the smaller operand; on ties (neither operand is less than the
// other) the left operand is returned, exactly as std::min does.
template <class T>
struct MinOp {
  T operator()(const T& lhs, const T& rhs) const
  {
    return (rhs < lhs) ? rhs : lhs;
  }
};
|
||||
|
||||
// Binary functor returning the larger operand; on ties (neither operand is less than the
// other) the left operand is returned, exactly as std::max does.
template <class T>
struct MaxOp {
  T operator()(const T& lhs, const T& rhs) const
  {
    return (lhs < rhs) ? rhs : lhs;
  }
};
|
||||
|
||||
template <class T>
|
||||
struct XorOp {
|
||||
__host__ __device__ T operator()(const T& lhs, const T& rhs)
|
||||
{
|
||||
return (!lhs) != (!rhs) == 1;
|
||||
}
|
||||
};
|
||||
|
||||
// typeid(T).name() does not seem to return a very descriptive name for primitive types,
|
||||
// at least on clang, so we roll out an equivalent
|
||||
template<class T>
|
||||
const char* typeToString()
|
||||
{
|
||||
if (std::is_same<T, int>::value)
|
||||
return "int";
|
||||
if (std::is_same<T, unsigned int>::value)
|
||||
return "unsigned int";
|
||||
if (std::is_same<T, long long>::value)
|
||||
return "long long";
|
||||
if (std::is_same<T, unsigned long long>::value)
|
||||
return "unsigned long long";
|
||||
if (std::is_same<T, half>::value)
|
||||
return "half";
|
||||
if (std::is_same<T, float>::value)
|
||||
return "float";
|
||||
if (std::is_same<T, double>::value)
|
||||
return "double";
|
||||
|
||||
assert(false && "Missing conversion to string for type");
|
||||
return "";
|
||||
}
|
||||
|
||||
template<class T, template <typename> class Op>
|
||||
const char* opToString()
|
||||
{
|
||||
if constexpr (std::is_same<Op<T>, std::plus<T>>::value)
|
||||
return "add";
|
||||
else if constexpr (std::is_same<Op<T>, MinOp<T>>::value)
|
||||
return "min";
|
||||
else if constexpr (std::is_same<Op<T>, MaxOp<T>>::value)
|
||||
return "max";
|
||||
else if constexpr (std::is_same<Op<T>, std::logical_and<T>>::value)
|
||||
return "logical_and";
|
||||
else if constexpr (std::is_same<Op<T>, std::logical_or<T>>::value)
|
||||
return "logical_or";
|
||||
else if constexpr (std::is_same<Op<T>, XorOp<T>>::value)
|
||||
return "logical_xor";
|
||||
else {
|
||||
static_assert(std::is_void<T>::value, "Unsupported operator");
|
||||
return "";
|
||||
}
|
||||
}
|
||||
|
||||
template <class T, class Gen>
|
||||
void genRandomMasks(LinearAllocGuard<T>& d_buf,
|
||||
LinearAllocGuard<T>& buf,
|
||||
Gen& gen,
|
||||
int numItems)
|
||||
{
|
||||
// masks must be != 0, hence passing 1 as the 'a' distribution parameter
|
||||
std::uniform_int_distribution<unsigned long long> dist(1);
|
||||
int numBytes = numItems * sizeof(T);
|
||||
LinearAllocGuard<T> tmp(LinearAllocs::malloc, numBytes);
|
||||
LinearAllocGuard<T> d_tmp(LinearAllocs::hipMalloc, numBytes);
|
||||
|
||||
buf = std::move(tmp);
|
||||
d_buf = std::move(d_tmp);
|
||||
|
||||
for (int i = 0; i < numItems; i++) {
|
||||
T mask = dist(gen);
|
||||
|
||||
if (getWarpSize() == 32)
|
||||
mask &= 0xFFFFFFFF;
|
||||
|
||||
buf.ptr()[i] = mask;
|
||||
}
|
||||
|
||||
HIP_CHECK(hipMemcpy(d_buf.ptr(), buf.ptr(), numBytes, hipMemcpyHostToDevice));
|
||||
}
|
||||
|
||||
// generates a random __half (instead of using uniform_real_distribution<float> casting to __half
|
||||
// which is problematic)
|
||||
// @expDist needs to be between [0-2^5-2]
|
||||
template <class Gen>
|
||||
__half genRandomHalf(std::uniform_int_distribution<unsigned short>& dist,
|
||||
Gen& gen)
|
||||
{
|
||||
__half_raw tmp;
|
||||
|
||||
tmp.x = dist(gen);
|
||||
// rewrite the exponent to force the number to be (-8<x<8) and at the same time avoid NaN or
|
||||
// infinity
|
||||
tmp.x &= 0xBBFF;
|
||||
return tmp;
|
||||
}
|
||||
|
||||
// generates a random buffer in buf, copies it to device memory in d_buf
|
||||
template <class T, class Dist, class Gen>
|
||||
void genRandomBuffers(LinearAllocGuard<T>& d_buf,
|
||||
LinearAllocGuard<T>& buf,
|
||||
Dist& dist,
|
||||
Gen& gen,
|
||||
int numItems)
|
||||
{
|
||||
int numBytes = numItems * sizeof(T);
|
||||
LinearAllocGuard<T> tmp(LinearAllocs::malloc, numBytes);
|
||||
LinearAllocGuard<T> d_tmp(LinearAllocs::hipMalloc, numBytes);
|
||||
|
||||
buf = std::move(tmp);
|
||||
d_buf = std::move(d_tmp);
|
||||
|
||||
for (int i = 0; i < numItems; i++)
|
||||
if constexpr (std::is_same<T, __half>::value)
|
||||
buf.ptr()[i] = genRandomHalf(dist, gen);
|
||||
else
|
||||
buf.ptr()[i] = dist(gen);
|
||||
|
||||
HIP_CHECK(hipMemcpy(d_buf.ptr(), buf.ptr(), numBytes, hipMemcpyHostToDevice));
|
||||
}
|
||||
|
||||
// given an operation produces the expected result of the reduction
|
||||
// @mask indicates the lanes that will participate in the computation
|
||||
template <class T, class Op>
|
||||
T calculateExpected(const T* input, Op op, unsigned long long mask)
|
||||
{
|
||||
T result;
|
||||
int wavefrontSize = getWarpSize();
|
||||
|
||||
if (std::is_same<Op, std::plus<T>>::value) {
|
||||
T tmp[64] = { 0 };
|
||||
|
||||
for (int i = 0; i < wavefrontSize; i++) {
|
||||
if (mask & (1ul << i)) {
|
||||
tmp[i] = input[i];
|
||||
}
|
||||
}
|
||||
|
||||
for (int modulo = 2; modulo <= wavefrontSize; modulo *= 2) {
|
||||
for (int i = 0; i < wavefrontSize; i += modulo) {
|
||||
int j = i + modulo / 2;
|
||||
|
||||
if (j < wavefrontSize)
|
||||
tmp[i] += tmp[j];
|
||||
}
|
||||
}
|
||||
result = tmp[0];
|
||||
} else {
|
||||
bool initialized = false;
|
||||
|
||||
for (int i = 0; i < wavefrontSize; i++) {
|
||||
if (mask & (1ul << i)) {
|
||||
if (initialized)
|
||||
result = op(input[i], result);
|
||||
else {
|
||||
result = input[i];
|
||||
initialized = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void printMismatch(const T& result, const T& expected, const T* input, unsigned long long mask)
|
||||
{
|
||||
std::ios init(NULL);
|
||||
|
||||
init.copyfmt(std::cout);
|
||||
std::cout << "\nMismatch\n";
|
||||
std::cout << "Mask: 0x" << std::hex << std::setfill('0') << std::setw(16) << mask << "\n";
|
||||
std::cout.copyfmt(init);
|
||||
|
||||
for (int i = 0; i < getWarpSize(); i++) {
|
||||
if ((1ul << i) & mask) {
|
||||
if constexpr (std::is_same<T, __half>::value)
|
||||
std::cout << "Lane " << i << ": " << __half2float(input[i]) << "\n";
|
||||
else
|
||||
std::cout << "Lane " << i << ": " << input[i] << "\n";
|
||||
}
|
||||
}
|
||||
|
||||
if constexpr (std::is_same<T, __half>::value) {
|
||||
std::cout << "Result: " << __half2float(result) << "\n";
|
||||
std::cout << "Expected: " << __half2float(expected) << "\n";
|
||||
} else {
|
||||
std::cout << "Result: " << result << "\n";
|
||||
std::cout << "Expected: " << expected << "\n";
|
||||
}
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void compareFloatingPoint(const T& result, const T& expected, unsigned long long mask, const T* input)
|
||||
{
|
||||
using namespace Catch::Matchers;
|
||||
if constexpr (std::is_same<T, __half>::value) {
|
||||
float resultFloat = __half2float(result);
|
||||
float expectedFloat = __half2float(expected);
|
||||
float absDifference = fabs(resultFloat - expectedFloat);
|
||||
float relativeEpsilon = 0.1 * fmax(resultFloat, expectedFloat);
|
||||
float eps = 0.01f;
|
||||
|
||||
REQUIRE(!__hisnan(result));
|
||||
REQUIRE(!__hisinf(result));
|
||||
|
||||
if (relativeEpsilon > eps) {
|
||||
if (absDifference > 0.0001) {
|
||||
if (absDifference >= eps * fabs(fmax(resultFloat, expectedFloat))) {
|
||||
printMismatch(result, expected, input, mask);
|
||||
std::cout << "Relative epsilon: " << relativeEpsilon << "\n";
|
||||
std::cout << "Difference: " << absDifference << "\n";
|
||||
}
|
||||
}
|
||||
|
||||
REQUIRE_THAT(__half2float(resultFloat), WithinRel(expectedFloat, eps));
|
||||
}
|
||||
} else {
|
||||
// for float or double, also lossy in terms of precision
|
||||
T absDifference = fabs(result - expected);
|
||||
T relativeEpsilon = 0.1 * fmax(result, expected);
|
||||
T eps = 0.01;
|
||||
|
||||
if (relativeEpsilon > eps) {
|
||||
if (absDifference > 0.0001) {
|
||||
if (absDifference >= eps * fabs(fmax(result, expected))) {
|
||||
printMismatch(result, expected, input, mask);
|
||||
std::cout << "Relative epsilon: " << relativeEpsilon << "\n";
|
||||
std::cout << "Difference: " << absDifference << "\n";
|
||||
}
|
||||
|
||||
REQUIRE_THAT(result, WithinRel(expected, eps));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// @tparam Reduce a functor; abstracts away kernel dispatching
|
||||
// (via hiprtc or normal execution)
|
||||
template <class T, class Reduce, template <typename> class Op>
|
||||
void runTestReduce(int iteration, Reduce reduce)
|
||||
{
|
||||
using namespace Catch::Matchers;
|
||||
using distribution = typename DistributionType<T>::type;
|
||||
unsigned int wavefrontSize = getWarpSize();
|
||||
// one result per reduce per thread to be checked
|
||||
LinearAllocGuard<T> d_output(LinearAllocs::hipMalloc, kNumReduces * wavefrontSize * sizeof(T));
|
||||
LinearAllocGuard<T> output(LinearAllocs::malloc, kNumReduces * wavefrontSize * sizeof(T));
|
||||
std::mt19937_64 gen(iteration);
|
||||
// for float16, we generate any random unsigned short, but cap the exponent later on
|
||||
// to keep it in the range (-8.0..8.0) (just to avoid overflows)
|
||||
// On the rest of the types, just use a bigger reduced range of numbers to avoid overflows too
|
||||
T a = std::is_same<T, half>::value? std::numeric_limits<unsigned short>::lowest() : -1023;
|
||||
T b = std::is_same<T, half>::value? std::numeric_limits<unsigned short>::max() : 1023;
|
||||
distribution dist(a, b);
|
||||
LinearAllocGuard<T> input, d_input;
|
||||
LinearAllocGuard<unsigned long long> masks, d_masks;
|
||||
Op<T> op;
|
||||
int numReduce = 0;
|
||||
|
||||
genRandomBuffers(d_input, input, dist, gen, kNumReduces * wavefrontSize);
|
||||
genRandomMasks(d_masks, masks, gen, kNumReduces);
|
||||
reduce(d_output.ptr(), d_input.ptr(), d_masks.ptr(), kNumReduces, op);
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
HIP_CHECK(hipMemcpy(output.ptr(), d_output.ptr(), d_output.size_bytes(), hipMemcpyDeviceToHost));
|
||||
|
||||
while (numReduce < kNumReduces) {
|
||||
T expected = calculateExpected<T>(input.ptr(), op, masks.ptr()[numReduce]);
|
||||
int lane = 0;
|
||||
|
||||
while (lane < wavefrontSize) {
|
||||
auto result = output.ptr()[numReduce * wavefrontSize + lane];
|
||||
unsigned long long mask = masks.ptr()[numReduce];
|
||||
|
||||
if ((1ul << lane) & mask) {
|
||||
if constexpr (std::is_integral<T>::value || std::is_same<Op<T>, MinOp<T>>::value ||
|
||||
std::is_same<Op<T>, MaxOp<T>>::value) {
|
||||
// for integral types or min/max the result should match exactly
|
||||
if constexpr (std::is_same<T, __half>::value)
|
||||
REQUIRE(__half2float(result) == __half2float(expected));
|
||||
else {
|
||||
if (result != expected) {
|
||||
printMismatch(result, expected, input.ptr(), mask);
|
||||
REQUIRE(result == expected);
|
||||
}
|
||||
}
|
||||
} else
|
||||
compareFloatingPoint(result, expected, mask, input.ptr());
|
||||
|
||||
}
|
||||
lane++;
|
||||
}
|
||||
numReduce++;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,8 @@
|
||||
# Build the shared test kernels as an object library, unless RTC_TESTING is set.
if(NOT RTC_TESTING)
  set(TEST_SRC Set.cpp)

  # EXCLUDE_FROM_ALL: only built when another target depends on it.
  add_library(KERNELS EXCLUDE_FROM_ALL OBJECT ${TEST_SRC})
  target_compile_options(KERNELS PUBLIC -std=c++17)
endif()
|
||||
@@ -0,0 +1,6 @@
|
||||
#include <kernels.hh>
|
||||
|
||||
// Stores `val` into Ad at the calling thread's global x index. No bounds check is performed
// inside the kernel.
__global__ void Set(int* Ad, int val) {
  const int tx = blockIdx.x * blockDim.x + threadIdx.x;
  Ad[tx] = val;
}
|
||||
@@ -0,0 +1,10 @@
|
||||
namespace HipTest {
|
||||
template <typename T> __global__ void vectorADD(const T* A_d, const T* B_d, T* C_d, size_t NELEM) {
|
||||
size_t offset = (blockIdx.x * blockDim.x + threadIdx.x);
|
||||
size_t stride = blockDim.x * gridDim.x;
|
||||
|
||||
for (size_t i = offset; i < NELEM; i += stride) {
|
||||
C_d[i] = A_d[i] + B_d[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,47 @@
|
||||
# Common Tests
# Sources compiled into the MultiProc test executable.
set(TEST_SRC
    childMalloc.cc
    hipDeviceComputeCapabilityMproc.cc
    hipDeviceGetPCIBusIdMproc.cc
    hipDeviceTotalMemMproc.cc
    hipGetDeviceAttributeMproc.cc
    hipGetDeviceCountMproc.cc
    hipGetDevicePropertiesMproc.cc
    hipSetGetDeviceMproc.cc
    hipIpcMemAccessTest.cc
    hipMallocConcurrencyMproc.cc
    hipMemCoherencyTstMProc.cc
    hipIpcEventHandle.cc
    deviceAllocationMproc.cc
    hipNoGpuTsts.cc
    hipMemGetInfoMProc.cc
)

# On UNIX, compile dummy_kernel.cpp into a code object with --genco.
if(UNIX)
  add_custom_target(dummy_kernel.code
                    COMMAND ${CMAKE_CXX_COMPILER}
                    --genco ${CMAKE_CURRENT_SOURCE_DIR}/dummy_kernel.cpp
                    -o ${CMAKE_CURRENT_BINARY_DIR}/../multiproc/dummy_kernel.code
                    -I${HIP_PATH}/include/ -I${CMAKE_CURRENT_SOURCE_DIR}/../../include
                    --rocm-path=${ROCM_PATH})
  set_property(GLOBAL APPEND PROPERTY
               G_INSTALL_CUSTOM_TARGETS ${CMAKE_CURRENT_BINARY_DIR}/dummy_kernel.code)
endif()

# the last argument linker libraries is required for this test but optional to the function
if(HIP_PLATFORM MATCHES "nvidia")
  hip_add_exe_to_target(NAME MultiProc
                        TEST_SRC ${TEST_SRC}
                        TEST_TARGET_NAME build_tests
                        LINKER_LIBS nvrtc)
  set_target_properties(MultiProc PROPERTIES COMPILE_FLAGS -arch=sm_70)
elseif(HIP_PLATFORM MATCHES "amd")
  hip_add_exe_to_target(NAME MultiProc
                        TEST_SRC ${TEST_SRC}
                        TEST_TARGET_NAME build_tests
                        LINKER_LIBS hiprtc)
endif()

# The code object must be generated whenever the tests are built.
if(UNIX)
  add_dependencies(build_tests dummy_kernel.code)
endif()
|
||||
@@ -0,0 +1,62 @@
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
#include <hip_test_kernels.hh>
|
||||
|
||||
#ifdef __linux__
|
||||
#include <unistd.h>
|
||||
#include <stdlib.h>
|
||||
#include <sys/wait.h>
|
||||
#include <dlfcn.h>
|
||||
|
||||
bool testMallocFromChild() {
|
||||
int fd[2];
|
||||
pid_t childpid;
|
||||
bool testResult = false;
|
||||
|
||||
// create pipe descriptors
|
||||
pipe(fd);
|
||||
|
||||
childpid = fork();
|
||||
if (childpid > 0) { // Parent
|
||||
close(fd[1]);
|
||||
// parent will wait to read the device cnt
|
||||
read(fd[0], &testResult, sizeof(testResult));
|
||||
|
||||
// close the read-descriptor
|
||||
close(fd[0]);
|
||||
|
||||
// wait for child exit
|
||||
wait(NULL);
|
||||
|
||||
return testResult;
|
||||
|
||||
} else if (!childpid) { // Child
|
||||
// writing only, no need for read-descriptor
|
||||
close(fd[0]);
|
||||
|
||||
char* A_d = nullptr;
|
||||
hipError_t ret = hipMalloc(&A_d, 1024);
|
||||
|
||||
printf("hipMalloc returned : %s\n", hipGetErrorString(ret));
|
||||
if (ret == hipSuccess)
|
||||
testResult = true;
|
||||
else
|
||||
testResult = false;
|
||||
|
||||
// send the value on the write-descriptor:
|
||||
write(fd[1], &testResult, sizeof(testResult));
|
||||
|
||||
// close the write descriptor:
|
||||
close(fd[1]);
|
||||
exit(0);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
// hipMalloc must succeed when it is called from a forked child process.
TEST_CASE("ChildMalloc") {
  const bool res = testMallocFromChild();
  REQUIRE(res == true);
}
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,348 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
#include <hip_test_kernels.hh>
|
||||
|
||||
#ifdef __linux__
|
||||
#include <unistd.h>
|
||||
#include <stdlib.h>
|
||||
#include <sys/wait.h>
|
||||
#include <dlfcn.h>
|
||||
|
||||
#define SIZE 2097152
|
||||
// GPU threads
|
||||
#define BLOCKSIZE 512
|
||||
#define GRIDSIZE 256
|
||||
|
||||
__device__ static char* dev_common_ptr = nullptr;
|
||||
|
||||
/**
|
||||
* This kernel allocates a memory chunk using malloc().
|
||||
*/
|
||||
static __global__ void kerTestDeviceMalloc(size_t size) {
  const int myId = blockIdx.x * blockDim.x + threadIdx.x;
  // Only the thread with global index 0 allocates; every other thread exits immediately.
  if (myId != 0) {
    return;
  }

  // The pointer is published through the device-global dev_common_ptr.
  dev_common_ptr = reinterpret_cast<char*>(malloc(size));
  if (dev_common_ptr == nullptr) {
    printf("Device Allocation Failed! \n");
  }
}
|
||||
|
||||
/**
|
||||
* This kernel writes to the memory location allocated in kernel
|
||||
* kerTestDeviceMalloc or kerTestDeviceNew.
|
||||
*/
|
||||
static __global__ void kerTestDeviceWrite() {
  // Nothing to write to when the earlier device-side allocation failed.
  if (dev_common_ptr == nullptr) {
    printf("Device Allocation Failed! \n");
    return;
  }

  // Each thread stores SCHAR_MAX into the byte at its own global index.
  const int myId = blockIdx.x * blockDim.x + threadIdx.x;
  dev_common_ptr[myId] = SCHAR_MAX;
}
|
||||
|
||||
/**
|
||||
* This kernel frees the memory chunk allocated in kernel
|
||||
* kerTestDeviceMalloc using free().
|
||||
*/
|
||||
static __global__ void kerTestDeviceFree(int *result) {
  int myId = threadIdx.x + blockDim.x * blockIdx.x;
  // Only the thread with global index 0 verifies and frees the buffer.
  if (myId != 0) {
    return;
  }

  if (dev_common_ptr == nullptr) {
    // Nothing was allocated: report failure.
    *result = 0;
    return;
  }

  // *result ends up 1 only if each of the first BLOCKSIZE*GRIDSIZE bytes holds SCHAR_MAX.
  *result = 1;
  for (int idx = 0; idx < (BLOCKSIZE*GRIDSIZE); idx++) {
    // Index with idx: indexing with myId (always 0 here) would re-check byte 0 on every
    // iteration and never look at the rest of the buffer.
    if (*(dev_common_ptr + idx) != SCHAR_MAX) {
      *result = 0;
      break;
    }
  }
  free(dev_common_ptr);
}
|
||||
|
||||
/**
|
||||
* This kernel allocates a memory chunk using new operator.
|
||||
*/
|
||||
static __global__ void kerTestDeviceNew(size_t size) {
  const int myId = blockIdx.x * blockDim.x + threadIdx.x;
  // Only the thread with global index 0 allocates; every other thread exits immediately.
  if (myId != 0) {
    return;
  }

  // The pointer is published through the device-global dev_common_ptr.
  dev_common_ptr = new char[size];
  if (dev_common_ptr == nullptr) {
    printf("Device Allocation Failed! \n");
  }
}
|
||||
|
||||
/**
|
||||
* This kernel frees the memory chunk allocated in kernel
|
||||
* kerTestDeviceNew using delete operator.
|
||||
*/
|
||||
static __global__ void kerTestDeviceDelete(int *result) {
  int myId = threadIdx.x + blockDim.x * blockIdx.x;
  // Only the thread with global index 0 verifies and releases the buffer.
  if (myId != 0) {
    return;
  }

  if (dev_common_ptr == nullptr) {
    // Nothing was allocated: report failure.
    *result = 0;
    return;
  }

  // *result ends up 1 only if each of the first BLOCKSIZE*GRIDSIZE bytes holds SCHAR_MAX.
  *result = 1;
  for (int idx = 0; idx < (BLOCKSIZE*GRIDSIZE); idx++) {
    // Index with idx: indexing with myId (always 0 here) would re-check byte 0 on every
    // iteration and never look at the rest of the buffer.
    if (*(dev_common_ptr + idx) != SCHAR_MAX) {
      *result = 0;
      break;
    }
  }
  delete[] dev_common_ptr;
}
|
||||
|
||||
/**
|
||||
* Test device malloc()/new in both Parent and Child Process.
|
||||
* Allocate SIZE bytes in both parent and child process. Verify
|
||||
* the allocated size in both parent and child process.
|
||||
*/
|
||||
static bool testDeviceAllocMulProc(bool testmalloc) {
|
||||
int fd[2];
|
||||
pid_t childpid;
|
||||
bool testResult = false;
|
||||
size_t avail = 0, tot = 0;
|
||||
// create pipe descriptors
|
||||
pipe(fd);
|
||||
// fork process
|
||||
childpid = fork();
|
||||
if (childpid > 0) { // Parent
|
||||
close(fd[1]);
|
||||
int *result_d{nullptr};
|
||||
HIP_CHECK(hipMalloc(&result_d, sizeof(int)));
|
||||
// Allocate in parent
|
||||
if (testmalloc) {
|
||||
kerTestDeviceMalloc<<<1, 1>>>(SIZE);
|
||||
} else {
|
||||
kerTestDeviceNew<<<1, 1>>>(SIZE);
|
||||
}
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
// Check allocated memory size
|
||||
HIP_CHECK(hipMemGetInfo(&avail, &tot));
|
||||
if ((tot - avail) < SIZE) {
|
||||
// Clean up memory before return
|
||||
if (testmalloc) {
|
||||
kerTestDeviceFree<<<1, 1>>>(result_d);
|
||||
} else {
|
||||
kerTestDeviceDelete<<<1, 1>>>(result_d);
|
||||
}
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
HIP_CHECK(hipFree(result_d));
|
||||
close(fd[0]);
|
||||
wait(NULL);
|
||||
return false;
|
||||
}
|
||||
// parent will wait to read the device cnt
|
||||
read(fd[0], &testResult, sizeof(testResult));
|
||||
// close the read-descriptor
|
||||
close(fd[0]);
|
||||
// wait for child exit
|
||||
wait(NULL);
|
||||
// At this point the child process exits.
|
||||
// Ensure that device memory allocated from child is freed.
|
||||
HIP_CHECK(hipMemGetInfo(&avail, &tot));
|
||||
if ((tot - avail) < SIZE) {
|
||||
testResult = false;
|
||||
}
|
||||
if (testmalloc) {
|
||||
kerTestDeviceFree<<<1, 1>>>(result_d);
|
||||
} else {
|
||||
kerTestDeviceDelete<<<1, 1>>>(result_d);
|
||||
}
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
HIP_CHECK(hipFree(result_d));
|
||||
} else if (!childpid) { // Child
|
||||
// Wait for hipDeviceSetLimit() completion in parent.
|
||||
int *result_d{nullptr};
|
||||
HIP_CHECK(hipMalloc(&result_d, sizeof(int)));
|
||||
close(fd[0]);
|
||||
// Allocate in child
|
||||
if (testmalloc) {
|
||||
kerTestDeviceMalloc<<<1, 1>>>(SIZE);
|
||||
} else {
|
||||
kerTestDeviceNew<<<1, 1>>>(SIZE);
|
||||
}
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
// Check allocated memory size
|
||||
HIP_CHECK(hipMemGetInfo(&avail, &tot));
|
||||
if ((tot - avail) < SIZE) {
|
||||
testResult = false;
|
||||
} else {
|
||||
testResult = true;
|
||||
}
|
||||
// send the value on the write-descriptor:
|
||||
write(fd[1], &testResult, sizeof(testResult));
|
||||
// close the write descriptor:
|
||||
close(fd[1]);
|
||||
if (testmalloc) {
|
||||
kerTestDeviceFree<<<1, 1>>>(result_d);
|
||||
} else {
|
||||
kerTestDeviceDelete<<<1, 1>>>(result_d);
|
||||
}
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
HIP_CHECK(hipFree(result_d));
|
||||
exit(0);
|
||||
}
|
||||
return testResult;
|
||||
}
|
||||
|
||||
/**
|
||||
* Test device malloc()/new, write and free()/delete[]
|
||||
* from both Parent and Child Process. From both Parent and
|
||||
* Child Process invoke the kernel to allocate memory, the
|
||||
* kernel to write to the allocated memory and a third kernel
|
||||
* to verify the memory contents and free it.
|
||||
*/
|
||||
static bool testDeviceMemMulProc(bool testmalloc) {
|
||||
int fd[2];
|
||||
bool testResult = false;
|
||||
pid_t childpid;
|
||||
int testResultChild = 0;
|
||||
size_t size = BLOCKSIZE*GRIDSIZE;
|
||||
// create pipe descriptors
|
||||
pipe(fd);
|
||||
// fork process
|
||||
childpid = fork();
|
||||
if (childpid > 0) { // Parent
|
||||
close(fd[1]);
|
||||
int *result_d{nullptr}, *result_h{nullptr};
|
||||
HIP_CHECK(hipMalloc(&result_d, sizeof(int)));
|
||||
result_h = reinterpret_cast<int*> (malloc(sizeof(int)));
|
||||
REQUIRE(result_h != nullptr);
|
||||
// Allocate in parent
|
||||
if (testmalloc) {
|
||||
kerTestDeviceMalloc<<<1, 1>>>(size);
|
||||
} else {
|
||||
kerTestDeviceNew<<<1, 1>>>(size);
|
||||
}
|
||||
// Write
|
||||
kerTestDeviceWrite<<<GRIDSIZE, BLOCKSIZE>>>();
|
||||
// Free
|
||||
if (testmalloc) {
|
||||
kerTestDeviceFree<<<1, 1>>>(result_d);
|
||||
} else {
|
||||
kerTestDeviceDelete<<<1, 1>>>(result_d);
|
||||
}
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
*result_h = 0;
|
||||
HIP_CHECK(hipMemcpy(result_h, result_d, sizeof(int),
|
||||
hipMemcpyDefault));
|
||||
if (*result_h == 0) {
|
||||
testResult = false;
|
||||
} else {
|
||||
testResult = true;
|
||||
}
|
||||
// parent will wait to read the device cnt
|
||||
read(fd[0], &testResultChild, sizeof(int));
|
||||
if (testResultChild == 0) {
|
||||
testResult &= false;
|
||||
} else {
|
||||
testResult &= true;
|
||||
}
|
||||
// close the read-descriptor
|
||||
close(fd[0]);
|
||||
HIP_CHECK(hipFree(result_d));
|
||||
free(result_h);
|
||||
// wait for child exit
|
||||
wait(NULL);
|
||||
} else if (!childpid) { // Child
|
||||
// Wait for hipDeviceSetLimit() completion in parent.
|
||||
close(fd[0]);
|
||||
int *result_d{nullptr}, *result_h{nullptr};
|
||||
HIP_CHECK(hipMalloc(&result_d, sizeof(int)));
|
||||
result_h = reinterpret_cast<int*> (malloc(sizeof(int)));
|
||||
REQUIRE(result_h != nullptr);
|
||||
// Allocate in child
|
||||
if (testmalloc) {
|
||||
kerTestDeviceMalloc<<<1, 1>>>(size);
|
||||
} else {
|
||||
kerTestDeviceNew<<<1, 1>>>(size);
|
||||
}
|
||||
// Write
|
||||
kerTestDeviceWrite<<<GRIDSIZE, BLOCKSIZE>>>();
|
||||
// Free
|
||||
if (testmalloc) {
|
||||
kerTestDeviceFree<<<1, 1>>>(result_d);
|
||||
} else {
|
||||
kerTestDeviceDelete<<<1, 1>>>(result_d);
|
||||
}
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
*result_h = 0;
|
||||
HIP_CHECK(hipMemcpy(result_h, result_d, sizeof(int),
|
||||
hipMemcpyDefault));
|
||||
// send the value on the write-descriptor:
|
||||
write(fd[1], result_h, sizeof(int));
|
||||
// close the write descriptor:
|
||||
close(fd[1]);
|
||||
HIP_CHECK(hipFree(result_d));
|
||||
free(result_h);
|
||||
exit(0);
|
||||
}
|
||||
return testResult;
|
||||
}
|
||||
|
||||
/**
|
||||
* Multiprocess device side malloc test.
|
||||
*/
|
||||
TEST_CASE("Unit_deviceAllocation_Malloc_MultProcess") {
  // true selects the malloc()-based allocation kernels.
  const bool passed = testDeviceAllocMulProc(true);
  REQUIRE(passed == true);
}
|
||||
|
||||
/**
|
||||
* Multiprocess device side new test.
|
||||
*/
|
||||
TEST_CASE("Unit_deviceAllocation_New_MultProcess") {
  // false selects the operator-new-based allocation kernels.
  const bool passed = testDeviceAllocMulProc(false);
  REQUIRE(passed == true);
}
|
||||
|
||||
/**
|
||||
* Multiprocess device side malloc, write and free test.
|
||||
*/
|
||||
TEST_CASE("Unit_deviceAllocation_MallocFree_MultProcess") {
  // true selects the malloc()/free() kernels.
  const bool passed = testDeviceMemMulProc(true);
  REQUIRE(passed == true);
}
|
||||
|
||||
/**
|
||||
* Multiprocess device side new, write and delete test.
|
||||
*/
|
||||
TEST_CASE("Unit_deviceAllocation_NewDelete_MultProcess") {
  // false selects the new/delete[] kernels.
  const bool passed = testDeviceMemMulProc(false);
  REQUIRE(passed == true);
}
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,26 @@
|
||||
/*
|
||||
Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "hip/hip_runtime.h"
|
||||
|
||||
// Intentionally empty kernel, declared with C linkage.
extern "C" __global__ void dummy_ker() {}
|
||||
@@ -0,0 +1,159 @@
|
||||
/*
|
||||
Copyright (c) 2021-2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* hipDeviceComputeCapability tests
|
||||
* Scenario: Validate behavior of hipDeviceComputeCapability for masked devices
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#ifdef __linux__
|
||||
#include <unistd.h>
|
||||
#include <sys/wait.h>
|
||||
|
||||
|
||||
#define MAX_SIZE 30
|
||||
#define VISIBLE_DEVICE 0
|
||||
|
||||
/**
|
||||
* Fetches Gpu device count
|
||||
*/
|
||||
static void getDeviceCount(int *pdevCnt) {
|
||||
int fd[2], val = 0;
|
||||
pid_t childpid;
|
||||
|
||||
// create pipe descriptors
|
||||
pipe(fd);
|
||||
|
||||
// disable visible_devices env from shell
|
||||
#ifdef __HIP_PLATFORM_NVIDIA__
|
||||
unsetenv("CUDA_VISIBLE_DEVICES");
|
||||
#else
|
||||
unsetenv("ROCR_VISIBLE_DEVICES");
|
||||
unsetenv("HIP_VISIBLE_DEVICES");
|
||||
#endif
|
||||
|
||||
childpid = fork();
|
||||
|
||||
if (childpid > 0) { // Parent
|
||||
close(fd[1]);
|
||||
// parent will wait to read the device cnt
|
||||
read(fd[0], &val, sizeof(val));
|
||||
|
||||
// close the read-descriptor
|
||||
close(fd[0]);
|
||||
|
||||
// wait for child exit
|
||||
wait(NULL);
|
||||
|
||||
*pdevCnt = val;
|
||||
} else if (!childpid) { // Child
|
||||
int devCnt = 1;
|
||||
// writing only, no need for read-descriptor
|
||||
close(fd[0]);
|
||||
|
||||
HIP_CHECK(hipGetDeviceCount(&devCnt));
|
||||
// send the value on the write-descriptor:
|
||||
write(fd[1], &devCnt, sizeof(devCnt));
|
||||
|
||||
// close the write descriptor:
|
||||
close(fd[1]);
|
||||
exit(0);
|
||||
} else { // failure
|
||||
*pdevCnt = 0;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Runs test on masked devices
|
||||
*/
|
||||
bool runMaskedDeviceTest(int actualNumGPUs) {
|
||||
bool testResult = true;
|
||||
int fd[2];
|
||||
pipe(fd);
|
||||
pid_t cPid;
|
||||
cPid = fork();
|
||||
if (cPid == 0) { // child
|
||||
hipError_t err;
|
||||
char visibleDeviceString[MAX_SIZE] = {};
|
||||
snprintf(visibleDeviceString, MAX_SIZE, "%d", VISIBLE_DEVICE);
|
||||
|
||||
// disable visible_devices env from shell
|
||||
#ifdef __HIP_PLATFORM_NVIDIA__
|
||||
unsetenv("CUDA_VISIBLE_DEVICES");
|
||||
setenv("CUDA_VISIBLE_DEVICES", visibleDeviceString, 1);
|
||||
HIP_CHECK(hipInit(0));
|
||||
#else
|
||||
unsetenv("ROCR_VISIBLE_DEVICES");
|
||||
unsetenv("HIP_VISIBLE_DEVICES");
|
||||
setenv("ROCR_VISIBLE_DEVICES", visibleDeviceString, 1);
|
||||
setenv("HIP_VISIBLE_DEVICES", visibleDeviceString, 1);
|
||||
#endif
|
||||
|
||||
for (int count = 1;
|
||||
count < actualNumGPUs; count++) {
|
||||
int major, minor;
|
||||
err = hipDeviceComputeCapability(&major, &minor, count);
|
||||
if (err == hipSuccess) {
|
||||
testResult = false;
|
||||
} else {
|
||||
printf("hipDeviceComputeCapability: Error Code Returned: '%s'(%d)\n",
|
||||
hipGetErrorString(err), err);
|
||||
}
|
||||
}
|
||||
close(fd[0]);
|
||||
printf("testResult = %d \n", testResult);
|
||||
write(fd[1], &testResult, sizeof(testResult));
|
||||
close(fd[1]);
|
||||
exit(0);
|
||||
|
||||
} else if (cPid > 0) { // parent
|
||||
close(fd[1]);
|
||||
read(fd[0], &testResult, sizeof(testResult));
|
||||
close(fd[0]);
|
||||
wait(NULL);
|
||||
|
||||
} else {
|
||||
printf("fork() failed\n");
|
||||
HIP_ASSERT(false);
|
||||
}
|
||||
|
||||
return testResult;
|
||||
}
|
||||
|
||||
/**
|
||||
* Validate behavior of hipDeviceComputeCapability for masked devices.
|
||||
*/
|
||||
TEST_CASE("Unit_hipDeviceGet_MaskedDevices") {
  constexpr int ReqGPUs = 2;
  int count = -1;

  getDeviceCount(&count);

  // The masked-device check needs at least ReqGPUs devices.
  if (count < ReqGPUs) {
    SUCCEED("Not enough GPUs to run the masked GPU tests");
    return;
  }

  const bool ret = runMaskedDeviceTest(count);
  REQUIRE(ret == true);
}
|
||||
|
||||
#endif // __linux__
|
||||
@@ -0,0 +1,258 @@
|
||||
/*
|
||||
* Copyright (c) 2020-2021 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Tests to
|
||||
* 1. Compare {pciDomainID, pciBusID, pciDeviceID} values
|
||||
* hipDeviceGetPCIBusId vs lspci
|
||||
* 2. Validate behavior of hipDeviceGetPCIBusId for masked devices
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#ifdef __linux__
|
||||
#include <unistd.h>
|
||||
#include <sys/wait.h>
|
||||
|
||||
#define MAX_DEVICE_LENGTH 20
|
||||
#define MAX_SIZE 30
|
||||
#define VISIBLE_DEVICE 0
|
||||
|
||||
namespace hipDeviceGetPCIBusIdTests {
|
||||
|
||||
/**
|
||||
* Fetches Gpu device count
|
||||
*/
|
||||
void getDeviceCount(int *pdevCnt) {
|
||||
int fd[2], val = 0;
|
||||
pid_t childpid;
|
||||
|
||||
// create pipe descriptors
|
||||
pipe(fd);
|
||||
|
||||
// disable visible_devices env from shell
|
||||
#ifdef __HIP_PLATFORM_NVIDIA__
|
||||
unsetenv("CUDA_VISIBLE_DEVICES");
|
||||
#else
|
||||
unsetenv("ROCR_VISIBLE_DEVICES");
|
||||
unsetenv("HIP_VISIBLE_DEVICES");
|
||||
#endif
|
||||
|
||||
childpid = fork();
|
||||
|
||||
if (childpid > 0) { // Parent
|
||||
close(fd[1]);
|
||||
// parent will wait to read the device cnt
|
||||
read(fd[0], &val, sizeof(val));
|
||||
|
||||
// close the read-descriptor
|
||||
close(fd[0]);
|
||||
|
||||
// wait for child exit
|
||||
wait(NULL);
|
||||
|
||||
*pdevCnt = val;
|
||||
} else if (!childpid) { // Child
|
||||
int devCnt = 1;
|
||||
// writing only, no need for read-descriptor
|
||||
close(fd[0]);
|
||||
|
||||
HIP_CHECK(hipGetDeviceCount(&devCnt));
|
||||
// send the value on the write-descriptor:
|
||||
write(fd[1], &devCnt, sizeof(devCnt));
|
||||
|
||||
// close the write descriptor:
|
||||
close(fd[1]);
|
||||
exit(0);
|
||||
} else { // failure
|
||||
*pdevCnt = 0;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Runs test on masked devices
|
||||
*/
|
||||
bool testWithMaskedDevices(int actualNumGPUs) {
|
||||
bool testResult = true;
|
||||
int fd[2];
|
||||
pipe(fd);
|
||||
pid_t cPid;
|
||||
cPid = fork();
|
||||
if (cPid == 0) { // child
|
||||
hipError_t err;
|
||||
char pciBusId[MAX_DEVICE_LENGTH];
|
||||
char visibleDeviceString[MAX_SIZE] = {};
|
||||
snprintf(visibleDeviceString, MAX_SIZE, "%d", VISIBLE_DEVICE);
|
||||
|
||||
// disable visible_devices env from shell
|
||||
#ifdef __HIP_PLATFORM_NVIDIA__
|
||||
unsetenv("CUDA_VISIBLE_DEVICES");
|
||||
setenv("CUDA_VISIBLE_DEVICES", visibleDeviceString, 1);
|
||||
HIP_CHECK(hipInit(0));
|
||||
#else
|
||||
unsetenv("ROCR_VISIBLE_DEVICES");
|
||||
unsetenv("HIP_VISIBLE_DEVICES");
|
||||
setenv("ROCR_VISIBLE_DEVICES", visibleDeviceString, 1);
|
||||
setenv("HIP_VISIBLE_DEVICES", visibleDeviceString, 1);
|
||||
#endif
|
||||
|
||||
for (int count = 1;
|
||||
count < actualNumGPUs; count++) {
|
||||
err = hipDeviceGetPCIBusId(pciBusId, MAX_DEVICE_LENGTH, count);
|
||||
if (err == hipSuccess) {
|
||||
testResult &= false;
|
||||
} else {
|
||||
printf("hipGetDeviceProperties: Error Code Returned: '%s'(%d)\n",
|
||||
hipGetErrorString(err), err);
|
||||
}
|
||||
}
|
||||
close(fd[0]);
|
||||
printf("testResult = %d \n", testResult);
|
||||
write(fd[1], &testResult, sizeof(testResult));
|
||||
close(fd[1]);
|
||||
exit(0);
|
||||
|
||||
} else if (cPid > 0) { // parent
|
||||
close(fd[1]);
|
||||
read(fd[0], &testResult, sizeof(testResult));
|
||||
close(fd[0]);
|
||||
wait(NULL);
|
||||
|
||||
} else {
|
||||
printf("fork() failed\n");
|
||||
HIP_ASSERT(false);
|
||||
}
|
||||
|
||||
return testResult;
|
||||
}
|
||||
|
||||
|
||||
bool getPciBusId(int deviceCount,
|
||||
char **hipDeviceList) {
|
||||
for (int i = 0; i < deviceCount; i++) {
|
||||
HIP_CHECK(hipDeviceGetPCIBusId(hipDeviceList[i], MAX_DEVICE_LENGTH, i));
|
||||
}
|
||||
return true;
|
||||
}
|
||||
} // namespace hipDeviceGetPCIBusIdTests
|
||||
|
||||
|
||||
/**
|
||||
* Scenario: Validate behavior of hipDeviceGetPCIBusId for masked devices.
|
||||
*/
|
||||
TEST_CASE("Unit_hipDeviceGetPCIBusId_MaskedDevices") {
  constexpr int ReqGPUs = 2;
  int count = -1;

  hipDeviceGetPCIBusIdTests::getDeviceCount(&count);

  // The masked-device check needs at least ReqGPUs devices.
  if (count < ReqGPUs) {
    SUCCEED("Not enough GPUs to run the masked GPU tests");
    return;
  }

  const bool ret = hipDeviceGetPCIBusIdTests::testWithMaskedDevices(count);
  REQUIRE(ret == true);
}
|
||||
|
||||
/* Compare {pciDomainID, pciBusID, pciDeviceID} values
|
||||
* hipDeviceGetPCIBusId vs lspci
|
||||
*/
|
||||
TEST_CASE("Unit_hipDeviceGetPCIBusId_CheckPciBusIDWithLspci") {
|
||||
FILE *fpipe;
|
||||
{
|
||||
// Check if lspci is installed, if not, don't proceed
|
||||
char const *cmd = "lspci --version";
|
||||
char *lspciCheck{nullptr};
|
||||
constexpr auto MaxLen = 50;
|
||||
char temp[MaxLen]{};
|
||||
|
||||
fpipe = popen(cmd, "r");
|
||||
REQUIRE_FALSE(fpipe == nullptr);
|
||||
|
||||
lspciCheck = fgets(temp, MaxLen, fpipe);
|
||||
pclose(fpipe);
|
||||
|
||||
if (lspciCheck == nullptr) {
|
||||
WARN("Skipping test as lspci is not found in system");
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
int deviceCount = 0;
|
||||
HIP_CHECK(hipGetDeviceCount(&deviceCount));
|
||||
REQUIRE_FALSE(deviceCount == 0);
|
||||
// Allocate an array of pointer to characters
|
||||
char **hipDeviceList = new char*[deviceCount];
|
||||
REQUIRE_FALSE(hipDeviceList == nullptr);
|
||||
char **pciDeviceList = new char*[deviceCount];
|
||||
REQUIRE_FALSE(pciDeviceList == nullptr);
|
||||
for (int i = 0; i < deviceCount; i++) {
|
||||
hipDeviceList[i] = new char[MAX_DEVICE_LENGTH];
|
||||
REQUIRE_FALSE(hipDeviceList[i] == nullptr);
|
||||
pciDeviceList[i] = new char[MAX_DEVICE_LENGTH];
|
||||
REQUIRE_FALSE(pciDeviceList[i] == nullptr);
|
||||
}
|
||||
|
||||
hipDeviceGetPCIBusIdTests::getPciBusId(deviceCount, hipDeviceList);
|
||||
char const *command = nullptr;
|
||||
// Get lspci device list and compare with hip device list
|
||||
if ((TestContext::get()).isNvidia()) {
|
||||
command = "lspci -D | grep controller | grep NVIDIA | "
|
||||
"cut -d ' ' -f 1";
|
||||
} else {
|
||||
command = "lspci -D | grep -e controller -e accelerator | grep AMD/ATI | "
|
||||
"cut -d ' ' -f 1";
|
||||
}
|
||||
fpipe = popen(command, "r");
|
||||
REQUIRE_FALSE(fpipe == nullptr);
|
||||
|
||||
int index = 0;
|
||||
int deviceMatchCount = 0;
|
||||
constexpr auto cmpLen = 10;
|
||||
while (fgets(pciDeviceList[index], MAX_DEVICE_LENGTH, fpipe)) {
|
||||
bool bMatchFound = false;
|
||||
for (int deviceNo = 0; deviceNo < deviceCount; deviceNo++) {
|
||||
if (!strncasecmp(pciDeviceList[index], hipDeviceList[deviceNo],
|
||||
cmpLen)) {
|
||||
deviceMatchCount++;
|
||||
bMatchFound = true;
|
||||
}
|
||||
}
|
||||
if (bMatchFound == false) {
|
||||
printf("PCI device: %s is not reported by HIP\n",
|
||||
pciDeviceList[index]);
|
||||
}
|
||||
index++;
|
||||
if (index >= deviceCount) break;
|
||||
}
|
||||
// Deallocate
|
||||
for (int i = 0; i < deviceCount; i++) {
|
||||
delete hipDeviceList[i];
|
||||
}
|
||||
delete[] hipDeviceList;
|
||||
for (int i = 0; i < deviceCount; i++) {
|
||||
delete pciDeviceList[i];
|
||||
}
|
||||
delete[] pciDeviceList;
|
||||
pclose(fpipe);
|
||||
|
||||
REQUIRE(deviceMatchCount == deviceCount);
|
||||
}
|
||||
#endif
|
||||
@@ -0,0 +1,177 @@
|
||||
/*
|
||||
Copyright (c) 2020-2021 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#ifdef __linux__
|
||||
#include <unistd.h>
|
||||
#include <sys/wait.h>
|
||||
|
||||
/**
|
||||
* @addtogroup hipDeviceTotalMem hipDeviceTotalMem
|
||||
* @{
|
||||
* @ingroup DriverTest
|
||||
*/
|
||||
|
||||
#define MAX_SIZE 30
|
||||
#define VISIBLE_DEVICE 0
|
||||
|
||||
/**
|
||||
* Fetches Gpu device count
|
||||
*/
|
||||
static void getDeviceCount(int *pdevCnt) {
|
||||
int fd[2], val = 0;
|
||||
pid_t childpid;
|
||||
|
||||
// create pipe descriptors
|
||||
pipe(fd);
|
||||
|
||||
// disable visible_devices env from shell
|
||||
#ifdef __HIP_PLATFORM_NVIDIA__
|
||||
unsetenv("CUDA_VISIBLE_DEVICES");
|
||||
#else
|
||||
unsetenv("ROCR_VISIBLE_DEVICES");
|
||||
unsetenv("HIP_VISIBLE_DEVICES");
|
||||
#endif
|
||||
|
||||
childpid = fork();
|
||||
|
||||
if (childpid > 0) { // Parent
|
||||
close(fd[1]);
|
||||
// parent will wait to read the device cnt
|
||||
read(fd[0], &val, sizeof(val));
|
||||
|
||||
// close the read-descriptor
|
||||
close(fd[0]);
|
||||
|
||||
// wait for child exit
|
||||
wait(NULL);
|
||||
|
||||
*pdevCnt = val;
|
||||
} else if (!childpid) { // Child
|
||||
int devCnt = 1;
|
||||
// writing only, no need for read-descriptor
|
||||
close(fd[0]);
|
||||
|
||||
HIP_CHECK(hipGetDeviceCount(&devCnt));
|
||||
// send the value on the write-descriptor:
|
||||
write(fd[1], &devCnt, sizeof(devCnt));
|
||||
|
||||
// close the write descriptor:
|
||||
close(fd[1]);
|
||||
exit(0);
|
||||
} else { // failure
|
||||
*pdevCnt = 0;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Func tries to fetch total memory of masked devices and returns pass/fail.
|
||||
*/
|
||||
static bool getTotalMemoryOfMaskedDevices(int actualNumGPUs) {
|
||||
bool testResult = true;
|
||||
int fd[2];
|
||||
|
||||
pipe(fd);
|
||||
pid_t cPid;
|
||||
cPid = fork();
|
||||
if (cPid == 0) { // child
|
||||
hipError_t err;
|
||||
char visibleDeviceString[MAX_SIZE] = {};
|
||||
snprintf(visibleDeviceString, MAX_SIZE, "%d", VISIBLE_DEVICE);
|
||||
|
||||
// disable visible_devices env from shell
|
||||
#ifdef __HIP_PLATFORM_NVIDIA__
|
||||
unsetenv("CUDA_VISIBLE_DEVICES");
|
||||
setenv("CUDA_VISIBLE_DEVICES", visibleDeviceString, 1);
|
||||
HIP_CHECK(hipInit(0));
|
||||
#else
|
||||
unsetenv("ROCR_VISIBLE_DEVICES");
|
||||
unsetenv("HIP_VISIBLE_DEVICES");
|
||||
setenv("ROCR_VISIBLE_DEVICES", visibleDeviceString, 1);
|
||||
setenv("HIP_VISIBLE_DEVICES", visibleDeviceString, 1);
|
||||
#endif
|
||||
|
||||
for (int count = 1;
|
||||
count < actualNumGPUs; count++) {
|
||||
size_t totMem;
|
||||
err = hipDeviceTotalMem(&totMem, count);
|
||||
if (err == hipSuccess) {
|
||||
testResult &= false;
|
||||
} else {
|
||||
printf("hipDeviceTotalMem: Error Code Returned: '%s'(%d)\n",
|
||||
hipGetErrorString(err), err);
|
||||
}
|
||||
}
|
||||
close(fd[0]);
|
||||
printf("testResult = %d \n", testResult);
|
||||
write(fd[1], &testResult, sizeof(testResult));
|
||||
close(fd[1]);
|
||||
exit(0);
|
||||
|
||||
} else if (cPid > 0) { // parent
|
||||
close(fd[1]);
|
||||
read(fd[0], &testResult, sizeof(testResult));
|
||||
close(fd[0]);
|
||||
wait(NULL);
|
||||
|
||||
} else {
|
||||
printf("fork() failed\n");
|
||||
HIP_ASSERT(false);
|
||||
}
|
||||
|
||||
return testResult;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Check that total memory is returned correctly when
|
||||
* the devices are masked.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/multiproc/hipDeviceTotalMemMproc.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - Multi-device test
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Unit_hipDeviceTotalMem_MaskedDevices") {
  constexpr int ReqGPUs = 2;
  int count = -1;

  getDeviceCount(&count);

  // The masked-device check needs at least ReqGPUs devices.
  if (count < ReqGPUs) {
    SUCCEED("Not enough GPUs to run the masked GPU tests");
    return;
  }

  const bool ret = getTotalMemoryOfMaskedDevices(count);
  REQUIRE(ret == true);
}
|
||||
|
||||
/**
|
||||
* End doxygen group hipDeviceTotalMem.
|
||||
* @}
|
||||
*/
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,164 @@
|
||||
/*
|
||||
Copyright (c) 2020-2021 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* hipGetDeviceAttribute tests
|
||||
* Scenario: Validate behavior of hipGetDeviceAttribute for masked devices.
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <iostream>
|
||||
#ifdef __linux__
|
||||
#include <unistd.h>
|
||||
#include <sys/wait.h>
|
||||
|
||||
#define MAX_SIZE 30
|
||||
#define VISIBLE_DEVICE 0
|
||||
|
||||
/**
|
||||
* Fetches Gpu device count
|
||||
*/
|
||||
static void getDeviceCount(int *pdevCnt) {
|
||||
int fd[2], val = 0;
|
||||
pid_t childpid;
|
||||
|
||||
// create pipe descriptors
|
||||
pipe(fd);
|
||||
|
||||
// disable visible_devices env from shell
|
||||
#ifdef __HIP_PLATFORM_NVIDIA__
|
||||
unsetenv("CUDA_VISIBLE_DEVICES");
|
||||
#else
|
||||
unsetenv("ROCR_VISIBLE_DEVICES");
|
||||
unsetenv("HIP_VISIBLE_DEVICES");
|
||||
#endif
|
||||
|
||||
childpid = fork();
|
||||
|
||||
if (childpid > 0) { // Parent
|
||||
close(fd[1]);
|
||||
// parent will wait to read the device cnt
|
||||
read(fd[0], &val, sizeof(val));
|
||||
|
||||
// close the read-descriptor
|
||||
close(fd[0]);
|
||||
|
||||
// wait for child exit
|
||||
wait(NULL);
|
||||
|
||||
*pdevCnt = val;
|
||||
} else if (!childpid) { // Child
|
||||
int devCnt = 1;
|
||||
// writing only, no need for read-descriptor
|
||||
close(fd[0]);
|
||||
|
||||
HIP_CHECK(hipGetDeviceCount(&devCnt));
|
||||
// send the value on the write-descriptor:
|
||||
write(fd[1], &devCnt, sizeof(devCnt));
|
||||
|
||||
// close the write descriptor:
|
||||
close(fd[1]);
|
||||
exit(0);
|
||||
} else { // failure
|
||||
*pdevCnt = 0;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Tries to fetch device attribute of masked devices and returns pass/fail.
|
||||
*/
|
||||
static bool validateGetAttributeOfMaskedDevices(int actualNumGPUs) {
|
||||
bool testResult = true;
|
||||
int fd[2];
|
||||
|
||||
pipe(fd);
|
||||
pid_t cPid;
|
||||
cPid = fork();
|
||||
if (cPid == 0) { // child
|
||||
hipError_t err;
|
||||
char visibleDeviceString[MAX_SIZE] = {};
|
||||
snprintf(visibleDeviceString, MAX_SIZE, "%d", VISIBLE_DEVICE);
|
||||
|
||||
// disable visible_devices env from shell
|
||||
#ifdef __HIP_PLATFORM_NVIDIA__
|
||||
unsetenv("CUDA_VISIBLE_DEVICES");
|
||||
setenv("CUDA_VISIBLE_DEVICES", visibleDeviceString, 1);
|
||||
HIP_CHECK(hipInit(0));
|
||||
#else
|
||||
unsetenv("ROCR_VISIBLE_DEVICES");
|
||||
unsetenv("HIP_VISIBLE_DEVICES");
|
||||
setenv("ROCR_VISIBLE_DEVICES", visibleDeviceString, 1);
|
||||
setenv("HIP_VISIBLE_DEVICES", visibleDeviceString, 1);
|
||||
#endif
|
||||
|
||||
for (int count = 1;
|
||||
count < actualNumGPUs; count++) {
|
||||
int pi = -1;
|
||||
err = hipDeviceGetAttribute(&pi, hipDeviceAttributePciBusId, count);
|
||||
if (err == hipSuccess) {
|
||||
testResult &= false;
|
||||
} else {
|
||||
printf("hipDeviceGetAttribute: Error Code Returned: '%s'(%d)\n",
|
||||
hipGetErrorString(err), err);
|
||||
}
|
||||
}
|
||||
close(fd[0]);
|
||||
printf("testResult = %d \n", testResult);
|
||||
write(fd[1], &testResult, sizeof(testResult));
|
||||
close(fd[1]);
|
||||
exit(0);
|
||||
|
||||
} else if (cPid > 0) { // parent
|
||||
close(fd[1]);
|
||||
read(fd[0], &testResult, sizeof(testResult));
|
||||
close(fd[0]);
|
||||
wait(NULL);
|
||||
|
||||
} else {
|
||||
printf("fork() failed\n");
|
||||
HIP_ASSERT(false);
|
||||
}
|
||||
|
||||
return testResult;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Scenario: Validate behavior of hipDeviceGetAttribute for masked devices.
|
||||
*/
|
||||
TEST_CASE("Unit_hipDeviceGetAttribute_MaskedDevices") {
  constexpr int ReqGPUs = 2;
  int count = -1;

  getDeviceCount(&count);

  // The masked-device check needs at least ReqGPUs devices.
  if (count < ReqGPUs) {
    SUCCEED("Not enough GPUs to run the masked GPU tests");
    return;
  }

  const bool ret = validateGetAttributeOfMaskedDevices(count);
  REQUIRE(ret == true);
}
|
||||
#endif
|
||||
@@ -0,0 +1,54 @@
|
||||
/*
|
||||
Copyright (c) 2020-2021 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* hipGetDeviceCount tests
|
||||
* Scenario: Validates the value of numDevices when devices are hidden.
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#ifdef __linux__
|
||||
#include <unistd.h>
|
||||
#include <sys/wait.h>
|
||||
|
||||
#define MAX_SIZE 30
|
||||
#define VISIBLE_DEVICE 0
|
||||
|
||||
/**
|
||||
* Validate behavior of hipGetDeviceCount for masked devices.
|
||||
*/
|
||||
TEST_CASE("Unit_hipGetDeviceCount_MaskedDevices") {
|
||||
int numDevices = 0;
|
||||
char visibleDeviceString[MAX_SIZE] = {};
|
||||
snprintf(visibleDeviceString, MAX_SIZE, "%d", VISIBLE_DEVICE);
|
||||
|
||||
#ifdef __HIP_PLATFORM_NVIDIA__
|
||||
unsetenv("CUDA_VISIBLE_DEVICES");
|
||||
setenv("CUDA_VISIBLE_DEVICES", visibleDeviceString, 1);
|
||||
#else
|
||||
unsetenv("ROCR_VISIBLE_DEVICES");
|
||||
unsetenv("HIP_VISIBLE_DEVICES");
|
||||
setenv("ROCR_VISIBLE_DEVICES", visibleDeviceString, 1);
|
||||
setenv("HIP_VISIBLE_DEVICES", visibleDeviceString, 1);
|
||||
#endif
|
||||
|
||||
HIP_CHECK(hipGetDeviceCount(&numDevices));
|
||||
REQUIRE(numDevices == 1);
|
||||
}
|
||||
#endif
|
||||
@@ -0,0 +1,165 @@
|
||||
/*
|
||||
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Scenario: Validate behavior of hipGetDeviceProperties for masked devices.
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#ifdef __linux__
|
||||
#include <unistd.h>
|
||||
#include <sys/wait.h>
|
||||
|
||||
|
||||
#define MAX_SIZE 30
|
||||
#define VISIBLE_DEVICE 0
|
||||
|
||||
/**
|
||||
* Fetches Gpu device count
|
||||
*/
|
||||
static void getDeviceCount(int *pdevCnt) {
|
||||
int fd[2], val = 0;
|
||||
pid_t childpid;
|
||||
|
||||
// create pipe descriptors
|
||||
pipe(fd);
|
||||
|
||||
// disable visible_devices env from shell
|
||||
#ifdef __HIP_PLATFORM_NVIDIA__
|
||||
unsetenv("CUDA_VISIBLE_DEVICES");
|
||||
#else
|
||||
unsetenv("ROCR_VISIBLE_DEVICES");
|
||||
unsetenv("HIP_VISIBLE_DEVICES");
|
||||
#endif
|
||||
|
||||
childpid = fork();
|
||||
|
||||
if (childpid > 0) { // Parent
|
||||
close(fd[1]);
|
||||
// parent will wait to read the device cnt
|
||||
read(fd[0], &val, sizeof(val));
|
||||
|
||||
// close the read-descriptor
|
||||
close(fd[0]);
|
||||
|
||||
// wait for child exit
|
||||
wait(NULL);
|
||||
|
||||
*pdevCnt = val;
|
||||
} else if (!childpid) { // Child
|
||||
int devCnt = 1;
|
||||
// writing only, no need for read-descriptor
|
||||
close(fd[0]);
|
||||
|
||||
HIP_CHECK(hipGetDeviceCount(&devCnt));
|
||||
// send the value on the write-descriptor:
|
||||
write(fd[1], &devCnt, sizeof(devCnt));
|
||||
|
||||
// close the write descriptor:
|
||||
close(fd[1]);
|
||||
exit(0);
|
||||
} else { // failure
|
||||
*pdevCnt = 0;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Tries to fetch device properties of masked devices and returns pass/fail.
|
||||
*/
|
||||
static bool validateGetPropsOfMaskedDevices(int actualNumGPUs) {
|
||||
bool testResult = true;
|
||||
int fd[2];
|
||||
|
||||
pipe(fd);
|
||||
pid_t cPid;
|
||||
cPid = fork();
|
||||
if (cPid == 0) { // child
|
||||
hipError_t err;
|
||||
char visibleDeviceString[MAX_SIZE] = {};
|
||||
snprintf(visibleDeviceString, MAX_SIZE, "%d", VISIBLE_DEVICE);
|
||||
|
||||
// disable visible_devices env from shell
|
||||
#ifdef __HIP_PLATFORM_NVIDIA__
|
||||
unsetenv("CUDA_VISIBLE_DEVICES");
|
||||
setenv("CUDA_VISIBLE_DEVICES", visibleDeviceString, 1);
|
||||
HIP_CHECK(hipInit(0));
|
||||
#else
|
||||
unsetenv("ROCR_VISIBLE_DEVICES");
|
||||
unsetenv("HIP_VISIBLE_DEVICES");
|
||||
setenv("ROCR_VISIBLE_DEVICES", visibleDeviceString, 1);
|
||||
setenv("HIP_VISIBLE_DEVICES", visibleDeviceString, 1);
|
||||
#endif
|
||||
|
||||
for (int count = 1;
|
||||
count < actualNumGPUs; count++) {
|
||||
hipDeviceProp_t prop;
|
||||
err = hipGetDeviceProperties(&prop, count);
|
||||
if (err == hipSuccess) {
|
||||
testResult &= false;
|
||||
} else {
|
||||
printf("hipGetDeviceProperties: Error Code Returned: '%s'(%d)\n",
|
||||
hipGetErrorString(err), err);
|
||||
}
|
||||
}
|
||||
close(fd[0]);
|
||||
printf("testResult = %d \n", testResult);
|
||||
write(fd[1], &testResult, sizeof(testResult));
|
||||
close(fd[1]);
|
||||
exit(0);
|
||||
|
||||
} else if (cPid > 0) { // parent
|
||||
close(fd[1]);
|
||||
read(fd[0], &testResult, sizeof(testResult));
|
||||
close(fd[0]);
|
||||
wait(NULL);
|
||||
|
||||
} else {
|
||||
printf("fork() failed\n");
|
||||
HIP_ASSERT(false);
|
||||
}
|
||||
|
||||
return testResult;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Scenario: Validate behavior of hipGetDeviceProperties for masked devices.
|
||||
*/
|
||||
TEST_CASE("Unit_hipGetDeviceProperties_MaskedDevices") {
|
||||
int count = -1;
|
||||
constexpr int ReqGPUs = 2;
|
||||
bool ret;
|
||||
|
||||
getDeviceCount(&count);
|
||||
|
||||
if (count >= ReqGPUs) {
|
||||
ret = validateGetPropsOfMaskedDevices(count);
|
||||
REQUIRE(ret == true);
|
||||
} else {
|
||||
SUCCEED("Not enough GPUs to run the masked GPU tests");
|
||||
}
|
||||
}
|
||||
#endif
|
||||
@@ -0,0 +1,443 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
|
||||
#ifdef __linux__
|
||||
#include <sys/mman.h>
|
||||
#include <sys/wait.h>
|
||||
#include <unistd.h>
|
||||
|
||||
/**
|
||||
* @addtogroup hipIpcGetEventHandle hipIpcGetEventHandle
|
||||
* @{
|
||||
* @ingroup DeviceTest
|
||||
* `hipIpcGetEventHandle(hipIpcEventHandle_t* handle, hipEvent_t event)` -
|
||||
* Gets an opaque interprocess handle for an event.
|
||||
* This opaque handle may be copied into other processes and opened with hipIpcOpenEventHandle.
|
||||
*/
|
||||
|
||||
#define BUF_SIZE 4096
|
||||
#define MAX_DEVICES 16
|
||||
|
||||
|
||||
// Per-process record; the functional test keeps an array of these in shared
// memory, indexed by process index.
typedef struct ipcEventInfo {
  // Device ordinal this process selects with hipSetDevice.
  int device;
  // Child pid as returned by fork(); filled in by the parent.
  pid_t pid;
  // Handle exported by a non-zero process for its event.
  hipIpcEventHandle_t eventHandle;
  // Handle exported by process 0 for its device buffer (only entry 0 is written).
  hipIpcMemHandle_t memHandle;
} ipcEventInfo_t;
|
||||
|
||||
// Result of the device probe done by getDevices().
typedef struct ipcDevices {
  // Number of usable devices found (device 0 plus peers with two-way access).
  int count;
  // Device ordinals recorded by the probe; capacity is MAX_DEVICES.
  int ordinals[MAX_DEVICES];
} ipcDevices_t;
|
||||
|
||||
// State of the inter-process barrier implemented by processBarrier().
typedef struct ipcBarrier {
  // Number of processes that have arrived in the current round.
  int count;
  // Shared phase flag; flipped by the last process to arrive.
  bool sense;
  // Set when a process detects a failure so that waiting processes exit.
  bool allExit;
} ipcBarrier_t;
|
||||
|
||||
/*
|
||||
Get device count and list down devices with
|
||||
P2P access with Device 0.
|
||||
*/
|
||||
void getDevices(ipcDevices_t *devices) {
|
||||
pid_t pid = fork();
|
||||
|
||||
if (!pid) {
|
||||
// HIP APIs are called in child process,
|
||||
// to avoid HIP Initialization in main process.
|
||||
int i, devCnt{};
|
||||
HIP_CHECK(hipGetDeviceCount(&devCnt));
|
||||
|
||||
if (devCnt < 2) {
|
||||
devices->count = 0;
|
||||
WARN("Count less than expected number of devices");
|
||||
exit(EXIT_SUCCESS);
|
||||
}
|
||||
|
||||
// Device 0
|
||||
devices->ordinals[0] = 0;
|
||||
devices->count = 1;
|
||||
|
||||
// Check possibility for peer accesses, relevant to our tests
|
||||
INFO("Checking GPU(s) for support of p2p memory access ");
|
||||
INFO("Between GPU0 and other GPU(s)");
|
||||
|
||||
int canPeerAccess_0i, canPeerAccess_i0;
|
||||
for (i = 1; i < devCnt; i++) {
|
||||
HIP_CHECK(hipDeviceCanAccessPeer(&canPeerAccess_0i, 0, i));
|
||||
HIP_CHECK(hipDeviceCanAccessPeer(&canPeerAccess_i0, i, 0));
|
||||
|
||||
if (canPeerAccess_0i * canPeerAccess_i0) {
|
||||
devices->ordinals[i] = i;
|
||||
INFO("Two-way peer access is available between GPU"
|
||||
<< devices->ordinals[0] <<" and GPU"
|
||||
<< devices->ordinals[devices->count]);
|
||||
devices->count += 1;
|
||||
}
|
||||
}
|
||||
|
||||
exit(EXIT_SUCCESS);
|
||||
} else {
|
||||
int status;
|
||||
waitpid(pid, &status, 0);
|
||||
HIP_ASSERT(!status);
|
||||
}
|
||||
}
|
||||
|
||||
// Barrier state used by processBarrier(); the functional test points this at
// an anonymous shared mapping before forking.
static ipcBarrier_t *g_Barrier{};
// This process's own phase flag; flipped at the end of every processBarrier().
static bool g_procSense;
// Number of processes that have to arrive before the barrier opens.
static int g_processCnt;
|
||||
|
||||
/*
|
||||
Calling process waits for other processes to signal/complete.
|
||||
*/
|
||||
void processBarrier() {
|
||||
int newCount = __sync_add_and_fetch(&g_Barrier->count, 1);
|
||||
|
||||
if (newCount == g_processCnt) {
|
||||
g_Barrier->count = 0;
|
||||
g_Barrier->sense = !g_procSense;
|
||||
|
||||
} else {
|
||||
while (g_Barrier->sense == g_procSense) {
|
||||
if (!g_Barrier->allExit) {
|
||||
sched_yield();
|
||||
} else {
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
g_procSense = !g_procSense;
|
||||
}
|
||||
|
||||
|
||||
/*
  Writes src[i] / num to dst[i], where i is the global index of the thread.
  There is no bounds check, so the launch has to cover exactly the elements
  that exist in both buffers.
*/
__global__ void computeKernel(int *dst, int *src, int num) {
  const int gid = threadIdx.x + blockDim.x * blockIdx.x;
  const int quotient = src[gid] / num;
  dst[gid] = quotient;
}
|
||||
|
||||
/*
 * Body executed by every process of the functional IPC test.
 *
 * 1) Process 0 allocates a buffer in GPU0 memory and exports the memory handle.
 * 2) The other processes open the memory handle of the GPU0 buffer, perform
 *    the computation and record an event.
 * 3) Process 0 synchronizes on the events and validates the resulting buffer.
 *
 * The device buffer holds g_processCnt slices of BUF_SIZE ints. Slice 0 is
 * the input uploaded by process 0; process `index` writes slice `index` with
 * input / (index + 1).
 *
 * shmEventInfo - shared array with one entry per process (device, handles).
 * index        - index of the calling process; 0 is the coordinating process.
 *
 * All processes pass the same three processBarrier() calls, so the relative
 * order of HIP calls and barriers below must be kept in step across branches.
 */
void runMultiProcKernel(ipcEventInfo_t *shmEventInfo, int index) {
  int *d_ptr;
  int hData[BUF_SIZE]{};
  unsigned int seed = time(nullptr);

  // Randomize data before computation
  // (only process 0 uploads and later compares against its hData).
  for (int i = 0; i < BUF_SIZE; i++) {
    hData[i] = rand_r(&seed);
  }

  HIP_CHECK(hipSetDevice(shmEventInfo[index].device));

  if (index == 0) {
    // Sized for the maximum number of processes; slot 0 of event[] is unused.
    int h_results[BUF_SIZE * MAX_DEVICES];
    hipEvent_t event[MAX_DEVICES];

    HIP_CHECK(hipMalloc(&d_ptr, BUF_SIZE * g_processCnt * sizeof(int)));
    HIP_CHECK(hipIpcGetMemHandle(&shmEventInfo[0].memHandle, d_ptr));
    // Only the first slice is initialized; the others are produced by the peers.
    HIP_CHECK(hipMemcpy(d_ptr, hData,
                        BUF_SIZE * sizeof(int), hipMemcpyHostToDevice));

    // Barrier 1: Process0 will wait for all processes to create event handles,
    // signals device memory creation.
    processBarrier();

    for (int i = 1; i < g_processCnt; i++) {
      HIP_CHECK(hipIpcOpenEventHandle(&event[i], shmEventInfo[i].eventHandle));
    }

    // Barrier 2: Process0 waits for kernels to be launched
    // and the events to be recorded.
    processBarrier();

    for (int i = 1; i < g_processCnt; i++) {
      HIP_CHECK(hipEventSynchronize(event[i]));
    }

    // Fetch slices 1 .. g_processCnt - 1, i.e. everything the peers wrote.
    HIP_CHECK(hipMemcpy(h_results, d_ptr + BUF_SIZE,
                        BUF_SIZE * (g_processCnt - 1) * sizeof(int), hipMemcpyDeviceToHost));

    // Barrier 3: Process0 signals event usage is done.
    processBarrier();
    HIP_CHECK(hipFree(d_ptr));
    // Slice n of the device buffer is stored at offset (n - 1) in h_results
    // and must equal the input divided by (n + 1).
    for (int n = 1; n < g_processCnt; n++) {
      for (int i = 0; i < BUF_SIZE; i++) {
        if (hData[i]/(n + 1) != h_results[(n-1) * BUF_SIZE + i]) {
          WARN("Data validation error at index " << i << " n" << n);
          g_Barrier->allExit = true;
          exit(EXIT_FAILURE);
        }
      }
    }
    for (int i = 1; i < g_processCnt; i++) {
      HIP_CHECK(hipEventDestroy(event[i]));
    }
  } else {
    hipEvent_t event;
    HIP_CHECK(hipEventCreateWithFlags(&event,
                                      hipEventDisableTiming | hipEventInterprocess));
    HIP_CHECK(hipIpcGetEventHandle(&shmEventInfo[index].eventHandle, event));

    // Barrier 1 : wait until proc 0 initializes device memory,
    // signals event creation.
    processBarrier();
    HIP_CHECK(hipIpcOpenMemHandle(reinterpret_cast<void **>(&d_ptr),
                                  shmEventInfo[0].memHandle,
                                  hipIpcMemLazyEnablePeerAccess));
    // One thread per element: BUF_SIZE / 512 blocks of 512 threads.
    const dim3 threads(512, 1);
    const dim3 blocks(BUF_SIZE / threads.x, 1);
    // Read slice 0, write this process's own slice, divide by (index + 1).
    hipLaunchKernelGGL(computeKernel, dim3(blocks), dim3(threads), 0, 0,
                       d_ptr + index *BUF_SIZE, d_ptr, index + 1);
    HIP_CHECK(hipGetLastError());
    HIP_CHECK(hipEventRecord(event));

    // Barrier 2 : Signals that event is recorded
    processBarrier();
    HIP_CHECK(hipIpcCloseMemHandle(d_ptr));

    // Barrier 3 : wait for all the events to be used up by processes
    processBarrier();
    HIP_CHECK(hipEventDestroy(event));
  }
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Validate use case of event handle along with memory handle
|
||||
* across multiple processes with complex scenario.
|
||||
* - Utilizes synchronization of processes and events.
|
||||
* - Launches kernels and validates computation results.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/multiproc/hipIpcEventHandle.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Unit_hipIpcEventHandle_Functional") {
  ipcDevices_t *shmDevices;
  ipcEventInfo_t *shmEventInfo;
  // Anonymous mappings have no backing file; -1 is the portable fd value.
  shmDevices = reinterpret_cast<ipcDevices_t *> (mmap(NULL, sizeof(*shmDevices),
                      PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0));
  REQUIRE(MAP_FAILED != shmDevices);

  getDevices(shmDevices);

  if (shmDevices->count < 2) {
    WARN("Test requires atleast two GPUs with P2P access. Skipping test.");
    munmap(shmDevices, sizeof(*shmDevices));
    return;
  }

  // One process per usable device, capped by the size of the fixed tables.
  g_processCnt = (shmDevices->count > MAX_DEVICES) ? MAX_DEVICES : shmDevices->count;

  // Barrier is used to synchronize processes created.
  g_Barrier = reinterpret_cast<ipcBarrier_t *> (mmap(NULL, sizeof(*g_Barrier),
                      PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0));
  REQUIRE(MAP_FAILED != g_Barrier);
  memset(g_Barrier, 0, sizeof(*g_Barrier));

  // set local barrier sense flag
  g_procSense = 0;

  // shared memory for Event and memHandle Info
  const size_t eventInfoBytes = g_processCnt * sizeof(*shmEventInfo);
  shmEventInfo = reinterpret_cast<ipcEventInfo_t *>(mmap(NULL,
                      eventInfoBytes,
                      PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0));
  REQUIRE(MAP_FAILED != shmEventInfo);

  // initialize shared memory
  memset(shmEventInfo, 0, eventInfoBytes);

  int index = 0;
  bool forkFailed = false;

  for (int i = 1; i < g_processCnt; i++) {
    pid_t pid = fork();

    if (pid == 0) {
      // Child: remember which slot it owns and stop forking.
      index = i;
      break;
    }
    if (pid < 0) {
      forkFailed = true;
      break;
    }
    shmEventInfo[i].pid = pid;
  }

  if (forkFailed) {
    // Only the parent can get here. The barrier can never be completed with a
    // process missing, so tell the children that already exist to bail out,
    // reap them, and fail the test instead of hanging.
    g_Barrier->allExit = true;
    for (int i = 1; i < g_processCnt; i++) {
      if (shmEventInfo[i].pid > 0) {
        waitpid(shmEventInfo[i].pid, NULL, 0);
      }
    }
    munmap(shmEventInfo, eventInfoBytes);
    munmap(g_Barrier, sizeof(*g_Barrier));
    g_Barrier = nullptr;
    munmap(shmDevices, sizeof(*shmDevices));
    WARN("fork() failed");
    REQUIRE(false);
  }

  shmEventInfo[index].device = shmDevices->ordinals[index];

  // Run the test
  runMultiProcKernel(shmEventInfo, index);

  if (index != 0) {
    // A child is done once its share of the work has finished; it must not
    // return into the test runner as if it were the original process.
    exit(EXIT_SUCCESS);
  }

  // Cleanup
  for (int i = 1; i < g_processCnt; i++) {
    int status = 0;
    waitpid(shmEventInfo[i].pid, &status, 0);
    HIP_ASSERT(WIFEXITED(status));
  }

  munmap(shmEventInfo, eventInfoBytes);
  munmap(g_Barrier, sizeof(*g_Barrier));
  g_Barrier = nullptr;
  munmap(shmDevices, sizeof(*shmDevices));
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Validates handling of invalid arguments for
|
||||
* [hipIpcGetEventHandle](@ref hipIpcGetEventHandle):
|
||||
* -# When pointer to the event handle is `nullptr`
|
||||
* - Expected output: return `hipErrorInvalidValue`
|
||||
* -# When pointer to the event is `nullptr`
|
||||
* - Expected output: return `hipErrorInvalidValue`
|
||||
* -# When both pointers are `nullptr`
|
||||
* - Expected output: return `hipErrorInvalidValue`
|
||||
* -# When event is not valid
|
||||
* - Expected output: return `hipErrorInvalidValue`
|
||||
* -# When event is created without interprocess flag
|
||||
* - Expected output: return `hipErrorInvalidResourceHandle` or `hipErrorInvalidConfiguration`
|
||||
* -# When event is created without flags
|
||||
* - Expected output: return `hipErrorInvalidResourceHandle`
|
||||
* - Validates handling of invalid arguments for
|
||||
* [hipIpcOpenEventHandle](@ref hipIpcOpenEventHandle)
|
||||
* -# When pointer to the event is `nullptr`
|
||||
* - Expected output: return `hipErrorInvalidValue`
|
||||
* -# When pointer to the event handle is `nullptr`
|
||||
* - Expected output: return `hipErrorInvalidValue`
|
||||
* -# When attempted to open handle in the process that created it
|
||||
* - Expected output: return `hipErrorInvalidContext`
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/multiproc/hipIpcEventHandle.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - Host specific (LINUX)
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Unit_hipIpcEventHandle_ParameterValidation") {
  hipEvent_t event;
  hipIpcEventHandle_t eventHandle;
  hipError_t ret;
  // Valid interprocess event shared by the sections below.
  HIP_CHECK(hipEventCreateWithFlags(&event,
                                    hipEventDisableTiming | hipEventInterprocess));
#if HT_AMD
  // Test disabled for nvidia due to segfault with cuda api
  SECTION("Get event handle with eventHandle(nullptr)") {
    ret = hipIpcGetEventHandle(nullptr, event);
    REQUIRE(ret == hipErrorInvalidValue);
  }
#endif

  SECTION("Get event handle with event(nullptr)") {
    ret = hipIpcGetEventHandle(&eventHandle, nullptr);
    REQUIRE(ret == hipErrorInvalidValue);
  }

  SECTION("Get event handle with handle == nullptr and event == nullptr") {
    HIP_CHECK_ERROR(hipIpcGetEventHandle(nullptr, nullptr), hipErrorInvalidValue);
  }

  SECTION("Get event handle with invalid event object") {
    hipEvent_t eventUninit{};
    ret = hipIpcGetEventHandle(&eventHandle, eventUninit);
    REQUIRE(ret == hipErrorInvalidValue);
  }

  SECTION("Get event handle for event allocated without Interprocess flag") {
    hipEvent_t eventNoIpc;
    HIP_CHECK(hipEventCreateWithFlags(&eventNoIpc, hipEventDisableTiming));

    // Either of the two error codes is accepted here.
    ret = hipIpcGetEventHandle(&eventHandle, eventNoIpc);
    if ((ret != hipErrorInvalidResourceHandle) &&
        (ret != hipErrorInvalidConfiguration)) {
      INFO("Error returned : " << ret);
      REQUIRE(false);
    }
    HIP_CHECK(hipEventDestroy(eventNoIpc));
  }

  SECTION("Open event handle with event(nullptr)") {
    hipIpcEventHandle_t ipc_handle{};
    ret = hipIpcOpenEventHandle(nullptr, ipc_handle);
    REQUIRE(ret == hipErrorInvalidValue);
  }

  SECTION("Open event handle with eventHandle as invalid") {
    hipIpcEventHandle_t ipc_handle{};
    hipEvent_t eventOut;
    // A zero-filled handle is rejected with one of the two codes below.
    ret = hipIpcOpenEventHandle(&eventOut, ipc_handle);
    if ((ret != hipErrorInvalidValue) && (ret != hipErrorMapFailed)) {
      INFO("Error returned : " << ret);
      REQUIRE(false);
    }
  }

  SECTION("Open handle in process that created it") {
    hipIpcEventHandle_t event_handle;
    hipEvent_t event1, event2;
    HIP_CHECK(hipEventCreateWithFlags(&event1, hipEventDisableTiming | hipEventInterprocess));
    HIP_CHECK(hipIpcGetEventHandle(&event_handle, event1));
    HIP_CHECK_ERROR(hipIpcOpenEventHandle(&event2, event_handle), hipErrorInvalidContext);
    HIP_CHECK(hipEventDestroy(event1));
  }

// Disabled on AMD because of return value mismatch - EXSWHTEC-41
#if HT_NVIDIA
  SECTION("Event created with no flags") {
    hipEvent_t event;
    hipIpcEventHandle_t event_handle;

    HIP_CHECK(hipEventCreate(&event));
    HIP_CHECK_ERROR(hipIpcGetEventHandle(&event_handle, event), hipErrorInvalidResourceHandle);
    HIP_CHECK(hipEventDestroy(event));
  }
#endif

  // Release the event created at the top of the test case; it was never
  // destroyed before, so every pass through the sections leaked one event.
  HIP_CHECK(hipEventDestroy(event));
}
|
||||
|
||||
/**
|
||||
* End doxygen group hipIpcGetEventHandle.
|
||||
* @}
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup hipIpcOpenEventHandle hipIpcOpenEventHandle
|
||||
* @{
|
||||
* @ingroup DeviceTest
|
||||
* `hipIpcOpenEventHandle(hipEvent_t* event, hipIpcEventHandle_t handle)` -
|
||||
* Opens an interprocess event handle.
|
||||
* Opens an interprocess event handle exported from another process with hipIpcGetEventHandle.
|
||||
* ________________________
|
||||
* Test cases from other modules:
|
||||
* - @ref Unit_hipIpcEventHandle_Functional
|
||||
* - @ref Unit_hipIpcEventHandle_ParameterValidation
|
||||
*/
|
||||
|
||||
/**
|
||||
* End doxygen group hipIpcOpenEventHandle.
|
||||
* @}
|
||||
*/
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,281 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
|
||||
#ifdef __linux__
|
||||
#include <sys/types.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/wait.h>
|
||||
#include <fcntl.h>
|
||||
#include <semaphore.h>
|
||||
#include <unistd.h>
|
||||
|
||||
/**
|
||||
* @addtogroup hipIpcOpenMemHandle hipIpcOpenMemHandle
|
||||
* @{
|
||||
* @ingroup DeviceTest
|
||||
* `hipIpcOpenMemHandle(void** devPtr, hipIpcMemHandle_t handle, unsigned int flags)` -
|
||||
* Opens an interprocess memory handle exported from another process
|
||||
* and returns a device pointer usable in the local process.
|
||||
*/
|
||||
|
||||
#define NUM_ELMTS 1024
|
||||
#define NUM_THREADS 10
|
||||
|
||||
|
||||
// Record exchanged between parent and child through a shared mapping.
typedef struct mem_handle {
  // Device ordinal on which the parent allocated the exported buffer.
  int device;
  // IPC handle of that buffer, written by the parent and opened by the child.
  hipIpcMemHandle_t memHandle;
  // Cleared by either process when a semaphore operation fails.
  bool IfTestPassed;
} hip_ipc_t;
|
||||
|
||||
|
||||
|
||||
// This testcase verifies the hipIpcMemAccess APIs as follows
|
||||
// The following program spawns a child process and does the following
|
||||
// Parent iterate through each device, create memory -- create hipIpcMemhandle
|
||||
// stores the mem handle in mmaped memory, release the child using sem_post()
|
||||
// and wait for child to release itself(parent process)
|
||||
// child process:
|
||||
// Child process get the ipc mem handle using hipIpcOpenMemHandle
|
||||
// Iterate through all the available gpus and do Device to Device copies
|
||||
// and check for data consistencies and close the hipIpcCloseMemHandle
|
||||
// release the parent and wait for parent to release itself(child)
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Verifies that getting and opening mem handle works correctly
|
||||
* in a specific scenario, and handles the case when the same device
|
||||
* is used in both processes.
|
||||
* - Creates memory from the parent process for each device.
|
||||
* - Spawns child process and waits for it to finish.
|
||||
* - Child process gets the handle and check data consistencies.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/multiproc/hipIpcMemAccessTest.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - Host specific (LINUX)
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Unit_hipIpcMemAccess_Semaphores") {
  hip_ipc_t *shrd_mem = NULL;
  pid_t pid;
  size_t N = 1024;
  size_t Nbytes = N * sizeof(int);
  int *A_d{nullptr}, *B_d{nullptr}, *C_d{nullptr};
  int *A_h{nullptr}, *C_h{nullptr};
  sem_t *sem_ob1{nullptr}, *sem_ob2{nullptr};
  int Num_devices = 0, CanAccessPeer = 0;

  // Remove semaphores left behind by an earlier aborted run so that the
  // O_EXCL creation below succeeds. A failure here just means the name did
  // not exist, which is the normal case.
  sem_unlink("/my-sem-object1");
  sem_unlink("/my-sem-object2");
  // sem_ob1: parent -> child ("handle is ready").
  // sem_ob2: child -> parent ("done with this handle").
  sem_ob1 = sem_open("/my-sem-object1", O_CREAT|O_EXCL, 0660, 0);
  sem_ob2 = sem_open("/my-sem-object2", O_CREAT|O_EXCL, 0660, 0);
  REQUIRE(sem_ob1 != SEM_FAILED);
  REQUIRE(sem_ob2 != SEM_FAILED);

  // mmap reports failure with MAP_FAILED, not with a null pointer.
  void *mapping = mmap(NULL, sizeof(hip_ipc_t),
                       PROT_READ | PROT_WRITE,
                       MAP_SHARED | MAP_ANONYMOUS,
                       -1, 0);
  REQUIRE(mapping != MAP_FAILED);
  shrd_mem = reinterpret_cast<hip_ipc_t *>(mapping);
  shrd_mem->IfTestPassed = true;
  HipTest::initArrays<int>(nullptr, nullptr, nullptr,
                           &A_h, nullptr, &C_h, N, false);
  pid = fork();
  if (pid != 0) {
    // Parent process
    HIP_CHECK(hipGetDeviceCount(&Num_devices));
    for (int i = 0; i < Num_devices; ++i) {
      // Stop handing out new buffers once either side has reported a failure.
      if (shrd_mem->IfTestPassed == true) {
        HIP_CHECK(hipSetDevice(i));
        HIP_CHECK(hipMalloc(&A_d, Nbytes));
        HIP_CHECK(hipIpcGetMemHandle(reinterpret_cast<hipIpcMemHandle_t *>
                                     (&shrd_mem->memHandle),
                                     A_d));
        HIP_CHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
        shrd_mem->device = i;
        if ((sem_post(sem_ob1)) == -1) {
          // Need to use inline function to release resources.
          shrd_mem->IfTestPassed = false;
          WARN("sem_post() call failed in parent process.");
        }
        if ((sem_wait(sem_ob2)) == -1) {
          shrd_mem->IfTestPassed = false;
          WARN("sem_wait() call failed in parent process.");
        }
        HIP_CHECK(hipFree(A_d));
      }
    }
  } else {
    // Child process
    HIP_CHECK(hipGetDeviceCount(&Num_devices));
    for (int j = 0; j < Num_devices; ++j) {
      HIP_CHECK(hipSetDevice(j));
      if ((sem_wait(sem_ob1)) == -1) {
        shrd_mem->IfTestPassed = false;
        WARN("sem_wait() call failed in child process.");
        // Release the parent, then stop: without a successful wait there is
        // no guarantee that the shared handle is valid for this round.
        if ((sem_post(sem_ob2)) == -1) {
          WARN("sem_post() call on sem_ob2 failed");
        }
        exit(1);
      }
      for (int i = 0; i < Num_devices; ++i) {
        HIP_CHECK(hipSetDevice(i));
        HIP_CHECK(hipDeviceCanAccessPeer(&CanAccessPeer, i, shrd_mem->device));
        if (CanAccessPeer == 1) {
          HIP_CHECK(hipMalloc(&C_d, Nbytes));
          HIP_CHECK(hipIpcOpenMemHandle(reinterpret_cast<void **>(&B_d),
                                        shrd_mem->memHandle,
                                        hipIpcMemLazyEnablePeerAccess));
          HIP_CHECK(hipMemcpy(C_d, B_d, Nbytes, hipMemcpyDeviceToDevice));
          HIP_CHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
          HipTest::checkTest<int>(A_h, C_h, N);
          memset(reinterpret_cast<void*>(C_h), 0, Nbytes);
          // Checking if the data obtained from Ipc shared memory is consistent
          HIP_CHECK(hipMemcpy(C_h, B_d, Nbytes, hipMemcpyDeviceToHost));
          HipTest::checkTest<int>(A_h, C_h, N);
          HIP_CHECK(hipIpcCloseMemHandle(reinterpret_cast<void*>(B_d)));
          HIP_CHECK(hipFree(C_d));
        }
      }
      if ((sem_post(sem_ob2)) == -1) {
        shrd_mem->IfTestPassed = false;
        WARN("sem_post() call on sem_ob2 failed");
        exit(1);
      }
    }
    exit(0);
  }
  if ((sem_unlink("/my-sem-object1")) == -1) {
    WARN("sem_unlink() call on /my-sem-object1 failed");
  }
  if ((sem_unlink("/my-sem-object2")) == -1) {
    WARN("sem_unlink() call on /my-sem-object2 failed");
  }
  int rFlag = 0;
  waitpid(pid, &rFlag, 0);

  // Take the verdict first so that the resources are released even when the
  // final check fails.
  const bool testPassed = shrd_mem->IfTestPassed;
  sem_close(sem_ob1);
  sem_close(sem_ob2);
  munmap(shrd_mem, sizeof(hip_ipc_t));
  HipTest::freeArrays<int>(nullptr, nullptr, nullptr,
                           A_h, nullptr, C_h, false);
  REQUIRE(testPassed == true);
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Validates handling of valid and invalid arguments for
|
||||
* [hipIpcGetMemHandle](@ref hipIpcGetMemHandle):
|
||||
* -# When memory handle pointer is `nullptr`
|
||||
* - Expected output: return `hipErrorInvalidValue`
|
||||
* -# When device pointer is `nullptr`
|
||||
* - Expected output: return `hipErrorInvalidValue`
|
||||
* -# When both pointers are `nullptr`
|
||||
* - Expected output: return `hipErrorInvalidValue`
|
||||
* -# When both pointers are valid
|
||||
* - Expected output: return `hipSuccess`
|
||||
* - Validates handling of valid and invalid arguments for
|
||||
* [hipIpcOpenMemHandle](@ref hipIpcOpenMemHandle):
|
||||
* -# When device pointer is `nullptr`
|
||||
* - Expected output: return `hipErrorInvalidValue`
|
||||
* -# When memory handle pointer uninitialized
|
||||
* - Expected output: return `hipErrorInvalidValue` or `hipErrorInvalidDevicePointer`
|
||||
* -# When memory handle has random flags
|
||||
* - Expected output: return `hipErrorInvalidValue`
|
||||
* - Validates handling of valid and invalid arguments for
|
||||
* [hipIpcCloseMemHandle](@ref hipIpcCloseMemHandle):
|
||||
* -# When device pointer is `nullptr`
|
||||
* - Expected output: return `hipErrorInvalidValue`
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/multiproc/hipIpcMemAccessTest.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - Host specific (LINUX)
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Unit_hipIpcMemAccess_ParameterValidation") {
  hipIpcMemHandle_t MemHandle;
  // Never filled in on purpose: used to probe hipIpcOpenMemHandle with a
  // handle that did not come from hipIpcGetMemHandle.
  hipIpcMemHandle_t MemHandleUninit;
  void* Ad = nullptr;
  void* Ad2 = nullptr;

  // Device buffer used by the cases that need a valid device pointer.
  HIP_CHECK(hipMalloc(&Ad, 1024));

#if HT_AMD
  // Test is disabled for nvidia as api resulting in seg fault.
  SECTION("Get mem handle with handle as nullptr") {
    const hipError_t status = hipIpcGetMemHandle(nullptr, Ad);
    REQUIRE(status == hipErrorInvalidValue);
  }
#endif
  SECTION("Get mem handle with devptr as nullptr") {
    const hipError_t status = hipIpcGetMemHandle(&MemHandle, nullptr);
    REQUIRE(status == hipErrorInvalidValue);
  }

  SECTION("Get mem handle with handle/devptr as nullptr") {
    const hipError_t status = hipIpcGetMemHandle(nullptr, nullptr);
    REQUIRE(status == hipErrorInvalidValue);
  }

  SECTION("Get mem handle with valid devptr") {
    const hipError_t status = hipIpcGetMemHandle(&MemHandle, Ad);
    REQUIRE(status == hipSuccess);
  }

  SECTION("Open mem handle with devptr as nullptr") {
    const hipError_t status =
        hipIpcOpenMemHandle(nullptr, MemHandle, hipIpcMemLazyEnablePeerAccess);
    REQUIRE(status == hipErrorInvalidValue);
  }

  SECTION("Open mem handle with handle as un-initialized") {
    const hipError_t status =
        hipIpcOpenMemHandle(&Ad2, MemHandleUninit, hipIpcMemLazyEnablePeerAccess);
    // Either error code is accepted for a handle that was never initialized.
    REQUIRE((status == hipErrorInvalidValue || status == hipErrorInvalidDevicePointer));
  }
#if HT_AMD
  // Test is disabled for nvidia as api not returning expected value.
  SECTION("Open mem handle with flags as random value") {
    constexpr unsigned int bogusFlags = 123;
    HIP_CHECK(hipIpcGetMemHandle(&MemHandle, Ad));
    const hipError_t status = hipIpcOpenMemHandle(&Ad2, MemHandle, bogusFlags);
    REQUIRE(status == hipErrorInvalidValue);
  }
#endif
  SECTION("Close mem handle with devptr(nullptr)") {
    const hipError_t status = hipIpcCloseMemHandle(nullptr);
    REQUIRE(status == hipErrorInvalidValue);
  }

  HIP_CHECK(hipFree(Ad));
}
|
||||
|
||||
/**
|
||||
* End doxygen group hipIpcOpenMemHandle.
|
||||
* @}
|
||||
*/
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,244 @@
|
||||
/*
|
||||
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
Testcase Scenarios :
|
||||
|
||||
1) Run hipMalloc() api/kernel code on same gpu parallely from parent and child
|
||||
processes, validate the results.
|
||||
|
||||
2) Execute hipMalloc() api simultaneously on all the gpus by spawning multiple
|
||||
child processes. Validate buffers allocated after running kernel code.
|
||||
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
#include <hip_test_kernels.hh>
|
||||
|
||||
#ifdef __linux__
|
||||
#include <sys/wait.h>
|
||||
#include <sys/types.h>
|
||||
#include <unistd.h>
|
||||
|
||||
/**
|
||||
* Fetches Gpu device count
|
||||
*/
|
||||
static void getDeviceCount(int* pdevCnt) {
  // Queries hipGetDeviceCount() in a forked child and hands the result to the
  // parent over a pipe. *pdevCnt is set to 0 whenever the count cannot be
  // obtained (pipe/fork failure or a short read).
  // NOTE(review): presumably the query runs in a child so the calling process
  // does not touch the HIP runtime before its own fork() calls - confirm.
  int fd[2], val = 0;
  pid_t childpid;

  *pdevCnt = 0;

  // create pipe descriptors
  if (pipe(fd) != 0) {
    return;
  }

  // disable visible_devices env from shell
#ifdef HT_NVIDIA
  unsetenv("CUDA_VISIBLE_DEVICES");
#else
  unsetenv("ROCR_VISIBLE_DEVICES");
  unsetenv("HIP_VISIBLE_DEVICES");
#endif

  childpid = fork();

  if (childpid > 0) {  // Parent
    close(fd[1]);
    // parent will wait to read the device cnt; anything but a full read
    // (e.g. the child died before writing) is reported as zero devices
    if (read(fd[0], &val, sizeof(val)) != static_cast<ssize_t>(sizeof(val))) {
      val = 0;
    }

    // close the read-descriptor
    close(fd[0]);

    // wait for child exit
    waitpid(childpid, nullptr, 0);

    *pdevCnt = val;
  } else if (!childpid) {  // Child
    int devCnt = 1;
    // writing only, no need for read-descriptor
    close(fd[0]);

    HIP_CHECK(hipGetDeviceCount(&devCnt));
    // send the value on the write-descriptor:
    write(fd[1], &devCnt, sizeof(devCnt));

    // close the write descriptor:
    close(fd[1]);
    exit(0);
  } else {  // failure
    // fork() failed: release both pipe ends so they do not leak
    close(fd[0]);
    close(fd[1]);
    return;
  }
}
|
||||
|
||||
/**
|
||||
* Validates data consistency on supplied gpu
|
||||
*/
|
||||
static bool validateMemoryOnGPU(int gpu, bool concurOnOneGPU = false) {
|
||||
int *A_d, *B_d, *C_d;
|
||||
int *A_h, *B_h, *C_h;
|
||||
size_t prevAvl, prevTot, curAvl, curTot;
|
||||
bool TestPassed = true;
|
||||
constexpr auto N = 4 * 1024 * 1024;
|
||||
constexpr auto blocksPerCU = 6; // to hide latency
|
||||
constexpr auto threadsPerBlock = 256;
|
||||
size_t Nbytes = N * sizeof(int);
|
||||
|
||||
HIP_CHECK(hipSetDevice(gpu));
|
||||
HIP_CHECK(hipMemGetInfo(&prevAvl, &prevTot));
|
||||
HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false);
|
||||
HIP_CHECK(hipMemGetInfo(&curAvl, &curTot));
|
||||
|
||||
if (!concurOnOneGPU && (prevAvl < curAvl || prevTot != curTot)) {
|
||||
//In concurrent calls on one GPU, we cannot verify leaking in this way
|
||||
printf("%s : Memory allocation mismatch observed."
|
||||
"Possible memory leak.\n", __func__);
|
||||
TestPassed &= false;
|
||||
}
|
||||
|
||||
unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N);
|
||||
|
||||
HIP_CHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
|
||||
HIP_CHECK(hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice));
|
||||
|
||||
hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock),
|
||||
0, 0, static_cast<const int*>(A_d),
|
||||
static_cast<const int*>(B_d), C_d, N);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
|
||||
|
||||
if (!HipTest::checkVectorADD(A_h, B_h, C_h, N)) {
|
||||
printf("Validation PASSED for gpu %d from pid %d\n", gpu, getpid());
|
||||
} else {
|
||||
printf("Validation FAILED for gpu %d from pid %d\n", gpu, getpid());
|
||||
TestPassed = false;
|
||||
}
|
||||
|
||||
HIP_CHECK(hipMemGetInfo(&prevAvl, &prevTot));
|
||||
HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false);
|
||||
HIP_CHECK(hipMemGetInfo(&curAvl, &curTot));
|
||||
|
||||
if (!concurOnOneGPU && (curAvl < prevAvl || prevTot != curTot)) {
|
||||
// In concurrent calls on one GPU, we cannot verify leaking in this way
|
||||
UNSCOPED_INFO("validateMemoryOnGPU : Memory allocation mismatch observed."
|
||||
<< "Possible memory leak.");
|
||||
TestPassed = false;
|
||||
}
|
||||
|
||||
if (!concurOnOneGPU && (prevAvl != curAvl || prevTot != curTot)) {
|
||||
// In concurrent calls on one GPU, we cannot verify leaking in this way
|
||||
printf(
|
||||
"%s : Memory allocation mismatch observed."
|
||||
"Possible memory leak.\n",
|
||||
__func__);
|
||||
TestPassed = false;
|
||||
}
|
||||
|
||||
return TestPassed;
|
||||
}
|
||||
|
||||
/**
|
||||
* Parallel execution of parent and child on gpu0
|
||||
*/
|
||||
TEST_CASE("Unit_hipMalloc_ChildConcurrencyDefaultGpu") {
  int devCnt = 0, pid = 0;
  // Exit codes the child uses to report its validation result.
  constexpr auto resSuccess = 1, resFailure = 2;
  bool TestPassed = true;

  // Get GPU count
  getDeviceCount(&devCnt);
  REQUIRE(devCnt > 0);

  if ((pid = fork()) < 0) {
    INFO("Child_Concurrency_DefaultGpu : fork() returned error : " << pid);
    HIP_ASSERT(false);

  } else if (!pid) {  // Child process
    // Allocates and validates memory on Gpu0 simultaneously with parent
    const bool TestPassedChild = validateMemoryOnGPU(0, true);

    // child exits with success or failure status
    exit(TestPassedChild ? resSuccess : resFailure);

  } else {  // Parent process
    int exitStatus = 0;

    // Allocates and validates memory on Gpu0 simultaneously with child
    TestPassed = validateMemoryOnGPU(0, true);

    // Wait and get result from child. Only a normal exit carrying resSuccess
    // counts as a pass; a child killed by a signal has exit status 0, which
    // the previous `== resFailure` comparison let through.
    const pid_t reaped = waitpid(pid, &exitStatus, 0);
    const bool childOk = (reaped == pid) && WIFEXITED(exitStatus) &&
                         (WEXITSTATUS(exitStatus) == resSuccess);
    if (!childOk) TestPassed = false;
  }

  REQUIRE(TestPassed == true);
}
|
||||
|
||||
/**
|
||||
* Parallel execution of api on multiple gpus from
|
||||
* different child processes.
|
||||
*/
|
||||
TEST_CASE("Unit_hipMalloc_ChildConcurrencyMultiGpu") {
  int devCnt = 0, pid = 0;
  // Exit codes each child uses to report its validation result.
  constexpr auto resSuccess = 1, resFailure = 2;

  // Get GPU count
  getDeviceCount(&devCnt);
  REQUIRE(devCnt > 0);

  // Spawn child for each GPU
  for (int gpu = 0; gpu < devCnt; gpu++) {
    if ((pid = fork()) < 0) {
      INFO("Child_Concurrency_MultiGpu : fork() returned error : " << pid);
      REQUIRE(false);

    } else if (!pid) {  // Child process
      const bool TestPassedChild = validateMemoryOnGPU(gpu, true);

      // child exits with success or failure status
      exit(TestPassedChild ? resSuccess : resFailure);
    }
  }

  // Parent shall wait for child to complete
  int passCnt = 0;
  for (int i = 0; i < devCnt; i++) {
    int exitStatus = 0;
    const int pidwait = wait(&exitStatus);

    // Check for failure first: exitStatus is not written when wait() fails,
    // so it must not be decoded or printed in that case.
    if (pidwait < 0) {
      break;
    }

    // `i` is the order in which children were reaped, not a device index.
    printf("exitStatus for dev:%d is %d\n", i, WEXITSTATUS(exitStatus));

    if (WIFEXITED(exitStatus) && WEXITSTATUS(exitStatus) == resSuccess) passCnt++;
  }
  REQUIRE(passCnt == devCnt);
}
|
||||
#endif // __linux__
|
||||
@@ -0,0 +1,519 @@
|
||||
/*
|
||||
Copyright (c) 2021 - 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/* Test Case Description:
|
||||
Scenario 3: The test validates if fine grain
|
||||
behavior is observed or not with memory allocated using malloc()
|
||||
Scenario 4: The test validates if coarse grain memory
|
||||
behavior is observed or not with memory allocated using malloc()
|
||||
Scenario 5: The test validates if fine grain memory
|
||||
behavior is observed or not with memory allocated using mmap()
|
||||
Scenario 6: The test validates if coarse grain memory
|
||||
behavior is observed or not with memory allocated using mmap()
|
||||
Scenario:7 Test Case Description: The following test checks if the memory is
|
||||
accessible when HIP_HOST_COHERENT is set to 0
|
||||
Scenario:8 Test Case Description: The following test checks if the memory
|
||||
exhibits fine grain behavior when HIP_HOST_COHERENT is set to 1
|
||||
*/
|
||||
|
||||
#ifdef __linux__
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip_test_features.hh>
|
||||
#include <unistd.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/wait.h>
|
||||
#include <chrono>
|
||||
#include "../unit/memory/hipSVMCommon.h"
|
||||
|
||||
__global__ void CoherentTst(int *ptr, volatile unsigned int *expired) {
  // Announce the kernel is running by bumping the shared value by 1.
  atomicAdd_system(ptr, 1);
  // Spin until either the 3 -> 4 swap succeeds or the host raises *expired.
  while (*expired == 0 && atomicCAS_system(ptr, 3, 4) != 3) {
  }
}
|
||||
|
||||
__global__ void SquareKrnl(int *ptr) {
  // Replace the pointed-to value with its square.
  const int value = *ptr;
  *ptr = value * value;
}
|
||||
|
||||
// The function tests the coherency of allocated memory
|
||||
// Return false on failure, true on success.
|
||||
bool static TstCoherency(int *Ptr, bool HmmMem) {
|
||||
using namespace std::chrono_literals;
|
||||
int *Dptr = nullptr;
|
||||
hipStream_t strm;
|
||||
HIP_CHECK(hipStreamCreate(&strm));
|
||||
// storing value 1 in the memory created above
|
||||
*Ptr = 1;
|
||||
|
||||
unsigned int *expired = nullptr;
|
||||
HIP_CHECK(hipHostMalloc(&expired, sizeof(unsigned int))); // hipHostMallocCoherent by defaut
|
||||
*expired = 0;
|
||||
|
||||
if (!HmmMem) {
|
||||
HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void **>(&Dptr), Ptr, 0));
|
||||
CoherentTst<<<1, 1, 0, strm>>>(Dptr, expired);
|
||||
} else {
|
||||
CoherentTst<<<1, 1, 0, strm>>>(Ptr, expired);
|
||||
}
|
||||
// looping until the value is 2 for 3 seconds
|
||||
std::chrono::steady_clock::time_point start =
|
||||
std::chrono::steady_clock::now();
|
||||
while (std::chrono::duration_cast<std::chrono::seconds>(
|
||||
std::chrono::steady_clock::now() - start).count() < 3) {
|
||||
if (*Ptr == 2) {
|
||||
*Ptr += 1;
|
||||
std::this_thread::sleep_for(200ms); // Make sure kernel gets updated Dptr
|
||||
break;
|
||||
}
|
||||
}
|
||||
*expired = 1; // Notify kernel loop to exit
|
||||
HIP_CHECK(hipStreamSynchronize(strm));
|
||||
HIP_CHECK(hipStreamDestroy(strm));
|
||||
HIP_CHECK(hipHostFree(expired));
|
||||
|
||||
if (*Ptr == 4) {
|
||||
return true;
|
||||
}
|
||||
fprintf(stderr, "TstCoherency: *Ptr=%u\b", *Ptr);
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Test case description: The following test validates if fine grain
|
||||
behavior is observed or not with memory allocated using malloc()*/
|
||||
// The following test is failing on Nvidia platform hence disabled it for now
|
||||
#if HT_AMD
|
||||
TEST_CASE("Unit_malloc_CoherentTst") {
  CHECK_PCIE_ATOMICS_SUPPORT
  hipDeviceProp_t prop;
  HIPCHECK(hipGetDeviceProperties(&prop, 0));

  // The test only runs when the arch name of device 0 contains "xnack+".
  if (strstr(prop.gcnArchName, "xnack+") == nullptr) {
    HipTest::HIP_SKIP_TEST("GPU is not xnack enabled hence skipping the test...\n");
    return;
  }

  // Test Case execution begins from here
  int managed = 0;
  HIPCHECK(hipDeviceGetAttribute(&managed, hipDeviceAttributeManagedMemory,
                                 0));
  if (managed != 1) {
    return;
  }

  // Plain malloc() memory, handed to the kernel without a device alias.
  const int bytes = sizeof(int);
  int* hostPtr = reinterpret_cast<int*>(malloc(bytes));
  const bool useRawPointer = true;
  const bool coherent = TstCoherency(hostPtr, useRawPointer);
  free(hostPtr);
  REQUIRE(coherent);
}
|
||||
#endif
|
||||
|
||||
|
||||
/* Test case description: The following test validates if coarse grain memory
|
||||
behavior is observed or not with memory allocated using malloc()*/
|
||||
// The following test is failing on Nvidia platform hence disabling it for now
|
||||
#if HT_AMD
|
||||
TEST_CASE("Unit_malloc_CoherentTstWthAdvise") {
  hipDeviceProp_t prop;
  HIPCHECK(hipGetDeviceProperties(&prop, 0));
  char *p = NULL;
  // The test only runs when the arch name of device 0 contains "xnack+".
  p = strstr(prop.gcnArchName, "xnack+");
  if (p) {
    int managed = 0;
    HIP_CHECK(hipDeviceGetAttribute(&managed, hipDeviceAttributeManagedMemory,
                                    0));
    if (managed == 1) {
      int *Ptr = nullptr, SIZE = sizeof(int);

      // Allocating plain malloc() memory
      Ptr = reinterpret_cast<int*>(malloc(SIZE));
      REQUIRE(Ptr != nullptr);
      // Mark the allocation coarse grain, as the test name/description state
      // and as the mmap() variant of this test does.
      HIP_CHECK(hipMemAdvise(Ptr, SIZE, hipMemAdviseSetCoarseGrain, 0));
      *Ptr = 4;
      hipStream_t strm;
      HIP_CHECK(hipStreamCreate(&strm));
      SquareKrnl<<<1, 1, 0, strm>>>(Ptr);
      HIP_CHECK(hipStreamSynchronize(strm));
      HIP_CHECK(hipStreamDestroy(strm));
      // Capture the verdict first so the buffer is released even on failure.
      const bool IfTstPassed = (*Ptr == 16);
      free(Ptr);
      REQUIRE(IfTstPassed);
    }
  } else {
    HipTest::HIP_SKIP_TEST("GPU is not xnack enabled hence skipping the test...\n");
  }
}
|
||||
#endif
|
||||
|
||||
/* Test case description: The following test validates if fine grain memory
|
||||
behavior is observed or not with memory allocated using mmap()*/
|
||||
// The following test is failing on Nvidia platform hence disabling it for now
|
||||
#if HT_AMD
|
||||
TEST_CASE("Unit_mmap_CoherentTst") {
  CHECK_PCIE_ATOMICS_SUPPORT
  hipDeviceProp_t prop;
  HIPCHECK(hipGetDeviceProperties(&prop, 0));

  // The test only runs when the arch name of device 0 contains "xnack+".
  if (strstr(prop.gcnArchName, "xnack+") == nullptr) {
    HipTest::HIP_SKIP_TEST("GPU is not xnack enabled hence skipping the test...\n");
    return;
  }

  int managed = 0;
  HIP_CHECK(hipDeviceGetAttribute(&managed, hipDeviceAttributeManagedMemory,
                                  0));
  if (managed != 1) {
    return;
  }

  // Anonymous private mapping holding the single int under test.
  const bool useRawPointer = true;
  void* mapping = mmap(NULL, sizeof(int), PROT_READ | PROT_WRITE,
                       MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
  int* hostPtr = reinterpret_cast<int*>(mapping);
  if (hostPtr == MAP_FAILED) {
    WARN("Mapping Failed\n");
    REQUIRE(false);
  }

  const bool coherent = TstCoherency(hostPtr, useRawPointer);
  // An unmap failure is only reported; the verdict comes from TstCoherency.
  if (munmap(hostPtr, sizeof(int)) != 0) {
    WARN("munmap failed\n");
  }
  REQUIRE(coherent);
}
|
||||
#endif
|
||||
|
||||
/* Test case description: The following test validates if coarse grain memory
|
||||
behavior is observed or not with memory allocated using mmap()*/
|
||||
// The following test is failing on Nvidia platform hence disabling it for now
|
||||
#if HT_AMD
|
||||
TEST_CASE("Unit_mmap_CoherentTstWthAdvise") {
  hipDeviceProp_t prop;
  HIPCHECK(hipGetDeviceProperties(&prop, 0));
  char *p = NULL;
  // The test only runs when the arch name of device 0 contains "xnack+".
  p = strstr(prop.gcnArchName, "xnack+");
  if (p) {
    int managed = 0;
    HIP_CHECK(hipDeviceGetAttribute(&managed, hipDeviceAttributeManagedMemory,
                                    0));
    if (managed == 1) {
      int SIZE = sizeof(int);
      // Anonymous private mapping holding the single int under test.
      int *Ptr = reinterpret_cast<int*>(mmap(NULL, SIZE,
                                             PROT_READ | PROT_WRITE,
                                             MAP_PRIVATE | MAP_ANONYMOUS, 0, 0));
      if (Ptr == MAP_FAILED) {
        WARN("Mapping Failed\n");
        REQUIRE(false);
      }
      HIP_CHECK(hipMemAdvise(Ptr, SIZE, hipMemAdviseSetCoarseGrain, 0));
      // Initializing the value with 9
      *Ptr = 9;
      hipStream_t strm;
      HIP_CHECK(hipStreamCreate(&strm));
      SquareKrnl<<<1, 1, 0, strm>>>(Ptr);
      HIP_CHECK(hipStreamSynchronize(strm));
      // Release the stream; it was previously left alive past the test.
      HIP_CHECK(hipStreamDestroy(strm));
      bool IfTstPassed = false;
      if (*Ptr == 81) {
        IfTstPassed = true;
      }
      // An unmap failure is only reported; it does not decide the verdict.
      int err = munmap(Ptr, SIZE);
      if (err != 0) {
        WARN("munmap failed\n");
      }
      REQUIRE(IfTstPassed);
    }
  } else {
    HipTest::HIP_SKIP_TEST("GPU is not xnack enabled hence skipping the test...\n");
  }
}
|
||||
#endif
|
||||
|
||||
/* Test Case Description: The following test checks if the memory is
|
||||
accessible when HIP_HOST_COHERENT is set to 0*/
|
||||
// The following test is AMD specific test hence skipping for Nvidia
|
||||
#if HT_AMD
|
||||
TEST_CASE("Unit_hipHostMalloc_WthEnv0Flg1") {
  // HIP_HOST_COHERENT=0 is placed in the environment before forking.
  const int envRc = setenv("HIP_HOST_COHERENT", "0", 1);
  if (envRc != 0) {
    WARN("Unable to turn on HIP_HOST_COHERENT, hence terminating the Test case!");
    REQUIRE(false);
  }

  int childStatus = 0;
  const pid_t forked = fork();
  if (forked != 0) {
    // Parent side: the child reports a pass with exit code 10.
    wait(&childStatus);
    const int Result = WEXITSTATUS(childStatus);
    if (Result != 10) {
      REQUIRE(false);
    }
    return;
  }

  // Child side: square a value held in hipHostMallocPortable memory.
  int* hostPtr = nullptr;
  int* devPtr = nullptr;
  const int bytes = sizeof(int);
  HIP_CHECK(hipHostMalloc(&hostPtr, bytes, hipHostMallocPortable));
  *hostPtr = 4;
  hipStream_t stream;
  HIP_CHECK(hipStreamCreate(&stream));
  HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&devPtr), hostPtr, 0));
  SquareKrnl<<<1, 1, 0, stream>>>(devPtr);
  HIP_CHECK(hipStreamSynchronize(stream));
  HIP_CHECK(hipStreamDestroy(stream));
  // 10 indicates pass, 9 indicates fail; read the value before freeing it.
  const int exitCode = (*hostPtr == 16) ? 10 : 9;
  HIP_CHECK(hipHostFree(hostPtr));
  exit(exitCode);
}
|
||||
#endif
|
||||
|
||||
/* Test Case Description: The following test checks if the memory is
|
||||
accessible when HIP_HOST_COHERENT is set to 0*/
|
||||
// The following test is AMD specific test hence skipping for Nvidia
|
||||
#if HT_AMD
|
||||
TEST_CASE("Unit_hipHostMalloc_WthEnv0Flg2") {
  // HIP_HOST_COHERENT=0 is placed in the environment before forking.
  const int envRc = setenv("HIP_HOST_COHERENT", "0", 1);
  if (envRc != 0) {
    WARN("Unable to turn on HIP_HOST_COHERENT, hence terminating the Test case!");
    REQUIRE(false);
  }

  int childStatus = 0;
  const pid_t forked = fork();
  if (forked != 0) {
    // Parent side: the child reports a pass with exit code 10.
    wait(&childStatus);
    const int Result = WEXITSTATUS(childStatus);
    if (Result != 10) {
      REQUIRE(false);
    }
    return;
  }

  // Child side: square a value held in hipHostMallocWriteCombined memory.
  int* hostPtr = nullptr;
  int* devPtr = nullptr;
  const int bytes = sizeof(int);
  HIP_CHECK(hipHostMalloc(&hostPtr, bytes, hipHostMallocWriteCombined));
  *hostPtr = 4;
  hipStream_t stream;
  HIP_CHECK(hipStreamCreate(&stream));
  HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&devPtr), hostPtr, 0));
  SquareKrnl<<<1, 1, 0, stream>>>(devPtr);
  HIP_CHECK(hipStreamSynchronize(stream));
  HIP_CHECK(hipStreamDestroy(stream));
  // 10 indicates pass, 9 indicates fail; read the value before freeing it.
  const int exitCode = (*hostPtr == 16) ? 10 : 9;
  HIP_CHECK(hipHostFree(hostPtr));
  exit(exitCode);
}
|
||||
#endif
|
||||
|
||||
/* Test Case Description: The following test checks if the memory is
|
||||
accessible when HIP_HOST_COHERENT is set to 0*/
|
||||
// The following test is AMD specific test hence skipping for Nvidia
|
||||
#if HT_AMD
|
||||
TEST_CASE("Unit_hipHostMalloc_WthEnv0Flg3") {
  // HIP_HOST_COHERENT=0 is placed in the environment before forking.
  const int envRc = setenv("HIP_HOST_COHERENT", "0", 1);
  if (envRc != 0) {
    WARN("Unable to turn on HIP_HOST_COHERENT, hence terminating the Test case!");
    REQUIRE(false);
  }

  int childStatus = 0;
  const pid_t forked = fork();
  if (forked != 0) {
    // Parent side: the child reports a pass with exit code 10.
    wait(&childStatus);
    const int Result = WEXITSTATUS(childStatus);
    if (Result != 10) {
      REQUIRE(false);
    }
    return;
  }

  // Child side: square a value held in hipHostMallocNumaUser memory.
  int* hostPtr = nullptr;
  int* devPtr = nullptr;
  const int bytes = sizeof(int);
  HIP_CHECK(hipHostMalloc(&hostPtr, bytes, hipHostMallocNumaUser));
  *hostPtr = 4;
  hipStream_t stream;
  HIP_CHECK(hipStreamCreate(&stream));
  HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&devPtr), hostPtr, 0));
  SquareKrnl<<<1, 1, 0, stream>>>(devPtr);
  HIP_CHECK(hipStreamSynchronize(stream));
  HIP_CHECK(hipStreamDestroy(stream));
  // 10 indicates pass, 9 indicates fail; read the value before freeing it.
  const int exitCode = (*hostPtr == 16) ? 10 : 9;
  HIP_CHECK(hipHostFree(hostPtr));
  exit(exitCode);
}
|
||||
#endif
|
||||
|
||||
/* Test Case Description: The following test checks if the memory is
|
||||
accessible when HIP_HOST_COHERENT is set to 0*/
|
||||
// The following test is AMD specific test hence skipping for Nvidia
|
||||
#if HT_AMD
|
||||
TEST_CASE("Unit_hipHostMalloc_WthEnv0Flg4") {
  // HIP_HOST_COHERENT=0 is placed in the environment before forking.
  const int envRc = setenv("HIP_HOST_COHERENT", "0", 1);
  if (envRc != 0) {
    WARN("Unable to turn on HIP_HOST_COHERENT, hence terminating the Test case!");
    REQUIRE(false);
  }

  int childStatus = 0;
  const pid_t forked = fork();
  if (forked != 0) {
    // Parent side: the child reports a pass with exit code 10.
    wait(&childStatus);
    const int Result = WEXITSTATUS(childStatus);
    if (Result != 10) {
      REQUIRE(false);
    }
    return;
  }

  // Child side: square a value held in hipHostMallocNonCoherent memory.
  int* hostPtr = nullptr;
  int* devPtr = nullptr;
  const int bytes = sizeof(int);
  HIP_CHECK(hipHostMalloc(&hostPtr, bytes, hipHostMallocNonCoherent));
  *hostPtr = 4;
  hipStream_t stream;
  HIP_CHECK(hipStreamCreate(&stream));
  HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&devPtr), hostPtr, 0));
  SquareKrnl<<<1, 1, 0, stream>>>(devPtr);
  HIP_CHECK(hipStreamSynchronize(stream));
  HIP_CHECK(hipStreamDestroy(stream));
  // 10 indicates pass, 9 indicates fail; read the value before freeing it.
  const int exitCode = (*hostPtr == 16) ? 10 : 9;
  HIP_CHECK(hipHostFree(hostPtr));
  exit(exitCode);
}
|
||||
#endif
|
||||
|
||||
|
||||
/* Test Case Description: The following test checks if the memory exhibits
|
||||
fine grain behavior when HIP_HOST_COHERENT is set to 1*/
|
||||
// The following test is AMD specific test hence skipping for Nvidia
|
||||
#if HT_AMD
|
||||
TEST_CASE("Unit_hipHostMalloc_WthEnv1") {
  // HIP_HOST_COHERENT=1 is placed in the environment before forking.
  if ((setenv("HIP_HOST_COHERENT", "1", 1)) != 0) {
    WARN("Unable to turn on HIP_HOST_COHERENT, hence terminating the Test case!");
    REQUIRE(false);
  }
  int stat = 0;
  const pid_t pid = fork();
  // A failed fork() used to fall into the parent branch, where wait() failed
  // and the untouched status of 0 was read as EXIT_SUCCESS.
  REQUIRE(pid >= 0);
  if (pid == 0) {  // child process
    CHECK_PCIE_ATOMICS_SUPPORT
    int *Ptr = nullptr, SIZE = sizeof(int);
    bool HmmMem = false;
    // Allocating hipHostMalloc() memory with the default flags
    HIP_CHECK(hipHostMalloc(&Ptr, SIZE));
    auto ret = TstCoherency(Ptr, HmmMem);
    HIP_CHECK(hipHostFree(Ptr));
    exit(ret ? EXIT_SUCCESS : EXIT_FAILURE);
  } else {  // parent process
    const pid_t reaped = waitpid(pid, &stat, 0);
    REQUIRE(reaped == pid);
    // The exit status is only meaningful after a normal exit; a child killed
    // by a signal also yields 0 there and must not count as a pass.
    const bool exitedNormally = WIFEXITED(stat);
    REQUIRE(exitedNormally);
    const int exitCode = WEXITSTATUS(stat);
    REQUIRE(exitCode == EXIT_SUCCESS);
  }
}
|
||||
#endif
|
||||
|
||||
|
||||
/* Test Case Description: The following test checks if the memory exhibits
|
||||
fine grain behavior when HIP_HOST_COHERENT is set to 1*/
|
||||
// The following test is AMD specific test hence skipping for Nvidia
|
||||
#if HT_AMD
|
||||
TEST_CASE("Unit_hipHostMalloc_WthEnv1Flg1") {
  // HIP_HOST_COHERENT=1 is placed in the environment before forking.
  if ((setenv("HIP_HOST_COHERENT", "1", 1)) != 0) {
    WARN("Unable to turn on HIP_HOST_COHERENT, hence terminating the Test case!");
    REQUIRE(false);
  }
  int stat = 0;
  const pid_t pid = fork();
  // A failed fork() used to fall into the parent branch, where wait() failed
  // and the untouched status of 0 was read as EXIT_SUCCESS.
  REQUIRE(pid >= 0);
  if (pid == 0) {  // child process
    CHECK_PCIE_ATOMICS_SUPPORT
    int *Ptr = nullptr, SIZE = sizeof(int);
    bool HmmMem = false;
    // Allocating hipHostMalloc() memory with hipHostMallocPortable
    HIP_CHECK(hipHostMalloc(&Ptr, SIZE, hipHostMallocPortable));
    auto ret = TstCoherency(Ptr, HmmMem);
    HIP_CHECK(hipHostFree(Ptr));
    exit(ret ? EXIT_SUCCESS : EXIT_FAILURE);
  } else {  // parent process
    const pid_t reaped = waitpid(pid, &stat, 0);
    REQUIRE(reaped == pid);
    // The exit status is only meaningful after a normal exit; a child killed
    // by a signal also yields 0 there and must not count as a pass.
    const bool exitedNormally = WIFEXITED(stat);
    REQUIRE(exitedNormally);
    const int exitCode = WEXITSTATUS(stat);
    REQUIRE(exitCode == EXIT_SUCCESS);
  }
}
|
||||
#endif
|
||||
|
||||
/* Test Case Description: The following test checks if the memory exhibits
|
||||
fine grain behavior when HIP_HOST_COHERENT is set to 1*/
|
||||
// The following test is AMD specific test hence skipping for Nvidia
|
||||
#if HT_AMD
|
||||
TEST_CASE("Unit_hipHostMalloc_WthEnv1Flg2") {
  // HIP_HOST_COHERENT=1 is placed in the environment before forking.
  if ((setenv("HIP_HOST_COHERENT", "1", 1)) != 0) {
    WARN("Unable to turn on HIP_HOST_COHERENT, hence terminating the Test case!");
    REQUIRE(false);
  }
  int stat = 0;
  const pid_t pid = fork();
  // A failed fork() used to fall into the parent branch, where wait() failed
  // and the untouched status of 0 was read as EXIT_SUCCESS.
  REQUIRE(pid >= 0);
  if (pid == 0) {  // child process
    CHECK_PCIE_ATOMICS_SUPPORT
    int *Ptr = nullptr, SIZE = sizeof(int);
    bool HmmMem = false;
    // Allocating hipHostMalloc() memory with hipHostMallocWriteCombined
    HIP_CHECK(hipHostMalloc(&Ptr, SIZE, hipHostMallocWriteCombined));
    auto ret = TstCoherency(Ptr, HmmMem);
    HIP_CHECK(hipHostFree(Ptr));
    exit(ret ? EXIT_SUCCESS : EXIT_FAILURE);
  } else {  // parent process
    const pid_t reaped = waitpid(pid, &stat, 0);
    REQUIRE(reaped == pid);
    // The exit status is only meaningful after a normal exit; a child killed
    // by a signal also yields 0 there and must not count as a pass.
    const bool exitedNormally = WIFEXITED(stat);
    REQUIRE(exitedNormally);
    const int exitCode = WEXITSTATUS(stat);
    REQUIRE(exitCode == EXIT_SUCCESS);
  }
}
|
||||
#endif
|
||||
|
||||
/* Test Case Description: The following test checks if the memory exhibits
|
||||
fine grain behavior when HIP_HOST_COHERENT is set to 1*/
|
||||
// The following test is AMD specific test hence skipping for Nvidia
|
||||
#if HT_AMD
|
||||
TEST_CASE("Unit_hipHostMalloc_WthEnv1Flg3") {
  // HIP_HOST_COHERENT=1 is placed in the environment before forking.
  if ((setenv("HIP_HOST_COHERENT", "1", 1)) != 0) {
    WARN("Unable to turn on HIP_HOST_COHERENT, hence terminating the Test case!");
    REQUIRE(false);
  }
  int stat = 0;
  const pid_t pid = fork();
  // A failed fork() used to fall into the parent branch, where wait() failed
  // and the untouched status of 0 was read as EXIT_SUCCESS.
  REQUIRE(pid >= 0);
  if (pid == 0) {  // child process
    CHECK_PCIE_ATOMICS_SUPPORT
    int *Ptr = nullptr, SIZE = sizeof(int);
    bool HmmMem = false;
    // Allocating hipHostMalloc() memory with hipHostMallocNumaUser
    HIP_CHECK(hipHostMalloc(&Ptr, SIZE, hipHostMallocNumaUser));
    auto ret = TstCoherency(Ptr, HmmMem);
    HIP_CHECK(hipHostFree(Ptr));
    exit(ret ? EXIT_SUCCESS : EXIT_FAILURE);
  } else {  // parent process
    const pid_t reaped = waitpid(pid, &stat, 0);
    REQUIRE(reaped == pid);
    // The exit status is only meaningful after a normal exit; a child killed
    // by a signal also yields 0 there and must not count as a pass.
    const bool exitedNormally = WIFEXITED(stat);
    REQUIRE(exitedNormally);
    const int exitCode = WEXITSTATUS(stat);
    REQUIRE(exitCode == EXIT_SUCCESS);
  }
}
|
||||
#endif
|
||||
#endif
|
||||
@@ -0,0 +1,393 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
#include <hip_test_common.hh>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#ifdef __linux__
|
||||
#include <unistd.h>
|
||||
#include <sys/wait.h>
|
||||
#include <sys/types.h>
|
||||
|
||||
#define ReadEnd 0
|
||||
#define WriteEnd 1
|
||||
#define MAX_SIZE 32
|
||||
#define FREE_MEM_TO_HIDE 4294967296
|
||||
#define SIZE_TO_ALLOCATE 2147483648
|
||||
/*
|
||||
* In main process allocate 2 GB of device memory.
|
||||
* Fork() a child process and verify that 2 GB has been
|
||||
* allocated in parent process.
|
||||
*/
|
||||
TEST_CASE("Unit_hipMemGetInfo_Functional_Scenario1") {
  // The parent allocates `size` bytes of device memory and signals a forked
  // child; the child answers 1/0 depending on whether hipMemGetInfo reports
  // at least `size` bytes in use.
  constexpr size_t size = 2147483648;  // 2GB
  // fd carries the child's verdict to the parent, fd1 the parent's go-signal.
  int fd[2], fd1[2], status;
  status = pipe(fd);
  REQUIRE(status == 0);
  status = pipe(fd1);
  REQUIRE(status == 0);
  pid_t child_pid;
  child_pid = fork();  // Create a new child process
  if (child_pid < 0) {
    WARN("Fork failed!!!!");
    // No peer exists, so release all descriptors instead of leaking them.
    close(fd[ReadEnd]);
    close(fd[WriteEnd]);
    close(fd1[ReadEnd]);
    close(fd1[WriteEnd]);
  } else if (child_pid == 0) {  // child
    close(fd1[WriteEnd]);
    close(fd[ReadEnd]);
    int result;
    size_t free = 0, total = 0;
    // Wait for signal from parent
    int check_child;
    status = read(fd1[ReadEnd], &check_child, sizeof(check_child));
    REQUIRE(status != -1);
    close(fd1[ReadEnd]);
    // Check the total and free memory which is allocated in parent
    HIP_CHECK(hipMemGetInfo(&free, &total));
    if ((total - free) >= size) {
      result = 1;
    } else {
      result = 0;
    }
    // Write the result to parent
    status = write(fd[WriteEnd], &result, sizeof(result));
    REQUIRE(status != -1);
    close(fd[WriteEnd]);
    exit(0);
  } else {  // Parent
    close(fd1[ReadEnd]);
    close(fd[WriteEnd]);
    // Allocate memory
    char* A_d = nullptr;
    HIP_CHECK(hipMalloc(&A_d, size));
    // Signal the child
    int check = 0;
    status = write(fd1[WriteEnd], &check, sizeof(check));
    REQUIRE(status != -1);
    close(fd1[WriteEnd]);
    // Read the result from Child. The value starts at 0 so that a child which
    // died before answering (read() returns 0 on EOF) is never mistaken for a
    // positive verdict.
    int read_result = 0;
    status = read(fd[ReadEnd], &read_result, sizeof(read_result));
    close(fd[ReadEnd]);
    // The verdict is already in hand: release the allocation and reap the
    // child before asserting, so a failing assertion leaks neither.
    HIP_CHECK(hipFree(A_d));
    // wait for child exit
    wait(NULL);
    REQUIRE(status == static_cast<int>(sizeof(read_result)));
    REQUIRE(read_result == 1);
  }
}
|
||||
/**
|
||||
* From main process Fork() a child process. In the child process allocate
|
||||
* 2 GB of device memory. Signal the parent process. Verify from the parent
|
||||
* process that 2 GB is allocated in the child process.
|
||||
*/
|
||||
TEST_CASE("Unit_hipMemGetInfo_Functional_Scenario2") {
  // A forked child allocates `size` bytes of device memory and signals the
  // parent; the parent checks via hipMemGetInfo that at least `size` bytes
  // are in use, then lets the child free the memory and exit.
  constexpr size_t size = 2147483648;  // 2GB
  // fd carries the child's "allocated" signal, fd2 the parent's "done" signal.
  int fd[2], fd2[2], status;
  status = pipe(fd);
  REQUIRE(status == 0);
  status = pipe(fd2);
  REQUIRE(status == 0);
  pid_t child_pid;
  child_pid = fork();  // Create a new child process
  if (child_pid < 0) {
    WARN("Fork failed!!!!");
    // No peer exists, so release all descriptors instead of leaking them.
    close(fd[ReadEnd]);
    close(fd[WriteEnd]);
    close(fd2[ReadEnd]);
    close(fd2[WriteEnd]);
  } else if (child_pid == 0) {  // Child
    close(fd[ReadEnd]);
    close(fd2[WriteEnd]);
    // Allocate memory
    float* A_d = nullptr;
    HIP_CHECK(hipMalloc(&A_d, size));
    // Signal the parent
    int data = 0;
    status = write(fd[WriteEnd], &data, sizeof(data));
    REQUIRE(status != -1);
    close(fd[WriteEnd]);
    int valid = 0;
    // Wait for Signal from parent before freeing memory and exiting
    status = read(fd2[ReadEnd], &valid, sizeof(valid));
    REQUIRE(status != -1);
    close(fd2[ReadEnd]);
    // Free allocated device memory
    HIP_CHECK(hipFree(A_d));
    exit(0);
  } else {  // Parent
    size_t free = 0, total = 0;
    close(fd[WriteEnd]);
    close(fd2[ReadEnd]);
    // Wait for child signal
    int data = 0;
    const int received = read(fd[ReadEnd], &data, sizeof(data));
    close(fd[ReadEnd]);
    // Snapshot the memory usage while the child still holds its allocation.
    const hipError_t info_status = hipMemGetInfo(&free, &total);
    // Release and reap the child BEFORE asserting anything: a failing
    // assertion would otherwise leave the child blocked on read(), still
    // holding the device memory while later tests run.
    int valid = 0;
    const int sent = write(fd2[WriteEnd], &valid, sizeof(valid));
    close(fd2[WriteEnd]);  // closing also unblocks the child if write failed
    // wait for child exit
    wait(NULL);
    // A short read means the child never signalled a successful allocation.
    REQUIRE(received == static_cast<int>(sizeof(data)));
    // Verify the memory
    HIP_CHECK(info_status);
    REQUIRE((total - free) >= size);
    REQUIRE(sent != -1);
  }
}
|
||||
/*
|
||||
* From main process Fork() a child process. In the child process
|
||||
* allocate 2 GB of device memory. Free the memory and exit from
|
||||
* child process. Verify from the parent process that 2 GB is
|
||||
* freed in the child process.
|
||||
*/
|
||||
TEST_CASE("Unit_hipMemGetInfo_Functional_Scenario3") {
  // A forked child allocates and immediately frees `size` bytes of device
  // memory, then notifies the parent, which queries hipMemGetInfo.
  constexpr size_t size = 2147483648;  // 2GB
  int fd[2], status;
  status = pipe(fd);
  REQUIRE(status == 0);

  const pid_t child_pid = fork();  // Create a new child process
  if (child_pid < 0) {
    WARN("Fork failed!!!!");
    return;
  }

  if (child_pid == 0) {  // Child
    close(fd[ReadEnd]);
    // Allocate, then release, the device memory
    void* A_d = nullptr;
    HIP_CHECK(hipMalloc(&A_d, size));
    HIP_CHECK(hipFree(A_d));
    // Tell the parent that the memory has been freed
    int check = 0;
    status = write(fd[WriteEnd], &check, sizeof(check));
    REQUIRE(status != -1);
    close(fd[WriteEnd]);
    exit(0);
  }

  // Parent
  close(fd[WriteEnd]);
  // Block until the child reports that it freed its allocation
  int check_parent;
  status = read(fd[ReadEnd], &check_parent, sizeof(check_parent));
  REQUIRE(status != -1);
  close(fd[ReadEnd]);

  size_t free = 0, total = 0;
  HIP_CHECK(hipMemGetInfo(&free, &total));
  // NOTE(review): both operands are size_t, so this comparison is always
  // true and does not actually verify that the child's memory was released.
  REQUIRE((total - free) >= 0);

  // wait for child exit
  wait(NULL);
}
|
||||
/*
|
||||
* From main process Fork() a child process. In the child process allocate
|
||||
* 2 GB of device memory. Exit from child process. Verify from the parent
|
||||
* process that 2 GB is freed in the child process.
|
||||
*/
|
||||
TEST_CASE("Unit_hipMemGetInfo_Functional_scenario4") {
  // A forked child allocates `size` bytes of device memory and exits without
  // freeing it; the parent waits for the exit and queries hipMemGetInfo.
  constexpr size_t size = 2147483648;  // 2GB

  const pid_t child_pid = fork();  // Create a new child process
  if (child_pid < 0) {
    WARN("Fork failed!!!!");
    return;
  }

  if (child_pid == 0) {  // Child
    // Allocate the memory and leave it to process teardown
    void* A_d = nullptr;
    HIP_CHECK(hipMalloc(&A_d, size));
    exit(0);
  }

  // Parent: wait for child exit
  wait(NULL);

  size_t free = 0, total = 0;
  HIP_CHECK(hipMemGetInfo(&free, &total));
  // NOTE(review): both operands are size_t, so this comparison is always
  // true and does not actually verify that the child's memory was released.
  REQUIRE((total-free) >= 0);
}
|
||||
/*
|
||||
* Multidevice Scenario: In main process allocate 2 GB of device memory
|
||||
* in every device. Verify that 2 GB is allocated using hipMemGetInfo.
|
||||
* Fork() a child process and verify that 2 GB has been allocated from
|
||||
* parent process in every device.
|
||||
*/
|
||||
TEST_CASE("Unit_hipMemGetInfo_Functional_MultiDevice_Scenario5") {
  // The parent allocates `size` bytes on every device and signals a forked
  // child; the child answers 1 only if hipMemGetInfo reports at least `size`
  // bytes in use on each device, otherwise 0.
  constexpr size_t size = 2147483648;  // 2GB
  size_t free = 0, total = 0;
  // fd1 carries the parent's go-signal, fd2 the child's verdict.
  int fd1[2], fd2[2], status;
  status = pipe(fd1);
  REQUIRE(status == 0);
  status = pipe(fd2);
  REQUIRE(status == 0);
  pid_t child_pid;
  child_pid = fork();  // Create a new child process
  if (child_pid < 0) {
    WARN("Fork failed!!!!");
    // No peer exists, so release all descriptors instead of leaking them.
    close(fd1[ReadEnd]);
    close(fd1[WriteEnd]);
    close(fd2[ReadEnd]);
    close(fd2[WriteEnd]);
  } else if (child_pid == 0) {  // Child
    close(fd1[WriteEnd]);
    close(fd2[ReadEnd]);
    // Wait for the signal from parent after memory allocation
    int check_child;
    status = read(fd1[ReadEnd], &check_child, sizeof(check_child));
    REQUIRE(status != -1);
    close(fd1[ReadEnd]);
    int num_devices, result, count = 0;
    // Get the device count
    HIP_CHECK(hipGetDeviceCount(&num_devices));
    for (int i = 0; i < num_devices; i++) {
      HIP_CHECK(hipSetDevice(i));
      // Check the memory
      HIP_CHECK(hipMemGetInfo(&free , &total));
      if ((total - free) >= size) {
        count+=1;
      }
    }
    if ( count == num_devices ) {
      result = 1;
    } else {
      result = 0;
    }
    // Write the result to Parent
    status = write(fd2[WriteEnd], &result, sizeof(result));
    REQUIRE(status != -1);
    close(fd2[WriteEnd]);
    exit(0);
  } else {  // Parent
    close(fd1[ReadEnd]);
    close(fd2[WriteEnd]);
    int num_devices;
    // Get the device count
    HIP_CHECK(hipGetDeviceCount(&num_devices));
    std::vector<void*>v(num_devices, nullptr);
    for (int i = 0; i < num_devices; i++) {
      HIP_CHECK(hipSetDevice(i));
      // Query, allocate, query again; only the success of each call is
      // checked here, the usage comparison is done by the child.
      HIP_CHECK(hipMemGetInfo(&free , &total));
      HIP_CHECK(hipMalloc(&v[i], size));
      HIP_CHECK(hipMemGetInfo(&free , &total));
    }
    // Signal the child about memory allocation
    int check = 0;
    status = write(fd1[WriteEnd], &check, sizeof(check));
    REQUIRE(status != -1);
    close(fd1[WriteEnd]);
    // Read result from child. The value starts at 0 so that a child which
    // died before answering (read() returns 0 on EOF) is never mistaken for a
    // positive verdict.
    int result_parent = 0;
    status = read(fd2[ReadEnd], &result_parent, sizeof(result_parent));
    close(fd2[ReadEnd]);
    // The verdict is already in hand: free the per-device allocations and
    // reap the child before asserting, so a failing assertion leaks neither.
    for (int i = 0; i < num_devices; i++) {
      HIP_CHECK(hipSetDevice(i));
      HIP_CHECK(hipFree(v[i]));
    }
    // wait for child exit
    wait(NULL);
    REQUIRE(status == static_cast<int>(sizeof(result_parent)));
    REQUIRE(result_parent == 1);
  }
}
|
||||
|
||||
#if HT_AMD
|
||||
static bool testHiddenFreeMemFromChild() {
|
||||
bool result = true;
|
||||
int testResult = 0, result_dummy = 0;
|
||||
int fd_c2p[2], fd_p2c[2];
|
||||
pipe(fd_c2p);
|
||||
pipe(fd_p2c);
|
||||
pid_t cPid;
|
||||
cPid = fork();
|
||||
if (cPid == 0) { // child
|
||||
size_t free = 0, total = 0, min_size = 0;
|
||||
close(fd_c2p[ReadEnd]);
|
||||
close(fd_p2c[WriteEnd]);
|
||||
int64_t size_tohide = (FREE_MEM_TO_HIDE/(1024*1024)); // in MB
|
||||
// set environment variable from shell
|
||||
unsetenv("HIP_HIDDEN_FREE_MEM");
|
||||
setenv("HIP_HIDDEN_FREE_MEM", std::to_string(size_tohide).c_str(), 1);
|
||||
// allocate memory in device
|
||||
char* d_ptr{nullptr};
|
||||
HIP_CHECK(hipMalloc(&d_ptr, SIZE_TO_ALLOCATE));
|
||||
HIP_CHECK(hipMemGetInfo(&free, &total));
|
||||
min_size = (FREE_MEM_TO_HIDE + SIZE_TO_ALLOCATE);
|
||||
if ((total - free) >= min_size) {
|
||||
testResult = 1;
|
||||
}
|
||||
// Write to and signal parent
|
||||
write(fd_c2p[WriteEnd], &testResult, sizeof(testResult));
|
||||
close(fd_c2p[WriteEnd]);
|
||||
// Wait for signal from parent
|
||||
read(fd_p2c[ReadEnd], &result_dummy, sizeof(result_dummy));
|
||||
close(fd_p2c[ReadEnd]);
|
||||
exit(0);
|
||||
} else if (cPid > 0) { // parent
|
||||
close(fd_c2p[WriteEnd]);
|
||||
close(fd_p2c[ReadEnd]);
|
||||
// wait for result from child
|
||||
read(fd_c2p[ReadEnd], &testResult, sizeof(testResult));
|
||||
close(fd_c2p[ReadEnd]);
|
||||
if (testResult) {
|
||||
result &= true;
|
||||
} else {
|
||||
result &= false;
|
||||
}
|
||||
size_t free = 0, total = 0, min_size = SIZE_TO_ALLOCATE;
|
||||
HIP_CHECK(hipMemGetInfo(&free, &total));
|
||||
if ((total - free) >= min_size) {
|
||||
result &= true;
|
||||
} else {
|
||||
result &= false;
|
||||
}
|
||||
// Write to and signal child
|
||||
write(fd_p2c[WriteEnd], &result_dummy, sizeof(result_dummy));
|
||||
close(fd_p2c[WriteEnd]);
|
||||
wait(NULL);
|
||||
} else {
|
||||
WARN("fork() failed");
|
||||
HIP_ASSERT(false);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Scenario: Fork() a child process. In child, get free and total memory.
|
||||
* Set the HIP_HIDDEN_FREE_MEM to 4GB. Allocate 2 GB of device memory.
|
||||
* Get the free and total memory. Free memory available should be
|
||||
* (actual free - 6 GB). Signal parent process. Wait for signal from child
|
||||
* in parent. Get free and total memory. Free memory available should be
|
||||
* (actual free - 4 GB).
|
||||
*/
|
||||
TEST_CASE("Unit_hipMemGetInfo_SetHiddenFreeMemFromChild") {
  // All of the work, including the fork, happens inside the helper.
  const bool passed = testHiddenFreeMemFromChild();
  REQUIRE(passed);
}
|
||||
|
||||
/**
|
||||
* Scenario: Set the HIP_HIDDEN_FREE_MEM to 4GB. Invoke hipMemGetInfo to
|
||||
* verify that 4GB free memory is hidden for all available GPUs.
|
||||
*/
|
||||
TEST_CASE("Unit_hipMemGetInfo_VerifyHiddenFreeMemForAllGpu") {
  // Sets HIP_HIDDEN_FREE_MEM (in MB) and checks on every device that
  // hipMemGetInfo reports at least FREE_MEM_TO_HIDE bytes as not free.
  const int64_t size_tohide = (FREE_MEM_TO_HIDE/(1024*1024));  // in MB
  const std::string hidden_mb = std::to_string(size_tohide);
  unsetenv("HIP_HIDDEN_FREE_MEM");
  setenv("HIP_HIDDEN_FREE_MEM", hidden_mb.c_str(), 1);

  int numDevices = 0;
  HIP_CHECK(hipGetDeviceCount(&numDevices));
  for (int dev = 0; dev != numDevices; ++dev) {
    size_t free = 0, total = 0;
    HIP_CHECK(hipSetDevice(dev));
    HIP_CHECK(hipMemGetInfo(&free, &total));
    REQUIRE((total - free) >= FREE_MEM_TO_HIDE);
  }
}
|
||||
#endif
|
||||
#endif
|
||||
تفاوت فایلی نمایش داده نمی شود زیرا این فایل بسیار بزرگ است
Diff را بارگزاری کن
@@ -0,0 +1,561 @@
|
||||
/*
|
||||
* Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Test designed to run on Linux based platforms
|
||||
* Verifies functionality of
|
||||
* -- hipSetDevice and hipGetDevice with different ROCR_VISIBLE_DEVICES and
|
||||
* HIP_VISIBLE_DEVICES values set
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
#ifdef __linux__
|
||||
#include <sys/wait.h>
|
||||
#include <unistd.h>
|
||||
|
||||
|
||||
#define MAX_SIZE 30
|
||||
|
||||
/**
|
||||
* Fetches Gpu device count
|
||||
*/
|
||||
/**
 * Queries the GPU device count from a forked child process, after clearing
 * ROCR_VISIBLE_DEVICES and HIP_VISIBLE_DEVICES from the environment.
 *
 * @param pdevCnt receives the count reported by the child, or 0 when the
 *                pipe or the fork could not be created or the child did not
 *                report a value.
 */
static void getDeviceCount(int *pdevCnt) {
  int fd[2], val = 0;
  pid_t childpid;

  // disable visible_devices env from shell
  unsetenv("ROCR_VISIBLE_DEVICES");
  unsetenv("HIP_VISIBLE_DEVICES");

  // create pipe descriptors; without them the child cannot report back
  if (pipe(fd) != 0) {
    *pdevCnt = 0;
    return;
  }

  childpid = fork();

  if (childpid > 0) {  // Parent
    close(fd[1]);
    // parent will wait to read the device cnt; val stays 0 on a short read
    read(fd[0], &val, sizeof(val));

    // close the read-descriptor
    close(fd[0]);

    // wait for child exit
    wait(NULL);

    *pdevCnt = val;
  } else if (!childpid) {  // Child
    int devCnt = 1;
    // writing only, no need for read-descriptor
    close(fd[0]);

    HIP_CHECK(hipGetDeviceCount(&devCnt));

    // send the value on the write-descriptor:
    write(fd[1], &devCnt, sizeof(devCnt));

    // close the write descriptor:
    close(fd[1]);
    exit(0);
  } else {  // failure
    // No child was created: release both pipe ends instead of leaking them.
    close(fd[0]);
    close(fd[1]);
    *pdevCnt = 0;
  }
}
|
||||
|
||||
|
||||
// Pass either -1 in deviceNumber or invalid device number
|
||||
static void testInvalidDevice(int numDevices, bool useRocrEnv,
|
||||
int deviceNumber) {
|
||||
bool testResult = true;
|
||||
int device;
|
||||
int tempCount = 0;
|
||||
int setDeviceErrorCheck = 0;
|
||||
int getDeviceErrorCheck = 0;
|
||||
int getDeviceCountErrorCheck = 0;
|
||||
int fd[2];
|
||||
pipe(fd);
|
||||
|
||||
pid_t cPid;
|
||||
cPid = fork();
|
||||
|
||||
char visibleDeviceString[MAX_SIZE] = {};
|
||||
snprintf(visibleDeviceString, MAX_SIZE, "%d", deviceNumber);
|
||||
|
||||
if (cPid == 0) { // child
|
||||
hipError_t err;
|
||||
#ifdef __HIP_PLATFORM_NVIDIA__
|
||||
setenv("CUDA_VISIBLE_DEVICES", visibleDeviceString, 1);
|
||||
#else
|
||||
if (true == useRocrEnv) {
|
||||
setenv("ROCR_VISIBLE_DEVICES", visibleDeviceString, 1);
|
||||
} else {
|
||||
setenv("HIP_VISIBLE_DEVICES", visibleDeviceString, 1);
|
||||
}
|
||||
#endif
|
||||
err = hipGetDeviceCount(&tempCount);
|
||||
if (err != hipSuccess) {
|
||||
getDeviceCountErrorCheck = 1;
|
||||
}
|
||||
for (int i = 0; i < numDevices; i++) {
|
||||
err = hipSetDevice(i);
|
||||
if (err != hipSuccess) {
|
||||
setDeviceErrorCheck+= 1;
|
||||
}
|
||||
|
||||
err = hipGetDevice(&device);
|
||||
if (err != hipSuccess) {
|
||||
getDeviceErrorCheck+= 1;
|
||||
}
|
||||
}
|
||||
|
||||
if ((getDeviceCountErrorCheck == 1) && (setDeviceErrorCheck == numDevices)
|
||||
&& (getDeviceErrorCheck == numDevices)) {
|
||||
testResult = true;
|
||||
|
||||
} else {
|
||||
printf("Test failed for invalid device, getDeviceCountErrorCheck %d,"
|
||||
"setDeviceErrorCheck %d, getDeviceErrorCheck %d\n",
|
||||
getDeviceCountErrorCheck, setDeviceErrorCheck,
|
||||
getDeviceErrorCheck);
|
||||
|
||||
testResult = false;
|
||||
}
|
||||
|
||||
close(fd[0]);
|
||||
write(fd[1], &testResult, sizeof(testResult));
|
||||
close(fd[1]);
|
||||
exit(0);
|
||||
|
||||
} else if (cPid > 0) { // parent
|
||||
close(fd[1]);
|
||||
read(fd[0], &testResult, sizeof(testResult));
|
||||
close(fd[0]);
|
||||
wait(NULL);
|
||||
|
||||
} else {
|
||||
printf("fork() failed\n");
|
||||
HIP_ASSERT(false);
|
||||
}
|
||||
|
||||
REQUIRE(testResult == true);
|
||||
}
|
||||
|
||||
|
||||
static void testValidDevices(int numDevices, bool useRocrEnv, int *deviceList,
|
||||
int deviceListLength) {
|
||||
bool testResult = true;
|
||||
int tempCount = 0;
|
||||
int device;
|
||||
int setDeviceErrorCheck = 0;
|
||||
int getDeviceErrorCheck = 0;
|
||||
int getDeviceCountErrorCheck = 0;
|
||||
int *deviceListPtr = deviceList;
|
||||
std::string visibleDeviceString;
|
||||
|
||||
if ((NULL == deviceList) || ((deviceListLength < 1) ||
|
||||
deviceListLength > numDevices)) {
|
||||
INFO("Invalid argument for number of devices. Skipping current test");
|
||||
REQUIRE(false);
|
||||
}
|
||||
|
||||
for (int i = 0; i < deviceListLength; i++) {
|
||||
visibleDeviceString.append(std::to_string(*deviceListPtr++) + ",");
|
||||
}
|
||||
|
||||
int fd[2];
|
||||
pipe(fd);
|
||||
|
||||
pid_t cPid;
|
||||
cPid = fork();
|
||||
|
||||
if (cPid == 0) {
|
||||
#ifdef __HIP_PLATFORM_NVIDIA__
|
||||
unsetenv("CUDA_VISIBLE_DEVICES");
|
||||
setenv("CUDA_VISIBLE_DEVICES", visibleDeviceString.c_str(), 1);
|
||||
#else
|
||||
unsetenv("ROCR_VISIBLE_DEVICES");
|
||||
unsetenv("HIP_VISIBLE_DEVICES");
|
||||
if (true == useRocrEnv) {
|
||||
setenv("ROCR_VISIBLE_DEVICES", visibleDeviceString.c_str(), 1);
|
||||
} else {
|
||||
setenv("HIP_VISIBLE_DEVICES", visibleDeviceString.c_str(), 1);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
hipError_t err;
|
||||
err = hipGetDeviceCount(&tempCount);
|
||||
|
||||
if (tempCount == deviceListLength) {
|
||||
getDeviceCountErrorCheck = 1;
|
||||
} else {
|
||||
printf("hipGetDeviceCount failed. return value: %u\n", hipError_t(err));
|
||||
}
|
||||
|
||||
for (int i = 0; i < numDevices; i++) {
|
||||
err = hipSetDevice(i);
|
||||
if (err != hipSuccess) {
|
||||
setDeviceErrorCheck+= 1;
|
||||
}
|
||||
|
||||
err = hipGetDevice(&device);
|
||||
if (err != hipSuccess) {
|
||||
getDeviceErrorCheck+= 1;
|
||||
}
|
||||
}
|
||||
|
||||
if ((getDeviceCountErrorCheck == 1) && (setDeviceErrorCheck ==
|
||||
(numDevices-deviceListLength)) && (getDeviceErrorCheck == 0)) {
|
||||
testResult = true;
|
||||
|
||||
} else {
|
||||
printf("Test failed for device count %d\n", deviceListLength);
|
||||
testResult = false;
|
||||
}
|
||||
|
||||
close(fd[0]);
|
||||
write(fd[1], &testResult, sizeof(testResult));
|
||||
close(fd[1]);
|
||||
exit(0);
|
||||
|
||||
} else if (cPid > 0) {
|
||||
close(fd[1]);
|
||||
read(fd[0], &testResult, sizeof(testResult));
|
||||
close(fd[0]);
|
||||
wait(NULL);
|
||||
|
||||
} else {
|
||||
printf("fork() failed\n");
|
||||
HIP_ASSERT(false);
|
||||
}
|
||||
|
||||
REQUIRE(testResult == true);
|
||||
}
|
||||
|
||||
|
||||
/**
 * Appends two comma-separated device lists to the given strings.
 *
 * @param deviceList               ids used for the "min" list
 * @param numDevices               the "max" list is 0,1,...,numDevices-1
 * @param count                    number of ids taken from deviceList
 * @param min_visibleDeviceString  receives the first `count` ids of deviceList
 * @param max_visibleDeviceString  receives every index below numDevices
 *
 * Both strings are appended to, not cleared, and get no trailing comma.
 */
static void Initialize(int *deviceList, int numDevices, int count,
    std::string& min_visibleDeviceString, std::string& max_visibleDeviceString) {
  for (int pos = 0; pos < count; ++pos) {
    min_visibleDeviceString += std::to_string(deviceList[pos]);
    if (pos + 1 < count) {
      min_visibleDeviceString += ",";
    }
  }

  for (int id = 0; id < numDevices; ++id) {
    max_visibleDeviceString += std::to_string(id);
    if (id + 1 < numDevices) {
      max_visibleDeviceString += ",";
    }
  }
}
|
||||
|
||||
static void testMaxRvdMinHvd(int numDevices, int *deviceList, int count) {
|
||||
bool testResult = true;
|
||||
int device;
|
||||
int validateCount = 0;
|
||||
std::string min_visibleDeviceString;
|
||||
std::string max_visibleDeviceString;
|
||||
int fd[2];
|
||||
pipe(fd);
|
||||
pid_t cPid;
|
||||
cPid = fork();
|
||||
if (cPid == 0) { // child
|
||||
Initialize(deviceList, numDevices,
|
||||
count, min_visibleDeviceString, max_visibleDeviceString);
|
||||
unsetenv("ROCR_VISIBLE_DEVICES");
|
||||
unsetenv("HIP_VISIBLE_DEVICES");
|
||||
setenv("ROCR_VISIBLE_DEVICES", max_visibleDeviceString.c_str(), 1);
|
||||
setenv("HIP_VISIBLE_DEVICES", min_visibleDeviceString.c_str(), 1);
|
||||
HIP_CHECK(hipGetDeviceCount(&numDevices));
|
||||
for (int i = 0; i < numDevices; i++) {
|
||||
HIP_CHECK(hipSetDevice(i));
|
||||
HIP_CHECK(hipGetDevice(&device));
|
||||
if (device == i) {
|
||||
validateCount+= 1;
|
||||
}
|
||||
}
|
||||
if (count != validateCount) {
|
||||
testResult = false;
|
||||
}
|
||||
} else if (cPid > 0) {
|
||||
close(fd[1]);
|
||||
read(fd[0], &testResult, sizeof(testResult));
|
||||
close(fd[0]);
|
||||
wait(NULL);
|
||||
} else {
|
||||
printf("fork() failed\n");
|
||||
HIP_ASSERT(false);
|
||||
}
|
||||
|
||||
REQUIRE(testResult == true);
|
||||
}
|
||||
|
||||
static void testRvdCvd(int numDevices, int *deviceList, int count) {
|
||||
bool testResult = true;
|
||||
int device;
|
||||
int validateCount = 0;
|
||||
std::string min_visibleDeviceString;
|
||||
std::string max_visibleDeviceString;;
|
||||
int fd[2];
|
||||
pipe(fd);
|
||||
pid_t cPid;
|
||||
cPid = fork();
|
||||
if (cPid == 0) { // child
|
||||
Initialize(deviceList, numDevices, count,
|
||||
min_visibleDeviceString, max_visibleDeviceString);
|
||||
unsetenv("ROCR_VISIBLE_DEVICES");
|
||||
unsetenv("HIP_VISIBLE_DEVICES");
|
||||
setenv("ROCR_VISIBLE_DEVICES", max_visibleDeviceString.c_str(), 1);
|
||||
setenv("CUDA_VISIBLE_DEVICES", min_visibleDeviceString.c_str(), 1);
|
||||
HIP_CHECK(hipGetDeviceCount(&numDevices));
|
||||
for (int i = 0; i < numDevices; i++) {
|
||||
HIP_CHECK(hipSetDevice(i));
|
||||
HIP_CHECK(hipGetDevice(&device));
|
||||
if (device == i) {
|
||||
validateCount+= 1;
|
||||
}
|
||||
}
|
||||
if (count != validateCount) {
|
||||
testResult = false;
|
||||
}
|
||||
} else if (cPid > 0) {
|
||||
close(fd[1]);
|
||||
read(fd[0], &testResult, sizeof(testResult));
|
||||
close(fd[0]);
|
||||
wait(NULL);
|
||||
} else {
|
||||
printf("fork() failed\n");
|
||||
HIP_ASSERT(false);
|
||||
}
|
||||
|
||||
REQUIRE(testResult == true);
|
||||
}
|
||||
|
||||
static void testMinRvdMaxHvd(int numDevices, int *deviceList, int count) {
|
||||
bool testResult = true;
|
||||
int device;
|
||||
int validateCount = 0;
|
||||
std::string min_visibleDeviceString;
|
||||
std::string max_visibleDeviceString;
|
||||
int fd[2];
|
||||
pipe(fd);
|
||||
pid_t cPid;
|
||||
cPid = fork();
|
||||
if (cPid == 0) { // child
|
||||
Initialize(deviceList, numDevices, count,
|
||||
min_visibleDeviceString, max_visibleDeviceString);
|
||||
unsetenv("ROCR_VISIBLE_DEVICES");
|
||||
unsetenv("HIP_VISIBLE_DEVICES");
|
||||
setenv("ROCR_VISIBLE_DEVICES", min_visibleDeviceString.c_str(), 1);
|
||||
setenv("HIP_VISIBLE_DEVICES", max_visibleDeviceString.c_str(), 1);
|
||||
HIP_CHECK(hipGetDeviceCount(&numDevices));
|
||||
for (int i = 0; i < numDevices; i++) {
|
||||
HIP_CHECK(hipSetDevice(i));
|
||||
HIP_CHECK(hipGetDevice(&device));
|
||||
if (device == i) {
|
||||
validateCount+= 1;
|
||||
}
|
||||
}
|
||||
if (count != validateCount) {
|
||||
testResult = false;
|
||||
}
|
||||
close(fd[0]);
|
||||
write(fd[1], &testResult, sizeof(testResult));
|
||||
close(fd[1]);
|
||||
exit(0);
|
||||
} else if (cPid > 0) {
|
||||
close(fd[1]);
|
||||
read(fd[0], &testResult, sizeof(testResult));
|
||||
close(fd[0]);
|
||||
wait(NULL);
|
||||
} else {
|
||||
printf("fork() failed\n");
|
||||
HIP_ASSERT(false);
|
||||
}
|
||||
|
||||
REQUIRE(testResult == true);
|
||||
}
|
||||
|
||||
/**
|
||||
* Scenario sets Invalid visible device list and checks behavior.
|
||||
*/
|
||||
TEST_CASE("Unit_hipSetDevice_InvalidVisibleDeviceList") {
  // Two invalid values are tried for each variable: -1 and numDevices.
  int numDevices = 0;
  getDeviceCount(&numDevices);
  REQUIRE(numDevices != 0);

  const bool viaHipEnv = false;
  const int negativeId = -1;

  SECTION("Test setting -1 to HIP_VISIBLE_DEVICES") {
    testInvalidDevice(numDevices, viaHipEnv, negativeId);
  }

  SECTION("Test setting invalid device to HIP_VISIBLE_DEVICES") {
    testInvalidDevice(numDevices, viaHipEnv, numDevices);
  }
#ifndef __HIP_PLATFORM_NVIDIA__
  const bool viaRocrEnv = true;

  SECTION("Test setting -1 to ROCR_VISIBLE_DEVICES") {
    testInvalidDevice(numDevices, viaRocrEnv, negativeId);
  }

  SECTION("Test setting invalid device to ROCR_VISIBLE_DEVICES") {
    testInvalidDevice(numDevices, viaRocrEnv, numDevices);
  }
#endif
}
|
||||
|
||||
/**
|
||||
* Scenario sets valid visible device list and checks behavior.
|
||||
*/
|
||||
TEST_CASE("Unit_hipSetDevice_ValidVisibleDeviceList") {
  // Exposes every available device through the visible-device variable.
  int numDevices = 0;
  getDeviceCount(&numDevices);
  REQUIRE(numDevices != 0);

  // Identity list 0..numDevices-1
  std::vector<int> deviceList(numDevices);
  for (int id = 0; id < numDevices; ++id) {
    deviceList[id] = id;
  }

  SECTION("Test setting valid hip visible device list") {
    testValidDevices(numDevices, false, deviceList.data(), numDevices);
  }
#ifndef __HIP_PLATFORM_NVIDIA__
  SECTION("Test setting valid rocr visible device list") {
    testValidDevices(numDevices, true, deviceList.data(), numDevices);
  }
#endif
}
|
||||
|
||||
/**
|
||||
* Scenario sets subset of available devices and checks behavior.
|
||||
*/
|
||||
TEST_CASE("Unit_hipSetDevice_SubsetOfAvailableDevices") {
  // Exposes a one-element subset of the devices (device 0).
  int numDevices = 0;
  int deviceList[MAX_SIZE];
  int deviceListLength = 1;

  getDeviceCount(&numDevices);
  REQUIRE(numDevices != 0);

  // Fill the list in descending order: deviceListLength-1 down to 0
  int nextId = deviceListLength - 1;
  for (int slot = 0; slot < deviceListLength; ++slot) {
    deviceList[slot] = nextId;
    --nextId;
  }

#ifndef __HIP_PLATFORM_NVIDIA__
  testValidDevices(numDevices, true, deviceList, deviceListLength);
#endif
  testValidDevices(numDevices, false, deviceList, deviceListLength);
}
|
||||
|
||||
#ifndef __HIP_PLATFORM_NVIDIA__
|
||||
/* Following tests apply only for AMD Platforms */
|
||||
|
||||
/**
|
||||
* Scenario tests getDevice behavior with Minimal Len of RVD
|
||||
* and Maximal Len of HVD
|
||||
*/
|
||||
TEST_CASE("Unit_hipSetDevice_MinRvdMaxHvdDevicesList") {
  int numDevices = 0;
  getDeviceCount(&numDevices);
  REQUIRE(numDevices != 0);

  // Every even-numbered device; a single-device system yields just {0}.
  std::vector<int> deviceList;
  for (int id = 0; id < numDevices; id += 2) {
    deviceList.push_back(id);
  }
  const int count = static_cast<int>(deviceList.size());

  testMinRvdMaxHvd(numDevices, deviceList.data(), count);
}
|
||||
|
||||
/**
|
||||
* Scenario tests getDevice behavior with Maximal Len of RVD
|
||||
* and Minimal Len of HVD
|
||||
*/
|
||||
TEST_CASE("Unit_hipSetDevice_MaxRvdMinHvdDevicesList") {
  int numDevices = 0;
  getDeviceCount(&numDevices);
  REQUIRE(numDevices != 0);

  // Every even-numbered device; a single-device system yields just {0}.
  std::vector<int> deviceList;
  for (int id = 0; id < numDevices; id += 2) {
    deviceList.push_back(id);
  }

  testMaxRvdMinHvd(numDevices, deviceList.data(), deviceList.size());
}
|
||||
|
||||
/**
|
||||
* Scenario tests getDevice behavior with combination of RVD and CVD
|
||||
*/
|
||||
TEST_CASE("Unit_hipSetDevice_RvdCvdDevicesList") {
  // Builds the list of even-numbered devices and hands it to testRvdCvd().
  int numDevices = 0;
  // A vector, as in the sibling test cases, grows with the device count; the
  // earlier fixed int[MAX_SIZE] array could be overrun on large systems.
  std::vector<int> deviceList;

  getDeviceCount(&numDevices);

  REQUIRE(numDevices != 0);

  // A single-device system yields just {0}.
  for (int i = 0; i < numDevices; i += 2) {
    deviceList.push_back(i);
  }
  const int count = static_cast<int>(deviceList.size());

  testRvdCvd(numDevices, deviceList.data(), count);
}
|
||||
#endif // __HIP_PLATFORM_NVIDIA__
|
||||
|
||||
#endif // __linux__
|
||||
@@ -0,0 +1,175 @@
|
||||
|
||||
# Copyright (c) 2016 - 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
# THE SOFTWARE.
|
||||
cmake_minimum_required(VERSION 3.16.8)
|
||||
# vc 19.31.31107.0 gives issue while packaging using makefile
|
||||
# To avoid error NMAKE : fatal error U1065: invalid option 'w'
|
||||
# Windows to use Ninja generator like other projects
|
||||
# to skip the simple compiler test
|
||||
set(CMAKE_C_COMPILER_WORKS 1)
|
||||
set(CMAKE_CXX_COMPILER_WORKS 1)
|
||||
include(GNUInstallDirs)
|
||||
|
||||
#############################
|
||||
# Packaging steps
|
||||
#############################
|
||||
if(NOT DEFINED CPACK_INSTALL_PREFIX)
|
||||
set(CPACK_INSTALL_PREFIX "/opt/rocm/")
|
||||
endif()
|
||||
if(BUILD_SHARED_LIBS)
|
||||
set(PKG_NAME hip-catch-${HIP_PLATFORM})
|
||||
else()
|
||||
set(PKG_NAME hip-catch-${HIP_PLATFORM}-static)
|
||||
endif()
|
||||
set(CPACK_PACKAGE_NAME ${PKG_NAME})
|
||||
set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "HIP: Heterogenous-computing Interface for Portability [CATCH TESTS]")
|
||||
set(CPACK_PACKAGE_DESCRIPTION "HIP:
|
||||
Heterogenous-computing Interface for Portability [CATCH TESTS]")
|
||||
set(CPACK_PACKAGE_VENDOR "Advanced Micro Devices, Inc.")
|
||||
set(CPACK_PACKAGE_CONTACT "HIP Support <hip.support@amd.com>")
|
||||
set(CPACK_PACKAGE_VERSION ${HIP_VERSION_MAJOR}.${HIP_VERSION_MINOR}.${HIP_PACKAGING_VERSION_PATCH})
|
||||
# by default rpm tries to reduce the file size by stripping the executable
|
||||
# To avoid build/packaging failures SWDEV-375603 and skip stripping
|
||||
set(CPACK_RPM_SPEC_MORE_DEFINE "%global __os_install_post %{nil}")
|
||||
|
||||
set (CPACK_RPM_PACKAGE_AUTOREQ 0)
|
||||
# to remove hip-catch-* package during uninstallation of rocm
|
||||
|
||||
if(BUILD_SHARED_LIBS)
|
||||
set (CPACK_DEBIAN_PACKAGE_DEPENDS "rocm-core, hip-runtime-amd")
|
||||
set (CPACK_RPM_PACKAGE_REQUIRES "rocm-core, hip-runtime-amd")
|
||||
else()
|
||||
set (CPACK_DEBIAN_PACKAGE_DEPENDS "rocm-core, hip-static-dev")
|
||||
set (CPACK_RPM_PACKAGE_REQUIRES "rocm-core, hip-static-devel")
|
||||
endif()
|
||||
|
||||
# Get all the files that need to be installed from the global properties
|
||||
get_property(INSTALL_CTEST_INCLUDE_FILES GLOBAL PROPERTY G_INSTALL_CTEST_INCLUDE_FILES)
|
||||
get_property(INSTALL_EXE_TARGETS GLOBAL PROPERTY G_INSTALL_EXE_TARGETS)
|
||||
get_property(INSTALL_CUSTOM_TARGETS GLOBAL PROPERTY G_INSTALL_CUSTOM_TARGETS)
|
||||
get_property(INSTALL_SRC_FILES GLOBAL PROPERTY G_INSTALL_SRC_FILES)
|
||||
get_property(INSTALL_HEADER_FILES GLOBAL PROPERTY G_INSTALL_HEADER_FILES)
|
||||
# Create top level CTestTestfile.cmake
|
||||
file(WRITE ${PROJECT_BINARY_DIR}/${CATCH_BUILD_DIR}/script/CTestTestfile.cmake ${_autogen})
|
||||
foreach(CTEST_INCLUDE_FILE ${INSTALL_CTEST_INCLUDE_FILES})
|
||||
get_filename_component(INCLUDE_FILE_NAME ${CTEST_INCLUDE_FILE} NAME)
|
||||
file(APPEND ${PROJECT_BINARY_DIR}/${CATCH_BUILD_DIR}/script/CTestTestfile.cmake
|
||||
"include(script/${INCLUDE_FILE_NAME})\n")
|
||||
endforeach()
|
||||
file(WRITE ${PROJECT_BINARY_DIR}/${CATCH_BUILD_DIR}/script/windows/CTestTestfile.cmake
|
||||
"${_autogen}\nsubdirs(${CATCH_BUILD_DIR})")
|
||||
|
||||
if(NOT WIN32)
|
||||
set(INSTALL_DIR ${CMAKE_INSTALL_DATADIR}/hip)
|
||||
set(CPACK_GENERATOR "TGZ;DEB;RPM" CACHE STRING "Linux package types for catch tests")
|
||||
set(CPACK_BINARY_DEB "ON")
|
||||
set(CPACK_DEBIAN_FILE_NAME "DEB-DEFAULT")
|
||||
set(CPACK_DEBIAN_PACKAGE_PROVIDES "catch")
|
||||
set(CPACK_BINARY_RPM "ON")
|
||||
set(CPACK_RPM_FILE_NAME "RPM-DEFAULT")
|
||||
set(CPACK_RPM_EXCLUDE_FROM_AUTO_FILELIST_ADDITION "/opt")
|
||||
if (CPACK_PACKAGE_VERSION MATCHES "local" )
|
||||
#If building locally default value will cause build failure
|
||||
#DEBUG SYMBOL packaging requires SOURCE_DIR to be small
|
||||
set(CPACK_RPM_BUILD_SOURCE_DIRS_PREFIX ${CPACK_INSTALL_PREFIX})
|
||||
endif()
|
||||
|
||||
if (DEFINED ENV{CPACK_DEBIAN_PACKAGE_RELEASE})
|
||||
set(CPACK_DEBIAN_PACKAGE_RELEASE $ENV{CPACK_DEBIAN_PACKAGE_RELEASE})
|
||||
else()
|
||||
set(CPACK_DEBIAN_PACKAGE_RELEASE "local")
|
||||
endif()
|
||||
|
||||
if(DEFINED ENV{CPACK_RPM_PACKAGE_RELEASE})
|
||||
set(CPACK_RPM_PACKAGE_RELEASE $ENV{CPACK_RPM_PACKAGE_RELEASE})
|
||||
else()
|
||||
set(CPACK_RPM_PACKAGE_RELEASE "local")
|
||||
endif()
|
||||
|
||||
execute_process( COMMAND rpm --eval %{?dist}
|
||||
RESULT_VARIABLE PROC_RESULT
|
||||
OUTPUT_VARIABLE EVAL_RESULT
|
||||
OUTPUT_STRIP_TRAILING_WHITESPACE )
|
||||
# Add the OS distribution tag to the rpm package release. For deb packages it is set from the build env
|
||||
if ( PROC_RESULT EQUAL "0" AND NOT EVAL_RESULT STREQUAL "" )
|
||||
string(APPEND CPACK_RPM_PACKAGE_RELEASE "%{?dist}")
|
||||
endif()
|
||||
|
||||
set(CPACK_SOURCE_GENERATOR "TGZ")
|
||||
# Install license file
|
||||
|
||||
set ( CPACK_RESOURCE_FILE_LICENSE "${CMAKE_SOURCE_DIR}/../LICENSE.txt" )
|
||||
install(FILES ${CPACK_RESOURCE_FILE_LICENSE} DESTINATION ${INSTALL_DIR})
|
||||
set(CPACK_RPM_PACKAGE_LICENSE "MIT")
|
||||
|
||||
else()
|
||||
# windows packaging
|
||||
set(INSTALL_DIR .)
|
||||
set(CPACK_INSTALL_PREFIX "")
|
||||
set(CPACK_SYSTEM_NAME "")
|
||||
set(CPACK_GENERATOR "ZIP" CACHE STRING "Windows package types for catch tests")
|
||||
set(CPACK_TEST_ZIP "ON")
|
||||
set(CPACK_ZIP_TEST_PACKAGE_NAME "catch")
|
||||
endif()
|
||||
|
||||
set(INSTALL_DIR_TESTS ${INSTALL_DIR}/${CATCH_BUILD_DIR})
|
||||
set(INSTALL_DIR_SCRIPT ${INSTALL_DIR}/${CATCH_BUILD_DIR}/script)
|
||||
set(INSTALL_DIR_SRC ${INSTALL_DIR}/${CATCH_BUILD_DIR}/src)
|
||||
set(INSTALL_DIR_HEADERS ${INSTALL_DIR}/${CATCH_BUILD_DIR}/headers)
|
||||
# install catch scripts
|
||||
install(FILES
|
||||
${ADD_SCRIPT_PATH}
|
||||
${CATCH_INCLUDE_PATH}
|
||||
DESTINATION ${INSTALL_DIR_SCRIPT})
|
||||
# install cmake include files
|
||||
foreach(CTEST_INCLUDE_FILE ${INSTALL_CTEST_INCLUDE_FILES})
|
||||
install(FILES ${CTEST_INCLUDE_FILE} DESTINATION ${INSTALL_DIR_SCRIPT})
|
||||
endforeach()
|
||||
# install source files
|
||||
foreach(SRC_FILE ${INSTALL_SRC_FILES})
|
||||
install(FILES ${SRC_FILE} DESTINATION ${INSTALL_DIR_SRC})
|
||||
endforeach()
|
||||
# install header files
|
||||
foreach(HEADER_FILES ${INSTALL_HEADER_FILES})
|
||||
install(FILES ${HEADER_FILES} DESTINATION ${INSTALL_DIR_HEADERS})
|
||||
endforeach()
|
||||
# install executables
|
||||
foreach(EXE_TARGET ${INSTALL_EXE_TARGETS})
|
||||
install(TARGETS ${EXE_TARGET}
|
||||
RUNTIME DESTINATION ${INSTALL_DIR_TESTS} OPTIONAL)
|
||||
endforeach()
|
||||
# install custom targets
|
||||
foreach(CUSTOM_TARGET ${INSTALL_CUSTOM_TARGETS})
|
||||
install(FILES ${CUSTOM_TARGET} DESTINATION ${INSTALL_DIR_TESTS})
|
||||
endforeach()
|
||||
# install ctest file
|
||||
install(FILES
|
||||
${PROJECT_BINARY_DIR}/${CATCH_BUILD_DIR}/script/CTestTestfile.cmake
|
||||
DESTINATION ${INSTALL_DIR_TESTS})
|
||||
install(FILES
|
||||
${PROJECT_BINARY_DIR}/${CATCH_BUILD_DIR}/script/windows/CTestTestfile.cmake
|
||||
DESTINATION ${INSTALL_DIR})
|
||||
# install catchinfo
|
||||
install(FILES ${PROJECT_BINARY_DIR}/${CATCH_BUILD_DIR}/catchInfo.txt DESTINATION ${INSTALL_DIR})
|
||||
# install hip test config
|
||||
install(DIRECTORY
|
||||
${HIP_TEST_CONFIG_BINARY_DIR}
|
||||
DESTINATION ${INSTALL_DIR_TESTS})
|
||||
include(CPack)
|
||||
@@ -0,0 +1,27 @@
|
||||
# Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
# THE SOFTWARE.
|
||||
|
||||
add_subdirectory(memset)
|
||||
add_subdirectory(memcpy)
|
||||
add_subdirectory(kernelLaunch)
|
||||
add_subdirectory(stream)
|
||||
add_subdirectory(event)
|
||||
add_subdirectory(warpSync)
|
||||
add_subdirectory(example)
|
||||
@@ -0,0 +1,34 @@
|
||||
# Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
# THE SOFTWARE.
|
||||
|
||||
set(TEST_SRC
|
||||
hipEventCreate.cc
|
||||
hipEventCreateWithFlags.cc
|
||||
hipEventRecord.cc
|
||||
hipEventDestroy.cc
|
||||
hipEventSynchronize.cc
|
||||
hipEventElapsedTime.cc
|
||||
hipEventQuery.cc
|
||||
)
|
||||
|
||||
hip_add_exe_to_target(NAME EventPerformance
|
||||
TEST_SRC ${TEST_SRC}
|
||||
TEST_TARGET_NAME build_tests
|
||||
COMPILE_OPTIONS -std=c++17)
|
||||
@@ -0,0 +1,61 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <performance_common.hh>
|
||||
#include <resource_guards.hh>
|
||||
|
||||
/**
|
||||
* @addtogroup event event
|
||||
* @{
|
||||
* @ingroup PerformanceTest
|
||||
* Contains performance tests for all hipEvent related HIP APIs.
|
||||
*/
|
||||
|
||||
// Benchmark body for `hipEventCreate`: each invocation creates one event
// inside the kTimerTypeCpu timed section and destroys it afterwards.
class HipEventCreateBenchmark : public Benchmark<HipEventCreateBenchmark> {
 public:
  void operator()() {
    hipEvent_t createdEvent;

    // Only the creation call is inside the timed section.
    TIMED_SECTION(kTimerTypeCpu) {
      HIP_CHECK(hipEventCreate(&createdEvent));
    }

    // Cleanup happens outside the timed section.
    HIP_CHECK(hipEventDestroy(createdEvent));
  }
};
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipEventCreate`
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/event/hipEventCreate.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipEventCreate") {
  // Run the hipEventCreate benchmark with its default configuration.
  HipEventCreateBenchmark createBenchmark;
  createBenchmark.Run();
}
|
||||
|
||||
/**
|
||||
* End doxygen group PerformanceTest.
|
||||
* @}
|
||||
*/
|
||||
@@ -0,0 +1,88 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <performance_common.hh>
|
||||
#include <resource_guards.hh>
|
||||
|
||||
/**
|
||||
* @addtogroup event event
|
||||
* @{
|
||||
* @ingroup PerformanceTest
|
||||
*/
|
||||
|
||||
// Benchmark body for `hipEventCreateWithFlags`: each invocation creates one
// event with the requested flag inside the kTimerTypeCpu timed section and
// destroys it afterwards.
class HipEventCreateWithFlagsBenchmark : public Benchmark<HipEventCreateWithFlagsBenchmark> {
 public:
  // flag: event creation flag forwarded unchanged to hipEventCreateWithFlags.
  void operator()(unsigned flag) {
    hipEvent_t createdEvent;

    // Only the creation call is inside the timed section.
    TIMED_SECTION(kTimerTypeCpu) {
      HIP_CHECK(hipEventCreateWithFlags(&createdEvent, flag));
    }

    // Cleanup happens outside the timed section.
    HIP_CHECK(hipEventDestroy(createdEvent));
  }
};
|
||||
|
||||
static std::string GetEventCreateFlagName(unsigned flag) {
|
||||
switch (flag) {
|
||||
case hipEventDefault:
|
||||
return "hipEventDefault";
|
||||
case hipEventBlockingSync:
|
||||
return "hipEventBlockingSync";
|
||||
case hipEventDisableTiming:
|
||||
return "hipEventDisableTiming";
|
||||
case hipEventInterprocess:
|
||||
return "hipEventInterprocess";
|
||||
default:
|
||||
return "flag combination";
|
||||
}
|
||||
}
|
||||
|
||||
static void RunBenchmark(unsigned flag) {
|
||||
HipEventCreateWithFlagsBenchmark benchmark;
|
||||
benchmark.AddSectionName(GetEventCreateFlagName(flag));
|
||||
benchmark.Run(flag);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipEventCreateWithFlags` with all flags:
|
||||
* -# Flags
|
||||
* - hipEventDefault
|
||||
* - hipEventBlockingSync
|
||||
* - hipEventDisableTiming
|
||||
* - hipEventInterprocess (currently disabled)
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/event/hipEventCreateWithFlags.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipEventCreateWithFlags") {
  // One benchmark run per generated flag value.
  const auto createFlag =
      GENERATE(hipEventDefault, hipEventBlockingSync,
               hipEventDisableTiming /*, hipEventInterprocess disabled until fixed (EXSWHTEC-25) */);

  RunBenchmark(createFlag);
}
|
||||
|
||||
/**
|
||||
* End doxygen group PerformanceTest.
|
||||
* @}
|
||||
*/
|
||||
برخی از فایل ها نشان داده نشدند زیرا تعداد زیادی فایل در این تفاوت تغییر کرده اند نمایش بیشتر
مرجع در شماره جدید
Block a user