d4a33cf33a
Squashed commit of the following:
commit f029195705a15700380c6f832ba5d15d46fd6de7
Author: Jonathan R. Madsen <jrmadsen@users.noreply.github.com>
Date: Thu Jul 13 14:38:56 2023 -0500
Formatting workflows for source (clang-format) and cmake (cmake-format) (#4)
* Add .cmake-format.yaml file
* Add formatting workflow
* provide base input for creating PR
* Update scheme for extracting branch name
- disable running formatting on push to amd-staging branch
* patch .cmake-format.yaml for find_package signature
- apparently cmake-format doesn't format the full signature of find_package
* run formatting (clang-format v11) (#7)
Co-authored-by: jrmadsen <jrmadsen@users.noreply.github.com>
* run cmake formatting (cmake-format) (#6)
Co-authored-by: jrmadsen <jrmadsen@users.noreply.github.com>
---------
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
commit bc4d135fdd8a1a9e51235f18a5d575fd2b3735e6
Author: Ammar ELWazir <aelwazir@amd.com>
Date: Thu Jul 13 12:55:17 2023 -0500
Removing Build cache for potential issues with auto-generated header files (#5)
Change-Id: I9e2319f4335e2f88585ffa6fac2bd88a1c952e6e
commit ce86dea6a311d44d880fa684eb78f3329295e2a4
Author: Jonathan R. Madsen <jrmadsen@users.noreply.github.com>
Date: Thu Jul 13 11:08:58 2023 -0500
Fix decltype(<hsa-function>) function pointer usage (#3)
- the following is done in several places:
decltype(hsa_memory_allocate)* hsa_memory_allocate
- above can cause compiler errors
- replace decltype(<hsa-function>) with decltype(::<hsa-function>)
- this ensures that the type within the decltype is recognized as the global scope HSA function, not the variable
- in many places, the variable has a "_fn" suffix to prevent this issue but added '::' anyway for consistency
commit ac49fdd92a72e9c99394253a02da413a6c2e3b3a
Merge: a07946a 03a0855
Author: Ammar ELWazir <aelwazir@amd.com>
Date: Wed Jul 12 11:36:24 2023 -0500
Merge pull request #2 from ROCm-Developer-Tools/gerrit-amd-staging
Pull from gerrit
commit 03a085588cffe863e8f466de67be1cfb205b675a
Merge: e88cad2 a07946a
Author: Ammar ELWazir <aelwazir@amd.com>
Date: Wed Jul 12 10:57:30 2023 -0500
Merge branch 'amd-staging' into gerrit-amd-staging
commit a07946a5cd4c670c83c27ad1a076a9d4567ce6d7
Author: Ammar ELWazir <Ammar.ELWazir@amd.com>
Date: Wed Jul 12 15:46:04 2023 +0000
Enabling Cached Builds
commit 525e494a7f13941077a8fd4ad6840904db4d27d4
Author: Ammar ELWazir <Ammar.ELWazir@amd.com>
Date: Wed Jul 12 04:53:54 2023 +0000
Updating missed GPU Targets
commit 42c75862f628c9bee7cfb7dc04dff2619430efbc
Author: Ammar ELWazir <Ammar.ELWazir@amd.com>
Date: Wed Jul 12 04:43:02 2023 +0000
Adding V1 Testing
commit 9d72fd4aee85e4b0c12e717060d2730fa5b73be1
Author: Ammar ELWazir <Ammar.ELWazir@amd.com>
Date: Wed Jul 12 03:34:31 2023 +0000
Fixing Artifacts directory path
commit f4000cc558b3b2e4676f7994f7ce8c8e6f94518e
Author: Ammar ELWazir <Ammar.ELWazir@amd.com>
Date: Wed Jul 12 03:27:26 2023 +0000
Fixing CMake for test build job
commit 2ce8115d4c33948c3c8f957f545a95a04e1d6cd2
Author: Ammar ELWazir <Ammar.ELWazir@amd.com>
Date: Wed Jul 12 03:16:18 2023 +0000
Fixing Ubuntu CMake for ubuntu test build
commit 6d0ed439191be900748d0c025157f9d689a73ec7
Author: Ammar ELWazir <Ammar.ELWazir@amd.com>
Date: Wed Jul 12 01:28:41 2023 +0000
Removing Navi21
commit e349a7642e5ae5eb03ab9fcd0a0f74f09f78cab5
Author: Ammar ELWazir <Ammar.ELWazir@amd.com>
Date: Wed Jul 12 01:14:14 2023 +0000
Removing Navi21
commit fefd02fe68d2a4bca7ec2e381960ad004ee9fc5b
Author: Ammar ELWazir <Ammar.ELWazir@amd.com>
Date: Wed Jul 12 00:42:48 2023 +0000
Fixing CMake Job
commit 2ea46abf7bf92643efa8c549fa70346ffbd79d65
Author: Ammar ELWazir <Ammar.ELWazir@amd.com>
Date: Wed Jul 12 00:35:13 2023 +0000
Fixing CMake Job
commit d99d681ed1999c5fcf291dc678b11a77205fb0f3
Author: Ammar ELWazir <Ammar.ELWazir@amd.com>
Date: Wed Jul 12 00:32:13 2023 +0000
Fixing Pull Latest Dockers and CMake Jobs
commit dfc4498072d13b4a1df3a63047d34c682c3d9a29
Author: Ammar ELWazir <Ammar.ELWazir@amd.com>
Date: Tue Jul 11 23:54:21 2023 +0000
Fixing CMake job
commit 919efe04de707f7c702031be15c3e2c5f8442cbb
Author: Ammar ELWazir <Ammar.ELWazir@amd.com>
Date: Tue Jul 11 23:52:13 2023 +0000
Adding Pull Last dockers job
commit be1b1256e8b0e05308e8f7e7e69bee3acca55281
Author: Ammar ELWazir <aelwazir@amd.com>
Date: Tue Jul 11 18:25:40 2023 -0500
Update cmake.yml
commit 212299fa4355ae6ec18f9aaacbb79c51ea6c6f97
Author: Ammar ELWazir <aelwazir@amd.com>
Date: Tue Jul 11 18:23:35 2023 -0500
Update cmake.yml
commit 7c2c1327086a61466cc6cac39f70865c051a8bc7
Author: Ammar ELWazir <aelwazir@amd.com>
Date: Tue Jul 11 18:18:53 2023 -0500
Update cmake.yml
commit 191b5ce007e612e814c1d7a3afb4ad398f3852e1
Author: Ammar ELWazir <aelwazir@amd.com>
Date: Tue Jul 11 16:03:22 2023 -0500
Update cmake.yml
commit 8824113d95f3e13c7ce4d0af8e0d9d8f522a6c4a
Author: Ammar ELWazir <Ammar.ELWazir@amd.com>
Date: Tue Jul 11 16:28:09 2023 +0000
Fixing Pull from Gerrit job name
Change-Id: I9e7ed9a27a13ca49d62c93bdadb30f0057e4d385
commit cc3d5e4b02ffb439e8cc2b3efa53527c376f9982
Author: Ammar ELWazir <Ammar.ELWazir@amd.com>
Date: Tue Jul 11 16:21:43 2023 +0000
Adding Staging sync job
Change-Id: I0551f43878b0678ce4b3e74e27d62357cf95ad95
commit b9be2eee71380a2e6dd34d520e92d0c4209277a0
Author: Ammar ELWazir <Ammar.ELWazir@amd.com>
Date: Tue Jul 11 15:57:11 2023 +0000
Fixing build.sh
Change-Id: Ia987b0244f0875370d5fe69907b3f5e9cea914de
commit 9eee33a95a1abd656a7ac5ca10a9f245e9825431
Author: Ammar ELWazir <aelwazir@amd.com>
Date: Mon Jul 10 21:39:46 2023 -0500
Update cmake.yml
commit 7093b85a78497140e8b52632ca2a002bdaeacd62
Author: Ammar ELWazir <aelwazir@amd.com>
Date: Mon Jul 10 21:33:29 2023 -0500
Update cmake.yml
commit f54697172c72a67740f9fdfa0c217b6ea6931576
Author: Ammar ELWazir <aelwazir@amd.com>
Date: Mon Jul 10 21:01:26 2023 -0500
Update cmake.yml
commit 1b6620e16f8940386b0f4f04e69e2410d21c0e26
Author: Ammar ELWazir <aelwazir@amd.com>
Date: Mon Jul 10 20:21:02 2023 -0500
Update cmake.yml
commit a94bec740c6b42c4b79c87bca20fa87b99bf060d
Author: Ammar ELWazir <aelwazir@amd.com>
Date: Mon Jul 10 19:46:35 2023 -0500
Update cmake.yml
commit 85d6b29d4375a69d575c18ece8542c50f2ddfcc3
Author: Ammar ELWazir <aelwazir@amd.com>
Date: Mon Jul 10 19:34:39 2023 -0500
Update cmake.yml
commit 8c004887cf1435f1a6214c3d2455299a8a27bd4c
Author: Ammar ELWazir <aelwazir@amd.com>
Date: Mon Jul 10 19:31:17 2023 -0500
Update cmake.yml
commit a14a9168e17d9348a53c6e9c9a47ba1edb4c4509
Author: Ammar ELWazir <aelwazir@amd.com>
Date: Mon Jul 10 19:25:46 2023 -0500
Update cmake.yml
commit 000f2f40b84e6a2f7d4becdbf5aed01436ca4c83
Author: Ammar ELWazir <aelwazir@amd.com>
Date: Mon Jul 10 19:08:18 2023 -0500
Update cmake.yml
commit a28a53d56731cad848fa9133d1c4dbaa8fc7afa7
Author: Ammar ELWazir <aelwazir@amd.com>
Date: Mon Jul 10 19:03:39 2023 -0500
Update cmake.yml
commit a6a2db01027f0b01fdfbb5997ddb772c7f51b649
Author: Ammar ELWazir <aelwazir@amd.com>
Date: Mon Jul 10 18:21:53 2023 -0500
Update cmake.yml
commit 118ef2a88b2d44e3207c31c343da3e5e5ec6f176
Author: Ammar ELWazir <aelwazir@amd.com>
Date: Mon Jul 10 17:55:57 2023 -0500
Update cmake.yml
commit 03c4c232396440cd0be6d2dd7baf4ceea1c2589d
Author: Ammar ELWazir <aelwazir@amd.com>
Date: Mon Jul 10 17:48:49 2023 -0500
Create cmake.yml
Change-Id: I77992f15694e77cbae49c56f9ff02f4f9079235d
373 satır
15 KiB
C++
373 satır
15 KiB
C++
/*
|
|
* =============================================================================
|
|
* ROC Runtime Conformance Release License
|
|
* =============================================================================
|
|
* The University of Illinois/NCSA
|
|
* Open Source License (NCSA)
|
|
*
|
|
* Copyright (c) 2017, Advanced Micro Devices, Inc.
|
|
* All rights reserved.
|
|
*
|
|
* Developed by:
|
|
*
|
|
* AMD Research and AMD ROC Software Development
|
|
*
|
|
* Advanced Micro Devices, Inc.
|
|
*
|
|
* www.amd.com
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
* of this software and associated documentation files (the "Software"), to
|
|
* deal with the Software without restriction, including without limitation
|
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
* and/or sell copies of the Software, and to permit persons to whom the
|
|
* Software is furnished to do so, subject to the following conditions:
|
|
*
|
|
* - Redistributions of source code must retain the above copyright notice,
|
|
* this list of conditions and the following disclaimers.
|
|
* - Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimers in
|
|
* the documentation and/or other materials provided with the distribution.
|
|
* - Neither the names of <Name of Development Group, Name of Institution>,
|
|
* nor the names of its contributors may be used to endorse or promote
|
|
* products derived from this Software without specific prior written
|
|
* permission.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
|
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
|
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
* DEALINGS WITH THE SOFTWARE.
|
|
*
|
|
*/
|
|
#include <cassert>
|
|
#include <iostream>
|
|
|
|
#include "hsa/hsa.h"
|
|
#include "hsa/hsa_ext_amd.h"
|
|
|
|
// Checks an HSA status code. On anything other than HSA_STATUS_SUCCESS it
// prints the failing source location and the numeric status to stdout, prints
// the runtime's textual description when one was produced, and then returns
// the status from the *enclosing* function.
//
// The argument is evaluated exactly once. The do/while(0) wrapper makes the
// expansion a single statement, so `RET_IF_HSA_ERR(x);` is safe inside
// unbraced if/else bodies.
#define RET_IF_HSA_ERR(err)                                                                \
  do {                                                                                     \
    const hsa_status_t ret_if_hsa_err_status_ = (err);                                     \
    if (ret_if_hsa_err_status_ != HSA_STATUS_SUCCESS) {                                    \
      const char* ret_if_hsa_err_msg_ = nullptr;                                           \
      hsa_status_string(ret_if_hsa_err_status_, &ret_if_hsa_err_msg_);                     \
      std::cout << "hsa api call failure at line " << __LINE__ << ", file: " << __FILE__   \
                << ". Call returned " << ret_if_hsa_err_status_ << std::endl;              \
      /* The lookup result is not checked, so the message may still be null; */            \
      /* streaming a null char* into std::cout is undefined behaviour.       */            \
      if (ret_if_hsa_err_msg_ != nullptr) {                                                \
        std::cout << ret_if_hsa_err_msg_ << std::endl;                                     \
      }                                                                                    \
      return ret_if_hsa_err_status_;                                                       \
    }                                                                                      \
  } while (0)
|
|
|
|
// 32-bit patterns written into the source buffer of the individual copy tests
// run from main(); each test uses its own value.
static constexpr uint32_t kTestFillValue1 = 0xabcdef12;
static constexpr uint32_t kTestFillValue2 = 0xba5eba11;
static constexpr uint32_t kTestFillValue3 = 0xfeed5a1e;

// Pattern pre-loaded into the system and destination buffers before a copy, so
// the verification step can tell that the buffer really was overwritten.
static constexpr uint32_t kTestInitValue = 0xbaadf00d;
|
|
|
|
// This structure holds an agent pointer and associated memory pool to be used
|
|
// for this test program.
|
|
struct async_mem_cpy_agent {
|
|
hsa_agent_t dev;
|
|
hsa_amd_memory_pool_t pool;
|
|
size_t granule;
|
|
void* ptr;
|
|
};
|
|
struct async_mem_cpy_pool_query {
|
|
async_mem_cpy_agent* pool_info;
|
|
hsa_agent_t peer_device;
|
|
};
|
|
struct callback_args {
|
|
struct async_mem_cpy_agent cpu;
|
|
struct async_mem_cpy_agent gpu1;
|
|
struct async_mem_cpy_agent gpu2;
|
|
};
|
|
// Returns the least common multiple of a and b.
//
// Returns 0 when either argument is 0 (there is no positive common multiple).
// The result is computed as (a / gcd(a, b)) * b in unsigned 32-bit arithmetic;
// if the true LCM does not fit in 32 bits the value wraps modulo 2^32.
static uint32_t lcm(uint32_t a, uint32_t b) {
  if (a == 0 || b == 0) {
    return 0;
  }

  // Euclid's algorithm: after the loop `divisor` holds gcd(a, b).
  uint32_t divisor = a;
  uint32_t remainder = b;
  while (remainder != 0) {
    const uint32_t next = divisor % remainder;
    divisor = remainder;
    remainder = next;
  }

  // Divide before multiplying to keep the intermediate value as small as possible.
  return (a / divisor) * b;
}
|
|
// This function is a callback for hsa_amd_agent_iterate_memory_pools()
|
|
// and will test whether the provided memory pool is 1) in the GLOBAL
|
|
// segment, 2) allows allocation and 3) is accessible by the provided
|
|
// agent. The "data" input parameter is assumed to be pointing to a
|
|
// struct async_mem_cpy_agent. If the provided pool meets these criteria,
|
|
// HSA_STATUS_INFO_BREAK is returned.
|
|
static hsa_status_t FindPool(hsa_amd_memory_pool_t in_pool, void* data) {
|
|
hsa_amd_segment_t segment;
|
|
hsa_status_t err;
|
|
if (nullptr == data) {
|
|
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
|
|
}
|
|
struct async_mem_cpy_pool_query* args = (struct async_mem_cpy_pool_query*)data;
|
|
err = hsa_amd_memory_pool_get_info(in_pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, &segment);
|
|
RET_IF_HSA_ERR(err);
|
|
if (segment != HSA_AMD_SEGMENT_GLOBAL) {
|
|
return HSA_STATUS_SUCCESS;
|
|
}
|
|
bool canAlloc;
|
|
err = hsa_amd_memory_pool_get_info(in_pool, HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED,
|
|
&canAlloc);
|
|
RET_IF_HSA_ERR(err);
|
|
if (!canAlloc) {
|
|
return HSA_STATUS_SUCCESS;
|
|
}
|
|
if (args->peer_device.handle != 0) {
|
|
hsa_amd_memory_pool_access_t access = HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED;
|
|
err = hsa_amd_agent_memory_pool_get_info(args->peer_device, in_pool,
|
|
HSA_AMD_AGENT_MEMORY_POOL_INFO_ACCESS, &access);
|
|
RET_IF_HSA_ERR(err);
|
|
if (access == HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED) {
|
|
return HSA_STATUS_SUCCESS;
|
|
}
|
|
}
|
|
err = hsa_amd_memory_pool_get_info(in_pool, HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE,
|
|
&args->pool_info->granule);
|
|
RET_IF_HSA_ERR(err);
|
|
args->pool_info->pool = in_pool;
|
|
return HSA_STATUS_INFO_BREAK;
|
|
}
|
|
// This function is meant to be a callback to hsa_iterate_agents. For each
|
|
// input agent the iterator provides as input, this function will check to
|
|
// see if the input agent is a CPU agent. If so, it will update the
|
|
// async_mem_cpy_agent structure pointed to by the input parameter "data".
|
|
// Return values:
|
|
// HSA_STATUS_INFO_BREAK -- CPU agent has been found and stored. Iterator
|
|
// should stop iterating
|
|
// HSA_STATUS_SUCCESS -- CPU agent has not yet been found; iterator
|
|
// should keep iterating
|
|
// Other -- Some error occurred
|
|
static hsa_status_t FindCPUDevice(hsa_agent_t agent, void* data) {
|
|
if (data == NULL) {
|
|
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
|
|
}
|
|
hsa_device_type_t hsa_device_type;
|
|
hsa_status_t err = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &hsa_device_type);
|
|
RET_IF_HSA_ERR(err);
|
|
if (hsa_device_type == HSA_DEVICE_TYPE_CPU) {
|
|
struct async_mem_cpy_agent* args = (struct async_mem_cpy_agent*)data;
|
|
args->dev = agent;
|
|
async_mem_cpy_pool_query pool_query;
|
|
pool_query.peer_device.handle = 0;
|
|
pool_query.pool_info = args;
|
|
err = hsa_amd_agent_iterate_memory_pools(agent, FindPool, &pool_query);
|
|
if (err == HSA_STATUS_INFO_BREAK) { // we found what we were looking for
|
|
return HSA_STATUS_INFO_BREAK;
|
|
} else {
|
|
args->dev = {0};
|
|
return err;
|
|
}
|
|
}
|
|
// Returning HSA_STATUS_SUCCESS tells the calling iterator to keep iterating
|
|
return HSA_STATUS_SUCCESS;
|
|
}
|
|
// This function is meant to be a callback to hsa_iterate_agents. It will
|
|
// attempt to find 2, or at least 1 GPU agent suitable for our test. The data
|
|
// input parameter should point to a callback_args struct. The 2 GPU fields
|
|
// will be updated as GPUs are discovered.
|
|
// Return values:
|
|
// HSA_STATUS_INFO_BREAK -- 2 GPU agents have been found and stored. Iterator
|
|
// should stop iterating
|
|
// HSA_STATUS_SUCCESS -- 2 GPU agents have not yet been found; 0 or 1 may
|
|
// have been found; iterator function should keep iterating
|
|
// Other -- Some error occurred
|
|
static hsa_status_t FindGPUs(hsa_agent_t agent, void* data) {
|
|
if (data == NULL) {
|
|
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
|
|
}
|
|
hsa_device_type_t hsa_device_type;
|
|
hsa_status_t err = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &hsa_device_type);
|
|
RET_IF_HSA_ERR(err);
|
|
if (hsa_device_type != HSA_DEVICE_TYPE_GPU) {
|
|
return HSA_STATUS_SUCCESS;
|
|
}
|
|
struct callback_args* args = (struct callback_args*)data;
|
|
struct async_mem_cpy_agent* gpu;
|
|
async_mem_cpy_pool_query pool_query = {0, 0};
|
|
if (args->gpu1.dev.handle == 0) {
|
|
gpu = &args->gpu1;
|
|
} else {
|
|
gpu = &args->gpu2;
|
|
// Check that gpu1 has peer access into the selected pool.
|
|
pool_query.peer_device = args->gpu1.dev;
|
|
}
|
|
// Make sure GPU device has pool host can access
|
|
gpu->dev = agent;
|
|
pool_query.pool_info = gpu;
|
|
err = hsa_amd_agent_iterate_memory_pools(agent, FindPool, &pool_query);
|
|
if (err == HSA_STATUS_INFO_BREAK) {
|
|
if (gpu == &args->gpu2) {
|
|
// We found 2 gpu's
|
|
return HSA_STATUS_INFO_BREAK;
|
|
} else {
|
|
// Keep looking for another gpu
|
|
return HSA_STATUS_SUCCESS;
|
|
}
|
|
} else {
|
|
gpu->dev = {0};
|
|
}
|
|
RET_IF_HSA_ERR(err);
|
|
// Returning HSA_STATUS_SUCCESS tells the calling iterator to keep iterating
|
|
return HSA_STATUS_SUCCESS;
|
|
}
|
|
// This is the main test, showing various paths of async. copy. Source and
|
|
// destination agents and their respective pools should already be discovered.
|
|
// Additionally, buffer from the pools should already be allocated and availble
|
|
// from the input parameters.
|
|
static hsa_status_t AsyncCpyTest(async_mem_cpy_agent* dst, async_mem_cpy_agent* src,
|
|
callback_args* args, size_t sz, uint32_t val) {
|
|
hsa_status_t err;
|
|
hsa_signal_t copy_signal;
|
|
// Initialize the system and destination buffers with a value so we can later
|
|
// validate it has been overwritten
|
|
void* sysPtr = args->cpu.ptr;
|
|
err = hsa_amd_memory_fill(sysPtr, kTestInitValue, sz / sizeof(uint32_t));
|
|
RET_IF_HSA_ERR(err);
|
|
if (dst->ptr != sysPtr) {
|
|
err = hsa_amd_memory_fill(dst->ptr, kTestInitValue, sz / sizeof(uint32_t));
|
|
RET_IF_HSA_ERR(err);
|
|
}
|
|
// Fill the source buffer with the provided uint32_t value
|
|
err = hsa_amd_memory_fill(src->ptr, val, sz / sizeof(uint32_t));
|
|
RET_IF_HSA_ERR(err);
|
|
// Make sure the target and destination agents have access to the buffer.
|
|
hsa_agent_t ag_list[2] = {dst->dev, src->dev};
|
|
err = hsa_amd_agents_allow_access(2, ag_list, NULL, dst->ptr);
|
|
RET_IF_HSA_ERR(err);
|
|
// Create a signal that will be used to inform us when the copy is done
|
|
err = hsa_signal_create(1, 0, NULL, ©_signal);
|
|
RET_IF_HSA_ERR(err);
|
|
// Do the copy...
|
|
err = hsa_amd_memory_async_copy(dst->ptr, dst->dev, src->ptr, src->dev, sz, 0, NULL, copy_signal);
|
|
RET_IF_HSA_ERR(err);
|
|
// Here we do a blocking wait. Alternatively, we could also use a
|
|
// non-blocking wait in a loop, and do other work while waiting.
|
|
if (hsa_signal_wait_relaxed(copy_signal, HSA_SIGNAL_CONDITION_LT, 1, -1,
|
|
HSA_WAIT_STATE_BLOCKED) != 0) {
|
|
printf("Async copy returned error value.\n");
|
|
return HSA_STATUS_ERROR;
|
|
}
|
|
// Verify the copy was successful; copy from the dst buffer to the sysBuf,
|
|
// (if the result is not already in sys. mem.) and check the sysBuf values
|
|
if (dst->ptr != sysPtr) {
|
|
if (src->ptr != sysPtr) {
|
|
// In this case, we need to give the gpu dev that owns dst->ptr access
|
|
// to the system memory we are going to copy to.
|
|
hsa_agent_t ag_list_ck[2] = {dst->dev, args->cpu.dev};
|
|
err = hsa_amd_agents_allow_access(2, ag_list_ck, NULL, sysPtr);
|
|
RET_IF_HSA_ERR(err);
|
|
}
|
|
// Reset signal to 1
|
|
hsa_signal_store_screlease(copy_signal, 1);
|
|
err = hsa_amd_memory_async_copy(sysPtr, args->cpu.dev, dst->ptr, dst->dev, sz, 0, NULL,
|
|
copy_signal);
|
|
RET_IF_HSA_ERR(err);
|
|
if (hsa_signal_wait_relaxed(copy_signal, HSA_SIGNAL_CONDITION_LT, 1, -1,
|
|
HSA_WAIT_STATE_BLOCKED) != 0) {
|
|
printf("Async copy returned error value.\n");
|
|
return HSA_STATUS_ERROR;
|
|
}
|
|
}
|
|
// Check that the contents of the buffer are what is expected.
|
|
for (uint32_t i = 0; i < sz / sizeof(uint32_t); ++i) {
|
|
if (reinterpret_cast<uint32_t*>(sysPtr)[i] != val) {
|
|
fprintf(stdout, "Expected 0x%x but got 0x%x in buffer at index %d.\n", val,
|
|
reinterpret_cast<uint32_t*>(sysPtr)[i], i);
|
|
return HSA_STATUS_ERROR;
|
|
}
|
|
}
|
|
return HSA_STATUS_SUCCESS;
|
|
}
|
|
// This program illustrates the usage of the asynchronous copy capability of
|
|
// the RocR runtime library. The program will create a system memory buffer and
|
|
// a local buffer for each GPU, up to 2 GPUs, if the system has at least 2
|
|
// GPUs. The program will copy data to/from the host from/to the GPU. If 2
|
|
// GPUs are available, the program will also copy data from one to the other.
|
|
int main() {
|
|
hsa_status_t err;
|
|
struct callback_args args;
|
|
bool twoGPUs = false;
|
|
err = hsa_init();
|
|
RET_IF_HSA_ERR(err);
|
|
// First, find the cpu agent and associated pool
|
|
args.cpu = {0, 0, 0};
|
|
err = hsa_iterate_agents(FindCPUDevice, reinterpret_cast<void*>(&args.cpu));
|
|
assert(err == HSA_STATUS_INFO_BREAK);
|
|
if (err != HSA_STATUS_INFO_BREAK) {
|
|
return -1;
|
|
}
|
|
// Now, find 1 or 2 (if possible) GPUs and associated pool(s) for our test
|
|
args.gpu1 = {0, 0, 0};
|
|
args.gpu2 = {0, 0, 0};
|
|
err = hsa_iterate_agents(FindGPUs, &args);
|
|
if (err == HSA_STATUS_INFO_BREAK) {
|
|
twoGPUs = true;
|
|
} else {
|
|
// See if we at least have 1 GPU
|
|
if (args.gpu1.dev.handle == 0) {
|
|
fprintf(stdout, "GPU with accessible VRAM not found; at least 1 required. Exiting\n");
|
|
return -1;
|
|
}
|
|
fprintf(stdout,
|
|
"Only 1 GPU found with required VRAM. "
|
|
"Peer-to-Peer copy will be skipped.\n");
|
|
}
|
|
// We will use the smallest amount of allocatable memory that works for all
|
|
// potential sources and destinations of the copy
|
|
size_t sz = lcm(args.cpu.granule, args.gpu1.granule);
|
|
// Allocate memory on each source/destination
|
|
if (twoGPUs) {
|
|
sz = lcm(sz, args.gpu2.granule);
|
|
err = hsa_amd_memory_pool_allocate(args.gpu2.pool, sz, 0,
|
|
reinterpret_cast<void**>(&args.gpu2.ptr));
|
|
RET_IF_HSA_ERR(err);
|
|
}
|
|
err = hsa_amd_memory_pool_allocate(args.cpu.pool, sz, 0, reinterpret_cast<void**>(&args.cpu.ptr));
|
|
RET_IF_HSA_ERR(err);
|
|
err =
|
|
hsa_amd_memory_pool_allocate(args.gpu1.pool, sz, 0, reinterpret_cast<void**>(&args.gpu1.ptr));
|
|
RET_IF_HSA_ERR(err);
|
|
char name[64];
|
|
err = hsa_agent_get_info(args.cpu.dev, HSA_AGENT_INFO_NAME, &name);
|
|
fprintf(stdout, "CPU is \"%s\"\n", name);
|
|
err = hsa_agent_get_info(args.gpu1.dev, HSA_AGENT_INFO_NAME, &name);
|
|
fprintf(stdout, "GPU1 is \"%s\"\n", name);
|
|
if (twoGPUs) {
|
|
err = hsa_agent_get_info(args.gpu2.dev, HSA_AGENT_INFO_NAME, &name);
|
|
fprintf(stdout, "GPU2 is \"%s\"\n", name);
|
|
}
|
|
fprintf(stdout, "Copying %lu bytes from gpu1 memory to system memory...\n", sz);
|
|
err = AsyncCpyTest(&args.cpu, &args.gpu1, &args, sz, kTestFillValue1);
|
|
RET_IF_HSA_ERR(err);
|
|
fprintf(stdout, "Success!\n");
|
|
fprintf(stdout, "Copying %lu bytes from system memory to gpu1 memory...\n", sz);
|
|
err = AsyncCpyTest(&args.gpu1, &args.cpu, &args, sz, kTestFillValue2);
|
|
RET_IF_HSA_ERR(err);
|
|
fprintf(stdout, "Success!\n");
|
|
if (twoGPUs) {
|
|
fprintf(stdout, "Copying %lu bytes from gpu1 memory to gpu2 memory...\n", sz);
|
|
err = AsyncCpyTest(&args.gpu2, &args.gpu1, &args, sz, kTestFillValue3);
|
|
RET_IF_HSA_ERR(err);
|
|
fprintf(stdout, "Success!\n");
|
|
}
|
|
// Clean up
|
|
err = hsa_amd_memory_pool_free(args.cpu.ptr);
|
|
RET_IF_HSA_ERR(err);
|
|
err = hsa_amd_memory_pool_free(args.gpu1.ptr);
|
|
RET_IF_HSA_ERR(err);
|
|
if (twoGPUs) {
|
|
err = hsa_amd_memory_pool_free(args.gpu2.ptr);
|
|
RET_IF_HSA_ERR(err);
|
|
}
|
|
}
|