e79af13068
Add VMM test for memory accounting.
1663 خطوط
59 KiB
C++
1663 خطوط
59 KiB
C++
/*
|
|
* =============================================================================
|
|
* ROC Runtime Conformance Release License
|
|
* =============================================================================
|
|
* The University of Illinois/NCSA
|
|
* Open Source License (NCSA)
|
|
*
|
|
* Copyright (c) 2018, Advanced Micro Devices, Inc.
|
|
* All rights reserved.
|
|
*
|
|
* Developed by:
|
|
*
|
|
* AMD Research and AMD ROC Software Development
|
|
*
|
|
* Advanced Micro Devices, Inc.
|
|
*
|
|
* www.amd.com
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
* of this software and associated documentation files (the "Software"), to
|
|
* deal with the Software without restriction, including without limitation
|
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
* and/or sell copies of the Software, and to permit persons to whom the
|
|
* Software is furnished to do so, subject to the following conditions:
|
|
*
|
|
* - Redistributions of source code must retain the above copyright notice,
|
|
* this list of conditions and the following disclaimers.
|
|
* - Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimers in
|
|
* the documentation and/or other materials provided with the distribution.
|
|
* - Neither the names of <Name of Development Group, Name of Institution>,
|
|
* nor the names of its contributors may be used to endorse or promote
|
|
* products derived from this Software without specific prior written
|
|
* permission.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
|
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
|
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
* DEALINGS WITH THE SOFTWARE.
|
|
*
|
|
*/
|
|
|
|
|
|
#include <sys/mman.h>
|
|
#include <fcntl.h>
|
|
#include <algorithm>
|
|
#include <iostream>
|
|
#include <vector>
|
|
#include <memory>
|
|
#include <sys/socket.h>
|
|
|
|
#include "suites/functional/virtual_memory.h"
|
|
#include "common/base_rocr_utils.h"
|
|
#include "common/common.h"
|
|
#include "common/helper_funcs.h"
|
|
#include "common/hsatimer.h"
|
|
#include "common/concurrent_utils.h"
|
|
#include "gtest/gtest.h"
|
|
#include "hsa/hsa.h"
|
|
|
|
// Wrap printf to add first or second process indicator
|
|
#define PROCESS_LOG(format, ...) \
|
|
{ \
|
|
if (verbosity() >= VERBOSE_STANDARD || !parentProcess_) { \
|
|
fprintf(stdout, "line:%d P%u: " format, __LINE__, static_cast<int>(!parentProcess_), \
|
|
##__VA_ARGS__); \
|
|
} \
|
|
}
|
|
|
|
// Fork safe ASSERT_EQ.
|
|
#define MSG(y, msg, ...) msg
|
|
#define Y(y, ...) y
|
|
|
|
#define FORK_ASSERT_EQ(x, ...) \
|
|
if ((x) != (Y(__VA_ARGS__))) { \
|
|
if ((x) != (Y(__VA_ARGS__))) { \
|
|
std::cout << MSG(__VA_ARGS__, ""); \
|
|
if (parentProcess_) { \
|
|
shared_->parent_status = -1; \
|
|
} else { \
|
|
shared_->child_status = -1; \
|
|
} \
|
|
ASSERT_EQ(x, Y(__VA_ARGS__)); \
|
|
} \
|
|
}
|
|
|
|
static const char kSubTestSeparator[] = " **************************";
|
|
|
|
static void PrintMemorySubtestHeader(const char* header) {
|
|
std::cout << " *** Virtual Memory Functional Subtest: " << header << " ***" << std::endl;
|
|
}
|
|
|
|
static void PrintAgentNameAndType(hsa_agent_t agent) {
|
|
hsa_status_t err;
|
|
|
|
char ag_name[64];
|
|
hsa_device_type_t ag_type;
|
|
|
|
err = hsa_agent_get_info(agent, HSA_AGENT_INFO_NAME, ag_name);
|
|
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
|
|
|
|
err = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &ag_type);
|
|
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
|
|
|
|
std::cout << " Agent: " << ag_name << " (";
|
|
switch (ag_type) {
|
|
case HSA_DEVICE_TYPE_CPU:
|
|
std::cout << "CPU)";
|
|
break;
|
|
case HSA_DEVICE_TYPE_GPU:
|
|
std::cout << "GPU)";
|
|
break;
|
|
case HSA_DEVICE_TYPE_DSP:
|
|
std::cout << "DSP)";
|
|
break;
|
|
case HSA_DEVICE_TYPE_AIE:
|
|
std::cout << "AIE)";
|
|
break;
|
|
}
|
|
std::cout << std::endl;
|
|
return;
|
|
}
|
|
|
|
VirtMemoryTestBasic::VirtMemoryTestBasic(void) : TestBase() {
|
|
set_title("ROCr Virtual Memory Basic Tests");
|
|
set_description(" Tests virtual memory API functions");
|
|
}
|
|
|
|
VirtMemoryTestBasic::~VirtMemoryTestBasic(void) {}
|
|
|
|
void VirtMemoryTestBasic::TestCreateDestroy(hsa_agent_t agent, hsa_amd_memory_pool_t pool) {
|
|
hsa_agent_t* agents_accessible;
|
|
hsa_amd_pointer_info_t ptrInfo = {};
|
|
uint32_t num_agents_accessible = 0;
|
|
std::vector<hsa_agent_t> gpus;
|
|
rocrtst::pool_info_t pool_i;
|
|
hsa_device_type_t ag_type;
|
|
char ag_name[64];
|
|
void* addrRangeUnmapped;
|
|
hsa_status_t err;
|
|
void* addrRange;
|
|
|
|
ASSERT_SUCCESS(hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &ag_type));
|
|
|
|
ASSERT_SUCCESS(rocrtst::AcquirePoolInfo(pool, &pool_i));
|
|
|
|
if (ag_type != HSA_DEVICE_TYPE_GPU || !pool_i.alloc_allowed) return;
|
|
|
|
size_t granule_size = pool_i.alloc_granule;
|
|
const size_t sizeof_addrRangeUnmapped = 10 * granule_size;
|
|
const size_t sizeof_addrRange = 20 * granule_size;
|
|
|
|
ASSERT_SUCCESS(hsa_iterate_agents(rocrtst::IterateGPUAgents, &gpus));
|
|
ASSERT_SUCCESS(hsa_amd_vmem_address_reserve(&addrRange, sizeof_addrRange, 0, 0));
|
|
ASSERT_SUCCESS(hsa_amd_vmem_address_reserve(&addrRangeUnmapped, sizeof_addrRangeUnmapped, 0, 0));
|
|
|
|
/* Verify that pointer info for unmapped VA's return expected values */
|
|
ptrInfo.size = sizeof(ptrInfo);
|
|
ASSERT_SUCCESS(hsa_amd_pointer_info(addrRangeUnmapped, &ptrInfo, &malloc, &num_agents_accessible,
|
|
&agents_accessible));
|
|
ASSERT_EQ(ptrInfo.type, HSA_EXT_POINTER_TYPE_RESERVED_ADDR);
|
|
ASSERT_EQ(ptrInfo.hostBaseAddress, addrRangeUnmapped);
|
|
/* For unmapped VA, then size is equal to size of address reservation */
|
|
ASSERT_EQ(ptrInfo.sizeInBytes, sizeof_addrRangeUnmapped);
|
|
ASSERT_EQ(num_agents_accessible, 0);
|
|
|
|
/* Verify that pointer info for unmapped VA offset return expected values */
|
|
ptrInfo.size = sizeof(ptrInfo);
|
|
ASSERT_SUCCESS(hsa_amd_pointer_info(reinterpret_cast<uint8_t*>(addrRangeUnmapped) + 10, &ptrInfo, &malloc,
|
|
&num_agents_accessible, &agents_accessible));
|
|
ASSERT_EQ(ptrInfo.type, HSA_EXT_POINTER_TYPE_RESERVED_ADDR);
|
|
ASSERT_EQ(ptrInfo.hostBaseAddress,
|
|
addrRangeUnmapped); // hostBaseAddress is address of reservation instead of offset.
|
|
/* For unmapped VA, then size is equal to size of address reservation */
|
|
ASSERT_EQ(ptrInfo.sizeInBytes, sizeof_addrRangeUnmapped);
|
|
ASSERT_EQ(num_agents_accessible, 0);
|
|
|
|
hsa_amd_vmem_alloc_handle_t mem_handle;
|
|
const size_t sizeof_mem_handle = 10 * granule_size;
|
|
ASSERT_SUCCESS(
|
|
hsa_amd_vmem_handle_create(pool, sizeof_mem_handle, MEMORY_TYPE_NONE, 0, &mem_handle));
|
|
|
|
/* Test alloc properties returns correct memory type and pool handle */
|
|
hsa_amd_memory_pool_t poolRet;
|
|
hsa_amd_memory_type_t memTypeRet;
|
|
ASSERT_SUCCESS(hsa_amd_vmem_get_alloc_properties_from_handle(mem_handle, &poolRet, &memTypeRet));
|
|
|
|
ASSERT_EQ(poolRet.handle, pool.handle);
|
|
ASSERT_EQ(memTypeRet, MEMORY_TYPE_NONE);
|
|
|
|
hsa_amd_vmem_alloc_handle_t mem_handleTypePinned;
|
|
ASSERT_SUCCESS(hsa_amd_vmem_handle_create(pool, sizeof_mem_handle, MEMORY_TYPE_PINNED, 0,
|
|
&mem_handleTypePinned));
|
|
|
|
ASSERT_SUCCESS(
|
|
hsa_amd_vmem_get_alloc_properties_from_handle(mem_handleTypePinned, &poolRet, &memTypeRet));
|
|
ASSERT_EQ(poolRet.handle, pool.handle);
|
|
ASSERT_EQ(memTypeRet, MEMORY_TYPE_PINNED);
|
|
|
|
|
|
ASSERT_SUCCESS(hsa_amd_vmem_map(addrRange, sizeof_mem_handle, 0, mem_handle, 0));
|
|
|
|
/* Verity pointer info on mapped addresses */
|
|
ptrInfo.size = sizeof(ptrInfo);
|
|
ASSERT_SUCCESS(hsa_amd_pointer_info(addrRange, &ptrInfo, &malloc, &num_agents_accessible,
|
|
&agents_accessible));
|
|
ASSERT_EQ(ptrInfo.type, HSA_EXT_POINTER_TYPE_HSA_VMEM);
|
|
ASSERT_EQ(ptrInfo.sizeInBytes, sizeof_mem_handle); // size matches memory handle
|
|
ASSERT_EQ(num_agents_accessible, 0);
|
|
|
|
// Access to each GPU should be None
|
|
for (auto gpuIt = gpus.begin(); gpuIt != gpus.end(); ++gpuIt) {
|
|
hsa_access_permission_t perm = HSA_ACCESS_PERMISSION_RW;
|
|
|
|
ASSERT_SUCCESS(hsa_amd_vmem_get_access(addrRange, &perm, *gpuIt));
|
|
ASSERT_EQ(perm, HSA_ACCESS_PERMISSION_NONE);
|
|
}
|
|
|
|
/* Set RO Access to all GPUs */
|
|
{
|
|
int descIndex = 0;
|
|
hsa_amd_memory_access_desc_t desc[gpus.size()];
|
|
for (auto gpuIt = gpus.begin(); gpuIt != gpus.end(); ++gpuIt) {
|
|
desc[descIndex++] = {HSA_ACCESS_PERMISSION_RO, *gpuIt};
|
|
}
|
|
ASSERT_SUCCESS(hsa_amd_vmem_set_access(addrRange, 10 * granule_size, desc, gpus.size()));
|
|
}
|
|
|
|
/* Verity pointer info accessible agents on mapped addresses */
|
|
ptrInfo.size = sizeof(ptrInfo);
|
|
ASSERT_SUCCESS(hsa_amd_pointer_info(addrRange, &ptrInfo, &malloc, &num_agents_accessible,
|
|
&agents_accessible));
|
|
ASSERT_EQ(ptrInfo.type, HSA_EXT_POINTER_TYPE_HSA_VMEM);
|
|
ASSERT_EQ(ptrInfo.sizeInBytes, sizeof_mem_handle); // size matches memory handle
|
|
ASSERT_EQ(num_agents_accessible, gpus.size());
|
|
ASSERT_NE(agents_accessible, nullptr);
|
|
|
|
/* Verify agents_accessible is valid */
|
|
for (auto gpuIt = gpus.begin(); gpuIt != gpus.end(); ++gpuIt) {
|
|
bool found = false;
|
|
for (auto i = 0; i < gpus.size(); i++) {
|
|
if (agents_accessible[i].handle == (*gpuIt).handle) {
|
|
found = true;
|
|
break;
|
|
}
|
|
}
|
|
ASSERT_EQ(found, true);
|
|
}
|
|
|
|
free(agents_accessible);
|
|
|
|
for (auto gpuIt = gpus.begin(); gpuIt != gpus.end(); ++gpuIt) {
|
|
hsa_access_permission_t perm = HSA_ACCESS_PERMISSION_NONE;
|
|
|
|
ASSERT_SUCCESS(hsa_amd_vmem_get_access(addrRange, &perm, *gpuIt));
|
|
ASSERT_EQ(perm, HSA_ACCESS_PERMISSION_RO);
|
|
|
|
/* addrRangeUnmapped was never mapped, so this is an invalid mapping */
|
|
err = hsa_amd_vmem_get_access(addrRangeUnmapped, &perm, *gpuIt);
|
|
ASSERT_EQ(err, HSA_STATUS_ERROR_INVALID_ALLOCATION);
|
|
}
|
|
|
|
if (gpus.size() > 1) {
|
|
/* Call set_access with a smaller list of agents, this should leave access to
|
|
* the other GPUs unchanged */
|
|
hsa_amd_memory_access_desc_t desc = {HSA_ACCESS_PERMISSION_RW, gpus[1]};
|
|
ASSERT_SUCCESS(hsa_amd_vmem_set_access(addrRange, 10 * granule_size, &desc, 1));
|
|
|
|
size_t i = 0;
|
|
for (i = 0; i < gpus.size(); i++) {
|
|
hsa_access_permission_t perm = HSA_ACCESS_PERMISSION_NONE;
|
|
|
|
/* Only 2nd GPU should have RW access */
|
|
ASSERT_SUCCESS(hsa_amd_vmem_get_access(addrRange, &perm, gpus[i]));
|
|
if (i == 1) {
|
|
ASSERT_EQ(perm, HSA_ACCESS_PERMISSION_RW);
|
|
} else {
|
|
ASSERT_EQ(perm, HSA_ACCESS_PERMISSION_RO);
|
|
}
|
|
}
|
|
}
|
|
|
|
ASSERT_SUCCESS(hsa_amd_vmem_unmap(addrRange, sizeof_mem_handle));
|
|
ASSERT_SUCCESS(hsa_amd_vmem_handle_release(mem_handle));
|
|
ASSERT_SUCCESS(hsa_amd_vmem_address_free(addrRange, sizeof_addrRange));
|
|
ASSERT_SUCCESS(hsa_amd_vmem_address_free(addrRangeUnmapped, sizeof_addrRangeUnmapped));
|
|
}
|
|
|
|
void VirtMemoryTestBasic::TestCreateDestroy(void) {
|
|
hsa_status_t err;
|
|
std::vector<std::shared_ptr<rocrtst::agent_pools_t>> agent_pools;
|
|
|
|
if (verbosity() > 0) {
|
|
PrintMemorySubtestHeader("CreateDestroy Test");
|
|
}
|
|
bool supp = false;
|
|
ASSERT_SUCCESS(hsa_system_get_info(HSA_AMD_SYSTEM_INFO_VIRTUAL_MEM_API_SUPPORTED, (void*)&supp));
|
|
if (!supp) {
|
|
if (verbosity() > 0) {
|
|
std::cout << " Virtual Memory API not supported on this system - Skipping." << std::endl;
|
|
std::cout << kSubTestSeparator << std::endl;
|
|
}
|
|
return;
|
|
}
|
|
|
|
ASSERT_SUCCESS(rocrtst::GetAgentPools(&agent_pools));
|
|
|
|
auto pool_idx = 0;
|
|
for (auto a : agent_pools) {
|
|
for (auto p : a->pools) {
|
|
TestCreateDestroy(a->agent, p);
|
|
}
|
|
}
|
|
|
|
if (verbosity() > 0) {
|
|
std::cout << " Subtest finished" << std::endl;
|
|
std::cout << kSubTestSeparator << std::endl;
|
|
}
|
|
}
|
|
|
|
void VirtMemoryTestBasic::TestRefCount(hsa_agent_t agent, hsa_amd_memory_pool_t pool) {
|
|
rocrtst::pool_info_t pool_i;
|
|
hsa_device_type_t ag_type;
|
|
char ag_name[64];
|
|
void* addrRangeUnmapped;
|
|
hsa_status_t err;
|
|
void* addrRange;
|
|
|
|
ASSERT_SUCCESS(hsa_agent_get_info(agent, HSA_AGENT_INFO_NAME, ag_name));
|
|
ASSERT_SUCCESS(hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &ag_type));
|
|
ASSERT_SUCCESS(rocrtst::AcquirePoolInfo(pool, &pool_i));
|
|
|
|
if (ag_type != HSA_DEVICE_TYPE_GPU || !pool_i.alloc_allowed) return;
|
|
|
|
size_t granule_size = pool_i.alloc_granule;
|
|
|
|
ASSERT_SUCCESS(hsa_amd_vmem_address_reserve(&addrRange, 10 * granule_size, 0, 0));
|
|
|
|
hsa_amd_vmem_alloc_handle_t mem_handleA1;
|
|
ASSERT_SUCCESS(
|
|
hsa_amd_vmem_handle_create(pool, 10 * granule_size, MEMORY_TYPE_NONE, 0, &mem_handleA1));
|
|
ASSERT_SUCCESS(hsa_amd_vmem_map(addrRange, 10 * granule_size, 0, mem_handleA1, 0));
|
|
|
|
/* Allocate duplicate handle */
|
|
hsa_amd_vmem_alloc_handle_t mem_handleA1Dup;
|
|
ASSERT_SUCCESS(hsa_amd_vmem_retain_alloc_handle(&mem_handleA1Dup, addrRange));
|
|
|
|
/* Try to unmap with incorrect size */
|
|
err = hsa_amd_vmem_unmap(addrRange, 5 * granule_size);
|
|
ASSERT_NE(err, HSA_STATUS_SUCCESS);
|
|
|
|
ASSERT_SUCCESS(hsa_amd_vmem_handle_release(mem_handleA1));
|
|
|
|
/* Try to release duplicate handle twice - second time should fail */
|
|
ASSERT_SUCCESS(hsa_amd_vmem_handle_release(mem_handleA1Dup));
|
|
|
|
/* Already released so should fail*/
|
|
err = hsa_amd_vmem_handle_release(mem_handleA1Dup);
|
|
ASSERT_NE(err, HSA_STATUS_SUCCESS);
|
|
|
|
/* Unmap with correct size - un-mapping after releasing the handle is valid */
|
|
ASSERT_SUCCESS(hsa_amd_vmem_unmap(addrRange, 10 * granule_size));
|
|
|
|
/* Try to free with incorrect size */
|
|
err = hsa_amd_vmem_address_free(addrRange, 5 * granule_size);
|
|
ASSERT_NE(err, HSA_STATUS_SUCCESS);
|
|
|
|
/* Free with correct size */
|
|
ASSERT_SUCCESS(hsa_amd_vmem_address_free(addrRange, 10 * granule_size));
|
|
}
|
|
|
|
void VirtMemoryTestBasic::TestRefCount(void) {
|
|
hsa_status_t err;
|
|
std::vector<std::shared_ptr<rocrtst::agent_pools_t>> agent_pools;
|
|
|
|
if (verbosity() > 0) {
|
|
PrintMemorySubtestHeader("Reference Count Test");
|
|
}
|
|
bool supp = false;
|
|
ASSERT_SUCCESS(hsa_system_get_info(HSA_AMD_SYSTEM_INFO_VIRTUAL_MEM_API_SUPPORTED, (void*)&supp));
|
|
if (!supp) {
|
|
if (verbosity() > 0) {
|
|
std::cout << " Virtual Memory API not supported on this system - Skipping." << std::endl;
|
|
std::cout << kSubTestSeparator << std::endl;
|
|
}
|
|
return;
|
|
}
|
|
ASSERT_SUCCESS(rocrtst::GetAgentPools(&agent_pools));
|
|
|
|
auto pool_idx = 0;
|
|
for (auto a : agent_pools) {
|
|
for (auto p : a->pools) TestRefCount(a->agent, p);
|
|
}
|
|
|
|
if (verbosity() > 0) {
|
|
std::cout << " Subtest finished" << std::endl;
|
|
std::cout << kSubTestSeparator << std::endl;
|
|
}
|
|
}
|
|
|
|
void VirtMemoryTestBasic::TestPartialMapping(hsa_agent_t agent, hsa_amd_memory_pool_t pool) {
|
|
rocrtst::pool_info_t pool_i;
|
|
hsa_device_type_t ag_type;
|
|
char ag_name[64];
|
|
void* addrRangeUnmapped;
|
|
hsa_status_t err;
|
|
void* addrRange;
|
|
|
|
ASSERT_SUCCESS(hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &ag_type));
|
|
|
|
ASSERT_SUCCESS(rocrtst::AcquirePoolInfo(pool, &pool_i));
|
|
|
|
if (ag_type != HSA_DEVICE_TYPE_GPU || !pool_i.alloc_allowed) return;
|
|
|
|
size_t granule_size = pool_i.alloc_granule;
|
|
|
|
/************************************************************************************************
|
|
Map partial chunks within the address range and confirm what overlaps fail.
|
|
Units below are in multiples of granule_size.
|
|
|
|
------------------------------------------------------------------
|
|
| 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 |
|
|
------------------------------------------------------------------
|
|
Step 1: A A A A A A
|
|
Step 2: B B B
|
|
Step 3: B
|
|
Step 4: B B B
|
|
|
|
***********************************************************************************************/
|
|
|
|
ASSERT_SUCCESS(hsa_amd_vmem_address_reserve(&addrRange, 15 * granule_size, 0, 0));
|
|
|
|
hsa_amd_vmem_alloc_handle_t mem_handleA;
|
|
|
|
// Step 1
|
|
ASSERT_SUCCESS(
|
|
hsa_amd_vmem_handle_create(pool, 8 * granule_size, MEMORY_TYPE_NONE, 0, &mem_handleA));
|
|
|
|
ASSERT_SUCCESS(hsa_amd_vmem_map((void*)((uint64_t)addrRange + (2 * granule_size)),
|
|
6 * granule_size, 0, mem_handleA, 0));
|
|
|
|
// Step 2
|
|
hsa_amd_vmem_alloc_handle_t mem_handleB;
|
|
ASSERT_SUCCESS(
|
|
hsa_amd_vmem_handle_create(pool, 8 * granule_size, MEMORY_TYPE_NONE, 0, &mem_handleB));
|
|
|
|
ASSERT_SUCCESS(hsa_amd_vmem_map((void*)((uint64_t)addrRange + (11 * granule_size)),
|
|
3 * granule_size, 0, mem_handleB, 0));
|
|
|
|
// Step 3
|
|
// Should fail as this is exceeding size of address range
|
|
err = hsa_amd_vmem_map((void*)((uint64_t)addrRange + (14 * granule_size)),
|
|
2 * granule_size, 0, mem_handleB, 0);
|
|
ASSERT_NE(err, HSA_STATUS_SUCCESS);
|
|
|
|
ASSERT_SUCCESS(hsa_amd_vmem_map((void*)((uint64_t)addrRange + (14 * granule_size)),
|
|
1 * granule_size, 0, mem_handleB, 0));
|
|
|
|
// Step 4
|
|
// Should fail as this is overlapping with AddressRange[11] already mapped
|
|
err = hsa_amd_vmem_map((void*)((uint64_t)addrRange + (8 * granule_size)),
|
|
4 * granule_size, 0, mem_handleB, 0);
|
|
ASSERT_NE(err, HSA_STATUS_SUCCESS);
|
|
|
|
ASSERT_SUCCESS(hsa_amd_vmem_map((void*)((uint64_t)addrRange + (8 * granule_size)),
|
|
3 * granule_size, 0, mem_handleB, 0));
|
|
|
|
// Done, unmap all
|
|
ASSERT_SUCCESS(
|
|
hsa_amd_vmem_unmap((void*)((uint64_t)addrRange + (2 * granule_size)), 6 * granule_size));
|
|
ASSERT_SUCCESS(
|
|
hsa_amd_vmem_unmap((void*)((uint64_t)addrRange + (8 * granule_size)), 3 * granule_size));
|
|
ASSERT_SUCCESS(
|
|
hsa_amd_vmem_unmap((void*)((uint64_t)addrRange + (11 * granule_size)), 3 * granule_size));
|
|
ASSERT_SUCCESS(
|
|
hsa_amd_vmem_unmap((void*)((uint64_t)addrRange + (14 * granule_size)), 1 * granule_size));
|
|
ASSERT_SUCCESS(hsa_amd_vmem_address_free(addrRange, 15 * granule_size));
|
|
}
|
|
|
|
void VirtMemoryTestBasic::TestPartialMapping(void) {
|
|
hsa_status_t err;
|
|
std::vector<std::shared_ptr<rocrtst::agent_pools_t>> agent_pools;
|
|
|
|
if (verbosity() > 0) {
|
|
PrintMemorySubtestHeader("Partial Mapping Test");
|
|
}
|
|
|
|
bool supp = false;
|
|
ASSERT_SUCCESS(hsa_system_get_info(HSA_AMD_SYSTEM_INFO_VIRTUAL_MEM_API_SUPPORTED, (void*)&supp));
|
|
if (!supp) {
|
|
if (verbosity() > 0) {
|
|
std::cout << " Virtual Memory API not supported on this system - Skipping." << std::endl;
|
|
std::cout << kSubTestSeparator << std::endl;
|
|
}
|
|
return;
|
|
}
|
|
|
|
ASSERT_SUCCESS(rocrtst::GetAgentPools(&agent_pools));
|
|
|
|
auto pool_idx = 0;
|
|
for (auto a : agent_pools) {
|
|
for (auto p : a->pools) TestPartialMapping(a->agent, p);
|
|
}
|
|
|
|
if (verbosity() > 0) {
|
|
std::cout << " Subtest finished" << std::endl;
|
|
std::cout << kSubTestSeparator << std::endl;
|
|
}
|
|
}
|
|
|
|
typedef struct __attribute__((aligned(16))) args_t {
|
|
int* a;
|
|
int* b;
|
|
int* c;
|
|
} args;
|
|
|
|
args* kernArgsVirt = NULL;
|
|
|
|
// Test to check CPU can read & write to GPU memory
|
|
void VirtMemoryTestBasic::CPUAccessToGPUMemoryTest(hsa_agent_t cpuAgent, hsa_agent_t gpuAgent,
|
|
hsa_amd_memory_pool_t device_pool) {
|
|
hsa_status_t err;
|
|
|
|
rocrtst::pool_info_t pool_i;
|
|
ASSERT_SUCCESS(rocrtst::AcquirePoolInfo(device_pool, &pool_i));
|
|
|
|
if (!(pool_i.segment == HSA_AMD_SEGMENT_GLOBAL &&
|
|
pool_i.global_flag == HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_COARSE_GRAINED))
|
|
return;
|
|
|
|
hsa_amd_memory_pool_access_t access;
|
|
hsa_amd_agent_memory_pool_get_info(cpuAgent, device_pool, HSA_AMD_AGENT_MEMORY_POOL_INFO_ACCESS,
|
|
&access);
|
|
if (access == HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED) {
|
|
if (verbosity() > 0) {
|
|
std::cout << " Test not applicable as system is not large bar - Skipping." << std::endl;
|
|
std::cout << kSubTestSeparator << std::endl;
|
|
}
|
|
return;
|
|
}
|
|
if (!pool_i.alloc_allowed || pool_i.alloc_granule == 0 || pool_i.alloc_alignment == 0) {
|
|
if (verbosity() > 0) {
|
|
std::cout << " Test not applicable. Skipping." << std::endl;
|
|
std::cout << kSubTestSeparator << std::endl;
|
|
}
|
|
return;
|
|
}
|
|
|
|
auto max_alloc_size = pool_i.alloc_granule * 100;
|
|
unsigned int max_element = max_alloc_size / sizeof(unsigned int);
|
|
unsigned int* dev_data = NULL;
|
|
unsigned int* host_data = NULL;
|
|
host_data = (unsigned int*)malloc(max_alloc_size);
|
|
|
|
ASSERT_NE(host_data, nullptr);
|
|
|
|
for (unsigned int i = 0; i < max_element; ++i) {
|
|
host_data[i] = i;
|
|
}
|
|
|
|
hsa_amd_memory_access_desc_t permsAccess[] = {{HSA_ACCESS_PERMISSION_RW, cpuAgent},
|
|
{HSA_ACCESS_PERMISSION_RW, gpuAgent}};
|
|
|
|
hsa_amd_vmem_alloc_handle_t mem_handle_host, mem_handle_dev;
|
|
ASSERT_SUCCESS(
|
|
hsa_amd_vmem_address_reserve(reinterpret_cast<void**>(&dev_data), max_alloc_size, 0, 0));
|
|
|
|
ASSERT_NE(dev_data, nullptr);
|
|
|
|
ASSERT_SUCCESS(hsa_amd_vmem_handle_create(device_pool, max_alloc_size, MEMORY_TYPE_NONE, 0,
|
|
&mem_handle_dev));
|
|
ASSERT_SUCCESS(
|
|
hsa_amd_vmem_map(reinterpret_cast<void*>(dev_data), max_alloc_size, 0, mem_handle_dev, 0));
|
|
|
|
// Give device access to host data
|
|
ASSERT_SUCCESS(hsa_amd_vmem_set_access(dev_data, max_alloc_size, permsAccess, 2));
|
|
|
|
// Verify CPU can read & write to GPU memory
|
|
std::cout << " Verify CPU can read & write to GPU memory" << std::endl;
|
|
for (unsigned int i = 0; i < max_element; ++i) {
|
|
dev_data[i] = i; // Write to gpu memory directly
|
|
}
|
|
|
|
for (unsigned int i = 0; i < max_element; ++i) {
|
|
if (host_data[i] != dev_data[i]) { // Reading GPU memory
|
|
fprintf(stdout,
|
|
" Values not mathing !! host_data[%d]:%d ,"
|
|
"dev_data[%d]\n",
|
|
host_data[i], i, dev_data[i]);
|
|
}
|
|
}
|
|
std::cout << " CPU have read & write to GPU memory successfully" << std::endl;
|
|
|
|
ASSERT_SUCCESS(hsa_amd_vmem_unmap(dev_data, max_alloc_size));
|
|
ASSERT_SUCCESS(hsa_amd_vmem_handle_release(mem_handle_dev));
|
|
ASSERT_SUCCESS(hsa_amd_vmem_address_free(reinterpret_cast<void*>(dev_data), max_alloc_size));
|
|
free(host_data);
|
|
}
|
|
|
|
void VirtMemoryTestBasic::CPUAccessToGPUMemoryTest(void) {
|
|
hsa_status_t err;
|
|
// find all cpu agents
|
|
std::vector<hsa_agent_t> cpus;
|
|
ASSERT_SUCCESS(hsa_iterate_agents(rocrtst::IterateCPUAgents, &cpus));
|
|
|
|
// find all gpu agents
|
|
std::vector<hsa_agent_t> gpus;
|
|
ASSERT_SUCCESS(hsa_iterate_agents(rocrtst::IterateGPUAgents, &gpus));
|
|
|
|
if (verbosity() > 0) PrintMemorySubtestHeader("CPU To GPU Access test");
|
|
|
|
bool supp = false;
|
|
ASSERT_SUCCESS(hsa_system_get_info(HSA_AMD_SYSTEM_INFO_VIRTUAL_MEM_API_SUPPORTED, (void*)&supp));
|
|
if (!supp) {
|
|
if (verbosity() > 0) {
|
|
std::cout << " Virtual Memory API not supported on this system - Skipping." << std::endl;
|
|
std::cout << kSubTestSeparator << std::endl;
|
|
}
|
|
return;
|
|
}
|
|
|
|
for (unsigned int i = 0; i < gpus.size(); ++i) {
|
|
hsa_amd_memory_pool_t gpu_pool;
|
|
memset(&gpu_pool, 0, sizeof(gpu_pool));
|
|
ASSERT_SUCCESS(
|
|
hsa_amd_agent_iterate_memory_pools(gpus[i], rocrtst::GetGlobalMemoryPool, &gpu_pool));
|
|
if (gpu_pool.handle == 0) {
|
|
std::cout << " No global mempool in gpu agent" << std::endl;
|
|
return;
|
|
}
|
|
CPUAccessToGPUMemoryTest(cpus[0], gpus[i], gpu_pool);
|
|
}
|
|
if (verbosity() > 0) {
|
|
std::cout << " Subtest finished" << std::endl;
|
|
std::cout << kSubTestSeparator << std::endl;
|
|
}
|
|
}
|
|
|
|
// Test to check GPU can read & write to CPU memory
|
|
void VirtMemoryTestBasic::GPUAccessToCPUMemoryTest(hsa_agent_t cpuAgent, hsa_agent_t gpuAgent,
|
|
hsa_amd_memory_pool_t device_pool) {
|
|
rocrtst::pool_info_t pool_i;
|
|
hsa_device_type_t ag_type;
|
|
char ag_name[64];
|
|
hsa_status_t err;
|
|
|
|
ASSERT_SUCCESS(rocrtst::AcquirePoolInfo(device_pool, &pool_i));
|
|
|
|
if (!pool_i.alloc_allowed || pool_i.segment != HSA_AMD_SEGMENT_GLOBAL ||
|
|
pool_i.global_flag != HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_COARSE_GRAINED)
|
|
return;
|
|
|
|
hsa_amd_memory_pool_access_t access;
|
|
ASSERT_SUCCESS(hsa_amd_agent_memory_pool_get_info(
|
|
cpuAgent, device_pool, HSA_AMD_AGENT_MEMORY_POOL_INFO_ACCESS, &access));
|
|
|
|
if (access == HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED) {
|
|
if (verbosity() > 0) {
|
|
std::cout << " Test not applicable as system is not large bar - Skipping." << std::endl;
|
|
std::cout << kSubTestSeparator << std::endl;
|
|
return;
|
|
}
|
|
}
|
|
|
|
hsa_queue_t* queue = NULL; // command queue
|
|
hsa_signal_t signal = {0}; // completion signal
|
|
|
|
size_t& granule_size = pool_i.alloc_granule;
|
|
size_t alloc_size = granule_size * 100;
|
|
static const int kMemoryAllocSize = 1024;
|
|
unsigned int max_element = alloc_size / sizeof(unsigned int);
|
|
|
|
// get queue size
|
|
uint32_t queue_size = 0;
|
|
ASSERT_SUCCESS(hsa_agent_get_info(gpuAgent, HSA_AGENT_INFO_QUEUE_MAX_SIZE, &queue_size));
|
|
|
|
// create queue
|
|
ASSERT_SUCCESS(
|
|
hsa_queue_create(gpuAgent, queue_size, HSA_QUEUE_TYPE_MULTI, NULL, NULL, 0, 0, &queue));
|
|
|
|
// Find a memory pool that supports kernel arguments.
|
|
hsa_amd_memory_pool_t kernarg_pool;
|
|
ASSERT_SUCCESS(
|
|
hsa_amd_agent_iterate_memory_pools(cpuAgent, rocrtst::GetKernArgMemoryPool, &kernarg_pool));
|
|
|
|
// Get System Memory Pool on the cpuAgent to allocate host side buffers
|
|
hsa_amd_memory_pool_t global_pool;
|
|
ASSERT_SUCCESS(
|
|
hsa_amd_agent_iterate_memory_pools(cpuAgent, rocrtst::GetGlobalMemoryPool, &global_pool));
|
|
|
|
struct host_data_t {
|
|
int data[kMemoryAllocSize * 4];
|
|
int dup_data[kMemoryAllocSize * 4];
|
|
int result[kMemoryAllocSize * 4];
|
|
};
|
|
|
|
struct dev_data_t {
|
|
int result[kMemoryAllocSize * 4];
|
|
};
|
|
|
|
|
|
struct host_data_t* host_data;
|
|
struct dev_data_t* dev_data;
|
|
|
|
ASSERT_SUCCESS(hsa_amd_memory_pool_allocate(global_pool, sizeof(*host_data), 0,
|
|
reinterpret_cast<void**>(&host_data)));
|
|
|
|
// Allow gpuAgent access to all allocated system memory.
|
|
ASSERT_SUCCESS(hsa_amd_agents_allow_access(1, &gpuAgent, NULL, host_data));
|
|
ASSERT_SUCCESS(hsa_amd_vmem_address_reserve((void**)&dev_data, sizeof(*dev_data), 0, 0));
|
|
|
|
hsa_amd_vmem_alloc_handle_t mem_handle;
|
|
|
|
ASSERT_SUCCESS(
|
|
hsa_amd_vmem_handle_create(device_pool, sizeof(*dev_data), MEMORY_TYPE_NONE, 0, &mem_handle));
|
|
ASSERT_SUCCESS(hsa_amd_vmem_map(dev_data, sizeof(*dev_data), 0, mem_handle, 0));
|
|
|
|
// Give host and device access to device data
|
|
hsa_amd_memory_access_desc_t permsAccess[] = {{HSA_ACCESS_PERMISSION_RW, gpuAgent},
|
|
{HSA_ACCESS_PERMISSION_RW, cpuAgent}};
|
|
|
|
ASSERT_SUCCESS(hsa_amd_vmem_set_access(dev_data, sizeof(*dev_data), permsAccess, 2));
|
|
|
|
// Allocate the kernel argument buffer from the kernarg_pool.
|
|
ASSERT_SUCCESS(hsa_amd_memory_pool_allocate(kernarg_pool, sizeof(args_t), 0,
|
|
reinterpret_cast<void**>(&kernArgsVirt)));
|
|
|
|
// initialize the host buffers
|
|
for (int i = 0; i < kMemoryAllocSize; ++i) {
|
|
unsigned int seed = time(NULL);
|
|
host_data->data[i] = 1 + rand_r(&seed) % 1;
|
|
host_data->dup_data[i] = host_data->data[i];
|
|
}
|
|
|
|
memset(host_data->result, 0, sizeof(host_data->result));
|
|
memset(dev_data->result, 0, sizeof(dev_data->result));
|
|
|
|
ASSERT_SUCCESS(hsa_amd_agents_allow_access(1, &gpuAgent, NULL, kernArgsVirt));
|
|
|
|
kernArgsVirt->a = host_data->data;
|
|
kernArgsVirt->b = host_data->result; // system memory passed to gpu for write
|
|
kernArgsVirt->c = dev_data->result; // gpu memory to verify that gpu read system data
|
|
|
|
// Create the executable, get symbol by name and load the code object
|
|
set_kernel_file_name("gpuReadWrite_kernels.hsaco");
|
|
set_kernel_name("gpuReadWrite");
|
|
ASSERT_SUCCESS(rocrtst::LoadKernelFromObjFile(this, &gpuAgent));
|
|
|
|
// Fill the dispatch packet with
|
|
// workgroup_size, grid_size, kernelArgs and completion signal
|
|
// Put it on the queue and launch the kernel by ringing the doorbell
|
|
|
|
// create completion signal
|
|
ASSERT_SUCCESS(hsa_signal_create(1, 0, NULL, &signal));
|
|
|
|
// create aql packet
|
|
hsa_kernel_dispatch_packet_t aql;
|
|
memset(&aql, 0, sizeof(aql));
|
|
|
|
// initialize aql packet
|
|
aql.workgroup_size_x = 256;
|
|
aql.workgroup_size_y = 1;
|
|
aql.workgroup_size_z = 1;
|
|
aql.grid_size_x = kMemoryAllocSize;
|
|
aql.grid_size_y = 1;
|
|
aql.grid_size_z = 1;
|
|
aql.private_segment_size = 0;
|
|
aql.group_segment_size = 0;
|
|
aql.kernel_object = kernel_object(); // kernel_code;
|
|
aql.kernarg_address = kernArgsVirt;
|
|
aql.completion_signal = signal;
|
|
|
|
// const uint32_t queue_size = queue->size;
|
|
const uint32_t queue_mask = queue->size - 1;
|
|
|
|
// write to command queue
|
|
uint64_t index = hsa_queue_load_write_index_relaxed(queue);
|
|
hsa_queue_store_write_index_relaxed(queue, index + 1);
|
|
|
|
rocrtst::WriteAQLToQueueLoc(queue, index, &aql);
|
|
|
|
hsa_kernel_dispatch_packet_t* q_base_addr =
|
|
reinterpret_cast<hsa_kernel_dispatch_packet_t*>(queue->base_address);
|
|
rocrtst::AtomicSetPacketHeader(
|
|
(HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE) |
|
|
(1 << HSA_PACKET_HEADER_BARRIER) |
|
|
(HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE) |
|
|
(HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE),
|
|
(1 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS),
|
|
reinterpret_cast<hsa_kernel_dispatch_packet_t*>(&q_base_addr[index & queue_mask]));
|
|
|
|
// ringdoor bell
|
|
hsa_signal_store_relaxed(queue->doorbell_signal, index);
|
|
// wait for the signal and reset it for future use
|
|
while (hsa_signal_wait_scacquire(signal, HSA_SIGNAL_CONDITION_LT, 1, (uint64_t)-1,
|
|
HSA_WAIT_STATE_ACTIVE)) {
|
|
}
|
|
hsa_signal_store_relaxed(signal, 1);
|
|
|
|
// compare device and host side results
|
|
if (verbosity() > 0) {
|
|
std::cout << " Check GPU has read the system memory" << std::endl;
|
|
}
|
|
for (int i = 0; i < kMemoryAllocSize; ++i) {
|
|
// printf("Verifying data at index[%d]\n", i);
|
|
ASSERT_EQ(dev_data->result[i], host_data->dup_data[i]);
|
|
}
|
|
|
|
if (verbosity() > 0) {
|
|
std::cout << " GPU has read the system memory successfully" << std::endl;
|
|
std::cout << " Check GPU has written to system memory" << std::endl;
|
|
}
|
|
for (int i = 0; i < kMemoryAllocSize; ++i) {
|
|
ASSERT_EQ(host_data->result[i], i);
|
|
}
|
|
|
|
if (verbosity() > 0) {
|
|
std::cout << " GPU has written to system memory successfully" << std::endl;
|
|
}
|
|
|
|
ASSERT_SUCCESS(hsa_amd_vmem_unmap(dev_data, sizeof(*dev_data)));
|
|
ASSERT_SUCCESS(hsa_amd_vmem_handle_release(mem_handle));
|
|
|
|
if (dev_data) {
|
|
ASSERT_SUCCESS(hsa_amd_vmem_address_free(dev_data, sizeof(*dev_data)));
|
|
}
|
|
|
|
if (host_data) hsa_memory_free(host_data);
|
|
if (kernArgsVirt) {
|
|
hsa_memory_free(kernArgsVirt);
|
|
}
|
|
if (signal.handle) {
|
|
hsa_signal_destroy(signal);
|
|
}
|
|
if (queue) {
|
|
hsa_queue_destroy(queue);
|
|
}
|
|
}
|
|
|
|
void VirtMemoryTestBasic::GPUAccessToCPUMemoryTest(void) {
|
|
hsa_status_t err;
|
|
// find all cpu agents
|
|
std::vector<hsa_agent_t> cpus;
|
|
ASSERT_SUCCESS(hsa_iterate_agents(rocrtst::IterateCPUAgents, &cpus));
|
|
|
|
// find all gpu agents
|
|
std::vector<hsa_agent_t> gpus;
|
|
ASSERT_SUCCESS(hsa_iterate_agents(rocrtst::IterateGPUAgents, &gpus));
|
|
|
|
if (verbosity() > 0) PrintMemorySubtestHeader("CPU To GPU Access test");
|
|
|
|
bool supp = false;
|
|
ASSERT_SUCCESS(hsa_system_get_info(HSA_AMD_SYSTEM_INFO_VIRTUAL_MEM_API_SUPPORTED, (void*)&supp));
|
|
if (!supp) {
|
|
if (verbosity() > 0) {
|
|
std::cout << " Virtual Memory API not supported on this system - Skipping." << std::endl;
|
|
std::cout << kSubTestSeparator << std::endl;
|
|
}
|
|
return;
|
|
}
|
|
|
|
for (unsigned int i = 0; i < gpus.size(); ++i) {
|
|
hsa_amd_memory_pool_t gpu_pool;
|
|
memset(&gpu_pool, 0, sizeof(gpu_pool));
|
|
ASSERT_SUCCESS(
|
|
hsa_amd_agent_iterate_memory_pools(gpus[i], rocrtst::GetGlobalMemoryPool, &gpu_pool));
|
|
if (gpu_pool.handle == 0) {
|
|
std::cout << "no global mempool in GPU agent" << std::endl;
|
|
return;
|
|
}
|
|
GPUAccessToCPUMemoryTest(cpus[0], gpus[i], gpu_pool);
|
|
}
|
|
if (verbosity() > 0) {
|
|
std::cout << " Subtest finished" << std::endl;
|
|
std::cout << kSubTestSeparator << std::endl;
|
|
}
|
|
}
|
|
|
|
// Test to check GPU can read & write to GPU memory
|
|
void VirtMemoryTestBasic::GPUAccessToGPUMemoryTest(hsa_agent_t cpuAgent, hsa_agent_t gpuAgent,
|
|
hsa_amd_memory_pool_t device_pool) {
|
|
rocrtst::pool_info_t pool_i;
|
|
hsa_device_type_t ag_type;
|
|
char ag_name[64];
|
|
hsa_status_t err;
|
|
|
|
ASSERT_SUCCESS(rocrtst::AcquirePoolInfo(device_pool, &pool_i));
|
|
|
|
if (!pool_i.alloc_allowed || pool_i.segment != HSA_AMD_SEGMENT_GLOBAL ||
|
|
pool_i.global_flag != HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_COARSE_GRAINED)
|
|
return;
|
|
|
|
hsa_amd_memory_pool_access_t access;
|
|
ASSERT_SUCCESS(hsa_amd_agent_memory_pool_get_info(
|
|
cpuAgent, device_pool, HSA_AMD_AGENT_MEMORY_POOL_INFO_ACCESS, &access));
|
|
|
|
if (access == HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED) {
|
|
if (verbosity() > 0) {
|
|
std::cout << " Test not applicable as system is not large bar - Skipping." << std::endl;
|
|
std::cout << kSubTestSeparator << std::endl;
|
|
return;
|
|
}
|
|
}
|
|
|
|
hsa_queue_t* queue = NULL; // command queue
|
|
hsa_signal_t signal = {0}; // completion signal
|
|
|
|
size_t& granule_size = pool_i.alloc_granule;
|
|
size_t alloc_size = granule_size * 100;
|
|
static const int kMemoryAllocSize = 4096;
|
|
unsigned int max_element = alloc_size / sizeof(unsigned int);
|
|
|
|
// get queue size
|
|
uint32_t queue_size = 0;
|
|
ASSERT_SUCCESS(hsa_agent_get_info(gpuAgent, HSA_AGENT_INFO_QUEUE_MAX_SIZE, &queue_size));
|
|
|
|
// create queue
|
|
ASSERT_SUCCESS(
|
|
hsa_queue_create(gpuAgent, queue_size, HSA_QUEUE_TYPE_MULTI, NULL, NULL, 0, 0, &queue));
|
|
|
|
// Find a memory pool that supports kernel arguments.
|
|
hsa_amd_memory_pool_t kernarg_pool;
|
|
ASSERT_SUCCESS(
|
|
hsa_amd_agent_iterate_memory_pools(cpuAgent, rocrtst::GetKernArgMemoryPool, &kernarg_pool));
|
|
|
|
// Get System Memory Pool on the cpuAgent to allocate host side buffers
|
|
hsa_amd_memory_pool_t global_pool;
|
|
ASSERT_SUCCESS(
|
|
hsa_amd_agent_iterate_memory_pools(cpuAgent, rocrtst::GetGlobalMemoryPool, &global_pool));
|
|
|
|
struct host_data_t {
|
|
int data[kMemoryAllocSize * 4];
|
|
int gpuWrite[kMemoryAllocSize * 4];
|
|
int result[kMemoryAllocSize * 4];
|
|
};
|
|
|
|
struct dev_data_t {
|
|
int data[kMemoryAllocSize * 4];
|
|
int result[kMemoryAllocSize * 4];
|
|
};
|
|
|
|
|
|
struct host_data_t* host_data;
|
|
struct dev_data_t* dev_data;
|
|
|
|
ASSERT_SUCCESS(hsa_amd_memory_pool_allocate(global_pool, sizeof(*host_data), 0,
|
|
reinterpret_cast<void**>(&host_data)));
|
|
|
|
// Allow gpuAgent access to all allocated system memory.
|
|
ASSERT_SUCCESS(hsa_amd_agents_allow_access(1, &gpuAgent, NULL, host_data));
|
|
ASSERT_SUCCESS(hsa_amd_vmem_address_reserve((void**)&dev_data, sizeof(*dev_data), 0, 0));
|
|
|
|
hsa_amd_vmem_alloc_handle_t mem_handle;
|
|
|
|
ASSERT_SUCCESS(hsa_amd_vmem_handle_create(device_pool, sizeof(*dev_data), MEMORY_TYPE_PINNED, 0,
|
|
&mem_handle));
|
|
|
|
ASSERT_SUCCESS(hsa_amd_vmem_map(dev_data, sizeof(*dev_data), 0, mem_handle, 0));
|
|
|
|
// Give host and device access to device data
|
|
hsa_amd_memory_access_desc_t permsAccess[] = {{HSA_ACCESS_PERMISSION_RW, gpuAgent}};
|
|
|
|
ASSERT_SUCCESS(
|
|
hsa_amd_vmem_set_access(dev_data, sizeof(*dev_data), permsAccess, ARRAY_SIZE(permsAccess)));
|
|
|
|
// Allocate the kernel argument buffer from the kernarg_pool.
|
|
ASSERT_SUCCESS(hsa_amd_memory_pool_allocate(kernarg_pool, sizeof(args_t), 0,
|
|
reinterpret_cast<void**>(&kernArgsVirt)));
|
|
|
|
// create completion signal
|
|
ASSERT_SUCCESS(hsa_signal_create(1, 0, NULL, &signal));
|
|
|
|
// initialize the host buffers
|
|
for (int i = 0; i < kMemoryAllocSize; ++i) {
|
|
unsigned int seed = time(NULL);
|
|
host_data->data[i] = 1 + rand_r(&seed) % 1;
|
|
}
|
|
|
|
ASSERT_SUCCESS(hsa_amd_memory_async_copy(dev_data->data, gpuAgent, host_data->data, cpuAgent,
|
|
kMemoryAllocSize * 4, 0, NULL, signal));
|
|
|
|
while (hsa_signal_wait_scacquire(signal, HSA_SIGNAL_CONDITION_LT, 1, (uint64_t)-1,
|
|
HSA_WAIT_STATE_ACTIVE)) {
|
|
}
|
|
hsa_signal_store_relaxed(signal, 1);
|
|
|
|
memset(host_data->result, 0, sizeof(host_data->result));
|
|
|
|
ASSERT_SUCCESS(hsa_amd_agents_allow_access(1, &gpuAgent, NULL, kernArgsVirt));
|
|
|
|
|
|
kernArgsVirt->a = dev_data->data;
|
|
kernArgsVirt->b = host_data->gpuWrite; // system memory passed to gpu for write
|
|
kernArgsVirt->c = dev_data->result; // gpu memory to verify that gpu read system data
|
|
|
|
// Create the executable, get symbol by name and load the code object
|
|
set_kernel_file_name("gpuReadWrite_kernels.hsaco");
|
|
set_kernel_name("gpuReadWrite");
|
|
ASSERT_SUCCESS(rocrtst::LoadKernelFromObjFile(this, &gpuAgent));
|
|
|
|
// Fill the dispatch packet with
|
|
// workgroup_size, grid_size, kernelArgs and completion signal
|
|
// Put it on the queue and launch the kernel by ringing the doorbell
|
|
|
|
// create aql packet
|
|
hsa_kernel_dispatch_packet_t aql;
|
|
memset(&aql, 0, sizeof(aql));
|
|
|
|
// initialize aql packet
|
|
aql.workgroup_size_x = 256;
|
|
aql.workgroup_size_y = 1;
|
|
aql.workgroup_size_z = 1;
|
|
aql.grid_size_x = kMemoryAllocSize;
|
|
aql.grid_size_y = 1;
|
|
aql.grid_size_z = 1;
|
|
aql.private_segment_size = 0;
|
|
aql.group_segment_size = 0;
|
|
aql.kernel_object = kernel_object(); // kernel_code;
|
|
aql.kernarg_address = kernArgsVirt;
|
|
aql.completion_signal = signal;
|
|
|
|
const uint32_t queue_mask = queue->size - 1;
|
|
|
|
// write to command queue
|
|
uint64_t index = hsa_queue_load_write_index_relaxed(queue);
|
|
hsa_queue_store_write_index_relaxed(queue, index + 1);
|
|
|
|
rocrtst::WriteAQLToQueueLoc(queue, index, &aql);
|
|
|
|
hsa_kernel_dispatch_packet_t* q_base_addr =
|
|
reinterpret_cast<hsa_kernel_dispatch_packet_t*>(queue->base_address);
|
|
rocrtst::AtomicSetPacketHeader(
|
|
(HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE) |
|
|
(1 << HSA_PACKET_HEADER_BARRIER) |
|
|
(HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE) |
|
|
(HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE),
|
|
(1 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS),
|
|
reinterpret_cast<hsa_kernel_dispatch_packet_t*>(&q_base_addr[index & queue_mask]));
|
|
|
|
// ringdoor bell
|
|
hsa_signal_store_relaxed(queue->doorbell_signal, index);
|
|
// wait for the signal and reset it for future use
|
|
while (hsa_signal_wait_scacquire(signal, HSA_SIGNAL_CONDITION_LT, 1, (uint64_t)-1,
|
|
HSA_WAIT_STATE_ACTIVE)) {
|
|
}
|
|
hsa_signal_store_relaxed(signal, 1);
|
|
|
|
ASSERT_SUCCESS(hsa_amd_memory_async_copy(host_data->result, cpuAgent, dev_data->result, gpuAgent,
|
|
kMemoryAllocSize * 4, 0, NULL, signal));
|
|
|
|
while (hsa_signal_wait_scacquire(signal, HSA_SIGNAL_CONDITION_LT, 1, (uint64_t)-1,
|
|
HSA_WAIT_STATE_ACTIVE)) {
|
|
}
|
|
// compare device and host side results
|
|
if (verbosity() > 0) {
|
|
std::cout << " Check GPU has read the system memory" << std::endl;
|
|
}
|
|
for (int i = 0; i < kMemoryAllocSize; ++i) {
|
|
// printf("Verifying data at index[%d]\n", i);
|
|
ASSERT_EQ(host_data->result[i], host_data->data[i]);
|
|
}
|
|
|
|
if (verbosity() > 0) {
|
|
std::cout << " GPU has read the system memory successfully" << std::endl;
|
|
std::cout << " Check GPU has written to system memory" << std::endl;
|
|
}
|
|
for (int i = 0; i < kMemoryAllocSize; ++i) {
|
|
ASSERT_EQ(host_data->gpuWrite[i], i);
|
|
}
|
|
|
|
if (verbosity() > 0) {
|
|
std::cout << " GPU has written to system memory successfully" << std::endl;
|
|
}
|
|
|
|
ASSERT_SUCCESS(hsa_amd_vmem_unmap(dev_data, sizeof(*dev_data)));
|
|
ASSERT_SUCCESS(hsa_amd_vmem_handle_release(mem_handle));
|
|
|
|
if (dev_data) {
|
|
ASSERT_SUCCESS(hsa_amd_vmem_address_free(dev_data, sizeof(*dev_data)));
|
|
}
|
|
|
|
if (host_data) hsa_memory_free(host_data);
|
|
if (kernArgsVirt) {
|
|
hsa_memory_free(kernArgsVirt);
|
|
}
|
|
if (signal.handle) {
|
|
hsa_signal_destroy(signal);
|
|
}
|
|
if (queue) {
|
|
hsa_queue_destroy(queue);
|
|
}
|
|
}
|
|
|
|
void VirtMemoryTestBasic::GPUAccessToGPUMemoryTest(void) {
|
|
hsa_status_t err;
|
|
// find all cpu agents
|
|
std::vector<hsa_agent_t> cpus;
|
|
ASSERT_SUCCESS(hsa_iterate_agents(rocrtst::IterateCPUAgents, &cpus));
|
|
|
|
// find all gpu agents
|
|
std::vector<hsa_agent_t> gpus;
|
|
ASSERT_SUCCESS(hsa_iterate_agents(rocrtst::IterateGPUAgents, &gpus));
|
|
|
|
if (verbosity() > 0) PrintMemorySubtestHeader("GPU To GPU Access test");
|
|
|
|
bool supp = false;
|
|
ASSERT_SUCCESS(hsa_system_get_info(HSA_AMD_SYSTEM_INFO_VIRTUAL_MEM_API_SUPPORTED, (void*)&supp));
|
|
if (!supp) {
|
|
if (verbosity() > 0) {
|
|
std::cout << " Virtual Memory API not supported on this system - Skipping." << std::endl;
|
|
std::cout << kSubTestSeparator << std::endl;
|
|
}
|
|
return;
|
|
}
|
|
|
|
for (unsigned int i = 0; i < gpus.size(); ++i) {
|
|
hsa_amd_memory_pool_t gpu_pool;
|
|
memset(&gpu_pool, 0, sizeof(gpu_pool));
|
|
ASSERT_SUCCESS(
|
|
hsa_amd_agent_iterate_memory_pools(gpus[i], rocrtst::GetGlobalMemoryPool, &gpu_pool));
|
|
if (gpu_pool.handle == 0) {
|
|
std::cout << "no global mempool in GPU agent" << std::endl;
|
|
return;
|
|
}
|
|
GPUAccessToGPUMemoryTest(cpus[0], gpus[i], gpu_pool);
|
|
}
|
|
if (verbosity() > 0) {
|
|
std::cout << " Subtest finished" << std::endl;
|
|
std::cout << kSubTestSeparator << std::endl;
|
|
}
|
|
}
|
|
|
|
void VirtMemoryTestBasic::MemoryAccountingTest(hsa_agent_t agent, hsa_amd_memory_pool_t pool) {
|
|
if (verbosity() > 0) {
|
|
PrintAgentNameAndType(agent);
|
|
}
|
|
|
|
hsa_status_t err;
|
|
hsa_amd_memory_pool_access_t access;
|
|
ASSERT_SUCCESS(hsa_amd_agent_memory_pool_get_info(agent, pool, HSA_AMD_AGENT_MEMORY_POOL_INFO_ACCESS, &access));
|
|
if (access == HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED) return;
|
|
|
|
rocrtst::pool_info_t pool_info;
|
|
err = rocrtst::AcquirePoolInfo(pool, &pool_info);
|
|
if (err != HSA_STATUS_SUCCESS || !pool_info.alloc_allowed) return;
|
|
|
|
if (pool_info.segment != HSA_AMD_SEGMENT_GLOBAL) return;
|
|
|
|
hsa_device_type_t device_type;
|
|
err = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &device_type);
|
|
if (err != HSA_STATUS_SUCCESS || device_type != HSA_DEVICE_TYPE_GPU) return;
|
|
|
|
bool vmem_supported = false;
|
|
err = hsa_system_get_info(HSA_AMD_SYSTEM_INFO_VIRTUAL_MEM_API_SUPPORTED, &vmem_supported);
|
|
if (err != HSA_STATUS_SUCCESS || !vmem_supported) return;
|
|
|
|
rocrtst::pool_info_t pool_i;
|
|
|
|
ASSERT_SUCCESS(rocrtst::AcquirePoolInfo(pool, &pool_i));
|
|
|
|
size_t granule_size = pool_i.alloc_rec_granule;
|
|
size_t allocation_size = 10 * granule_size;
|
|
|
|
size_t amount_begin = 0, amount_current = 0;
|
|
void* reserved_addr = nullptr;
|
|
hsa_amd_vmem_alloc_handle_t mem_handle;
|
|
|
|
ASSERT_SUCCESS(hsa_agent_get_info(agent, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_MEMORY_AVAIL, &amount_begin));
|
|
ASSERT_SUCCESS(hsa_amd_vmem_address_reserve(&reserved_addr, allocation_size, 0, 0));
|
|
|
|
hsa_amd_pointer_info_t ptr_info = {};
|
|
ptr_info.size = sizeof(ptr_info);
|
|
ASSERT_SUCCESS(hsa_amd_pointer_info(reserved_addr, &ptr_info, nullptr, nullptr, nullptr));
|
|
ASSERT_SUCCESS(hsa_amd_vmem_handle_create(pool, allocation_size, MEMORY_TYPE_NONE, 0, &mem_handle));
|
|
|
|
ASSERT_SUCCESS(hsa_agent_get_info(agent, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_MEMORY_AVAIL, &amount_current));
|
|
ASSERT_NEAR(amount_begin - amount_current, allocation_size, 4096);
|
|
|
|
ASSERT_SUCCESS(hsa_amd_vmem_map(reserved_addr, allocation_size, 0, mem_handle, 0));
|
|
|
|
hsa_amd_memory_access_desc_t access_desc = {HSA_ACCESS_PERMISSION_RW, agent};
|
|
ASSERT_SUCCESS(hsa_amd_vmem_set_access(reserved_addr, allocation_size, &access_desc, 1));
|
|
|
|
ASSERT_SUCCESS(hsa_amd_vmem_unmap(reserved_addr, allocation_size));
|
|
ASSERT_SUCCESS(hsa_amd_vmem_handle_release(mem_handle));
|
|
ASSERT_SUCCESS(hsa_amd_vmem_address_free(reserved_addr, allocation_size));
|
|
|
|
ASSERT_SUCCESS(hsa_agent_get_info(agent, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_MEMORY_AVAIL, &amount_current));
|
|
ASSERT_EQ(amount_begin, amount_current);
|
|
}
|
|
|
|
void VirtMemoryTestBasic::MemoryAccountingTest(void) {
|
|
hsa_status_t err;
|
|
std::vector<std::shared_ptr<rocrtst::agent_pools_t>> agent_pools;
|
|
|
|
if (verbosity() > 0) {
|
|
PrintMemorySubtestHeader("Memory Accounting Test");
|
|
}
|
|
|
|
err = rocrtst::GetAgentPools(&agent_pools);
|
|
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
|
|
|
|
auto pool_idx = 0;
|
|
for (auto a : agent_pools) {
|
|
for (auto p : a->pools) {
|
|
if (verbosity() > 0) {
|
|
std::cout << " Pool " << pool_idx++ << ":" << std::endl;
|
|
}
|
|
MemoryAccountingTest(a->agent, p);
|
|
}
|
|
}
|
|
|
|
if (verbosity() > 0) {
|
|
std::cout << " Subtest finished" << std::endl;
|
|
std::cout << kSubTestSeparator << std::endl;
|
|
}
|
|
}
|
|
|
|
void VirtMemoryTestBasic::NonContiguousChunks(hsa_agent_t cpuAgent, hsa_agent_t gpuAgent,
|
|
hsa_amd_memory_pool_t device_pool) {
|
|
rocrtst::pool_info_t pool_i;
|
|
hsa_device_type_t ag_type;
|
|
char ag_name[64];
|
|
hsa_status_t err;
|
|
|
|
ASSERT_SUCCESS(rocrtst::AcquirePoolInfo(device_pool, &pool_i));
|
|
|
|
if (!pool_i.alloc_allowed || pool_i.segment != HSA_AMD_SEGMENT_GLOBAL ||
|
|
pool_i.global_flag != HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_COARSE_GRAINED)
|
|
return;
|
|
|
|
hsa_amd_memory_pool_access_t access;
|
|
ASSERT_SUCCESS(hsa_amd_agent_memory_pool_get_info(
|
|
cpuAgent, device_pool, HSA_AMD_AGENT_MEMORY_POOL_INFO_ACCESS, &access));
|
|
|
|
if (access == HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED) {
|
|
if (verbosity() > 0) {
|
|
std::cout << " Test not applicable as system is not large bar - Skipping." << std::endl;
|
|
std::cout << kSubTestSeparator << std::endl;
|
|
return;
|
|
}
|
|
}
|
|
|
|
size_t& granule_size = pool_i.alloc_granule;
|
|
size_t alloc_size = granule_size * 512;
|
|
const unsigned NUM_BUFFERS = 6;
|
|
|
|
void* addr;
|
|
void* addr_chunks[NUM_BUFFERS];
|
|
hsa_amd_vmem_alloc_handle_t mem_handles[NUM_BUFFERS];
|
|
|
|
static const int kMemoryAllocSize = 4096;
|
|
unsigned int max_element = alloc_size / sizeof(unsigned int);
|
|
|
|
ASSERT_SUCCESS(hsa_amd_vmem_address_reserve((void**)&addr, NUM_BUFFERS * alloc_size, 0, 0));
|
|
|
|
for (unsigned i = 0; i < NUM_BUFFERS; i++) {
|
|
// Allocate 6 separate memory memory handles
|
|
ASSERT_SUCCESS(hsa_amd_vmem_handle_create(device_pool, alloc_size, MEMORY_TYPE_PINNED, 0,
|
|
&(mem_handles[i])));
|
|
addr_chunks[i] = ((uint8_t*)addr) + (i * alloc_size);
|
|
}
|
|
|
|
for (unsigned i = 0; i < NUM_BUFFERS; i++) {
|
|
// Map each chunk in reverse order
|
|
ASSERT_SUCCESS(hsa_amd_vmem_map(addr_chunks[i], alloc_size, 0, mem_handles[NUM_BUFFERS - i - 1],
|
|
alloc_size));
|
|
}
|
|
|
|
hsa_amd_memory_access_desc_t permsAccess[] = {{HSA_ACCESS_PERMISSION_RW, gpuAgent}};
|
|
|
|
ASSERT_SUCCESS(hsa_amd_vmem_set_access(addr, NUM_BUFFERS * alloc_size, permsAccess,
|
|
ARRAY_SIZE(permsAccess)));
|
|
|
|
for (unsigned i = 0; i < NUM_BUFFERS; i++) {
|
|
// TODO Map them in opposite order
|
|
ASSERT_SUCCESS(hsa_amd_vmem_unmap(addr_chunks[i], alloc_size));
|
|
}
|
|
|
|
ASSERT_SUCCESS(hsa_amd_vmem_address_free(addr, NUM_BUFFERS * alloc_size));
|
|
}
|
|
|
|
void VirtMemoryTestBasic::NonContiguousChunks(void) {
|
|
hsa_status_t err;
|
|
|
|
if (verbosity() > 0) PrintMemorySubtestHeader("GPU To GPU Access test");
|
|
|
|
bool supp = false;
|
|
ASSERT_SUCCESS(hsa_system_get_info(HSA_AMD_SYSTEM_INFO_VIRTUAL_MEM_API_SUPPORTED, (void*)&supp));
|
|
if (!supp) {
|
|
if (verbosity() > 0) {
|
|
std::cout << " Virtual Memory API not supported on this system - Skipping." << std::endl;
|
|
std::cout << kSubTestSeparator << std::endl;
|
|
}
|
|
return;
|
|
}
|
|
|
|
// find all cpu agents
|
|
std::vector<hsa_agent_t> cpus;
|
|
ASSERT_SUCCESS(hsa_iterate_agents(rocrtst::IterateCPUAgents, &cpus));
|
|
|
|
// find all gpu agents
|
|
std::vector<hsa_agent_t> gpus;
|
|
ASSERT_SUCCESS(hsa_iterate_agents(rocrtst::IterateGPUAgents, &gpus));
|
|
|
|
for (unsigned int i = 0; i < gpus.size(); ++i) {
|
|
hsa_amd_memory_pool_t gpu_pool;
|
|
memset(&gpu_pool, 0, sizeof(gpu_pool));
|
|
ASSERT_SUCCESS(
|
|
hsa_amd_agent_iterate_memory_pools(gpus[i], rocrtst::GetGlobalMemoryPool, &gpu_pool));
|
|
if (gpu_pool.handle == 0) {
|
|
std::cout << "no global mempool in GPU agent" << std::endl;
|
|
return;
|
|
}
|
|
NonContiguousChunks(cpus[0], gpus[i], gpu_pool);
|
|
}
|
|
if (verbosity() > 0) {
|
|
std::cout << " Subtest finished" << std::endl;
|
|
std::cout << kSubTestSeparator << std::endl;
|
|
}
|
|
}
|
|
|
|
void VirtMemoryTestBasic::SetUp(void) {
|
|
hsa_status_t err;
|
|
|
|
TestBase::SetUp();
|
|
|
|
ASSERT_SUCCESS(rocrtst::SetDefaultAgents(this));
|
|
ASSERT_SUCCESS(rocrtst::SetPoolsTypical(this));
|
|
|
|
return;
|
|
}
|
|
|
|
void VirtMemoryTestBasic::Run(void) {
|
|
// Compare required profile for this test case with what we're actually
|
|
// running on
|
|
if (!rocrtst::CheckProfile(this)) {
|
|
return;
|
|
}
|
|
|
|
TestBase::Run();
|
|
}
|
|
|
|
void VirtMemoryTestBasic::DisplayTestInfo(void) { TestBase::DisplayTestInfo(); }
|
|
|
|
void VirtMemoryTestBasic::DisplayResults(void) const {
|
|
// Compare required profile for this test case with what we're actually
|
|
// running on
|
|
if (!rocrtst::CheckProfile(this)) {
|
|
return;
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
void VirtMemoryTestBasic::Close() {
|
|
// This will close handles opened within rocrtst utility calls and call
|
|
// hsa_shut_down(), so it should be done after other hsa cleanup
|
|
TestBase::Close();
|
|
}
|
|
|
|
VirtMemoryTestInterProcess::VirtMemoryTestInterProcess(void) : TestBase() {
|
|
set_title("ROCr Virtual Memory Test - InterProcess ");
|
|
set_description(" Tests Virtual Memory API with memory shared between two processes");
|
|
}
|
|
|
|
VirtMemoryTestInterProcess::~VirtMemoryTestInterProcess(void) {}
|
|
|
|
// See if the other process wrote an error value to the token; if not, write
|
|
// the newVal to the token.
|
|
static int CheckAndSetToken(std::atomic<int>* token, int newVal) {
|
|
if (*token == -1) {
|
|
return -1;
|
|
} else {
|
|
*token = newVal;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static void ClearShared(SharedVirtMem* s) {
|
|
s->token = 0;
|
|
s->count = 0;
|
|
s->size = 0;
|
|
s->child_status = 0;
|
|
s->parent_status = 0;
|
|
memset(&s->sv, 0, sizeof(s->sv));
|
|
}
|
|
|
|
// Any 1-time setup involving member variables used in the rest of the test
|
|
// should be done here.
|
|
void VirtMemoryTestInterProcess::SetUp(void) {
|
|
hsa_status_t err;
|
|
|
|
// We must fork process before doing HSA stuff, specifically, hsa_init, as
|
|
// each process needs to do this.
|
|
// Allocate linux shared_ memory.
|
|
shared_ = reinterpret_cast<SharedVirtMem*>(mmap(
|
|
nullptr, sizeof(SharedVirtMem), PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0));
|
|
ASSERT_NE(shared_, MAP_FAILED) << "mmap failed to allocated shared_ memory";
|
|
|
|
// Initialize shared control block to zeros. The field "token"
|
|
// is used to signal state changes between the 2 processes.
|
|
ClearShared(shared_);
|
|
|
|
if (socketpair(AF_UNIX, SOCK_DGRAM, 0, shared_->sv) != 0) {
|
|
std::cout << "Failed to create Unix-domain socket pair" << std::endl;
|
|
ASSERT_EQ(0, 1);
|
|
}
|
|
|
|
// Spawn second process and verify communication
|
|
child_ = 0;
|
|
child_ = fork();
|
|
ASSERT_NE(-1, child_) << "fork failed";
|
|
std::atomic<int>* token = &shared_->token;
|
|
if (child_ != 0) {
|
|
parentProcess_ = true;
|
|
|
|
// Signal to other process we are waiting, and then wait...
|
|
*token = 1;
|
|
while (*token == 1) {
|
|
sched_yield();
|
|
}
|
|
|
|
PROCESS_LOG("Second process observed, handshake...\n");
|
|
*token = 1;
|
|
while (*token == 1) {
|
|
sched_yield();
|
|
}
|
|
|
|
} else {
|
|
parentProcess_ = false;
|
|
set_verbosity(0);
|
|
PROCESS_LOG("Second process running.\n");
|
|
|
|
while (*token == 0) {
|
|
sched_yield();
|
|
}
|
|
|
|
int ret;
|
|
ret = CheckAndSetToken(token, 0);
|
|
ASSERT_EQ(0, ret) << "Error detected in child process\n";
|
|
// Wait for handshake
|
|
while (*token == 0) {
|
|
sched_yield();
|
|
}
|
|
ret = CheckAndSetToken(token, 0);
|
|
ASSERT_EQ(0, ret) << "Error detected in child process\n";
|
|
}
|
|
|
|
TestBase::SetUp();
|
|
|
|
ASSERT_SUCCESS(rocrtst::SetDefaultAgents(this));
|
|
ASSERT_SUCCESS(rocrtst::SetPoolsTypical(this));
|
|
|
|
ASSERT_SUCCESS(hsa_amd_memory_pool_get_info(
|
|
device_pool(), HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE, &min_gpu_mem_granule));
|
|
|
|
ASSERT_SUCCESS(hsa_amd_memory_pool_get_info(
|
|
device_pool(), HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_REC_GRANULE, &rec_gpu_mem_granule));
|
|
|
|
return;
|
|
}
|
|
|
|
void VirtMemoryTestInterProcess::Run(void) {
|
|
// Compare required profile for this test case with what we're actually
|
|
// running on
|
|
if (!rocrtst::CheckProfile(this)) {
|
|
return;
|
|
}
|
|
|
|
TestBase::Run();
|
|
|
|
// Note: Close() (and hsa_shut_down()) will be called from main()
|
|
// processOne is true for parent process, false for child process
|
|
if (parentProcess_) {
|
|
ParentProcessImpl();
|
|
} else {
|
|
ChildProcessImpl();
|
|
exit(0);
|
|
}
|
|
}
|
|
|
|
void VirtMemoryTestInterProcess::DisplayTestInfo(void) { TestBase::DisplayTestInfo(); }
|
|
|
|
void VirtMemoryTestInterProcess::DisplayResults(void) const {
|
|
// Compare required profile for this test case with what we're actually
|
|
// running on
|
|
if (!rocrtst::CheckProfile(this)) {
|
|
return;
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
void VirtMemoryTestInterProcess::Close() {
|
|
// This will close handles opened within rocrtst utility calls and call
|
|
// hsa_shut_down(), so it should be done after other hsa cleanup
|
|
TestBase::Close();
|
|
}
|
|
|
|
/* Send the dmabuf_fd to another process via Unix socket */
|
|
int VirtMemoryTestInterProcess::SendDmaBufFd(int socket, int dmabuf_fd) {
|
|
char* iov_str = (char*)"rocrtst";
|
|
struct msghdr msg = {0};
|
|
char buf[CMSG_SPACE(sizeof(dmabuf_fd))];
|
|
|
|
memset(buf, '\0', sizeof(buf));
|
|
|
|
struct iovec io = {.iov_base = iov_str, .iov_len = strlen(iov_str)};
|
|
|
|
msg.msg_iov = &io;
|
|
msg.msg_iovlen = 1;
|
|
msg.msg_control = buf;
|
|
msg.msg_controllen = sizeof(buf);
|
|
|
|
struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg);
|
|
cmsg->cmsg_level = SOL_SOCKET;
|
|
cmsg->cmsg_type = SCM_RIGHTS;
|
|
cmsg->cmsg_len = CMSG_LEN(sizeof(dmabuf_fd));
|
|
|
|
// memmove(CMSG_DATA(cmsg), &dmabuf_fd, sizeof(dmabuf_fd));
|
|
memcpy(CMSG_DATA(cmsg), &dmabuf_fd, sizeof(dmabuf_fd));
|
|
|
|
msg.msg_controllen = CMSG_SPACE(sizeof(dmabuf_fd));
|
|
|
|
size_t sent = sendmsg(socket, &msg, 0);
|
|
|
|
return (sent < 0) ? -1 : 0;
|
|
}
|
|
|
|
/* Receive the dmabuf_fd to from process via Unix socket */
|
|
int VirtMemoryTestInterProcess::ReceiveDmaBufFd(int socket) {
|
|
struct msghdr msg = {0};
|
|
|
|
/* On Mac OS X, the struct iovec is needed, even if it points to minimal data */
|
|
char m_buffer[1];
|
|
struct iovec io = {.iov_base = m_buffer, .iov_len = sizeof(m_buffer)};
|
|
msg.msg_iov = &io;
|
|
msg.msg_iovlen = 1;
|
|
|
|
char c_buffer[256];
|
|
msg.msg_control = c_buffer;
|
|
msg.msg_controllen = sizeof(c_buffer);
|
|
|
|
size_t rcv = recvmsg(socket, &msg, 0);
|
|
if (rcv < 0) return -1;
|
|
|
|
struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg);
|
|
|
|
int fd;
|
|
memmove(&fd, CMSG_DATA(cmsg), sizeof(fd));
|
|
|
|
return fd;
|
|
}
|
|
|
|
void VirtMemoryTestInterProcess::ParentProcessImpl() {
|
|
hsa_status_t err;
|
|
|
|
void* addrRange = NULL;
|
|
|
|
bool supp = false;
|
|
ASSERT_SUCCESS(hsa_system_get_info(HSA_AMD_SYSTEM_INFO_VIRTUAL_MEM_API_SUPPORTED, (void*)&supp));
|
|
if (!supp) {
|
|
if (verbosity() > 0) {
|
|
std::cout << " Virtual Memory API not supported on this system - Skipping." << std::endl;
|
|
std::cout << kSubTestSeparator << std::endl;
|
|
}
|
|
return;
|
|
}
|
|
|
|
ASSERT_SUCCESS(hsa_amd_vmem_address_reserve(&addrRange, 20 * rec_gpu_mem_granule, 0, 0));
|
|
|
|
hsa_amd_vmem_alloc_handle_t exported_handle;
|
|
ASSERT_SUCCESS(hsa_amd_vmem_handle_create(device_pool(), 20 * rec_gpu_mem_granule,
|
|
MEMORY_TYPE_NONE, 0, &exported_handle));
|
|
|
|
int dmabuf_fd;
|
|
ASSERT_SUCCESS(hsa_amd_vmem_export_shareable_handle(&dmabuf_fd, exported_handle, 0));
|
|
ASSERT_GE(dmabuf_fd, 0);
|
|
|
|
// Signal child process that the gpu buffer is ready to read.
|
|
PROCESS_LOG("Parent: Signalling child proces process\n");
|
|
CheckAndSetToken(&shared_->token, 1);
|
|
|
|
close(shared_->sv[1]);
|
|
ASSERT_EQ(SendDmaBufFd(shared_->sv[0], dmabuf_fd), 0);
|
|
|
|
hsa_amd_vmem_alloc_handle_t imported_handle;
|
|
ASSERT_SUCCESS(hsa_amd_vmem_import_shareable_handle(dmabuf_fd, &imported_handle));
|
|
|
|
/* Test importing same handle twice */
|
|
hsa_amd_vmem_alloc_handle_t imported_handle2;
|
|
ASSERT_SUCCESS(hsa_amd_vmem_import_shareable_handle(dmabuf_fd, &imported_handle2));
|
|
ASSERT_SUCCESS(hsa_amd_vmem_map(addrRange, 10 * rec_gpu_mem_granule, 0, imported_handle, 0));
|
|
ASSERT_SUCCESS(hsa_amd_vmem_unmap(addrRange, 10 * rec_gpu_mem_granule));
|
|
ASSERT_SUCCESS(hsa_amd_vmem_handle_release(imported_handle));
|
|
ASSERT_SUCCESS(hsa_amd_vmem_handle_release(imported_handle2));
|
|
|
|
PROCESS_LOG("Parent: Waiting for child process to signal\n");
|
|
while (shared_->token == 1) {
|
|
sched_yield();
|
|
}
|
|
if (shared_->token != 2) {
|
|
shared_->token = -1;
|
|
}
|
|
FORK_ASSERT_EQ(2, shared_->token, "Parent: Error detected in signaling token\n");
|
|
PROCESS_LOG("Parent: Waking upon signal from child process\n");
|
|
|
|
ASSERT_SUCCESS(hsa_amd_vmem_handle_release(exported_handle));
|
|
|
|
ASSERT_SUCCESS(hsa_amd_vmem_address_free(addrRange, 20 * rec_gpu_mem_granule));
|
|
|
|
PROCESS_LOG("Parent: Virtual Memory test PASSED\n");
|
|
}
|
|
|
|
void VirtMemoryTestInterProcess::ChildProcessImpl() {
|
|
int dmabuf_fd = -1;
|
|
bool supp = false;
|
|
hsa_status_t err;
|
|
ASSERT_SUCCESS(hsa_system_get_info(HSA_AMD_SYSTEM_INFO_VIRTUAL_MEM_API_SUPPORTED, (void*)&supp));
|
|
if (!supp) {
|
|
if (verbosity() > 0) {
|
|
std::cout << " Virtual Memory API not supported on this system - Skipping." << std::endl;
|
|
std::cout << kSubTestSeparator << std::endl;
|
|
}
|
|
return;
|
|
}
|
|
|
|
void* addrRange = NULL;
|
|
ASSERT_SUCCESS(hsa_amd_vmem_address_reserve(&addrRange, 20 * rec_gpu_mem_granule, 0, 0));
|
|
|
|
// Yield until shared token value changes i.e. is updated by parent.
|
|
// Validate parent's update is per expectation
|
|
PROCESS_LOG("Child: Waiting for parent process to signal\n");
|
|
while (shared_->token == 0) {
|
|
sched_yield();
|
|
}
|
|
if (shared_->token != 1) {
|
|
shared_->token = -1;
|
|
}
|
|
FORK_ASSERT_EQ(1, shared_->token, "Child: Error detected in signaling token\n");
|
|
PROCESS_LOG("Child: Waking upon signal from parent process\n");
|
|
|
|
close(shared_->sv[0]);
|
|
dmabuf_fd = ReceiveDmaBufFd(shared_->sv[1]);
|
|
|
|
hsa_amd_vmem_alloc_handle_t imported_handle;
|
|
ASSERT_SUCCESS(hsa_amd_vmem_import_shareable_handle(dmabuf_fd, &imported_handle));
|
|
ASSERT_SUCCESS(hsa_amd_vmem_map(addrRange, 10 * rec_gpu_mem_granule, 0, imported_handle, 0));
|
|
ASSERT_SUCCESS(hsa_amd_vmem_unmap(addrRange, 10 * rec_gpu_mem_granule));
|
|
|
|
PROCESS_LOG("Child: Signalling parent process\n");
|
|
CheckAndSetToken(&shared_->token, 2);
|
|
|
|
ASSERT_SUCCESS(hsa_amd_vmem_handle_release(imported_handle));
|
|
|
|
PROCESS_LOG("Child: Virtual Memory test PASSED\n");
|
|
}
|