From 875fb40a032695b673d09fb1ff1b38cfda1ea589 Mon Sep 17 00:00:00 2001 From: David Yat Sin <77975354+dayatsin-amd@users.noreply.github.com> Date: Wed, 13 Aug 2025 14:21:47 -0400 Subject: [PATCH] Dayatsin/develop vmm pointer info (#305) * rocr: hsa_amd_pointer_info to support VMEM pointers Extend hsa_amd_pointer_info to support virtual memory addresses. If hsa_amd_pointer_info is called on an address that is reserved but not mapped to memory, then the pointer type will be reported as HSA_EXT_POINTER_TYPE_RESERVED_ADDR. If hsa_amd_pointer_info is called on an address that is mapped, then the pointer type will be reported as HSA_EXT_POINTER_TYPE_HSA_VMEM * rocrtst: VirtMemory_Basic_Test test for pointer info Extend rocrtstFunc.VirtMemory_Basic_Test to test for hsa_amd_pointer_info * rocrtst: Add SVM Memory Test --- .../rocrtst/suites/functional/svm_memory.cc | 362 ++++++++++++++++++ .../rocrtst/suites/functional/svm_memory.h | 82 ++++ .../suites/functional/virtual_memory.cc | 75 +++- .../rocrtst/suites/test_common/main.cc | 9 + .../runtime/hsa-runtime/core/inc/runtime.h | 4 + .../hsa-runtime/core/runtime/runtime.cpp | 120 ++++-- .../runtime/hsa-runtime/inc/hsa_ext_amd.h | 9 +- 7 files changed, 624 insertions(+), 37 deletions(-) create mode 100644 projects/rocr-runtime/rocrtst/suites/functional/svm_memory.cc create mode 100644 projects/rocr-runtime/rocrtst/suites/functional/svm_memory.h diff --git a/projects/rocr-runtime/rocrtst/suites/functional/svm_memory.cc b/projects/rocr-runtime/rocrtst/suites/functional/svm_memory.cc new file mode 100644 index 0000000000..0e7b97dd7d --- /dev/null +++ b/projects/rocr-runtime/rocrtst/suites/functional/svm_memory.cc @@ -0,0 +1,362 @@ +/* + * ============================================================================= + * ROC Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2025, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD ROC Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + + +#include +#include +#include +#include +#include +#include +#include + +#include "suites/functional/svm_memory.h" +#include "common/base_rocr_utils.h" +#include "common/common.h" +#include "common/helper_funcs.h" +#include "common/hsatimer.h" +//#include "common/concurrent_utils.h" +#include "gtest/gtest.h" +#include "hsa/hsa.h" + +// Wrap printf to add first or second process indicator +#define PROCESS_LOG(format, ...) \ + { \ + if (verbosity() >= VERBOSE_STANDARD || !parentProcess_) { \ + fprintf(stdout, "line:%d P%u: " format, __LINE__, static_cast(!parentProcess_), \ + ##__VA_ARGS__); \ + } \ + } + +// Fork safe ASSERT_EQ. +#define MSG(y, msg, ...) msg +#define Y(y, ...) y + +#define FORK_ASSERT_EQ(x, ...) \ + if ((x) != (Y(__VA_ARGS__))) { \ + if ((x) != (Y(__VA_ARGS__))) { \ + std::cout << MSG(__VA_ARGS__, ""); \ + if (parentProcess_) { \ + shared_->parent_status = -1; \ + } else { \ + shared_->child_status = -1; \ + } \ + ASSERT_EQ(x, Y(__VA_ARGS__)); \ + } \ + } + +static const char kSubTestSeparator[] = " **************************"; + +static void PrintMemorySubtestHeader(const char* header) { + std::cout << " *** Virtual Memory Functional Subtest: " << header << " ***" << std::endl; +} + +SvmMemoryTestBasic::SvmMemoryTestBasic(void) : TestBase() { + set_title("ROCr SVM Memory Basic Tests"); + set_description(" Tests SVM memory API functions"); +} + +SvmMemoryTestBasic::~SvmMemoryTestBasic(void) {} + +// Test to check that GPU can read and write to SVM memory. +void SvmMemoryTestBasic::TestCreateDestroy(hsa_agent_t agent, hsa_amd_memory_pool_t pool) { + hsa_agent_t* agents_accessible; + hsa_amd_pointer_info_t ptrInfo = {}; + uint32_t num_agents_accessible = 0; + std::vector gpus; + rocrtst::pool_info_t pool_i; + hsa_device_type_t ag_type; + char ag_name[64]; + void* addressRange; + hsa_status_t err; + hsa_agent_t cpu_agent; + + typedef struct __attribute__((aligned(16))) args_t { + int* a; + int* b; + int* c; + } args; + args* kernArgs = NULL; + + static const int kMemoryAllocSize = 1024; + + ASSERT_SUCCESS(hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &ag_type)); + ASSERT_SUCCESS(hsa_agent_get_info(agent, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_NEAREST_CPU, &cpu_agent)); + + ASSERT_SUCCESS(rocrtst::AcquirePoolInfo(pool, &pool_i)); + + if (ag_type != HSA_DEVICE_TYPE_GPU || !pool_i.alloc_allowed) return; + + hsa_queue_t* queue = NULL; // command queue + hsa_signal_t signal = {0}; // completion signal + + /* Create a queue to enqueue kernel */ + // get queue size + uint32_t queue_size = 0; + ASSERT_SUCCESS(hsa_agent_get_info(agent, HSA_AGENT_INFO_QUEUE_MAX_SIZE, &queue_size)); + + // create queue + ASSERT_SUCCESS( + hsa_queue_create(agent, queue_size, HSA_QUEUE_TYPE_MULTI, NULL, NULL, 0, 0, &queue)); + + // Find a memory pool that supports kernel arguments. + hsa_amd_memory_pool_t kernarg_pool; + ASSERT_SUCCESS( + hsa_amd_agent_iterate_memory_pools(cpu_agent, rocrtst::GetKernArgMemoryPool, &kernarg_pool)); + + struct host_data_t { + int data[kMemoryAllocSize * 4]; + int dup_data[kMemoryAllocSize * 4]; + int result[kMemoryAllocSize * 4]; + }; + + struct dev_data_t { + int result[kMemoryAllocSize * 4]; + }; + + struct host_data_t* host_data = NULL; + struct dev_data_t* dev_data = NULL; + + /* Set up host_data */ + ASSERT_SUCCESS(hsa_amd_vmem_address_reserve((void**)&host_data, sizeof(host_data_t), 0, HSA_AMD_VMEM_ADDRESS_NO_REGISTER)); + ASSERT_NE(host_data, nullptr); + + /* Verify that pointer info for unmapped VA's return expected values */ + ptrInfo.size = sizeof(ptrInfo); + ASSERT_SUCCESS(hsa_amd_pointer_info(host_data, &ptrInfo, nullptr, nullptr, nullptr)); + ASSERT_EQ(ptrInfo.type, HSA_EXT_POINTER_TYPE_RESERVED_ADDR); + ASSERT_EQ(ptrInfo.hostBaseAddress, host_data); + /* For unmapped VA, then size is equal to size of address reservation */ + ASSERT_EQ(ptrInfo.sizeInBytes, sizeof(host_data_t)); + ASSERT_EQ(num_agents_accessible, 0); + + ptrInfo.size = sizeof(ptrInfo); + ASSERT_SUCCESS(hsa_amd_pointer_info(&host_data->result, &ptrInfo, nullptr, nullptr, nullptr)); + ASSERT_EQ(ptrInfo.type, HSA_EXT_POINTER_TYPE_RESERVED_ADDR); + ASSERT_EQ(ptrInfo.hostBaseAddress, host_data); + /* For unmapped VA, then size is equal to size of address reservation */ + ASSERT_EQ(ptrInfo.sizeInBytes, sizeof(host_data_t)); + ASSERT_EQ(num_agents_accessible, 0); + if (verbosity() > 0) { + std::cout << " Pointer info on reserved address OK" << std::endl; + } + + std::vector host_attrs; + host_attrs.push_back({HSA_AMD_SVM_ATTRIB_PREFERRED_LOCATION, cpu_agent.handle}); + host_attrs.push_back({HSA_AMD_SVM_ATTRIB_AGENT_ACCESSIBLE, agent.handle}); + ASSERT_SUCCESS(hsa_amd_svm_attributes_set(host_data, sizeof(host_data_t), host_attrs.data(), host_attrs.size())); + + /* Set up dev_data */ + ASSERT_SUCCESS(hsa_amd_vmem_address_reserve((void**)&dev_data, sizeof(dev_data_t), 0, HSA_AMD_VMEM_ADDRESS_NO_REGISTER)); + ASSERT_NE(dev_data, nullptr); + + std::vector dev_attrs; + dev_attrs.push_back({HSA_AMD_SVM_ATTRIB_PREFERRED_LOCATION, agent.handle}); + dev_attrs.push_back({HSA_AMD_SVM_ATTRIB_AGENT_ACCESSIBLE, agent.handle}); + + ASSERT_SUCCESS(hsa_amd_svm_attributes_set(dev_data, sizeof(dev_data_t), dev_attrs.data(), dev_attrs.size())); + + // initialize the host buffers + for (int i = 0; i < kMemoryAllocSize; ++i) { + unsigned int seed = time(NULL); + host_data->data[i] = 1 + rand_r(&seed) % 1; + host_data->dup_data[i] = host_data->data[i]; + } + + memset(host_data->result, 0, sizeof(host_data->result)); + memset(dev_data->result, 0, sizeof(dev_data->result)); + + // Allocate the kernel argument buffer from the kernarg_pool. + ASSERT_SUCCESS(hsa_amd_memory_pool_allocate(kernarg_pool, sizeof(args_t), 0, + reinterpret_cast(&kernArgs))); + + ASSERT_SUCCESS(hsa_amd_agents_allow_access(1, &agent, NULL, kernArgs)); + kernArgs->a = host_data->data; + kernArgs->b = host_data->result; // system memory passed to gpu for write + kernArgs->c = dev_data->result; // gpu memory to verify that gpu read system data + + // Create the executable, get symbol by name and load the code object + set_kernel_file_name("gpuReadWrite_kernels.hsaco"); + set_kernel_name("gpuReadWrite"); + ASSERT_SUCCESS(rocrtst::LoadKernelFromObjFile(this, &agent)); + + ASSERT_SUCCESS(hsa_signal_create(1, 0, NULL, &signal)); + + // create aql packet + hsa_kernel_dispatch_packet_t aql; + memset(&aql, 0, sizeof(aql)); + + // initialize aql packet + aql.workgroup_size_x = 256; + aql.workgroup_size_y = 1; + aql.workgroup_size_z = 1; + aql.grid_size_x = kMemoryAllocSize; + aql.grid_size_y = 1; + aql.grid_size_z = 1; + aql.private_segment_size = 0; + aql.group_segment_size = 0; + aql.kernel_object = kernel_object(); // kernel_code; + aql.kernarg_address = kernArgs; + aql.completion_signal = signal; + + const uint32_t queue_mask = queue->size - 1; + + // write to command queue + uint64_t index = hsa_queue_load_write_index_relaxed(queue); + hsa_queue_store_write_index_relaxed(queue, index + 1); + + rocrtst::WriteAQLToQueueLoc(queue, index, &aql); + + hsa_kernel_dispatch_packet_t* q_base_addr = + reinterpret_cast(queue->base_address); + rocrtst::AtomicSetPacketHeader( + (HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE) | + (1 << HSA_PACKET_HEADER_BARRIER) | + (HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE) | + (HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE), + (1 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS), + reinterpret_cast(&q_base_addr[index & queue_mask])); + + // ringdoor bell + hsa_signal_store_relaxed(queue->doorbell_signal, index); + // wait for the signal and reset it for future use + while (hsa_signal_wait_scacquire(signal, HSA_SIGNAL_CONDITION_LT, 1, (uint64_t)-1, + HSA_WAIT_STATE_ACTIVE)) { + } + + // compare device and host side results + if (verbosity() > 0) { + std::cout << " Check GPU has read the host memory" << std::endl; + } + for (int i = 0; i < kMemoryAllocSize; ++i) { + // printf("Verifying data at index[%d]\n", i); + ASSERT_EQ(dev_data->result[i], host_data->dup_data[i]); + } + + if (verbosity() > 0) { + std::cout << " GPU has read the host memory successfully" << std::endl; + std::cout << " Check GPU has written to host memory" << std::endl; + } + for (int i = 0; i < kMemoryAllocSize; ++i) { + ASSERT_EQ(host_data->result[i], i); + } + + if (verbosity() > 0) { + std::cout << " GPU has written to host memory successfully" << std::endl; + } + + if (kernArgs) { + hsa_memory_free(kernArgs); + } + + if (signal.handle) { + hsa_signal_destroy(signal); + } + if (queue) { + hsa_queue_destroy(queue); + } +} + +void SvmMemoryTestBasic::TestCreateDestroy(void) { + hsa_status_t err; + std::vector> agent_pools; + + if (verbosity() > 0) { + PrintMemorySubtestHeader("CreateDestroy Test"); + } + + ASSERT_SUCCESS(rocrtst::GetAgentPools(&agent_pools)); + + auto pool_idx = 0; + for (auto a : agent_pools) { + for (auto p : a->pools) { + TestCreateDestroy(a->agent, p); + } + } + + if (verbosity() > 0) { + std::cout << " Subtest finished" << std::endl; + std::cout << kSubTestSeparator << std::endl; + } +} + +void SvmMemoryTestBasic::SetUp(void) { + hsa_status_t err; + + TestBase::SetUp(); + + ASSERT_SUCCESS(rocrtst::SetDefaultAgents(this)); + ASSERT_SUCCESS(rocrtst::SetPoolsTypical(this)); + + return; +} + +void SvmMemoryTestBasic::Run(void) { + // Compare required profile for this test case with what we're actually + // running on + if (!rocrtst::CheckProfile(this)) { + return; + } + + TestBase::Run(); +} + +void SvmMemoryTestBasic::DisplayTestInfo(void) { TestBase::DisplayTestInfo(); } + +void SvmMemoryTestBasic::DisplayResults(void) const { + // Compare required profile for this test case with what we're actually + // running on + if (!rocrtst::CheckProfile(this)) { + return; + } + + return; +} + +void SvmMemoryTestBasic::Close() { + // This will close handles opened within rocrtst utility calls and call + // hsa_shut_down(), so it should be done after other hsa cleanup + TestBase::Close(); +} diff --git a/projects/rocr-runtime/rocrtst/suites/functional/svm_memory.h b/projects/rocr-runtime/rocrtst/suites/functional/svm_memory.h new file mode 100644 index 0000000000..c88c854c3c --- /dev/null +++ b/projects/rocr-runtime/rocrtst/suites/functional/svm_memory.h @@ -0,0 +1,82 @@ +/* + * ============================================================================= + * ROC Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2025, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD ROC Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ +#ifndef ROCRTST_SUITES_FUNCTIONAL_SVM_MEMORY_H_ +#define ROCRTST_SUITES_FUNCTIONAL_SVM_MEMORY_H_ + +#include + +#include "common/base_rocr.h" +#include "hsa/hsa.h" +#include "suites/test_common/test_base.h" + +class SvmMemoryTestBasic : public TestBase { + public: + SvmMemoryTestBasic(); + + // @Brief: Destructor for test case of SvmMemoryTestBasic + virtual ~SvmMemoryTestBasic(); + + // @Brief: Setup the environment for measurement + virtual void SetUp(); + + // @Brief: Core measurement execution + virtual void Run(); + + // @Brief: Clean up and retrive the resource + virtual void Close(); + + // @Brief: Display results + virtual void DisplayResults() const; + + // @Brief: Display information about what this test does + virtual void DisplayTestInfo(void); + + void TestCreateDestroy(void); + + private: + void TestCreateDestroy(hsa_agent_t agent, hsa_amd_memory_pool_t pool); +}; + +#endif // ROCRTST_SUITES_FUNCTIONAL_SVM_MEMORY_H_ diff --git a/projects/rocr-runtime/rocrtst/suites/functional/virtual_memory.cc b/projects/rocr-runtime/rocrtst/suites/functional/virtual_memory.cc index 4200d246d5..802b9c82c1 100644 --- a/projects/rocr-runtime/rocrtst/suites/functional/virtual_memory.cc +++ b/projects/rocr-runtime/rocrtst/suites/functional/virtual_memory.cc @@ -101,6 +101,9 @@ VirtMemoryTestBasic::VirtMemoryTestBasic(void) : TestBase() { VirtMemoryTestBasic::~VirtMemoryTestBasic(void) {} void VirtMemoryTestBasic::TestCreateDestroy(hsa_agent_t agent, hsa_amd_memory_pool_t pool) { + hsa_agent_t* agents_accessible; + hsa_amd_pointer_info_t ptrInfo = {}; + uint32_t num_agents_accessible = 0; std::vector gpus; rocrtst::pool_info_t pool_i; hsa_device_type_t ag_type; @@ -116,14 +119,38 @@ void VirtMemoryTestBasic::TestCreateDestroy(hsa_agent_t agent, hsa_amd_memory_po if (ag_type != HSA_DEVICE_TYPE_GPU || !pool_i.alloc_allowed) return; size_t granule_size = pool_i.alloc_granule; + const size_t sizeof_addrRangeUnmapped = 10 * granule_size; + const size_t sizeof_addrRange = 20 * granule_size; ASSERT_SUCCESS(hsa_iterate_agents(rocrtst::IterateGPUAgents, &gpus)); - ASSERT_SUCCESS(hsa_amd_vmem_address_reserve(&addrRange, 20 * granule_size, 0, 0)); - ASSERT_SUCCESS(hsa_amd_vmem_address_reserve(&addrRangeUnmapped, 10 * granule_size, 0, 0)); + ASSERT_SUCCESS(hsa_amd_vmem_address_reserve(&addrRange, sizeof_addrRange, 0, 0)); + ASSERT_SUCCESS(hsa_amd_vmem_address_reserve(&addrRangeUnmapped, sizeof_addrRangeUnmapped, 0, 0)); + + /* Verify that pointer info for unmapped VA's return expected values */ + ptrInfo.size = sizeof(ptrInfo); + ASSERT_SUCCESS(hsa_amd_pointer_info(addrRangeUnmapped, &ptrInfo, &malloc, &num_agents_accessible, + &agents_accessible)); + ASSERT_EQ(ptrInfo.type, HSA_EXT_POINTER_TYPE_RESERVED_ADDR); + ASSERT_EQ(ptrInfo.hostBaseAddress, addrRangeUnmapped); + /* For unmapped VA, then size is equal to size of address reservation */ + ASSERT_EQ(ptrInfo.sizeInBytes, sizeof_addrRangeUnmapped); + ASSERT_EQ(num_agents_accessible, 0); + + /* Verify that pointer info for unmapped VA offset return expected values */ + ptrInfo.size = sizeof(ptrInfo); + ASSERT_SUCCESS(hsa_amd_pointer_info(reinterpret_cast(addrRangeUnmapped) + 10, &ptrInfo, &malloc, + &num_agents_accessible, &agents_accessible)); + ASSERT_EQ(ptrInfo.type, HSA_EXT_POINTER_TYPE_RESERVED_ADDR); + ASSERT_EQ(ptrInfo.hostBaseAddress, + addrRangeUnmapped); // hostBaseAddress is address of reservation instead of offset. + /* For unmapped VA, then size is equal to size of address reservation */ + ASSERT_EQ(ptrInfo.sizeInBytes, sizeof_addrRangeUnmapped); + ASSERT_EQ(num_agents_accessible, 0); hsa_amd_vmem_alloc_handle_t mem_handle; + const size_t sizeof_mem_handle = 10 * granule_size; ASSERT_SUCCESS( - hsa_amd_vmem_handle_create(pool, 10 * granule_size, MEMORY_TYPE_NONE, 0, &mem_handle)); + hsa_amd_vmem_handle_create(pool, sizeof_mem_handle, MEMORY_TYPE_NONE, 0, &mem_handle)); /* Test alloc properties returns correct memory type and pool handle */ hsa_amd_memory_pool_t poolRet; @@ -134,7 +161,7 @@ void VirtMemoryTestBasic::TestCreateDestroy(hsa_agent_t agent, hsa_amd_memory_po ASSERT_EQ(memTypeRet, MEMORY_TYPE_NONE); hsa_amd_vmem_alloc_handle_t mem_handleTypePinned; - ASSERT_SUCCESS(hsa_amd_vmem_handle_create(pool, 10 * granule_size, MEMORY_TYPE_PINNED, 0, + ASSERT_SUCCESS(hsa_amd_vmem_handle_create(pool, sizeof_mem_handle, MEMORY_TYPE_PINNED, 0, &mem_handleTypePinned)); ASSERT_SUCCESS( @@ -143,7 +170,15 @@ void VirtMemoryTestBasic::TestCreateDestroy(hsa_agent_t agent, hsa_amd_memory_po ASSERT_EQ(memTypeRet, MEMORY_TYPE_PINNED); - ASSERT_SUCCESS(hsa_amd_vmem_map(addrRange, 10 * granule_size, 0, mem_handle, 0)); + ASSERT_SUCCESS(hsa_amd_vmem_map(addrRange, sizeof_mem_handle, 0, mem_handle, 0)); + + /* Verity pointer info on mapped addresses */ + ptrInfo.size = sizeof(ptrInfo); + ASSERT_SUCCESS(hsa_amd_pointer_info(addrRange, &ptrInfo, &malloc, &num_agents_accessible, + &agents_accessible)); + ASSERT_EQ(ptrInfo.type, HSA_EXT_POINTER_TYPE_HSA_VMEM); + ASSERT_EQ(ptrInfo.sizeInBytes, sizeof_mem_handle); // size matches memory handle + ASSERT_EQ(num_agents_accessible, 0); // Access to each GPU should be None for (auto gpuIt = gpus.begin(); gpuIt != gpus.end(); ++gpuIt) { @@ -160,10 +195,32 @@ void VirtMemoryTestBasic::TestCreateDestroy(hsa_agent_t agent, hsa_amd_memory_po for (auto gpuIt = gpus.begin(); gpuIt != gpus.end(); ++gpuIt) { desc[descIndex++] = {HSA_ACCESS_PERMISSION_RO, *gpuIt}; } - ASSERT_SUCCESS(hsa_amd_vmem_set_access(addrRange, 10 * granule_size, desc, gpus.size())); } + /* Verity pointer info accessible agents on mapped addresses */ + ptrInfo.size = sizeof(ptrInfo); + ASSERT_SUCCESS(hsa_amd_pointer_info(addrRange, &ptrInfo, &malloc, &num_agents_accessible, + &agents_accessible)); + ASSERT_EQ(ptrInfo.type, HSA_EXT_POINTER_TYPE_HSA_VMEM); + ASSERT_EQ(ptrInfo.sizeInBytes, sizeof_mem_handle); // size matches memory handle + ASSERT_EQ(num_agents_accessible, gpus.size()); + ASSERT_NE(agents_accessible, nullptr); + + /* Verify agents_accessible is valid */ + for (auto gpuIt = gpus.begin(); gpuIt != gpus.end(); ++gpuIt) { + bool found = false; + for (auto i = 0; i < gpus.size(); i++) { + if (agents_accessible[i].handle == (*gpuIt).handle) { + found = true; + break; + } + } + ASSERT_EQ(found, true); + } + + free(agents_accessible); + for (auto gpuIt = gpus.begin(); gpuIt != gpus.end(); ++gpuIt) { hsa_access_permission_t perm = HSA_ACCESS_PERMISSION_NONE; @@ -195,10 +252,10 @@ void VirtMemoryTestBasic::TestCreateDestroy(hsa_agent_t agent, hsa_amd_memory_po } } - ASSERT_SUCCESS(hsa_amd_vmem_unmap(addrRange, 10 * granule_size)); + ASSERT_SUCCESS(hsa_amd_vmem_unmap(addrRange, sizeof_mem_handle)); ASSERT_SUCCESS(hsa_amd_vmem_handle_release(mem_handle)); - ASSERT_SUCCESS(hsa_amd_vmem_address_free(addrRange, 20 * granule_size)); - ASSERT_SUCCESS(hsa_amd_vmem_address_free(addrRangeUnmapped, 10 * granule_size)); + ASSERT_SUCCESS(hsa_amd_vmem_address_free(addrRange, sizeof_addrRange)); + ASSERT_SUCCESS(hsa_amd_vmem_address_free(addrRangeUnmapped, sizeof_addrRangeUnmapped)); } void VirtMemoryTestBasic::TestCreateDestroy(void) { diff --git a/projects/rocr-runtime/rocrtst/suites/test_common/main.cc b/projects/rocr-runtime/rocrtst/suites/test_common/main.cc index 694ffb4b06..5e43ead140 100644 --- a/projects/rocr-runtime/rocrtst/suites/test_common/main.cc +++ b/projects/rocr-runtime/rocrtst/suites/test_common/main.cc @@ -58,6 +58,7 @@ #include "suites/functional/memory_allocation.h" #include "suites/functional/deallocation_notifier.h" #include "suites/functional/virtual_memory.h" +#include "suites/functional/svm_memory.h" #include "suites/performance/dispatch_time.h" #include "suites/performance/memory_async_copy.h" #include "suites/performance/memory_async_copy_numa.h" @@ -382,6 +383,14 @@ TEST(rocrtstFunc, AgentPropertiesTests) { RunCustomTestEpilog(&propTest); } +TEST(rocrtstFunc, SvmMemory_Basic_Test) { + SvmMemoryTestBasic smt; + + RunCustomTestProlog(&smt); + smt.TestCreateDestroy(); + RunCustomTestEpilog(&smt); +} + TEST(rocrtstFunc, VirtMemory_Basic_Test) { VirtMemoryTestBasic vmt; diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/runtime.h b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/runtime.h index b9e45a8350..799a1e4d19 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/runtime.h +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/runtime.h @@ -906,6 +906,10 @@ class Runtime { }; std::map mapped_handle_map_; // Indexed by VA + AddressHandle* VMemoryFindReservedAddressHandle(const void* va); + hsa_status_t VMemoryPtrInfo(const void* ptr, hsa_amd_pointer_info_t* info, void* (*alloc)(size_t), + uint32_t* num_agents_accessible, hsa_agent_t** accessible); + hsa_status_t VMemoryMapAllowAccess(const void *va, hsa_access_permission_t perm, const hsa_agent_t *agents, diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/runtime.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/runtime.cpp index e756ec7902..4ab5a877cd 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/runtime.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/runtime.cpp @@ -924,6 +924,80 @@ hsa_status_t Runtime::InteropUnmap(void* ptr) { return HSA_STATUS_SUCCESS; } +/* This should be called memory_lock_ held */ +Runtime::AddressHandle* Runtime::VMemoryFindReservedAddressHandle(const void* va) { + auto reservedAddressIt = reserved_address_map_.upper_bound(va); + if (reservedAddressIt != reserved_address_map_.begin()) { + reservedAddressIt--; + if ((reservedAddressIt->first <= va) && + ((reinterpret_cast(va)) <= + (reinterpret_cast(reservedAddressIt->first) + + reservedAddressIt->second.size))) { + return &(reservedAddressIt->second); + } + } + return nullptr; +} + +/* This should be called memory_lock_ held */ +hsa_status_t Runtime::VMemoryPtrInfo(const void* ptr, hsa_amd_pointer_info_t* info, + void* (*alloc)(size_t), uint32_t* num_agents_accessible, + hsa_agent_t** accessible) { + /* Check if this memory was allocated via VMM */ + auto mappedHandleIt = mapped_handle_map_.upper_bound(ptr); + if (mappedHandleIt != mapped_handle_map_.begin()) { + mappedHandleIt--; + + if ((reinterpret_cast(mappedHandleIt->first) + mappedHandleIt->second.size) > + ptr) { + /* Allocation found */ + info->type = HSA_EXT_POINTER_TYPE_HSA_VMEM; + info->agentBaseAddress = const_cast(ptr); + info->hostBaseAddress = const_cast(ptr); + info->sizeInBytes = mappedHandleIt->second.size; + info->agentOwner = mappedHandleIt->second.mem_handle->agentOwner()->public_handle(); + + if (alloc && num_agents_accessible && accessible) { + std::vector allowed_agents; + + for (auto agentPermsIt = mappedHandleIt->second.allowed_agents.begin(); + agentPermsIt != mappedHandleIt->second.allowed_agents.end(); agentPermsIt++) { + allowed_agents.push_back((*agentPermsIt).second.targetAgent->public_handle()); + } + + AMD::callback_t Alloc(alloc); + + *accessible = (hsa_agent_t*)Alloc(sizeof(hsa_agent_t) * allowed_agents.size()); + if ((*accessible) == nullptr) return HSA_STATUS_ERROR_OUT_OF_RESOURCES; + + *num_agents_accessible = allowed_agents.size(); + memcpy(*accessible, allowed_agents.data(), sizeof(hsa_agent_t) * allowed_agents.size()); + } + + return HSA_STATUS_SUCCESS; + } + } + + /* This is not a mapped address. Check if it is a reserved address range */ + auto addressHandle = VMemoryFindReservedAddressHandle(ptr); + if (addressHandle) { + info->type = HSA_EXT_POINTER_TYPE_RESERVED_ADDR; + info->agentBaseAddress = NULL; + info->hostBaseAddress = addressHandle->os_addr; + info->sizeInBytes = addressHandle->size; + info->agentOwner = {}; + + if (num_agents_accessible) { + *num_agents_accessible = 0; + } + return HSA_STATUS_SUCCESS; + } + /* Allocation not found */ + info->type = HSA_EXT_POINTER_TYPE_UNKNOWN; + + return HSA_STATUS_SUCCESS; +} + hsa_status_t Runtime::PtrInfo(const void* ptr, hsa_amd_pointer_info_t* info, void* (*alloc)(size_t), uint32_t* num_agents_accessible, hsa_agent_t** accessible, PtrInfoBlockData* block_info) { @@ -955,6 +1029,12 @@ hsa_status_t Runtime::PtrInfo(const void* ptr, hsa_amd_pointer_info_t* info, voi // change with calls to memory APIs. ScopedAcquire lock(&memory_lock_); + if (VMemoryPtrInfo(ptr, &retInfo, alloc, num_agents_accessible, accessible) == + HSA_STATUS_SUCCESS) { + memcpy(info, &retInfo, retInfo.size); + return HSA_STATUS_SUCCESS; + } + // We don't care if this returns an error code. // The type will be HSA_EXT_POINTER_TYPE_UNKNOWN if so. auto err = HSAKMT_CALL(hsaKmtQueryPointerInfo(ptr, &thunkInfo)); @@ -3269,19 +3349,14 @@ hsa_status_t Runtime::VMemoryHandleMap(void* va, size_t size, size_t in_offset, int drm_fd, dmabuf_fd = 0; uint64_t offset = 0, ret; uint64_t drm_cpu_addr = 0; - bool reservedAddressFound = false; ScopedAcquire lock(&memory_lock_); - auto reservedAddressIt = reserved_address_map_.upper_bound(va); - if (reservedAddressIt != reserved_address_map_.begin()) { - reservedAddressIt--; - if ((reservedAddressIt->first <= va) && - ((reinterpret_cast(va) + size) <= - (reinterpret_cast(reservedAddressIt->first) + reservedAddressIt->second.size))) { - reservedAddressFound = true; - } + auto addressHandle = VMemoryFindReservedAddressHandle(va); + if (addressHandle == nullptr || + reinterpret_cast(va) + size > + reinterpret_cast(addressHandle->os_addr) + addressHandle->size) { + return HSA_STATUS_ERROR_INVALID_ARGUMENT; } - if (!reservedAddressFound) return HSA_STATUS_ERROR_INVALID_ARGUMENT; /* Confirm that this VA range has not been mapped yet */ auto upperMappedHandleIt = mapped_handle_map_.upper_bound(va); @@ -3329,12 +3404,11 @@ hsa_status_t Runtime::VMemoryHandleMap(void* va, size_t size, size_t in_offset, mapped_handle_map_.emplace( std::piecewise_construct, std::forward_as_tuple(va), - std::forward_as_tuple(&memoryHandleIt->second, &reservedAddressIt->second, - offset, size, drm_fd, - reinterpret_cast(drm_cpu_addr), - HSA_ACCESS_PERMISSION_NONE, shareable_handle)); + std::forward_as_tuple(&memoryHandleIt->second, addressHandle, offset, size, drm_fd, + reinterpret_cast(drm_cpu_addr), HSA_ACCESS_PERMISSION_NONE, + shareable_handle)); - reservedAddressIt->second.use_count++; + addressHandle->use_count++; memoryHandleIt->second.use_count++; return HSA_STATUS_SUCCESS; @@ -3519,7 +3593,6 @@ hsa_status_t Runtime::VMemorySetAccess(void* va, size_t size, const hsa_amd_memory_access_desc_t* desc, const size_t desc_cnt) { std::list> mappedHandles; - bool reservedAddressFound = false; // Validate all agents for (int i = 0; i < desc_cnt; i++) { @@ -3530,17 +3603,12 @@ hsa_status_t Runtime::VMemorySetAccess(void* va, size_t size, ScopedAcquire lock(&memory_lock_); - auto reservedAddressIt = reserved_address_map_.upper_bound(va); - if (reservedAddressIt != reserved_address_map_.begin()) { - reservedAddressIt--; - if ((reservedAddressIt->first <= va) && - ((reinterpret_cast(va) + size) <= - (reinterpret_cast(reservedAddressIt->first) + - reservedAddressIt->second.size))) { - reservedAddressFound = true; - } + auto addressHandle = VMemoryFindReservedAddressHandle(va); + if (addressHandle == nullptr || + reinterpret_cast(va) + size > + reinterpret_cast(addressHandle->os_addr) + addressHandle->size) { + return HSA_STATUS_ERROR_INVALID_ARGUMENT; } - if (!reservedAddressFound) return HSA_STATUS_ERROR_INVALID_ARGUMENT; // va + size may consist of multiple MappedHandle's. Build a list lf MappedHandles within this VA // range diff --git a/projects/rocr-runtime/runtime/hsa-runtime/inc/hsa_ext_amd.h b/projects/rocr-runtime/runtime/hsa-runtime/inc/hsa_ext_amd.h index ff2fc30336..ed44a44781 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/inc/hsa_ext_amd.h +++ b/projects/rocr-runtime/runtime/hsa-runtime/inc/hsa_ext_amd.h @@ -62,9 +62,10 @@ * - 1.9 - hsa_amd_portable_export_dmabuf_v2 * - 1.10 - hsa_amd_vmem_address_reserve: HSA_AMD_VMEM_ADDRESS_NO_REGISTER * - 1.11 - hsa_amd_agent_info_t: HSA_AMD_AGENT_INFO_CLOCK_COUNTERS + * - 1.12 - hsa_amd_pointer_info: HSA_EXT_POINTER_TYPE_HSA_VMEM and HSA_EXT_POINTER_TYPE_RESERVED_ADDR */ #define HSA_AMD_INTERFACE_VERSION_MAJOR 1 -#define HSA_AMD_INTERFACE_VERSION_MINOR 11 +#define HSA_AMD_INTERFACE_VERSION_MINOR 12 #ifdef __cplusplus extern "C" { @@ -2362,7 +2363,11 @@ typedef enum { /* No backend memory but virtual address */ - HSA_EXT_POINTER_TYPE_RESERVED_ADDR = 5 + HSA_EXT_POINTER_TYPE_RESERVED_ADDR = 5, + /* + Memory was allocated with an HSA virtual memory allocator + */ + HSA_EXT_POINTER_TYPE_HSA_VMEM = 6 } hsa_amd_pointer_type_t; /**