Dayatsin/develop vmm pointer info (#305)
* rocr: hsa_amd_pointer_info to support VMEM pointers Extend hsa_amd_pointer_info to support virtual memory addresses. If hsa_amd_pointer_info is called on an address that is reserved but not mapped to memory, then the pointer type will be reported as HSA_EXT_POINTER_TYPE_RESERVED_ADDR. If hsa_amd_pointer_info is called on an address that is mapped, then the pointer type will be reported as HSA_EXT_POINTER_TYPE_HSA_VMEM * rocrtst: VirtMemory_Basic_Test test for pointer info Extend rocrtstFunc.VirtMemory_Basic_Test to test for hsa_amd_pointer_info * rocrtst: Add SVM Memory Test
Αυτή η υποβολή περιλαμβάνεται σε:
@@ -0,0 +1,362 @@
|
||||
/*
|
||||
* =============================================================================
|
||||
* ROC Runtime Conformance Release License
|
||||
* =============================================================================
|
||||
* The University of Illinois/NCSA
|
||||
* Open Source License (NCSA)
|
||||
*
|
||||
* Copyright (c) 2025, Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Developed by:
|
||||
*
|
||||
* AMD Research and AMD ROC Software Development
|
||||
*
|
||||
* Advanced Micro Devices, Inc.
|
||||
*
|
||||
* www.amd.com
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to
|
||||
* deal with the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* - Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimers.
|
||||
* - Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimers in
|
||||
* the documentation and/or other materials provided with the distribution.
|
||||
* - Neither the names of <Name of Development Group, Name of Institution>,
|
||||
* nor the names of its contributors may be used to endorse or promote
|
||||
* products derived from this Software without specific prior written
|
||||
* permission.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS WITH THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#include <sys/mman.h>
|
||||
#include <fcntl.h>
|
||||
#include <algorithm>
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include <sys/socket.h>
|
||||
|
||||
#include "suites/functional/svm_memory.h"
|
||||
#include "common/base_rocr_utils.h"
|
||||
#include "common/common.h"
|
||||
#include "common/helper_funcs.h"
|
||||
#include "common/hsatimer.h"
|
||||
//#include "common/concurrent_utils.h"
|
||||
#include "gtest/gtest.h"
|
||||
#include "hsa/hsa.h"
|
||||
|
||||
// Wrap printf to add first or second process indicator
|
||||
#define PROCESS_LOG(format, ...) \
|
||||
{ \
|
||||
if (verbosity() >= VERBOSE_STANDARD || !parentProcess_) { \
|
||||
fprintf(stdout, "line:%d P%u: " format, __LINE__, static_cast<int>(!parentProcess_), \
|
||||
##__VA_ARGS__); \
|
||||
} \
|
||||
}
|
||||
|
||||
// Fork safe ASSERT_EQ.
|
||||
#define MSG(y, msg, ...) msg
|
||||
#define Y(y, ...) y
|
||||
|
||||
#define FORK_ASSERT_EQ(x, ...) \
|
||||
if ((x) != (Y(__VA_ARGS__))) { \
|
||||
if ((x) != (Y(__VA_ARGS__))) { \
|
||||
std::cout << MSG(__VA_ARGS__, ""); \
|
||||
if (parentProcess_) { \
|
||||
shared_->parent_status = -1; \
|
||||
} else { \
|
||||
shared_->child_status = -1; \
|
||||
} \
|
||||
ASSERT_EQ(x, Y(__VA_ARGS__)); \
|
||||
} \
|
||||
}
|
||||
|
||||
static const char kSubTestSeparator[] = " **************************";
|
||||
|
||||
static void PrintMemorySubtestHeader(const char* header) {
|
||||
std::cout << " *** Virtual Memory Functional Subtest: " << header << " ***" << std::endl;
|
||||
}
|
||||
|
||||
SvmMemoryTestBasic::SvmMemoryTestBasic(void) : TestBase() {
|
||||
set_title("ROCr SVM Memory Basic Tests");
|
||||
set_description(" Tests SVM memory API functions");
|
||||
}
|
||||
|
||||
SvmMemoryTestBasic::~SvmMemoryTestBasic(void) {}
|
||||
|
||||
// Test to check that GPU can read and write to SVM memory.
|
||||
void SvmMemoryTestBasic::TestCreateDestroy(hsa_agent_t agent, hsa_amd_memory_pool_t pool) {
|
||||
hsa_agent_t* agents_accessible;
|
||||
hsa_amd_pointer_info_t ptrInfo = {};
|
||||
uint32_t num_agents_accessible = 0;
|
||||
std::vector<hsa_agent_t> gpus;
|
||||
rocrtst::pool_info_t pool_i;
|
||||
hsa_device_type_t ag_type;
|
||||
char ag_name[64];
|
||||
void* addressRange;
|
||||
hsa_status_t err;
|
||||
hsa_agent_t cpu_agent;
|
||||
|
||||
typedef struct __attribute__((aligned(16))) args_t {
|
||||
int* a;
|
||||
int* b;
|
||||
int* c;
|
||||
} args;
|
||||
args* kernArgs = NULL;
|
||||
|
||||
static const int kMemoryAllocSize = 1024;
|
||||
|
||||
ASSERT_SUCCESS(hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &ag_type));
|
||||
ASSERT_SUCCESS(hsa_agent_get_info(agent, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_NEAREST_CPU, &cpu_agent));
|
||||
|
||||
ASSERT_SUCCESS(rocrtst::AcquirePoolInfo(pool, &pool_i));
|
||||
|
||||
if (ag_type != HSA_DEVICE_TYPE_GPU || !pool_i.alloc_allowed) return;
|
||||
|
||||
hsa_queue_t* queue = NULL; // command queue
|
||||
hsa_signal_t signal = {0}; // completion signal
|
||||
|
||||
/* Create a queue to enqueue kernel */
|
||||
// get queue size
|
||||
uint32_t queue_size = 0;
|
||||
ASSERT_SUCCESS(hsa_agent_get_info(agent, HSA_AGENT_INFO_QUEUE_MAX_SIZE, &queue_size));
|
||||
|
||||
// create queue
|
||||
ASSERT_SUCCESS(
|
||||
hsa_queue_create(agent, queue_size, HSA_QUEUE_TYPE_MULTI, NULL, NULL, 0, 0, &queue));
|
||||
|
||||
// Find a memory pool that supports kernel arguments.
|
||||
hsa_amd_memory_pool_t kernarg_pool;
|
||||
ASSERT_SUCCESS(
|
||||
hsa_amd_agent_iterate_memory_pools(cpu_agent, rocrtst::GetKernArgMemoryPool, &kernarg_pool));
|
||||
|
||||
struct host_data_t {
|
||||
int data[kMemoryAllocSize * 4];
|
||||
int dup_data[kMemoryAllocSize * 4];
|
||||
int result[kMemoryAllocSize * 4];
|
||||
};
|
||||
|
||||
struct dev_data_t {
|
||||
int result[kMemoryAllocSize * 4];
|
||||
};
|
||||
|
||||
struct host_data_t* host_data = NULL;
|
||||
struct dev_data_t* dev_data = NULL;
|
||||
|
||||
/* Set up host_data */
|
||||
ASSERT_SUCCESS(hsa_amd_vmem_address_reserve((void**)&host_data, sizeof(host_data_t), 0, HSA_AMD_VMEM_ADDRESS_NO_REGISTER));
|
||||
ASSERT_NE(host_data, nullptr);
|
||||
|
||||
/* Verify that pointer info for unmapped VA's return expected values */
|
||||
ptrInfo.size = sizeof(ptrInfo);
|
||||
ASSERT_SUCCESS(hsa_amd_pointer_info(host_data, &ptrInfo, nullptr, nullptr, nullptr));
|
||||
ASSERT_EQ(ptrInfo.type, HSA_EXT_POINTER_TYPE_RESERVED_ADDR);
|
||||
ASSERT_EQ(ptrInfo.hostBaseAddress, host_data);
|
||||
/* For unmapped VA, then size is equal to size of address reservation */
|
||||
ASSERT_EQ(ptrInfo.sizeInBytes, sizeof(host_data_t));
|
||||
ASSERT_EQ(num_agents_accessible, 0);
|
||||
|
||||
ptrInfo.size = sizeof(ptrInfo);
|
||||
ASSERT_SUCCESS(hsa_amd_pointer_info(&host_data->result, &ptrInfo, nullptr, nullptr, nullptr));
|
||||
ASSERT_EQ(ptrInfo.type, HSA_EXT_POINTER_TYPE_RESERVED_ADDR);
|
||||
ASSERT_EQ(ptrInfo.hostBaseAddress, host_data);
|
||||
/* For unmapped VA, then size is equal to size of address reservation */
|
||||
ASSERT_EQ(ptrInfo.sizeInBytes, sizeof(host_data_t));
|
||||
ASSERT_EQ(num_agents_accessible, 0);
|
||||
if (verbosity() > 0) {
|
||||
std::cout << " Pointer info on reserved address OK" << std::endl;
|
||||
}
|
||||
|
||||
std::vector<hsa_amd_svm_attribute_pair_t> host_attrs;
|
||||
host_attrs.push_back({HSA_AMD_SVM_ATTRIB_PREFERRED_LOCATION, cpu_agent.handle});
|
||||
host_attrs.push_back({HSA_AMD_SVM_ATTRIB_AGENT_ACCESSIBLE, agent.handle});
|
||||
ASSERT_SUCCESS(hsa_amd_svm_attributes_set(host_data, sizeof(host_data_t), host_attrs.data(), host_attrs.size()));
|
||||
|
||||
/* Set up dev_data */
|
||||
ASSERT_SUCCESS(hsa_amd_vmem_address_reserve((void**)&dev_data, sizeof(dev_data_t), 0, HSA_AMD_VMEM_ADDRESS_NO_REGISTER));
|
||||
ASSERT_NE(dev_data, nullptr);
|
||||
|
||||
std::vector<hsa_amd_svm_attribute_pair_t> dev_attrs;
|
||||
dev_attrs.push_back({HSA_AMD_SVM_ATTRIB_PREFERRED_LOCATION, agent.handle});
|
||||
dev_attrs.push_back({HSA_AMD_SVM_ATTRIB_AGENT_ACCESSIBLE, agent.handle});
|
||||
|
||||
ASSERT_SUCCESS(hsa_amd_svm_attributes_set(dev_data, sizeof(dev_data_t), dev_attrs.data(), dev_attrs.size()));
|
||||
|
||||
// initialize the host buffers
|
||||
for (int i = 0; i < kMemoryAllocSize; ++i) {
|
||||
unsigned int seed = time(NULL);
|
||||
host_data->data[i] = 1 + rand_r(&seed) % 1;
|
||||
host_data->dup_data[i] = host_data->data[i];
|
||||
}
|
||||
|
||||
memset(host_data->result, 0, sizeof(host_data->result));
|
||||
memset(dev_data->result, 0, sizeof(dev_data->result));
|
||||
|
||||
// Allocate the kernel argument buffer from the kernarg_pool.
|
||||
ASSERT_SUCCESS(hsa_amd_memory_pool_allocate(kernarg_pool, sizeof(args_t), 0,
|
||||
reinterpret_cast<void**>(&kernArgs)));
|
||||
|
||||
ASSERT_SUCCESS(hsa_amd_agents_allow_access(1, &agent, NULL, kernArgs));
|
||||
kernArgs->a = host_data->data;
|
||||
kernArgs->b = host_data->result; // system memory passed to gpu for write
|
||||
kernArgs->c = dev_data->result; // gpu memory to verify that gpu read system data
|
||||
|
||||
// Create the executable, get symbol by name and load the code object
|
||||
set_kernel_file_name("gpuReadWrite_kernels.hsaco");
|
||||
set_kernel_name("gpuReadWrite");
|
||||
ASSERT_SUCCESS(rocrtst::LoadKernelFromObjFile(this, &agent));
|
||||
|
||||
ASSERT_SUCCESS(hsa_signal_create(1, 0, NULL, &signal));
|
||||
|
||||
// create aql packet
|
||||
hsa_kernel_dispatch_packet_t aql;
|
||||
memset(&aql, 0, sizeof(aql));
|
||||
|
||||
// initialize aql packet
|
||||
aql.workgroup_size_x = 256;
|
||||
aql.workgroup_size_y = 1;
|
||||
aql.workgroup_size_z = 1;
|
||||
aql.grid_size_x = kMemoryAllocSize;
|
||||
aql.grid_size_y = 1;
|
||||
aql.grid_size_z = 1;
|
||||
aql.private_segment_size = 0;
|
||||
aql.group_segment_size = 0;
|
||||
aql.kernel_object = kernel_object(); // kernel_code;
|
||||
aql.kernarg_address = kernArgs;
|
||||
aql.completion_signal = signal;
|
||||
|
||||
const uint32_t queue_mask = queue->size - 1;
|
||||
|
||||
// write to command queue
|
||||
uint64_t index = hsa_queue_load_write_index_relaxed(queue);
|
||||
hsa_queue_store_write_index_relaxed(queue, index + 1);
|
||||
|
||||
rocrtst::WriteAQLToQueueLoc(queue, index, &aql);
|
||||
|
||||
hsa_kernel_dispatch_packet_t* q_base_addr =
|
||||
reinterpret_cast<hsa_kernel_dispatch_packet_t*>(queue->base_address);
|
||||
rocrtst::AtomicSetPacketHeader(
|
||||
(HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE) |
|
||||
(1 << HSA_PACKET_HEADER_BARRIER) |
|
||||
(HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE) |
|
||||
(HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE),
|
||||
(1 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS),
|
||||
reinterpret_cast<hsa_kernel_dispatch_packet_t*>(&q_base_addr[index & queue_mask]));
|
||||
|
||||
// ringdoor bell
|
||||
hsa_signal_store_relaxed(queue->doorbell_signal, index);
|
||||
// wait for the signal and reset it for future use
|
||||
while (hsa_signal_wait_scacquire(signal, HSA_SIGNAL_CONDITION_LT, 1, (uint64_t)-1,
|
||||
HSA_WAIT_STATE_ACTIVE)) {
|
||||
}
|
||||
|
||||
// compare device and host side results
|
||||
if (verbosity() > 0) {
|
||||
std::cout << " Check GPU has read the host memory" << std::endl;
|
||||
}
|
||||
for (int i = 0; i < kMemoryAllocSize; ++i) {
|
||||
// printf("Verifying data at index[%d]\n", i);
|
||||
ASSERT_EQ(dev_data->result[i], host_data->dup_data[i]);
|
||||
}
|
||||
|
||||
if (verbosity() > 0) {
|
||||
std::cout << " GPU has read the host memory successfully" << std::endl;
|
||||
std::cout << " Check GPU has written to host memory" << std::endl;
|
||||
}
|
||||
for (int i = 0; i < kMemoryAllocSize; ++i) {
|
||||
ASSERT_EQ(host_data->result[i], i);
|
||||
}
|
||||
|
||||
if (verbosity() > 0) {
|
||||
std::cout << " GPU has written to host memory successfully" << std::endl;
|
||||
}
|
||||
|
||||
if (kernArgs) {
|
||||
hsa_memory_free(kernArgs);
|
||||
}
|
||||
|
||||
if (signal.handle) {
|
||||
hsa_signal_destroy(signal);
|
||||
}
|
||||
if (queue) {
|
||||
hsa_queue_destroy(queue);
|
||||
}
|
||||
}
|
||||
|
||||
void SvmMemoryTestBasic::TestCreateDestroy(void) {
|
||||
hsa_status_t err;
|
||||
std::vector<std::shared_ptr<rocrtst::agent_pools_t>> agent_pools;
|
||||
|
||||
if (verbosity() > 0) {
|
||||
PrintMemorySubtestHeader("CreateDestroy Test");
|
||||
}
|
||||
|
||||
ASSERT_SUCCESS(rocrtst::GetAgentPools(&agent_pools));
|
||||
|
||||
auto pool_idx = 0;
|
||||
for (auto a : agent_pools) {
|
||||
for (auto p : a->pools) {
|
||||
TestCreateDestroy(a->agent, p);
|
||||
}
|
||||
}
|
||||
|
||||
if (verbosity() > 0) {
|
||||
std::cout << " Subtest finished" << std::endl;
|
||||
std::cout << kSubTestSeparator << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
void SvmMemoryTestBasic::SetUp(void) {
|
||||
hsa_status_t err;
|
||||
|
||||
TestBase::SetUp();
|
||||
|
||||
ASSERT_SUCCESS(rocrtst::SetDefaultAgents(this));
|
||||
ASSERT_SUCCESS(rocrtst::SetPoolsTypical(this));
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
void SvmMemoryTestBasic::Run(void) {
|
||||
// Compare required profile for this test case with what we're actually
|
||||
// running on
|
||||
if (!rocrtst::CheckProfile(this)) {
|
||||
return;
|
||||
}
|
||||
|
||||
TestBase::Run();
|
||||
}
|
||||
|
||||
void SvmMemoryTestBasic::DisplayTestInfo(void) { TestBase::DisplayTestInfo(); }
|
||||
|
||||
void SvmMemoryTestBasic::DisplayResults(void) const {
|
||||
// Compare required profile for this test case with what we're actually
|
||||
// running on
|
||||
if (!rocrtst::CheckProfile(this)) {
|
||||
return;
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
void SvmMemoryTestBasic::Close() {
|
||||
// This will close handles opened within rocrtst utility calls and call
|
||||
// hsa_shut_down(), so it should be done after other hsa cleanup
|
||||
TestBase::Close();
|
||||
}
|
||||
@@ -0,0 +1,82 @@
|
||||
/*
|
||||
* =============================================================================
|
||||
* ROC Runtime Conformance Release License
|
||||
* =============================================================================
|
||||
* The University of Illinois/NCSA
|
||||
* Open Source License (NCSA)
|
||||
*
|
||||
* Copyright (c) 2025, Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Developed by:
|
||||
*
|
||||
* AMD Research and AMD ROC Software Development
|
||||
*
|
||||
* Advanced Micro Devices, Inc.
|
||||
*
|
||||
* www.amd.com
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to
|
||||
* deal with the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* - Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimers.
|
||||
* - Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimers in
|
||||
* the documentation and/or other materials provided with the distribution.
|
||||
* - Neither the names of <Name of Development Group, Name of Institution>,
|
||||
* nor the names of its contributors may be used to endorse or promote
|
||||
* products derived from this Software without specific prior written
|
||||
* permission.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS WITH THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
#ifndef ROCRTST_SUITES_FUNCTIONAL_SVM_MEMORY_H_
|
||||
#define ROCRTST_SUITES_FUNCTIONAL_SVM_MEMORY_H_
|
||||
|
||||
#include <atomic>
|
||||
|
||||
#include "common/base_rocr.h"
|
||||
#include "hsa/hsa.h"
|
||||
#include "suites/test_common/test_base.h"
|
||||
|
||||
class SvmMemoryTestBasic : public TestBase {
|
||||
public:
|
||||
SvmMemoryTestBasic();
|
||||
|
||||
// @Brief: Destructor for test case of SvmMemoryTestBasic
|
||||
virtual ~SvmMemoryTestBasic();
|
||||
|
||||
// @Brief: Setup the environment for measurement
|
||||
virtual void SetUp();
|
||||
|
||||
// @Brief: Core measurement execution
|
||||
virtual void Run();
|
||||
|
||||
// @Brief: Clean up and retrive the resource
|
||||
virtual void Close();
|
||||
|
||||
// @Brief: Display results
|
||||
virtual void DisplayResults() const;
|
||||
|
||||
// @Brief: Display information about what this test does
|
||||
virtual void DisplayTestInfo(void);
|
||||
|
||||
void TestCreateDestroy(void);
|
||||
|
||||
private:
|
||||
void TestCreateDestroy(hsa_agent_t agent, hsa_amd_memory_pool_t pool);
|
||||
};
|
||||
|
||||
#endif // ROCRTST_SUITES_FUNCTIONAL_SVM_MEMORY_H_
|
||||
@@ -101,6 +101,9 @@ VirtMemoryTestBasic::VirtMemoryTestBasic(void) : TestBase() {
|
||||
VirtMemoryTestBasic::~VirtMemoryTestBasic(void) {}
|
||||
|
||||
void VirtMemoryTestBasic::TestCreateDestroy(hsa_agent_t agent, hsa_amd_memory_pool_t pool) {
|
||||
hsa_agent_t* agents_accessible;
|
||||
hsa_amd_pointer_info_t ptrInfo = {};
|
||||
uint32_t num_agents_accessible = 0;
|
||||
std::vector<hsa_agent_t> gpus;
|
||||
rocrtst::pool_info_t pool_i;
|
||||
hsa_device_type_t ag_type;
|
||||
@@ -116,14 +119,38 @@ void VirtMemoryTestBasic::TestCreateDestroy(hsa_agent_t agent, hsa_amd_memory_po
|
||||
if (ag_type != HSA_DEVICE_TYPE_GPU || !pool_i.alloc_allowed) return;
|
||||
|
||||
size_t granule_size = pool_i.alloc_granule;
|
||||
const size_t sizeof_addrRangeUnmapped = 10 * granule_size;
|
||||
const size_t sizeof_addrRange = 20 * granule_size;
|
||||
|
||||
ASSERT_SUCCESS(hsa_iterate_agents(rocrtst::IterateGPUAgents, &gpus));
|
||||
ASSERT_SUCCESS(hsa_amd_vmem_address_reserve(&addrRange, 20 * granule_size, 0, 0));
|
||||
ASSERT_SUCCESS(hsa_amd_vmem_address_reserve(&addrRangeUnmapped, 10 * granule_size, 0, 0));
|
||||
ASSERT_SUCCESS(hsa_amd_vmem_address_reserve(&addrRange, sizeof_addrRange, 0, 0));
|
||||
ASSERT_SUCCESS(hsa_amd_vmem_address_reserve(&addrRangeUnmapped, sizeof_addrRangeUnmapped, 0, 0));
|
||||
|
||||
/* Verify that pointer info for unmapped VA's return expected values */
|
||||
ptrInfo.size = sizeof(ptrInfo);
|
||||
ASSERT_SUCCESS(hsa_amd_pointer_info(addrRangeUnmapped, &ptrInfo, &malloc, &num_agents_accessible,
|
||||
&agents_accessible));
|
||||
ASSERT_EQ(ptrInfo.type, HSA_EXT_POINTER_TYPE_RESERVED_ADDR);
|
||||
ASSERT_EQ(ptrInfo.hostBaseAddress, addrRangeUnmapped);
|
||||
/* For unmapped VA, then size is equal to size of address reservation */
|
||||
ASSERT_EQ(ptrInfo.sizeInBytes, sizeof_addrRangeUnmapped);
|
||||
ASSERT_EQ(num_agents_accessible, 0);
|
||||
|
||||
/* Verify that pointer info for unmapped VA offset return expected values */
|
||||
ptrInfo.size = sizeof(ptrInfo);
|
||||
ASSERT_SUCCESS(hsa_amd_pointer_info(reinterpret_cast<uint8_t*>(addrRangeUnmapped) + 10, &ptrInfo, &malloc,
|
||||
&num_agents_accessible, &agents_accessible));
|
||||
ASSERT_EQ(ptrInfo.type, HSA_EXT_POINTER_TYPE_RESERVED_ADDR);
|
||||
ASSERT_EQ(ptrInfo.hostBaseAddress,
|
||||
addrRangeUnmapped); // hostBaseAddress is address of reservation instead of offset.
|
||||
/* For unmapped VA, then size is equal to size of address reservation */
|
||||
ASSERT_EQ(ptrInfo.sizeInBytes, sizeof_addrRangeUnmapped);
|
||||
ASSERT_EQ(num_agents_accessible, 0);
|
||||
|
||||
hsa_amd_vmem_alloc_handle_t mem_handle;
|
||||
const size_t sizeof_mem_handle = 10 * granule_size;
|
||||
ASSERT_SUCCESS(
|
||||
hsa_amd_vmem_handle_create(pool, 10 * granule_size, MEMORY_TYPE_NONE, 0, &mem_handle));
|
||||
hsa_amd_vmem_handle_create(pool, sizeof_mem_handle, MEMORY_TYPE_NONE, 0, &mem_handle));
|
||||
|
||||
/* Test alloc properties returns correct memory type and pool handle */
|
||||
hsa_amd_memory_pool_t poolRet;
|
||||
@@ -134,7 +161,7 @@ void VirtMemoryTestBasic::TestCreateDestroy(hsa_agent_t agent, hsa_amd_memory_po
|
||||
ASSERT_EQ(memTypeRet, MEMORY_TYPE_NONE);
|
||||
|
||||
hsa_amd_vmem_alloc_handle_t mem_handleTypePinned;
|
||||
ASSERT_SUCCESS(hsa_amd_vmem_handle_create(pool, 10 * granule_size, MEMORY_TYPE_PINNED, 0,
|
||||
ASSERT_SUCCESS(hsa_amd_vmem_handle_create(pool, sizeof_mem_handle, MEMORY_TYPE_PINNED, 0,
|
||||
&mem_handleTypePinned));
|
||||
|
||||
ASSERT_SUCCESS(
|
||||
@@ -143,7 +170,15 @@ void VirtMemoryTestBasic::TestCreateDestroy(hsa_agent_t agent, hsa_amd_memory_po
|
||||
ASSERT_EQ(memTypeRet, MEMORY_TYPE_PINNED);
|
||||
|
||||
|
||||
ASSERT_SUCCESS(hsa_amd_vmem_map(addrRange, 10 * granule_size, 0, mem_handle, 0));
|
||||
ASSERT_SUCCESS(hsa_amd_vmem_map(addrRange, sizeof_mem_handle, 0, mem_handle, 0));
|
||||
|
||||
/* Verity pointer info on mapped addresses */
|
||||
ptrInfo.size = sizeof(ptrInfo);
|
||||
ASSERT_SUCCESS(hsa_amd_pointer_info(addrRange, &ptrInfo, &malloc, &num_agents_accessible,
|
||||
&agents_accessible));
|
||||
ASSERT_EQ(ptrInfo.type, HSA_EXT_POINTER_TYPE_HSA_VMEM);
|
||||
ASSERT_EQ(ptrInfo.sizeInBytes, sizeof_mem_handle); // size matches memory handle
|
||||
ASSERT_EQ(num_agents_accessible, 0);
|
||||
|
||||
// Access to each GPU should be None
|
||||
for (auto gpuIt = gpus.begin(); gpuIt != gpus.end(); ++gpuIt) {
|
||||
@@ -160,10 +195,32 @@ void VirtMemoryTestBasic::TestCreateDestroy(hsa_agent_t agent, hsa_amd_memory_po
|
||||
for (auto gpuIt = gpus.begin(); gpuIt != gpus.end(); ++gpuIt) {
|
||||
desc[descIndex++] = {HSA_ACCESS_PERMISSION_RO, *gpuIt};
|
||||
}
|
||||
|
||||
ASSERT_SUCCESS(hsa_amd_vmem_set_access(addrRange, 10 * granule_size, desc, gpus.size()));
|
||||
}
|
||||
|
||||
/* Verity pointer info accessible agents on mapped addresses */
|
||||
ptrInfo.size = sizeof(ptrInfo);
|
||||
ASSERT_SUCCESS(hsa_amd_pointer_info(addrRange, &ptrInfo, &malloc, &num_agents_accessible,
|
||||
&agents_accessible));
|
||||
ASSERT_EQ(ptrInfo.type, HSA_EXT_POINTER_TYPE_HSA_VMEM);
|
||||
ASSERT_EQ(ptrInfo.sizeInBytes, sizeof_mem_handle); // size matches memory handle
|
||||
ASSERT_EQ(num_agents_accessible, gpus.size());
|
||||
ASSERT_NE(agents_accessible, nullptr);
|
||||
|
||||
/* Verify agents_accessible is valid */
|
||||
for (auto gpuIt = gpus.begin(); gpuIt != gpus.end(); ++gpuIt) {
|
||||
bool found = false;
|
||||
for (auto i = 0; i < gpus.size(); i++) {
|
||||
if (agents_accessible[i].handle == (*gpuIt).handle) {
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
ASSERT_EQ(found, true);
|
||||
}
|
||||
|
||||
free(agents_accessible);
|
||||
|
||||
for (auto gpuIt = gpus.begin(); gpuIt != gpus.end(); ++gpuIt) {
|
||||
hsa_access_permission_t perm = HSA_ACCESS_PERMISSION_NONE;
|
||||
|
||||
@@ -195,10 +252,10 @@ void VirtMemoryTestBasic::TestCreateDestroy(hsa_agent_t agent, hsa_amd_memory_po
|
||||
}
|
||||
}
|
||||
|
||||
ASSERT_SUCCESS(hsa_amd_vmem_unmap(addrRange, 10 * granule_size));
|
||||
ASSERT_SUCCESS(hsa_amd_vmem_unmap(addrRange, sizeof_mem_handle));
|
||||
ASSERT_SUCCESS(hsa_amd_vmem_handle_release(mem_handle));
|
||||
ASSERT_SUCCESS(hsa_amd_vmem_address_free(addrRange, 20 * granule_size));
|
||||
ASSERT_SUCCESS(hsa_amd_vmem_address_free(addrRangeUnmapped, 10 * granule_size));
|
||||
ASSERT_SUCCESS(hsa_amd_vmem_address_free(addrRange, sizeof_addrRange));
|
||||
ASSERT_SUCCESS(hsa_amd_vmem_address_free(addrRangeUnmapped, sizeof_addrRangeUnmapped));
|
||||
}
|
||||
|
||||
void VirtMemoryTestBasic::TestCreateDestroy(void) {
|
||||
|
||||
@@ -58,6 +58,7 @@
|
||||
#include "suites/functional/memory_allocation.h"
|
||||
#include "suites/functional/deallocation_notifier.h"
|
||||
#include "suites/functional/virtual_memory.h"
|
||||
#include "suites/functional/svm_memory.h"
|
||||
#include "suites/performance/dispatch_time.h"
|
||||
#include "suites/performance/memory_async_copy.h"
|
||||
#include "suites/performance/memory_async_copy_numa.h"
|
||||
@@ -382,6 +383,14 @@ TEST(rocrtstFunc, AgentPropertiesTests) {
|
||||
RunCustomTestEpilog(&propTest);
|
||||
}
|
||||
|
||||
TEST(rocrtstFunc, SvmMemory_Basic_Test) {
|
||||
SvmMemoryTestBasic smt;
|
||||
|
||||
RunCustomTestProlog(&smt);
|
||||
smt.TestCreateDestroy();
|
||||
RunCustomTestEpilog(&smt);
|
||||
}
|
||||
|
||||
TEST(rocrtstFunc, VirtMemory_Basic_Test) {
|
||||
VirtMemoryTestBasic vmt;
|
||||
|
||||
|
||||
@@ -906,6 +906,10 @@ class Runtime {
|
||||
};
|
||||
std::map<const void*, MappedHandle> mapped_handle_map_; // Indexed by VA
|
||||
|
||||
AddressHandle* VMemoryFindReservedAddressHandle(const void* va);
|
||||
hsa_status_t VMemoryPtrInfo(const void* ptr, hsa_amd_pointer_info_t* info, void* (*alloc)(size_t),
|
||||
uint32_t* num_agents_accessible, hsa_agent_t** accessible);
|
||||
|
||||
hsa_status_t VMemoryMapAllowAccess(const void *va,
|
||||
hsa_access_permission_t perm,
|
||||
const hsa_agent_t *agents,
|
||||
|
||||
@@ -924,6 +924,80 @@ hsa_status_t Runtime::InteropUnmap(void* ptr) {
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
/* This should be called memory_lock_ held */
|
||||
Runtime::AddressHandle* Runtime::VMemoryFindReservedAddressHandle(const void* va) {
|
||||
auto reservedAddressIt = reserved_address_map_.upper_bound(va);
|
||||
if (reservedAddressIt != reserved_address_map_.begin()) {
|
||||
reservedAddressIt--;
|
||||
if ((reservedAddressIt->first <= va) &&
|
||||
((reinterpret_cast<const uint8_t*>(va)) <=
|
||||
(reinterpret_cast<const uint8_t*>(reservedAddressIt->first) +
|
||||
reservedAddressIt->second.size))) {
|
||||
return &(reservedAddressIt->second);
|
||||
}
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
/* This should be called memory_lock_ held */
|
||||
hsa_status_t Runtime::VMemoryPtrInfo(const void* ptr, hsa_amd_pointer_info_t* info,
|
||||
void* (*alloc)(size_t), uint32_t* num_agents_accessible,
|
||||
hsa_agent_t** accessible) {
|
||||
/* Check if this memory was allocated via VMM */
|
||||
auto mappedHandleIt = mapped_handle_map_.upper_bound(ptr);
|
||||
if (mappedHandleIt != mapped_handle_map_.begin()) {
|
||||
mappedHandleIt--;
|
||||
|
||||
if ((reinterpret_cast<const uint8_t*>(mappedHandleIt->first) + mappedHandleIt->second.size) >
|
||||
ptr) {
|
||||
/* Allocation found */
|
||||
info->type = HSA_EXT_POINTER_TYPE_HSA_VMEM;
|
||||
info->agentBaseAddress = const_cast<void*>(ptr);
|
||||
info->hostBaseAddress = const_cast<void*>(ptr);
|
||||
info->sizeInBytes = mappedHandleIt->second.size;
|
||||
info->agentOwner = mappedHandleIt->second.mem_handle->agentOwner()->public_handle();
|
||||
|
||||
if (alloc && num_agents_accessible && accessible) {
|
||||
std::vector<hsa_agent_t> allowed_agents;
|
||||
|
||||
for (auto agentPermsIt = mappedHandleIt->second.allowed_agents.begin();
|
||||
agentPermsIt != mappedHandleIt->second.allowed_agents.end(); agentPermsIt++) {
|
||||
allowed_agents.push_back((*agentPermsIt).second.targetAgent->public_handle());
|
||||
}
|
||||
|
||||
AMD::callback_t<decltype(alloc)> Alloc(alloc);
|
||||
|
||||
*accessible = (hsa_agent_t*)Alloc(sizeof(hsa_agent_t) * allowed_agents.size());
|
||||
if ((*accessible) == nullptr) return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
|
||||
|
||||
*num_agents_accessible = allowed_agents.size();
|
||||
memcpy(*accessible, allowed_agents.data(), sizeof(hsa_agent_t) * allowed_agents.size());
|
||||
}
|
||||
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
}
|
||||
|
||||
/* This is not a mapped address. Check if it is a reserved address range */
|
||||
auto addressHandle = VMemoryFindReservedAddressHandle(ptr);
|
||||
if (addressHandle) {
|
||||
info->type = HSA_EXT_POINTER_TYPE_RESERVED_ADDR;
|
||||
info->agentBaseAddress = NULL;
|
||||
info->hostBaseAddress = addressHandle->os_addr;
|
||||
info->sizeInBytes = addressHandle->size;
|
||||
info->agentOwner = {};
|
||||
|
||||
if (num_agents_accessible) {
|
||||
*num_agents_accessible = 0;
|
||||
}
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
/* Allocation not found */
|
||||
info->type = HSA_EXT_POINTER_TYPE_UNKNOWN;
|
||||
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
hsa_status_t Runtime::PtrInfo(const void* ptr, hsa_amd_pointer_info_t* info, void* (*alloc)(size_t),
|
||||
uint32_t* num_agents_accessible, hsa_agent_t** accessible,
|
||||
PtrInfoBlockData* block_info) {
|
||||
@@ -955,6 +1029,12 @@ hsa_status_t Runtime::PtrInfo(const void* ptr, hsa_amd_pointer_info_t* info, voi
|
||||
// change with calls to memory APIs.
|
||||
ScopedAcquire<KernelSharedMutex> lock(&memory_lock_);
|
||||
|
||||
if (VMemoryPtrInfo(ptr, &retInfo, alloc, num_agents_accessible, accessible) ==
|
||||
HSA_STATUS_SUCCESS) {
|
||||
memcpy(info, &retInfo, retInfo.size);
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
// We don't care if this returns an error code.
|
||||
// The type will be HSA_EXT_POINTER_TYPE_UNKNOWN if so.
|
||||
auto err = HSAKMT_CALL(hsaKmtQueryPointerInfo(ptr, &thunkInfo));
|
||||
@@ -3269,19 +3349,14 @@ hsa_status_t Runtime::VMemoryHandleMap(void* va, size_t size, size_t in_offset,
|
||||
int drm_fd, dmabuf_fd = 0;
|
||||
uint64_t offset = 0, ret;
|
||||
uint64_t drm_cpu_addr = 0;
|
||||
bool reservedAddressFound = false;
|
||||
|
||||
ScopedAcquire<KernelSharedMutex> lock(&memory_lock_);
|
||||
auto reservedAddressIt = reserved_address_map_.upper_bound(va);
|
||||
if (reservedAddressIt != reserved_address_map_.begin()) {
|
||||
reservedAddressIt--;
|
||||
if ((reservedAddressIt->first <= va) &&
|
||||
((reinterpret_cast<uint8_t*>(va) + size) <=
|
||||
(reinterpret_cast<const uint8_t*>(reservedAddressIt->first) + reservedAddressIt->second.size))) {
|
||||
reservedAddressFound = true;
|
||||
}
|
||||
auto addressHandle = VMemoryFindReservedAddressHandle(va);
|
||||
if (addressHandle == nullptr ||
|
||||
reinterpret_cast<uint8_t*>(va) + size >
|
||||
reinterpret_cast<uint8_t*>(addressHandle->os_addr) + addressHandle->size) {
|
||||
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
if (!reservedAddressFound) return HSA_STATUS_ERROR_INVALID_ARGUMENT;
|
||||
|
||||
/* Confirm that this VA range has not been mapped yet */
|
||||
auto upperMappedHandleIt = mapped_handle_map_.upper_bound(va);
|
||||
@@ -3329,12 +3404,11 @@ hsa_status_t Runtime::VMemoryHandleMap(void* va, size_t size, size_t in_offset,
|
||||
|
||||
mapped_handle_map_.emplace(
|
||||
std::piecewise_construct, std::forward_as_tuple(va),
|
||||
std::forward_as_tuple(&memoryHandleIt->second, &reservedAddressIt->second,
|
||||
offset, size, drm_fd,
|
||||
reinterpret_cast<void *>(drm_cpu_addr),
|
||||
HSA_ACCESS_PERMISSION_NONE, shareable_handle));
|
||||
std::forward_as_tuple(&memoryHandleIt->second, addressHandle, offset, size, drm_fd,
|
||||
reinterpret_cast<void*>(drm_cpu_addr), HSA_ACCESS_PERMISSION_NONE,
|
||||
shareable_handle));
|
||||
|
||||
reservedAddressIt->second.use_count++;
|
||||
addressHandle->use_count++;
|
||||
memoryHandleIt->second.use_count++;
|
||||
|
||||
return HSA_STATUS_SUCCESS;
|
||||
@@ -3519,7 +3593,6 @@ hsa_status_t Runtime::VMemorySetAccess(void* va, size_t size,
|
||||
const hsa_amd_memory_access_desc_t* desc,
|
||||
const size_t desc_cnt) {
|
||||
std::list<std::pair<void*, MappedHandle*>> mappedHandles;
|
||||
bool reservedAddressFound = false;
|
||||
|
||||
// Validate all agents
|
||||
for (int i = 0; i < desc_cnt; i++) {
|
||||
@@ -3530,17 +3603,12 @@ hsa_status_t Runtime::VMemorySetAccess(void* va, size_t size,
|
||||
|
||||
ScopedAcquire<KernelSharedMutex> lock(&memory_lock_);
|
||||
|
||||
auto reservedAddressIt = reserved_address_map_.upper_bound(va);
|
||||
if (reservedAddressIt != reserved_address_map_.begin()) {
|
||||
reservedAddressIt--;
|
||||
if ((reservedAddressIt->first <= va) &&
|
||||
((reinterpret_cast<uint8_t*>(va) + size) <=
|
||||
(reinterpret_cast<const uint8_t*>(reservedAddressIt->first) +
|
||||
reservedAddressIt->second.size))) {
|
||||
reservedAddressFound = true;
|
||||
}
|
||||
auto addressHandle = VMemoryFindReservedAddressHandle(va);
|
||||
if (addressHandle == nullptr ||
|
||||
reinterpret_cast<uint8_t*>(va) + size >
|
||||
reinterpret_cast<uint8_t*>(addressHandle->os_addr) + addressHandle->size) {
|
||||
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
if (!reservedAddressFound) return HSA_STATUS_ERROR_INVALID_ARGUMENT;
|
||||
|
||||
// va + size may consist of multiple MappedHandle's. Build a list lf MappedHandles within this VA
|
||||
// range
|
||||
|
||||
@@ -62,9 +62,10 @@
|
||||
* - 1.9 - hsa_amd_portable_export_dmabuf_v2
|
||||
* - 1.10 - hsa_amd_vmem_address_reserve: HSA_AMD_VMEM_ADDRESS_NO_REGISTER
|
||||
* - 1.11 - hsa_amd_agent_info_t: HSA_AMD_AGENT_INFO_CLOCK_COUNTERS
|
||||
* - 1.12 - hsa_amd_pointer_info: HSA_EXT_POINTER_TYPE_HSA_VMEM and HSA_EXT_POINTER_TYPE_RESERVED_ADDR
|
||||
*/
|
||||
#define HSA_AMD_INTERFACE_VERSION_MAJOR 1
|
||||
#define HSA_AMD_INTERFACE_VERSION_MINOR 11
|
||||
#define HSA_AMD_INTERFACE_VERSION_MINOR 12
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
@@ -2362,7 +2363,11 @@ typedef enum {
|
||||
/*
|
||||
No backend memory but virtual address
|
||||
*/
|
||||
HSA_EXT_POINTER_TYPE_RESERVED_ADDR = 5
|
||||
HSA_EXT_POINTER_TYPE_RESERVED_ADDR = 5,
|
||||
/*
|
||||
Memory was allocated with an HSA virtual memory allocator
|
||||
*/
|
||||
HSA_EXT_POINTER_TYPE_HSA_VMEM = 6
|
||||
} hsa_amd_pointer_type_t;
|
||||
|
||||
/**
|
||||
|
||||
Αναφορά σε νέο ζήτημα
Block a user