2018-07-23 14:45:44 -04:00
|
|
|
/*
|
|
|
|
|
* Copyright (C) 2014-2018 Advanced Micro Devices, Inc. All Rights Reserved.
|
|
|
|
|
*
|
|
|
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
|
|
|
* copy of this software and associated documentation files (the "Software"),
|
|
|
|
|
* to deal in the Software without restriction, including without limitation
|
|
|
|
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
|
|
|
* and/or sell copies of the Software, and to permit persons to whom the
|
|
|
|
|
* Software is furnished to do so, subject to the following conditions:
|
|
|
|
|
*
|
|
|
|
|
* The above copyright notice and this permission notice shall be included in
|
|
|
|
|
* all copies or substantial portions of the Software.
|
|
|
|
|
*
|
|
|
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
|
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
|
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
|
|
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
|
|
|
|
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
|
|
|
|
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
|
|
|
|
* OTHER DEALINGS IN THE SOFTWARE.
|
|
|
|
|
*
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
#include "KFDMemoryTest.hpp"
|
|
|
|
|
#include <sys/prctl.h>
|
|
|
|
|
#include <sys/ptrace.h>
|
|
|
|
|
#include <errno.h>
|
|
|
|
|
#include <string.h>
|
|
|
|
|
#include <stdlib.h>
|
|
|
|
|
#include <unistd.h>
|
|
|
|
|
#include <sys/types.h>
|
|
|
|
|
#include <signal.h>
|
2019-06-13 15:22:52 -04:00
|
|
|
#include <numa.h>
|
2018-07-23 14:45:44 -04:00
|
|
|
#include <vector>
|
|
|
|
|
#include "Dispatch.hpp"
|
|
|
|
|
#include "PM4Queue.hpp"
|
|
|
|
|
#include "PM4Packet.hpp"
|
|
|
|
|
#include "SDMAQueue.hpp"
|
|
|
|
|
#include "SDMAPacket.hpp"
|
2019-04-30 15:32:01 -05:00
|
|
|
#include "linux/kfd_ioctl.h"
|
2018-07-23 14:45:44 -04:00
|
|
|
|
|
|
|
|
/* Shader source text for VI ASICs (asic(VI)): copies one dword from the
 * address held in s[0:1] to the address held in s[2:3] with flat
 * load/store instructions. flat_scratch_lo/hi are set to the immediates
 * 8 and 0.
 */
const char* gfx8_ScratchCopyDword =
    "shader ScratchCopyDword\n"
    "asic(VI)\n"
    "type(CS)\n"
    "/*copy the parameters from scalar registers to vector registers*/\n"
    "v_mov_b32 v0, s0\n"
    "v_mov_b32 v1, s1\n"
    "v_mov_b32 v2, s2\n"
    "v_mov_b32 v3, s3\n"
    "/*set up the scratch parameters. This assumes a single 16-reg block.*/\n"
    "s_mov_b32 flat_scratch_lo, 8/*2 dwords of scratch per thread*/\n"
    "s_mov_b32 flat_scratch_hi, 0/*offset in units of 256bytes*/\n"
    "/*copy a dword between the passed addresses*/\n"
    "flat_load_dword v4, v[0:1] slc\n"
    "s_waitcnt vmcnt(0)&lgkmcnt(0)\n"
    "flat_store_dword v[2:3], v4 slc\n"
    "\n"
    "s_endpgm\n"
    "\n"
    "end\n";
|
|
|
|
|
|
|
|
|
|
/* Shader source text for GFX9 ASICs (asic(GFX9)): same dword copy as the
 * VI variant, except that flat_scratch_lo/hi are loaded from s4/s5
 * instead of immediates.
 */
const char* gfx9_ScratchCopyDword =
    "shader ScratchCopyDword\n"
    "asic(GFX9)\n"
    "type(CS)\n"
    "/*copy the parameters from scalar registers to vector registers*/\n"
    "v_mov_b32 v0, s0\n"
    "v_mov_b32 v1, s1\n"
    "v_mov_b32 v2, s2\n"
    "v_mov_b32 v3, s3\n"
    "/*set up the scratch parameters. This assumes a single 16-reg block.*/\n"
    "s_mov_b32 flat_scratch_lo, s4\n"
    "s_mov_b32 flat_scratch_hi, s5\n"
    "/*copy a dword between the passed addresses*/\n"
    "flat_load_dword v4, v[0:1] slc\n"
    "s_waitcnt vmcnt(0)&lgkmcnt(0)\n"
    "flat_store_dword v[2:3], v4 slc\n"
    "\n"
    "s_endpgm\n"
    "\n"
    "end\n";
|
|
|
|
|
|
|
|
|
|
/* GFX9 shader source that polls the source buffer (address in s[0:1])
 * until it reads 0x5678, which the host program writes to request exit.
 * It then stores 0x5678 to the destination buffer (address in s[2:3])
 * and ends.
 */
const char* gfx9_PollMemory =
    "shader ReadMemory\n"
    "asic(GFX9)\n"
    "type(CS)\n"
    "/* Assume src address in s0, s1 and dst address in s2, s3*/\n"
    "s_movk_i32 s18, 0x5678\n"
    "LOOP:\n"
    "s_load_dword s16, s[0:1], 0x0 glc\n"
    "s_cmp_eq_i32 s16, s18\n"
    "s_cbranch_scc0 LOOP\n"
    "s_store_dword s18, s[2:3], 0x0 glc\n"
    "s_endpgm\n"
    "end\n";
|
|
|
|
|
|
2019-04-30 15:32:01 -05:00
|
|
|
/* GFX9 shader source. Input: a buffer of at least 3 dwords, address in
 * s[0:1].
 *   DW0: signal word; 0xcafe means "signaled".
 *   DW1: input dword for the device to read.
 *   DW2: output dword for the device to write.
 * The shader continuously polls DW0. Once it reads 0xcafe, it copies DW1
 * to DW2 and ends.
 */
const char* gfx9_CopyOnSignal =
    "shader CopyOnSignal\n"
    "asic(GFX9)\n"
    "type(CS)\n"
    "/* Assume input buffer in s0, s1 */\n"
    "s_mov_b32 s18, 0xcafe\n"
    "POLLSIGNAL:\n"
    "s_load_dword s16, s[0:1], 0x0 glc\n"
    "s_cmp_eq_i32 s16, s18\n"
    "s_cbranch_scc0 POLLSIGNAL\n"
    "s_load_dword s17, s[0:1], 0x4 glc\n"
    "s_waitcnt vmcnt(0) & lgkmcnt(0)\n"
    "s_store_dword s17, s[0:1], 0x8 glc\n"
    "s_waitcnt vmcnt(0) & lgkmcnt(0)\n"
    "s_endpgm\n"
    "end\n";
|
|
|
|
|
|
2019-05-30 16:09:06 -05:00
|
|
|
/* GFX9 shader source.
 * Input0 (s[0:1]): a buffer of at least 2 dwords.
 *   DW0: signal word; the shader writes 0xcafe here last, to signal.
 *   DW1: data word for another device to read; the shader writes 0xbeef.
 * Input1 (s[2:3]): mmio base address; the shader writes 0x1 at offset 0
 * between the data write and the signal write.
 */
const char* gfx9_WriteAndSignal =
    "shader WriteAndSignal\n"
    "asic(GFX9)\n"
    "type(CS)\n"
    "/* Assume input buffer in s0, s1 */\n"
    "s_mov_b32 s18, 0xbeef\n"
    "s_store_dword s18, s[0:1], 0x4 glc\n"
    "s_mov_b32 s18, 0x1\n"
    "s_store_dword s18, s[2:3], 0 glc\n"
    "s_mov_b32 s18, 0xcafe\n"
    "s_store_dword s18, s[0:1], 0x0 glc\n"
    "s_endpgm\n"
    "end\n";
|
|
|
|
|
|
2018-07-23 14:45:44 -04:00
|
|
|
// Fixture setup: runs the base-class SetUp, then creates the ISA generator
// for this fixture's m_FamilyId and stores it in m_pIsaGen.
void KFDMemoryTest::SetUp() {
    ROUTINE_START

    KFDBaseComponentTest::SetUp();

    // Tests use this generator to build shaders (see the CompileShader /
    // GetCopyDwordIsa calls in the tests below).
    m_pIsaGen = IsaGenerator::Create(m_FamilyId);

    ROUTINE_END
}
|
|
|
|
|
|
|
|
|
|
// Fixture teardown: releases the ISA generator created in SetUp, then runs
// the base-class TearDown.
void KFDMemoryTest::TearDown() {
    ROUTINE_START

    // Deleting a null pointer is a no-op, so no explicit guard is needed.
    delete m_pIsaGen;
    m_pIsaGen = NULL;

    KFDBaseComponentTest::TearDown();

    ROUTINE_END
}
|
|
|
|
|
|
|
|
|
|
#include <sys/mman.h>
|
2018-08-13 09:03:31 -04:00
|
|
|
/* Convert a size in gibibytes to bytes. The argument is widened to 64 bits
 * before shifting so that GB(n) does not overflow when n is a plain int
 * (e.g. GB(4)); 64-bit arguments such as GB(1024ULL) behave as before.
 */
#define GB(x) ((unsigned long long)(x) << 30)
|
2018-07-23 14:45:44 -04:00
|
|
|
|
|
|
|
|
/*
|
2018-08-14 09:52:31 -04:00
|
|
|
* Try to map as much as possible system memory to gpu
|
|
|
|
|
* to see if KFD supports 1TB memory correctly or not.
|
|
|
|
|
* After this test case, we can observe if there are any side effects.
|
|
|
|
|
* NOTICE: There are memory usage limit checks in hsa/kfd according to the total
|
2018-07-23 14:45:44 -04:00
|
|
|
* physical system memory.
|
|
|
|
|
*/
|
|
|
|
|
TEST_F(KFDMemoryTest, MMapLarge) {
|
|
|
|
|
TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
|
|
|
|
|
TEST_START(TESTPROFILE_RUNALL)
|
|
|
|
|
|
|
|
|
|
if (!is_dgpu()) {
|
2018-08-13 10:18:04 -04:00
|
|
|
LOG() << "Skipping test: Test not supported on APU." << std::endl;
|
2018-07-23 14:45:44 -04:00
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
HSAuint32 defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode();
|
|
|
|
|
ASSERT_GE(defaultGPUNode, 0) << "failed to get default GPU Node";
|
2018-08-13 09:03:31 -04:00
|
|
|
const HSAuint64 nObjects = 1<<14;
|
2018-07-23 14:45:44 -04:00
|
|
|
HSAuint64 *AlternateVAGPU = new HSAuint64[nObjects];
|
2018-08-13 09:03:31 -04:00
|
|
|
ASSERT_NE((HSAuint64)AlternateVAGPU, 0);
|
2018-07-23 14:45:44 -04:00
|
|
|
HsaMemMapFlags mapFlags = {0};
|
2018-08-13 09:03:31 -04:00
|
|
|
HSAuint64 s;
|
2018-07-23 14:45:44 -04:00
|
|
|
char *addr;
|
2018-08-13 09:03:31 -04:00
|
|
|
HSAuint64 flags = MAP_ANONYMOUS | MAP_PRIVATE;
|
2018-07-23 14:45:44 -04:00
|
|
|
|
|
|
|
|
/* Test up to 1TB memory*/
|
|
|
|
|
s = GB(1024ULL) / nObjects;
|
2018-08-13 09:03:31 -04:00
|
|
|
addr = reinterpret_cast<char*>(mmap(0, s, PROT_READ | PROT_WRITE, flags, -1, 0));
|
2018-07-23 14:45:44 -04:00
|
|
|
ASSERT_NE(addr, MAP_FAILED);
|
|
|
|
|
memset(addr, 0, s);
|
|
|
|
|
|
|
|
|
|
int i = 0;
|
|
|
|
|
/* Allocate 1024GB, aka 1TB*/
|
|
|
|
|
for (; i < nObjects; i++) {
|
|
|
|
|
if (hsaKmtRegisterMemory(addr + i, s - i))
|
|
|
|
|
break;
|
|
|
|
|
if (hsaKmtMapMemoryToGPUNodes(addr + i, s - i,
|
2018-08-13 09:03:31 -04:00
|
|
|
&AlternateVAGPU[i], mapFlags, 1, reinterpret_cast<HSAuint32 *>(&defaultGPUNode))) {
|
2018-08-10 16:40:34 +08:00
|
|
|
hsaKmtDeregisterMemory(addr + i);
|
2018-07-23 14:45:44 -04:00
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
LOG() << "Successfully registered and mapped " << (i * s >> 30)
|
|
|
|
|
<< "GB system memory to gpu" << std::endl;
|
|
|
|
|
|
2018-09-12 17:30:42 +08:00
|
|
|
RECORD(i * s >> 30) << "Mmap-SysMem-Size";
|
|
|
|
|
|
2018-07-23 14:45:44 -04:00
|
|
|
while (i--) {
|
2018-08-20 09:54:26 -04:00
|
|
|
EXPECT_SUCCESS(hsaKmtUnmapMemoryToGPU(reinterpret_cast<void*>(AlternateVAGPU[i])));
|
|
|
|
|
EXPECT_SUCCESS(hsaKmtDeregisterMemory(reinterpret_cast<void*>(AlternateVAGPU[i])));
|
2018-07-23 14:45:44 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
munmap(addr, s);
|
|
|
|
|
delete []AlternateVAGPU;
|
|
|
|
|
|
|
|
|
|
TEST_END
|
|
|
|
|
}
|
|
|
|
|
|
2018-08-14 09:52:31 -04:00
|
|
|
/* Keep memory mapped to default node
|
2018-07-23 14:45:44 -04:00
|
|
|
* Keep mapping/unmapping memory to/from non-default node
|
2018-08-14 09:52:31 -04:00
|
|
|
* A shader running on default node consistantly accesses
|
|
|
|
|
* memory - make sure memory is always accessible by default,
|
|
|
|
|
* i.e. there is no gpu vm fault.
|
2018-07-23 14:45:44 -04:00
|
|
|
* Synchronization b/t host program and shader:
|
2018-08-14 09:52:31 -04:00
|
|
|
* 1. Host initializes src and dst buffer to 0
|
|
|
|
|
* 2. Shader keeps reading src buffer and check value
|
|
|
|
|
* 3. Host writes src buffer to 0x5678 to indicate quit, polling dst until it becomes 0x5678
|
|
|
|
|
* 4. Shader write dst buffer to 0x5678 after src changes to 0x5678, then quits
|
|
|
|
|
* 5. Host program quits after dst becomes 0x5678
|
|
|
|
|
* Need at least two gpu nodes to run the test. The default node has to be a gfx9 node,
|
|
|
|
|
* otherwise, test is skipped. Use kfdtest --node=$$ to specify the default node
|
2018-07-23 14:45:44 -04:00
|
|
|
* This test case is introduced as a side-result of investigation of SWDEV-134798, which
|
|
|
|
|
* is a gpu vm fault while running rocr conformance test. Here we try to simulate the
|
|
|
|
|
* same test behaviour.
|
|
|
|
|
*/
|
|
|
|
|
TEST_F(KFDMemoryTest, MapUnmapToNodes) {
|
|
|
|
|
TEST_START(TESTPROFILE_RUNALL)
|
|
|
|
|
if (m_FamilyId != FAMILY_AI) {
|
2018-08-13 10:18:04 -04:00
|
|
|
LOG() << "Skipping test: GFX9-based shader not supported on other ASICs." << std::endl;
|
2018-07-23 14:45:44 -04:00
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
const std::vector<int> gpuNodes = m_NodeInfo.GetNodesWithGPU();
|
|
|
|
|
if (gpuNodes.size() < 2) {
|
2018-08-13 10:18:04 -04:00
|
|
|
LOG() << "Skipping test: At least two GPUs are required." << std::endl;
|
2018-07-23 14:45:44 -04:00
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
HSAuint32 defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode();
|
|
|
|
|
LOG() << "default GPU node" << defaultGPUNode << std::endl;
|
|
|
|
|
ASSERT_GE(defaultGPUNode, 0) << "failed to get default GPU Node";
|
|
|
|
|
|
|
|
|
|
HSAuint32 nondefaultNode;
|
|
|
|
|
for (unsigned i = 0; i < gpuNodes.size(); i++) {
|
|
|
|
|
if (gpuNodes.at(i) != defaultGPUNode) {
|
|
|
|
|
nondefaultNode = gpuNodes.at(i);
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
HSAuint32 mapNodes[2] = {defaultGPUNode, nondefaultNode};
|
|
|
|
|
|
|
|
|
|
HsaMemoryBuffer isaBuffer(PAGE_SIZE, defaultGPUNode, true/*zero*/, false/*local*/, true/*exec*/);
|
|
|
|
|
HsaMemoryBuffer srcBuffer(PAGE_SIZE, defaultGPUNode);
|
|
|
|
|
HsaMemoryBuffer dstBuffer(PAGE_SIZE, defaultGPUNode);
|
|
|
|
|
|
|
|
|
|
m_pIsaGen->CompileShader(gfx9_PollMemory, "ReadMemory", isaBuffer);
|
|
|
|
|
|
|
|
|
|
PM4Queue pm4Queue;
|
|
|
|
|
ASSERT_SUCCESS(pm4Queue.Create(defaultGPUNode));
|
|
|
|
|
|
|
|
|
|
Dispatch dispatch0(isaBuffer);
|
|
|
|
|
dispatch0.SetArgs(srcBuffer.As<void*>(), dstBuffer.As<void*>());
|
|
|
|
|
dispatch0.Submit(pm4Queue);
|
|
|
|
|
|
|
|
|
|
HsaMemMapFlags memFlags = {0};
|
|
|
|
|
memFlags.ui32.PageSize = HSA_PAGE_SIZE_4KB;
|
|
|
|
|
memFlags.ui32.HostAccess = 1;
|
|
|
|
|
|
|
|
|
|
for (unsigned i = 0; i < 1<<14; i ++) {
|
|
|
|
|
hsaKmtMapMemoryToGPUNodes(srcBuffer.As<void*>(), PAGE_SIZE, NULL, memFlags, (i>>5)&1+1, mapNodes);
|
|
|
|
|
}
|
|
|
|
|
|
2018-08-14 09:52:31 -04:00
|
|
|
/* Fill src buffer so shader quits */
|
2018-07-23 14:45:44 -04:00
|
|
|
srcBuffer.Fill(0x5678);
|
|
|
|
|
WaitOnValue(dstBuffer.As<uint32_t *>(), 0x5678);
|
2018-08-20 09:54:26 -04:00
|
|
|
EXPECT_EQ(*dstBuffer.As<uint32_t *>(), 0x5678);
|
|
|
|
|
EXPECT_SUCCESS(pm4Queue.Destroy());
|
2018-07-23 14:45:44 -04:00
|
|
|
TEST_END
|
|
|
|
|
}
|
|
|
|
|
|
2018-08-14 09:52:31 -04:00
|
|
|
// Basic test of hsaKmtMapMemoryToGPU and hsaKmtUnmapMemoryToGPU
|
2018-07-23 14:45:44 -04:00
|
|
|
TEST_F(KFDMemoryTest , MapMemoryToGPU) {
|
|
|
|
|
TEST_START(TESTPROFILE_RUNALL)
|
|
|
|
|
|
|
|
|
|
unsigned int *nullPtr = NULL;
|
|
|
|
|
unsigned int* pDb = NULL;
|
|
|
|
|
|
|
|
|
|
int defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode();
|
|
|
|
|
ASSERT_GE(defaultGPUNode, 0) << "failed to get default GPU Node";
|
|
|
|
|
|
2018-08-13 09:03:31 -04:00
|
|
|
ASSERT_SUCCESS(hsaKmtAllocMemory(defaultGPUNode /* system */, PAGE_SIZE, m_MemoryFlags,
|
|
|
|
|
reinterpret_cast<void**>(&pDb)));
|
2018-07-23 14:45:44 -04:00
|
|
|
// verify that pDb is not null before it's being used
|
|
|
|
|
ASSERT_NE(nullPtr, pDb) << "hsaKmtAllocMemory returned a null pointer";
|
|
|
|
|
ASSERT_SUCCESS(hsaKmtMapMemoryToGPU(pDb, PAGE_SIZE, NULL));
|
2018-08-20 09:54:26 -04:00
|
|
|
EXPECT_SUCCESS(hsaKmtUnmapMemoryToGPU(pDb));
|
2018-07-23 14:45:44 -04:00
|
|
|
// Release the buffers
|
2018-08-20 09:54:26 -04:00
|
|
|
EXPECT_SUCCESS(hsaKmtFreeMemory(pDb, PAGE_SIZE));
|
2018-07-23 14:45:44 -04:00
|
|
|
|
|
|
|
|
TEST_END
|
|
|
|
|
}
|
|
|
|
|
|
2018-08-14 09:52:31 -04:00
|
|
|
// Following tests are for hsaKmtAllocMemory with invalid params
|
2018-07-23 14:45:44 -04:00
|
|
|
TEST_F(KFDMemoryTest, InvalidMemoryPointerAlloc) {
|
|
|
|
|
TEST_START(TESTPROFILE_RUNALL)
|
|
|
|
|
|
|
|
|
|
EXPECT_EQ(HSAKMT_STATUS_INVALID_PARAMETER, hsaKmtAllocMemory(0 /* system */, PAGE_SIZE, m_MemoryFlags, NULL));
|
|
|
|
|
|
|
|
|
|
TEST_END
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// A zero-byte allocation request must be rejected with INVALID_PARAMETER.
TEST_F(KFDMemoryTest, ZeroMemorySizeAlloc) {
    TEST_START(TESTPROFILE_RUNALL)

    unsigned int *pDb = NULL;

    EXPECT_EQ(HSAKMT_STATUS_INVALID_PARAMETER,
              hsaKmtAllocMemory(0 /* system */, 0, m_MemoryFlags,
                                reinterpret_cast<void**>(&pDb)));

    TEST_END
}
|
|
|
|
|
|
2018-08-14 09:52:31 -04:00
|
|
|
// Basic test for hsaKmtAllocMemory
|
2018-07-23 14:45:44 -04:00
|
|
|
TEST_F(KFDMemoryTest, MemoryAlloc) {
|
|
|
|
|
TEST_START(TESTPROFILE_RUNALL)
|
|
|
|
|
|
|
|
|
|
unsigned int* pDb = NULL;
|
2018-08-13 09:03:31 -04:00
|
|
|
EXPECT_SUCCESS(hsaKmtAllocMemory(0 /* system */, PAGE_SIZE, m_MemoryFlags, reinterpret_cast<void**>(&pDb)));
|
2018-07-23 14:45:44 -04:00
|
|
|
|
|
|
|
|
TEST_END
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// APU-only test: has the GPU write two dwords into a buffer obtained from
// VirtualAllocMemory and checks that both values arrive.
TEST_F(KFDMemoryTest, AccessPPRMem) {
    TEST_START(TESTPROFILE_RUNALL)

    int defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode();
    ASSERT_GE(defaultGPUNode, 0) << "failed to get default GPU Node";

    if (is_dgpu()) {
        LOG() << "Skipping test: Test requires APU." << std::endl;
        return;
    }

    unsigned int *destBuf = (unsigned int *)VirtualAllocMemory(NULL, PAGE_SIZE,
                                                MEM_READ | MEM_WRITE);

    PM4Queue queue;

    ASSERT_SUCCESS(queue.Create(defaultGPUNode));

    queue.PlaceAndSubmitPacket(PM4WriteDataPacket(destBuf,
                                0xABCDEF09, 0x12345678));

    queue.Wait4PacketConsumption();

    // Check the wait results (as the other tests in this file do) so that
    // a value that never arrives is reported as a failure.
    EXPECT_TRUE(WaitOnValue(destBuf, 0xABCDEF09));
    EXPECT_TRUE(WaitOnValue(destBuf + 1, 0x12345678));

    EXPECT_SUCCESS(queue.Destroy());

    /* A sleep used to be placed here to hide the dmesg PPR message storm
     * on Raven, which happens when the CPU buffer is freed before the
     * excessive PPRs are all consumed by IOMMU HW. A kernel driver
     * workaround is in place to address that, so we don't need to wait
     * here.
     */

    VirtualFreeMemory(destBuf, PAGE_SIZE);

    TEST_END
}
|
|
|
|
|
|
|
|
|
|
// Linux OS-specific Test for registering OS allocated memory
|
|
|
|
|
TEST_F(KFDMemoryTest, MemoryRegister) {
|
|
|
|
|
const HsaNodeProperties *pNodeProperties = m_NodeInfo.HsaDefaultGPUNodeProperties();
|
|
|
|
|
if (isTonga(pNodeProperties)) {
|
2018-08-13 10:18:04 -04:00
|
|
|
LOG() << "Skipping test: Workaround in thunk for Tonga causes failure." << std::endl;
|
2018-07-23 14:45:44 -04:00
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
TEST_START(TESTPROFILE_RUNALL)
|
|
|
|
|
|
|
|
|
|
int defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode();
|
|
|
|
|
ASSERT_GE(defaultGPUNode, 0) << "failed to get default GPU Node";
|
|
|
|
|
|
|
|
|
|
/* Different unaligned memory locations to be mapped for GPU
|
|
|
|
|
* access:
|
|
|
|
|
*
|
|
|
|
|
* - initialized data segment (file backed)
|
|
|
|
|
* - stack (anonymous memory)
|
|
|
|
|
*
|
|
|
|
|
* Separate them enough so they are in different cache lines
|
|
|
|
|
* (64-byte = 16-dword).
|
|
|
|
|
*/
|
|
|
|
|
static volatile HSAuint32 globalData = 0xdeadbeef;
|
|
|
|
|
volatile HSAuint32 stackData[17] = {0};
|
|
|
|
|
const unsigned dstOffset = 0;
|
|
|
|
|
const unsigned sdmaOffset = 16;
|
|
|
|
|
|
|
|
|
|
HsaMemoryBuffer srcBuffer((void *)&globalData, sizeof(HSAuint32));
|
|
|
|
|
HsaMemoryBuffer dstBuffer((void *)&stackData[dstOffset], sizeof(HSAuint32));
|
|
|
|
|
HsaMemoryBuffer sdmaBuffer((void *)&stackData[sdmaOffset], sizeof(HSAuint32));
|
|
|
|
|
|
|
|
|
|
/* Create PM4 and SDMA queues before fork+COW to test queue
|
2018-08-14 09:52:31 -04:00
|
|
|
* eviction and restore
|
|
|
|
|
*/
|
2018-07-23 14:45:44 -04:00
|
|
|
PM4Queue pm4Queue;
|
|
|
|
|
SDMAQueue sdmaQueue;
|
|
|
|
|
ASSERT_SUCCESS(pm4Queue.Create(defaultGPUNode));
|
|
|
|
|
ASSERT_SUCCESS(sdmaQueue.Create(defaultGPUNode));
|
|
|
|
|
|
|
|
|
|
HsaMemoryBuffer isaBuffer(PAGE_SIZE, defaultGPUNode, true/*zero*/, false/*local*/, true/*exec*/);
|
|
|
|
|
m_pIsaGen->GetCopyDwordIsa(isaBuffer);
|
|
|
|
|
|
|
|
|
|
/* First submit just so the queues are not empty, and to get the
|
|
|
|
|
* TLB populated (in case we need to flush TLBs somewhere after
|
2018-08-14 09:52:31 -04:00
|
|
|
* updating the page tables)
|
|
|
|
|
*/
|
2018-07-23 14:45:44 -04:00
|
|
|
Dispatch dispatch0(isaBuffer);
|
|
|
|
|
dispatch0.SetArgs(srcBuffer.As<void*>(), dstBuffer.As<void*>());
|
|
|
|
|
dispatch0.Submit(pm4Queue);
|
|
|
|
|
dispatch0.Sync(g_TestTimeOut);
|
|
|
|
|
|
|
|
|
|
sdmaQueue.PlaceAndSubmitPacket(SDMAWriteDataPacket(sdmaBuffer.As<HSAuint32 *>(), 0x12345678));
|
|
|
|
|
sdmaQueue.Wait4PacketConsumption();
|
2018-08-20 09:54:26 -04:00
|
|
|
EXPECT_TRUE(WaitOnValue(&stackData[sdmaOffset], 0x12345678));
|
2018-07-23 14:45:44 -04:00
|
|
|
|
|
|
|
|
/* Fork a child process to mark pages as COW */
|
|
|
|
|
pid_t pid = fork();
|
|
|
|
|
ASSERT_GE(pid, 0);
|
|
|
|
|
if (pid == 0) {
|
|
|
|
|
/* Child process waits for a SIGTERM from the parent. It can't
|
|
|
|
|
* make any write access to the stack because we want the
|
|
|
|
|
* parent to make the first write access and get a new copy. A
|
|
|
|
|
* busy loop is the safest way to do that, since any function
|
2018-08-14 09:52:31 -04:00
|
|
|
* call (e.g. sleep) would write to the stack.
|
|
|
|
|
*/
|
2018-07-23 14:45:44 -04:00
|
|
|
while (1)
|
|
|
|
|
{}
|
|
|
|
|
WARN() << "Shouldn't get here!" << std::endl;
|
|
|
|
|
exit(0);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Parent process writes to COW page(s) and gets a new copy. MMU
|
|
|
|
|
* notifier needs to update the GPU mapping(s) for the test to
|
2018-08-14 09:52:31 -04:00
|
|
|
* pass.
|
|
|
|
|
*/
|
2018-07-23 14:45:44 -04:00
|
|
|
globalData = 0xD00BED00;
|
|
|
|
|
stackData[dstOffset] = 0xdeadbeef;
|
|
|
|
|
stackData[sdmaOffset] = 0xdeadbeef;
|
|
|
|
|
|
|
|
|
|
/* Terminate the child process before a possible test failure that
|
2018-08-14 09:52:31 -04:00
|
|
|
* would leave it spinning in the background indefinitely.
|
|
|
|
|
*/
|
2018-07-23 14:45:44 -04:00
|
|
|
int status;
|
|
|
|
|
EXPECT_EQ(0, kill(pid, SIGTERM));
|
|
|
|
|
EXPECT_EQ(pid, waitpid(pid, &status, 0));
|
|
|
|
|
EXPECT_NE(0, WIFSIGNALED(status));
|
|
|
|
|
EXPECT_EQ(SIGTERM, WTERMSIG(status));
|
|
|
|
|
|
|
|
|
|
/* Now check that the GPU is accessing the correct page */
|
|
|
|
|
Dispatch dispatch1(isaBuffer);
|
|
|
|
|
dispatch1.SetArgs(srcBuffer.As<void*>(), dstBuffer.As<void*>());
|
|
|
|
|
dispatch1.Submit(pm4Queue);
|
|
|
|
|
dispatch1.Sync(g_TestTimeOut);
|
|
|
|
|
|
|
|
|
|
sdmaQueue.PlaceAndSubmitPacket(SDMAWriteDataPacket(sdmaBuffer.As<HSAuint32 *>(), 0xD0BED0BE));
|
|
|
|
|
sdmaQueue.Wait4PacketConsumption();
|
|
|
|
|
|
2018-08-20 09:54:26 -04:00
|
|
|
EXPECT_SUCCESS(pm4Queue.Destroy());
|
|
|
|
|
EXPECT_SUCCESS(sdmaQueue.Destroy());
|
2018-07-23 14:45:44 -04:00
|
|
|
|
2018-08-20 09:54:26 -04:00
|
|
|
EXPECT_EQ(0xD00BED00, globalData);
|
|
|
|
|
EXPECT_EQ(0xD00BED00, stackData[dstOffset]);
|
|
|
|
|
EXPECT_EQ(0xD0BED0BE, stackData[sdmaOffset]);
|
2018-07-23 14:45:44 -04:00
|
|
|
|
|
|
|
|
TEST_END
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Registers and maps the same user pointer twice: first with two different
// sizes (the two GPU VAs are expected to differ), then with the same size
// (the two GPU VAs are expected to be equal), and finally checks that the
// memory stays GPU-accessible after one of the two equal mappings has been
// unmapped and deregistered.
TEST_F(KFDMemoryTest, MemoryRegisterSamePtr) {
    if (!is_dgpu()) {
        LOG() << "Skipping test: Will run on APU once APU+dGPU supported." << std::endl;
        return;
    }

    TEST_START(TESTPROFILE_RUNALL)

    int defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode();
    ASSERT_GE(defaultGPUNode, 0) << "failed to get default GPU Node";
    const std::vector<int> gpuNodes = m_NodeInfo.GetNodesWithGPU();
    HSAuint64 nGPU = gpuNodes.size();  // number of gpu nodes
    // &nodes[0] below requires a non-empty node list
    ASSERT_GT(nGPU, 0u) << "no GPU nodes found";
    static volatile HSAuint32 mem[4];
    // Initialized so the comparisons and cleanup calls below never read an
    // indeterminate value when a mapping call fails.
    HSAuint64 gpuva1 = 0, gpuva2 = 0;

    /* Same address, different size */
    EXPECT_SUCCESS(hsaKmtRegisterMemory((void *)&mem[0], sizeof(HSAuint32)*2));
    EXPECT_SUCCESS(hsaKmtMapMemoryToGPU((void *)&mem[0], sizeof(HSAuint32)*2,
                                        &gpuva1));
    EXPECT_SUCCESS(hsaKmtRegisterMemory((void *)&mem[0], sizeof(HSAuint32)));
    EXPECT_SUCCESS(hsaKmtMapMemoryToGPU((void *)&mem[0], sizeof(HSAuint32),
                                        &gpuva2));
    EXPECT_TRUE(gpuva1 != gpuva2);
    EXPECT_SUCCESS(hsaKmtUnmapMemoryToGPU(reinterpret_cast<void *>(gpuva1)));
    EXPECT_SUCCESS(hsaKmtDeregisterMemory(reinterpret_cast<void *>(gpuva1)));
    EXPECT_SUCCESS(hsaKmtUnmapMemoryToGPU(reinterpret_cast<void *>(gpuva2)));
    EXPECT_SUCCESS(hsaKmtDeregisterMemory(reinterpret_cast<void *>(gpuva2)));

    /* Same address, same size */
    HsaMemMapFlags memFlags = {0};
    memFlags.ui32.PageSize = HSA_PAGE_SIZE_4KB;
    memFlags.ui32.HostAccess = 1;

    // std::vector instead of a variable-length array, which is not
    // standard C++.
    std::vector<HSAuint32> nodes(gpuNodes.begin(), gpuNodes.end());
    EXPECT_SUCCESS(hsaKmtRegisterMemoryToNodes((void *)&mem[2],
                                    sizeof(HSAuint32)*2, nGPU, &nodes[0]));
    EXPECT_SUCCESS(hsaKmtMapMemoryToGPUNodes((void *)&mem[2],
                                    sizeof(HSAuint32) * 2,
                                    &gpuva1, memFlags, nGPU, &nodes[0]));
    EXPECT_SUCCESS(hsaKmtRegisterMemoryToNodes((void *)&mem[2],
                                    sizeof(HSAuint32) * 2, nGPU, &nodes[0]));
    EXPECT_SUCCESS(hsaKmtMapMemoryToGPUNodes((void *)&mem[2],
                                    sizeof(HSAuint32) * 2,
                                    &gpuva2, memFlags, nGPU, &nodes[0]));
    EXPECT_EQ(gpuva1, gpuva2);
    EXPECT_SUCCESS(hsaKmtUnmapMemoryToGPU(reinterpret_cast<void *>(gpuva1)));
    EXPECT_SUCCESS(hsaKmtDeregisterMemory(reinterpret_cast<void *>(gpuva1)));

    /* Confirm that we still have access to the memory, mem[2] */
    PM4Queue queue;
    ASSERT_SUCCESS(queue.Create(defaultGPUNode));
    mem[2] = 0x0;
    queue.PlaceAndSubmitPacket(PM4WriteDataPacket(reinterpret_cast<unsigned int *>(gpuva2),
                                                  0xdeadbeef));
    queue.PlaceAndSubmitPacket(PM4ReleaseMemoryPacket(true, 0, 0));
    queue.Wait4PacketConsumption();
    EXPECT_EQ(true, WaitOnValue((unsigned int *)(&mem[2]), 0xdeadbeef));
    EXPECT_SUCCESS(queue.Destroy());
    EXPECT_SUCCESS(hsaKmtUnmapMemoryToGPU(reinterpret_cast<void *>(gpuva2)));
    EXPECT_SUCCESS(hsaKmtDeregisterMemory(reinterpret_cast<void *>(gpuva2)));

    TEST_END
}
|
|
|
|
|
|
2018-08-14 09:52:31 -04:00
|
|
|
/* FlatScratchAccess
 * HsaMemoryBuffer has to be associated with a specific GPU node, so in its
 * current form this test does not work for multiple GPU nodes. For now only
 * the default GPU node is tested.
 * TODO: Generalize it to support multiple nodes
 *
 * The scratch buffer is made of SCRATCH_SLICE_NUM slices of
 * SCRATCH_SLICE_SIZE bytes each; SCRATCH_SLICE_OFFSET(i) is the byte offset
 * of slice i from the start of the buffer.
 */
#define SCRATCH_SLICE_SIZE      (1 << 16)  /* 0x10000 */
#define SCRATCH_SLICE_NUM       3
#define SCRATCH_SIZE            ((SCRATCH_SLICE_NUM) * (SCRATCH_SLICE_SIZE))
#define SCRATCH_SLICE_OFFSET(i) ((i) * (SCRATCH_SLICE_SIZE))
|
|
|
|
|
|
|
|
|
|
// Exercises sub-allocation mapping of a scratch buffer, then copies a dword
// from a source buffer into the scratch aperture and from there into a
// destination buffer, using one shader dispatch per copy.
TEST_F(KFDMemoryTest, FlatScratchAccess) {
    TEST_START(TESTPROFILE_RUNALL)
    if (m_FamilyId == FAMILY_CI || m_FamilyId == FAMILY_KV) {
        LOG() << "Skipping test: VI-based shader not supported on other ASICs." << std::endl;
        return;
    }

    int defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode();
    ASSERT_GE(defaultGPUNode, 0) << "failed to get default GPU Node";

    HsaMemoryBuffer isaBuffer(PAGE_SIZE, defaultGPUNode, true/*zero*/, false/*local*/, true/*exec*/);
    HsaMemoryBuffer scratchBuffer(SCRATCH_SIZE, defaultGPUNode, false/*zero*/, false/*local*/,
                                  false/*exec*/, true /*scratch*/);

    // Unmap scratch for sub-allocation mapping tests
    ASSERT_SUCCESS(hsaKmtUnmapMemoryToGPU(scratchBuffer.As<void*>()));

    // Map the slices in the order 2-0-1 and unmap them in the order 1-2-0
    ASSERT_SUCCESS(hsaKmtMapMemoryToGPU(scratchBuffer.As<char*>() + SCRATCH_SLICE_OFFSET(2),
                                        SCRATCH_SLICE_SIZE, NULL));
    ASSERT_SUCCESS(hsaKmtMapMemoryToGPU(scratchBuffer.As<char*>() + SCRATCH_SLICE_OFFSET(0),
                                        SCRATCH_SLICE_SIZE, NULL));
    ASSERT_SUCCESS(hsaKmtMapMemoryToGPU(scratchBuffer.As<char*>() + SCRATCH_SLICE_OFFSET(1),
                                        SCRATCH_SLICE_SIZE, NULL));

    EXPECT_SUCCESS(hsaKmtUnmapMemoryToGPU(scratchBuffer.As<char*>() + SCRATCH_SLICE_OFFSET(1)));
    EXPECT_SUCCESS(hsaKmtUnmapMemoryToGPU(scratchBuffer.As<char*>() + SCRATCH_SLICE_OFFSET(2)));
    EXPECT_SUCCESS(hsaKmtUnmapMemoryToGPU(scratchBuffer.As<char*>() + SCRATCH_SLICE_OFFSET(0)));

    // Map everything for test below
    ASSERT_SUCCESS(hsaKmtMapMemoryToGPU(scratchBuffer.As<char*>(), SCRATCH_SIZE, NULL));

    HsaMemoryBuffer srcMemBuffer(PAGE_SIZE, defaultGPUNode);
    HsaMemoryBuffer dstMemBuffer(PAGE_SIZE, defaultGPUNode);

    // Initialize the srcBuffer to some fixed value
    srcMemBuffer.Fill(0x01010101);

    // Initialize a buffer with a dword copy ISA
    m_pIsaGen->CompileShader((m_FamilyId >= FAMILY_AI) ? gfx9_ScratchCopyDword : gfx8_ScratchCopyDword,
            "ScratchCopyDword", isaBuffer);

    const HsaNodeProperties *pNodeProperties = m_NodeInfo.GetNodeProperties(defaultGPUNode);

    /* TODO: Add support to all GPU Nodes.
     * The loop over the system nodes is removed as the test can be executed only on GPU nodes. This
     * also requires changes to be made to all the HsaMemoryBuffer variables defined above, as
     * HsaMemoryBuffer is now associated with a Node.
     */
    if (pNodeProperties != NULL) {
        // Get the aperture of the scratch buffer. std::vector releases the
        // storage on every exit path, including the early return taken by
        // a failing ASSERT_SUCCESS inside the loop.
        const HSAuint32 numBanks = pNodeProperties->NumMemoryBanks;
        std::vector<HsaMemoryProperties> memoryProperties(numBanks);
        // With zero banks there is nothing to query and the loop below is
        // empty; &memoryProperties[0] is only valid for a non-empty vector.
        if (numBanks > 0) {
            EXPECT_SUCCESS(hsaKmtGetNodeMemoryProperties(defaultGPUNode, numBanks,
                                                         &memoryProperties[0]));
        }

        for (unsigned int bank = 0; bank < numBanks; bank++) {
            if (memoryProperties[bank].HeapType == HSA_HEAPTYPE_GPU_SCRATCH) {
                int numWaves = 4;  // WAVES must be >= # SE
                int waveSize = 1;  // Amount of space used by each wave in units of 256 dwords

                PM4Queue queue;
                ASSERT_SUCCESS(queue.Create(defaultGPUNode));

                HSAuint64 scratchApertureAddr = memoryProperties[bank].VirtualBaseAddress;

                // First dispatch: copy from the source memory buffer to the
                // scratch aperture
                Dispatch dispatchSrcToScratch(isaBuffer);
                dispatchSrcToScratch.SetArgs(srcMemBuffer.As<void*>(), reinterpret_cast<void*>(scratchApertureAddr));
                dispatchSrcToScratch.SetDim(1, 1, 1);
                dispatchSrcToScratch.SetScratch(numWaves, waveSize, scratchBuffer.As<uint64_t>());
                // Submit the packet
                dispatchSrcToScratch.Submit(queue);
                dispatchSrcToScratch.Sync();

                // Second dispatch: copy the scratch contents to the
                // destination buffer
                Dispatch dispatchScratchToDst(isaBuffer);
                dispatchScratchToDst.SetArgs(reinterpret_cast<void*>(scratchApertureAddr), dstMemBuffer.As<void*>());
                dispatchScratchToDst.SetDim(1, 1, 1);
                dispatchScratchToDst.SetScratch(numWaves, waveSize, scratchBuffer.As<uint64_t>());
                // Submit the packet
                dispatchScratchToDst.Submit(queue);
                dispatchScratchToDst.Sync();

                // Check that the scratch buffer contents were correctly copied over to the system memory buffer
                EXPECT_EQ(dstMemBuffer.As<unsigned int*>()[0], 0x01010101);

                // Destroy the queue explicitly, as the other tests in this
                // file do.
                EXPECT_SUCCESS(queue.Destroy());
            }
        }
    }

    TEST_END
}
|
|
|
|
|
|
|
|
|
|
/* Queries the tile configuration of the default GPU node and logs every
 * value found in the HsaGpuTileConfig structure afterwards.
 *
 * Pass criteria: a default GPU node exists, hsaKmtGetTileConfig succeeds, and
 * the entry counts found in the structure after the call still fit the local
 * arrays that were handed to the thunk.
 */
TEST_F(KFDMemoryTest, GetTileConfigTest) {
    TEST_START(TESTPROFILE_RUNALL)

    HSAuint32 tile_config[32] = {0};
    HSAuint32 macro_tile_config[16] = {0};
    const HSAuint32 maxTileConfigs = sizeof(tile_config) / sizeof(tile_config[0]);
    const HSAuint32 maxMacroTileConfigs = sizeof(macro_tile_config) / sizeof(macro_tile_config[0]);
    unsigned int i;
    HsaGpuTileConfig config = {0};

    config.TileConfig = tile_config;
    config.MacroTileConfig = macro_tile_config;
    config.NumTileConfigs = maxTileConfigs;
    config.NumMacroTileConfigs = maxMacroTileConfigs;

    int defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode();
    ASSERT_GE(defaultGPUNode, 0) << "failed to get default GPU Node";

    ASSERT_SUCCESS(hsaKmtGetTileConfig(defaultGPUNode, &config));

    /* The loops below index the local arrays by the counts stored in config
     * after the call, so refuse to continue if they exceed the array sizes.
     */
    ASSERT_LE(config.NumTileConfigs, maxTileConfigs);
    ASSERT_LE(config.NumMacroTileConfigs, maxMacroTileConfigs);

    LOG() << "tile_config:" << std::endl;
    for (i = 0; i < config.NumTileConfigs; i++) {
        LOG() << "\t" << std::dec << i << ": 0x" << std::hex
                << tile_config[i] << std::endl;
    }

    LOG() << "macro_tile_config:" << std::endl;
    for (i = 0; i < config.NumMacroTileConfigs; i++) {
        LOG() << "\t" << std::dec << i << ": 0x" << std::hex
                << macro_tile_config[i] << std::endl;
    }

    LOG() << "gb_addr_config: 0x" << std::hex << config.GbAddrConfig
            << std::endl;
    LOG() << "num_banks: 0x" << std::hex << config.NumBanks << std::endl;
    LOG() << "num_ranks: 0x" << std::hex << config.NumRanks << std::endl;

    TEST_END
}
|
|
|
|
|
|
|
|
|
|
void KFDMemoryTest::BigBufferSystemMemory(int defaultGPUNode, HSAuint64 granularityMB,
|
|
|
|
|
HSAuint64 *lastSize) {
|
|
|
|
|
HSAuint64 sysMemSizeMB;
|
|
|
|
|
HsaMemMapFlags mapFlags = {0};
|
|
|
|
|
HSAuint64 AlternateVAGPU;
|
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
|
|
sysMemSizeMB = GetSysMemSize() >> 20;
|
|
|
|
|
|
|
|
|
|
LOG() << "Found System Memory of " << std::dec << sysMemSizeMB
|
|
|
|
|
<< "MB" << std::endl;
|
|
|
|
|
|
|
|
|
|
/* Testing big buffers in system memory */
|
|
|
|
|
unsigned int * pDb = NULL;
|
|
|
|
|
HSAuint64 lowMB = 0;
|
|
|
|
|
HSAuint64 highMB = (sysMemSizeMB + granularityMB - 1) & ~(granularityMB - 1);
|
|
|
|
|
|
|
|
|
|
HSAuint64 sizeMB;
|
|
|
|
|
HSAuint64 size = 0;
|
|
|
|
|
HSAuint64 lastTestedSize = 0;
|
|
|
|
|
|
|
|
|
|
while (highMB - lowMB > granularityMB) {
|
|
|
|
|
sizeMB = (lowMB + highMB) / 2;
|
|
|
|
|
size = sizeMB * 1024 * 1024;
|
|
|
|
|
ret = hsaKmtAllocMemory(0 /* system */, size, m_MemoryFlags,
|
2018-08-13 09:03:31 -04:00
|
|
|
reinterpret_cast<void**>(&pDb));
|
2018-07-23 14:45:44 -04:00
|
|
|
if (ret) {
|
|
|
|
|
highMB = sizeMB;
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
ret = hsaKmtMapMemoryToGPUNodes(pDb, size, &AlternateVAGPU,
|
2018-08-13 09:03:31 -04:00
|
|
|
mapFlags, 1, reinterpret_cast<HSAuint32 *>(&defaultGPUNode));
|
2018-07-23 14:45:44 -04:00
|
|
|
if (ret) {
|
2018-08-20 09:54:26 -04:00
|
|
|
EXPECT_SUCCESS(hsaKmtFreeMemory(pDb, size));
|
2018-07-23 14:45:44 -04:00
|
|
|
highMB = sizeMB;
|
|
|
|
|
continue;
|
|
|
|
|
}
|
2018-08-20 09:54:26 -04:00
|
|
|
EXPECT_SUCCESS(hsaKmtUnmapMemoryToGPU(pDb));
|
|
|
|
|
EXPECT_SUCCESS(hsaKmtFreeMemory(pDb, size));
|
2018-07-23 14:45:44 -04:00
|
|
|
|
|
|
|
|
lowMB = sizeMB;
|
|
|
|
|
lastTestedSize = sizeMB;
|
|
|
|
|
}
|
|
|
|
|
|
2018-08-14 09:52:31 -04:00
|
|
|
/* Save the biggest allocated system buffer for signal handling test */
|
2018-07-23 14:45:44 -04:00
|
|
|
LOG() << "The biggest allocated system buffer is " << std::dec
|
|
|
|
|
<< lastTestedSize << "MB" << std::endl;
|
|
|
|
|
if (lastSize)
|
|
|
|
|
*lastSize = lastTestedSize * 1024 *1024;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void KFDMemoryTest::BigBufferVRAM(int defaultGPUNode, HSAuint64 granularityMB,
|
|
|
|
|
HSAuint64 *lastSize) {
|
|
|
|
|
HSAuint64 AlternateVAGPU;
|
|
|
|
|
int ret;
|
|
|
|
|
HSAuint64 vramSizeMB;
|
|
|
|
|
HsaMemFlags memFlags;
|
|
|
|
|
HsaMemMapFlags mapFlags = {0};
|
|
|
|
|
|
|
|
|
|
vramSizeMB = GetVramSize(defaultGPUNode) >> 20;
|
|
|
|
|
|
|
|
|
|
LOG() << "Found VRAM of " << std::dec << vramSizeMB << "MB." << std::endl;
|
|
|
|
|
|
|
|
|
|
/* Testing big buffers in VRAM */
|
|
|
|
|
unsigned int * pDb = NULL;
|
|
|
|
|
HSAuint64 lowMB = 0;
|
|
|
|
|
HSAuint64 highMB = (vramSizeMB + granularityMB - 1) & ~(granularityMB - 1);
|
|
|
|
|
|
|
|
|
|
HSAuint64 sizeMB;
|
|
|
|
|
HSAuint64 size = 0;
|
|
|
|
|
HSAuint64 lastTestedSize = 0;
|
|
|
|
|
|
|
|
|
|
memset(&memFlags, 0, sizeof(memFlags));
|
|
|
|
|
memFlags.ui32.HostAccess = 0;
|
|
|
|
|
memFlags.ui32.NonPaged = 1;
|
|
|
|
|
|
|
|
|
|
while (highMB - lowMB > granularityMB) {
|
|
|
|
|
sizeMB = (lowMB + highMB) / 2;
|
|
|
|
|
size = sizeMB * 1024 * 1024;
|
|
|
|
|
ret = hsaKmtAllocMemory(defaultGPUNode, size, memFlags,
|
2018-08-13 09:03:31 -04:00
|
|
|
reinterpret_cast<void**>(&pDb));
|
2018-07-23 14:45:44 -04:00
|
|
|
if (ret) {
|
|
|
|
|
highMB = sizeMB;
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
ret = hsaKmtMapMemoryToGPUNodes(pDb, size, &AlternateVAGPU,
|
2018-08-13 09:03:31 -04:00
|
|
|
mapFlags, 1, reinterpret_cast<HSAuint32 *>(&defaultGPUNode));
|
2018-07-23 14:45:44 -04:00
|
|
|
if (ret) {
|
2018-08-20 09:54:26 -04:00
|
|
|
EXPECT_SUCCESS(hsaKmtFreeMemory(pDb, size));
|
2018-07-23 14:45:44 -04:00
|
|
|
highMB = sizeMB;
|
|
|
|
|
continue;
|
|
|
|
|
}
|
2018-08-20 09:54:26 -04:00
|
|
|
EXPECT_SUCCESS(hsaKmtUnmapMemoryToGPU(pDb));
|
|
|
|
|
EXPECT_SUCCESS(hsaKmtFreeMemory(pDb, size));
|
2018-07-23 14:45:44 -04:00
|
|
|
|
|
|
|
|
lowMB = sizeMB;
|
|
|
|
|
lastTestedSize = sizeMB;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
LOG() << "The biggest allocated VRAM buffer is " << std::dec
|
|
|
|
|
<< lastTestedSize << "MB" << std::endl;
|
|
|
|
|
if (lastSize)
|
|
|
|
|
*lastSize = lastTestedSize * 1024 * 1024;
|
2018-08-22 15:54:54 +08:00
|
|
|
|
|
|
|
|
/* Make sure 3/4 vram can be allocated.*/
|
|
|
|
|
EXPECT_GE(lastTestedSize * 4, vramSizeMB * 3);
|
|
|
|
|
if (lastTestedSize * 16 < vramSizeMB * 15)
|
|
|
|
|
WARN() << "The biggest allocated VRAM buffer size is smaller than the expected "
|
|
|
|
|
<< vramSizeMB * 15 / 16 << "MB" << std::endl;
|
2018-07-23 14:45:44 -04:00
|
|
|
}
|
|
|
|
|
|
2019-06-13 15:22:52 -04:00
|
|
|
void KFDMemoryTest::NumaNodeBind(const char *nodeStr) {
|
|
|
|
|
if (numa_available() != -1) {
|
|
|
|
|
int num_node = numa_num_task_nodes();
|
|
|
|
|
|
|
|
|
|
if (num_node > 1) {
|
|
|
|
|
struct bitmask *nodemask;
|
|
|
|
|
|
|
|
|
|
LOG() << "NUMA total nodes " << num_node << ", bind to " << nodeStr << std::endl;
|
|
|
|
|
|
|
|
|
|
nodemask = numa_parse_nodestring(nodeStr);
|
|
|
|
|
if (nodemask) {
|
|
|
|
|
numa_bind(nodemask);
|
|
|
|
|
numa_free_nodemask(nodemask);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2018-07-23 14:45:44 -04:00
|
|
|
/* BigBufferStressTest allocs, maps/unmaps, and frees the biggest possible system
|
|
|
|
|
* buffers. Its size is found using binary search in the range (0, RAM SIZE) with
|
|
|
|
|
* a granularity of 128M. Repeat the similar logic on local buffers (VRAM).
|
|
|
|
|
* Finally, it allocs and maps 128M system buffers in a loop until it
|
|
|
|
|
* fails, then unmaps and frees them afterwards.
|
|
|
|
|
* Please note we limit the biggest possible system buffer to be smaller than
|
|
|
|
|
* the RAM size. The reason is that the system buffer can make use of virtual
|
|
|
|
|
* memory so that a system buffer could be very large even though the RAM size
|
|
|
|
|
* is small. For example, on a typical Carrizo platform, the biggest allocated
|
|
|
|
|
* system buffer could be more than 14G even though it only has 4G memory.
|
|
|
|
|
* In that situation, it will take too much time to finish the test, because of
|
2018-08-14 09:52:31 -04:00
|
|
|
* the onerous memory swap operation. So we limit the buffer size that way.
|
|
|
|
|
*/
|
2018-07-23 14:45:44 -04:00
|
|
|
TEST_F(KFDMemoryTest, BigBufferStressTest) {
|
|
|
|
|
if (!is_dgpu()) {
|
2018-08-13 10:18:04 -04:00
|
|
|
LOG() << "Skipping test: Running on APU fails and locks the system." << std::endl;
|
2018-07-23 14:45:44 -04:00
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
|
|
|
|
|
TEST_START(TESTPROFILE_RUNALL);
|
|
|
|
|
|
|
|
|
|
HSAuint64 AlternateVAGPU;
|
|
|
|
|
HsaMemMapFlags mapFlags = {0};
|
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
|
|
HSAuint64 granularityMB = 128;
|
|
|
|
|
|
|
|
|
|
int defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode();
|
|
|
|
|
ASSERT_GE(defaultGPUNode, 0) << "failed to get default GPU Node";
|
|
|
|
|
|
2019-06-13 15:22:52 -04:00
|
|
|
/* Don't run on node 0 on multiple NUMA node machine because dma32 zone is on node 0,
|
|
|
|
|
* Use all memory including dma32 zone on node 0 will cause TTM eviction to free dma32
|
|
|
|
|
* zone for other devices which supports 32bit physical address. The eviction and
|
|
|
|
|
* restore may retry if busy and cause queue timeout and test failure.
|
|
|
|
|
*/
|
|
|
|
|
NumaNodeBind("!0");
|
|
|
|
|
|
2018-07-23 14:45:44 -04:00
|
|
|
BigBufferSystemMemory(defaultGPUNode, granularityMB, NULL);
|
|
|
|
|
|
|
|
|
|
BigBufferVRAM(defaultGPUNode, granularityMB, NULL);
|
|
|
|
|
|
|
|
|
|
/* Repeatedly allocate and map big buffers in system memory until it fails,
|
2018-08-14 09:52:31 -04:00
|
|
|
* then unmap and free them.
|
|
|
|
|
*/
|
2018-07-23 14:45:44 -04:00
|
|
|
#define ARRAY_ENTRIES 2048
|
|
|
|
|
|
2018-08-24 11:57:04 +08:00
|
|
|
int i = 0, allocationCount = 0;
|
2018-07-23 14:45:44 -04:00
|
|
|
unsigned int* pDb_array[ARRAY_ENTRIES];
|
|
|
|
|
HSAuint64 block_size_mb = 128;
|
|
|
|
|
HSAuint64 block_size = block_size_mb * 1024 * 1024;
|
2018-08-24 11:57:04 +08:00
|
|
|
PM4Queue queue;
|
2018-07-23 14:45:44 -04:00
|
|
|
|
2018-08-24 11:57:04 +08:00
|
|
|
/* Test 4 times to see if there is any memory leak.*/
|
|
|
|
|
for (int repeat = 1; repeat < 5; repeat++) {
|
2019-06-13 15:46:05 -04:00
|
|
|
ASSERT_SUCCESS(queue.Create(defaultGPUNode));
|
|
|
|
|
|
2018-08-24 11:57:04 +08:00
|
|
|
for (i = 0; i < ARRAY_ENTRIES; i++) {
|
|
|
|
|
ret = hsaKmtAllocMemory(0 /* system */, block_size, m_MemoryFlags,
|
|
|
|
|
reinterpret_cast<void**>(&pDb_array[i]));
|
|
|
|
|
if (ret)
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
ret = hsaKmtMapMemoryToGPUNodes(pDb_array[i], block_size,
|
|
|
|
|
&AlternateVAGPU, mapFlags, 1, reinterpret_cast<HSAuint32 *>(&defaultGPUNode));
|
|
|
|
|
if (ret) {
|
|
|
|
|
EXPECT_SUCCESS(hsaKmtFreeMemory(pDb_array[i], block_size));
|
|
|
|
|
break;
|
|
|
|
|
}
|
2018-07-23 14:45:44 -04:00
|
|
|
}
|
|
|
|
|
|
2018-08-24 11:57:04 +08:00
|
|
|
LOG() << "Allocated system buffers time " << std::dec << repeat << ": " << i << "x"
|
|
|
|
|
<< block_size_mb << "MB" << std::endl;
|
2018-07-23 14:45:44 -04:00
|
|
|
|
2018-08-24 11:57:04 +08:00
|
|
|
if (allocationCount == 0)
|
|
|
|
|
allocationCount = i;
|
|
|
|
|
EXPECT_GE(i, allocationCount) << "There might be memory leak!" << std::endl;
|
2018-07-23 14:45:44 -04:00
|
|
|
|
2019-06-13 15:46:05 -04:00
|
|
|
for (int j = 0; j < i; j++) {
|
2018-08-24 11:57:04 +08:00
|
|
|
/* To see if GPU can access the memory correctly*/
|
2019-06-13 15:46:05 -04:00
|
|
|
unsigned int *begin = pDb_array[j];
|
2018-08-24 11:57:04 +08:00
|
|
|
*begin = 0;
|
|
|
|
|
queue.PlaceAndSubmitPacket(
|
|
|
|
|
PM4WriteDataPacket(begin, 0xdeadbeaf));
|
2019-06-13 15:46:05 -04:00
|
|
|
queue.Wait4PacketConsumption(NULL, 300000);
|
2018-08-24 11:57:04 +08:00
|
|
|
EXPECT_TRUE(WaitOnValue(begin, 0xdeadbeaf));
|
2019-06-13 15:46:05 -04:00
|
|
|
}
|
2018-08-24 11:57:04 +08:00
|
|
|
|
2019-06-13 15:46:05 -04:00
|
|
|
EXPECT_SUCCESS(queue.Destroy());
|
|
|
|
|
|
|
|
|
|
for (int j = 0; j < i; j++) {
|
|
|
|
|
EXPECT_SUCCESS(hsaKmtUnmapMemoryToGPU(pDb_array[j]));
|
|
|
|
|
EXPECT_SUCCESS(hsaKmtFreeMemory(pDb_array[j], block_size));
|
2018-08-24 11:57:04 +08:00
|
|
|
}
|
2018-07-23 14:45:44 -04:00
|
|
|
}
|
|
|
|
|
|
2019-06-13 15:22:52 -04:00
|
|
|
/* Reset to run on all task nodes */
|
|
|
|
|
NumaNodeBind("all");
|
|
|
|
|
|
2018-07-23 14:45:44 -04:00
|
|
|
TEST_END
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Memory-management micro-benchmark.
 *
 * For every combination of buffer size (bufParams), memory type (system
 * memory or VRAM) and SDMA interleaving (off or on), the test times six
 * operations over a batch of buffers: allocate, map to one GPU, unmap,
 * map to all GPUs, unmap, free. Times are taken in microseconds for the
 * whole batch and reported as the average number of nanoseconds per buffer.
 *
 * The map-to-all-GPUs and matching unmap steps only run when every GPU node
 * is large-BAR; otherwise their times are reported as 0.
 *
 * VRAM combinations are skipped when the default GPU node reports no VRAM.
 */
TEST_F(KFDMemoryTest, MMBench) {
    TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
    TEST_START(TESTPROFILE_RUNALL);

    unsigned testIndex, nMemTypes;
    const char *memTypeStrings[2] = {"SysMem", "VRAM"};
    const struct {
        unsigned size;
        unsigned num;
    } bufParams[] = {
        /* Buffer sizes in x16 increments. Limit memory usage to about
         * 1GB. For small sizes we use 1000 buffers, which means we
         * conveniently measure microseconds and report nanoseconds.
         */
        {PAGE_SIZE      , 1000},  /*  4KB */
        {PAGE_SIZE <<  4, 1000},  /* 64KB */
        {PAGE_SIZE <<  9,  500},  /*  2MB */
        {PAGE_SIZE << 13,   32},  /* 32MB */
        {PAGE_SIZE << 18,    1},  /*  1GB */
    };
    const unsigned nSizes = sizeof(bufParams) / sizeof(bufParams[0]);
    /* Each size runs for 2 memory types x 2 SDMA settings */
    const unsigned nTests = nSizes << 2;
#define TEST_BUFSIZE(index) (bufParams[(index) % nSizes].size)
#define TEST_NBUFS(index)   (bufParams[(index) % nSizes].num)
#define TEST_MEMTYPE(index) ((index / nSizes) & 0x1)
#define TEST_SDMA(index)    (((index / nSizes) >> 1) & 0x1)

    /* Sized for the largest .num in bufParams */
    void *bufs[1000];
    HSAuint64 start;
    unsigned i;
    HsaMemFlags memFlags = {0};
    HsaMemMapFlags mapFlags = {0};
    HSAuint64 altVa;

    /* Validate the node id while it is still signed; checking an unsigned
     * value against 0 can never fail.
     */
    int signedGPUNode = m_NodeInfo.HsaDefaultGPUNode();
    ASSERT_GE(signedGPUNode, 0) << "failed to get default GPU Node";
    HSAuint32 defaultGPUNode = signedGPUNode;

    HSAuint64 vramSizeMB = GetVramSize(defaultGPUNode) >> 20;

    const std::vector<int> gpuNodes = m_NodeInfo.GetNodesWithGPU();
    bool is_all_large_bar = true;

    for (unsigned n = 0; n < gpuNodes.size(); n++) {
        if (!m_NodeInfo.IsGPUNodeLargeBar(gpuNodes.at(n))) {
            is_all_large_bar = false;
            break;
        }
    }

    LOG() << "Found VRAM of " << std::dec << vramSizeMB << "MB." << std::endl;

    if (vramSizeMB == 0)
        nMemTypes = 1;
    else
        nMemTypes = 2;

    /* Two SDMA queues to interleave user mode SDMA with memory
     * management on either SDMA engine. Make the queues long enough
     * to buffer at least nBufs x WriteData packets (7 dwords per
     * packet).
     */
    SDMAQueue sdmaQueue[2];
    ASSERT_SUCCESS(sdmaQueue[0].Create(defaultGPUNode, PAGE_SIZE*8));
    ASSERT_SUCCESS(sdmaQueue[1].Create(defaultGPUNode, PAGE_SIZE*8));
    HsaMemoryBuffer sdmaBuffer(PAGE_SIZE, 0); /* system memory */
#define INTERLEAVE_SDMA() do {                                          \
        if (interleaveSDMA) {                                           \
            sdmaQueue[0].PlaceAndSubmitPacket(                          \
                SDMAWriteDataPacket(sdmaBuffer.As<HSAuint32 *>(),       \
                                    0x12345678));                       \
            sdmaQueue[1].PlaceAndSubmitPacket(                          \
                SDMAWriteDataPacket(sdmaBuffer.As<HSAuint32 *>()+16,    \
                                    0x12345678));                       \
        }                                                               \
    } while (0)
#define IDLE_SDMA() do {                                                \
        if (interleaveSDMA) {                                           \
            sdmaQueue[0].Wait4PacketConsumption();                      \
            sdmaQueue[1].Wait4PacketConsumption();                      \
        }                                                               \
    } while (0)

    LOG() << "Test (avg. ns)\t alloc mapOne umapOne mapAll umapAll free" << std::endl;
    for (testIndex = 0; testIndex < nTests; testIndex++) {
        unsigned bufSize = TEST_BUFSIZE(testIndex);
        unsigned nBufs = TEST_NBUFS(testIndex);
        unsigned memType = TEST_MEMTYPE(testIndex);
        bool interleaveSDMA = TEST_SDMA(testIndex);
        /* mapAllTime/unmapAllTime stay 0 when the map-to-all step is skipped */
        HSAuint64 allocTime, map1Time, unmap1Time, mapAllTime = 0, unmapAllTime = 0, freeTime;
        HSAuint32 allocNode;

        /* Separator line in front of each memory type / SDMA group */
        if ((testIndex % nSizes) == 0)
            LOG() << "--------------------------------------------------------------------------" << std::endl;

        if (memType >= nMemTypes)
            continue;  // skip unsupported mem types

        if (memType == 0) {
            allocNode = 0;
            memFlags.ui32.PageSize = HSA_PAGE_SIZE_4KB;
            memFlags.ui32.HostAccess = 1;
            memFlags.ui32.NonPaged = 0;
        } else {
            allocNode = defaultGPUNode;
            memFlags.ui32.PageSize = HSA_PAGE_SIZE_4KB;
            memFlags.ui32.HostAccess = 0;
            memFlags.ui32.NonPaged = 1;
        }

        /* Allocation */
        start = GetSystemTickCountInMicroSec();
        for (i = 0; i < nBufs; i++) {
            ASSERT_SUCCESS(hsaKmtAllocMemory(allocNode, bufSize, memFlags,
                                             &bufs[i]));
            INTERLEAVE_SDMA();
        }
        allocTime = GetSystemTickCountInMicroSec() - start;
        IDLE_SDMA();

        /* Map to one GPU */
        start = GetSystemTickCountInMicroSec();
        for (i = 0; i < nBufs; i++) {
            ASSERT_SUCCESS(hsaKmtMapMemoryToGPUNodes(bufs[i], bufSize,
                                                     &altVa, mapFlags, 1,
                                                     &defaultGPUNode));
            INTERLEAVE_SDMA();
        }
        map1Time = GetSystemTickCountInMicroSec() - start;
        IDLE_SDMA();

        /* Unmap from GPU */
        start = GetSystemTickCountInMicroSec();
        for (i = 0; i < nBufs; i++) {
            EXPECT_SUCCESS(hsaKmtUnmapMemoryToGPU(bufs[i]));
            INTERLEAVE_SDMA();
        }
        unmap1Time = GetSystemTickCountInMicroSec() - start;
        IDLE_SDMA();

        /* Map to all GPUs */
        if (is_all_large_bar) {
            start = GetSystemTickCountInMicroSec();
            for (i = 0; i < nBufs; i++) {
                ASSERT_SUCCESS(hsaKmtMapMemoryToGPU(bufs[i], bufSize, &altVa));
                INTERLEAVE_SDMA();
            }
            mapAllTime = GetSystemTickCountInMicroSec() - start;
            IDLE_SDMA();

            /* Unmap from all GPUs */
            start = GetSystemTickCountInMicroSec();
            for (i = 0; i < nBufs; i++) {
                EXPECT_SUCCESS(hsaKmtUnmapMemoryToGPU(bufs[i]));
                INTERLEAVE_SDMA();
            }
            unmapAllTime = GetSystemTickCountInMicroSec() - start;
            IDLE_SDMA();
        }

        /* Free */
        start = GetSystemTickCountInMicroSec();
        for (i = 0; i < nBufs; i++) {
            EXPECT_SUCCESS(hsaKmtFreeMemory(bufs[i], bufSize));
            INTERLEAVE_SDMA();
        }
        freeTime = GetSystemTickCountInMicroSec() - start;
        IDLE_SDMA();

        /* Batch microseconds -> average nanoseconds per buffer */
        allocTime = allocTime * 1000 / nBufs;
        map1Time = map1Time * 1000 / nBufs;
        unmap1Time = unmap1Time * 1000 / nBufs;
        mapAllTime = mapAllTime * 1000 / nBufs;
        unmapAllTime = unmapAllTime * 1000 / nBufs;
        freeTime = freeTime * 1000 / nBufs;

        /* Express the buffer size in the largest unit (K, M or G) that fits */
        unsigned bufSizeLog;
        char bufSizeUnit;
        if (bufSize < (1 << 20)) {
            bufSizeLog = bufSize >> 10;
            bufSizeUnit = 'K';
        } else if (bufSize < (1 << 30)) {
            bufSizeLog = bufSize >> 20;
            bufSizeUnit = 'M';
        } else {
            bufSizeLog = bufSize >> 30;
            bufSizeUnit = 'G';
        }

        LOG() << std::dec << std::setiosflags(std::ios::right)
              << std::setw(3) << bufSizeLog << bufSizeUnit << "-"
              << memTypeStrings[memType] << "-"
              << (interleaveSDMA ? "SDMA\t" : "noSDMA\t")
              << std::setw(9) << allocTime
              << std::setw(9) << map1Time
              << std::setw(9) << unmap1Time
              << std::setw(9) << mapAllTime
              << std::setw(9) << unmapAllTime
              << std::setw(9) << freeTime << std::endl;

#define MMBENCH_KEY_PREFIX memTypeStrings[memType] << "-" \
                           << (interleaveSDMA ? "SDMA" : "noSDMA") << "-" \
                           << (bufSize >> 10) << "K-"
        RECORD(allocTime) << MMBENCH_KEY_PREFIX << "alloc";
        RECORD(map1Time) << MMBENCH_KEY_PREFIX << "mapOne";
        RECORD(unmap1Time) << MMBENCH_KEY_PREFIX << "unmapOne";
        RECORD(mapAllTime) << MMBENCH_KEY_PREFIX << "mapAll";
        RECORD(unmapAllTime) << MMBENCH_KEY_PREFIX << "unmapAll";
        RECORD(freeTime) << MMBENCH_KEY_PREFIX << "free";
    }

    TEST_END
}
|
|
|
|
|
|
|
|
|
|
/* Checks what hsaKmtQueryPointerInfo reports for several kinds of memory:
 *
 *  - a buffer allocated on the CPU node, before and (on dGPU) after it is
 *    unmapped from the GPU,
 *  - a buffer allocated in local memory, if the default GPU node has VRAM,
 *  - a registered user pointer, and a second range registered to an
 *    explicit node list (dGPU only),
 *  - addresses that point into the middle of a range,
 *  - user data attached with hsaKmtSetMemoryUserData.
 */
TEST_F(KFDMemoryTest, QueryPointerInfo) {
    TEST_START(TESTPROFILE_RUNALL)

    int defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode();
    ASSERT_GE(defaultGPUNode, 0) << "failed to get default GPU Node";

    unsigned int bufSize = PAGE_SIZE * 8;  // CZ and Tonga need 8 pages
    HsaPointerInfo ptrInfo;
    const std::vector<int> gpuNodes = m_NodeInfo.GetNodesWithGPU();
    HSAuint64 nGPU = gpuNodes.size();  // number of gpu nodes

    /* GraphicHandle is tested at KFDGraphicsInterop.RegisterGraphicsHandle */

    /*** Memory allocated on CPU node ***/
    HsaMemoryBuffer hostBuffer(bufSize, 0/*node*/, false, false/*local*/);
    EXPECT_SUCCESS(hsaKmtQueryPointerInfo(hostBuffer.As<void*>(), &ptrInfo));
    EXPECT_EQ(ptrInfo.Type, HSA_POINTER_ALLOCATED);
    EXPECT_EQ(ptrInfo.Node, 0);
    EXPECT_EQ(ptrInfo.MemFlags.Value, hostBuffer.Flags().Value);
    EXPECT_EQ(ptrInfo.CPUAddress, hostBuffer.As<void*>());
    EXPECT_EQ(ptrInfo.GPUAddress, (HSAuint64)hostBuffer.As<void*>());
    EXPECT_EQ(ptrInfo.SizeInBytes, (HSAuint64)hostBuffer.Size());
    if (is_dgpu()) {
        EXPECT_EQ((HSAuint64)ptrInfo.NMappedNodes, nGPU);
        /* Check NMappedNodes again after unmapping the memory. Both calls are
         * checked so that a failure here is reported as such, instead of the
         * NMappedNodes check below running on stale ptrInfo contents.
         */
        EXPECT_SUCCESS(hsaKmtUnmapMemoryToGPU(hostBuffer.As<void*>()));
        EXPECT_SUCCESS(hsaKmtQueryPointerInfo(hostBuffer.As<void*>(), &ptrInfo));
    }
    EXPECT_EQ((HSAuint64)ptrInfo.NMappedNodes, 0);

    /* Skip testing local memory if the platform does not have it */
    if (GetVramSize(defaultGPUNode)) {
        HsaMemoryBuffer localBuffer(bufSize, defaultGPUNode, false, true);
        EXPECT_SUCCESS(hsaKmtQueryPointerInfo(localBuffer.As<void*>(), &ptrInfo));
        EXPECT_EQ(ptrInfo.Type, HSA_POINTER_ALLOCATED);
        EXPECT_EQ(ptrInfo.Node, defaultGPUNode);
        EXPECT_EQ(ptrInfo.MemFlags.Value, localBuffer.Flags().Value);
        EXPECT_EQ(ptrInfo.CPUAddress, localBuffer.As<void*>());
        EXPECT_EQ(ptrInfo.GPUAddress, (HSAuint64)localBuffer.As<void*>());
        EXPECT_EQ(ptrInfo.SizeInBytes, (HSAuint64)localBuffer.Size());

        /* An address inside the buffer must resolve to the buffer's start */
        HSAuint32 *addr = localBuffer.As<HSAuint32 *>() + 4;
        EXPECT_SUCCESS(hsaKmtQueryPointerInfo(reinterpret_cast<void *>(addr), &ptrInfo));
        EXPECT_EQ(ptrInfo.GPUAddress, (HSAuint64)localBuffer.As<void*>());
    }

    /** Registered memory: user pointer */
    static volatile HSAuint32 mem[4];  // 8 bytes for register only and
                                       // 8 bytes for register to nodes
    HsaMemoryBuffer hsaBuffer((void *)(&mem[0]), sizeof(HSAuint32)*2);
    if (is_dgpu()) {  // APU doesn't use userptr
        EXPECT_SUCCESS(hsaKmtQueryPointerInfo((void *)(&mem[0]), &ptrInfo));
        EXPECT_EQ(ptrInfo.Type, HSA_POINTER_REGISTERED_USER);
        EXPECT_EQ(ptrInfo.CPUAddress, &mem[0]);
        EXPECT_EQ(ptrInfo.GPUAddress, (HSAuint64)hsaBuffer.As<void*>());
        EXPECT_EQ(ptrInfo.SizeInBytes, sizeof(HSAuint32)*2);
        EXPECT_EQ(ptrInfo.NRegisteredNodes, 0);
        EXPECT_EQ(ptrInfo.NMappedNodes, nGPU);

        // Register the second half of mem to every GPU node. A vector holds
        // the node list; a runtime-sized array is not standard C++.
        std::vector<HSAuint32> nodes(gpuNodes.begin(), gpuNodes.end());
        EXPECT_SUCCESS(hsaKmtRegisterMemoryToNodes((void *)(&mem[2]),
                                sizeof(HSAuint32)*2, nGPU, nodes.data()));
        EXPECT_SUCCESS(hsaKmtQueryPointerInfo((void *)(&mem[2]), &ptrInfo));
        EXPECT_EQ(ptrInfo.NRegisteredNodes, nGPU);
        EXPECT_SUCCESS(hsaKmtDeregisterMemory((void *)(&mem[2])));
    }

    /* Not a starting address, but an address inside the memory range
     * should also get the memory information
     */
    HSAuint32 *address = hostBuffer.As<HSAuint32 *>() + 1;
    EXPECT_SUCCESS(hsaKmtQueryPointerInfo(reinterpret_cast<void *>(address), &ptrInfo));
    EXPECT_EQ(ptrInfo.Type, HSA_POINTER_ALLOCATED);
    EXPECT_EQ(ptrInfo.CPUAddress, hostBuffer.As<void*>());
    if (is_dgpu()) {
        EXPECT_SUCCESS(hsaKmtQueryPointerInfo((void *)(&mem[1]), &ptrInfo));
        EXPECT_EQ(ptrInfo.Type, HSA_POINTER_REGISTERED_USER);
        EXPECT_EQ(ptrInfo.CPUAddress, &mem[0]);
    }

    /*** Set user data ***/
    char userData[16] = "This is a test.";
    EXPECT_SUCCESS(hsaKmtSetMemoryUserData(hostBuffer.As<HSAuint32 *>(), reinterpret_cast<void *>(userData)));
    EXPECT_SUCCESS(hsaKmtQueryPointerInfo(hostBuffer.As<void*>(), &ptrInfo));
    /* The stored pointer itself is compared, not the string it points to */
    EXPECT_EQ(ptrInfo.UserData, (void *)userData);

    TEST_END
}
|
|
|
|
|
|
|
|
|
|
/* Linux OS-specific test for a debugger accessing HSA memory in a
|
|
|
|
|
* debugged process.
|
|
|
|
|
*
|
|
|
|
|
* Allocates a system memory and a visible local memory buffer (if
|
|
|
|
|
* possible). Forks a child process that PTRACE_ATTACHes to the parent
|
|
|
|
|
* to access its memory like a debugger would. Child copies data in
|
|
|
|
|
* the parent process using PTRACE_PEEKDATA and PTRACE_POKEDATA. After
|
|
|
|
|
* the child terminates, the parent checks that the copy was
|
2018-08-14 09:52:31 -04:00
|
|
|
* successful.
|
|
|
|
|
*/
|
2018-07-23 14:45:44 -04:00
|
|
|
TEST_F(KFDMemoryTest, PtraceAccess) {
|
|
|
|
|
TEST_START(TESTPROFILE_RUNALL)
|
|
|
|
|
|
|
|
|
|
int defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode();
|
|
|
|
|
ASSERT_GE(defaultGPUNode, 0) << "failed to get default GPU Node";
|
|
|
|
|
|
|
|
|
|
HsaMemFlags memFlags = {0};
|
|
|
|
|
memFlags.ui32.PageSize = HSA_PAGE_SIZE_4KB;
|
|
|
|
|
memFlags.ui32.HostAccess = 1;
|
|
|
|
|
|
|
|
|
|
void *mem[2];
|
|
|
|
|
unsigned i;
|
|
|
|
|
|
2018-08-14 09:52:31 -04:00
|
|
|
/* Offset in the VRAM buffer to test crossing non-contiguous
|
|
|
|
|
* buffer boundaries. The second access starting from offset
|
|
|
|
|
* sizeof(HSAint64)+1 will cross a node boundary in a single access,
|
|
|
|
|
* for node sizes of 4MB or smaller.
|
|
|
|
|
*/
|
2018-08-13 09:03:31 -04:00
|
|
|
const HSAuint64 VRAM_OFFSET = (4 << 20) - 2 * sizeof(HSAint64);
|
2018-07-23 14:45:44 -04:00
|
|
|
|
2018-08-14 09:52:31 -04:00
|
|
|
// Alloc system memory from node 0 and initialize it
|
2018-07-23 14:45:44 -04:00
|
|
|
memFlags.ui32.NonPaged = 0;
|
|
|
|
|
ASSERT_SUCCESS(hsaKmtAllocMemory(0, PAGE_SIZE*2, memFlags, &mem[0]));
|
2018-08-13 09:03:31 -04:00
|
|
|
for (i = 0; i < 4*sizeof(HSAint64) + 4; i++) {
|
|
|
|
|
(reinterpret_cast<HSAuint8 *>(mem[0]))[i] = i; // source
|
|
|
|
|
(reinterpret_cast<HSAuint8 *>(mem[0]))[PAGE_SIZE+i] = 0; // destination
|
2018-07-23 14:45:44 -04:00
|
|
|
}
|
|
|
|
|
|
2018-08-14 09:52:31 -04:00
|
|
|
// Try to alloc local memory from GPU node
|
2018-07-23 14:45:44 -04:00
|
|
|
memFlags.ui32.NonPaged = 1;
|
|
|
|
|
if (m_NodeInfo.IsGPUNodeLargeBar(defaultGPUNode)) {
|
|
|
|
|
EXPECT_SUCCESS(hsaKmtAllocMemory(defaultGPUNode, PAGE_SIZE*2 + (4 << 20),
|
|
|
|
|
memFlags, &mem[1]));
|
2018-08-13 09:03:31 -04:00
|
|
|
mem[1] = reinterpret_cast<void *>(reinterpret_cast<HSAuint8 *>(mem[1]) + VRAM_OFFSET);
|
|
|
|
|
for (i = 0; i < 4*sizeof(HSAint64) + 4; i++) {
|
|
|
|
|
(reinterpret_cast<HSAuint8 *>(mem[1]))[i] = i;
|
|
|
|
|
(reinterpret_cast<HSAuint8 *>(mem[1]))[PAGE_SIZE+i] = 0;
|
2018-07-23 14:45:44 -04:00
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
LOG() << "Not testing local memory, it's invisible" << std::endl;
|
|
|
|
|
mem[1] = NULL;
|
|
|
|
|
}
|
|
|
|
|
|
2018-08-14 09:52:31 -04:00
|
|
|
/* Allow any process to trace this one. If kernel is built without
|
|
|
|
|
* Yama, this is not needed, and this call will fail.
|
|
|
|
|
*/
|
2018-07-23 14:45:44 -04:00
|
|
|
#ifdef PR_SET_PTRACER
|
|
|
|
|
prctl(PR_SET_PTRACER, PR_SET_PTRACER_ANY, 0, 0, 0);
|
|
|
|
|
#endif
|
|
|
|
|
|
2018-08-14 09:52:31 -04:00
|
|
|
// Find current pid so the child can trace it
|
2018-07-23 14:45:44 -04:00
|
|
|
pid_t tracePid = getpid();
|
|
|
|
|
|
|
|
|
|
// Fork the child
|
|
|
|
|
pid_t childPid = fork();
|
|
|
|
|
ASSERT_GE(childPid, 0);
|
|
|
|
|
if (childPid == 0) {
|
|
|
|
|
int traceStatus;
|
|
|
|
|
int err = 0, r;
|
|
|
|
|
|
2018-08-15 12:06:34 -04:00
|
|
|
/* Child process: we catch any exceptions to make sure we detach
|
|
|
|
|
* from the traced process, because terminating without detaching
|
|
|
|
|
* leaves the traced process stopped.
|
|
|
|
|
*/
|
2018-07-23 14:45:44 -04:00
|
|
|
r = ptrace(PTRACE_ATTACH, tracePid, NULL, NULL);
|
|
|
|
|
if (r) {
|
|
|
|
|
WARN() << "PTRACE_ATTACH failed: " << r << std::endl;
|
|
|
|
|
exit(1);
|
|
|
|
|
}
|
|
|
|
|
try {
|
|
|
|
|
do {
|
|
|
|
|
waitpid(tracePid, &traceStatus, 0);
|
|
|
|
|
} while (!WIFSTOPPED(traceStatus));
|
|
|
|
|
|
|
|
|
|
for (i = 0; i < 4; i++) {
|
2018-08-14 09:52:31 -04:00
|
|
|
// Test 4 different (mis-)alignments, leaving 1-byte gaps between longs
|
2018-08-13 09:03:31 -04:00
|
|
|
HSAuint8 *addr = reinterpret_cast<HSAuint8 *>(reinterpret_cast<long *>(mem[0]) + i) + i;
|
2018-07-23 14:45:44 -04:00
|
|
|
errno = 0;
|
|
|
|
|
long data = ptrace(PTRACE_PEEKDATA, tracePid, addr, NULL);
|
2018-08-20 09:54:26 -04:00
|
|
|
EXPECT_EQ(0, errno);
|
|
|
|
|
EXPECT_EQ(0, ptrace(PTRACE_POKEDATA, tracePid, addr + PAGE_SIZE,
|
2018-08-13 09:03:31 -04:00
|
|
|
reinterpret_cast<void *>(data)));
|
2018-07-23 14:45:44 -04:00
|
|
|
|
|
|
|
|
if (mem[1] == NULL)
|
|
|
|
|
continue;
|
|
|
|
|
|
2018-08-13 09:03:31 -04:00
|
|
|
addr = reinterpret_cast<HSAuint8 *>(reinterpret_cast<long *>(mem[1]) + i) + i;
|
2018-07-23 14:45:44 -04:00
|
|
|
errno = 0;
|
|
|
|
|
data = ptrace(PTRACE_PEEKDATA, tracePid, addr, NULL);
|
2018-08-20 09:54:26 -04:00
|
|
|
EXPECT_EQ(0, errno);
|
|
|
|
|
EXPECT_EQ(0, ptrace(PTRACE_POKEDATA, tracePid, addr + PAGE_SIZE,
|
2018-08-13 09:03:31 -04:00
|
|
|
reinterpret_cast<void *>(data)));
|
2018-07-23 14:45:44 -04:00
|
|
|
}
|
|
|
|
|
} catch (...) {
|
|
|
|
|
err = 1;
|
|
|
|
|
}
|
|
|
|
|
r = ptrace(PTRACE_DETACH, tracePid, NULL, NULL);
|
|
|
|
|
if (r) {
|
|
|
|
|
WARN() << "PTRACE_DETACH failed: " << r << std::endl;
|
|
|
|
|
exit(1);
|
|
|
|
|
}
|
|
|
|
|
exit(err);
|
|
|
|
|
} else {
|
|
|
|
|
int childStatus;
|
|
|
|
|
|
|
|
|
|
// Parent process, just wait for the child to finish
|
|
|
|
|
EXPECT_EQ(childPid, waitpid(childPid, &childStatus, 0));
|
|
|
|
|
EXPECT_NE(0, WIFEXITED(childStatus));
|
|
|
|
|
EXPECT_EQ(0, WEXITSTATUS(childStatus));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Clear gaps in the source that should not have been copied
|
2018-08-13 09:03:31 -04:00
|
|
|
(reinterpret_cast<uint8_t*>(mem[0]))[ sizeof(long) ] = 0;
|
|
|
|
|
(reinterpret_cast<uint8_t*>(mem[0]))[2*sizeof(long) + 1] = 0;
|
|
|
|
|
(reinterpret_cast<uint8_t*>(mem[0]))[3*sizeof(long) + 2] = 0;
|
|
|
|
|
(reinterpret_cast<uint8_t*>(mem[0]))[4*sizeof(long) + 3] = 0;
|
2018-07-23 14:45:44 -04:00
|
|
|
// Check results
|
2018-08-13 09:03:31 -04:00
|
|
|
EXPECT_EQ(0, memcmp(mem[0], reinterpret_cast<HSAuint8 *>(mem[0]) + PAGE_SIZE,
|
2018-07-23 14:45:44 -04:00
|
|
|
sizeof(long)*4 + 4));
|
|
|
|
|
// Free memory
|
|
|
|
|
EXPECT_SUCCESS(hsaKmtFreeMemory(mem[0], PAGE_SIZE*2));
|
|
|
|
|
|
|
|
|
|
if (mem[1]) {
|
2018-08-13 09:03:31 -04:00
|
|
|
(reinterpret_cast<uint8_t*>(mem[1]))[ sizeof(HSAint64) ] = 0;
|
|
|
|
|
(reinterpret_cast<uint8_t*>(mem[1]))[2*sizeof(HSAint64) + 1] = 0;
|
|
|
|
|
(reinterpret_cast<uint8_t*>(mem[1]))[3*sizeof(HSAint64) + 2] = 0;
|
|
|
|
|
(reinterpret_cast<uint8_t*>(mem[1]))[4*sizeof(HSAint64) + 3] = 0;
|
|
|
|
|
EXPECT_EQ(0, memcmp(mem[1], reinterpret_cast<HSAuint8 *>(mem[1]) + PAGE_SIZE,
|
|
|
|
|
sizeof(HSAint64)*4 + 4));
|
|
|
|
|
mem[1] = reinterpret_cast<void *>(reinterpret_cast<HSAuint8 *>(mem[1]) - VRAM_OFFSET);
|
2018-07-23 14:45:44 -04:00
|
|
|
EXPECT_SUCCESS(hsaKmtFreeMemory(mem[1], PAGE_SIZE*2));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
TEST_END
|
|
|
|
|
}
|
|
|
|
|
|
2018-08-13 09:03:31 -04:00
|
|
|
/* Check that ptrace can peek and poke VRAM that was allocated without host
 * access.
 *
 * The GPU writes two 64-bit patterns on either side of a 4MB offset inside a
 * VRAM allocation. A forked child attaches to this process with ptrace, peeks
 * both words, then pokes them back swapped. The parent finally reads the words
 * back with a shader and checks that the swap happened.
 *
 * Skipped on APUs and when HSA_DEBUG is unset or "0".
 */
TEST_F(KFDMemoryTest, PtraceAccessInvisibleVram) {
    char *hsaDebug = getenv("HSA_DEBUG");

    if (!is_dgpu()) {
        LOG() << "Skipping test: There is no VRAM on APU." << std::endl;
        return;
    }

    if (!hsaDebug || !strcmp(hsaDebug, "0")) {
        LOG() << "Skipping test: HSA_DEBUG environment variable not set." << std::endl;
        return;
    }

    TEST_START(TESTPROFILE_RUNALL)

    int defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode();
    ASSERT_GE(defaultGPUNode, 0) << "failed to get default GPU Node";

    HsaMemMapFlags mapFlags = {0};
    HsaMemFlags memFlags = {0};
    memFlags.ui32.PageSize = HSA_PAGE_SIZE_4KB;
    /* Allocate host not accessible vram */
    memFlags.ui32.HostAccess = 0;
    memFlags.ui32.NonPaged = 1;

    void *mem, *mem0, *mem1;
    unsigned size = PAGE_SIZE*2 + (4 << 20);
    /* 64-bit view (as seen by ptrace) and 32-bit view (as written by PM4 and
     * read by the shader) of the same two patterns.
     */
    HSAuint64 data[2] = {0xdeadbeefdeadbeef, 0xcafebabecafebabe};
    unsigned int data0[2] = {0xdeadbeef, 0xdeadbeef};
    unsigned int data1[2] = {0xcafebabe, 0xcafebabe};

    const HSAuint64 VRAM_OFFSET = (4 << 20) - sizeof(HSAuint64);

    ASSERT_SUCCESS(hsaKmtAllocMemory(defaultGPUNode, size, memFlags, &mem));
    ASSERT_SUCCESS(hsaKmtMapMemoryToGPUNodes(mem, size, NULL,
                   mapFlags, 1, reinterpret_cast<HSAuint32 *>(&defaultGPUNode)));
    /* Set the word before 4M boundary to 0xdeadbeefdeadbeef
     * and the word after 4M boundary to 0xcafebabecafebabe
     */
    mem0 = reinterpret_cast<void *>(reinterpret_cast<HSAuint8 *>(mem) + VRAM_OFFSET);
    mem1 = reinterpret_cast<void *>(reinterpret_cast<HSAuint8 *>(mem) + VRAM_OFFSET + sizeof(HSAuint64));
    PM4Queue queue;
    ASSERT_SUCCESS(queue.Create(defaultGPUNode));
    queue.PlaceAndSubmitPacket(PM4WriteDataPacket((unsigned int *)mem0,
                                                  data0[0], data0[1]));
    queue.PlaceAndSubmitPacket(PM4WriteDataPacket((unsigned int *)mem1,
                                                  data1[0], data1[1]));
    queue.PlaceAndSubmitPacket(PM4ReleaseMemoryPacket(true, 0, 0));
    queue.Wait4PacketConsumption();

    /* Allow any process to trace this one. If kernel is built without
     * Yama, this is not needed, and this call will fail.
     */
#ifdef PR_SET_PTRACER
    prctl(PR_SET_PTRACER, PR_SET_PTRACER_ANY, 0, 0, 0);
#endif

    // Find out my pid so the child can trace it
    pid_t tracePid = getpid();

    // Fork the child
    pid_t childPid = fork();
    ASSERT_GE(childPid, 0);
    if (childPid == 0) {
        int traceStatus = 0;
        int err = 0, r;

        /* Child process: we catch any exceptions to make sure we detach
         * from the traced process, because terminating without detaching
         * leaves the traced process stopped.
         */
        r = ptrace(PTRACE_ATTACH, tracePid, NULL, NULL);
        if (r) {
            WARN() << "PTRACE_ATTACH failed: " << r << std::endl;
            exit(1);
        }
        try {
            do {
                waitpid(tracePid, &traceStatus, 0);
            } while (!WIFSTOPPED(traceStatus));

            /* Peek the memory. PTRACE_PEEKDATA returns the word itself, so
             * errno has to be cleared before every call to tell an error
             * apart from a legitimate -1 value.
             */
            errno = 0;
            HSAint64 peeked0 = ptrace(PTRACE_PEEKDATA, tracePid, mem0, NULL);
            EXPECT_EQ(0, errno);
            EXPECT_EQ(data[0], peeked0);
            errno = 0;
            HSAint64 peeked1 = ptrace(PTRACE_PEEKDATA, tracePid, mem1, NULL);
            EXPECT_EQ(0, errno);
            EXPECT_EQ(data[1], peeked1);

            /* Swap mem0 and mem1 by poking */
            errno = 0;
            EXPECT_EQ(0, ptrace(PTRACE_POKEDATA, tracePid, mem0, reinterpret_cast<void *>(data[1])));
            EXPECT_EQ(0, errno);
            errno = 0;
            EXPECT_EQ(0, ptrace(PTRACE_POKEDATA, tracePid, mem1, reinterpret_cast<void *>(data[0])));
            EXPECT_EQ(0, errno);
        } catch (...) {
            err = 1;
        }
        /* EXPECT_* failures are recorded, not thrown, so the catch block
         * above never sees them. Fold them into the exit status; otherwise
         * the parent would report success even when the peeks/pokes failed.
         */
        if (::testing::Test::HasFailure())
            err = 1;
        r = ptrace(PTRACE_DETACH, tracePid, NULL, NULL);
        if (r) {
            WARN() << "PTRACE_DETACH failed: " << r << std::endl;
            exit(1);
        }
        exit(err);
    } else {
        int childStatus;

        // Parent process, just wait for the child to finish
        EXPECT_EQ(childPid, waitpid(childPid, &childStatus, 0));
        EXPECT_NE(0, WIFEXITED(childStatus));
        EXPECT_EQ(0, WEXITSTATUS(childStatus));
    }

    /* Use shader to read back data to check poke results */
    HsaMemoryBuffer isaBuffer(PAGE_SIZE, defaultGPUNode, true/*zero*/, false/*local*/, true/*exec*/);
    // dstBuffer is cpu accessible gtt memory
    HsaMemoryBuffer dstBuffer(PAGE_SIZE, defaultGPUNode);
    m_pIsaGen->CompileShader((m_FamilyId >= FAMILY_AI) ? gfx9_ScratchCopyDword : gfx8_ScratchCopyDword,
                             "ScratchCopyDword", isaBuffer);
    // After the swap mem0 is expected to hold the pattern originally at mem1
    Dispatch dispatch0(isaBuffer);
    dispatch0.SetArgs(mem0, dstBuffer.As<void*>());
    dispatch0.Submit(queue);
    dispatch0.Sync();
    EXPECT_EQ(data1[0], dstBuffer.As<unsigned int*>()[0]);

    // ... and mem1 the pattern originally at mem0
    Dispatch dispatch1(isaBuffer);
    dispatch1.SetArgs(mem1, dstBuffer.As<int*>());
    dispatch1.Submit(queue);
    dispatch1.Sync();
    WaitOnValue(dstBuffer.As<uint32_t *>(), data0[0]);
    EXPECT_EQ(data0[0], dstBuffer.As<unsigned int*>()[0]);

    // Clean up
    EXPECT_SUCCESS(hsaKmtUnmapMemoryToGPU(mem));
    EXPECT_SUCCESS(hsaKmtFreeMemory(mem, size));
    EXPECT_SUCCESS(queue.Destroy());

    TEST_END
}
|
|
|
|
|
|
|
|
|
|
/* Signal handler used by the SignalHandling test: logs the number of the
 * signal that was delivered and returns.
 *
 * NOTE(review): LOG() presumably writes through iostreams, which is not
 * async-signal-safe - confirm this is acceptable for test-only code.
 */
void CatchSignal(int IntrSignal) {
    LOG() << "Interrupt Signal " << std::dec << IntrSignal << " Received" << std::endl;
}
|
|
|
|
|
|
|
|
|
|
/* Send SIGUSR1 to this process from a forked child while a large system
 * memory buffer (1/4 of system RAM) is being mapped to the GPU, then check
 * that the mapping is still usable by writing to it with SDMA.
 */
TEST_F(KFDMemoryTest, SignalHandling) {
    TEST_START(TESTPROFILE_RUNALL)

    if (!is_dgpu()) {
        LOG() << "Skipping test: Test not supported on APU." << std::endl;
        return;
    }

    unsigned int *nullPtr = NULL;
    unsigned int* pDb = NULL;
    struct sigaction sa;
    SDMAQueue queue;
    HSAuint64 size, sysMemSize;

    int defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode();
    ASSERT_GE(defaultGPUNode, 0) << "failed to get default GPU Node";

    sa.sa_handler = CatchSignal;
    sigemptyset(&sa.sa_mask);
    sa.sa_flags = 0;
    pid_t ParentPid = getpid();
    /* Must not continue without the handler: the default action for SIGUSR1
     * terminates the process, which would kill the whole test run.
     */
    ASSERT_EQ(0, sigaction(SIGUSR1, &sa, NULL)) << "An error occurred while setting a signal handler";

    sysMemSize = GetSysMemSize();

    /* System (kernel) memory are limited to 3/8th System RAM
     * Try to allocate 1/4th System RAM
     */
    size = (sysMemSize >> 2) & ~(HSAuint64)(PAGE_SIZE - 1);

    ASSERT_SUCCESS(hsaKmtAllocMemory(0 /* system */, size, m_MemoryFlags, reinterpret_cast<void**>(&pDb)));
    // pDb is dereferenced below, so a null pointer has to abort the test
    ASSERT_NE(nullPtr, pDb) << "hsaKmtAllocMemory returned a null pointer";

    pid_t childPid = fork();
    ASSERT_GE(childPid, 0);
    if (childPid == 0) {
        // Child process: signal the parent and report the outcome via the exit code
        int killRet = kill(ParentPid, SIGUSR1);
        EXPECT_EQ(0, killRet);
        exit(killRet == 0 ? 0 : 1);
    } else {
        LOG() << "Start Memory Mapping..." << std::endl;
        ASSERT_SUCCESS(hsaKmtMapMemoryToGPU(pDb, size, NULL));
        LOG() << "Mapping finished" << std::endl;
        int childStatus = 0;
        pid_t waited;

        /* Parent process, just wait for the child to finish. The handler is
         * installed without SA_RESTART, so if SIGUSR1 arrives while we are
         * blocked here waitpid fails with EINTR and has to be retried.
         */
        do {
            waited = waitpid(childPid, &childStatus, 0);
        } while (waited == -1 && errno == EINTR);
        EXPECT_EQ(childPid, waited);
        EXPECT_NE(0, WIFEXITED(childStatus));
        EXPECT_EQ(0, WEXITSTATUS(childStatus));
    }

    // Seed a different value first so the SDMA write below is observable
    pDb[0] = 0x02020202;
    ASSERT_SUCCESS(queue.Create(defaultGPUNode));
    queue.PlaceAndSubmitPacket(SDMAWriteDataPacket(pDb, 0x01010101) );
    queue.Wait4PacketConsumption();
    EXPECT_TRUE(WaitOnValue(pDb, 0x01010101));
    EXPECT_SUCCESS(queue.Destroy());

    EXPECT_SUCCESS(hsaKmtUnmapMemoryToGPU(pDb));
    // Release the buffers
    EXPECT_SUCCESS(hsaKmtFreeMemory(pDb, size));

    TEST_END
}
|
|
|
|
|
|
|
|
|
|
/* Check that freshly allocated system memory is zero-initialized.
 *
 * A buffer of 1/4 of system RAM is allocated, sampled (first word, one word
 * per 4KB at a fixed offset, last word), dirtied at the sampled locations and
 * freed again. This is repeated several times so that a later allocation
 * would expose non-zero data left behind by an earlier one.
 */
TEST_F(KFDMemoryTest, CheckZeroInitializationSysMem) {
    TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
    TEST_START(TESTPROFILE_RUNALL);

    int ret;

    int defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode();
    ASSERT_GE(defaultGPUNode, 0) << "failed to get default GPU Node";

    HSAuint64 sysMemSizeMB = GetSysMemSize() >> 20;

    /* Testing system memory */
    HSAuint64 * pDb = NULL;

    HSAuint64 sysBufSizeMB = sysMemSizeMB >> 2;
    HSAuint64 sysBufSize = sysBufSizeMB * 1024 * 1024;

    int count = 5;

    LOG() << "Using " << std::dec << sysBufSizeMB
            << "MB system buffer to test " << std::dec << count
            << " times" << std::endl;

    const unsigned int offset = 257;  // a constant offset, should be smaller than 512.
    /* Number of 64-bit words in the buffer. Kept in 64 bits: a 32-bit count
     * would wrap once the buffer exceeds 32GB (hosts with more than 128GB
     * of RAM) and the "last word" check would then hit the wrong address.
     */
    const HSAuint64 size = sysBufSize / sizeof(*pDb);
    // Distance, in words, between two samples: one sample per 4KB
    const HSAuint64 stride = 4096 / sizeof(*pDb);

    while (count--) {
        ret = hsaKmtAllocMemory(0 /* system */, sysBufSize, m_MemoryFlags,
                                reinterpret_cast<void**>(&pDb));
        if (ret) {
            LOG() << "Failed to allocate system buffer of" << std::dec << sysBufSizeMB
                    << "MB" << std::endl;
            return;
        }

        /* Check the first 64 bits */
        EXPECT_EQ(0, pDb[0]);
        pDb[0] = 1;

        for (HSAuint64 i = offset; i < size; i += stride) {
            EXPECT_EQ(0, pDb[i]);
            pDb[i] = i + 1;  // set it to non zero
        }

        /* check the last 64 bit */
        EXPECT_EQ(0, pDb[size-1]);
        pDb[size-1] = size;

        EXPECT_SUCCESS(hsaKmtFreeMemory(pDb, sysBufSize));
    }

    TEST_END
}
|
|
|
|
|
|
2018-08-13 09:03:31 -04:00
|
|
|
/* Touch a buffer in 1KB chunks, walking from its end towards its start.
 *
 * sd:   start of the buffer
 * size: buffer size in bytes; only whole 1KB chunks are touched, so when
 *       size is not a multiple of 1024 the leading (size % 1024) bytes are
 *       left alone
 * rw:   0 copies each chunk out of the buffer into a static scratch block,
 *       any other value copies the scratch block into each chunk
 */
static inline void access(volatile void *sd, int size, int rw) {
    /* Scratch block shared by all calls; most likely sitting in cache */
    static struct DUMMY {
        char dummy[1024];
    } dummy;

    const int chunk = (int)sizeof(dummy);
    char *base = (char *)sd;
    int remaining = size;

    while (remaining >= chunk) {
        remaining -= chunk;
        struct DUMMY *slot = (struct DUMMY *)(base + remaining);

        if (rw != 0)
            *slot = dummy;
        else
            dummy = *slot;
    }
}
|
|
|
|
|
|
|
|
|
|
/*
|
2018-08-14 09:52:31 -04:00
|
|
|
* On large-bar system, test the visible vram access speed.
|
|
|
|
|
* KFD is not allowed to alloc visible vram on non-largebar system.
|
2018-07-23 14:45:44 -04:00
|
|
|
*/
|
|
|
|
|
TEST_F(KFDMemoryTest, MMBandWidth) {
|
|
|
|
|
TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
|
|
|
|
|
TEST_START(TESTPROFILE_RUNALL);
|
|
|
|
|
|
|
|
|
|
const unsigned nBufs = 1000; /* measure us, report ns */
|
|
|
|
|
unsigned testIndex, sizeIndex, memType;
|
|
|
|
|
const unsigned nMemTypes = 2;
|
2018-09-12 17:30:42 +08:00
|
|
|
const char *memTypeStrings[nMemTypes] = {"SysMem", "VRAM"};
|
2018-07-23 14:45:44 -04:00
|
|
|
const unsigned nSizes = 4;
|
|
|
|
|
const unsigned bufSizes[nSizes] = {PAGE_SIZE, PAGE_SIZE*4, PAGE_SIZE*16, PAGE_SIZE*64};
|
|
|
|
|
const unsigned nTests = nSizes * nMemTypes;
|
|
|
|
|
const unsigned tmpBufferSize = PAGE_SIZE*64;
|
|
|
|
|
#define _TEST_BUFSIZE(index) (bufSizes[index % nSizes])
|
|
|
|
|
#define _TEST_MEMTYPE(index) ((index / nSizes) % nMemTypes)
|
|
|
|
|
|
|
|
|
|
void *bufs[nBufs];
|
2018-08-13 09:03:31 -04:00
|
|
|
HSAuint64 start;
|
2018-07-23 14:45:44 -04:00
|
|
|
unsigned i;
|
|
|
|
|
HSAKMT_STATUS ret;
|
|
|
|
|
HsaMemFlags memFlags = {0};
|
|
|
|
|
HsaMemMapFlags mapFlags = {0};
|
|
|
|
|
|
|
|
|
|
HSAuint32 defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode();
|
|
|
|
|
ASSERT_GE(defaultGPUNode, 0) << "failed to get default GPU Node";
|
|
|
|
|
|
|
|
|
|
HSAuint64 vramSizeMB = GetVramSize(defaultGPUNode) >> 20;
|
|
|
|
|
|
|
|
|
|
LOG() << "Found VRAM of " << std::dec << vramSizeMB << "MB." << std::endl;
|
|
|
|
|
|
|
|
|
|
if (!m_NodeInfo.IsGPUNodeLargeBar(defaultGPUNode) || !vramSizeMB) {
|
2018-08-13 10:18:04 -04:00
|
|
|
LOG() << "Skipping test: Test requires a large bar GPU." << std::endl;
|
2018-07-23 14:45:44 -04:00
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void *tmp = mmap(0,
|
|
|
|
|
tmpBufferSize,
|
|
|
|
|
PROT_READ | PROT_WRITE,
|
|
|
|
|
MAP_ANONYMOUS | MAP_PRIVATE,
|
|
|
|
|
-1,
|
|
|
|
|
0);
|
2018-08-20 09:54:26 -04:00
|
|
|
EXPECT_NE(tmp, MAP_FAILED);
|
2018-07-23 14:45:44 -04:00
|
|
|
memset(tmp, 0, tmpBufferSize);
|
|
|
|
|
|
|
|
|
|
LOG() << "Test (avg. ns)\t memcpyRTime memcpyWTime accessRTime accessWTime" << std::endl;
|
|
|
|
|
for (testIndex = 0; testIndex < nTests; testIndex++) {
|
|
|
|
|
unsigned bufSize = _TEST_BUFSIZE(testIndex);
|
|
|
|
|
unsigned memType = _TEST_MEMTYPE(testIndex);
|
2018-08-13 09:03:31 -04:00
|
|
|
HSAuint64 mcpRTime, mcpWTime, accessRTime, accessWTime;
|
2018-07-23 14:45:44 -04:00
|
|
|
HSAuint32 allocNode;
|
|
|
|
|
|
|
|
|
|
if ((testIndex & (nSizes-1)) == 0)
|
|
|
|
|
LOG() << "----------------------------------------------------------------------" << std::endl;
|
|
|
|
|
|
|
|
|
|
if (memType == 0) {
|
|
|
|
|
allocNode = 0;
|
|
|
|
|
memFlags.ui32.PageSize = HSA_PAGE_SIZE_4KB;
|
|
|
|
|
memFlags.ui32.HostAccess = 1;
|
|
|
|
|
memFlags.ui32.NonPaged = 0;
|
|
|
|
|
} else {
|
2018-08-14 09:52:31 -04:00
|
|
|
/* Alloc visible vram*/
|
2018-07-23 14:45:44 -04:00
|
|
|
allocNode = defaultGPUNode;
|
|
|
|
|
memFlags.ui32.PageSize = HSA_PAGE_SIZE_4KB;
|
|
|
|
|
memFlags.ui32.HostAccess = 1;
|
|
|
|
|
memFlags.ui32.NonPaged = 1;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for (i = 0; i < nBufs; i++)
|
|
|
|
|
ASSERT_SUCCESS(hsaKmtAllocMemory(allocNode, bufSize, memFlags,
|
|
|
|
|
&bufs[i]));
|
|
|
|
|
|
|
|
|
|
start = GetSystemTickCountInMicroSec();
|
|
|
|
|
for (i = 0; i < nBufs; i++) {
|
|
|
|
|
memcpy(bufs[i], tmp, bufSize);
|
|
|
|
|
}
|
|
|
|
|
mcpWTime = GetSystemTickCountInMicroSec() - start;
|
|
|
|
|
|
|
|
|
|
start = GetSystemTickCountInMicroSec();
|
|
|
|
|
for (i = 0; i < nBufs; i++) {
|
|
|
|
|
access(bufs[i], bufSize, 1);
|
|
|
|
|
}
|
|
|
|
|
accessWTime = GetSystemTickCountInMicroSec() - start;
|
|
|
|
|
|
|
|
|
|
start = GetSystemTickCountInMicroSec();
|
|
|
|
|
for (i = 0; i < nBufs; i++) {
|
|
|
|
|
memcpy(tmp, bufs[i], bufSize);
|
|
|
|
|
}
|
|
|
|
|
mcpRTime = GetSystemTickCountInMicroSec() - start;
|
|
|
|
|
|
|
|
|
|
start = GetSystemTickCountInMicroSec();
|
|
|
|
|
for (i = 0; i < nBufs; i++) {
|
|
|
|
|
access(bufs[i], bufSize, 0);
|
|
|
|
|
}
|
|
|
|
|
accessRTime = GetSystemTickCountInMicroSec() - start;
|
|
|
|
|
|
|
|
|
|
for (i = 0; i < nBufs; i++)
|
2018-08-20 09:54:26 -04:00
|
|
|
EXPECT_SUCCESS(hsaKmtFreeMemory(bufs[i], bufSize));
|
2018-07-23 14:45:44 -04:00
|
|
|
|
2018-09-12 17:30:42 +08:00
|
|
|
LOG() << std::dec
|
|
|
|
|
<< std::right << std::setw(3) << (bufSize >> 10) << "K-"
|
|
|
|
|
<< std::left << std::setw(14) << memTypeStrings[memType]
|
|
|
|
|
<< std::right
|
2018-07-23 14:45:44 -04:00
|
|
|
<< std::setw(12) << mcpRTime
|
|
|
|
|
<< std::setw(12) << mcpWTime
|
|
|
|
|
<< std::setw(12) << accessRTime
|
|
|
|
|
<< std::setw(12) << accessWTime
|
|
|
|
|
<< std::endl;
|
2018-09-12 17:30:42 +08:00
|
|
|
|
|
|
|
|
#define MMBANDWIDTH_KEY_PREFIX memTypeStrings[memType] << "-" \
|
|
|
|
|
<< (bufSize >> 10) << "K" << "-"
|
|
|
|
|
RECORD(mcpRTime) << MMBANDWIDTH_KEY_PREFIX << "mcpRTime";
|
|
|
|
|
RECORD(mcpWTime) << MMBANDWIDTH_KEY_PREFIX << "mcpWTime";
|
|
|
|
|
RECORD(accessRTime) << MMBANDWIDTH_KEY_PREFIX << "accessRTime";
|
|
|
|
|
RECORD(accessWTime) << MMBANDWIDTH_KEY_PREFIX << "accessWTime";
|
2018-07-23 14:45:44 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
munmap(tmp, tmpBufferSize);
|
|
|
|
|
|
|
|
|
|
TEST_END
|
|
|
|
|
}
|
2019-04-30 15:32:01 -05:00
|
|
|
|
|
|
|
|
/* For the purpose of testing HDP flush from CPU.
|
|
|
|
|
* Use CPU to write to coherent vram and check
|
|
|
|
|
* from shader.
|
|
|
|
|
* Asic before gfx9 doesn't support user space
|
|
|
|
|
* HDP flush so only run on vega10 and after.
|
|
|
|
|
* This should only run on large bar system.
|
|
|
|
|
*/
|
|
|
|
|
TEST_F(KFDMemoryTest, HostHdpFlush) {
|
|
|
|
|
TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
|
|
|
|
|
TEST_START(TESTPROFILE_RUNALL);
|
|
|
|
|
|
|
|
|
|
HsaMemFlags memoryFlags = m_MemoryFlags;
|
|
|
|
|
/* buffer[0]: signal; buffer[1]: Input to shader; buffer[2]: Output to
|
|
|
|
|
* shader
|
|
|
|
|
*/
|
|
|
|
|
unsigned int *buffer = NULL;
|
|
|
|
|
HSAuint32 defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode();
|
|
|
|
|
ASSERT_GE(defaultGPUNode, 0) << "failed to get default GPU Node";
|
|
|
|
|
const HsaNodeProperties *pNodeProperties = m_NodeInfo.GetNodeProperties(defaultGPUNode);
|
|
|
|
|
HSAuint32 *mmioBase = NULL;
|
|
|
|
|
unsigned int *nullPtr = NULL;
|
|
|
|
|
|
|
|
|
|
if (!pNodeProperties) {
|
|
|
|
|
LOG() << "Failed to get gpu node properties." << std::endl;
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (m_FamilyId < FAMILY_AI) {
|
|
|
|
|
LOG() << "Skipping test: Test requires gfx9 and later asics." << std::endl;
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
HSAuint64 vramSizeMB = GetVramSize(defaultGPUNode) >> 20;
|
|
|
|
|
|
|
|
|
|
if (!m_NodeInfo.IsGPUNodeLargeBar(defaultGPUNode) || !vramSizeMB) {
|
|
|
|
|
LOG() << "Skipping test: Test requires a large bar GPU." << std::endl;
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
HsaMemoryProperties *memoryProperties = new HsaMemoryProperties[pNodeProperties->NumMemoryBanks];
|
|
|
|
|
EXPECT_SUCCESS(hsaKmtGetNodeMemoryProperties(defaultGPUNode, pNodeProperties->NumMemoryBanks,
|
|
|
|
|
memoryProperties));
|
|
|
|
|
for (unsigned int bank = 0; bank < pNodeProperties->NumMemoryBanks; bank++) {
|
|
|
|
|
if (memoryProperties[bank].HeapType == HSA_HEAPTYPE_MMIO_REMAP) {
|
|
|
|
|
mmioBase = (unsigned int *)memoryProperties[bank].VirtualBaseAddress;
|
2019-05-27 14:57:57 -05:00
|
|
|
break;
|
2019-04-30 15:32:01 -05:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
ASSERT_NE(mmioBase, nullPtr) << "mmio base is NULL";
|
|
|
|
|
|
|
|
|
|
memoryFlags.ui32.NonPaged = 1;
|
|
|
|
|
memoryFlags.ui32.CoarseGrain = 0;
|
|
|
|
|
ASSERT_SUCCESS(hsaKmtAllocMemory(defaultGPUNode, PAGE_SIZE, memoryFlags,
|
|
|
|
|
reinterpret_cast<void**>(&buffer)));
|
|
|
|
|
ASSERT_SUCCESS(hsaKmtMapMemoryToGPU(buffer, PAGE_SIZE, NULL));
|
|
|
|
|
|
|
|
|
|
/* Signal is dead from the beginning*/
|
|
|
|
|
buffer[0] = 0xdead;
|
|
|
|
|
buffer[1] = 0xfeeb;
|
|
|
|
|
buffer[2] = 0xfeed;
|
|
|
|
|
/* Submit a shader to poll the signal*/
|
|
|
|
|
PM4Queue queue;
|
|
|
|
|
ASSERT_SUCCESS(queue.Create(defaultGPUNode));
|
|
|
|
|
HsaMemoryBuffer isaBuffer(PAGE_SIZE, defaultGPUNode, true/*zero*/, false/*local*/, true/*exec*/);
|
|
|
|
|
m_pIsaGen->CompileShader(gfx9_CopyOnSignal,"CopyOnSignal", isaBuffer);
|
|
|
|
|
Dispatch dispatch0(isaBuffer);
|
|
|
|
|
dispatch0.SetArgs(buffer, NULL);
|
|
|
|
|
dispatch0.Submit(queue);
|
|
|
|
|
|
|
|
|
|
buffer[1] = 0xbeef;
|
|
|
|
|
/* Flush HDP */
|
|
|
|
|
mmioBase[KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL/4] = 0x1;
|
|
|
|
|
buffer[0] = 0xcafe;
|
|
|
|
|
|
|
|
|
|
/* Check test result*/
|
|
|
|
|
dispatch0.Sync();
|
2019-05-27 14:57:57 -05:00
|
|
|
mmioBase[KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL/4] = 0x1;
|
2019-04-30 15:32:01 -05:00
|
|
|
EXPECT_EQ(0xbeef, buffer[2]);
|
|
|
|
|
|
|
|
|
|
// Clean up
|
|
|
|
|
EXPECT_SUCCESS(queue.Destroy());
|
|
|
|
|
delete [] memoryProperties;
|
|
|
|
|
EXPECT_SUCCESS(hsaKmtUnmapMemoryToGPU(buffer));
|
|
|
|
|
EXPECT_SUCCESS(hsaKmtFreeMemory(buffer, PAGE_SIZE));
|
|
|
|
|
|
|
|
|
|
TEST_END
|
|
|
|
|
}
|
2019-05-30 16:09:06 -05:00
|
|
|
|
|
|
|
|
/* Test HDP flush from device.
|
|
|
|
|
* Use shader on device 1 to write vram of device 0
|
|
|
|
|
* and flush HDP of device 0. Read vram from device 0
|
|
|
|
|
* and write back to vram to check the result from CPU.
|
|
|
|
|
* Asic before gfx9 doesn't support device HDP flush
|
|
|
|
|
* so only run on vega10 and after.
|
|
|
|
|
* This should only run on system with at least one
|
|
|
|
|
* large bar node (which is used as device 0).
|
|
|
|
|
*/
|
|
|
|
|
TEST_F(KFDMemoryTest, DeviceHdpFlush) {
|
|
|
|
|
TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
|
|
|
|
|
TEST_START(TESTPROFILE_RUNALL);
|
|
|
|
|
|
|
|
|
|
HsaMemFlags memoryFlags = m_MemoryFlags;
|
|
|
|
|
/* buffer is physically on device 0.
|
|
|
|
|
* buffer[0]: Use as signaling b/t devices;
|
|
|
|
|
* buffer[1]: Device 1 write to buffer[1] and device 0 read it
|
|
|
|
|
* buffer[2]: Device 0 copy buffer[1] to buffer[2] for CPU to check
|
|
|
|
|
*/
|
|
|
|
|
unsigned int *buffer = NULL;
|
|
|
|
|
const HsaNodeProperties *pNodeProperties;
|
|
|
|
|
HSAuint32 *mmioBase = NULL;
|
|
|
|
|
unsigned int *nullPtr = NULL;
|
|
|
|
|
std::vector<HSAuint32> nodes;
|
|
|
|
|
|
|
|
|
|
const std::vector<int> gpuNodes = m_NodeInfo.GetNodesWithGPU();
|
|
|
|
|
if (gpuNodes.size() < 2) {
|
|
|
|
|
LOG() << "Skipping test: At least two GPUs are required." << std::endl;
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Users can use "--node=gpu1 --dst_node=gpu2" to specify devices */
|
|
|
|
|
if (g_TestDstNodeId != -1 && g_TestNodeId != -1) {
|
|
|
|
|
nodes.push_back(g_TestNodeId);
|
|
|
|
|
nodes.push_back(g_TestDstNodeId);
|
|
|
|
|
if (!m_NodeInfo.IsGPUNodeLargeBar(nodes[0])) {
|
|
|
|
|
LOG() << "Skipping test: first GPU specified is not a large bar GPU." << std::endl;
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
if (nodes[0] == nodes[1]) {
|
|
|
|
|
LOG() << "Skipping test: Different GPUs must be specified (2 GPUs required)." << std::endl;
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
HSAint32 defaultGPU = m_NodeInfo.HsaDefaultGPUNode();
|
|
|
|
|
if (!m_NodeInfo.IsGPUNodeLargeBar(defaultGPU)) {
|
|
|
|
|
LOG() << "Skipping test: Default GPUs must be large bar." << std::endl;
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
nodes.push_back(defaultGPU);
|
|
|
|
|
for (unsigned i = 0; i < gpuNodes.size(); i++)
|
|
|
|
|
if (gpuNodes.at(i) != defaultGPU)
|
|
|
|
|
nodes.push_back(gpuNodes.at(i));
|
|
|
|
|
if (nodes.size() < 2) {
|
|
|
|
|
LOG() << "Skipping test: At least 2 GPUs required." << std::endl;
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
pNodeProperties = m_NodeInfo.GetNodeProperties(nodes[0]);
|
|
|
|
|
if (!pNodeProperties) {
|
|
|
|
|
LOG() << "Failed to get gpu node properties." << std::endl;
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (m_FamilyId < FAMILY_AI) {
|
|
|
|
|
LOG() << "Skipping test: Test requires gfx9 and later asics." << std::endl;
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
HsaMemoryProperties *memoryProperties = new HsaMemoryProperties[pNodeProperties->NumMemoryBanks];
|
|
|
|
|
EXPECT_SUCCESS(hsaKmtGetNodeMemoryProperties(nodes[0], pNodeProperties->NumMemoryBanks,
|
|
|
|
|
memoryProperties));
|
|
|
|
|
for (unsigned int bank = 0; bank < pNodeProperties->NumMemoryBanks; bank++) {
|
|
|
|
|
if (memoryProperties[bank].HeapType == HSA_HEAPTYPE_MMIO_REMAP) {
|
|
|
|
|
mmioBase = (unsigned int *)memoryProperties[bank].VirtualBaseAddress;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
ASSERT_NE(mmioBase, nullPtr) << "mmio base is NULL";
|
|
|
|
|
|
|
|
|
|
memoryFlags.ui32.NonPaged = 1;
|
|
|
|
|
memoryFlags.ui32.CoarseGrain = 0;
|
|
|
|
|
ASSERT_SUCCESS(hsaKmtAllocMemory(nodes[0], PAGE_SIZE, memoryFlags,
|
|
|
|
|
reinterpret_cast<void**>(&buffer)));
|
|
|
|
|
ASSERT_SUCCESS(hsaKmtMapMemoryToGPU(buffer, PAGE_SIZE, NULL));
|
|
|
|
|
|
|
|
|
|
/* Signal is dead from the beginning*/
|
|
|
|
|
buffer[0] = 0xdead;
|
|
|
|
|
buffer[1] = 0xfeeb;
|
|
|
|
|
buffer[2] = 0xfeeb;
|
|
|
|
|
/* Submit shaders*/
|
|
|
|
|
PM4Queue queue;
|
|
|
|
|
ASSERT_SUCCESS(queue.Create(nodes[0]));
|
|
|
|
|
HsaMemoryBuffer isaBuffer(PAGE_SIZE, nodes[0], true/*zero*/, false/*local*/, true/*exec*/);
|
|
|
|
|
m_pIsaGen->CompileShader(gfx9_CopyOnSignal, "CopyOnSignal", isaBuffer);
|
|
|
|
|
Dispatch dispatch(isaBuffer);
|
|
|
|
|
dispatch.SetArgs(buffer, NULL);
|
|
|
|
|
dispatch.Submit(queue);
|
|
|
|
|
|
|
|
|
|
PM4Queue queue0;
|
|
|
|
|
ASSERT_SUCCESS(queue0.Create(nodes[1]));
|
|
|
|
|
HsaMemoryBuffer isaBuffer0(PAGE_SIZE, nodes[1], true/*zero*/, false/*local*/, true/*exec*/);
|
|
|
|
|
m_pIsaGen->CompileShader(gfx9_WriteAndSignal, "WriteAndSignal", isaBuffer0);
|
|
|
|
|
Dispatch dispatch0(isaBuffer0);
|
|
|
|
|
dispatch0.SetArgs(buffer, mmioBase);
|
|
|
|
|
dispatch0.Submit(queue0);
|
|
|
|
|
|
|
|
|
|
/* Check test result*/
|
|
|
|
|
dispatch0.Sync();
|
|
|
|
|
dispatch.Sync();
|
|
|
|
|
EXPECT_EQ(0xbeef, buffer[2]);
|
|
|
|
|
|
|
|
|
|
// Clean up
|
|
|
|
|
EXPECT_SUCCESS(queue.Destroy());
|
|
|
|
|
EXPECT_SUCCESS(queue0.Destroy());
|
|
|
|
|
delete [] memoryProperties;
|
|
|
|
|
EXPECT_SUCCESS(hsaKmtUnmapMemoryToGPU(buffer));
|
|
|
|
|
EXPECT_SUCCESS(hsaKmtFreeMemory(buffer, PAGE_SIZE));
|
|
|
|
|
|
|
|
|
|
TEST_END
|
|
|
|
|
}
|