Files
rocm-systems/tests/kfdtest/src/KFDRASTest.cpp
T
Eric Huang e5b215570b kfdtest: fix and change in RAS test
1. The RAS error injection debugfs interface has changed: it now
uses ras_ctrl instead of *_err_inject.

2. Remove ASSERT_SUCCESS for fwrite, because fwrite returns
the number of items written, not an error code.

3. Throw an exception instead of returning, to avoid a segmentation fault.

Change-Id: I6c4d9c2f7e66719faec99abd1552105a08c238a4
Signed-off-by: Eric Huang <JinhuiEric.Huang@amd.com>
2019-03-29 11:00:01 -04:00

150 lines
4.3 KiB
C++

/*
* Copyright (C) 2019 Advanced Micro Devices, Inc. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include <limits.h>
#include <math.h>
#include <stdio.h>
#include <stdexcept>
#include "linux/kfd_ioctl.h"
#include "KFDRASTest.hpp"
#include "PM4Queue.hpp"
// Root directory under which SetUp() looks for a per-node debugfs directory;
// the path is built as "<AMDGPU_DEBUGFS_NODES>/<renderNode>/<RAS_CONTROL>".
#define AMDGPU_DEBUGFS_NODES "/sys/kernel/debug/dri/"
// Path, relative to a node's debugfs directory, of the file that the tests
// write RAS error-injection commands to.
#define RAS_CONTROL "ras/ras_ctrl"
/*
 * Prepares the fixture for RAS error-injection tests.
 *
 * Steps:
 *   1. Run the base fixture set-up and resolve the default GPU node and its
 *      DRM render node.
 *   2. Query the enabled RAS features and bail out if none of SDMA, UMC or GFX
 *      is enabled.
 *   3. Open the RAS control debugfs file for writing (kept in m_pFile).
 *   4. Create the memory event (m_pRasEvent) the tests wait on.
 *
 * m_setupStatus is set to true only when every step succeeded; test bodies
 * check it and return early otherwise. On an unmet precondition the function
 * logs a "Skipping test" message and throws std::runtime_error, which is
 * presumably swallowed by ROUTINE_END (macro defined outside this file --
 * NOTE(review): confirm).
 */
void KFDRASTest::SetUp() {
    ROUTINE_START

    KFDBaseComponentTest::SetUp();

    char path[256];
    int renderNode;
    uint32_t rasFeatures = 0;
    HsaEventDescriptor eventDesc;

    // Put every member touched by TearDown() into a known state before any
    // early exit can happen.
    m_pRasEvent = NULL;
    m_pFile = NULL;
    m_setupStatus = false;

    m_defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode();
    renderNode = KFDBaseComponentTest::FindDRMRenderNode(m_defaultGPUNode);
    if (renderNode < 0) {
        LOG() << "Skipping test: Could not find render node for default GPU." << std::endl;
        // A bare `throw;` with no exception in flight calls std::terminate(),
        // so throw a real exception object instead.
        throw std::runtime_error("KFDRASTest: render node not found");
    }

    // The return value is not checked: rasFeatures stays 0 if the query does
    // not fill it in, which takes the "no RAS support" path below.
    amdgpu_query_info(m_RenderNodes[renderNode].device_handle,
                      AMDGPU_INFO_RAS_ENABLED_FEATURES,
                      sizeof(uint32_t), &rasFeatures);

    // Combine the feature bits with bitwise OR. Logical OR would collapse the
    // mask to the value 1 and test only bit 0 of rasFeatures.
    const uint32_t requiredFeatures = AMDGPU_INFO_RAS_ENABLED_SDMA |
                                      AMDGPU_INFO_RAS_ENABLED_UMC |
                                      AMDGPU_INFO_RAS_ENABLED_GFX;
    if (!(rasFeatures & requiredFeatures)) {
        LOG() << "Skipping test: GPU doesn't support RAS features!" << std::endl;
        throw std::runtime_error("KFDRASTest: RAS features not enabled");
    }

    snprintf(path, sizeof(path), "%s/%d/%s", AMDGPU_DEBUGFS_NODES, renderNode, RAS_CONTROL);

    m_pFile = fopen(path, "w");
    if (!m_pFile) {
        LOG() << "Skipping test: RAS error injection requires root access!" << std::endl;
        throw std::runtime_error("KFDRASTest: cannot open RAS control file");
    }

    eventDesc.EventType = HSA_EVENTTYPE_MEMORY;
    eventDesc.NodeId = m_defaultGPUNode;
    eventDesc.SyncVar.SyncVar.UserData = NULL;
    eventDesc.SyncVar.SyncVarSize = 0;

    ASSERT_SUCCESS(hsaKmtCreateEvent(&eventDesc, true, false, &m_pRasEvent));

    m_setupStatus = true;

    ROUTINE_END
}
/*
 * Releases what SetUp() acquired: destroys the RAS event if one was created,
 * closes the RAS control file if it was opened, then runs the base fixture
 * tear-down.
 */
void KFDRASTest::TearDown() {
    ROUTINE_START

    if (m_pRasEvent != NULL) {
        EXPECT_SUCCESS(hsaKmtDestroyEvent(m_pRasEvent));
        m_pRasEvent = NULL;
    }

    // m_pFile is NULL when fopen() failed in SetUp(); fclose(NULL) is
    // undefined behaviour, so only close a stream that was actually opened.
    if (m_pFile != NULL) {
        fclose(m_pFile);
        m_pFile = NULL;
    }

    KFDBaseComponentTest::TearDown();

    ROUTINE_END
}
/*
 * Writes one uncorrectable UMC error-injection command to the RAS control
 * file and expects the RAS memory event to be signalled with ErrorType 1.
 * Does nothing when SetUp() did not complete (m_setupStatus is false).
 */
TEST_F(KFDRASTest, BasicTest) {
    TEST_START(TESTPROFILE_RUNALL);

    if (!m_setupStatus) {
        return;
    }

    // write an uncorrectable error injection at address 1 as value 1
    static const char injectCmd[] = "inject umc ue 1 1";
    const size_t cmdLen = sizeof(injectCmd) - 1;  // exclude the trailing '\0'

    // fwrite() returns the number of items written, so compare with the length.
    EXPECT_EQ(cmdLen, fwrite(injectCmd, sizeof(char), cmdLen, m_pFile));
    // The stream is buffered: without a flush the command may not be handed
    // to the kernel until the file is closed, i.e. after the wait below.
    EXPECT_EQ(0, fflush(m_pFile));

    EXPECT_SUCCESS(hsaKmtWaitOnEvent(m_pRasEvent, g_TestTimeOut));

    EXPECT_EQ(1, m_pRasEvent->EventData.EventData.MemoryAccessFault.Failure.ErrorType);

    TEST_END;
}
/*
 * Checks that a RAS event can still be delivered after an ordinary queue
 * event has been signalled: first a PM4 release-memory packet signals a
 * queue-type event, then an uncorrectable UMC error is injected and the RAS
 * memory event is expected with ErrorType 1.
 * Does nothing when SetUp() did not complete (m_setupStatus is false).
 */
TEST_F(KFDRASTest, MixEventsTest) {
    TEST_START(TESTPROFILE_RUNALL);

    if (!m_setupStatus) {
        return;
    }

    PM4Queue queue;
    HsaEvent* pHsaEvent;

    ASSERT_SUCCESS(CreateQueueTypeEvent(false, false, m_defaultGPUNode, &pHsaEvent));
    ASSERT_NE(0, pHsaEvent->EventData.HWData2);

    ASSERT_SUCCESS(queue.Create(m_defaultGPUNode));

    // Signal the queue event from the GPU and wait for it.
    queue.PlaceAndSubmitPacket(PM4ReleaseMemoryPacket(false,
                               pHsaEvent->EventData.HWData2, pHsaEvent->EventId));
    queue.Wait4PacketConsumption();

    EXPECT_SUCCESS(hsaKmtWaitOnEvent(pHsaEvent, g_TestTimeOut));

    // Inject the RAS error. Non-fatal EXPECT_* checks are used so that the
    // queue and event clean-up at the end still runs if the write fails.
    static const char injectCmd[] = "inject umc ue 1 1";
    const size_t cmdLen = sizeof(injectCmd) - 1;  // exclude the trailing '\0'

    // fwrite() returns the number of items written, so compare with the length.
    EXPECT_EQ(cmdLen, fwrite(injectCmd, sizeof(char), cmdLen, m_pFile));
    // The stream is buffered: flush so the command is handed to the kernel
    // before waiting for the RAS event.
    EXPECT_EQ(0, fflush(m_pFile));

    EXPECT_SUCCESS(hsaKmtWaitOnEvent(m_pRasEvent, g_TestTimeOut));

    EXPECT_EQ(1, m_pRasEvent->EventData.EventData.MemoryAccessFault.Failure.ErrorType);

    EXPECT_SUCCESS(queue.Destroy());
    EXPECT_SUCCESS(hsaKmtDestroyEvent(pHsaEvent));

    TEST_END;
}