Reimplement hipMemGetInfo (#1447)
Addresses SWDEV-136570. hipMemGetInfo changed to compute free memory based on information from kfd instead of relying on hc::am_tracker.
[ROCm/hip commit: 3d661e4706]
Этот коммит содержится в:
коммит произвёл
Maneesh Gupta
родитель
52f126b557
Коммит
bfb64c43a4
@@ -505,13 +505,19 @@ ihipDevice_t::ihipDevice_t(unsigned deviceId, unsigned deviceCnt, hc::accelerato
|
||||
: _deviceId(deviceId), _acc(acc), _state(0), _criticalData(this) {
|
||||
hsa_agent_t* agent = static_cast<hsa_agent_t*>(acc.get_hsa_agent());
|
||||
if (agent) {
|
||||
int err = hsa_agent_get_info(
|
||||
int err;
|
||||
err = hsa_agent_get_info(
|
||||
*agent, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT, &_computeUnits);
|
||||
if (err != HSA_STATUS_SUCCESS) {
|
||||
_computeUnits = 1;
|
||||
}
|
||||
|
||||
_hsaAgent = *agent;
|
||||
err = hsa_agent_get_info(
|
||||
*agent, (hsa_agent_info_t) HSA_AMD_AGENT_INFO_DRIVER_NODE_ID, &_driver_node_id);
|
||||
if (err != HSA_STATUS_SUCCESS){
|
||||
_driver_node_id = 0;
|
||||
}
|
||||
|
||||
_hsaAgent = *agent;
|
||||
} else {
|
||||
_hsaAgent.handle = static_cast<uint64_t>(-1);
|
||||
}
|
||||
|
||||
@@ -802,6 +802,9 @@ class ihipDevice_t {
|
||||
|
||||
// TODO - report this through device properties, base on HCC API call.
|
||||
int _isLargeBar;
|
||||
|
||||
// Node id reported by kfd for this device
|
||||
uint32_t _driver_node_id;
|
||||
|
||||
ihipCtx_t* _primaryCtx;
|
||||
|
||||
|
||||
@@ -19,7 +19,6 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hc_am.hpp>
|
||||
#include "hsa/hsa.h"
|
||||
#include "hsa/hsa_ext_amd.h"
|
||||
@@ -28,6 +27,8 @@ THE SOFTWARE.
|
||||
#include "hip_hcc_internal.h"
|
||||
#include "trace_helper.h"
|
||||
|
||||
#include <fstream>
|
||||
|
||||
__device__ char __hip_device_heap[__HIP_SIZE_OF_HEAP];
|
||||
__device__ uint32_t __hip_device_page_flag[__HIP_NUM_PAGES];
|
||||
|
||||
@@ -1967,16 +1968,27 @@ hipError_t hipMemGetInfo(size_t* free, size_t* total) {
|
||||
} else {
|
||||
e = hipErrorInvalidValue;
|
||||
}
|
||||
|
||||
|
||||
if (free) {
|
||||
// TODO - replace with kernel-level for reporting free memory:
|
||||
size_t deviceMemSize, hostMemSize, userMemSize;
|
||||
hc::am_memtracker_sizeinfo(device->_acc, &deviceMemSize, &hostMemSize, &userMemSize);
|
||||
|
||||
*free = device->_props.totalGlobalMem - deviceMemSize;
|
||||
|
||||
// Deduct the amount of memory from the free memory reported from the system
|
||||
if (HIP_HIDDEN_FREE_MEM) *free -= (size_t)HIP_HIDDEN_FREE_MEM * 1024 * 1024;
|
||||
if (!device->_driver_node_id) return ihipLogStatus(hipErrorInvalidDevice);
|
||||
|
||||
std::string fileName = std::string("/sys/class/kfd/kfd/topology/nodes/") + std::to_string(device->_driver_node_id) + std::string("/mem_banks/0/used_memory");
|
||||
std::ifstream file;
|
||||
file.open(fileName);
|
||||
if (!file) return ihipLogStatus(hipErrorFileNotFound);
|
||||
|
||||
std::string deviceSize;
|
||||
size_t deviceMemSize;
|
||||
|
||||
file >> deviceSize;
|
||||
file.close();
|
||||
if ((deviceMemSize=strtol(deviceSize.c_str(),NULL,10))){
|
||||
*free = device->_props.totalGlobalMem - deviceMemSize;
|
||||
// Deduct the amount of memory from the free memory reported from the system
|
||||
if (HIP_HIDDEN_FREE_MEM) *free -= (size_t)HIP_HIDDEN_FREE_MEM * 1024 * 1024;
|
||||
} else {
|
||||
return ihipLogStatus(hipErrorInvalidValue);
|
||||
}
|
||||
} else {
|
||||
e = hipErrorInvalidValue;
|
||||
}
|
||||
|
||||
Ссылка в новой задаче
Block a user