diff --git a/projects/rocr-runtime/runtime/hsa-runtime/CMakeLists.txt b/projects/rocr-runtime/runtime/hsa-runtime/CMakeLists.txt index e060a21b97..c127e7b1cc 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/CMakeLists.txt +++ b/projects/rocr-runtime/runtime/hsa-runtime/CMakeLists.txt @@ -150,6 +150,7 @@ set_property(TARGET ${CORE_RUNTIME_TARGET} PROPERTY LINK_FLAGS ${HSA_SHARED_LINK ## Source files. set ( SRCS core/driver/driver.cpp + core/driver/xdna/amd_xdna_driver.cpp core/util/lnx/os_linux.cpp core/util/small_heap.cpp core/util/timer.cpp diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/driver/driver.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/driver/driver.cpp index 02407e6b2a..3a2f7862f5 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/driver/driver.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/driver/driver.cpp @@ -50,8 +50,9 @@ namespace rocr { namespace core { -Driver::Driver(const std::string devnode_name, Agent::DeviceType agent_device_type) - : agent_device_type_(agent_device_type), devnode_name_(devnode_name) { } +Driver::Driver(DriverType kernel_driver_type, std::string devnode_name) + : kernel_driver_type_(std::move(kernel_driver_type)), + devnode_name_(std::move(devnode_name)) {} hsa_status_t Driver::Open() { diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/driver/xdna/amd_xdna_driver.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/driver/xdna/amd_xdna_driver.cpp new file mode 100644 index 0000000000..73788aa519 --- /dev/null +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/driver/xdna/amd_xdna_driver.cpp @@ -0,0 +1,130 @@ +//////////////////////////////////////////////////////////////////////////////// +// +// The University of Illinois/NCSA +// Open Source License (NCSA) +// +// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved. +// +// Developed by: +// +// AMD Research and AMD HSA Software Development +// +// Advanced Micro Devices, Inc. 
+// +// www.amd.com +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to +// deal with the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimers. +// - Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimers in +// the documentation and/or other materials provided with the distribution. +// - Neither the names of Advanced Micro Devices, Inc, +// nor the names of its contributors may be used to endorse or promote +// products derived from this Software without specific prior written +// permission. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR +// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS WITH THE SOFTWARE. 
+// +//////////////////////////////////////////////////////////////////////////////// + +#include "core/inc/amd_xdna_driver.h" + +#include + +#include +#include + +#include "core/inc/runtime.h" +#include "uapi/amdxdna_accel.h" + +namespace rocr { +namespace AMD { + +XdnaDriver::XdnaDriver(std::string devnode_name) + : core::Driver(core::DriverType::XDNA, devnode_name) {} + +hsa_status_t XdnaDriver::DiscoverDriver() { + const int max_minor_num(64); + const std::string devnode_prefix("/dev/accel/accel"); + + for (int i = 0; i < max_minor_num; ++i) { + std::unique_ptr xdna_drv( + new XdnaDriver(devnode_prefix + std::to_string(i))); + if (xdna_drv->Open() == HSA_STATUS_SUCCESS) { + if (xdna_drv->QueryKernelModeDriver( + core::DriverQuery::GET_DRIVER_VERSION) == HSA_STATUS_SUCCESS) { + core::Runtime::runtime_singleton_->RegisterDriver(xdna_drv); + return HSA_STATUS_SUCCESS; + } else { + xdna_drv->Close(); + } + } + } + + return HSA_STATUS_ERROR; +} + +hsa_status_t XdnaDriver::QueryKernelModeDriver(core::DriverQuery query) { + switch (query) { + case core::DriverQuery::GET_DRIVER_VERSION: + return QueryDriverVersion(); + default: + return HSA_STATUS_ERROR_INVALID_ARGUMENT; + } + return HSA_STATUS_SUCCESS; +} + +hsa_status_t +XdnaDriver::GetMemoryProperties(uint32_t node_id, + core::MemProperties &mprops) const { + return HSA_STATUS_SUCCESS; +} + +hsa_status_t XdnaDriver::AllocateMemory(void **mem, size_t size, + uint32_t node_id, + core::MemFlags flags) { + return HSA_STATUS_SUCCESS; +} + +hsa_status_t XdnaDriver::FreeMemory(void *mem, uint32_t node_id) { + return HSA_STATUS_SUCCESS; +} + +hsa_status_t XdnaDriver::CreateQueue(core::Queue &queue) { + return HSA_STATUS_SUCCESS; +} + +hsa_status_t XdnaDriver::DestroyQueue(core::Queue &queue) const { + return HSA_STATUS_SUCCESS; +} + +hsa_status_t XdnaDriver::QueryDriverVersion() { + amdxdna_drm_query_aie_version aie_version{0, 0}; + amdxdna_drm_get_info args{DRM_AMDXDNA_QUERY_AIE_VERSION, sizeof(aie_version), + 
reinterpret_cast(&aie_version)}; + + if (ioctl(fd_, DRM_IOCTL_AMDXDNA_GET_INFO, &args) < 0) { + return HSA_STATUS_ERROR; + } + + version_.major = aie_version.major; + version_.minor = aie_version.minor; + + return HSA_STATUS_SUCCESS; +} + +} // namespace AMD +} // namespace rocr diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/driver/xdna/uapi/amdxdna_accel.h b/projects/rocr-runtime/runtime/hsa-runtime/core/driver/xdna/uapi/amdxdna_accel.h new file mode 100644 index 0000000000..9182a0fd7b --- /dev/null +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/driver/xdna/uapi/amdxdna_accel.h @@ -0,0 +1,620 @@ +/* SPDX-License-Identifier: NCSA */ +/* + * Copyright (C) 2022-2024, Advanced Micro Devices, Inc. + */ + +#ifndef AMDXDNA_ACCEL_H_ +#define AMDXDNA_ACCEL_H_ + +#include +#include +#include + +#if defined(__cplusplus) +extern "C" { +#endif + +#ifndef __counted_by +#define __counted_by(cnt) +#endif + +#define AMDXDNA_DRIVER_MAJOR 1 +#define AMDXDNA_DRIVER_MINOR 0 + +#define AMDXDNA_INVALID_CMD_HANDLE (~0UL) +#define AMDXDNA_INVALID_ADDR (~0UL) +#define AMDXDNA_INVALID_CTX_HANDLE 0 +#define AMDXDNA_INVALID_BO_HANDLE 0 +#define AMDXDNA_INVALID_FENCE_HANDLE 0 + +/* + * The interface can grow/extend over time. + * On each struct amdxdna_drm_*, to support potential extension, we defined it + * like this. + * + * Example code: + * + * struct amdxdna_drm_example_data { + * .ext = (uintptr_t)&example_data_ext; + * ... + * }; + * + * We don't have extension now. The extension struct will define in the future. 
+ */ + +enum amdxdna_drm_ioctl_id { + DRM_AMDXDNA_CREATE_HWCTX, + DRM_AMDXDNA_DESTROY_HWCTX, + DRM_AMDXDNA_CONFIG_HWCTX, + DRM_AMDXDNA_CREATE_BO, + DRM_AMDXDNA_GET_BO_INFO, + DRM_AMDXDNA_SYNC_BO, + DRM_AMDXDNA_EXEC_CMD, + DRM_AMDXDNA_WAIT_CMD, + DRM_AMDXDNA_GET_INFO, + DRM_AMDXDNA_SET_STATE, + DRM_AMDXDNA_SUBMIT_WAIT, + DRM_AMDXDNA_SUBMIT_SIGNAL, + DRM_AMDXDNA_NUM_IOCTLS +}; + +enum amdxdna_device_type { + AMDXDNA_DEV_TYPE_UNKNOWN = -1, + AMDXDNA_DEV_TYPE_KMQ, + AMDXDNA_DEV_TYPE_UMQ, +}; + +/** + * struct amdxdna_qos_info - QoS information for driver. + * @gops: Giga operations per second. + * @fps: Frames per second. + * @dma_bandwidth: DMA bandwidth. + * @latency: Frame response latency. + * @frame_exec_time: Frame execution time. + * @priority: Request priority. + * + * User program can provide QoS hints to driver. + */ +struct amdxdna_qos_info { + __u32 gops; + __u32 fps; + __u32 dma_bandwidth; + __u32 latency; + __u32 frame_exec_time; + __u32 priority; +}; + +/** + * struct amdxdna_drm_create_hwctx - Create hardware context. + * @ext: MBZ. + * @ext_flags: MBZ. + * @qos_p: Address of QoS info. + * @umq_bo: BO handle for user mode queue(UMQ). + * @log_buf_bo: BO handle for log buffer. + * @max_opc: Maximum operations per cycle. + * @num_tiles: Number of AIE tiles. + * @mem_size: Size of AIE tile memory. + * @umq_doorbell: Returned offset of doorbell associated with UMQ. + * @handle: Returned hardware context handle. + */ +struct amdxdna_drm_create_hwctx { + __u64 ext; + __u64 ext_flags; + __u64 qos_p; + __u32 umq_bo; + __u32 log_buf_bo; + __u32 max_opc; + __u32 num_tiles; + __u32 mem_size; + __u32 umq_doorbell; + __u32 handle; +}; + +/** + * struct amdxdna_drm_destroy_hwctx - Destroy hardware context. + * @handle: Hardware context handle. + * @pad: MBZ. 
+ */ +struct amdxdna_drm_destroy_hwctx { + __u32 handle; + __u32 pad; +}; + +/** + * struct amdxdna_cu_config - configuration for one CU + * @cu_bo: CU configuration buffer bo handle + * @cu_func: Functional of a CU + * @pad: MBZ + */ +struct amdxdna_cu_config { + __u32 cu_bo; + __u8 cu_func; + __u8 pad[3]; +}; + +/** + * struct amdxdna_hwctx_param_config_cu - configuration for CUs in hardware + * context + * @num_cus: Number of CUs to configure + * @pad: MBZ + * @cu_configs: Array of CU configurations of struct amdxdna_cu_config + */ +struct amdxdna_hwctx_param_config_cu { + __u16 num_cus; + __u16 pad[3]; + struct amdxdna_cu_config cu_configs[] __counted_by(num_cus); +}; + +enum amdxdna_drm_config_hwctx_param { + DRM_AMDXDNA_HWCTX_CONFIG_CU, + DRM_AMDXDNA_HWCTX_ASSIGN_DBG_BUF, + DRM_AMDXDNA_HWCTX_REMOVE_DBG_BUF, + DRM_AMDXDNA_HWCTX_CONFIG_NUM +}; + +/** + * struct amdxdna_drm_config_hwctx - Configure hardware context. + * @handle: hardware context handle. + * @param_type: Value in enum amdxdna_drm_config_hwctx_param. Specifies the + * structure passed in via param_val. + * @param_val: A structure specified by the param_type struct member. + * @param_val_size: Size of the parameter buffer pointed to by the param_val. + * If param_val is not a pointer, driver can ignore this. + * + * Note: if the param_val is a pointer pointing to a buffer, the maximum size + * of the buffer is 4KiB(PAGE_SIZE). 
+ */ +struct amdxdna_drm_config_hwctx { + __u32 handle; + __u32 param_type; + __u64 param_val; + __u32 param_val_size; + __u32 pad; +}; + +/* + * AMDXDNA_BO_SHMEM: DRM GEM SHMEM bo + * AMDXDNA_BO_DEV_HEAP: Shared host memory to device as heap memory + * AMDXDNA_BO_DEV: Allocated from BO_DEV_HEAP + * AMDXDNA_BO_CMD: User and driver accessible bo + * AMDXDNA_BO_DMA: DRM GEM DMA bo + */ +enum amdxdna_bo_type { + AMDXDNA_BO_INVALID = 0, + AMDXDNA_BO_SHMEM, + AMDXDNA_BO_DEV_HEAP, + AMDXDNA_BO_DEV, + AMDXDNA_BO_CMD, + AMDXDNA_BO_DMA, +}; + +/** + * struct amdxdna_drm_create_bo - Create a buffer object. + * @flags: Buffer flags. MBZ. + * @type: Buffer type. + * @vaddr: User VA of buffer if applied. MBZ. + * @size: Size in bytes. + * @handle: Returned DRM buffer object handle. + */ +struct amdxdna_drm_create_bo { + __u64 flags; + __u32 type; + __u32 _pad; + __u64 vaddr; + __u64 size; + __u32 handle; +}; + +/** + * struct amdxdna_drm_get_bo_info - Get buffer object information. + * @ext: MBZ. + * @ext_flags: MBZ. + * @handle: DRM buffer object handle. + * @map_offset: Returned DRM fake offset for mmap(). + * @vaddr: Returned user VA of buffer. 0 in case user needs mmap(). + * @xdna_addr: Returned XDNA device virtual address. + */ +struct amdxdna_drm_get_bo_info { + __u64 ext; + __u64 ext_flags; + __u32 handle; + __u32 _pad; + __u64 map_offset; + __u64 vaddr; + __u64 xdna_addr; +}; + +/** + * struct amdxdna_drm_sync_bo - Sync buffer object. + * @handle: Buffer object handle. + * @direction: Direction of sync, can be from device or to device. + * @offset: Offset in the buffer to sync. + * @size: Size in bytes. + */ +struct amdxdna_drm_sync_bo { + __u32 handle; +#define SYNC_DIRECT_TO_DEVICE 0U +#define SYNC_DIRECT_FROM_DEVICE 1U + __u32 direction; + __u64 offset; + __u64 size; +}; + +enum amdxdna_cmd_type { + AMDXDNA_CMD_SUBMIT_EXEC_BUF = 0, + AMDXDNA_CMD_SUBMIT_DEPENDENCY, + AMDXDNA_CMD_SUBMIT_SIGNAL, +}; + +/** + * struct amdxdna_drm_exec_cmd - Execute command. 
+ * @ext: MBZ. + * @ext_flags: MBZ. + * @hwctx: Hardware context handle. + * @type: One of command type in enum amdxdna_cmd_type. + * @cmd_handles: Array of command handles or the command handle itself in case + * of just one. + * @args: Array of arguments for all command handles. + * @cmd_count: Number of command handles in the cmd_handles array. + * @arg_count: Number of arguments in the args array. + * @seq: Returned sequence number for this command. + */ +struct amdxdna_drm_exec_cmd { + __u64 ext; + __u64 ext_flags; + __u32 hwctx; + __u32 type; + __u64 cmd_handles; + __u64 args; + __u32 cmd_count; + __u32 arg_count; + __u64 seq; +}; + +/** + * struct amdxdna_drm_wait_cmd - Wait execution command. + * + * @hwctx: hardware context handle. + * @timeout: timeout in ms, 0 implies infinite wait. + * @seq: sequence number of the command returned by execute command. + * + * Wait for the command specified by seq to complete. + * Using AMDXDNA_INVALID_CMD_HANDLE as seq means wait till there is a free slot + * to submit a new command. + */ +struct amdxdna_drm_wait_cmd { + __u32 hwctx; + __u32 timeout; + __u64 seq; +}; + +/** + * struct amdxdna_drm_query_aie_status - Query the status of the AIE hardware + * @buffer: The user space buffer that will return the AIE status + * @buffer_size: The size of the user space buffer + * @cols_filled: A bitmap of AIE columns whose data has been returned in the + * buffer. + */ +struct amdxdna_drm_query_aie_status { + __u64 buffer; /* out */ + __u32 buffer_size; /* in */ + __u32 cols_filled; /* out */ +}; + +/** + * struct amdxdna_drm_query_aie_version - Query the version of the AIE hardware + * @major: The major version number + * @minor: The minor version number + */ +struct amdxdna_drm_query_aie_version { + __u32 major; /* out */ + __u32 minor; /* out */ +}; + +/** + * struct amdxdna_drm_query_aie_tile_metadata - Query the metadata of AIE tile + * (core, mem, shim) + * @row_count: The number of rows. 
+ * @row_start: The starting row number. + * @dma_channel_count: The number of dma channels. + * @lock_count: The number of locks. + * @event_reg_count: The number of events. + * @pad: MBZ. + */ +struct amdxdna_drm_query_aie_tile_metadata { + __u16 row_count; + __u16 row_start; + __u16 dma_channel_count; + __u16 lock_count; + __u16 event_reg_count; + __u16 pad[3]; +}; + +/** + * struct amdxdna_drm_query_aie_metadata - Query the metadata of the AIE + * hardware + * @col_size: The size of a column in bytes. + * @cols: The total number of columns. + * @rows: The total number of rows. + * @version: The version of the AIE hardware. + * @core: The metadata for all core tiles. + * @mem: The metadata for all mem tiles. + * @shim: The metadata for all shim tiles. + */ +struct amdxdna_drm_query_aie_metadata { + __u32 col_size; + __u16 cols; + __u16 rows; + struct amdxdna_drm_query_aie_version version; + struct amdxdna_drm_query_aie_tile_metadata core; + struct amdxdna_drm_query_aie_tile_metadata mem; + struct amdxdna_drm_query_aie_tile_metadata shim; +}; + +/** + * struct amdxdna_drm_query_clock - Metadata for a clock + * @name: The clock name. + * @freq_mhz: The clock frequency. + * @pad: MBZ. + */ +struct amdxdna_drm_query_clock { + __u8 name[16]; + __u32 freq_mhz; + __u32 pad; +}; + +/** + * struct amdxdna_drm_query_clock_metadata - Query metadata for clocks + * @mp_npu_clock: The metadata for MP-NPU clock. + * @h_clock: The metadata for H clock. + */ +struct amdxdna_drm_query_clock_metadata { + struct amdxdna_drm_query_clock mp_npu_clock; + struct amdxdna_drm_query_clock h_clock; +}; + +enum amdxdna_sensor_type { AMDXDNA_SENSOR_TYPE_POWER }; + +/** + * struct amdxdna_drm_query_sensor - The data for single sensor. + * @label: The name for a sensor. + * @input: The current value of the sensor. + * @max: The maximum value possible for the sensor. + * @average: The average value of the sensor. 
+ * @highest: The highest recorded sensor value for this driver load for the + * sensor. + * @status: The sensor status. + * @units: The sensor units. + * @unitm: Translates value member variables into the correct unit via (pow(10, + * unitm) * value) + * @type: The sensor type from enum amdxdna_sensor_type + * @pad: MBZ. + */ +struct amdxdna_drm_query_sensor { + __u8 label[64]; + __u32 input; + __u32 max; + __u32 average; + __u32 highest; + __u8 status[64]; + __u8 units[16]; + __s8 unitm; + __u8 type; + __u8 pad[6]; +}; + +/** + * struct amdxdna_drm_query_hwctx - The data for single context. + * @context_id: The ID for this context. + * @start_col: The starting column for the partition assigned to this context. + * @num_col: The number of columns in the partition assigned to this context. + * @pid: The Process ID of the process that created this context. + * @command_submissions: The number of commands submitted to this context. + * @command_completions: The number of commands completed by this context. + * @migrations: The number of times this context has been moved to a different + * partition. + * @preemptions: The number of times this context has been preempted by another + * context in the same partition. + * @pad: MBZ. + */ +struct amdxdna_drm_query_hwctx { + __u32 context_id; + __u32 start_col; + __u32 num_col; + __u32 pad; + __s64 pid; + __u64 command_submissions; + __u64 command_completions; + __u64 migrations; + __u64 preemptions; + __u64 errors; +}; + +/** + * struct amdxdna_drm_aie_mem - The data for AIE memory read/write + * @col: The AIE column index + * @row: The AIE row index + * @addr: The AIE memory address to read/write + * @size: The size of bytes to read/write + * @buf_p: The buffer to store read/write data + * + * This is used for DRM_AMDXDNA_READ_AIE_MEM and DRM_AMDXDNA_WRITE_AIE_MEM + * parameters. 
+ */ +struct amdxdna_drm_aie_mem { + __u32 col; + __u32 row; + __u32 addr; + __u32 size; + __u64 buf_p; +}; + +/** + * struct amdxdna_drm_aie_reg - The data for AIE register read/write + * @col: The AIE column index + * @row: The AIE row index + * @addr: The AIE register address to read/write + * @val: The value to write or returned value from AIE + * + * This is used for DRM_AMDXDNA_READ_AIE_REG and DRM_AMDXDNA_WRITE_AIE_REG + * parameters. + */ +struct amdxdna_drm_aie_reg { + __u32 col; + __u32 row; + __u32 addr; + __u32 val; +}; + +enum amdxdna_power_mode_type { + POWER_MODE_DEFAULT, /**< Fallback to calculated DPM */ + POWER_MODE_LOW, /**< Set frequency to lowest DPM */ + POWER_MODE_MEDIUM, /**< Set frequency to medium DPM */ + POWER_MODE_HIGH, /**< Set frequency to highest DPM */ +}; + +/** + * struct amdxdna_drm_get_power_mode - Get the power mode of the AIE hardware + * @power_mode: The power mode from enum amdxdna_power_mode_type + * @pad: MBZ. + */ +struct amdxdna_drm_get_power_mode { + __u8 power_mode; + __u8 pad[7]; +}; + +/** + * struct amdxdna_drm_query_firmware_version - Query the version of the firmware + * @major: The major version number + * @minor: The minor version number + * @patch: The patch level version number + * @build: The build ID + */ +struct amdxdna_drm_query_firmware_version { + __u32 major; /* out */ + __u32 minor; /* out */ + __u32 patch; /* out */ + __u32 build; /* out */ +}; + +enum amdxdna_drm_get_param { + DRM_AMDXDNA_QUERY_AIE_STATUS, + DRM_AMDXDNA_QUERY_AIE_METADATA, + DRM_AMDXDNA_QUERY_AIE_VERSION, + DRM_AMDXDNA_QUERY_CLOCK_METADATA, + DRM_AMDXDNA_QUERY_SENSORS, + DRM_AMDXDNA_QUERY_HW_CONTEXTS, + DRM_AMDXDNA_READ_AIE_MEM, + DRM_AMDXDNA_READ_AIE_REG, + DRM_AMDXDNA_QUERY_FIRMWARE_VERSION, + DRM_AMDXDNA_GET_POWER_MODE, + DRM_AMDXDNA_NUM_GET_PARAM, +}; + +/** + * struct amdxdna_drm_get_info - Get some information from the AIE hardware. + * @param: Value in enum amdxdna_drm_get_param. 
Specifies the structure passed + * in the buffer. + * @buffer_size: Size of the input buffer. Size needed/written by the kernel. + * @buffer: A structure specified by the param struct member. + */ +struct amdxdna_drm_get_info { + __u32 param; /* in */ + __u32 buffer_size; /* in/out */ + __u64 buffer; /* in/out */ +}; + +/** + * struct amdxdna_drm_set_power_mode - Set the power mode of the AIE hardware + * @power_mode: The power mode from enum amdxdna_power_mode_type + * @pad: MBZ. + */ +struct amdxdna_drm_set_power_mode { + __u8 power_mode; + __u8 pad[7]; +}; + +enum amdxdna_drm_set_param { + DRM_AMDXDNA_SET_POWER_MODE, + DRM_AMDXDNA_WRITE_AIE_MEM, + DRM_AMDXDNA_WRITE_AIE_REG, + DRM_AMDXDNA_NUM_SET_PARAM, +}; + +/** + * struct amdxdna_drm_set_state - Set the state of some component within the AIE + * hardware. + * @param: Value in enum amdxdna_drm_set_param. Specifies the structure passed + * in the buffer. + * @buffer_size: Size of the input buffer. + * @buffer: A structure specified by the param struct member. + */ +struct amdxdna_drm_set_state { + __u32 param; /* in */ + __u32 buffer_size; /* in */ + __u64 buffer; /* in */ +}; + +/** + * struct amdxdna_drm_syncobjs - Signal or wait on array of DRM timelined sync + * objects. + * @handles: Array of handles of sync objects. + * @points: Array of time points for each sync object. + * @count: Number of elements in the above arrays. 
+ */ +struct amdxdna_drm_syncobjs { + __u64 handles; /* in */ + __u64 points; /* in */ + __u32 count; /* in */ + __u32 pad; +}; + +#define DRM_IOCTL_AMDXDNA_CREATE_HWCTX \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_CREATE_HWCTX, \ + struct amdxdna_drm_create_hwctx) + +#define DRM_IOCTL_AMDXDNA_DESTROY_HWCTX \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_DESTROY_HWCTX, \ + struct amdxdna_drm_destroy_hwctx) + +#define DRM_IOCTL_AMDXDNA_CONFIG_HWCTX \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_CONFIG_HWCTX, \ + struct amdxdna_drm_config_hwctx) + +#define DRM_IOCTL_AMDXDNA_CREATE_BO \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_CREATE_BO, \ + struct amdxdna_drm_create_bo) + +#define DRM_IOCTL_AMDXDNA_GET_BO_INFO \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_GET_BO_INFO, \ + struct amdxdna_drm_get_bo_info) + +#define DRM_IOCTL_AMDXDNA_SYNC_BO \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_SYNC_BO, struct amdxdna_drm_sync_bo) + +#define DRM_IOCTL_AMDXDNA_EXEC_CMD \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_EXEC_CMD, struct amdxdna_drm_exec_cmd) + +#define DRM_IOCTL_AMDXDNA_WAIT_CMD \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_WAIT_CMD, struct amdxdna_drm_wait_cmd) + +#define DRM_IOCTL_AMDXDNA_GET_INFO \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_GET_INFO, struct amdxdna_drm_get_info) + +#define DRM_IOCTL_AMDXDNA_SET_STATE \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_SET_STATE, \ + struct amdxdna_drm_set_state) + +#define DRM_IOCTL_AMDXDNA_SUBMIT_WAIT \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_SUBMIT_WAIT, \ + struct amdxdna_drm_syncobjs) + +#define DRM_IOCTL_AMDXDNA_SUBMIT_SIGNAL \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_SUBMIT_SIGNAL, \ + struct amdxdna_drm_syncobjs) + +#if defined(__cplusplus) +} /* extern c end */ +#endif + +#endif /* AMDXDNA_ACCEL_H_ */ diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_xdna_driver.h b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_xdna_driver.h new file mode 100644 index 0000000000..5fe7eae315 --- /dev/null +++ 
b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_xdna_driver.h @@ -0,0 +1,75 @@ +//////////////////////////////////////////////////////////////////////////////// +// +// The University of Illinois/NCSA +// Open Source License (NCSA) +// +// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved. +// +// Developed by: +// +// AMD Research and AMD HSA Software Development +// +// Advanced Micro Devices, Inc. +// +// www.amd.com +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to +// deal with the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimers. +// - Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimers in +// the documentation and/or other materials provided with the distribution. +// - Neither the names of Advanced Micro Devices, Inc, +// nor the names of its contributors may be used to endorse or promote +// products derived from this Software without specific prior written +// permission. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR +// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS WITH THE SOFTWARE. 
+// +//////////////////////////////////////////////////////////////////////////////// +#ifndef HSA_RUNTIME_CORE_INC_AMD_XDNA_DRIVER_H_ +#define HSA_RUNTIME_CORE_INC_AMD_XDNA_DRIVER_H_ + +#include + +#include "core/inc/driver.h" + +namespace rocr { +namespace AMD { + +class XdnaDriver : public core::Driver { +public: + XdnaDriver() = delete; + XdnaDriver(std::string devnode_name); + + static hsa_status_t DiscoverDriver(); + hsa_status_t QueryKernelModeDriver(core::DriverQuery query) override; + + hsa_status_t GetMemoryProperties(uint32_t node_id, + core::MemProperties &mprops) const override; + hsa_status_t AllocateMemory(void **mem, size_t size, uint32_t node_id, + core::MemFlags flags) override; + hsa_status_t FreeMemory(void *mem, uint32_t node_id) override; + hsa_status_t CreateQueue(core::Queue &queue) override; + hsa_status_t DestroyQueue(core::Queue &queue) const override; + +private: + hsa_status_t QueryDriverVersion(); +}; + +} // namespace AMD +} // namespace rocr + +#endif // header guard diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/driver.h b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/driver.h index c6b7ffac11..9d6e623c29 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/driver.h +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/driver.h @@ -43,6 +43,7 @@ #ifndef HSA_RUNTME_CORE_INC_DRIVER_H_ #define HSA_RUNTME_CORE_INC_DRIVER_H_ +#include #include #include "core/inc/agent.h" @@ -60,6 +61,15 @@ struct MemProperties { uint64_t virtual_base_addr_; }; +struct DriverVersionInfo { + uint32_t major; + uint32_t minor; +}; + +enum class DriverQuery { GET_DRIVER_VERSION }; + +enum class DriverType { XDNA = 0, NUM_DRIVER_TYPES }; + /// @brief Kernel driver interface. 
/// /// @details A class used to provide an interface between the core runtime @@ -68,15 +78,22 @@ struct MemProperties { class Driver { public: Driver() = delete; - Driver(const std::string devnode_name, Agent::DeviceType agent_device_type); - virtual ~Driver() {} + Driver(DriverType kernel_driver_type, std::string devnode_name); + virtual ~Driver() = default; + /// @brief Query the kernel-mode driver. + /// @retval HSA_STATUS_SUCCESS if the kernel-mode driver query was + /// successful. + virtual hsa_status_t QueryKernelModeDriver(DriverQuery query) = 0; /// @brief Open a connection to the driver using name_. /// @retval HSA_STATUS_SUCCESS if the driver was opened successfully. hsa_status_t Open(); /// @brief Close a connection to the open driver using fd_. /// @retval HSA_STATUS_SUCCESS if the driver was opened successfully. hsa_status_t Close(); + /// @brief Get driver version information. + /// @retval DriverVersionInfo containing the driver's version information. + DriverVersionInfo Version() const { return version_; } virtual hsa_status_t GetMemoryProperties(uint32_t node_id, MemProperties &mprops) const = 0; @@ -95,10 +112,13 @@ class Driver { virtual hsa_status_t DestroyQueue(Queue &queue) const = 0; - /// Specify the agent device type this driver is for. - const Agent::DeviceType agent_device_type_; + /// Unique identifier for supported kernel-mode drivers. 
+ const DriverType kernel_driver_type_; + +protected: + DriverVersionInfo version_{std::numeric_limits::max(), + std::numeric_limits::max()}; - protected: const std::string devnode_name_; int fd_ = -1; }; diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/runtime.h b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/runtime.h index e4fb8f4dee..96de57653f 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/runtime.h +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/runtime.h @@ -63,10 +63,11 @@ #include "core/inc/hsa_ext_amd_impl.h" #include "core/inc/agent.h" +#include "core/inc/amd_xdna_driver.h" #include "core/inc/exceptions.h" +#include "core/inc/interrupt_signal.h" #include "core/inc/memory_region.h" #include "core/inc/signal.h" -#include "core/inc/interrupt_signal.h" #include "core/inc/svm_profiler.h" #include "core/util/flag.h" #include "core/util/locks.h" @@ -155,9 +156,16 @@ class Runtime { /// @param [in] agent Pointer to the agent object. void RegisterAgent(Agent* agent, bool Enabled); + /// @brief Insert driver into the driver list, taking ownership of it. + /// @param [in] driver Unique pointer to the driver object. + void RegisterDriver(std::unique_ptr &driver); + /// @brief Delete all agent objects from ::agents_. void DestroyAgents(); + /// @brief Close and delete all agent driver objects from ::agent_drivers_. + void DestroyDrivers(); + /// @brief Set the number of links connecting the agents in the platform. 
void SetLinkCount(size_t num_link); @@ -469,6 +477,22 @@ class Runtime { bool XnackEnabled() const { return xnack_enabled_; } void XnackEnabled(bool enable) { xnack_enabled_ = enable; } + Driver &AgentDriver(DriverType drv_type) { + auto is_drv_type = [&](const std::unique_ptr &d) { + return d->kernel_driver_type_ == drv_type; + }; + + auto driver(std::find_if(agent_drivers_.begin(), agent_drivers_.end(), + is_drv_type)); + + if (driver == agent_drivers_.end()) { + throw AMD::hsa_exception(HSA_STATUS_ERROR_INVALID_ARGUMENT, + "Invalid agent device type, no driver found."); + } + + return **driver; + } + protected: static void AsyncEventsLoop(void*); static void AsyncIPCSockServerConnLoop(void*); @@ -617,6 +641,10 @@ class Runtime { // registered & mapped arrays. KernelSharedMutex memory_lock_; + // Array containing driver interfaces for compatible agent kernel-mode + // drivers. Currently supports AIE agents. + std::vector> agent_drivers_; + // Array containing tools library handles. std::vector tool_libs_; diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_topology.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_topology.cpp index ee3e461a8e..4932de9051 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_topology.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_topology.cpp @@ -72,6 +72,12 @@ namespace AMD { static const uint kKfdVersionMajor = 0; static const uint kKfdVersionMinor = 99; +void DiscoverDrivers(bool &gpu_found, bool &aie_found) { + // Open connection to GPU and AIE kernel drivers. + gpu_found = (hsaKmtOpenKFD() == HSAKMT_STATUS_SUCCESS); + aie_found = (XdnaDriver::DiscoverDriver() == HSA_STATUS_SUCCESS); +} + // Query for user preference and use that to determine Xnack mode of ROCm system. // Return true if Xnack mode is ON or false if OFF. Xnack mode of a system is // orthogonal to devices that do not support Xnack mode. 
It is legal for a @@ -419,13 +425,10 @@ void BuildTopology() { } bool Load() { - bool gpu_found = true; + bool gpu_found = false; bool aie_found = false; - // Open connection to kernel driver. - if (hsaKmtOpenKFD() != HSAKMT_STATUS_SUCCESS) { - gpu_found = false; - } + DiscoverDrivers(gpu_found, aie_found); if (!(gpu_found || aie_found)) { return false; diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/runtime.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/runtime.cpp index fef56b2197..e25cf55653 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/runtime.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/runtime.cpp @@ -244,6 +244,11 @@ void Runtime::RegisterAgent(Agent* agent, bool Enabled) { } } +// Register driver. +void Runtime::RegisterDriver(std::unique_ptr &driver) { + agent_drivers_.push_back(std::move(driver)); +} + void Runtime::DestroyAgents() { agents_by_node_.clear(); @@ -267,6 +272,14 @@ void Runtime::DestroyAgents() { system_regions_coarse_.clear(); } +void Runtime::DestroyDrivers() { + for (auto &d : agent_drivers_) { + d->Close(); + } + + agent_drivers_.clear(); +} + void Runtime::SetLinkCount(size_t num_nodes) { num_nodes_ = num_nodes; link_matrix_.resize(num_nodes * num_nodes); @@ -2062,6 +2075,8 @@ void Runtime::Unload() { CloseTools(); + DestroyDrivers(); + AMD::Unload(); }