diff --git a/inc/pm4_cmds.h b/inc/pm4_cmds.h deleted file mode 100644 index 44b7fb00aa..0000000000 --- a/inc/pm4_cmds.h +++ /dev/null @@ -1,1090 +0,0 @@ -#ifndef _WSL_INC_PM4_CMDS_H_ -#define _WSL_INC_PM4_CMDS_H_ - -#include - -#define mmCOMPUTE_NUM_THREAD_X 0x2E07 -#define mmCOMPUTE_PGM_LO 0x2E0C -#define mmCOMPUTE_DISPATCH_SCRATCH_BASE_LO 0x2E10 -#define mmCOMPUTE_PGM_RSRC1 0x2E12 -#define mmCOMPUTE_PGM_RSRC3 0x2E28 -#define mmCOMPUTE_RESOURCE_LIMITS 0x2E15 -#define mmCOMPUTE_USER_DATA_0 0x2E40 - -#define PM4_TYPE_SHIFT 30 -#define PM4_COUNT_SHIFT 16 -#define PM4_OPCODE_SHIFT 8 -#define PM4_SHADER_TYPE_SHIFT 1 - -#define PM4_GFX_SHADER 0 -#define PM4_COMPUTE_SHADER 1 - -#define PM4_TYPE3_HDR(_opc_, _count_) \ - (uint32_t)((3) << PM4_TYPE_SHIFT | \ - ((_count_) - 2) << PM4_COUNT_SHIFT | \ - (_opc_) << PM4_OPCODE_SHIFT) | \ - (PM4_COMPUTE_SHADER << PM4_SHADER_TYPE_SHIFT) - -union PM4_MEC_TYPE_3_HEADER { - struct { - uint32_t reserved1 : 8; ///< reserved - uint32_t opcode : 8; ///< IT opcode - uint32_t count : 14;///< number of DWORDs - 1 in the information body. - uint32_t type : 2; ///< packet identifier. It should be 3 for type 3 packets - }; - uint32_t u32All; -}; - -#define IT_DISPATCH_DIRECT 0x15 -#define IT_ATOMIC_MEM 0x1E -#define IT_WRITE_DATA 0x37 -#define IT_INDIRECT_BUFFER 0x3F -#define IT_COPY_DATA 0x40 -#define IT_EVENT_WRITE 0x46 -#define IT_RELEASE_MEM 0x49 -#define IT_ACQUIRE_MEM 0x58 -#define IT_SET_SH_REG 0x76 - -struct PM4_MEC_SET_SH_REG { - union { - PM4_MEC_TYPE_3_HEADER header; - uint32_t ordinal1; - }; - union { - struct { - uint32_t reg_offset:16; - uint32_t reserved1:16; - } bitfields2; - uint32_t ordinal2; - }; -}; - -struct PM4_MEC_DISPATCH_DIRECT { - union { - PM4_MEC_TYPE_3_HEADER header; - uint32_t ordinal1; - }; - uint32_t dim_x; - uint32_t dim_y; - uint32_t dim_z; - uint32_t dispatch_initiator; -}; - -// ------------------------------- MEC_EVENT_WRITE_event_index_enum ------------------------------- -enum MEC_EVENT_WRITE_event_index_enum { - event_index__mec_event_write__other = 0, - event_index__mec_event_write__sample_pipelinestat = 2, - event_index__mec_event_write__cs_partial_flush = 4, - event_index__mec_event_write__sample_streamoutstats__GFX11 = 8, - event_index__mec_event_write__sample_streamoutstats1__GFX11 = 9, - event_index__mec_event_write__sample_streamoutstats2__GFX11 = 10, - event_index__mec_event_write__sample_streamoutstats3__GFX11 = 11, -}; - -enum VGT_EVENT_TYPE { - Reserved_0x00 = 0x00000000, - SAMPLE_STREAMOUTSTATS1 = 0x00000001, - SAMPLE_STREAMOUTSTATS2 = 0x00000002, - SAMPLE_STREAMOUTSTATS3 = 0x00000003, - CACHE_FLUSH_TS = 0x00000004, - CONTEXT_DONE = 0x00000005, - CACHE_FLUSH = 0x00000006, - CS_PARTIAL_FLUSH = 0x00000007, - VGT_STREAMOUT_SYNC = 0x00000008, - VGT_STREAMOUT_RESET = 0x0000000a, - END_OF_PIPE_INCR_DE = 0x0000000b, - END_OF_PIPE_IB_END = 0x0000000c, - RST_PIX_CNT = 0x0000000d, - BREAK_BATCH = 0x0000000e, - VS_PARTIAL_FLUSH = 0x0000000f, - PS_PARTIAL_FLUSH = 0x00000010, - FLUSH_HS_OUTPUT = 0x00000011, - FLUSH_DFSM = 0x00000012, - RESET_TO_LOWEST_VGT = 0x00000013, - CACHE_FLUSH_AND_INV_TS_EVENT = 0x00000014, - CACHE_FLUSH_AND_INV_EVENT = 0x00000016, - PERFCOUNTER_START = 0x00000017, - PERFCOUNTER_STOP = 0x00000018, - PIPELINESTAT_START = 0x00000019, - PIPELINESTAT_STOP = 0x0000001a, - PERFCOUNTER_SAMPLE = 0x0000001b, - SAMPLE_PIPELINESTAT = 0x0000001e, - SO_VGTSTREAMOUT_FLUSH = 0x0000001f, - SAMPLE_STREAMOUTSTATS = 0x00000020, - RESET_VTX_CNT = 0x00000021, - BLOCK_CONTEXT_DONE = 0x00000022, - CS_CONTEXT_DONE = 0x00000023, - VGT_FLUSH = 0x00000024, - TGID_ROLLOVER = 0x00000025, - SQ_NON_EVENT = 0x00000026, - SC_SEND_DB_VPZ = 0x00000027, - BOTTOM_OF_PIPE_TS = 0x00000028, - FLUSH_SX_TS = 0x00000029, - DB_CACHE_FLUSH_AND_INV = 0x0000002a, - FLUSH_AND_INV_DB_DATA_TS = 0x0000002b, - FLUSH_AND_INV_DB_META = 0x0000002c, - FLUSH_AND_INV_CB_DATA_TS = 0x0000002d, - FLUSH_AND_INV_CB_META = 0x0000002e, - CS_DONE = 0x0000002f, - PS_DONE = 0x00000030, - FLUSH_AND_INV_CB_PIXEL_DATA = 0x00000031, - SX_CB_RAT_ACK_REQUEST = 0x00000032, - THREAD_TRACE_START = 0x00000033, - THREAD_TRACE_STOP = 0x00000034, - THREAD_TRACE_MARKER = 0x00000035, - THREAD_TRACE_FINISH = 0x00000037, - PIXEL_PIPE_STAT_CONTROL = 0x00000038, - PIXEL_PIPE_STAT_DUMP = 0x00000039, - PIXEL_PIPE_STAT_RESET = 0x0000003a, - CONTEXT_SUSPEND = 0x0000003b, - OFFCHIP_HS_DEALLOC = 0x0000003c, - ENABLE_NGG_PIPELINE = 0x0000003d, - SET_FE_ID__GFX09 = 0x00000009, - Available_0x1c__GFX09 = 0x0000001c, - Available_0x1d__GFX09 = 0x0000001d, - THREAD_TRACE_FLUSH__GFX09 = 0x00000036, - Reserved_0x3f__GFX09 = 0x0000003f, - ZPASS_DONE__GFX09_10 = 0x00000015, - ENABLE_LEGACY_PIPELINE__GFX09_10 = 0x0000003e, - Reserved_0x09__GFX10PLUS = 0x00000009, - FLUSH_ES_OUTPUT__GFX10PLUS = 0x0000001c, - BIN_CONF_OVERRIDE_CHECK__GFX10PLUS = 0x0000001d, - THREAD_TRACE_DRAW__GFX10PLUS = 0x00000036, - DRAW_DONE__GFX10PLUS = 0x0000003f, - WAIT_SYNC__GFX11 = 0x00000015, - ENABLE_PIPELINE_NOT_USED__GFX11 = 0x0000003e, -}; - -struct PM4_MEC_EVENT_WRITE { - union { - PM4_MEC_TYPE_3_HEADER header; - uint32_t ordinal1; - }; - union { - struct { - uint32_t event_type:6; - uint32_t reserved1:2; - uint32_t event_index:4; - uint32_t reserved2:19; - uint32_t offload_enable:1; - } bitfields2; - uint32_t ordinal2; - }; -}; - -struct PM4_MEC_ATOMIC_MEM { - union { - PM4_MEC_TYPE_3_HEADER header; - uint32_t ordinal1; - }; - union { - struct { - uint32_t atomic:7; - uint32_t reserved1:1; - uint32_t command:4; - uint32_t reserved2:13; - uint32_t cache_policy:2; - uint32_t reserved3:5; - } bitfields2; - uint32_t ordinal2; - }; - uint32_t addr_lo; - uint32_t addr_hi; - uint32_t src_data_lo; - uint32_t src_data_hi; - uint32_t cmp_data_lo; - uint32_t cmp_data_hi; - union { - struct { - uint32_t loop_interval:13; - uint32_t reserved4:19; - } bitfields9; - uint32_t ordinal9; - }; -}; - -struct PM4_MEC_WRITE_DATA { - union { - PM4_MEC_TYPE_3_HEADER header; - uint32_t ordinal1; - }; - union { - struct { - uint32_t reserved1:8; - uint32_t dst_sel:4; - uint32_t reserved2:4; - uint32_t addr_incr:1; - uint32_t reserved3:2; - uint32_t resume_vf:1; - uint32_t wr_confirm:1; - uint32_t reserved4:4; - uint32_t cache_policy:2; - uint32_t reserved5:5; - } bitfields2; - uint32_t ordinal2; - }; - union { - struct { - uint32_t dst_mmreg_addr:18; - uint32_t reserved6:14; - } bitfields3a; - struct { - uint32_t dst_gds_addr:16; - uint32_t reserved7:16; - } bitfields3b; - struct { - uint32_t reserved8:2; - uint32_t dst_mem_addr_lo:30; - } bitfields3c; - uint32_t ordinal3; - }; - uint32_t dst_mem_addr_hi; - uint64_t write_data_value; -}; - -#define PERSISTENT_SPACE_START 0x00002c00 - -template -void GenerateSetShRegHeader(T* pm4, uint32_t reg_addr) { - pm4->cmd_set_data.header.u32All = PM4_TYPE3_HDR(IT_SET_SH_REG, - sizeof(T) / sizeof(uint32_t)); - pm4->cmd_set_data.bitfields2.reg_offset = reg_addr - PERSISTENT_SPACE_START; -} - -template -void GenerateCmdHeader(T* pm4, int op_code) { - pm4->header.u32All = PM4_TYPE3_HDR(op_code, sizeof(T) / sizeof(uint32_t)); -} - -/// @brief Defines the Gpu command to dispatch a kernel. It embeds -/// various Gpu hardware specific data structures for initialization -/// and configuration before a dispatch begins to run -struct DispatchTemplate { - - /// @brief Structure used to initialize the group dimensions - /// of a kernel dispatch and if performance counters are enabled - struct DispatchDimensionRegs { - PM4_MEC_SET_SH_REG cmd_set_data; - uint32_t compute_num_thread_x; - uint32_t compute_num_thread_y; - uint32_t compute_num_thread_z; - } dimension_regs; - - struct DispatchProgramRegs { - PM4_MEC_SET_SH_REG cmd_set_data; - uint32_t compute_pgm_lo; - uint32_t compute_pgm_hi; - } program_regs; - - struct DispatchProgramResourceRegs { - PM4_MEC_SET_SH_REG cmd_set_data; - uint32_t compute_pgm_rsrc1; - uint32_t compute_pgm_rsrc2; - } program_resource_regs; - - /// @brief Structure used to initialize parameters related to - /// thread management i.e. number of waves to issue and number - /// of Compute Units to use - struct DispatchResourceRegs { - PM4_MEC_SET_SH_REG cmd_set_data; - uint32_t compute_resource_limits; - uint32_t compute_static_thread_mgmt_se0; - uint32_t compute_static_thread_mgmt_se1; - uint32_t compute_tmpring_size; - uint32_t compute_static_thread_mgmt_se2; - uint32_t compute_static_thread_mgmt_se3; - } resource_regs; - - /// @brief Structure used to pass handles of the Aql dispatch - /// packet, Aql queue, Kernel argument address block, Scratch - /// buffer - struct DispatchComputeUserDataRegs { - PM4_MEC_SET_SH_REG cmd_set_data; - uint32_t compute_user_data[16]; - } compute_user_data_regs; - - /// @brief Structure used to configure Cache flush policy - /// and dimensions of total work size - PM4_MEC_DISPATCH_DIRECT dispatch_direct; -}; - -struct DispatchProgramResourceRegs { - PM4_MEC_SET_SH_REG cmd_set_data; - uint32_t compute_pgm_rsrc3; -}; - - -/// @brief Structure used to issue a programing scratch command for gfx11+ -struct SetScratchTemplate { - PM4_MEC_SET_SH_REG cmd_set_data; - uint32_t scratch_lo; - uint32_t scratch_hi; -}; - -/// @brief Structure used to issue a Gpu Barrier command -struct BarrierTemplate { - PM4_MEC_EVENT_WRITE event_write; -}; - -//--------------------MEC_ATOMIC_MEM-------------------- -enum MEC_ATOMIC_MEM_command_enum { - command__mec_atomic_mem__single_pass_atomic = 0, - command__mec_atomic_mem__loop_until_compare_satisfied = 1, - command__mec_atomic_mem__wait_for_write_confirmation = 2, - command__mec_atomic_mem__send_and_continue = 3, -}; - -enum MEC_ATOMIC_MEM_cache_policy_enum { - cache_policy__mec_atomic_mem__lru = 0, - cache_policy__mec_atomic_mem__stream = 1, - cache_policy__mec_atomic_mem__noa = 2, - cache_policy__mec_atomic_mem__bypass = 3, -}; - -enum TC_OP { - TC_OP_READ = 0x00000000, - TC_OP_ATOMIC_FCMPSWAP_RTN_32 = 0x00000001, - TC_OP_ATOMIC_FMIN_RTN_32 = 0x00000002, - TC_OP_ATOMIC_FMAX_RTN_32 = 0x00000003, - TC_OP_RESERVED_FOP_RTN_32_0 = 0x00000004, - TC_OP_RESERVED_FOP_RTN_32_2 = 0x00000006, - TC_OP_ATOMIC_SWAP_RTN_32 = 0x00000007, - TC_OP_ATOMIC_CMPSWAP_RTN_32 = 0x00000008, - TC_OP_ATOMIC_FCMPSWAP_FLUSH_DENORM_RTN_32 = 0x00000009, - TC_OP_ATOMIC_FMIN_FLUSH_DENORM_RTN_32 = 0x0000000a, - TC_OP_ATOMIC_FMAX_FLUSH_DENORM_RTN_32 = 0x0000000b, - TC_OP_PROBE_FILTER = 0x0000000c, - TC_OP_RESERVED_FOP_FLUSH_DENORM_RTN_32_2 = 0x0000000e, - TC_OP_ATOMIC_ADD_RTN_32 = 0x0000000f, - TC_OP_ATOMIC_SUB_RTN_32 = 0x00000010, - TC_OP_ATOMIC_SMIN_RTN_32 = 0x00000011, - TC_OP_ATOMIC_UMIN_RTN_32 = 0x00000012, - TC_OP_ATOMIC_SMAX_RTN_32 = 0x00000013, - TC_OP_ATOMIC_UMAX_RTN_32 = 0x00000014, - TC_OP_ATOMIC_AND_RTN_32 = 0x00000015, - TC_OP_ATOMIC_OR_RTN_32 = 0x00000016, - TC_OP_ATOMIC_XOR_RTN_32 = 0x00000017, - TC_OP_ATOMIC_INC_RTN_32 = 0x00000018, - TC_OP_ATOMIC_DEC_RTN_32 = 0x00000019, - TC_OP_WBINVL1_VOL = 0x0000001a, - TC_OP_WBINVL1_SD = 0x0000001b, - TC_OP_RESERVED_NON_FLOAT_RTN_32_0 = 0x0000001c, - TC_OP_RESERVED_NON_FLOAT_RTN_32_1 = 0x0000001d, - TC_OP_RESERVED_NON_FLOAT_RTN_32_2 = 0x0000001e, - TC_OP_RESERVED_NON_FLOAT_RTN_32_3 = 0x0000001f, - TC_OP_WRITE = 0x00000020, - TC_OP_ATOMIC_FCMPSWAP_RTN_64 = 0x00000021, - TC_OP_ATOMIC_FMIN_RTN_64 = 0x00000022, - TC_OP_ATOMIC_FMAX_RTN_64 = 0x00000023, - TC_OP_RESERVED_FOP_RTN_64_0 = 0x00000024, - TC_OP_RESERVED_FOP_RTN_64_1 = 0x00000025, - TC_OP_RESERVED_FOP_RTN_64_2 = 0x00000026, - TC_OP_ATOMIC_SWAP_RTN_64 = 0x00000027, - TC_OP_ATOMIC_CMPSWAP_RTN_64 = 0x00000028, - TC_OP_ATOMIC_FCMPSWAP_FLUSH_DENORM_RTN_64 = 0x00000029, - TC_OP_ATOMIC_FMIN_FLUSH_DENORM_RTN_64 = 0x0000002a, - TC_OP_ATOMIC_FMAX_FLUSH_DENORM_RTN_64 = 0x0000002b, - TC_OP_WBINVL2_SD = 0x0000002c, - TC_OP_RESERVED_FOP_FLUSH_DENORM_RTN_64_0 = 0x0000002d, - TC_OP_RESERVED_FOP_FLUSH_DENORM_RTN_64_1 = 0x0000002e, - TC_OP_ATOMIC_ADD_RTN_64 = 0x0000002f, - TC_OP_ATOMIC_SUB_RTN_64 = 0x00000030, - TC_OP_ATOMIC_SMIN_RTN_64 = 0x00000031, - TC_OP_ATOMIC_UMIN_RTN_64 = 0x00000032, - TC_OP_ATOMIC_SMAX_RTN_64 = 0x00000033, - TC_OP_ATOMIC_UMAX_RTN_64 = 0x00000034, - TC_OP_ATOMIC_AND_RTN_64 = 0x00000035, - TC_OP_ATOMIC_OR_RTN_64 = 0x00000036, - TC_OP_ATOMIC_XOR_RTN_64 = 0x00000037, - TC_OP_ATOMIC_INC_RTN_64 = 0x00000038, - TC_OP_ATOMIC_DEC_RTN_64 = 0x00000039, - TC_OP_WBL2_NC = 0x0000003a, - TC_OP_WBL2_WC = 0x0000003b, - TC_OP_RESERVED_NON_FLOAT_RTN_64_1 = 0x0000003c, - TC_OP_RESERVED_NON_FLOAT_RTN_64_2 = 0x0000003d, - TC_OP_RESERVED_NON_FLOAT_RTN_64_3 = 0x0000003e, - TC_OP_RESERVED_NON_FLOAT_RTN_64_4 = 0x0000003f, - TC_OP_WBINVL1 = 0x00000040, - TC_OP_ATOMIC_FCMPSWAP_32 = 0x00000041, - TC_OP_ATOMIC_FMIN_32 = 0x00000042, - TC_OP_ATOMIC_FMAX_32 = 0x00000043, - TC_OP_RESERVED_FOP_32_0 = 0x00000044, - TC_OP_RESERVED_FOP_32_2 = 0x00000046, - TC_OP_ATOMIC_SWAP_32 = 0x00000047, - TC_OP_ATOMIC_CMPSWAP_32 = 0x00000048, - TC_OP_ATOMIC_FCMPSWAP_FLUSH_DENORM_32 = 0x00000049, - TC_OP_ATOMIC_FMIN_FLUSH_DENORM_32 = 0x0000004a, - TC_OP_ATOMIC_FMAX_FLUSH_DENORM_32 = 0x0000004b, - TC_OP_INV_METADATA = 0x0000004c, - TC_OP_RESERVED_FOP_FLUSH_DENORM_32_2 = 0x0000004e, - TC_OP_ATOMIC_ADD_32 = 0x0000004f, - TC_OP_ATOMIC_SUB_32 = 0x00000050, - TC_OP_ATOMIC_SMIN_32 = 0x00000051, - TC_OP_ATOMIC_UMIN_32 = 0x00000052, - TC_OP_ATOMIC_SMAX_32 = 0x00000053, - TC_OP_ATOMIC_UMAX_32 = 0x00000054, - TC_OP_ATOMIC_AND_32 = 0x00000055, - TC_OP_ATOMIC_OR_32 = 0x00000056, - TC_OP_ATOMIC_XOR_32 = 0x00000057, - TC_OP_ATOMIC_INC_32 = 0x00000058, - TC_OP_ATOMIC_DEC_32 = 0x00000059, - TC_OP_INVL2_NC = 0x0000005a, - TC_OP_NOP_RTN0 = 0x0000005b, - TC_OP_RESERVED_NON_FLOAT_32_1 = 0x0000005c, - TC_OP_RESERVED_NON_FLOAT_32_2 = 0x0000005d, - TC_OP_RESERVED_NON_FLOAT_32_3 = 0x0000005e, - TC_OP_RESERVED_NON_FLOAT_32_4 = 0x0000005f, - TC_OP_WBINVL2 = 0x00000060, - TC_OP_ATOMIC_FCMPSWAP_64 = 0x00000061, - TC_OP_ATOMIC_FMIN_64 = 0x00000062, - TC_OP_ATOMIC_FMAX_64 = 0x00000063, - TC_OP_RESERVED_FOP_64_0 = 0x00000064, - TC_OP_RESERVED_FOP_64_1 = 0x00000065, - TC_OP_RESERVED_FOP_64_2 = 0x00000066, - TC_OP_ATOMIC_SWAP_64 = 0x00000067, - TC_OP_ATOMIC_CMPSWAP_64 = 0x00000068, - TC_OP_ATOMIC_FCMPSWAP_FLUSH_DENORM_64 = 0x00000069, - TC_OP_ATOMIC_FMIN_FLUSH_DENORM_64 = 0x0000006a, - TC_OP_ATOMIC_FMAX_FLUSH_DENORM_64 = 0x0000006b, - TC_OP_RESERVED_FOP_FLUSH_DENORM_64_0 = 0x0000006c, - TC_OP_RESERVED_FOP_FLUSH_DENORM_64_1 = 0x0000006d, - TC_OP_RESERVED_FOP_FLUSH_DENORM_64_2 = 0x0000006e, - TC_OP_ATOMIC_ADD_64 = 0x0000006f, - TC_OP_ATOMIC_SUB_64 = 0x00000070, - TC_OP_ATOMIC_SMIN_64 = 0x00000071, - TC_OP_ATOMIC_UMIN_64 = 0x00000072, - TC_OP_ATOMIC_SMAX_64 = 0x00000073, - TC_OP_ATOMIC_UMAX_64 = 0x00000074, - TC_OP_ATOMIC_AND_64 = 0x00000075, - TC_OP_ATOMIC_OR_64 = 0x00000076, - TC_OP_ATOMIC_XOR_64 = 0x00000077, - TC_OP_ATOMIC_INC_64 = 0x00000078, - TC_OP_ATOMIC_DEC_64 = 0x00000079, - TC_OP_WBINVL2_NC = 0x0000007a, - TC_OP_NOP_ACK = 0x0000007b, - TC_OP_RESERVED_NON_FLOAT_64_1 = 0x0000007c, - TC_OP_RESERVED_NON_FLOAT_64_2 = 0x0000007d, - TC_OP_RESERVED_NON_FLOAT_64_3 = 0x0000007e, - TC_OP_RESERVED_NON_FLOAT_64_4 = 0x0000007f, - TC_OP_RESERVED_FOP_RTN_32_1__GFX09_10 = 0x00000005, - TC_OP_RESERVED_FOP_FLUSH_DENORM_RTN_32_1__GFX09_10 = 0x0000000d, - TC_OP_RESERVED_FOP_32_1__GFX09_10 = 0x00000045, - TC_OP_RESERVED_FOP_FLUSH_DENORM_32_1__GFX09_10 = 0x0000004d, - TC_OP_RESERVED_FADD_RTN_32__GFX11 = 0x00000005, - TC_OP_ATOMIC_FADD_FLUSH_DENORM_RTN_32__GFX11 = 0x0000000d, - TC_OP_RESERVED_FADD_32__GFX11 = 0x00000045, - TC_OP_ATOMIC_FADD_FLUSH_DENORM_32__GFX11 = 0x0000004d, -}; - -// Desc: Strucuture used to perform various atomic -// operations - add, subtract, increment, etc -struct AtomicTemplate { - PM4_MEC_ATOMIC_MEM atomic; -}; - -/// @brief PM4 command to write a 64-bit value into a memory -/// location accessible to Gpu -struct WriteDataTemplate { - PM4_MEC_WRITE_DATA write_data; -}; - -// ---------------------------------- MEC_COPY_DATA_src_sel_enum ---------------------------------- -enum MEC_COPY_DATA_src_sel_enum { - src_sel__mec_copy_data__mem_mapped_register = 0, - src_sel__mec_copy_data__tc_l2_obsolete = 1, - src_sel__mec_copy_data__tc_l2 = 2, - src_sel__mec_copy_data__gds = 3, - src_sel__mec_copy_data__perfcounters = 4, - src_sel__mec_copy_data__immediate_data = 5, - src_sel__mec_copy_data__atomic_return_data = 6, - src_sel__mec_copy_data__gds_atomic_return_data0 = 7, - src_sel__mec_copy_data__gds_atomic_return_data1 = 8, - src_sel__mec_copy_data__gpu_clock_count = 9, - src_sel__mec_copy_data__system_clock_count = 10, - src_sel__mec_copy_data__ext32perfcntr = 11, -}; - -// ---------------------------------- MEC_COPY_DATA_dst_sel_enum ---------------------------------- -enum MEC_COPY_DATA_dst_sel_enum { - dst_sel__mec_copy_data__mem_mapped_register = 0, - dst_sel__mec_copy_data__tc_l2 = 2, - dst_sel__mec_copy_data__gds = 3, - dst_sel__mec_copy_data__perfcounters = 4, - dst_sel__mec_copy_data__tc_l2_obsolete = 5, - dst_sel__mec_copy_data__mem_mapped_reg_dc = 6, - dst_sel__mec_copy_data__ext32perfcntr = 11, -}; - -// ------------------------------ MEC_COPY_DATA_src_cache_policy_enum ------------------------------ -enum MEC_COPY_DATA_src_cache_policy_enum { - src_cache_policy__mec_copy_data__lru = 0, - src_cache_policy__mec_copy_data__stream = 1, - src_cache_policy__mec_copy_data__noa = 2, - src_cache_policy__mec_copy_data__bypass = 3, -}; - -// --------------------------------- MEC_COPY_DATA_count_sel_enum --------------------------------- -enum MEC_COPY_DATA_count_sel_enum { - count_sel__mec_copy_data__32_bits_of_data = 0, - count_sel__mec_copy_data__64_bits_of_data = 1, -}; - -// --------------------------------- MEC_COPY_DATA_wr_confirm_enum --------------------------------- -enum MEC_COPY_DATA_wr_confirm_enum { - wr_confirm__mec_copy_data__do_not_wait_for_confirmation = 0, - wr_confirm__mec_copy_data__wait_for_confirmation = 1, -}; - -// ------------------------------ MEC_COPY_DATA_dst_cache_policy_enum ------------------------------ -enum MEC_COPY_DATA_dst_cache_policy_enum { - dst_cache_policy__mec_copy_data__lru = 0, - dst_cache_policy__mec_copy_data__stream = 1, - dst_cache_policy__mec_copy_data__noa = 2, - dst_cache_policy__mec_copy_data__bypass = 3, -}; - -// ------------------------------- MEC_COPY_DATA_pq_exe_status_enum ------------------------------- -enum MEC_COPY_DATA_pq_exe_status_enum { - pq_exe_status__mec_copy_data__default = 0, - pq_exe_status__mec_copy_data__phase_update = 1, -}; - -// ------------------------------- MEC_WRITE_DATA_dst_sel_enum ------------------------------- -enum MEC_WRITE_DATA_dst_sel_enum { - dst_sel__mec_write_data__mem_mapped_register = 0, - dst_sel__mec_write_data__tc_l2 = 2, - dst_sel__mec_write_data__gds = 3, - dst_sel__mec_write_data__memory = 5, - dst_sel__mec_write_data__memory_mapped_adc_persistent_state = 6 }; - -// ------------------------------- MEC_WRITE_DATA_addr_incr_enum ------------------------------- -enum MEC_WRITE_DATA_addr_incr_enum { - addr_incr__mec_write_data__increment_address = 0, - addr_incr__mec_write_data__do_not_increment_address = 1 }; - -// ------------------------------- MEC_WRITE_DATA_wr_confirm_enum ------------------------------- -enum MEC_WRITE_DATA_wr_confirm_enum { - wr_confirm__mec_write_data__do_not_wait_for_write_confirmation = 0, - wr_confirm__mec_write_data__wait_for_write_confirmation = 1 }; - -// ------------------------------- MEC_WRITE_DATA_cache_policy_enum ------------------------------- -enum MEC_WRITE_DATA_cache_policy_enum { - cache_policy__mec_write_data__lru = 0, - cache_policy__mec_write_data__stream = 1, - cache_policy__mec_write_data__noa = 2, - cache_policy__mec_write_data__bypass = 3 }; - -typedef struct PM4_MEC_COPY_DATA { - union { - PM4_MEC_TYPE_3_HEADER header; /// header - uint32_t ordinal1; - }; - union { - struct { - uint32_t src_sel : 4; - uint32_t reserved1 : 4; - uint32_t dst_sel : 4; - uint32_t reserved2 : 1; - uint32_t src_cache_policy : 2; - uint32_t reserved3 : 1; - uint32_t count_sel : 1; - uint32_t reserved4 : 3; - uint32_t wr_confirm : 1; - uint32_t reserved5 : 4; - uint32_t dst_cache_policy : 2; - uint32_t reserved6 : 2; - uint32_t pq_exe_status : 1; - uint32_t reserved7 : 2; - } bitfields2; - uint32_t ordinal2; - }; - union { - struct { - uint32_t src_reg_offset : 18; - uint32_t reserved8 : 14; - } bitfields3a; - struct { - uint32_t reserved9 : 2; - uint32_t src_32b_addr_lo : 30; - } bitfields3b; - struct { - uint32_t reserved10 : 3; - uint32_t src_64b_addr_lo : 29; - } bitfields3c; - struct { - uint32_t src_gds_addr_lo : 16; - uint32_t reserved11 : 16; - } bitfields3d; - uint32_t imm_data; - uint32_t ordinal3; - }; - union { - uint32_t src_memtc_addr_hi; - uint32_t src_imm_data; - uint32_t ordinal4; - }; - union { - struct { - uint32_t dst_reg_offset : 18; - uint32_t reserved12 : 14; - } bitfields5a; - struct { - uint32_t reserved13 : 2; - uint32_t dst_32b_addr_lo : 30; - } bitfields5b; - struct { - uint32_t reserved14 : 3; - uint32_t dst_64b_addr_lo : 29; - } bitfields5c; - struct { - uint32_t dst_gds_addr_lo : 16; - uint32_t reserved15 : 16; - } bitfields5d; - uint32_t ordinal5; - }; - uint32_t dst_addr_hi; -} PM4MEC_COPY_DATA; -namespace gfx9 { - -struct PM4_MEC_ACQUIRE_MEM { - union { - PM4_MEC_TYPE_3_HEADER header; - uint32_t ordinal1; - }; - union { - struct { - uint32_t coher_cntl:31; - uint32_t reserved1:1; - } bitfields2; - uint32_t ordinal2; - }; - uint32_t coher_size; - union { - struct { - uint32_t coher_size_hi:8; - uint32_t reserved2:24; - } bitfields4; - uint32_t ordinal4; - }; - uint32_t coher_base_lo; - union { - struct { - uint32_t coher_base_hi:24; - uint32_t reserved3:8; - } bitfields6; - uint32_t ordinal6; - }; - union { - struct { - uint32_t poll_interval:16; - uint32_t reserved4:16; - } bitfields7; - uint32_t ordinal7; - }; -}; - -struct PM4_MEC_RELEASE_MEM { - union { - PM4_MEC_TYPE_3_HEADER header; - uint32_t ordinal1; - }; - union { - struct { - uint32_t event_type:6; - uint32_t reserved1:2; - uint32_t event_index:4; - uint32_t tcl1_vol_action_ena:1; - uint32_t tc_vol_action_ena:1; - uint32_t reserved2:1; - uint32_t tc_wb_action_ena:1; - uint32_t tcl1_action_ena:1; - uint32_t tc_action_ena:1; - uint32_t reserved3:1; - uint32_t tc_nc_action_ena:1; - uint32_t tc_wc_action_ena:1; - uint32_t tc_md_action_ena:1; - uint32_t reserved4:3; - uint32_t cache_policy:2; - uint32_t reserved5:2; - uint32_t pq_exe_status:1; - uint32_t reserved6:2; - } bitfields2; - uint32_t ordinal2; - }; - union { - struct { - uint32_t reserved7:16; - uint32_t dst_sel:2; - uint32_t reserved8:6; - uint32_t int_sel:3; - uint32_t reserved9:2; - uint32_t data_sel:3; - } bitfields3; - uint32_t ordinal3; - }; - union { - struct { - uint32_t reserved10:2; - uint32_t address_lo_32b:30; - } bitfields4a; - struct { - uint32_t reserved11:3; - uint32_t address_lo_64b:29; - } bitfields4b; - uint32_t reserved12; - uint32_t ordinal4; - }; - union { - uint32_t address_hi; - uint32_t reserved13; - uint32_t ordinal5; - }; - union { - uint32_t data_lo; - uint32_t cmp_data_lo; - struct { - uint32_t dw_offset:16; - uint32_t num_dwords:16; - } bitfields6c; - uint32_t reserved14; - uint32_t ordinal6; - }; - union { - uint32_t data_hi; - uint32_t cmp_data_hi; - uint32_t reserved15; - uint32_t reserved16; - uint32_t ordinal7; - }; - uint32_t int_ctxid; -}; - -struct PM4_MEC_WAIT_REG_MEM64 { - union { - PM4_MEC_TYPE_3_HEADER header; - uint32_t ordinal1; - }; - union { - struct { - uint32_t function:3; - uint32_t reserved1:1; - uint32_t mem_space:2; - uint32_t operation:2; - uint32_t reserved2:24; - } bitfields2; - uint32_t ordinal2; - }; - union { - struct { - uint32_t reserved3:3; - uint32_t mem_poll_addr_lo:29; - } bitfields3a; - struct { - uint32_t reg_poll_addr:18; - uint32_t reserved4:14; - } bitfields3b; - struct { - uint32_t reg_write_addr1:18; - uint32_t reserved5:14; - } bitfields3c; - uint32_t ordinal3; - }; - union { - uint32_t mem_poll_addr_hi; - struct { - uint32_t reg_write_addr2:18; - uint32_t reserved6:14; - } bitfields4b; - uint32_t ordinal4; - }; - uint32_t reference; - uint32_t reference_hi; - uint32_t mask; - uint32_t mask_hi; - union { - struct { - uint32_t poll_interval:16; - uint32_t reserved7:16; - } bitfields9; - uint32_t ordinal9; - }; -}; - -/// @brief Structure used to configure the flushing of -/// various caches - instruction, constants, L1 and L2 -struct AcquireMemTemplate { - PM4_MEC_ACQUIRE_MEM acquire_mem; -}; - -struct EndofKernelNotifyTemplate { - PM4_MEC_RELEASE_MEM release_mem; -}; - -/// @brief PM4 command to wait for a certain event before proceeding -/// to process another command on the queue -struct WaitRegMem64Template { - PM4_MEC_WAIT_REG_MEM64 wait_reg_mem; -}; - -} // gfx9 namespace - -namespace gfx10 { - -struct PM4_MEC_ACQUIRE_MEM { - union { - PM4_MEC_TYPE_3_HEADER header; - uint32_t ordinal1; - }; - uint32_t reserved1; - uint32_t coher_size; - union { - struct { - uint32_t coher_size_hi:8; - uint32_t reserved2:24; - } bitfields4; - uint32_t ordinal4; - }; - uint32_t coher_base_lo; - union { - struct { - uint32_t coher_base_hi:24; - uint32_t reserved3:8; - } bitfields6; - uint32_t ordinal6; - }; - union { - struct { - uint32_t poll_interval:16; - uint32_t reserved4:16; - } bitfields7; - uint32_t ordinal7; - }; - union { - struct { - uint32_t gcr_cntl:19; - uint32_t reserved4:13; - } bitfields8; - uint32_t ordinal8; - }; -}; - -struct PM4_MEC_RELEASE_MEM { - union { - PM4_MEC_TYPE_3_HEADER header; - uint32_t ordinal1; - }; - union { - struct { - uint32_t event_type:6; - uint32_t reserved1:2; - uint32_t event_index:4; - uint32_t gcr_cntl:12; - uint32_t reserved2:1; - uint32_t cache_policy:2; - uint32_t reserved3:2; - uint32_t pq_exe_status:1; - uint32_t reserved4:2; - } bitfields2; - uint32_t ordinal2; - }; - union { - struct { - uint32_t reserved7:16; - uint32_t dst_sel:2; - uint32_t reserved8:2; - uint32_t mes_intr_pipe:2; - uint32_t mes_action_id:2; - uint32_t int_sel:3; - uint32_t reserved9:2; - uint32_t data_sel:3; - } bitfields3; - uint32_t ordinal3; - }; - union { - struct { - uint32_t reserved10:2; - uint32_t address_lo_32b:30; - } bitfields4a; - struct { - uint32_t reserved11:3; - uint32_t address_lo_64b:29; - } bitfields4b; - uint32_t reserved12; - uint32_t ordinal4; - }; - union { - uint32_t address_hi; - uint32_t reserved13; - uint32_t ordinal5; - }; - union { - uint32_t data_lo; - uint32_t cmp_data_lo; - struct { - uint32_t dw_offset:16; - uint32_t num_dwords:16; - } bitfields6c; - uint32_t reserved14; - uint32_t ordinal6; - }; - union { - uint32_t data_hi; - uint32_t cmp_data_hi; - uint32_t reserved15; - uint32_t reserved16; - uint32_t ordinal7; - }; - uint32_t int_ctxid; -}; - -struct PM4_MEC_WAIT_REG_MEM64 { - union { - PM4_MEC_TYPE_3_HEADER header; ///header - uint32_t ordinal1; - }; - union { - struct { - uint32_t function:3; - uint32_t reserved1:1; - uint32_t mem_space:2; - uint32_t operation:2; - uint32_t reserved2:14; - uint32_t mes_intr_pipe:2; - uint32_t mes_action:1; - uint32_t cache_policy:2; - uint32_t reserved3:5; - } bitfields2; - uint32_t ordinal2; - }; - union { - struct { - uint32_t reserved4:3; - uint32_t mem_poll_addr_lo:29; - } bitfields3a; - struct { - uint32_t reg_poll_addr:18; - uint32_t reserved5:14; - } bitfields3b; - struct { - uint32_t reg_write_addr1:18; - uint32_t reserved6:14; - } bitfields3c; - uint32_t ordinal3; - }; - union { - uint32_t mem_poll_addr_hi; - struct { - uint32_t reg_write_addr2:18; - uint32_t reserved7:14; - } bitfields4b; - uint32_t ordinal4; - }; - uint32_t reference; - uint32_t reference_hi; - uint32_t mask; - uint32_t mask_hi; - union { - struct { - uint32_t poll_interval:16; - uint32_t reserved8:15; - uint32_t optimize_ace_offload_mode:1; - } bitfields9; - uint32_t ordinal9; - }; -}; - -/// @brief Structure used to configure the flushing of -/// various caches - instruction, constants, L1 and L2 -struct AcquireMemTemplate { - PM4_MEC_ACQUIRE_MEM acquire_mem; -}; - -struct EndofKernelNotifyTemplate { - PM4_MEC_RELEASE_MEM release_mem; -}; - -struct WaitRegMem64Template { - PM4_MEC_WAIT_REG_MEM64 wait_reg_mem; -}; - -} // gfx10 namespace - -namespace gfx11 { - -struct PM4_MEC_RELEASE_MEM { - union { - PM4_MEC_TYPE_3_HEADER header; - uint32_t ordinal1; - }; - union { - struct { - uint32_t event_type:6; - uint32_t reserved1:2; - uint32_t event_index:4; - uint32_t gcr_cntl:13; - uint32_t cache_policy:2; - uint32_t reserved2:1; - uint32_t pq_exe_status:1; - uint32_t reserved3:1; - uint32_t glk_inv:1; - uint32_t reserved4:1; - } bitfields2; - uint32_t ordinal2; - }; - union { - struct { - uint32_t reserved5:16; - uint32_t dst_sel:2; - uint32_t reserved6:2; - uint32_t mes_intr_pipe:2; - uint32_t mes_action_id:2; - uint32_t int_sel:3; - uint32_t reserved7:2; - uint32_t data_sel:3; - } bitfields3; - uint32_t ordinal3; - }; - union { - struct { - uint32_t reserved8:2; - uint32_t address_lo_32b:30; - } bitfields4a; - struct { - uint32_t reserved9:3; - uint32_t address_lo_64b:29; - } bitfields4b; - uint32_t reserved10; - uint32_t ordinal4; - }; - union { - uint32_t address_hi; - uint32_t reserved11; - uint32_t ordinal5; - }; - union { - uint32_t data_lo; - uint32_t cmp_data_lo; - struct { - uint32_t dw_offset:16; - uint32_t num_dwords:16; - } bitfields6c; - uint32_t reserved12; - uint32_t ordinal6; - }; - union { - uint32_t data_hi; - uint32_t cmp_data_hi; - uint32_t reserved13; - uint32_t reserved14; - uint32_t ordinal7; - }; - uint32_t int_ctxid; -}; - -struct EndofKernelNotifyTemplate { - PM4_MEC_RELEASE_MEM release_mem; -}; - -} // gfx11 namespace - -#endif diff --git a/inc/registers.h b/inc/registers.h deleted file mode 100644 index 4d430b41e4..0000000000 --- a/inc/registers.h +++ /dev/null @@ -1,363 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// -// The University of Illinois/NCSA -// Open Source License (NCSA) -// -// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved. -// -// Developed by: -// -// AMD Research and AMD HSA Software Development -// -// Advanced Micro Devices, Inc. -// -// www.amd.com -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal with the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimers. -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimers in -// the documentation and/or other materials provided with the distribution. -// - Neither the names of Advanced Micro Devices, Inc, -// nor the names of its contributors may be used to endorse or promote -// products derived from this Software without specific prior written -// permission. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR -// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// DEALINGS WITH THE SOFTWARE. -// -//////////////////////////////////////////////////////////////////////////////// - -// This file is used only for open source cmake builds, if we hardcode the -// register values in amd_aql_queue.cpp then this file won't be required. For -// now we are using this file where register details are spelled out in the -// structs/unions below. -#ifndef _WSL_INC_REGISTERS_H_ -#define _WSL_INC_REGISTERS_H_ - -typedef enum SQ_RSRC_BUF_TYPE { -SQ_RSRC_BUF = 0x00000000, -SQ_RSRC_BUF_RSVD_1 = 0x00000001, -SQ_RSRC_BUF_RSVD_2 = 0x00000002, -SQ_RSRC_BUF_RSVD_3 = 0x00000003, -} SQ_RSRC_BUF_TYPE; - -typedef enum BUF_DATA_FORMAT { -BUF_DATA_FORMAT_INVALID = 0x00000000, -BUF_DATA_FORMAT_8 = 0x00000001, -BUF_DATA_FORMAT_16 = 0x00000002, -BUF_DATA_FORMAT_8_8 = 0x00000003, -BUF_DATA_FORMAT_32 = 0x00000004, -BUF_DATA_FORMAT_16_16 = 0x00000005, -BUF_DATA_FORMAT_10_11_11 = 0x00000006, -BUF_DATA_FORMAT_11_11_10 = 0x00000007, -BUF_DATA_FORMAT_10_10_10_2 = 0x00000008, -BUF_DATA_FORMAT_2_10_10_10 = 0x00000009, -BUF_DATA_FORMAT_8_8_8_8 = 0x0000000a, -BUF_DATA_FORMAT_32_32 = 0x0000000b, -BUF_DATA_FORMAT_16_16_16_16 = 0x0000000c, -BUF_DATA_FORMAT_32_32_32 = 0x0000000d, -BUF_DATA_FORMAT_32_32_32_32 = 0x0000000e, -BUF_DATA_FORMAT_RESERVED_15 = 0x0000000f, -} BUF_DATA_FORMAT; - -typedef enum BUF_NUM_FORMAT { -BUF_NUM_FORMAT_UNORM = 0x00000000, -BUF_NUM_FORMAT_SNORM = 0x00000001, -BUF_NUM_FORMAT_USCALED = 0x00000002, -BUF_NUM_FORMAT_SSCALED = 0x00000003, -BUF_NUM_FORMAT_UINT = 0x00000004, -BUF_NUM_FORMAT_SINT = 0x00000005, -BUF_NUM_FORMAT_SNORM_OGL__SI__CI = 0x00000006, -BUF_NUM_FORMAT_RESERVED_6__VI = 0x00000006, -BUF_NUM_FORMAT_FLOAT = 0x00000007, -} BUF_NUM_FORMAT; - -typedef enum BUF_FORMAT { -BUF_FORMAT_32_UINT = 0x00000014, -} BUF_FORMAT; - -typedef enum SQ_SEL_XYZW01 { -SQ_SEL_0 = 0x00000000, -SQ_SEL_1 = 0x00000001, -SQ_SEL_RESERVED_0 = 0x00000002, -SQ_SEL_RESERVED_1 = 0x00000003, -SQ_SEL_X = 0x00000004, -SQ_SEL_Y = 0x00000005, -SQ_SEL_Z = 0x00000006, -SQ_SEL_W = 0x00000007, -} SQ_SEL_XYZW01; - - union COMPUTE_TMPRING_SIZE { - struct { -#if defined(LITTLEENDIAN_CPU) - unsigned int WAVES : 12; - unsigned int WAVESIZE : 13; - unsigned int : 7; -#elif defined(BIGENDIAN_CPU) - unsigned int : 7; - unsigned int WAVESIZE : 13; - unsigned int WAVES : 12; -#endif - } bitfields, bits; - unsigned int u32All; - signed int i32All; - float f32All; - }; - - union COMPUTE_TMPRING_SIZE_GFX11 { - struct { -#if defined(LITTLEENDIAN_CPU) - unsigned int WAVES : 12; - unsigned int WAVESIZE : 15; - unsigned int : 5; -#elif defined(BIGENDIAN_CPU) - unsigned int : 5; - unsigned int WAVESIZE : 15; - unsigned int WAVES : 12; -#endif - } bitfields, bits; - unsigned int u32All; - signed int i32All; - float f32All; - }; - - union COMPUTE_TMPRING_SIZE_GFX12 { - struct { -#if defined(LITTLEENDIAN_CPU) - unsigned int WAVES : 12; - unsigned int WAVESIZE : 18; - unsigned int : 2; -#elif defined(BIGENDIAN_CPU) - unsigned int : 2; - unsigned int WAVESIZE : 18; - unsigned int WAVES : 12; -#endif - } bitfields, bits; - unsigned int u32All; - signed int i32All; - float f32All; - }; - - union SQ_BUF_RSRC_WORD0 { - struct { -#if defined(LITTLEENDIAN_CPU) - unsigned int BASE_ADDRESS : 32; -#elif defined(BIGENDIAN_CPU) - unsigned int BASE_ADDRESS : 32; -#endif - } bitfields, bits; - unsigned int u32All; - signed int i32All; - float f32All; - }; - - - union SQ_BUF_RSRC_WORD1 { - struct { -#if defined(LITTLEENDIAN_CPU) - unsigned int BASE_ADDRESS_HI : 16; - unsigned int STRIDE : 14; - unsigned int CACHE_SWIZZLE : 1; - unsigned int SWIZZLE_ENABLE : 1; -#elif defined(BIGENDIAN_CPU) - unsigned int SWIZZLE_ENABLE : 1; - unsigned int CACHE_SWIZZLE : 1; - unsigned int STRIDE : 14; - unsigned int BASE_ADDRESS_HI : 16; -#endif - } bitfields, bits; - unsigned int u32All; - signed int i32All; - float f32All; - }; - - union SQ_BUF_RSRC_WORD1_GFX11 { - struct { -#if defined(LITTLEENDIAN_CPU) - unsigned int BASE_ADDRESS_HI : 16; - unsigned int STRIDE : 14; - unsigned int SWIZZLE_ENABLE : 2; -#elif defined(BIGENDIAN_CPU) - unsigned int SWIZZLE_ENABLE : 2; - unsigned int STRIDE : 14; - unsigned int BASE_ADDRESS_HI : 16; -#endif - } bitfields, bits; - unsigned int u32All; - signed int i32All; - float f32All; - }; - - - union SQ_BUF_RSRC_WORD2 { - struct { -#if defined(LITTLEENDIAN_CPU) - unsigned int NUM_RECORDS : 32; -#elif defined(BIGENDIAN_CPU) - unsigned int NUM_RECORDS : 32; -#endif - } bitfields, bits; - unsigned int u32All; - signed int i32All; - float f32All; - }; - - - union SQ_BUF_RSRC_WORD3 { - struct { -#if defined(LITTLEENDIAN_CPU) - unsigned int DST_SEL_X : 3; - unsigned int DST_SEL_Y : 3; - unsigned int DST_SEL_Z : 3; - unsigned int DST_SEL_W : 3; - unsigned int NUM_FORMAT : 3; - unsigned int DATA_FORMAT : 4; - unsigned int ELEMENT_SIZE : 2; - unsigned int INDEX_STRIDE : 2; - unsigned int ADD_TID_ENABLE : 1; - unsigned int ATC__CI__VI : 1; - unsigned int HASH_ENABLE : 1; - unsigned int HEAP : 1; - unsigned int MTYPE__CI__VI : 3; - unsigned int TYPE : 2; -#elif defined(BIGENDIAN_CPU) - unsigned int TYPE : 2; - unsigned int MTYPE__CI__VI : 3; - unsigned int HEAP : 1; - unsigned int HASH_ENABLE : 1; - unsigned int ATC__CI__VI : 1; - unsigned int ADD_TID_ENABLE : 1; - unsigned int INDEX_STRIDE : 2; - unsigned int ELEMENT_SIZE : 2; - unsigned int DATA_FORMAT : 4; - unsigned int NUM_FORMAT : 3; - unsigned int DST_SEL_W : 3; - unsigned int DST_SEL_Z : 3; - unsigned int DST_SEL_Y : 3; - unsigned int DST_SEL_X : 3; -#endif - } bitfields, bits; - unsigned int u32All; - signed int i32All; - float f32All; - }; - - union SQ_BUF_RSRC_WORD3_GFX10 { - struct { -#if defined(LITTLEENDIAN_CPU) - unsigned int DST_SEL_X : 3; - unsigned int DST_SEL_Y : 3; - unsigned int DST_SEL_Z : 3; - unsigned int DST_SEL_W : 3; - unsigned int FORMAT : 7; - unsigned int RESERVED1 : 2; - unsigned int INDEX_STRIDE : 2; - unsigned int ADD_TID_ENABLE : 1; - unsigned int RESOURCE_LEVEL : 1; - unsigned int RESERVED2 : 3; - unsigned int OOB_SELECT : 2; - unsigned int TYPE : 2; -#elif defined(BIGENDIAN_CPU) - unsigned int TYPE : 2; - unsigned int OOB_SELECT : 2; - unsigned int RESERVED2 : 3; - unsigned int RESOURCE_LEVEL : 1; - unsigned int ADD_TID_ENABLE : 1; - unsigned int INDEX_STRIDE : 2; - unsigned int RESERVED1 : 2; - unsigned int FORMAT : 7; - unsigned int DST_SEL_W : 3; - unsigned int DST_SEL_Z : 3; - unsigned int DST_SEL_Y : 3; - unsigned int DST_SEL_X : 3; -#endif - } bitfields, bits; - unsigned int u32All; - signed int i32All; - float f32All; - }; - - // From V# Table - union SQ_BUF_RSRC_WORD3_GFX11 { - struct { -#if defined(LITTLEENDIAN_CPU) - unsigned int DST_SEL_X : 3; - unsigned int DST_SEL_Y : 3; - unsigned int DST_SEL_Z : 3; - unsigned int DST_SEL_W : 3; - unsigned int FORMAT : 6; - unsigned int RESERVED1 : 3; - unsigned int INDEX_STRIDE : 2; - unsigned int ADD_TID_ENABLE : 1; - unsigned int RESERVED2 : 4; - unsigned int OOB_SELECT : 2; - unsigned int TYPE : 2; -#elif defined(BIGENDIAN_CPU) - unsigned int TYPE : 2; - unsigned int OOB_SELECT : 2; - unsigned int RESERVED2 : 4; - unsigned int ADD_TID_ENABLE : 1; - unsigned int INDEX_STRIDE : 2; - unsigned int RESERVED1 : 3; - unsigned int FORMAT : 6; - unsigned int DST_SEL_W : 3; - unsigned int DST_SEL_Z : 3; - unsigned int DST_SEL_Y : 3; - unsigned int DST_SEL_X : 3; -#endif - } bitfields, bits; - unsigned int u32All; - signed int i32All; - float f32All; - }; - // From V# Table - union SQ_BUF_RSRC_WORD3_GFX12 { - struct { -#if defined(LITTLEENDIAN_CPU) - unsigned int DST_SEL_X : 3; - unsigned int DST_SEL_Y : 3; - unsigned int DST_SEL_Z : 3; - unsigned int DST_SEL_W : 3; - unsigned int FORMAT : 6; - unsigned int RESERVED1 : 3; - unsigned int INDEX_STRIDE : 2; - unsigned int ADD_TID_ENABLE : 1; - unsigned int WRITE_COMPRESS_ENABLE : 1; - unsigned int COMPRESSION_EN : 1; - unsigned int COMPRESSION_ACCESS_MODE : 2; - unsigned int OOB_SELECT : 2; - unsigned int TYPE : 2; -#elif defined(BIGENDIAN_CPU) - unsigned int TYPE : 2; - unsigned int OOB_SELECT : 2; - unsigned int COMPRESSION_ACCESS_MODE : 2; - unsigned int COMPRESSION_EN : 1; - unsigned int WRITE_COMPRESS_ENABLE : 1; - unsigned int ADD_TID_ENABLE : 1; - unsigned int INDEX_STRIDE : 2; - unsigned int RESERVED1 : 3; - unsigned int FORMAT : 6; - unsigned int DST_SEL_W : 3; - unsigned int DST_SEL_Z : 3; - unsigned int DST_SEL_Y : 3; - unsigned int DST_SEL_X : 3; -#endif - } bitfields, bits; - unsigned int u32All; - signed int i32All; - float f32All; - }; -#endif // header guard diff --git a/inc/thunk_proxy/thunk_proxy.h b/inc/thunk_proxy/thunk_proxy.h deleted file mode 100644 index fcdb5ea4de..0000000000 --- a/inc/thunk_proxy/thunk_proxy.h +++ /dev/null @@ -1,128 +0,0 @@ -#ifndef _WSL_INC_THUNK_PROXY_H_ -#define _WSL_INC_THUNK_PROXY_H_ - -#include - -namespace thunk_proxy { -enum AllocDomain { - kSystem, - kLocal, - kUserMemory, - kUserQueue, - kDomainCount, -}; - -enum MemFlag { - kFineGrain = (1ULL << 0), - kKernarg = (1ULL << 1), -}; - -enum EngineFlag { - KCOMPUTE0 = (1ULL << 0), - KDRMDMA = (1ULL << 1), - KDRMDMA1 = (1ULL << 2), -}; - -enum SchedLevel { - kLow = 0, - kNormal = 1, - kHigh = 2, -}; - -enum AsicFamilyType { - kPlumBONITO, - kNavi44, - kNavi48 -}; - -struct HwsInfo { - union { - struct { - uint32_t gfxHwsEnabled : 1; - uint32_t computeHwsEnabled : 1; - uint32_t dmaHwsEnabled : 1; - uint32_t dma1HwsEnabled : 1; - uint32_t reserved : 28; - } hwsMask; - uint32_t osHwsEnableFlags; - }; - uint64_t engineOrdinalMask; // Indicates which engines (by ordinal) support MES HWS -}; - -typedef struct { - int major; - int minor; - int stepping; - bool is_dgpu; - char product_name[MAX_PATH]; - const char *uuid; - AsicFamilyType family; - uint32_t device_id; - uint32_t wavefront_size; - uint32_t compute_unit_count; - uint32_t max_engine_clock_mhz; - uint32_t watch_points_num; - uint32_t pci_bus_addr; - uint32_t memory_bus_width; - uint32_t max_memory_clock_mhz; - uint64_t gpu_counter_frequency; - uint32_t wave_per_cu; - uint32_t simd_per_cu; - uint32_t max_scratch_slots_per_cu; - uint32_t num_shader_engine; - uint32_t shader_array_per_shader_engine; - uint32_t domain; - uint32_t num_gws; - uint32_t asic_revision; - uint64_t local_visible_heap_size; - uint64_t local_invisible_heap_size; - uint64_t private_aperture_base; - uint64_t private_aperture_size; - uint64_t shared_aperture_base; - uint64_t shared_aperture_size; - uint32_t user_queue_size; - uint32_t lds_size; - uint32_t big_page_alignment_size; - uint32_t hw_big_page_min_alignment_size; - uint32_t hw_big_page_alignment_size; - bool enable_big_page_alignment; - uint32_t mec_fw_version; - uint32_t sdma_fw_version; - uint32_t l1_cache_size; - uint32_t l2_cache_size; - uint32_t l3_cache_size; - uint32_t gl2_cacheline_size; - uint32_t num_cp_queues; - HwsInfo hwsInfo; - std::vector sdma_schedid; - uint32_t compute_schedid; - bool state_shadowing_by_cpfw; - bool platform_atomic_support; - void *adapter_info; - void *adapter_ex_info; -} DeviceInfo; - -int EngineOrdinal(int engine, DeviceInfo *device_info); -bool GetHwsEnabled(int engine, DeviceInfo *device_info); -bool ShouldDisableGpuTimeout(int engine, DeviceInfo *device_info); -bool ParseAdapterInfo(D3DKMT_HANDLE adapter, DeviceInfo *device_info); -bool QueryAdapterSupported(D3DKMT_HANDLE adapter); - -uint32_t QueueEngine2EngineFlag(uint32_t queue_engine); -void SetAllocationInfo(void *data, uint64_t size, AllocDomain domain, - uint64_t addr, uint32_t mem_flags, uint32_t engine_flag, const DeviceInfo &device_info); -bool CreatePrivateAllocInfo(int num_handles, void **ppdrv_priv, void **ppalloc_priv, - int *pdrv_priv_data_size, int *palloc_priv_data_size); -void DestroyPrivateAllocInfo(void *drv_priv, void *alloc_priv); - -int CreateSubmitPrivData(void **priv_data, D3DKMT_HANDLE queue, uint64_t command_addr, - uint64_t command_size, bool is_hw_queue); -int CreateHwQueuePrivData(void **priv_data, D3DKMT_HANDLE context, - bool FwManagedGfxState, SchedLevel level = kNormal); -int CreateContextPrivData(void **priv_data, bool FwManagedGfxState); -int CreatePowerOptPrivData(void **priv_data, bool restore); -int CreateCalibratedTimestampsPrivData(void **priv_data); -void QueryCalibratedTimestamps(void* priv, uint64_t* gpu, uint64_t* cpu); -void DestroyPrivData(void *priv_data); -} -#endif diff --git a/inc/thunk_proxy/wddm_types.h b/inc/thunk_proxy/wddm_types.h deleted file mode 100644 index 83422a83d3..0000000000 --- a/inc/thunk_proxy/wddm_types.h +++ /dev/null @@ -1,155 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// -// The University of Illinois/NCSA -// Open Source License (NCSA) -// -// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. -// -// Developed by: -// -// AMD Research and AMD HSA Software Development -// -// Advanced Micro Devices, Inc. -// -// www.amd.com -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal with the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimers. -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimers in -// the documentation and/or other materials provided with the distribution. -// - Neither the names of Advanced Micro Devices, Inc, -// nor the names of its contributors may be used to endorse or promote -// products derived from this Software without specific prior written -// permission. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR -// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// DEALINGS WITH THE SOFTWARE. -// -//////////////////////////////////////////////////////////////////////////////// - -#ifndef _WSL_INC_THUNK_PROXY_WDDM_TYPES_H_ -#define _WSL_INC_THUNK_PROXY_WDDM_TYPES_H_ - -#include - -#include - -typedef uint32_t UINT, *UINT_PTR; -typedef int32_t INT32; -typedef int32_t LONG; -typedef uint32_t ULONG, *ULONG_PTR; -typedef int64_t LONGLONG; -typedef int64_t LONG64; -typedef uint64_t ULONGLONG; -typedef uint64_t ULONG64, *ULONG64_PTR; -typedef uint8_t BYTE; -typedef uint16_t WORD; -typedef uint32_t DWORD; -typedef int32_t BOOL; -typedef int32_t NTSTATUS; -typedef uint16_t USHORT; -typedef uint16_t UINT16; -typedef uint32_t UINT32; -typedef uint64_t UINT64; -typedef int32_t INT; -typedef uint64_t SIZE_T; -typedef void VOID; -typedef float FLOAT; -typedef char CHAR; -typedef unsigned char UCHAR; -typedef UCHAR BOOLEAN; -typedef int16_t WCHAR; -typedef void *HANDLE; -typedef void *PVOID; -typedef void *LPVOID; -typedef const int16_t *PCWSTR; - -#define ULONG ULONG -#define ULONG_PTR ULONG_PTR -#define USHORT USHORT - -#define DECLARE_HANDLE(name) struct name##__{int unused;}; typedef struct name##__ *name -#define C_ASSERT(e) typedef char __C_ASSERT__[(e)?1:-1] - -DECLARE_HANDLE(HWND); -DECLARE_HANDLE(HDC); -DECLARE_HANDLE(PALETTEENTRY); - -typedef struct tagPOINT { - LONG x; - LONG y; -} POINT; - -typedef struct tagRECT { - LONG left; - LONG top; - LONG right; - LONG bottom; -} RECT; - -typedef struct tagRECTL { - LONG left; - LONG top; - LONG right; - LONG bottom; -} RECTL; - -typedef union _LARGE_INTEGER { - struct { - DWORD LowPart; - DWORD HighPart; - } u; - LONGLONG QuadPart; -} LARGE_INTEGER; - -typedef LARGE_INTEGER *PLARGE_INTEGER; - -typedef struct _LUID { - ULONG LowPart; - LONG HighPart; -} LUID, *PLUID; - -typedef enum _DEVICE_POWER_STATE { - PowerDeviceUnspecified = 0, - PowerDeviceD0, - PowerDeviceD1, - PowerDeviceD2, - PowerDeviceD3, - PowerDeviceMaximum -} DEVICE_POWER_STATE, *PDEVICE_POWER_STATE; - -#define _Check_return_ -#define APIENTRY -#define CONST const -#define IN -#define OUT -#define FAR -#define MAX_PATH 260 -#define __stdcall - -#ifndef GUID_DEFINED -#define GUID_DEFINED -typedef struct _GUID { - uint32_t Data1; - uint16_t Data2; - uint16_t Data3; - uint8_t Data4[ 8 ]; -} GUID; -#endif - -#include - -#endif diff --git a/inc/wddm/cmd_util.h b/inc/wddm/cmd_util.h deleted file mode 100644 index 7ea0d80d3a..0000000000 --- a/inc/wddm/cmd_util.h +++ /dev/null @@ -1,83 +0,0 @@ -/* Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. */ - -#ifndef _WSL_INC_WDDM_CMD_UTIL_H_ -#define _WSL_INC_WDDM_CMD_UTIL_H_ - -#include -#include "hsa-runtime/inc/hsa.h" -#include "hsa-runtime/inc/amd_hsa_queue.h" -#include "hsa-runtime/inc/amd_hsa_kernel_code.h" -#include "inc/pm4_cmds.h" -#include "util/utils.h" -#include "libhsakmt.h" - -namespace wsl { -namespace thunk { - -struct DispatchInfo { - uint8_t major; - hsa_kernel_dispatch_packet_t *pPacket; - void *pEntry; - const amd_kernel_code_t *pKernelObject; - uint32_t ldsBlks; - amd_queue_t *pAmdQueue; - bool wave32; - uint32_t srd; - void *pScratchBase; - uint32_t scratchSizePerWave; - uint32_t scratchBaseOffset[2]; - uint32_t offsetCnt; -}; - -class CmdUtil { -public: - CmdUtil() {}; - ~CmdUtil() {}; - - size_t BuildCopyData( - uint64_t *pDstAddr, - void *pBuffer, - uint32_t dstSel = dst_sel__mec_copy_data__tc_l2, - uint32_t dstCachePolicy = dst_cache_policy__mec_copy_data__stream, - uint32_t srcSel = src_sel__mec_copy_data__gpu_clock_count, - uint32_t srcCachePolicy = src_cache_policy__mec_copy_data__lru, - uint32_t countSel = count_sel__mec_copy_data__64_bits_of_data, - uint32_t wrConfirm = wr_confirm__mec_copy_data__wait_for_confirmation); - - size_t BuildBarrier( - void *pBuffer, - uint32_t eventIndex = event_index__mec_event_write__cs_partial_flush, - uint32_t eventType = CS_PARTIAL_FLUSH); - - size_t BuildWriteData64Command( - void *pBuffer, - uint64_t* write_addr, - uint64_t write_value); - - size_t BuildAcquireMem( - uint8_t major, - void *pBuffer); - - size_t BuildScratch( - void *pScratchBase, - void *pBuffer); - - size_t BuildComputeShaderParams( - void *pBuffer); - - size_t BuildDispatch( - struct DispatchInfo *pInfo, - void *pBuffer); - - size_t BuildAtomicMem( - uint64_t *pAddr, - uint32_t atomic, - void *pBuffer, - uint32_t cachePolicy = cache_policy__mec_atomic_mem__stream, - uint64_t srcData = 1); -}; - -} // namespace thunk -} // namespace wsl - -#endif \ No newline at end of file diff --git a/inc/wddm/device.h b/inc/wddm/device.h deleted file mode 100644 index 98a759adf3..0000000000 --- a/inc/wddm/device.h +++ /dev/null @@ -1,257 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// -// The University of Illinois/NCSA -// Open Source License (NCSA) -// -// Copyright (c) 2020, Advanced Micro Devices, Inc. All rights reserved. -// -// Developed by: -// -// AMD Research and AMD HSA Software Development -// -// Advanced Micro Devices, Inc. -// -// www.amd.com -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal with the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimers. -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimers in -// the documentation and/or other materials provided with the distribution. -// - Neither the names of Advanced Micro Devices, Inc, -// nor the names of its contributors may be used to endorse or promote -// products derived from this Software without specific prior written -// permission. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR -// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// DEALINGS WITH THE SOFTWARE. -// -//////////////////////////////////////////////////////////////////////////////// - -#ifndef _WSL_INC_WDDM_DEVICE_H_ -#define _WSL_INC_WDDM_DEVICE_H_ - -#include -#include - -#include -#include -#include - -#include "inc/wddm/types.h" -#include "inc/thunk_proxy/thunk_proxy.h" -#include "inc/wddm/va_mgr.h" -#include "inc/wddm/status.h" -#include "inc/wddm/types.h" -#include "inc/wddm/gpu_memory.h" -#include "inc/wddm/cmd_util.h" - -namespace wsl { -namespace thunk { - -//class Queue; -class WDDMQueue; - -// WSL2 hyperv GPADL protocol limitation -#define MAX_USERPTR_BLOCK_SIZE 0xf0000000 -#define START_NON_CANONICAL_ADDR (1ULL << 47) -#define END_NON_CANONICAL_ADDR (~0UL - (1UL << 47)) -#define IS_OVERLAPPING(start1, size1, start2, size2) \ - ((start1 < (start2 + size2)) && (start2 < (start1 + size1))) - -class WDDMDevice { -public: - static constexpr size_t GpuMemoryChunkSize = 2 * (1ULL << 30); // 2 GB - - WDDMDevice(D3DKMT_HANDLE adapter, LUID adapter_luid); - ~WDDMDevice(); - - int Major() { return device_info_.major; } - int Minor() { return device_info_.minor; } - int Stepping() { return device_info_.stepping; } - bool IsDgpu() { return device_info_.is_dgpu; } - const char *ProductName() { return device_info_.product_name; } - const char *Uuid() { return device_info_.uuid; } - thunk_proxy::AsicFamilyType GfxFamily() { return device_info_.family; } - uint32_t DeviceId() { return device_info_.device_id; } - uint32_t WavefrontSize() { return device_info_.wavefront_size; } - uint32_t ComputeUnitCount() { return device_info_.compute_unit_count; } - uint32_t MaxEngineClockMhz() { return device_info_.max_engine_clock_mhz; } - uint32_t WatchPointsNum() { return device_info_.watch_points_num; } - uint32_t PciBusAddr() { return device_info_.pci_bus_addr; } - - uint32_t MemoryBusWidth() { return device_info_.memory_bus_width; } - uint32_t MaxMemoryClockMhz() { return device_info_.max_memory_clock_mhz; } - uint32_t WavePerCu() { return device_info_.wave_per_cu; } - uint32_t SimdPerCu() { return device_info_.simd_per_cu; } - uint32_t MaxScratchSlotsPerCu() { return device_info_.max_scratch_slots_per_cu; } - uint32_t NumShaderEngine() { return device_info_.num_shader_engine; } - uint32_t ShaderArrayPerShaderEngine() { return device_info_.shader_array_per_shader_engine; } - uint32_t NumSdmaEngine() { return device_info_.sdma_schedid.size(); } - uint32_t Domain() { return device_info_.domain; } - uint32_t NumGws() { return device_info_.num_gws; } - uint32_t AsicRevision() { return device_info_.asic_revision; } - uint64_t LocalHeapSize() { return device_info_.local_visible_heap_size + device_info_.local_invisible_heap_size; } - uint64_t LocalVisibleHeapSize() { return device_info_.local_visible_heap_size; } - uint64_t LocalInvisibleHeapSize() { return device_info_.local_invisible_heap_size; } - uint64_t PrivateApertureBase() { return device_info_.private_aperture_base; } - uint64_t PrivateApertureSize() { return device_info_.private_aperture_size; } - uint64_t SharedApertureBase() { return device_info_.shared_aperture_base; } - uint64_t SharedApertureSize() { return device_info_.shared_aperture_size; } - uint32_t LdsSize() { return device_info_.lds_size; } - uint64_t GPUCounterFrequency() { return device_info_.gpu_counter_frequency; } - uint32_t GetSwsQueueSize(void) const { return device_info_.user_queue_size; } - uint32_t GetMecFwVersion() { return device_info_.mec_fw_version; } - uint32_t GetSdmaFwVersion() { return device_info_.sdma_fw_version; } - uint32_t GetL1CacheSize() { return device_info_.l1_cache_size; } - uint32_t GetL2CacheSize() { return device_info_.l2_cache_size; } - uint32_t GetL3CacheSize() { return device_info_.l3_cache_size; } - uint32_t Gl2CacheLineSize() { return device_info_.gl2_cacheline_size; } - bool SupportStateShadowingByCpFw(void) const { return device_info_.state_shadowing_by_cpfw; } - bool SupportPlatformAtomic(void) const { return device_info_.platform_atomic_support; } - uint32_t GetSdmaEngine(uint32_t idx) { - assert(idx < NumSdmaEngine()); - return device_info_.sdma_schedid[idx]; - } - uint32_t GetComputeEngine() { return device_info_.compute_schedid; } - - uint64_t VramAvail(); - - void GetClockCounters(uint64_t *gpu, uint64_t *cpu); - uint32_t GetNumCpQueues() { return device_info_.num_cp_queues; } - - bool CreateSyncobj(D3DKMT_HANDLE *handle, uint64_t **addr); - void DestroySyncobj(D3DKMT_HANDLE handle); - - bool CreateQueue(WDDMQueue *queue); - void DestroyQueue(WDDMQueue *queue); - bool CreateHwQueue(WDDMQueue *queue); - bool DestroyHwQueue(WDDMQueue *queue); - bool SubmitToSwQueue(WDDMQueue *queue, uint64_t command_addr, - uint64_t command_size, uint64_t fence_value); - bool SubmitToHwQueue(WDDMQueue *queue, uint64_t command_addr, - uint64_t command_size, uint64_t fence_value); - - bool WaitPagingFence(WDDMQueue *queue) { - uint64_t value = page_fence_value_; - - if (*page_fence_addr_ < value && - !GpuWait(queue, &page_syncobj_, &value, 1)) - return false; - - return true; - } - - bool GpuWait(WDDMQueue *queue, const D3DKMT_HANDLE *syncobjs, - uint64_t *values, int count); - bool GpuSignal(D3DKMT_HANDLE context, const D3DKMT_HANDLE *syncobjs, - uint64_t *value, int count); - bool CpuWait(const D3DKMT_HANDLE *syncobjs, uint64_t *value, - int count, bool wait_any); - bool WaitOnPagingFenceFromCpu(); - - uint32_t LdsBlocks(const hsa_kernel_dispatch_packet_t *pkt); - uint32_t GetCmdbufSize(void) const { return cmdbuf_size_; } - uint32_t GetAqlFrameSize(void) const { return cmdbuf_aql_frame_size_; } - static uint32_t GetAqlFrameNum(void) { return cmdbuf_aql_frame_num_; } - - // Both legacy HWS and stage 1 HWS use KMD to alloc use queue memory, - // return false by default - bool AllocUserQueueMemFromUMD(void) const { return false; } - - bool IsHwsEnabled(int engine) { - return thunk_proxy::GetHwsEnabled(engine, &device_info_); - } - - void UpdatePageFence(uint64_t fence_value); - - D3DKMT_HANDLE PagingQueue() const { return page_queue_; } - D3DKMT_HANDLE PagingFence() const { return page_syncobj_; } - D3DKMT_HANDLE DeviceHandle() const { return device_; } - LUID GetLuid() const { return adapter_luid_; } - - const thunk_proxy::DeviceInfo& DeviceInfo() const { return device_info_; } - - ErrorCode ReserveGpuVirtualAddress(thunk_proxy::AllocDomain domain, - gpusize hit_base_addr, - gpusize size, - gpusize *out_gpu_virtual_addr, - gpusize alignment, - bool lock=false); - - ErrorCode FreeGpuVirtualAddress(thunk_proxy::AllocDomain domain, - gpusize base_addr, - gpusize size); - - ErrorCode CreateGpuMemory(const GpuMemoryCreateInfo &create_info, GpuMemory **gpu_mem); - ErrorCode HandleApertureAlloc(gpusize size, gpusize *out_gpu_virt_addr); - void HandleApertureFree(gpusize gpu_addr); - -private: - bool ParseDeviceInfo(void); - void DestroyDeviceInfo(void); - bool CreateDevice(void); - bool DestroyDevice(void); - bool CreatePagingQueue(void); - bool DestroyPagingQueue(void); - void *Lock(D3DKMT_HANDLE handle); - bool Unlock(D3DKMT_HANDLE handle); - bool CreateContext(int engine, D3DKMT_HANDLE *handle); - bool DestroyContext(D3DKMT_HANDLE handle); - - void SetPowerOptimization(bool restore); - void InitCmdbufInfo(void); - bool ReserveSystemHeapSpace(void); - bool FreeSystemHeapSpace(void); - bool ReserveLocalHeapSpace(void); - bool InitHandleApertureSpace(void); - bool CommitSystemHeapSpace(void* addr, int64_t size, bool lock=false); - bool DecommitSystemHeapSpace(void* addr, int64_t size); - bool FreeLocalHeapSpace(void); - void InitVaMgr(); - void InitHandleApertureMgr(); - - D3DKMT_HANDLE adapter_; - LUID adapter_luid_; - D3DKMT_HANDLE device_; - - D3DKMT_HANDLE page_queue_; - D3DKMT_HANDLE page_syncobj_; - uint64_t *page_fence_addr_; - std::atomic page_fence_value_; - - uint64_t handle_aperture_start_; - uint64_t handle_aperture_size_; - uint64_t local_heap_space_start_; - uint64_t local_heap_space_size_; - uint64_t system_heap_space_start_; - uint64_t system_heap_space_size_; - uint32_t cmdbuf_size_; - uint32_t cmdbuf_aql_frame_size_; - static const uint32_t cmdbuf_aql_frame_num_; - // device info - thunk_proxy::DeviceInfo device_info_; - - std::unique_ptr local_va_mgr_; - std::unique_ptr handle_aperture_mgr_; - //CmdUtil cmd_util; -}; - -NTSTATUS WDDMGetAdapters(D3DKMT_ADAPTERINFO *&adapters, int &num_adapters); - -} // namespace thunk -} // namespace wsl - -#endif diff --git a/inc/wddm/gpu_memory.h b/inc/wddm/gpu_memory.h deleted file mode 100644 index da0433c7bc..0000000000 --- a/inc/wddm/gpu_memory.h +++ /dev/null @@ -1,227 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// -// The University of Illinois/NCSA -// Open Source License (NCSA) -// -// Copyright (c) 2020, Advanced Micro Devices, Inc. All rights reserved. -// -// Developed by: -// -// AMD Research and AMD HSA Software Development -// -// Advanced Micro Devices, Inc. -// -// www.amd.com -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal with the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimers. -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimers in -// the documentation and/or other materials provided with the distribution. -// - Neither the names of Advanced Micro Devices, Inc, -// nor the names of its contributors may be used to endorse or promote -// products derived from this Software without specific prior written -// permission. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR -// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// DEALINGS WITH THE SOFTWARE. -// -//////////////////////////////////////////////////////////////////////////////// - -#ifndef _WSL_INC_WDDM_GPU_MEMORY_H_ -#define _WSL_INC_WDDM_GPU_MEMORY_H_ - -#include -#include -#include "util/utils.h" -#include "inc/wddm/types.h" -#include "inc/wddm/thunks.h" -#include "inc/thunk_proxy/thunk_proxy.h" - -namespace wsl { -namespace thunk { - -class WDDMDevice; - -union GpuMemoryCreateFlags { - struct { - uint64_t virtual_alloc : 1; // only allocate virtual address, without physical buffer - uint64_t physical_only : 1; // only allocate physical buffer, without virutal address - uint64_t interprocess : 1; // physical buffer need share info between exporter and importer - uint64_t locked : 1; // lock virtual address space into RAM, preventing that memory from being paged to the swap area - uint64_t physical_contiguous : 1; // contiguous physical pages - uint64_t unused : 59; - }; - uint64_t reserved; -}; - -struct GpuMemoryCreateInfo { - GpuMemoryCreateInfo() { - flags.reserved = 0; - domain = thunk_proxy::kLocal; - size = 0; - alignment = 0; - mem_flags = 0; - engine_flag = 0; - va_hint = 0; - user_ptr = nullptr; - dmabuf_fd = -1; - } - - GpuMemoryCreateFlags flags; - thunk_proxy::AllocDomain domain; - gpusize size; - gpusize alignment; - int mem_flags; - int engine_flag; - int dmabuf_fd; // Import from dmabuf - - void *user_ptr; - gpusize va_hint; -}; - -struct GpuMemoryDesc { - GpuMemoryDesc() { - gpu_addr = 0; - cpu_addr = nullptr; - client_size = 0; - size = alignment = 0; - flags.reserved = 0; - mem_flags = 0; - engine_flag = 0; - handle_ape_addr = 0; - } - - thunk_proxy::AllocDomain domain; - LUID adapter_luid; // Where is the backing store location - gpusize gpu_addr; - void *cpu_addr; - gpusize client_size; // user request size - gpusize size; - gpusize alignment; - gpusize handle_ape_addr; - - union { - struct { - uint32_t is_virtual : 1; - uint32_t is_shared : 1; - uint32_t is_external : 1; - uint32_t is_physical_only : 1; - uint32_t is_locked : 1; - uint32_t is_queue_referenced : 1; - uint32_t is_physical_contiguous : 1; - uint32_t unused : 25; - }; - - uint32_t reserved; - } flags; - - int mem_flags; - int engine_flag; -}; - -struct SharedHandleInfo { - thunk_proxy::AllocDomain domain; - LUID adapter_luid; - gpusize client_size; // user request size - uint64_t size; - uint32_t flags; - int mem_flags; -}; - -using GpuMemoryHandle = void *; - -class GpuMemory { -public: - static size_t CalcChunkNumbers(gpusize size); - - ErrorCode Init(const GpuMemoryCreateInfo &create_info); - - WDDMDevice *GetDevice() const { return device_; } - gpusize Size() const { return desc_.size; } - gpusize ClientSize() const { return desc_.client_size; } - uint64_t GpuAddress() const { return desc_.gpu_addr; } - void *CpuAddress() const { return desc_.cpu_addr; } - uint64_t HandleApeAddress() const { return desc_.handle_ape_addr; } - - inline bool IsLocal() const { return desc_.domain == thunk_proxy::kLocal; } - inline bool IsUserMemory() const { return desc_.domain == thunk_proxy::kUserMemory; } - inline bool IsSystem() const { return desc_.domain == thunk_proxy::kSystem; } - inline bool IsUserQueue() const { return desc_.domain == thunk_proxy::kUserQueue; } - inline bool IsPhysicalOnly() const { return desc_.flags.is_physical_only; } - inline bool IsPhysicalContiguous() const { return desc_.flags.is_physical_contiguous; } - inline bool IsVirtual() const { return desc_.flags.is_virtual; } - inline bool IsShared() const { return desc_.flags.is_shared; } - inline bool IsExternal() const { return desc_.flags.is_external; } - - inline uint32_t Flags() const { return desc_.flags.reserved; } - inline int GetAllocInfo() const { return desc_.mem_flags; } - inline bool IsFineGrain() const { return (desc_.mem_flags & thunk_proxy::kFineGrain); } - inline bool IsSameAdapter(const LUID &luid) const { - return (desc_.adapter_luid.HighPart == luid.HighPart && - desc_.adapter_luid.LowPart == luid.LowPart); - } - inline void GetQueueReference() { desc_.flags.is_queue_referenced = 1; } - inline void PutQueueReference() { desc_.flags.is_queue_referenced = 0; } - inline bool IsQueueReferenced() const { return desc_.flags.is_queue_referenced; } - - WinAllocationHandle GetAllocationHandle(size_t index) const { return alloc_handles_ptr_[index]; } - size_t NumChunks() const { return num_allocations_; } - - const GpuMemoryHandle GetGpuMemoryHandle() const { - return reinterpret_cast(const_cast(this)); - } - - static GpuMemory *Convert(GpuMemoryHandle handle) { return reinterpret_cast(handle); } - - ErrorCode ReserveGpuVirtualAddress(gpusize base_virt_addr, gpusize va_size, gpusize alignment); - ErrorCode FreeGpuVirtualAddress(gpusize va_start_address, gpusize va_size); - - ErrorCode MapGpuVirtualAddress(const gpusize map_addr, const gpusize size, gpusize offset = 0); - ErrorCode UnmapGpuVirtualAddress(const gpusize map_addr, const gpusize size, gpusize offset = 0); - - ErrorCode MakeResident(); - ErrorCode Evict(); - - ErrorCode ExportPhysicalHandle(int* dmabuf_fd, uint32_t flags = SHARED_ALLOCATION_ALL_ACCESS); - ErrorCode ImportPhysicalHandle(int dmabuf_fd); - ~GpuMemory(); -protected: - explicit GpuMemory(WDDMDevice *device); -private: - ErrorCode CreatePhysicalMemory(); - ErrorCode FreePhysicalMemory(); - - uint64_t AdjustSize(gpusize size) const; -private: - friend class WDDMDevice; - - WDDMDevice *const device_; - - GpuMemoryDesc desc_; - - size_t num_allocations_; - WinAllocationHandle *alloc_handles_ptr_; - WinAllocationHandle alloc_handle_; // Optimization for num_allocations_ is 1 - - WinResourceHandle resource_; // Handle to a resource object that wraps the allocation. Used for shared resources - - DISALLOW_COPY_AND_ASSIGN(GpuMemory); -}; - -} // namespace thunk -} // namespace wsl - -#endif diff --git a/inc/wddm/queue.h b/inc/wddm/queue.h deleted file mode 100644 index fc600562f5..0000000000 --- a/inc/wddm/queue.h +++ /dev/null @@ -1,363 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// -// The University of Illinois/NCSA -// Open Source License (NCSA) -// -// Copyright (c) 2020, Advanced Micro Devices, Inc. All rights reserved. -// -// Developed by: -// -// AMD Research and AMD HSA Software Development -// -// Advanced Micro Devices, Inc. -// -// www.amd.com -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal with the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimers. -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimers in -// the documentation and/or other materials provided with the distribution. -// - Neither the names of Advanced Micro Devices, Inc, -// nor the names of its contributors may be used to endorse or promote -// products derived from this Software without specific prior written -// permission. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR -// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// DEALINGS WITH THE SOFTWARE. -// -//////////////////////////////////////////////////////////////////////////////// -#ifndef _WSL_INC_WDDM_QUEUE_H_ -#define _WSL_INC_WDDM_QUEUE_H_ - -#include -#include -#include -#include -#include -#include "inc/wddm/types.h" -#include "inc/wddm/device.h" -#include "inc/wddm/gpu_memory.h" -#include "hsa-runtime/inc/hsa_ext_amd.h" -#include "hsa-runtime/inc/amd_hsa_queue.h" -#include "hsa-runtime/inc/amd_hsa_signal.h" -#include "inc/wddm/cmd_util.h" - -namespace wsl { -namespace thunk { - -class Queue; -class WDDMDevice; - -class WDDMQueue { -public: - WDDMQueue(WDDMDevice *device, - uint64_t cmdbuf_addr, - uint32_t cmdbuf_size, - uint32_t engine, - bool use_hws = true) : - device(device), - context(0), - queue(0), - syncobj(0), - sync_addr(NULL), - cmdbuf(0), - cmdbuf_addr(cmdbuf_addr), - cmdbuf_size(cmdbuf_size), - queue_engine(engine), - use_hws(use_hws), - prio(thunk_proxy::kNormal) { - - } - - virtual ~WDDMQueue() { } - - virtual hsa_status_t Init(void) { return HSA_STATUS_SUCCESS; } - virtual hsa_status_t Fini(void) { return HSA_STATUS_SUCCESS; } - virtual void RingDoorbell() { } - virtual void* GetHsaQueueAddr(void) const { return reinterpret_cast(GetCmdbufAddr()); } - - hsa_status_t SwsInit(void); - hsa_status_t SwsFini(void); - hsa_status_t SwsSubmit(uint64_t command_addr, - uint64_t command_size, - uint64_t fence_value); - - hsa_status_t HwsInit(void); - hsa_status_t HwsFini(void); - hsa_status_t HwsSubmit(uint64_t command_addr, - uint64_t command_size, - uint64_t fence_value); - hsa_status_t SetPriority(hsa_amd_queue_priority_t priority); - - uint64_t *GetSyncAddr(void) const { return sync_addr; } - uint64_t GetCmdbufAddr(void) const { return cmdbuf_addr; } - - thunk_proxy::SchedLevel ConvertSchedLevel(hsa_amd_queue_priority_t prio) const { - switch (prio) { - case HSA_AMD_QUEUE_PRIORITY_LOW: - return thunk_proxy::kLow; - case HSA_AMD_QUEUE_PRIORITY_HIGH: - return thunk_proxy::kHigh; - case HSA_AMD_QUEUE_PRIORITY_NORMAL: - default: - return thunk_proxy::kNormal; - } - } - - WDDMDevice *device; - - D3DKMT_HANDLE context; - D3DKMT_HANDLE queue; - - D3DKMT_HANDLE syncobj; - uint64_t *sync_addr; - - GpuMemoryHandle cmdbuf; - uint64_t cmdbuf_addr; - uint32_t cmdbuf_size; - - GpuMemoryHandle queue_mem; - uint64_t queue_addr; - - uint32_t queue_engine; - - bool use_hws; - thunk_proxy::SchedLevel prio; -}; - -class ComputeQueue : public WDDMQueue { -public: - ComputeQueue(WDDMDevice *device, - void *ring, - uint64_t ring_size, - std::atomic *ring_wptr, - std::atomic *ring_rptr, - volatile int64_t *error_addr, - uint32_t cmdbuf_size, - uint32_t engine, - bool use_hws = true); - - ~ComputeQueue(); - - virtual hsa_status_t Init(void); - virtual hsa_status_t Fini(void); - virtual hsa_status_t Submit(void); - - void* GetRing(void) const { return ring; } - uint64_t GetRingSize(void) const { return ring_size; } - std::atomic* GetRingWptr(void) const { return ring_wptr; } - std::atomic* GetRingRptr(void) const { return ring_rptr; } - - uint64_t GetAqlWriteIndex(void) const { return cmdbuf_aql_frame_write_index; } - uint32_t GetAqlFrameSize(void) const { return cmdbuf_aql_frame_size; } - void* GetHsaQueueAddr(void) const { return ring; } - - bool IsInvalidPacket(void) const { - uint16_t *packet = (uint16_t *)((char *)ring + - (cmdbuf_aql_frame_write_index % ring_size) * 64); - return ((*packet >> HSA_PACKET_HEADER_TYPE) & ((1 << HSA_PACKET_HEADER_WIDTH_TYPE) - 1)) - == HSA_PACKET_TYPE_INVALID; - } - - hsa_status_t Process(void); - uint64_t * GetDoorbellPtr() const { return (uint64_t *)&doorbell_signal_value_; } - void RingDoorbell(); -private: - hsa_status_t KernelDispatchAqlToPm4(char *cpu, hsa_kernel_dispatch_packet_t *packet); - hsa_status_t BarrierGenericAqlToPm4(char *cpu, hsa_barrier_and_packet_t *packet, bool is_or = false); - struct amd_aql_pm4_ib { - uint16_t header; - uint16_t ven_hdr; - uint32_t ib_jump_cmd[4]; - uint32_t dw_cnt_remain; - uint32_t reserved[8]; - hsa_signal_t completion_signal; - }; - hsa_status_t VendorSpecificAqlToPm4(char *cpu, amd_aql_pm4_ib *packet); - hsa_status_t SwitchAql2PM4(void); - - hsa_status_t PreSubmit(void); - hsa_status_t EndSubmit(void); - - void *ring; - uint64_t ring_size; - std::atomic *ring_wptr; - std::atomic *ring_rptr; - - // ib_start_addr is the current ib start address - uint64_t ib_start_addr; - - // ib_size is the current ib size. - uint64_t ib_size; - - // record the last submitted aql frame write index - uint64_t sync_point; - - uint64_t cmdbuf_aql_frame_write_index; - uint32_t cmdbuf_aql_frame_size; - - uint64_t *signal_addr_; - bool platform_atomic_support_; - bool needs_barrier; - bool ready_to_submit; - - CmdUtil cmd_util; - -private: - bool EnableProfiling() { - return AMD_HSA_BITS_GET(amd_queue_rocr_->queue_properties, AMD_QUEUE_PROPERTIES_ENABLE_PROFILING); - } - void HandleError(hsa_status_t status); - bool UpdateScratch(uint32_t private_segment_size, bool wave32); - - uint32_t UpdateIndexStride(uint32_t srd, bool wave32); - - void *ScratchBase() { return scratch_base_; } - - void AppendCmdbufSratchBaseOffset(int offset) { - scratch_base_offset_array_.push_back(offset); - } - - bool RelocateCmdbufScratchBase(uint64_t addr); - - uint32_t ScratchSizePerWave() { return scratch_size_per_wave_; } - uint64_t GetKernelObjAddr(uint64_t addr) const; - void InitScratchSRD(); - GpuMemoryHandle amd_queue_mem_; - amd_queue_t *amd_queue_; - amd_queue_t *amd_queue_rocr_; - uint64_t doorbell_signal_value_; - volatile std::atomic *error_code_; - std::thread aql_to_pm4_thread_; - bool thread_stop_; - std::mutex thread_cond_lock_; - std::condition_variable thread_cond_; - static void AqlToPm4Thread(ComputeQueue *queue); - - uint32_t scratch_waves_; - uint32_t scratch_size_per_wave_; - uint32_t scratch_size_; - void *scratch_base_; - GpuMemoryHandle scratch_mem_; - - std::vector scratch_base_offset_array_; -}; - -class SDMAQueue : public WDDMQueue { -public: - SDMAQueue(WDDMDevice *device, - void *ring, - uint64_t cmdbuf_size, - uint32_t engine, - bool use_hws = true); - - virtual ~SDMAQueue(); - - hsa_status_t Init(void); - hsa_status_t Fini(void); - hsa_status_t Submit(void); - - int PreparePacket(uint32_t offset, uint64_t size); - - void WaitQueue(void) { - device->CpuWait(&syncobj, &rptr_next, 1, false); - } - - uint64_t * GetRingWptr(void) { return &wptr_next_; } - uint64_t * GetRingRptr(void) { return WDDMQueue::GetSyncAddr(); } - uint64_t * GetDoorbellPtr() { return &doorbell_; } - void RingDoorbell(); - void* GetHsaQueueAddr(void) const { return reinterpret_cast(GetCmdbufAddr()); } - -private: - uint64_t wptr_next_; - uint64_t wptr_pre_; - uint64_t rptr_next; - uint64_t doorbell_; - std::vector> wptr_queue_; - uint64_t ib_size; - uint64_t ib_start_addr; - - std::thread thread_; - bool thread_stop_; - std::mutex thread_cond_lock_; - std::condition_variable thread_cond_; - static void SdmaThread(SDMAQueue *queue); - - struct SDMA_PKT_POLL_REGMEM { - union { - struct { - unsigned int op : 8; - unsigned int sub_op : 8; - unsigned int reserved_0 : 10; - unsigned int hdp_flush : 1; - unsigned int reserved_1 : 1; - unsigned int func : 3; - unsigned int mem_poll : 1; - }; - unsigned int DW_0_DATA; - } HEADER_UNION; - - union { - struct { - unsigned int addr_31_0 : 32; - }; - unsigned int DW_1_DATA; - } ADDR_LO_UNION; - - union { - struct { - unsigned int addr_63_32 : 32; - }; - unsigned int DW_2_DATA; - } ADDR_HI_UNION; - - union { - struct { - unsigned int value : 32; - }; - unsigned int DW_3_DATA; - } VALUE_UNION; - - union { - struct { - unsigned int mask : 32; - }; - unsigned int DW_4_DATA; - } MASK_UNION; - - union { - struct { - unsigned int interval : 16; - unsigned int retry_count : 12; - unsigned int reserved_0 : 4; - }; - unsigned int DW_5_DATA; - } DW5_UNION; - }; - const unsigned int SDMA_OP_POLL_REGMEM = 8; - bool IsPollPacket(SDMA_PKT_POLL_REGMEM* pkt) { - return pkt->HEADER_UNION.op == SDMA_OP_POLL_REGMEM && - pkt->HEADER_UNION.mem_poll == 1 && - pkt->HEADER_UNION.func == 3; - } - uint32_t WrapIntoRocrRing(uint64_t idx) { return (idx & (cmdbuf_size - 1)); } -}; - -} // namespace thunk -} // namespace wsl - -#endif diff --git a/inc/wddm/status.h b/inc/wddm/status.h deleted file mode 100644 index 528264c74e..0000000000 --- a/inc/wddm/status.h +++ /dev/null @@ -1,60 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// -// The University of Illinois/NCSA -// Open Source License (NCSA) -// -// Copyright (c) 2020, Advanced Micro Devices, Inc. All rights reserved. -// -// Developed by: -// -// AMD Research and AMD HSA Software Development -// -// Advanced Micro Devices, Inc. -// -// www.amd.com -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal with the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimers. -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimers in -// the documentation and/or other materials provided with the distribution. -// - Neither the names of Advanced Micro Devices, Inc, -// nor the names of its contributors may be used to endorse or promote -// products derived from this Software without specific prior written -// permission. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR -// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// DEALINGS WITH THE SOFTWARE. -// -//////////////////////////////////////////////////////////////////////////////// - -#ifndef _WSL_INC_WDDM_STATUS_H -#define _WSL_INC_WDDM_STATUS_H - -enum class ErrorCode { - Success, - DeviceLost, - UnSupported, - NotReady, - OutOfMemory, - OutOfGpuMemory, - OutOfHandleApeMemory, - Timeout, - SyscallFail, - InvalidateParams, - Unknown, -}; - -#endif diff --git a/inc/wddm/thunks.h b/inc/wddm/thunks.h deleted file mode 100644 index 6a573c1a09..0000000000 --- a/inc/wddm/thunks.h +++ /dev/null @@ -1,232 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// -// The University of Illinois/NCSA -// Open Source License (NCSA) -// -// Copyright (c) 2020, Advanced Micro Devices, Inc. All rights reserved. -// -// Developed by: -// -// AMD Research and AMD HSA Software Development -// -// Advanced Micro Devices, Inc. -// -// www.amd.com -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal with the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimers. -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimers in -// the documentation and/or other materials provided with the distribution. -// - Neither the names of Advanced Micro Devices, Inc, -// nor the names of its contributors may be used to endorse or promote -// products derived from this Software without specific prior written -// permission. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR -// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// DEALINGS WITH THE SOFTWARE. -// -//////////////////////////////////////////////////////////////////////////////// - -#ifndef _WSL_INC_WDDM_THUNKS_H -#define _WSL_INC_WDDM_THUNKS_H - -#include "inc/wddm/status.h" -#include "inc/wddm/types.h" - -namespace wsl { -namespace thunk { - -inline ErrorCode TranslateNtStatus(NTSTATUS status) { - switch (status) { - case STATUS_SUCCESS: - return ErrorCode::Success; - case STATUS_PENDING: - return ErrorCode::NotReady; - case STATUS_NO_MEMORY: - return ErrorCode::OutOfMemory; - case STATUS_DEVICE_REMOVED: - return ErrorCode::DeviceLost; - case STATUS_GRAPHICS_NO_VIDEO_MEMORY: - return ErrorCode::OutOfGpuMemory; - case STATUS_TIMEOUT: - return ErrorCode::Timeout; - case STATUS_INVALID_PARAMETER: - return ErrorCode::InvalidateParams; - default: - break; - } - return ErrorCode::Unknown; -} - -namespace d3dthunk { - -typedef D3DKMT_CREATEALLOCATION CreateAllocationArgs; -typedef D3DKMT_CREATECONTEXT CreateContextArgs; -typedef D3DKMT_CREATECONTEXTVIRTUAL CreateContextVirtualArgs; -typedef D3DKMT_CREATEPAGINGQUEUE CreatePagingQueueArgs; -typedef D3DKMT_CREATESYNCHRONIZATIONOBJECT CreateSynchronizationObjectArgs; -typedef D3DKMT_CREATESYNCHRONIZATIONOBJECT2 CreateSynchronizationObject2Args; -typedef D3DKMT_ESCAPE EscapeArgs; -typedef D3DKMT_EVICT EvictArgs; -typedef D3DKMT_FREEGPUVIRTUALADDRESS FreeGpuVirtualAddressArgs; -typedef D3DKMT_LOCK LockArgs; -typedef D3DKMT_LOCK2 Lock2Args; -typedef D3DKMT_OPENRESOURCE OpenResourceArgs; -typedef D3DKMT_OPENRESOURCEFROMNTHANDLE OpenResourceFromNtHandleArgs; -typedef D3DKMT_QUERYADAPTERINFO QueryAdapterInfoArgs; -typedef D3DKMT_SIGNALSYNCHRONIZATIONOBJECT SignalSynchronizationObjectArgs; -typedef D3DKMT_SIGNALSYNCHRONIZATIONOBJECT2 SignalSynchronizationObject2Args; -typedef D3DKMT_SIGNALSYNCHRONIZATIONOBJECTFROMCPU SignalSynchronizationObjectFromCpuArgs; -typedef D3DKMT_SIGNALSYNCHRONIZATIONOBJECTFROMGPU2 SignalSynchronizationObjectFromGpuArgs; -typedef D3DKMT_SUBMITCOMMAND SubmitCommandArgs; -typedef D3DKMT_UNLOCK UnlockArgs; -typedef D3DKMT_UNLOCK2 Unlock2Args; -typedef D3DKMT_UPDATEGPUVIRTUALADDRESS UpdateGpuVirtualAddressArgs; -typedef D3DKMT_WAITFORSYNCHRONIZATIONOBJECT WaitForSynchronizationObjectArgs; -typedef D3DKMT_WAITFORSYNCHRONIZATIONOBJECT2 WaitForSynchronizationObject2Args; -typedef D3DKMT_WAITFORSYNCHRONIZATIONOBJECTFROMCPU WaitForSynchronizationObjectFromCpuArgs; -typedef D3DKMT_WAITFORSYNCHRONIZATIONOBJECTFROMGPU WaitForSynchronizationObjectFromGpuArgs; -typedef D3DKMT_ACQUIREKEYEDMUTEX AcquireKeyedMutexArgs; -typedef D3DKMT_RELEASEKEYEDMUTEX ReleaseKeyedMutexArgs; -typedef D3DKMT_OPENKEYEDMUTEX OpenKeyedMutexArgs; -typedef D3DKMT_DESTROYKEYEDMUTEX DestroyKeyedMutexArgs; -typedef D3DKMT_QUERYVIDEOMEMORYINFO QueryVideoMemoryInfoArgs; -typedef D3DKMT_CREATEHWQUEUE CreateHwQueueArgs; -typedef D3DKMT_DESTROYHWQUEUE DestroyHwQueueArgs; -typedef D3DKMT_SUBMITCOMMANDTOHWQUEUE SubmitCommandToHwQueueArgs; -typedef D3DKMT_SUBMITPRESENTTOHWQUEUE SubmitPresentToHwQueueArgs; -typedef D3DKMT_SUBMITSIGNALSYNCOBJECTSTOHWQUEUE SubmitSignalSyncObjectsToHwQueueArgs; -typedef D3DKMT_SUBMITWAITFORSYNCOBJECTSTOHWQUEUE SubmitWaitForSyncObjectsToHwQueueArgs; -typedef D3DKMT_CREATESYNCFILE CreateSyncFileArgs; - -inline ErrorCode MapGpuVirtualAddress(D3DDDI_MAPGPUVIRTUALADDRESS *args) { - return TranslateNtStatus(D3DKMTMapGpuVirtualAddress(args)); -} - -inline ErrorCode CreateAllocation(CreateAllocationArgs *args) { - return TranslateNtStatus(D3DKMTCreateAllocation2(args)); -} - -inline ErrorCode DestroyAllocation( - WinDeviceHandle device, - WinResourceHandle resource, - size_t num_allocations, - const WinAllocationHandle *alloc_handles) { - - D3DKMT_DESTROYALLOCATION2 args{}; - - memset(&args, 0, sizeof(args)); - args.hDevice = device; - if (resource) { - args.hResource = resource; - } else { - args.phAllocationList = alloc_handles; - args.AllocationCount = num_allocations; - } - - return TranslateNtStatus(D3DKMTDestroyAllocation2(&args)); -} - -inline ErrorCode ReserveGpuVirtualAddress(D3DDDI_RESERVEGPUVIRTUALADDRESS *args) { - return TranslateNtStatus(D3DKMTReserveGpuVirtualAddress(args)); -} - -inline ErrorCode ReserveGpuVirtualAddress(WinAdapterHandle handle, - gpusize size, - gpusize base_address, - gpusize *out_addr) { - D3DDDI_RESERVEGPUVIRTUALADDRESS args{}; - args.hPagingQueue = handle; - args.Size = size; - args.BaseAddress = base_address; - - auto code = ReserveGpuVirtualAddress(&args); - if (code == ErrorCode::Success) - *out_addr = args.VirtualAddress; - return code; -} - -inline ErrorCode ReserveGpuVirtualAddress(WinAdapterHandle handle, - gpusize size, - gpusize minimum_address, - gpusize maximum_address, - gpusize *out_addr) { - D3DDDI_RESERVEGPUVIRTUALADDRESS args{}; - args.hPagingQueue = handle; - args.Size = size; - args.MinimumAddress = minimum_address; - args.MaximumAddress = maximum_address; - - auto code = ReserveGpuVirtualAddress(&args); - if (code == ErrorCode::Success) - *out_addr = args.VirtualAddress; - return code; -} - -inline ErrorCode FreeGpuVirtualAddress(FreeGpuVirtualAddressArgs *args) { - return TranslateNtStatus(D3DKMTFreeGpuVirtualAddress(args)); -} - -inline ErrorCode FreeGpuVirtualAddress(WinAdapterHandle handle, - gpusize base_address, - gpusize size) { - FreeGpuVirtualAddressArgs args{}; - args.hAdapter = handle; - args.Size = size; - args.BaseAddress = base_address; - return FreeGpuVirtualAddress(&args); -} - -inline ErrorCode MakeResident(D3DDDI_MAKERESIDENT *args) { - return TranslateNtStatus(D3DKMTMakeResident(args)); -} - -inline ErrorCode Evict(EvictArgs *args) { - return TranslateNtStatus(D3DKMTEvict(args)); -} - -inline ErrorCode ShareObjects(size_t num_allocations, - WinResourceHandle resource, - uint32_t flags, - int* dmabuf_fd) { - OBJECT_ATTRIBUTES obj_attr; - HANDLE nt_handle; - ErrorCode ret; - - InitializeObjectAttributes(&obj_attr, nullptr, OBJ_INHERIT, nullptr, nullptr); - ret = TranslateNtStatus(D3DKMTShareObjects(num_allocations, - &resource, &obj_attr, flags, &nt_handle)); - if (ret == ErrorCode::Success) - *dmabuf_fd = *(reinterpret_cast(&nt_handle)); - else - *dmabuf_fd = -1; - - return ret; -} - -inline ErrorCode QueryResourceInfoFromNtHandle(D3DKMT_QUERYRESOURCEINFOFROMNTHANDLE *args) { - return TranslateNtStatus(D3DKMTQueryResourceInfoFromNtHandle(args)); -} - -inline ErrorCode OpenResourceFromNtHandle(D3DKMT_OPENRESOURCEFROMNTHANDLE *args) { - return TranslateNtStatus(D3DKMTOpenResourceFromNtHandle(args)); -} - -} // namespace d3dthunk -} // namespace thunk -} // namespace wsl - -#endif diff --git a/inc/wddm/types.h b/inc/wddm/types.h deleted file mode 100644 index 0bc922b7fd..0000000000 --- a/inc/wddm/types.h +++ /dev/null @@ -1,101 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// -// The University of Illinois/NCSA -// Open Source License (NCSA) -// -// Copyright (c) 2020, Advanced Micro Devices, Inc. All rights reserved. -// -// Developed by: -// -// AMD Research and AMD HSA Software Development -// -// Advanced Micro Devices, Inc. -// -// www.amd.com -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal with the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimers. -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimers in -// the documentation and/or other materials provided with the distribution. -// - Neither the names of Advanced Micro Devices, Inc, -// nor the names of its contributors may be used to endorse or promote -// products derived from this Software without specific prior written -// permission. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR -// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// DEALINGS WITH THE SOFTWARE. -// -//////////////////////////////////////////////////////////////////////////////// - -#ifndef _WSL_INC_WDDM_TYPES_H_ -#define _WSL_INC_WDDM_TYPES_H_ - -#include -#include -#include "inc/thunk_proxy/wddm_types.h" -// windows wchar is 16bit, but linux is 32bit -// seems libdxcore (not dxgkrnl.ko) convert thunk windows wchar to linux one -// so only accept 32bit wchar args. note driver private data structure still -// use 16bit wchar -#define WCHAR wchar_t -#define PCWSTR const wchar_t * -#include -#undef WCHAR -#undef PCWSTR - -using gpusize = uint64_t; // Used to specify GPU addresses and sizes of GPU allocations -using WinAllocationHandle = D3DKMT_HANDLE; -using WinResourceHandle = D3DKMT_HANDLE; -using WinContextHandle = D3DKMT_HANDLE; -using WinDeviceHandle = D3DKMT_HANDLE; -using WinAdapterHandle = D3DKMT_HANDLE; - -//reference dk/winnt.h -#define STANDARD_RIGHTS_REQUIRED (0x000F0000L) - -//reference dk/ntdef.h -#define OBJ_INHERIT (0x00000002L) -typedef WCHAR *PWCHAR, *LPWCH, *PWCH; -typedef struct _UNICODE_STRING { - USHORT Length; - USHORT MaximumLength; -#ifdef MIDL_PASS - [size_is(MaximumLength / 2), length_is((Length) / 2) ] USHORT * Buffer; -#else // MIDL_PASS - _Field_size_bytes_part_opt_(MaximumLength, Length) PWCH Buffer; -#endif // MIDL_PASS -} UNICODE_STRING; -typedef UNICODE_STRING *PUNICODE_STRING; -typedef const UNICODE_STRING *PCUNICODE_STRING; - -typedef struct _OBJECT_ATTRIBUTES { - ULONG Length; - HANDLE RootDirectory; - PUNICODE_STRING ObjectName; - ULONG Attributes; - PVOID SecurityDescriptor; - PVOID SecurityQualityOfService; -} OBJECT_ATTRIBUTES; -#define InitializeObjectAttributes( p, n, a, r, s ) { \ - (p)->Length = sizeof( OBJECT_ATTRIBUTES ); \ - (p)->RootDirectory = r; \ - (p)->Attributes = a; \ - (p)->ObjectName = n; \ - (p)->SecurityDescriptor = s; \ - (p)->SecurityQualityOfService = NULL; \ - } - -#endif \ No newline at end of file diff --git a/inc/wddm/va_mgr.h b/inc/wddm/va_mgr.h deleted file mode 100644 index 675bfc3e39..0000000000 --- a/inc/wddm/va_mgr.h +++ /dev/null @@ -1,86 +0,0 @@ -#ifndef _WSL_INC_WDDM_VA_MGR_H_ -#define _WSL_INC_WDDM_VA_MGR_H_ - -#include -#include -#include "util/utils.h" - -namespace wsl { -namespace thunk { - -class VaMgr { -public: - VaMgr(uint64_t start, uint64_t size, uint64_t min_align); - ~VaMgr(); - - /* Allocate `bytes` VA, if `align` is not zero, the returned address is aligned by `align`. - * If `addr` parameter is not zero, try best to allocate VA from fixed address `addr`. - */ - uint64_t Alloc(uint64_t bytes, uint64_t align, uint64_t addr = 0); - - void Free(uint64_t addr); - -private: - uint64_t AllocImpl(uint64_t bytes, uint64_t align); - - struct Fragment { - using ptr = std::multimap::iterator; - ptr free_list_entry_; - - struct { - uint64_t size : 63; - bool is_free : 1; - }; - - Fragment() : size(0), is_free(false) {} - Fragment(ptr iterator, uint64_t len, bool is_free) - : free_list_entry_(iterator), size(len), is_free(is_free) {} - }; - - static inline Fragment make_fragment(typename Fragment::ptr iter, uint64_t len) { - return {iter, len, true}; - } - - inline Fragment make_fragment(uint64_t len) { return {free_list_.end(), len, false}; } - - static inline bool is_free(const Fragment& f) { return f.is_free; } - void set_used(Fragment& f) { - f.is_free = false; - f.free_list_entry_ = free_list_.end(); - } - static void set_free(Fragment& f, typename Fragment::ptr iter) { - f.free_list_entry_ = iter; - f.is_free = true; - } - - inline void remove_free_list_entry(Fragment& frag) { - if (frag.free_list_entry_ != free_list_.end()) { - free_list_.erase(frag.free_list_entry_); - frag.free_list_entry_ = free_list_.end(); - } - } - - inline void add_free_fragment(uint64_t size, uint64_t base) { - auto it = free_list_.insert(std::make_pair(size, base)); - frag_map_[base] = make_fragment(it, size); - } - - inline void add_used_fragment(uint64_t size, uint64_t base) { - frag_map_[base] = make_fragment(size); - } - // Indexed by size - std::multimap free_list_; - // Indexed by VA, each fragment has no overlap - std::map frag_map_; - - uint64_t min_align_; - - std::mutex lock_; // Mutex protecting allocation and free of va - - - DISALLOW_COPY_AND_ASSIGN(VaMgr); -}; - -} // namespace thunk -} // namespace wsl -#endif diff --git a/libdrm.cpp b/libdrm.cpp index c9ed48d60e..d85ca9aa1e 100644 --- a/libdrm.cpp +++ b/libdrm.cpp @@ -41,8 +41,8 @@ //////////////////////////////////////////////////////////////////////////////// #include -#include "inc/wddm/types.h" -#include "inc/wddm/device.h" +#include "impl/wddm/types.h" +#include "impl/wddm/device.h" #include "libhsakmt.h" HSAKMT_STATUS HSAKMTAPI hsaKmtGetAMDGPUDeviceHandle( diff --git a/libhsakmt.h b/libhsakmt.h index 255186fe8a..e34d4e10c9 100644 --- a/libhsakmt.h +++ b/libhsakmt.h @@ -32,8 +32,8 @@ #include "hsakmt/hsakmt.h" #include "hsakmt/hsakmt_drm.h" -#include "inc/wddm/types.h" -#include "inc/wddm/device.h" +#include "impl/wddm/types.h" +#include "impl/wddm/device.h" wsl::thunk::WDDMDevice* get_wddmdev(uint32_t node_id); diff --git a/memory.cpp b/memory.cpp index b080f72082..add31df673 100644 --- a/memory.cpp +++ b/memory.cpp @@ -32,7 +32,7 @@ #include #include #include -#include "inc/wddm/gpu_memory.h" +#include "impl/wddm/gpu_memory.h" #include "util/simple_heap.h" struct Allocation { diff --git a/queues.cpp b/queues.cpp index e7b89e3529..d8fefacff8 100644 --- a/queues.cpp +++ b/queues.cpp @@ -24,8 +24,8 @@ */ #include #include "libhsakmt.h" -#include "inc/wddm/device.h" -#include "inc/wddm/queue.h" +#include "impl/wddm/device.h" +#include "impl/wddm/queue.h" #include "hsa-runtime/inc/amd_hsa_signal.h" uint32_t get_vgpr_size_per_cu(HSA_ENGINE_ID id) { diff --git a/thunk_proxy/libthunk_proxy.a b/thunk_proxy/libthunk_proxy.a index c4fae89f53..8991622e0d 100644 Binary files a/thunk_proxy/libthunk_proxy.a and b/thunk_proxy/libthunk_proxy.a differ diff --git a/time.cpp b/time.cpp index edc9cb3052..02f225d6f4 100644 --- a/time.cpp +++ b/time.cpp @@ -27,7 +27,7 @@ #include #include #include "libhsakmt.h" -#include "inc/wddm/device.h" +#include "impl/wddm/device.h" HSAKMT_STATUS HSAKMTAPI hsaKmtGetClockCounters(HSAuint32 NodeId, HsaClockCounters *Counters) { diff --git a/topology.cpp b/topology.cpp index d8657014dd..e53e1094a7 100644 --- a/topology.cpp +++ b/topology.cpp @@ -39,8 +39,8 @@ #include #include "libhsakmt.h" -#include "inc/wddm/types.h" -#include "inc/wddm/device.h" +#include "impl/wddm/types.h" +#include "impl/wddm/device.h" #include "util/utils.h" /* Number of memory banks added by thunk on top of topology diff --git a/wddm/cmd_util.cpp b/wddm/cmd_util.cpp index 476007b640..d650651e31 100644 --- a/wddm/cmd_util.cpp +++ b/wddm/cmd_util.cpp @@ -1,6 +1,6 @@ /* Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. */ -#include "inc/wddm/cmd_util.h" +#include "impl/wddm/cmd_util.h" namespace wsl { namespace thunk { diff --git a/wddm/device.cpp b/wddm/device.cpp index 4b74b619e0..7905ed93e3 100644 --- a/wddm/device.cpp +++ b/wddm/device.cpp @@ -48,10 +48,10 @@ #include #include #include -#include "inc/wddm/status.h" -#include "inc/wddm/types.h" -#include "inc/wddm/device.h" -#include "inc/wddm/queue.h" +#include "impl/wddm/status.h" +#include "impl/wddm/types.h" +#include "impl/wddm/device.h" +#include "impl/wddm/queue.h" namespace wsl { namespace thunk { diff --git a/wddm/gpu_memory.cpp b/wddm/gpu_memory.cpp index 3a1a080e78..ae89abb642 100644 --- a/wddm/gpu_memory.cpp +++ b/wddm/gpu_memory.cpp @@ -1,7 +1,7 @@ #include #include -#include "inc/wddm/gpu_memory.h" -#include "inc/wddm/device.h" +#include "impl/wddm/gpu_memory.h" +#include "impl/wddm/device.h" #include "util/utils.h" using namespace std; diff --git a/wddm/queue.cpp b/wddm/queue.cpp index 99f8bebd2e..68b5f25967 100644 --- a/wddm/queue.cpp +++ b/wddm/queue.cpp @@ -44,8 +44,8 @@ #include #include -#include "inc/wddm/queue.h" -#include "inc/registers.h" +#include "impl/wddm/queue.h" +#include "impl/registers.h" #include "libhsakmt.h" #include "hsa-runtime/inc/hsa.h" diff --git a/wddm/va_mgr.cpp b/wddm/va_mgr.cpp index 9b63747280..40f912b0d1 100644 --- a/wddm/va_mgr.cpp +++ b/wddm/va_mgr.cpp @@ -1,7 +1,7 @@ #include #include #include -#include "inc/wddm/va_mgr.h" +#include "impl/wddm/va_mgr.h" #include "libhsakmt.h" using namespace std;