From 7c8e575f5d8c33ace4e951a80f97dfa30f67b965 Mon Sep 17 00:00:00 2001 From: Sunday Clement <83687182+Sundance636@users.noreply.github.com> Date: Thu, 18 Sep 2025 09:09:30 -0400 Subject: [PATCH] Fix Undefined behavior from signed bit shifts (#871) * libhsakmt: fix UB due to signed integer literal in 1 << 31 Bit shift operations on signed numbers should not shift into or beyond the signed bit as this results in Undefined Behaviour. Signed-off-by: Sunday Clement * libhsakmt: Fix UB due to signed integer literal in 1 << x Bit Shifting an unsigned integer is undefined behavior. BUG: SWDEV-532853 Signed-off-by: Sunday Clement * rocr: Fix UB in various places due signed integer in bit shift Bit shifting signed integers into or beyond the sign bit is undefined. Signed-off-by: Sunday Clement * rocr: Change signed integer literals to unsigned Changing the signed integers in the macro expressions throughout the file to avoid overflow. Signed-off-by: Sunday Clement --------- Signed-off-by: Sunday Clement Co-authored-by: Flora Cui --- projects/rocr-runtime/libhsakmt/src/queues.c | 2 +- .../rocr-runtime/libhsakmt/src/rbtree_amd.h | 2 +- .../rocr-runtime/libhsakmt/src/topology.c | 4 +- .../hsa-runtime/core/inc/amd_gpu_pm4.h | 158 +++++++++--------- .../image/addrlib/src/core/addrelemlib.cpp | 8 +- 5 files changed, 87 insertions(+), 87 deletions(-) diff --git a/projects/rocr-runtime/libhsakmt/src/queues.c b/projects/rocr-runtime/libhsakmt/src/queues.c index 1f95b43b78..0e3500f5ef 100644 --- a/projects/rocr-runtime/libhsakmt/src/queues.c +++ b/projects/rocr-runtime/libhsakmt/src/queues.c @@ -663,7 +663,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtCreateQueueExt(HSAuint32 NodeId, /* cu_mask_count counts bits. It must be multiple of 32 */ q->cu_mask_count = ALIGN_UP_32(cu_num, 32); for (i = 0; i < cu_num; i++) - q->cu_mask[i/32] |= (1 << (i % 32)); + q->cu_mask[i/32] |= (1U << (i % 32)); } struct kfd_ioctl_create_queue_args args = {0}; diff --git a/projects/rocr-runtime/libhsakmt/src/rbtree_amd.h b/projects/rocr-runtime/libhsakmt/src/rbtree_amd.h index 5990404b7d..d711d9905b 100644 --- a/projects/rocr-runtime/libhsakmt/src/rbtree_amd.h +++ b/projects/rocr-runtime/libhsakmt/src/rbtree_amd.h @@ -33,7 +33,7 @@ struct rbtree_key_s { unsigned long addr; unsigned long size; }; -#define BIT(x) (1<<(x)) +#define BIT(x) (1U<<(x)) #define LKP_ALL (BIT(ADDR_BIT) | BIT(SIZE_BIT)) #define LKP_ADDR (BIT(ADDR_BIT)) #define LKP_ADDR_SIZE (BIT(ADDR_BIT) | BIT(SIZE_BIT)) diff --git a/projects/rocr-runtime/libhsakmt/src/topology.c b/projects/rocr-runtime/libhsakmt/src/topology.c index 0992868f5c..324033c840 100644 --- a/projects/rocr-runtime/libhsakmt/src/topology.c +++ b/projects/rocr-runtime/libhsakmt/src/topology.c @@ -469,7 +469,7 @@ static void cpumap_to_cpu_ci(char *shared_cpu_map, struct proc_cpuinfo *cpuinfo, HsaCacheProperties *this_cache) { - int num_hexs, bit; + uint32_t num_hexs, bit; uint32_t proc, apicid, mask; char *ch_ptr; @@ -482,7 +482,7 @@ static void cpumap_to_cpu_ci(char *shared_cpu_map, while (num_hexs-- > 0) { mask = strtol(ch_ptr, NULL, 16); /* each X */ for (bit = 0; bit < 32; bit++) { - if (!((1 << bit) & mask)) + if (!((1U << bit) & mask)) continue; proc = num_hexs * 32 + bit; apicid = cpuinfo[proc].apicid; diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_gpu_pm4.h b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_gpu_pm4.h index 31ed5453a9..8aba78b4b2 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_gpu_pm4.h +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_gpu_pm4.h @@ -45,22 +45,22 @@ // clang-format off -#define PM4_HDR_IT_OPCODE_NOP 0x10 -#define PM4_HDR_IT_OPCODE_INDIRECT_BUFFER 0x3F -#define PM4_HDR_IT_OPCODE_RELEASE_MEM 0x49 -#define PM4_HDR_IT_OPCODE_ACQUIRE_MEM 0x58 +#define PM4_HDR_IT_OPCODE_NOP 0x10U +#define PM4_HDR_IT_OPCODE_INDIRECT_BUFFER 0x3FU +#define PM4_HDR_IT_OPCODE_RELEASE_MEM 0x49U +#define PM4_HDR_IT_OPCODE_ACQUIRE_MEM 0x58U -#define PM4_HDR_IT_OPCODE_ATOMIC_MEM 0x1E -#define PM4_HDR_IT_OPCODE_PRED_EXEC 0x23 -#define PM4_HDR_IT_OPCODE_WRITE_DATA 0x37 -#define PM4_HDR_IT_OPCODE_WAIT_REG_MEM 0x3C -#define PM4_HDR_IT_OPCODE_COPY_DATA 0x40 -#define PM4_HDR_IT_OPCODE_DMA_DATA 0x50 +#define PM4_HDR_IT_OPCODE_ATOMIC_MEM 0x1EU +#define PM4_HDR_IT_OPCODE_PRED_EXEC 0x23U +#define PM4_HDR_IT_OPCODE_WRITE_DATA 0x37U +#define PM4_HDR_IT_OPCODE_WAIT_REG_MEM 0x3CU +#define PM4_HDR_IT_OPCODE_COPY_DATA 0x40U +#define PM4_HDR_IT_OPCODE_DMA_DATA 0x50U -#define PM4_HDR_SHADER_TYPE(x) (((x) & 0x1) << 1) -#define PM4_HDR_IT_OPCODE(x) (((x) & 0xFF) << 8) -#define PM4_HDR_COUNT(x) (((x) & 0x3FFF) << 16) -#define PM4_HDR_TYPE(x) (((x) & 0x3) << 30) +#define PM4_HDR_SHADER_TYPE(x) (((x) & 0x1U) << 1) +#define PM4_HDR_IT_OPCODE(x) (((x) & 0xFFU) << 8) +#define PM4_HDR_COUNT(x) (((x) & 0x3FFFU) << 16) +#define PM4_HDR_TYPE(x) (((x) & 0x3U) << 30) #define PM4_HDR(it_opcode, pkt_size_dw, gfxip_ver) ( \ PM4_HDR_SHADER_TYPE((gfxip_ver) == 7 ? 1 : 0) | \ @@ -69,78 +69,78 @@ PM4_HDR_TYPE(3) \ ) -#define PM4_INDIRECT_BUFFER_DW1_IB_BASE_LO(x) (((x) & 0x3FFFFFFF) << 2) -#define PM4_INDIRECT_BUFFER_DW2_IB_BASE_HI(x) (((x) & 0xFFFF) << 0) -#define PM4_INDIRECT_BUFFER_DW3_IB_SIZE(x) (((x) & 0xFFFFF) << 0) -#define PM4_INDIRECT_BUFFER_DW3_IB_VALID(x) (((x) & 0x1) << 23) +#define PM4_INDIRECT_BUFFER_DW1_IB_BASE_LO(x) (((x) & 0x3FFFFFFFU) << 2) +#define PM4_INDIRECT_BUFFER_DW2_IB_BASE_HI(x) (((x) & 0xFFFFU) << 0) +#define PM4_INDIRECT_BUFFER_DW3_IB_SIZE(x) (((x) & 0xFFFFFU) << 0) +#define PM4_INDIRECT_BUFFER_DW3_IB_VALID(x) (((x) & 0x1U) << 23) -#define PM4_ACQUIRE_MEM_DW1_COHER_CNTL(x) (((x) & 0x7FFFFFFF) << 0) -# define PM4_ACQUIRE_MEM_COHER_CNTL_TC_WB_ACTION_ENA (1 << 18) -# define PM4_ACQUIRE_MEM_COHER_CNTL_TC_ACTION_ENA (1 << 23) -# define PM4_ACQUIRE_MEM_COHER_CNTL_SH_KCACHE_ACTION_ENA (1 << 27) -# define PM4_ACQUIRE_MEM_COHER_CNTL_SH_ICACHE_ACTION_ENA (1 << 29) -#define PM4_ACQUIRE_MEM_DW2_COHER_SIZE(x) (((x) & 0xFFFFFFFF) << 0) -#define PM4_ACQUIRE_MEM_DW3_COHER_SIZE_HI(x) (((x) & 0xFF) << 0) -#define PM4_ACQUIRE_MEM_DW4_COHER_BASE(x) ((x >> 8) & 0xFFFFFFFF) -#define PM4_ACQUIRE_MEM_DW4_COHER_BASE_HI(x) ((x >> 40) & 0xFFFFFF) -#define PM4_ACQUIRE_MEM_DW7_GCR_CNTL(x) (((x) & 0x7FFFF) << 0) -# define PM4_ACQUIRE_MEM_GCR_CNTL_GLI_INV(x) (((x) & 0x3) << 0) -# define PM4_ACQUIRE_MEM_GCR_CNTL_GLK_INV (1 << 7) -# define PM4_ACQUIRE_MEM_GCR_CNTL_GLV_INV (1 << 8) -# define PM4_ACQUIRE_MEM_GCR_CNTL_GL1_INV (1 << 9) -# define PM4_ACQUIRE_MEM_GCR_CNTL_GL2_INV (1 << 14) -# define PM4_ACQUIRE_MEM_GCR_CNTL_GL2_WB (1 << 15) -#define PM4_RELEASE_MEM_DW1_EVENT_INDEX(x) (((x) & 0xF) << 8) -# define PM4_RELEASE_MEM_EVENT_INDEX_AQL 0x7 +#define PM4_ACQUIRE_MEM_DW1_COHER_CNTL(x) (((x) & 0x7FFFFFFFU) << 0) +# define PM4_ACQUIRE_MEM_COHER_CNTL_TC_WB_ACTION_ENA (1U << 18) +# define PM4_ACQUIRE_MEM_COHER_CNTL_TC_ACTION_ENA (1U << 23) +# define PM4_ACQUIRE_MEM_COHER_CNTL_SH_KCACHE_ACTION_ENA (1U << 27) +# define PM4_ACQUIRE_MEM_COHER_CNTL_SH_ICACHE_ACTION_ENA (1U << 29) +#define PM4_ACQUIRE_MEM_DW2_COHER_SIZE(x) (((x) & 0xFFFFFFFFU) << 0) +#define PM4_ACQUIRE_MEM_DW3_COHER_SIZE_HI(x) (((x) & 0xFFU) << 0) +#define PM4_ACQUIRE_MEM_DW4_COHER_BASE(x) ((x >> 8) & 0xFFFFFFFFU) +#define PM4_ACQUIRE_MEM_DW4_COHER_BASE_HI(x) ((x >> 40) & 0xFFFFFFU) +#define PM4_ACQUIRE_MEM_DW7_GCR_CNTL(x) (((x) & 0x7FFFFU) << 0) +# define PM4_ACQUIRE_MEM_GCR_CNTL_GLI_INV(x) (((x) & 0x3U) << 0) +# define PM4_ACQUIRE_MEM_GCR_CNTL_GLK_INV (1U << 7) +# define PM4_ACQUIRE_MEM_GCR_CNTL_GLV_INV (1U << 8) +# define PM4_ACQUIRE_MEM_GCR_CNTL_GL1_INV (1U << 9) +# define PM4_ACQUIRE_MEM_GCR_CNTL_GL2_INV (1U << 14) +# define PM4_ACQUIRE_MEM_GCR_CNTL_GL2_WB (1U << 15) +#define PM4_RELEASE_MEM_DW1_EVENT_INDEX(x) (((x) & 0xFU) << 8) +# define PM4_RELEASE_MEM_EVENT_INDEX_AQL 0x7U -#define PM4_ATOMIC_MEM_DW1_ATOMIC(x) (((x) & 0x7F) << 0) -# define PM4_ATOMIC_MEM_GL2_OP_ATOMIC_SWAP_RTN_64 (39 << 0) -#define PM4_ATOMIC_MEM_DW2_ADDR_LO(x) (((x) & 0xFFFFFFF8) << 0) -#define PM4_ATOMIC_MEM_DW3_ADDR_HI(x) (((x) & 0xFFFFFFFF) << 0) -#define PM4_ATOMIC_MEM_DW4_SRC_DATA_LO(x) (((x) & 0xFFFFFFFF) << 0) -#define PM4_ATOMIC_MEM_DW5_SRC_DATA_HI(x) (((x) & 0xFFFFFFFF) << 0) +#define PM4_ATOMIC_MEM_DW1_ATOMIC(x) (((x) & 0x7FU) << 0) +# define PM4_ATOMIC_MEM_GL2_OP_ATOMIC_SWAP_RTN_64 (39U << 0) +#define PM4_ATOMIC_MEM_DW2_ADDR_LO(x) (((x) & 0xFFFFFFF8U) << 0) +#define PM4_ATOMIC_MEM_DW3_ADDR_HI(x) (((x) & 0xFFFFFFFFU) << 0) +#define PM4_ATOMIC_MEM_DW4_SRC_DATA_LO(x) (((x) & 0xFFFFFFFFU) << 0) +#define PM4_ATOMIC_MEM_DW5_SRC_DATA_HI(x) (((x) & 0xFFFFFFFFU) << 0) -#define PM4_PRED_EXEC_DW1_HEADER(x) (((x) & 0xFFFFFFFF) << 0) -#define PM4_PRED_EXEC_DW2_EXEC_COUNT(x) (((x) & 0x3FFF) << 0) -#define PM4_PRED_EXEC_DW2_VIRTUALXCCID_SELECT(x) (((x) & 0xFF) << 24) +#define PM4_PRED_EXEC_DW1_HEADER(x) (((x) & 0xFFFFFFFFU) << 0) +#define PM4_PRED_EXEC_DW2_EXEC_COUNT(x) (((x) & 0x3FFFU) << 0) +#define PM4_PRED_EXEC_DW2_VIRTUALXCCID_SELECT(x) (((x) & 0xFFU) << 24) -#define PM4_COPY_DATA_DW1(x) (((x) & 0xFFFFFFFF) << 0) -# define PM4_COPY_DATA_SRC_SEL_ATOMIC_RETURN_DATA (6 << 0) -# define PM4_COPY_DATA_DST_SEL_TC_12 (2 << 8) -# define PM4_COPY_DATA_COUNT_SEL (1 << 16) -# define PM4_COPY_DATA_WR_CONFIRM (1 << 20) -#define PM4_COPY_DATA_DW4_DST_ADDR_LO(x) (((x) & 0xFFFFFFF8) << 0) -#define PM4_COPY_DATA_DW5_DST_ADDR_HI(x) (((x) & 0xFFFFFFFF) << 0) +#define PM4_COPY_DATA_DW1(x) (((x) & 0xFFFFFFFFU) << 0) +# define PM4_COPY_DATA_SRC_SEL_ATOMIC_RETURN_DATA (6U << 0) +# define PM4_COPY_DATA_DST_SEL_TC_12 (2U << 8) +# define PM4_COPY_DATA_COUNT_SEL (1U << 16) +# define PM4_COPY_DATA_WR_CONFIRM (1U << 20) +#define PM4_COPY_DATA_DW4_DST_ADDR_LO(x) (((x) & 0xFFFFFFF8U) << 0) +#define PM4_COPY_DATA_DW5_DST_ADDR_HI(x) (((x) & 0xFFFFFFFFU) << 0) -#define PM4_WAIT_REG_MEM_DW1(x) (((x) & 0xFFFFFFFF) << 0) -# define PM4_WAIT_REG_MEM_FUNCTION_EQUAL_TO_REFERENCE (3 << 0) -# define PM4_WAIT_REG_MEM_MEM_SPACE_MEMORY_SPACE (1 << 4) -# define PM4_WAIT_REG_MEM_OPERATION_WAIT_REG_MEM (0 << 6) -#define PM4_WAIT_REG_MEM_DW2_MEM_POLL_ADDR_LO(x) (((x) & 0xFFFFFFFC) << 0) -#define PM4_WAIT_REG_MEM_DW3_MEM_POLL_ADDR_HI(x) (((x) & 0xFFFFFFFF) << 0) -#define PM4_WAIT_REG_MEM_DW4_REFERENCE(x) (((x) & 0xFFFFFFFF) << 0) -#define PM4_WAIT_REG_MEM_DW6(x) (((x) & 0x8000FFFF) << 0) -# define PM4_WAIT_REG_MEM_POLL_INTERVAL(x) (((x) & 0xFFFF) << 0) -# define PM4_WAIT_REG_MEM_OPTIMIZE_ACE_OFFLOAD_MODE (1 << 31) +#define PM4_WAIT_REG_MEM_DW1(x) (((x) & 0xFFFFFFFFU) << 0) +# define PM4_WAIT_REG_MEM_FUNCTION_EQUAL_TO_REFERENCE (3U << 0) +# define PM4_WAIT_REG_MEM_MEM_SPACE_MEMORY_SPACE (1U << 4) +# define PM4_WAIT_REG_MEM_OPERATION_WAIT_REG_MEM (0U << 6) +#define PM4_WAIT_REG_MEM_DW2_MEM_POLL_ADDR_LO(x) (((x) & 0xFFFFFFFCU) << 0) +#define PM4_WAIT_REG_MEM_DW3_MEM_POLL_ADDR_HI(x) (((x) & 0xFFFFFFFFU) << 0) +#define PM4_WAIT_REG_MEM_DW4_REFERENCE(x) (((x) & 0xFFFFFFFFU) << 0) +#define PM4_WAIT_REG_MEM_DW6(x) (((x) & 0x8000FFFFU) << 0) +# define PM4_WAIT_REG_MEM_POLL_INTERVAL(x) (((x) & 0xFFFFU) << 0) +# define PM4_WAIT_REG_MEM_OPTIMIZE_ACE_OFFLOAD_MODE (1U << 31) -#define PM4_DMA_DATA_DW1(x) (((x) & 0xFFFFFFFF) << 0) -# define PM4_DMA_DATA_DST_SEL_DST_ADDR_USING_L2 (3 << 20) -# define PM4_DMA_DATA_SRC_SEL_SRC_ADDR_USING_L2 (3 << 29) -#define PM4_DMA_DATA_DW2_SRC_ADDR_LO(x) (((x) & 0xFFFFFFFF) << 0) -#define PM4_DMA_DATA_DW3_SRC_ADDR_HI(x) (((x) & 0xFFFFFFFF) << 0) -#define PM4_DMA_DATA_DW4_DST_ADDR_LO(x) (((x) & 0xFFFFFFFF) << 0) -#define PM4_DMA_DATA_DW5_DST_ADDR_HI(x) (((x) & 0xFFFFFFFF) << 0) -#define PM4_DMA_DATA_DW6(x) (((x) & 0xFFFFFFFF) << 0) -# define PM4_DMA_DATA_BYTE_COUNT(x) (((x) & 0x3FFFFFF) << 0) -# define PM4_DMA_DATA_DIS_WC (1 << 31) -# define PM4_DMA_DATA_DIS_WC_LAST (0 << 31) +#define PM4_DMA_DATA_DW1(x) (((x) & 0xFFFFFFFFU) << 0) +# define PM4_DMA_DATA_DST_SEL_DST_ADDR_USING_L2 (3U << 20) +# define PM4_DMA_DATA_SRC_SEL_SRC_ADDR_USING_L2 (3U << 29) +#define PM4_DMA_DATA_DW2_SRC_ADDR_LO(x) (((x) & 0xFFFFFFFFU) << 0) +#define PM4_DMA_DATA_DW3_SRC_ADDR_HI(x) (((x) & 0xFFFFFFFFU) << 0) +#define PM4_DMA_DATA_DW4_DST_ADDR_LO(x) (((x) & 0xFFFFFFFFU) << 0) +#define PM4_DMA_DATA_DW5_DST_ADDR_HI(x) (((x) & 0xFFFFFFFFU) << 0) +#define PM4_DMA_DATA_DW6(x) (((x) & 0xFFFFFFFFU) << 0) +# define PM4_DMA_DATA_BYTE_COUNT(x) (((x) & 0x3FFFFFFU) << 0) +# define PM4_DMA_DATA_DIS_WC (1U << 31) +# define PM4_DMA_DATA_DIS_WC_LAST (0U << 31) -#define PM4_WRITE_DATA_DW1(x) (((x) & 0xFFFFFF00) << 0) -# define PM4_WRITE_DATA_DST_SEL_TC_L2 (2 << 8) -# define PM4_WRITE_DATA_WR_CONFIRM_WAIT_CONFIRMATION (1 << 20) -#define PM4_WRITE_DATA_DW2_DST_MEM_ADDR_LO(x) (((x) & 0xFFFFFFFC) << 0) -#define PM4_WRITE_DATA_DW3_DST_MEM_ADDR_HI(x) (((x) & 0xFFFFFFFF) << 0) -#define PM4_WRITE_DATA_DW4_DATA(x) (((x) & 0xFFFFFFFF) << 0) +#define PM4_WRITE_DATA_DW1(x) (((x) & 0xFFFFFF00U) << 0) +# define PM4_WRITE_DATA_DST_SEL_TC_L2 (2U << 8) +# define PM4_WRITE_DATA_WR_CONFIRM_WAIT_CONFIRMATION (1U << 20) +#define PM4_WRITE_DATA_DW2_DST_MEM_ADDR_LO(x) (((x) & 0xFFFFFFFCU) << 0) +#define PM4_WRITE_DATA_DW3_DST_MEM_ADDR_HI(x) (((x) & 0xFFFFFFFFU) << 0) +#define PM4_WRITE_DATA_DW4_DATA(x) (((x) & 0xFFFFFFFFU) << 0) // clang-format on diff --git a/projects/rocr-runtime/runtime/hsa-runtime/image/addrlib/src/core/addrelemlib.cpp b/projects/rocr-runtime/runtime/hsa-runtime/image/addrlib/src/core/addrelemlib.cpp index 615d8f0e39..a15dabff19 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/image/addrlib/src/core/addrelemlib.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/image/addrlib/src/core/addrelemlib.cpp @@ -116,7 +116,7 @@ ElemLib* ElemLib::Create( */ VOID ElemLib::Flt32sToInt32s( ADDR_FLT_32 value, ///< [in] ADDR_FLT_32 value - UINT_32 bits, ///< [in] nubmer of bits in value + UINT_32 bits, ///< [in] number of bits in value NumberType numberType, ///< [in] the type of number UINT_32* pResult) ///< [out] Int32 value { @@ -134,7 +134,7 @@ VOID ElemLib::Flt32sToInt32s( return; // these are zero-bit components, so don't set result case ADDR_UINT_BITS: // unsigned integer bit field, clamped to range - uscale = (1<= 1) { - *pResult = (1<>23)&0xFF);