From 7b45ebfe7fc8f066c8c8bd558297f3e3efd843fe Mon Sep 17 00:00:00 2001 From: Jay Cornwall Date: Tue, 31 Jan 2017 16:39:45 -0800 Subject: [PATCH] Add support for ARM - Build system fixes - No user-mode high-precision timer by default, use clock_gettime - Use C11 aligned_alloc pending C++17 std::aligned_alloc Change-Id: I268365bdfd11d1e817a89584b9e086ee5b86e1dc [ROCm/ROCR-Runtime commit: 9e575ea96a67d2f50c184e63e5aea7538d2242f8] --- .../runtime/cmake_modules/hsa_common.cmake | 4 ++-- .../runtime/hsa-ext-finalize/CMakeLists.txt | 4 ++-- .../runtime/hsa-ext-image/CMakeLists.txt | 4 ++-- .../runtime/hsa-runtime-tools/CMakeLists.txt | 4 ++-- .../hsa-runtime/cmake_modules/hsa_common.cmake | 4 ++-- .../runtime/hsa-runtime/core/util/timer.h | 12 +++++++++++- .../runtime/hsa-runtime/core/util/utils.h | 6 +++--- 7 files changed, 24 insertions(+), 14 deletions(-) diff --git a/projects/rocr-runtime/runtime/cmake_modules/hsa_common.cmake b/projects/rocr-runtime/runtime/cmake_modules/hsa_common.cmake index 0f7dd57a05..f9a2a9e9c2 100644 --- a/projects/rocr-runtime/runtime/cmake_modules/hsa_common.cmake +++ b/projects/rocr-runtime/runtime/cmake_modules/hsa_common.cmake @@ -57,9 +57,9 @@ if(UNIX) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fpic") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wl,--unresolved-symbols=ignore-in-shared-libs") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-strict-aliasing") - if ( IS64BIT ) + if ( CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" ) set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m64 -msse -msse2" ) - else () + elseif ( CMAKE_SYSTEM_PROCESSOR STREQUAL "x86" ) set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m32" ) endif () if ( "${CMAKE_BUILD_TYPE}" STREQUAL Debug ) diff --git a/projects/rocr-runtime/runtime/hsa-ext-finalize/CMakeLists.txt b/projects/rocr-runtime/runtime/hsa-ext-finalize/CMakeLists.txt index 6ac49927ff..9d32749a52 100755 --- a/projects/rocr-runtime/runtime/hsa-ext-finalize/CMakeLists.txt +++ b/projects/rocr-runtime/runtime/hsa-ext-finalize/CMakeLists.txt @@ -89,9 +89,9 @@ set ( CMAKE_CXX_FLAGS "-std=c++11 " ) set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror -fexceptions -fno-rtti -fvisibility=hidden -Wno-error=sign-compare -Wno-sign-compare -Wno-write-strings -Wno-deprecated-declarations -Wno-conversion-null -fno-math-errno -fno-threadsafe-statics -fmerge-all-constants -fms-extensions -Wno-error=comment -Wno-comment -Wno-error=pointer-arith -Wno-pointer-arith -fPIC" ) -if ( IS64BIT ) +if ( CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" ) set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m64 -msse -msse2" ) -else () +elseif ( CMAKE_SYSTEM_PROCESSOR STREQUAL "x86" ) set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m32" ) endif () diff --git a/projects/rocr-runtime/runtime/hsa-ext-image/CMakeLists.txt b/projects/rocr-runtime/runtime/hsa-ext-image/CMakeLists.txt index c1221dceaf..5326850085 100755 --- a/projects/rocr-runtime/runtime/hsa-ext-image/CMakeLists.txt +++ b/projects/rocr-runtime/runtime/hsa-ext-image/CMakeLists.txt @@ -88,9 +88,9 @@ set ( CMAKE_CXX_FLAGS "-std=c++11 " ) set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror -fexceptions -fno-rtti -fvisibility=hidden -Wno-error=sign-compare -Wno-sign-compare -Wno-write-strings -Wno-deprecated-declarations -Wno-conversion-null -fno-math-errno -fno-threadsafe-statics -fmerge-all-constants -fms-extensions -Wno-error=comment -Wno-comment -Wno-error=pointer-arith -Wno-pointer-arith -fPIC" ) -if ( IS64BIT ) +if ( CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" ) set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m64 -msse -msse2" ) -else () +elseif ( CMAKE_SYSTEM_PROCESSOR STREQUAL "x86" ) set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m32" ) endif () diff --git a/projects/rocr-runtime/runtime/hsa-runtime-tools/CMakeLists.txt b/projects/rocr-runtime/runtime/hsa-runtime-tools/CMakeLists.txt index 573be7c99f..09fdfd1176 100755 --- a/projects/rocr-runtime/runtime/hsa-runtime-tools/CMakeLists.txt +++ b/projects/rocr-runtime/runtime/hsa-runtime-tools/CMakeLists.txt @@ -76,9 +76,9 @@ set ( CMAKE_CXX_FLAGS "-std=c++11 ") set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror=return-type -Werror -fexceptions -fno-rtti -fvisibility=hidden -Wno-error=sign-compare -Wno-error=enum-compare -Wno-sign-compare -Wno-write-strings -Wno-deprecated-declarations -Wno-conversion-null -fno-math-errno -fno-threadsafe-statics -fmerge-all-constants -fms-extensions -Wno-error=comment -Wno-comment -Wno-error=pointer-arith -Wno-pointer-arith -fPIC" ) -if (IS64BIT) +if ( CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" ) set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m64 -msse -msse2" ) -else () +elseif ( CMAKE_SYSTEM_PROCESSOR STREQUAL "x86" ) set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m32" ) endif () diff --git a/projects/rocr-runtime/runtime/hsa-runtime/cmake_modules/hsa_common.cmake b/projects/rocr-runtime/runtime/hsa-runtime/cmake_modules/hsa_common.cmake index 97e9304888..0b907bf3d4 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/cmake_modules/hsa_common.cmake +++ b/projects/rocr-runtime/runtime/hsa-runtime/cmake_modules/hsa_common.cmake @@ -57,9 +57,9 @@ if(UNIX) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fpic") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wl,--unresolved-symbols=ignore-in-shared-libs") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-strict-aliasing") - if ( IS64BIT ) + if ( CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" ) set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m64 -msse -msse2" ) - else () + elseif ( CMAKE_SYSTEM_PROCESSOR STREQUAL "x86" ) set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m32" ) endif () if ( "${CMAKE_BUILD_TYPE}" STREQUAL Debug ) diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/util/timer.h b/projects/rocr-runtime/runtime/hsa-runtime/core/util/timer.h index f8e52beee1..65b9dfb6b1 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/util/timer.h +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/util/timer.h @@ -46,7 +46,7 @@ #include "core/util/utils.h" #include "core/util/os.h" #include - +#include #include namespace timer { @@ -144,8 +144,18 @@ class fast_clock { typedef uint64_t raw_rep; typedef double raw_frequency; +#ifdef __x86_64__ static __forceinline raw_rep raw_now() { return __rdtsc(); } static __forceinline raw_frequency raw_freq() { return freq; } +#endif +#ifdef __aarch64__ + static __forceinline raw_rep raw_now() { + struct timespec ts; + clock_gettime(CLOCK_MONOTONIC_RAW, &ts); + return (raw_rep(ts.tv_sec) * 1000000000 + raw_rep(ts.tv_nsec)); + } + static __forceinline raw_frequency raw_freq() { return 1.e-9; } +#endif private: static double period_ps; diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/util/utils.h b/projects/rocr-runtime/runtime/hsa-runtime/core/util/utils.h index 7652e30b00..880472af38 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/util/utils.h +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/util/utils.h @@ -54,9 +54,9 @@ typedef unsigned int uint; typedef uint64_t uint64; #if defined(__GNUC__) -#include "mm_malloc.h" #if defined(__i386__) || defined(__x86_64__) #include +#elif defined(__aarch64__) #else #error \ "Processor or compiler not identified. " \ @@ -70,9 +70,9 @@ typedef uint64_t uint64; #define __ALIGNED__(x) __attribute__((aligned(x))) static __forceinline void* _aligned_malloc(size_t size, size_t alignment) { - return _mm_malloc(size, alignment); + return aligned_alloc(alignment, size); } -static __forceinline void _aligned_free(void* ptr) { return _mm_free(ptr); } +static __forceinline void _aligned_free(void* ptr) { return free(ptr); } #elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) #include "intrin.h" #define __ALIGNED__(x) __declspec(align(x))