Implement hip_hc.ll into HIP headers
Move all __hip_hc_ir_* functions from hip_hc.ll into HIP header as inline asm. Remove hip_hc.ll and build dependencies from HIP.
This commit is contained in:
@@ -237,7 +237,6 @@ endif()
|
||||
# Install hip_hcc if platform is hcc
|
||||
if(HIP_PLATFORM STREQUAL "hcc")
|
||||
install(TARGETS hip_hcc_static hip_hcc hip_device DESTINATION lib)
|
||||
install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/src/hip_hc.ll DESTINATION lib)
|
||||
|
||||
# Install .hipInfo
|
||||
install(FILES ${PROJECT_BINARY_DIR}/.hipInfo DESTINATION lib)
|
||||
|
||||
@@ -254,8 +254,7 @@ if($HIP_PLATFORM eq "hcc"){
|
||||
}
|
||||
|
||||
if(($HIP_PLATFORM eq "hcc")){
|
||||
$ENV{HCC_EXTRA_LIBRARIES}="$HIP_PATH/lib/hip_hc.ll\n";
|
||||
$ENV{HIP_HC_IR_FILE}="";
|
||||
$ENV{HCC_EXTRA_LIBRARIES}="\n";
|
||||
}
|
||||
|
||||
if($HIP_PLATFORM eq "nvcc"){
|
||||
@@ -508,7 +507,7 @@ if($HIP_PLATFORM eq "hcc" or $HIP_PLATFORM eq "clang"){
|
||||
print "No valid AMD GPU target was either specified or found. Please specify a valid target using --amdgpu-target=" and die();
|
||||
}
|
||||
|
||||
$ENV{HCC_EXTRA_LIBRARIES}="$HIP_PATH/lib/hip_hc.ll\n";
|
||||
$ENV{HCC_EXTRA_LIBRARIES}="\n";
|
||||
|
||||
if($HIP_PLATFORM eq "hcc") {
|
||||
$GPU_ARCH_OPT = " --amdgpu-target=";
|
||||
|
||||
@@ -34,11 +34,55 @@ THE SOFTWARE.
|
||||
typedef unsigned long ulong;
|
||||
typedef unsigned int uint;
|
||||
|
||||
extern "C" __device__ unsigned int __hip_hc_ir_umul24_int(unsigned int, unsigned int);
|
||||
extern "C" __device__ signed int __hip_hc_ir_mul24_int(signed int, signed int);
|
||||
extern "C" __device__ signed int __hip_hc_ir_mulhi_int(signed int, signed int);
|
||||
extern "C" __device__ unsigned int __hip_hc_ir_umulhi_int(unsigned int, unsigned int);
|
||||
extern "C" __device__ unsigned int __hip_hc_ir_usad_int(unsigned int, unsigned int, unsigned int);
|
||||
extern "C" __device__ inline uint __hip_hc_ir_umul24_int(uint a, uint b) {
|
||||
// define i32 @__hip_hc_ir_umul24_int(i32 %a, i32 %b) #1 {
|
||||
// %1 = tail call i32 asm sideeffect "v_mul_u32_u24 $0, $1, $2","=v,v,v"(i32 %a, i32 %b)
|
||||
// ret i32 %1
|
||||
// }
|
||||
uint out;
|
||||
__asm volatile("v_mul_u32_u24 %0, %1, %2" : "=v"(out) : "v"(a), "v"(b));
|
||||
return out;
|
||||
}
|
||||
|
||||
extern "C" __device__ inline int __hip_hc_ir_mul24_int(int a, int b) {
|
||||
// define i32 @__hip_hc_ir_mul24_int(i32 %a, i32 %b) #1 {
|
||||
// %1 = tail call i32 asm sideeffect "v_mul_i32_i24 $0, $1, $2","=v,v,v"(i32 %a, i32 %b)
|
||||
// ret i32 %1
|
||||
// }
|
||||
int out;
|
||||
__asm volatile("v_mul_i32_i24 %0, %1, %2" : "=v"(out) : "v"(a), "v"(b));
|
||||
return out;
|
||||
}
|
||||
|
||||
extern "C" __device__ inline int __hip_hc_ir_mulhi_int(int a, int b) {
|
||||
// define i32 @__hip_hc_ir_mulhi_int(i32 %a, i32 %b) #1 {
|
||||
// %1 = tail call i32 asm sideeffect "v_mul_hi_i32 $0, $1, $2","=v,v,v"(i32 %a, i32 %b)
|
||||
// ret i32 %1
|
||||
// }
|
||||
int out;
|
||||
__asm volatile("v_mul_hi_i32 %0, %1, %2" : "=v"(out) : "v"(a), "v"(b));
|
||||
return out;
|
||||
}
|
||||
|
||||
extern "C" __device__ inline uint __hip_hc_ir_umulhi_int(uint a, uint b) {
|
||||
// define i32 @__hip_hc_ir_umulhi_int(i32 %a, i32 %b) #1 {
|
||||
// %1 = tail call i32 asm sideeffect "v_mul_hi_u32 $0, $1, $2","=v,v,v"(i32 %a, i32 %b)
|
||||
// ret i32 %1
|
||||
// }
|
||||
uint out;
|
||||
__asm volatile("v_mul_hi_u32 %0, %1, %2" : "=v"(out) : "v"(a), "v"(b));
|
||||
return out;
|
||||
}
|
||||
|
||||
extern "C" __device__ inline uint __hip_hc_ir_usad_int(uint a, uint b, uint c) {
|
||||
// define i32 @__hip_hc_ir_usad_int(i32 %a, i32 %b, i32 %c) #1 {
|
||||
// %1 = tail call i32 asm sideeffect "v_sad_u32 $0, $1, $2, $3","=v,v,v,v"(i32 %a, i32 %b, i32 %c)
|
||||
// ret i32 %1
|
||||
// }
|
||||
uint out;
|
||||
__asm volatile("v_sad_u32 %0, %1, %2, %3" : "=v"(out) : "v"(a), "v"(b), "v"(c));
|
||||
return out;
|
||||
}
|
||||
|
||||
/*
|
||||
Integer Intrinsics
|
||||
@@ -556,7 +600,7 @@ uint64_t __ballot64(int a) {
|
||||
// %b = tail call i64 asm "v_cmp_ne_i32_e64 $0, 0, $1", "=s,v"(i32 %a) #1
|
||||
// ret i64 %b
|
||||
// }
|
||||
__asm("v_cmp_ne_i32_e64 $0, 0, $1" : "=s"(s) : "v"(a));
|
||||
__asm("v_cmp_ne_i32_e64 %0, 0, %1" : "=s"(s) : "v"(a));
|
||||
return s;
|
||||
}
|
||||
|
||||
|
||||
@@ -5,7 +5,6 @@ install(FILES @PROJECT_BINARY_DIR@/libhip_hcc.so DESTINATION lib)
|
||||
install(FILES @PROJECT_BINARY_DIR@/libhip_hcc_static.a DESTINATION lib)
|
||||
install(FILES @PROJECT_BINARY_DIR@/libhip_device.a DESTINATION lib)
|
||||
install(FILES @PROJECT_BINARY_DIR@/.hipInfo DESTINATION lib)
|
||||
install(FILES @hip_SOURCE_DIR@/src/hip_hc.ll DESTINATION lib)
|
||||
install(FILES @PROJECT_BINARY_DIR@/hip-config.cmake @PROJECT_BINARY_DIR@/hip-config-version.cmake DESTINATION lib/cmake/hip)
|
||||
install(FILES @hip_SOURCE_DIR@/packaging/hip-targets.cmake @hip_SOURCE_DIR@/packaging/hip-targets-release.cmake DESTINATION lib/cmake/hip)
|
||||
|
||||
|
||||
@@ -1,30 +0,0 @@
|
||||
target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
|
||||
target triple = "amdgcn--amdhsa"
|
||||
|
||||
define i32 @__hip_hc_ir_mul24_int(i32 %a, i32 %b) #1 {
|
||||
%1 = tail call i32 asm sideeffect "v_mul_i32_i24 $0, $1, $2","=v,v,v"(i32 %a, i32 %b)
|
||||
ret i32 %1
|
||||
}
|
||||
|
||||
define i32 @__hip_hc_ir_umul24_int(i32 %a, i32 %b) #1 {
|
||||
%1 = tail call i32 asm sideeffect "v_mul_u32_u24 $0, $1, $2","=v,v,v"(i32 %a, i32 %b)
|
||||
ret i32 %1
|
||||
}
|
||||
|
||||
define i32 @__hip_hc_ir_mulhi_int(i32 %a, i32 %b) #1 {
|
||||
%1 = tail call i32 asm sideeffect "v_mul_hi_i32 $0, $1, $2","=v,v,v"(i32 %a, i32 %b)
|
||||
ret i32 %1
|
||||
}
|
||||
|
||||
define i32 @__hip_hc_ir_umulhi_int(i32 %a, i32 %b) #1 {
|
||||
%1 = tail call i32 asm sideeffect "v_mul_hi_u32 $0, $1, $2","=v,v,v"(i32 %a, i32 %b)
|
||||
ret i32 %1
|
||||
}
|
||||
|
||||
define i32 @__hip_hc_ir_usad_int(i32 %a, i32 %b, i32 %c) #1 {
|
||||
%1 = tail call i32 asm sideeffect "v_sad_u32 $0, $1, $2, $3","=v,v,v,v"(i32 %a, i32 %b, i32 %c)
|
||||
ret i32 %1
|
||||
}
|
||||
|
||||
attributes #1 = { alwaysinline nounwind }
|
||||
|
||||
Viittaa uudesa ongelmassa
Block a user