Include arch-specific IR on the fallback path

Change-Id: Ib04996aae2c21eb73ef2a9f6305915e0caccd704


[ROCm/hip commit: 27d2fc99ca]
Этот коммит содержится в:
Aditya Atluri
2017-02-08 12:19:06 -06:00
родитель 82c0dcb03f
Коммит c692cd5d4a
2 изменённых файлов: 7 добавлений и 30 удалений
+1
Просмотреть файл
@@ -153,6 +153,7 @@ if ($HIP_PLATFORM eq "hcc") {
# Fallback path: taken when all four $target_gfx* flags compare equal to 0.
# It appends every supported --amdgpu-target to the linker flags and exports
# the location of the gfx803 IR file through the environment.
if ($target_gfx701 eq 0 and $target_gfx801 eq 0 and $target_gfx802 eq 0 and $target_gfx803 eq 0)
{
    $HIPLDFLAGS .= " --amdgpu-target=gfx701 --amdgpu-target=gfx801 --amdgpu-target=gfx802 --amdgpu-target=gfx803";
    # The value is a file path, so it must not carry a trailing newline:
    # a consumer that uses the variable verbatim would look for a file whose
    # name ends in "\n".
    $ENV{HIP_HC_IR_GFX803} = "$HIP_PATH/lib/hip_hc_gfx803.ll";
}
# Add trace marker library:
+6 -30
Просмотреть файл
@@ -1,5 +1,5 @@
/*
Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
@@ -17,40 +17,16 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <stdio.h>
#include <stdlib.h>
#include <iostream>
#include <hip/hip_fp16.h>
#include "hip/hip_runtime_api.h"
#define DSIZE 4
#define SCF 0.5f
#define nTPB 256
// Multiplies each input element by SCF in half precision and stores the
// product, converted back to float, in the matching output slot.
//   lp    - launch-parameter argument; not referenced in the visible body
//   din   - input floats, read at index idx
//   dout  - output floats, written at index idx
//   dsize - number of elements; threads whose idx is >= dsize do nothing
// NOTE(review): the brace that closes this kernel is not present in this
// excerpt (the text looks like a flattened diff) - confirm against the real file.
__global__ void half_scale_kernel(hipLaunchParm lp, float *din, float *dout, int dsize){
// Flat thread index along x: thread offset plus block size times block index.
int idx = hipThreadIdx_x+ hipBlockDim_x*hipBlockIdx_x;
// Bounds guard so surplus threads of the last block do not touch din/dout.
if (idx < dsize){
// Both operands are narrowed from float to half before the multiply.
__half scf = __float2half(SCF);
__half kin = __float2half(din[idx]);
__half kout;
kout = __hmul(kin, scf);
// Earlier variant kept for reference: widen to float, multiply, narrow again.
// kout = cvt_float_to_half(cvt_half_to_float(kin)*cvt_half_to_float(scf));
// Widen the half result back to float for the caller.
dout[idx] = __half2float(kout);
}
/*
 * Each thread loads the element of A selected by its x thread index into a
 * local __half. The visible body stores nothing and does not reference lp,
 * B or C.
 * NOTE(review): presumably a compile/link check for half support - confirm.
 */
__global__ void halfMath(hipLaunchParm lp, half *A, half *B, half *C) {
    const int lane = hipThreadIdx_x;
    __half loaded = A[lane];
}
int main(){
float *hin, *hout, *din, *dout;
hin = (float *)malloc(DSIZE*sizeof(float));
hout = (float *)malloc(DSIZE*sizeof(float));
for (int i = 0; i < DSIZE; i++) hin[i] = i;
hipMalloc(&din, DSIZE*sizeof(float));
hipMalloc(&dout, DSIZE*sizeof(float));
hipMemcpy(din, hin, DSIZE*sizeof(float), hipMemcpyHostToDevice);
hipLaunchKernel(half_scale_kernel, dim3((DSIZE+nTPB-1)/nTPB),dim3(nTPB), 0, 0, din, dout, DSIZE);
hipMemcpy(hout, dout, DSIZE*sizeof(float), hipMemcpyDeviceToHost);
for (int i = 0; i < DSIZE; i++) printf("%f\n", hout[i]);
return 0;
}