Include arch-specific IR on the fallback path
Change-Id: Ib04996aae2c21eb73ef2a9f6305915e0caccd704
[ROCm/hip commit: 27d2fc99ca]
Этот коммит содержится в:
@@ -153,6 +153,7 @@ if ($HIP_PLATFORM eq "hcc") {
|
||||
if ($target_gfx701 eq 0 and $target_gfx801 eq 0 and $target_gfx802 eq 0 and $target_gfx803 eq 0)
|
||||
{
|
||||
$HIPLDFLAGS .= " --amdgpu-target=gfx701 --amdgpu-target=gfx801 --amdgpu-target=gfx802 --amdgpu-target=gfx803";
|
||||
$ENV{HIP_HC_IR_GFX803}="$HIP_PATH/lib/hip_hc_gfx803.ll\n";
|
||||
}
|
||||
|
||||
# Add trace marker library:
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
@@ -17,40 +17,16 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <iostream>
|
||||
#include <hip/hip_fp16.h>
|
||||
#include "hip/hip_runtime_api.h"
|
||||
|
||||
#define DSIZE 4
|
||||
#define SCF 0.5f
|
||||
#define nTPB 256
|
||||
__global__ void half_scale_kernel(hipLaunchParm lp, float *din, float *dout, int dsize){
|
||||
|
||||
int idx = hipThreadIdx_x+ hipBlockDim_x*hipBlockIdx_x;
|
||||
if (idx < dsize){
|
||||
__half scf = __float2half(SCF);
|
||||
__half kin = __float2half(din[idx]);
|
||||
__half kout;
|
||||
|
||||
kout = __hmul(kin, scf);
|
||||
|
||||
// kout = cvt_float_to_half(cvt_half_to_float(kin)*cvt_half_to_float(scf));
|
||||
|
||||
dout[idx] = __half2float(kout);
|
||||
}
|
||||
__global__ void halfMath(hipLaunchParm lp, half *A, half *B, half *C) {
|
||||
int tx = hipThreadIdx_x;
|
||||
__half a = A[tx];
|
||||
|
||||
}
|
||||
|
||||
int main(){
|
||||
|
||||
float *hin, *hout, *din, *dout;
|
||||
hin = (float *)malloc(DSIZE*sizeof(float));
|
||||
hout = (float *)malloc(DSIZE*sizeof(float));
|
||||
for (int i = 0; i < DSIZE; i++) hin[i] = i;
|
||||
hipMalloc(&din, DSIZE*sizeof(float));
|
||||
hipMalloc(&dout, DSIZE*sizeof(float));
|
||||
hipMemcpy(din, hin, DSIZE*sizeof(float), hipMemcpyHostToDevice);
|
||||
hipLaunchKernel(half_scale_kernel, dim3((DSIZE+nTPB-1)/nTPB),dim3(nTPB), 0, 0, din, dout, DSIZE);
|
||||
hipMemcpy(hout, dout, DSIZE*sizeof(float), hipMemcpyDeviceToHost);
|
||||
for (int i = 0; i < DSIZE; i++) printf("%f\n", hout[i]);
|
||||
return 0;
|
||||
}
|
||||
|
||||
Ссылка в новой задаче
Block a user