enable hc_am example in hcc_dialects example
Change-Id: Iec2f9eb05f95cb025c157fee8fd284aab844d1a2
[ROCm/hip commit: cd3a0a2d61]
Этот коммит содержится в:
@@ -1,8 +1,9 @@
|
||||
HCC_HOME?=/opt/rocm/hcc
|
||||
HCC = $(HCC_HOME)/bin/hcc
|
||||
|
||||
HCC_CFLAGS= `$(HCC_HOME)/bin/hcc-config --cxxflags`
|
||||
HCC_LDFLAGS= `$(HCC_HOME)/bin/hcc-config --ldflags`
|
||||
OPT=-O2
|
||||
HCC_CFLAGS= `$(HCC_HOME)/bin/hcc-config --cxxflags` ${OPT}
|
||||
HCC_LDFLAGS= `$(HCC_HOME)/bin/hcc-config --ldflags` ${OPT}
|
||||
|
||||
CPPAMP_CFLAGS= -std=c++amp -stdlib=libc++ -I$(HCC_HOME)/include
|
||||
CPPAMP_LDFLAGS= -std=c++amp -L$(HCC_HOME)/lib -Wl,--rpath=$(HCC_HOME)/lib -lc++ -lc++abi -ldl -lpthread -Wl,--whole-archive -lmcwamp -Wl,--no-whole-archive
|
||||
@@ -19,7 +20,7 @@ $(error hcc_dialects requires hcc compiler and only runs on hcc platform)
|
||||
endif
|
||||
|
||||
|
||||
TARGETS=vadd_hc_arrayview vadd_hc_array vadd_amp_arrayview vadd_hip
|
||||
TARGETS=vadd_hc_arrayview vadd_hc_array vadd_hc_am vadd_amp_arrayview vadd_hip
|
||||
|
||||
all: $(TARGETS)
|
||||
|
||||
@@ -51,7 +52,7 @@ vadd_hc_array: vadd_hc_array.o
|
||||
vadd_hc_am.o: vadd_hc_am.cpp
|
||||
$(HCC) $(HCC_CFLAGS) -c $< -o $@
|
||||
vadd_hc_am: vadd_hc_am.o
|
||||
$(HCC) $(HCC_LDFLAGS) $< -o $@
|
||||
$(HCC) $(HCC_LDFLAGS) -lhc_am $< -o $@
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -32,23 +32,30 @@ int main(int argc, char *argv[])
|
||||
for (int i=0; i<sizeElements; i++) {
|
||||
A_h[i] = 1.618f * i;
|
||||
B_h[i] = 3.142f * i;
|
||||
C_h[i] = 0;
|
||||
}
|
||||
|
||||
av.copy(A_h, A_d); // C++ copy H2D
|
||||
av.copy(B_h, B_d); //C++ copy H2D
|
||||
av.copy(A_h, A_d, sizeBytes); // C++ copy H2D
|
||||
av.copy(B_h, B_d, sizeBytes); // C++ copy H2D
|
||||
|
||||
// Launch kernel onto AV.
|
||||
// Because the kernel PFE and the copies are submitted to same AV, they will execute in order
|
||||
// and we don't need additional synchronization to ensure the copies complete before the PFE begins.
|
||||
#if 1
|
||||
hc::completion_future cf=
|
||||
hc::parallel_for_each(av, hc::extent<1> (sizeElements),
|
||||
[&] (hc::index<1> idx) [[hc]] {
|
||||
int i = idx[0];
|
||||
C_d[i] = A_d[i] + B_d[i];
|
||||
//C_d[i] = A_d[i] + B_d[i];
|
||||
C_d[0] = A_d[1] + B_d[2];
|
||||
});
|
||||
cf.wait();
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
// This copy is in same AV as the kernel and thus will wait for the kernel to finish before executing.
|
||||
av.copy(C_d, C_h); // C++ copy D2H
|
||||
av.copy(C_d, C_h, sizeBytes); // C++ copy D2H
|
||||
|
||||
|
||||
for (int i=0; i<sizeElements; i++) {
|
||||
|
||||
Ссылка в новой задаче
Block a user