Fix av::copy in dialects to use capture-by-value

Change-Id: Ibce1488a1326f66b92b4d5b351230666b691ed31
Цей коміт міститься в:
Ben Sander
2016-09-01 14:00:46 -05:00
джерело aa823871db
коміт fb7046160f
2 змінених файлів з 7 додано та 9 видалено
+2 -7
Переглянути файл
@@ -41,17 +41,12 @@ int main(int argc, char *argv[])
// Launch kernel onto AV.
// Because the kernel PFE and the copies are submitted to the same AV, they will execute in order
// and we don't need additional synchronization to ensure the copies complete before the PFE begins.
#if 1
hc::completion_future cf=
hc::parallel_for_each(av, hc::extent<1> (sizeElements),
[&] (hc::index<1> idx) [[hc]] {
[=] (hc::index<1> idx) [[hc]] {
int i = idx[0];
//C_d[i] = A_d[i] + B_d[i];
C_d[0] = A_d[1] + B_d[2];
C_d[i] = A_d[i] + B_d[i];
});
cf.wait();
#endif
// This copy is in the same AV as the kernel and thus will wait for the kernel to finish before executing.
+5 -2
Переглянути файл
@@ -27,22 +27,25 @@ int main(int argc, char *argv[])
hipMalloc(&B_d, sizeBytes);
hipMalloc(&C_d, sizeBytes);
// Initialize host data
// Initialize host memory
for (int i=0; i<sizeElements; i++) {
A_h[i] = 1.618f * i;
B_h[i] = 3.142f * i;
}
// H2D Copy
hipMemcpy(A_d, A_h, sizeBytes, hipMemcpyHostToDevice);
hipMemcpy(B_d, B_h, sizeBytes, hipMemcpyHostToDevice);
// Launch kernel onto default accelerator:
// Launch kernel onto default accelerator
int blockSize = 256; // pick arbitrary block size
int blocks = (sizeElements+blockSize-1)/blockSize; // round up to launch enough blocks
hipLaunchKernel(vadd_hip, dim3(blocks), dim3(blockSize), 0, 0, A_d, B_d, C_d, sizeElements);
// D2H Copy
hipMemcpy(C_h, C_d, sizeBytes, hipMemcpyDeviceToHost);
// Verify
for (int i=0; i<sizeElements; i++) {
float ref= 1.618f * i + 3.142f * i;
if (C_h[i] != ref) {