Fix av::copy in dialects to use capture-by-value
Change-Id: Ibce1488a1326f66b92b4d5b351230666b691ed31
Цей коміт міститься в:
@@ -41,17 +41,12 @@ int main(int argc, char *argv[])
|
||||
// Launch kernel onto AV.
|
||||
// Because the kernel PFE (parallel_for_each) and the copies are submitted to the same AV, they will execute in order
|
||||
// and we don't need additional synchronization to ensure the copies complete before the PFE begins.
|
||||
#if 1
|
||||
hc::completion_future cf=
|
||||
hc::parallel_for_each(av, hc::extent<1> (sizeElements),
|
||||
[&] (hc::index<1> idx) [[hc]] {
|
||||
[=] (hc::index<1> idx) [[hc]] {
|
||||
int i = idx[0];
|
||||
//C_d[i] = A_d[i] + B_d[i];
|
||||
C_d[0] = A_d[1] + B_d[2];
|
||||
C_d[i] = A_d[i] + B_d[i];
|
||||
});
|
||||
cf.wait();
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
// This copy is in the same AV as the kernel and thus will wait for the kernel to finish before executing.
|
||||
|
||||
@@ -27,22 +27,25 @@ int main(int argc, char *argv[])
|
||||
hipMalloc(&B_d, sizeBytes);
|
||||
hipMalloc(&C_d, sizeBytes);
|
||||
|
||||
// Initialize host data
|
||||
// Initialize host memory
|
||||
for (int i=0; i<sizeElements; i++) {
|
||||
A_h[i] = 1.618f * i;
|
||||
B_h[i] = 3.142f * i;
|
||||
}
|
||||
|
||||
// H2D Copy
|
||||
hipMemcpy(A_d, A_h, sizeBytes, hipMemcpyHostToDevice);
|
||||
hipMemcpy(B_d, B_h, sizeBytes, hipMemcpyHostToDevice);
|
||||
|
||||
// Launch kernel onto default accelerator:
|
||||
// Launch kernel onto default accelerator
|
||||
int blockSize = 256; // pick arbitrary block size
|
||||
int blocks = (sizeElements+blockSize-1)/blockSize; // round up to launch enough blocks
|
||||
hipLaunchKernel(vadd_hip, dim3(blocks), dim3(blockSize), 0, 0, A_d, B_d, C_d, sizeElements);
|
||||
|
||||
// D2H Copy
|
||||
hipMemcpy(C_h, C_d, sizeBytes, hipMemcpyDeviceToHost);
|
||||
|
||||
// Verify
|
||||
for (int i=0; i<sizeElements; i++) {
|
||||
float ref= 1.618f * i + 3.142f * i;
|
||||
if (C_h[i] != ref) {
|
||||
|
||||
Посилання в новій задачі
Заблокувати користувача