Partial fix async after kernel launch signal issue
Change-Id: Ib48d6564379160035bded9493b93663fba361710
[ROCm/hip commit: c756bb3398]
Bu işleme şunda yer alıyor:
@@ -445,6 +445,7 @@ private:
|
||||
|
||||
private:
|
||||
void enqueueBarrier(hsa_queue_t* queue, ihipSignal_t *depSignal);
|
||||
void enqueueBarrier(hsa_queue_t* queue, hsa_signal_t *depSignal);
|
||||
void waitCopy(LockedAccessor_StreamCrit_t &crit, ihipSignal_t *signal);
|
||||
|
||||
// The unsigned return is hipMemcpyKind
|
||||
|
||||
@@ -359,6 +359,42 @@ void ihipStream_t::enqueueBarrier(hsa_queue_t* queue, ihipSignal_t *depSignal)
|
||||
hsa_signal_store_relaxed(queue->doorbell_signal, index);
|
||||
}
|
||||
|
||||
void ihipStream_t::enqueueBarrier(hsa_queue_t* queue, hsa_signal_t *depSignal)
|
||||
{
|
||||
|
||||
// Obtain the write index for the command queue
|
||||
uint64_t index = hsa_queue_load_write_index_relaxed(queue);
|
||||
const uint32_t queueMask = queue->size - 1;
|
||||
|
||||
// Define the barrier packet to be at the calculated queue index address
|
||||
hsa_barrier_and_packet_t* barrier = &(((hsa_barrier_and_packet_t*)(queue->base_address))[index&queueMask]);
|
||||
memset(barrier, 0, sizeof(hsa_barrier_and_packet_t));
|
||||
|
||||
// setup header
|
||||
uint16_t header = HSA_PACKET_TYPE_BARRIER_AND << HSA_PACKET_HEADER_TYPE;
|
||||
header |= 1 << HSA_PACKET_HEADER_BARRIER;
|
||||
//header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE;
|
||||
//header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE;
|
||||
barrier->header = header;
|
||||
|
||||
barrier->dep_signal[0].handle = 0;
|
||||
barrier->dep_signal[1].handle = 0;
|
||||
barrier->dep_signal[2].handle = 0;
|
||||
barrier->dep_signal[3].handle = 0;
|
||||
barrier->dep_signal[4].handle = 0;
|
||||
|
||||
hsa_signal_t signal;
|
||||
hsa_signal_create(1, 0, NULL, &signal);
|
||||
*depSignal = signal;
|
||||
barrier->completion_signal = signal;
|
||||
|
||||
// TODO - check queue overflow, return error:
|
||||
// Increment write index and ring doorbell to dispatch the kernel
|
||||
hsa_queue_store_write_index_relaxed(queue, index+1);
|
||||
hsa_signal_store_relaxed(queue->doorbell_signal, index);
|
||||
}
|
||||
|
||||
|
||||
|
||||
//--
|
||||
//When the commands in a stream change types (ie kernel command follows a data command,
|
||||
@@ -429,15 +465,16 @@ int ihipStream_t::preCopyCommand(LockedAccessor_StreamCrit_t &crit, ihipSignal_t
|
||||
tprintf (DB_SYNC, "stream %p switch %s to %s (async copy dep on prev kernel)\n",
|
||||
this, ihipCommandName[crit->_last_command_type], ihipCommandName[copyType]);
|
||||
needSync = 1;
|
||||
hsa_signal_t *hsaSignal = (static_cast<hsa_signal_t*> (crit->_last_kernel_future.get_native_handle()));
|
||||
if (hsaSignal) {
|
||||
this->enqueueBarrier(static_cast<hsa_queue_t*>(_av.get_hsa_queue()), waitSignal);
|
||||
// hsa_signal_t *hsaSignal = (static_cast<hsa_signal_t*> (crit->_last_kernel_future.get_native_handle()));
|
||||
/* if (hsaSignal) {
|
||||
// Keep reference to the kernel future in order to keep the
|
||||
// dependent signal alive.
|
||||
_depFutures.push_back(crit->_last_kernel_future);
|
||||
*waitSignal = * hsaSignal;
|
||||
} else {
|
||||
assert(0); // if NULL signal, and we return 1, hsa_amd_memory_copy_async will fail. Confirm this never happens.
|
||||
}
|
||||
}*/
|
||||
} else if (crit->_last_copy_signal) {
|
||||
needSync = 1;
|
||||
tprintf (DB_SYNC, "stream %p switch %s to %s (async copy dep on other copy #%lu)\n",
|
||||
@@ -1364,7 +1401,6 @@ void ihipStream_t::setAsyncCopyAgents(unsigned kind, ihipCommand_t *commandType,
|
||||
void ihipStream_t::copySync(LockedAccessor_StreamCrit_t &crit, void* dst, const void* src, size_t sizeBytes, unsigned kind)
|
||||
{
|
||||
ihipDevice_t *device = this->getDevice();
|
||||
|
||||
if (device == NULL) {
|
||||
throw ihipException(hipErrorInvalidDevice);
|
||||
}
|
||||
@@ -1446,7 +1482,10 @@ void ihipStream_t::copySync(LockedAccessor_StreamCrit_t &crit, void* dst, const
|
||||
tprintf(DB_COPY1, "D2H && !dstTracked: staged copy D2H dst=%p src=%p sz=%zu\n", dst, src, sizeBytes);
|
||||
//printf ("staged-copy- read dep signals\n");
|
||||
device->_staging_buffer[1]->CopyDeviceToHost(dst, src, sizeBytes, depSignalCnt ? &depSignal : NULL);
|
||||
|
||||
if(crit->_last_command_type == ihipCommandKernel){
|
||||
std::cout<<"Destroying depSignal MemcpySync"<<std::endl;
|
||||
hsa_signal_destroy(depSignal);
|
||||
}
|
||||
// The copy completes before returning so can reset queue to empty:
|
||||
this->wait(crit, true);
|
||||
|
||||
@@ -1529,6 +1568,10 @@ void ihipStream_t::copySync(LockedAccessor_StreamCrit_t &crit, void* dst, const
|
||||
throw ihipException(hipErrorInvalidValue);
|
||||
}
|
||||
}
|
||||
if(crit->_last_command_type == ihipCommandKernel){
|
||||
hsa_signal_destroy(depSignal);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
@@ -1602,6 +1645,9 @@ void ihipStream_t::copyAsync(void* dst, const void* src, size_t sizeBytes, unsig
|
||||
|
||||
hsa_status_t hsa_status = hsa_amd_memory_async_copy(dst, dstAgent, src, srcAgent, sizeBytes, depSignalCnt, depSignalCnt ? &depSignal:0x0, ihip_signal->_hsa_signal);
|
||||
|
||||
if (crit->_last_command_type == ihipCommandKernel) {
|
||||
hsa_signal_destroy(depSignal);
|
||||
}
|
||||
|
||||
if (hsa_status == HSA_STATUS_SUCCESS) {
|
||||
if (HIP_LAUNCH_BLOCKING) {
|
||||
|
||||
@@ -0,0 +1,64 @@
|
||||
/*
|
||||
Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Test for checking the functionality of
|
||||
* hipError_t hipDeviceSynchronize();
|
||||
*/
|
||||
|
||||
#include"hip_runtime.h"
|
||||
#include<iostream>
|
||||
|
||||
#define _SIZE sizeof(int)*1024*1024
|
||||
#define NUM_STREAMS 20
|
||||
#define ITER 1<<10
|
||||
|
||||
__global__ void Iter(hipLaunchParm lp, int *Ad, int num){
|
||||
int tx = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
|
||||
if(tx == 0){
|
||||
for(int i = 0; i<num;i++){
|
||||
Ad[tx] += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int main(){
|
||||
int *A[NUM_STREAMS];
|
||||
int *Ad[NUM_STREAMS];
|
||||
hipStream_t stream[NUM_STREAMS];
|
||||
for(int i=0;i<NUM_STREAMS;i++){
|
||||
hipHostMalloc((void**)&A[i], _SIZE, hipHostMallocDefault);
|
||||
A[i][0] = 1;
|
||||
hipMalloc((void**)&Ad[i], _SIZE);
|
||||
hipStreamCreate(&stream[i]);
|
||||
}
|
||||
for(int i=0;i<NUM_STREAMS;i++){
|
||||
for(int j=0;j<ITER;j++){
|
||||
std::cout<<"Iter: "<<j<<std::endl;
|
||||
hipMemcpyAsync(Ad[i], A[i], _SIZE, hipMemcpyHostToDevice, stream[i]);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(Iter), dim3(1), dim3(1), 0, stream[i], Ad[i], 1<<30);
|
||||
hipMemcpyAsync(A[i], Ad[i], _SIZE, hipMemcpyDeviceToHost, stream[i]);
|
||||
}
|
||||
}
|
||||
|
||||
std::cout<<"Waitin..."<<std::endl;
|
||||
|
||||
hipDeviceSynchronize();
|
||||
}
|
||||
|
||||
@@ -0,0 +1,64 @@
|
||||
/*
|
||||
Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include"hip_runtime.h"
|
||||
#include<iostream>
|
||||
#include<time.h>
|
||||
|
||||
#define NUM_SIZE 8
|
||||
#define NUM_ITER 1 << 30
|
||||
static size_t size[NUM_SIZE];
|
||||
|
||||
__global__ void Add(hipLaunchParm lp, int *Ad){
|
||||
int tx = hipThreadIdx_x;
|
||||
Ad[tx] = Ad[tx] + tx;
|
||||
}
|
||||
|
||||
void setup(){
|
||||
for(int i=0;i<NUM_SIZE;i++){
|
||||
size[i] = 1<<(i+6); // start at 8 bytes
|
||||
}
|
||||
}
|
||||
|
||||
void valSet(int *A, int val, size_t size){
|
||||
size_t len = size/sizeof(int);
|
||||
for(int i=0;i<len;i++){
|
||||
A[i] = val;
|
||||
}
|
||||
}
|
||||
|
||||
int main(){
|
||||
setup();
|
||||
int *A, *Ad;
|
||||
for(int i=0;i<NUM_SIZE;i++){
|
||||
A = (int*)malloc(size[i]);
|
||||
valSet(A, 1, size[i]);
|
||||
hipMalloc(&Ad, size[i]);
|
||||
std::cout<<"Malloc success at size: "<<size[i]<<std::endl;
|
||||
|
||||
for(int j=0;j<NUM_ITER;j++){
|
||||
std::cout<<"Iter: "<<j<<std::endl;
|
||||
hipMemcpy(Ad, A, size[i], hipMemcpyHostToDevice);
|
||||
hipLaunchKernel(Add, dim3(1), dim3(size[i]/sizeof(int)), 0, 0, Ad);
|
||||
hipMemcpy(A, Ad, size[i], hipMemcpyDeviceToHost);
|
||||
}
|
||||
|
||||
hipDeviceSynchronize();
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,63 @@
|
||||
/*
|
||||
Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Test for checking the functionality of
|
||||
* hipError_t hipDeviceSynchronize();
|
||||
*/
|
||||
|
||||
#include"hip_runtime.h"
|
||||
#include<iostream>
|
||||
|
||||
#define _SIZE sizeof(int)*1024*1024
|
||||
#define NUM_STREAMS 20
|
||||
#define ITER 1<<10
|
||||
|
||||
__global__ void Iter(hipLaunchParm lp, int *Ad, int num){
|
||||
int tx = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
|
||||
if(tx == 0){
|
||||
for(int i = 0; i<num;i++){
|
||||
Ad[tx] += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int main(){
|
||||
int *A[NUM_STREAMS];
|
||||
int *Ad[NUM_STREAMS];
|
||||
hipStream_t stream[NUM_STREAMS];
|
||||
for(int i=0;i<NUM_STREAMS;i++){
|
||||
hipHostMalloc((void**)&A[i], _SIZE, hipHostMallocDefault);
|
||||
A[i][0] = 1;
|
||||
hipMalloc((void**)&Ad[i], _SIZE);
|
||||
}
|
||||
for(int i=0;i<NUM_STREAMS;i++){
|
||||
for(int j=0;j<ITER;j++){
|
||||
std::cout<<"Iter: "<<j<<std::endl;
|
||||
hipMemcpy(Ad[i], A[i], _SIZE, hipMemcpyHostToDevice);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(Iter), dim3(1), dim3(1), 0, 0, Ad[i], 1<<30);
|
||||
hipMemcpyAsync(A[i], Ad[i], _SIZE, hipMemcpyDeviceToHost);
|
||||
}
|
||||
}
|
||||
|
||||
std::cout<<"Waitin..."<<std::endl;
|
||||
|
||||
hipDeviceSynchronize();
|
||||
}
|
||||
|
||||
Yeni konuda referans
Bir kullanıcı engelle