Partial fix async after kernel launch signal issue

Change-Id: Ib48d6564379160035bded9493b93663fba361710


[ROCm/hip commit: c756bb3398]
Bu işleme şunda yer alıyor:
Aditya Atluri
2016-07-23 14:54:20 -05:00
ebeveyn 2e5a6ab630
işleme 6f95ad0a8d
5 değiştirilmiş dosya ile 243 ekleme ve 5 silme
+1
Dosyayı Görüntüle
@@ -445,6 +445,7 @@ private:
private:
void enqueueBarrier(hsa_queue_t* queue, ihipSignal_t *depSignal);
void enqueueBarrier(hsa_queue_t* queue, hsa_signal_t *depSignal);
void waitCopy(LockedAccessor_StreamCrit_t &crit, ihipSignal_t *signal);
// The unsigned return is hipMemcpyKind
+51 -5
Dosyayı Görüntüle
@@ -359,6 +359,42 @@ void ihipStream_t::enqueueBarrier(hsa_queue_t* queue, ihipSignal_t *depSignal)
hsa_signal_store_relaxed(queue->doorbell_signal, index);
}
void ihipStream_t::enqueueBarrier(hsa_queue_t* queue, hsa_signal_t *depSignal)
{
// Obtain the write index for the command queue
uint64_t index = hsa_queue_load_write_index_relaxed(queue);
const uint32_t queueMask = queue->size - 1;
// Define the barrier packet to be at the calculated queue index address
hsa_barrier_and_packet_t* barrier = &(((hsa_barrier_and_packet_t*)(queue->base_address))[index&queueMask]);
memset(barrier, 0, sizeof(hsa_barrier_and_packet_t));
// setup header
uint16_t header = HSA_PACKET_TYPE_BARRIER_AND << HSA_PACKET_HEADER_TYPE;
header |= 1 << HSA_PACKET_HEADER_BARRIER;
//header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE;
//header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE;
barrier->header = header;
barrier->dep_signal[0].handle = 0;
barrier->dep_signal[1].handle = 0;
barrier->dep_signal[2].handle = 0;
barrier->dep_signal[3].handle = 0;
barrier->dep_signal[4].handle = 0;
hsa_signal_t signal;
hsa_signal_create(1, 0, NULL, &signal);
*depSignal = signal;
barrier->completion_signal = signal;
// TODO - check queue overflow, return error:
// Increment write index and ring doorbell to dispatch the kernel
hsa_queue_store_write_index_relaxed(queue, index+1);
hsa_signal_store_relaxed(queue->doorbell_signal, index);
}
//--
//When the commands in a stream change types (ie kernel command follows a data command,
@@ -429,15 +465,16 @@ int ihipStream_t::preCopyCommand(LockedAccessor_StreamCrit_t &crit, ihipSignal_t
tprintf (DB_SYNC, "stream %p switch %s to %s (async copy dep on prev kernel)\n",
this, ihipCommandName[crit->_last_command_type], ihipCommandName[copyType]);
needSync = 1;
hsa_signal_t *hsaSignal = (static_cast<hsa_signal_t*> (crit->_last_kernel_future.get_native_handle()));
if (hsaSignal) {
this->enqueueBarrier(static_cast<hsa_queue_t*>(_av.get_hsa_queue()), waitSignal);
// hsa_signal_t *hsaSignal = (static_cast<hsa_signal_t*> (crit->_last_kernel_future.get_native_handle()));
/* if (hsaSignal) {
// Keep reference to the kernel future in order to keep the
// dependent signal alive.
_depFutures.push_back(crit->_last_kernel_future);
*waitSignal = * hsaSignal;
} else {
assert(0); // if NULL signal, and we return 1, hsa_amd_memory_copy_async will fail. Confirm this never happens.
}
}*/
} else if (crit->_last_copy_signal) {
needSync = 1;
tprintf (DB_SYNC, "stream %p switch %s to %s (async copy dep on other copy #%lu)\n",
@@ -1364,7 +1401,6 @@ void ihipStream_t::setAsyncCopyAgents(unsigned kind, ihipCommand_t *commandType,
void ihipStream_t::copySync(LockedAccessor_StreamCrit_t &crit, void* dst, const void* src, size_t sizeBytes, unsigned kind)
{
ihipDevice_t *device = this->getDevice();
if (device == NULL) {
throw ihipException(hipErrorInvalidDevice);
}
@@ -1446,7 +1482,10 @@ void ihipStream_t::copySync(LockedAccessor_StreamCrit_t &crit, void* dst, const
tprintf(DB_COPY1, "D2H && !dstTracked: staged copy D2H dst=%p src=%p sz=%zu\n", dst, src, sizeBytes);
//printf ("staged-copy- read dep signals\n");
device->_staging_buffer[1]->CopyDeviceToHost(dst, src, sizeBytes, depSignalCnt ? &depSignal : NULL);
if(crit->_last_command_type == ihipCommandKernel){
std::cout<<"Destroying depSignal MemcpySync"<<std::endl;
hsa_signal_destroy(depSignal);
}
// The copy completes before returning so can reset queue to empty:
this->wait(crit, true);
@@ -1529,6 +1568,10 @@ void ihipStream_t::copySync(LockedAccessor_StreamCrit_t &crit, void* dst, const
throw ihipException(hipErrorInvalidValue);
}
}
if(crit->_last_command_type == ihipCommandKernel){
hsa_signal_destroy(depSignal);
}
}
@@ -1602,6 +1645,9 @@ void ihipStream_t::copyAsync(void* dst, const void* src, size_t sizeBytes, unsig
hsa_status_t hsa_status = hsa_amd_memory_async_copy(dst, dstAgent, src, srcAgent, sizeBytes, depSignalCnt, depSignalCnt ? &depSignal:0x0, ihip_signal->_hsa_signal);
if (crit->_last_command_type == ihipCommandKernel) {
hsa_signal_destroy(depSignal);
}
if (hsa_status == HSA_STATUS_SUCCESS) {
if (HIP_LAUNCH_BLOCKING) {
+64
Dosyayı Görüntüle
@@ -0,0 +1,64 @@
/*
Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/*
* Test for checking the functionality of
* hipError_t hipDeviceSynchronize();
*/
#include"hip_runtime.h"
#include<iostream>
#define _SIZE sizeof(int)*1024*1024
#define NUM_STREAMS 20
#define ITER 1<<10
__global__ void Iter(hipLaunchParm lp, int *Ad, int num){
int tx = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
if(tx == 0){
for(int i = 0; i<num;i++){
Ad[tx] += 1;
}
}
}
int main(){
int *A[NUM_STREAMS];
int *Ad[NUM_STREAMS];
hipStream_t stream[NUM_STREAMS];
for(int i=0;i<NUM_STREAMS;i++){
hipHostMalloc((void**)&A[i], _SIZE, hipHostMallocDefault);
A[i][0] = 1;
hipMalloc((void**)&Ad[i], _SIZE);
hipStreamCreate(&stream[i]);
}
for(int i=0;i<NUM_STREAMS;i++){
for(int j=0;j<ITER;j++){
std::cout<<"Iter: "<<j<<std::endl;
hipMemcpyAsync(Ad[i], A[i], _SIZE, hipMemcpyHostToDevice, stream[i]);
hipLaunchKernel(HIP_KERNEL_NAME(Iter), dim3(1), dim3(1), 0, stream[i], Ad[i], 1<<30);
hipMemcpyAsync(A[i], Ad[i], _SIZE, hipMemcpyDeviceToHost, stream[i]);
}
}
std::cout<<"Waitin..."<<std::endl;
hipDeviceSynchronize();
}
+64
Dosyayı Görüntüle
@@ -0,0 +1,64 @@
/*
Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include"hip_runtime.h"
#include<iostream>
#include<time.h>
#define NUM_SIZE 8
#define NUM_ITER 1 << 30
static size_t size[NUM_SIZE];
__global__ void Add(hipLaunchParm lp, int *Ad){
int tx = hipThreadIdx_x;
Ad[tx] = Ad[tx] + tx;
}
void setup(){
for(int i=0;i<NUM_SIZE;i++){
size[i] = 1<<(i+6); // start at 8 bytes
}
}
void valSet(int *A, int val, size_t size){
size_t len = size/sizeof(int);
for(int i=0;i<len;i++){
A[i] = val;
}
}
int main(){
setup();
int *A, *Ad;
for(int i=0;i<NUM_SIZE;i++){
A = (int*)malloc(size[i]);
valSet(A, 1, size[i]);
hipMalloc(&Ad, size[i]);
std::cout<<"Malloc success at size: "<<size[i]<<std::endl;
for(int j=0;j<NUM_ITER;j++){
std::cout<<"Iter: "<<j<<std::endl;
hipMemcpy(Ad, A, size[i], hipMemcpyHostToDevice);
hipLaunchKernel(Add, dim3(1), dim3(size[i]/sizeof(int)), 0, 0, Ad);
hipMemcpy(A, Ad, size[i], hipMemcpyDeviceToHost);
}
hipDeviceSynchronize();
}
}
+63
Dosyayı Görüntüle
@@ -0,0 +1,63 @@
/*
Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/*
* Test for checking the functionality of
* hipError_t hipDeviceSynchronize();
*/
#include"hip_runtime.h"
#include<iostream>
#define _SIZE sizeof(int)*1024*1024
#define NUM_STREAMS 20
#define ITER 1<<10
__global__ void Iter(hipLaunchParm lp, int *Ad, int num){
int tx = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
if(tx == 0){
for(int i = 0; i<num;i++){
Ad[tx] += 1;
}
}
}
int main(){
int *A[NUM_STREAMS];
int *Ad[NUM_STREAMS];
hipStream_t stream[NUM_STREAMS];
for(int i=0;i<NUM_STREAMS;i++){
hipHostMalloc((void**)&A[i], _SIZE, hipHostMallocDefault);
A[i][0] = 1;
hipMalloc((void**)&Ad[i], _SIZE);
}
for(int i=0;i<NUM_STREAMS;i++){
for(int j=0;j<ITER;j++){
std::cout<<"Iter: "<<j<<std::endl;
hipMemcpy(Ad[i], A[i], _SIZE, hipMemcpyHostToDevice);
hipLaunchKernel(HIP_KERNEL_NAME(Iter), dim3(1), dim3(1), 0, 0, Ad[i], 1<<30);
hipMemcpyAsync(A[i], Ad[i], _SIZE, hipMemcpyDeviceToHost);
}
}
std::cout<<"Waitin..."<<std::endl;
hipDeviceSynchronize();
}