SWDEV-323441 - support for default stream per thread

Change-Id: I0032da0357f5cffbf5e4ec4a02435d2a128a262b


[ROCm/clr commit: fc1f02bbed]
Этот коммит содержится в:
Sarbojit Sarkar
2022-04-13 06:35:25 +00:00
коммит произвёл Sarbojit Sarkar
родитель d4991641bd
Коммит e22f2e1714
9 изменённых файлов: 455 добавлений и 118 удалений
+34 -3
Просмотреть файл
@@ -34,6 +34,7 @@ THE SOFTWARE.
#endif
#if defined(__HIP_STREAM_PER_THREAD)
// Memory APIs
#define hipMemcpy __HIP_API_SPT(hipMemcpy)
#define hipMemcpyToSymbol __HIP_API_SPT(hipMemcpyToSymbol)
#define hipMemcpyFromSymbol __HIP_API_SPT(hipMemcpyFromSymbol)
@@ -44,10 +45,21 @@ THE SOFTWARE.
#define hipMemset __HIP_API_SPT(hipMemset)
#define hipMemset2D __HIP_API_SPT(hipMemset2D)
#define hipMemset3D __HIP_API_SPT(hipMemset3D)
#define hipMemcpyAsync __HIP_API_SPT(hipMemcpyAsync)
#define hipMemcpyAsync __HIP_API_SPT(hipMemcpyAsync)
#define hipLaunchKernel __HIP_API_SPT(hipLaunchKernel)
#define hipStreamSynchronize __HIP_API_SPT(hipStreamSynchronize)
// Stream APIs
#define hipStreamSynchronize __HIP_API_SPT(hipStreamSynchronize)
#define hipStreamQuery __HIP_API_SPT(hipStreamQuery)
#define hipStreamGetFlags __HIP_API_SPT(hipStreamGetFlags)
#define hipStreamGetPriority __HIP_API_SPT(hipStreamGetPriority)
#define hipStreamWaitEvent __HIP_API_SPT(hipStreamWaitEvent)
// Event APIs
#define hipEventRecord __HIP_API_SPT(hipEventRecord)
// Launch APIs
#define hipLaunchKernel __HIP_API_SPT(hipLaunchKernel)
#define hipLaunchCooperativeKernel __HIP_API_SPT(hipLaunchCooperativeKernel)
#endif
hipError_t hipMemcpy_spt(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind);
@@ -77,10 +89,29 @@ hipError_t hipMemset3D_spt(hipPitchedPtr pitchedDevPtr, int value, hipExtent ex
hipError_t hipMemcpyAsync_spt(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind,
hipStream_t stream);
hipError_t hipStreamQuery_spt(hipStream_t stream);
hipError_t hipStreamSynchronize_spt(hipStream_t stream);
hipError_t hipStreamGetPriority_spt(hipStream_t stream, int* priority);
hipError_t hipStreamWaitEvent_spt(hipStream_t stream, hipEvent_t event, unsigned int flags);
hipError_t hipStreamGetFlags_spt(hipStream_t stream, unsigned int* flags);
hipError_t hipLaunchCooperativeKernel_spt(const void* f,
dim3 gridDim, dim3 blockDim,
void **kernelParams, uint32_t sharedMemBytes, hipStream_t hStream);
#ifdef __cplusplus
extern "C" {
#endif
hipError_t hipLaunchKernel_spt(const void* function_address,
dim3 numBlocks,
dim3 dimBlocks,
void** args,
size_t sharedMemBytes, hipStream_t stream);
#ifdef __cplusplus
}
#endif // extern "C"
#endif //HIP_INCLUDE_HIP_HIP_RUNTIME_PT_API_H
+19
Просмотреть файл
@@ -377,3 +377,22 @@ hipMemRelease
hipMemRetainAllocationHandle
hipMemSetAccess
hipMemUnmap
hipMemcpy_spt
hipMemcpyAsync_spt
hipStreamSynchronize_spt
hipMemcpyToSymbol_spt
hipMemcpyFromSymbol_spt
hipMemcpy2D_spt
hipMemcpy2DToArray_spt
hipMemcpy2DFromArray_spt
hipMemcpy3D_spt
hipMemset_spt
hipMemset2D_spt
hipMemset3D_spt
hipStreamQuery_spt
hipStreamGetFlags_spt
hipStreamGetPriority_spt
hipStreamWaitEvent_spt
hipEventRecord_spt
hipLaunchKernel_spt
hipLaunchCooperativeKernel_spt
+14 -9
Просмотреть файл
@@ -336,24 +336,29 @@ hipError_t hipEventElapsedTime(float* ms, hipEvent_t start, hipEvent_t stop) {
HIP_RETURN(eStart->elapsedTime(*eStop, *ms), "Elapsed Time = ", *ms);
}
hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream) {
HIP_INIT_API(hipEventRecord, event, stream);
hipError_t hipEventRecord_common(hipEvent_t event, hipStream_t stream) {
STREAM_CAPTURE(hipEventRecord, stream, event);
if (event == nullptr) {
HIP_RETURN(hipErrorInvalidHandle);
return hipErrorInvalidHandle;
}
hip::Event* e = reinterpret_cast<hip::Event*>(event);
amd::HostQueue* queue = hip::getQueue(stream);
if (g_devices[e->deviceId()]->devices()[0] != &queue->device()) {
HIP_RETURN(hipErrorInvalidHandle);
return hipErrorInvalidHandle;
}
return e->addMarker(stream, nullptr, true);
}
HIP_RETURN(e->addMarker(stream, nullptr, true));
hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream) {
HIP_INIT_API(hipEventRecord, event, stream);
HIP_RETURN(hipEventRecord_common(event, stream));
}
hipError_t hipEventRecord_spt(hipEvent_t event, hipStream_t stream) {
HIP_INIT_API(hipEventRecord, event, stream);
PER_THREAD_DEFAULT_STREAM(stream);
HIP_RETURN(hipEventRecord_common(event, stream));
}
hipError_t hipEventSynchronize(hipEvent_t event) {
+19
Просмотреть файл
@@ -385,3 +385,22 @@ hipMemRelease
hipMemRetainAllocationHandle
hipMemSetAccess
hipMemUnmap
hipMemcpy_spt
hipMemcpyAsync_spt
hipStreamSynchronize_spt
hipMemcpyToSymbol_spt
hipMemcpyFromSymbol_spt
hipMemcpy2D_spt
hipMemcpy2DToArray_spt
hipMemcpy2DFromArray_spt
hipMemcpy3D_spt
hipMemset_spt
hipMemset2D_spt
hipMemset3D_spt
hipStreamQuery_spt
hipStreamGetFlags_spt
hipStreamGetPriority_spt
hipStreamWaitEvent_spt
hipEventRecord_spt
hipLaunchKernel_spt
hipLaunchCooperativeKernel_spt
+25
Просмотреть файл
@@ -434,3 +434,28 @@ global:
local:
*;
} hip_5.0;
hip_5.2 {
global:
hipMemcpy_spt;
hipMemcpyAsync_spt;
hipStreamSynchronize_spt;
hipMemcpyToSymbol_spt;
hipMemcpyFromSymbol_spt;
hipMemcpy2D_spt;
hipMemcpy2DToArray_spt;
hipMemcpy2DFromArray_spt;
hipMemcpy3D_spt;
hipMemset_spt;
hipMemset2D_spt;
hipMemset3D_spt;
hipStreamQuery_spt;
hipStreamGetFlags_spt;
hipStreamGetPriority_spt;
hipStreamWaitEvent_spt;
hipEventRecord_spt;
hipLaunchKernel_spt;
hipLaunchCooperativeKernel_spt;
local:
*;
} hip_5.1;
+8 -2
Просмотреть файл
@@ -171,7 +171,7 @@ static amd::Monitor g_hipInitlock{"hipInit lock"};
// Sync APIs cannot be called when stream capture is active
#define CHECK_STREAM_CAPTURING() \
if (!g_captureStreams.empty()) { \
HIP_RETURN(hipErrorStreamCaptureImplicit); \
return hipErrorStreamCaptureImplicit; \
}
#define STREAM_CAPTURE(name, stream, ...) \
@@ -180,7 +180,7 @@ static amd::Monitor g_hipInitlock{"hipInit lock"};
reinterpret_cast<hip::Stream*>(stream)->GetCaptureStatus() == \
hipStreamCaptureStatusActive) { \
hipError_t status = capture##name(stream, ##__VA_ARGS__); \
HIP_RETURN(status); \
return status; \
}
#define EVENT_CAPTURE(name, event, ...) \
@@ -189,6 +189,11 @@ static amd::Monitor g_hipInitlock{"hipInit lock"};
HIP_RETURN(status); \
}
#define PER_THREAD_DEFAULT_STREAM(stream) \
if (stream == nullptr) { \
stream = getPerThreadDefaultStream(); \
}
namespace hc {
class accelerator;
class accelerator_view;
@@ -484,6 +489,7 @@ extern hipError_t ihipMalloc(void** ptr, size_t sizeBytes, unsigned int flags);
extern amd::Memory* getMemoryObject(const void* ptr, size_t& offset, size_t size = 0);
extern amd::Memory* getMemoryObjectWithOffset(const void* ptr, const size_t size);
extern void getStreamPerThread(hipStream_t& stream);
extern hipStream_t getPerThreadDefaultStream();
extern hipError_t ihipUnbindTexture(textureReference* texRef);
extern hipError_t ihipGetDeviceProperties(hipDeviceProp_t* props, hipDevice_t device);
+161 -37
Просмотреть файл
@@ -538,11 +538,27 @@ hipError_t hipFree(void* ptr) {
HIP_RETURN(ihipFree(ptr));
}
hipError_t hipMemcpy_common(void* dst, const void* src, size_t sizeBytes,
hipMemcpyKind kind, hipStream_t stream = nullptr) {
CHECK_STREAM_CAPTURING();
amd::HostQueue* queue = nullptr;
if (stream != nullptr) {
queue = hip::getQueue(stream);
} else {
queue = hip::getNullStream();
}
return ihipMemcpy(dst, src, sizeBytes, kind, *queue);
}
hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind) {
HIP_INIT_API(hipMemcpy, dst, src, sizeBytes, kind);
CHECK_STREAM_CAPTURING();
amd::HostQueue* queue = hip::getNullStream();
HIP_RETURN_DURATION(ihipMemcpy(dst, src, sizeBytes, kind, *queue));
HIP_RETURN_DURATION(hipMemcpy_common(dst, src, sizeBytes, kind));
}
hipError_t hipMemcpy_spt(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind) {
HIP_INIT_API(hipMemcpy, dst, src, sizeBytes, kind);
HIP_RETURN_DURATION(hipMemcpy_common(dst, src, sizeBytes, kind, getPerThreadDefaultStream()));
}
hipError_t hipMemcpyWithStream(void* dst, const void* src, size_t sizeBytes,
@@ -1073,9 +1089,8 @@ inline hipError_t ihipMemcpySymbol_validate(const void* symbol, size_t sizeBytes
return hipSuccess;
}
hipError_t hipMemcpyToSymbol(const void* symbol, const void* src, size_t sizeBytes,
size_t offset, hipMemcpyKind kind) {
HIP_INIT_API(hipMemcpyToSymbol, symbol, src, sizeBytes, offset, kind);
hipError_t hipMemcpyToSymbol_common(const void* symbol, const void* src, size_t sizeBytes,
size_t offset, hipMemcpyKind kind, hipStream_t stream=nullptr) {
CHECK_STREAM_CAPTURING();
size_t sym_size = 0;
hipDeviceptr_t device_ptr = nullptr;
@@ -1086,23 +1101,48 @@ hipError_t hipMemcpyToSymbol(const void* symbol, const void* src, size_t sizeByt
}
/* Copy memory from source to destination address */
HIP_RETURN_DURATION(hipMemcpy(device_ptr, src, sizeBytes, kind));
return hipMemcpy_common(device_ptr, src, sizeBytes, kind, stream);
}
hipError_t hipMemcpyToSymbol(const void* symbol, const void* src, size_t sizeBytes,
size_t offset, hipMemcpyKind kind) {
HIP_INIT_API(hipMemcpyToSymbol, symbol, src, sizeBytes, offset, kind);
HIP_RETURN_DURATION(hipMemcpyToSymbol_common(symbol, src, sizeBytes, offset, kind));
}
hipError_t hipMemcpyToSymbol_spt(const void* symbol, const void* src, size_t sizeBytes,
size_t offset, hipMemcpyKind kind) {
HIP_INIT_API(hipMemcpyToSymbol, symbol, src, sizeBytes, offset, kind);
HIP_RETURN_DURATION(hipMemcpyToSymbol_common(symbol, src, sizeBytes, offset, kind,
getPerThreadDefaultStream()));
}
hipError_t hipMemcpyFromSymbol_common(void* dst, const void* symbol, size_t sizeBytes,
size_t offset, hipMemcpyKind kind, hipStream_t stream=nullptr) {
CHECK_STREAM_CAPTURING();
size_t sym_size = 0;
hipDeviceptr_t device_ptr = nullptr;
hipError_t status = ihipMemcpySymbol_validate(symbol, sizeBytes, offset, sym_size, device_ptr);
if (status != hipSuccess) {
return status;
}
/* Copy memory from source to destination address */
return hipMemcpy_common(dst, device_ptr, sizeBytes, kind, stream);
}
hipError_t hipMemcpyFromSymbol(void* dst, const void* symbol, size_t sizeBytes,
size_t offset, hipMemcpyKind kind) {
HIP_INIT_API(hipMemcpyFromSymbol, symbol, dst, sizeBytes, offset, kind);
CHECK_STREAM_CAPTURING();
size_t sym_size = 0;
hipDeviceptr_t device_ptr = nullptr;
HIP_RETURN_DURATION(hipMemcpyFromSymbol_common(dst, symbol, sizeBytes, offset, kind));
}
hipError_t status = ihipMemcpySymbol_validate(symbol, sizeBytes, offset, sym_size, device_ptr);
if (status != hipSuccess) {
return status;
}
/* Copy memory from source to destination address */
HIP_RETURN_DURATION(hipMemcpy(dst, device_ptr, sizeBytes, kind));
hipError_t hipMemcpyFromSymbol_spt(void* dst, const void* symbol, size_t sizeBytes,
size_t offset, hipMemcpyKind kind) {
HIP_INIT_API(hipMemcpyFromSymbol, symbol, dst, sizeBytes, offset, kind);
HIP_RETURN_DURATION(hipMemcpyFromSymbol_common(dst, symbol, sizeBytes, offset, kind,
getPerThreadDefaultStream()));
}
hipError_t hipMemcpyToSymbolAsync(const void* symbol, const void* src, size_t sizeBytes,
@@ -1164,15 +1204,25 @@ hipError_t hipMemcpyDtoD(hipDeviceptr_t dstDevice,
HIP_RETURN_DURATION(ihipMemcpy(dstDevice, srcDevice, ByteCount, hipMemcpyDeviceToDevice, *hip::getQueue(nullptr)));
}
hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes,
hipError_t hipMemcpyAsync_common(void* dst, const void* src, size_t sizeBytes,
hipMemcpyKind kind, hipStream_t stream) {
HIP_INIT_API(hipMemcpyAsync, dst, src, sizeBytes, kind, stream);
STREAM_CAPTURE(hipMemcpyAsync, stream, dst, src, sizeBytes, kind);
amd::HostQueue* queue = hip::getQueue(stream);
return ihipMemcpy(dst, src, sizeBytes, kind, *queue, true);
}
HIP_RETURN_DURATION(ihipMemcpy(dst, src, sizeBytes, kind, *queue, true));
hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes,
hipMemcpyKind kind, hipStream_t stream) {
HIP_INIT_API(hipMemcpyAsync, dst, src, sizeBytes, kind, stream);
HIP_RETURN_DURATION(hipMemcpyAsync_common(dst, src, sizeBytes, kind, stream));
}
hipError_t hipMemcpyAsync_spt(void* dst, const void* src, size_t sizeBytes,
hipMemcpyKind kind, hipStream_t stream) {
HIP_INIT_API(hipMemcpyAsync, dst, src, sizeBytes, kind, stream);
PER_THREAD_DEFAULT_STREAM(stream);
HIP_RETURN_DURATION(hipMemcpyAsync_common(dst, src, sizeBytes, kind, stream));
}
hipError_t hipMemcpyHtoDAsync(hipDeviceptr_t dstDevice, void* srcHost, size_t ByteCount,
@@ -1965,11 +2015,23 @@ hipError_t hipMemcpyParam2D(const hip_Memcpy2D* pCopy) {
HIP_RETURN_DURATION(ihipMemcpyParam2D(pCopy, nullptr));
}
hipError_t hipMemcpy2D_common(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width,
size_t height, hipMemcpyKind kind, hipStream_t stream = nullptr) {
CHECK_STREAM_CAPTURING();
return ihipMemcpy2D(dst, dpitch, src, spitch, width, height, kind, stream);
}
hipError_t hipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width,
size_t height, hipMemcpyKind kind) {
HIP_INIT_API(hipMemcpy2D, dst, dpitch, src, spitch, width, height, kind);
CHECK_STREAM_CAPTURING();
HIP_RETURN_DURATION(ihipMemcpy2D(dst, dpitch, src, spitch, width, height, kind, nullptr));
HIP_RETURN_DURATION(hipMemcpy2D_common(dst, dpitch, src, spitch, width, height, kind));
}
hipError_t hipMemcpy2D_spt(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width,
size_t height, hipMemcpyKind kind) {
HIP_INIT_API(hipMemcpy2D, dst, dpitch, src, spitch, width, height, kind);
HIP_RETURN_DURATION(hipMemcpy2D_common(dst, dpitch, src, spitch, width, height, kind,
getPerThreadDefaultStream()));
}
hipError_t hipMemcpy2DAsync(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width,
@@ -2008,14 +2070,25 @@ hipError_t ihipMemcpy2DToArray(hipArray_t dst, size_t wOffset, size_t hOffset, c
return ihipMemcpyParam2D(&desc, stream, isAsync);
}
hipError_t hipMemcpy2DToArray(hipArray* dst, size_t wOffset, size_t hOffset, const void* src, size_t spitch, size_t width, size_t height, hipMemcpyKind kind) {
HIP_INIT_API(hipMemcpy2DToArray, dst, wOffset, hOffset, src, spitch, width, height, kind);
hipError_t hipMemcpy2DToArray_common(hipArray* dst, size_t wOffset, size_t hOffset,
const void* src, size_t spitch, size_t width,
size_t height, hipMemcpyKind kind, hipStream_t stream=nullptr) {
CHECK_STREAM_CAPTURING();
if (spitch == 0) {
HIP_RETURN(hipErrorInvalidPitchValue);
}
return ihipMemcpy2DToArray(dst, wOffset, hOffset, src, spitch, width, height, kind, stream);
}
HIP_RETURN_DURATION(ihipMemcpy2DToArray(dst, wOffset, hOffset, src, spitch, width, height, kind, nullptr));
hipError_t hipMemcpy2DToArray(hipArray* dst, size_t wOffset, size_t hOffset, const void* src, size_t spitch, size_t width, size_t height, hipMemcpyKind kind) {
HIP_INIT_API(hipMemcpy2DToArray, dst, wOffset, hOffset, src, spitch, width, height, kind);
HIP_RETURN_DURATION(hipMemcpy2DToArray_common(dst, wOffset, hOffset, src, spitch, width, height, kind));
}
hipError_t hipMemcpy2DToArray_spt(hipArray* dst, size_t wOffset, size_t hOffset, const void* src, size_t spitch, size_t width, size_t height, hipMemcpyKind kind) {
HIP_INIT_API(hipMemcpy2DToArray, dst, wOffset, hOffset, src, spitch, width, height, kind);
HIP_RETURN_DURATION(hipMemcpy2DToArray_common(dst, wOffset, hOffset, src, spitch,
width, height, kind, getPerThreadDefaultStream()));
}
hipError_t hipMemcpyToArray(hipArray* dst, size_t wOffset, size_t hOffset, const void* src, size_t count, hipMemcpyKind kind) {
@@ -2217,10 +2290,19 @@ hipError_t ihipMemcpy3D(const hipMemcpy3DParms* p, hipStream_t stream, bool isAs
return ihipMemcpyParam3D(&desc, stream, isAsync);
}
hipError_t hipMemcpy3D_common(const hipMemcpy3DParms* p, hipStream_t stream = nullptr) {
CHECK_STREAM_CAPTURING();
return ihipMemcpy3D(p, stream);
}
hipError_t hipMemcpy3D(const hipMemcpy3DParms* p) {
HIP_INIT_API(hipMemcpy3D, p);
CHECK_STREAM_CAPTURING();
HIP_RETURN_DURATION(ihipMemcpy3D(p, nullptr));
HIP_RETURN_DURATION(hipMemcpy3D_common(p));
}
hipError_t hipMemcpy3D_spt(const hipMemcpy3DParms* p) {
HIP_INIT_API(hipMemcpy3D, p);
HIP_RETURN_DURATION(hipMemcpy3D_common(p, getPerThreadDefaultStream()));
}
hipError_t hipMemcpy3DAsync(const hipMemcpy3DParms* p, hipStream_t stream) {
@@ -2357,10 +2439,19 @@ hipError_t ihipMemset(void* dst, int64_t value, size_t valueSize, size_t sizeByt
return hip_error;
}
hipError_t hipMemset_common(void* dst, int value, size_t sizeBytes, hipStream_t stream=nullptr) {
CHECK_STREAM_CAPTURING();
return ihipMemset(dst, value, sizeof(int8_t), sizeBytes, stream);
}
hipError_t hipMemset_spt(void* dst, int value, size_t sizeBytes) {
HIP_INIT_API(hipMemset, dst, value, sizeBytes);
HIP_RETURN(hipMemset_common(dst, value, sizeBytes, getPerThreadDefaultStream()));
}
hipError_t hipMemset(void* dst, int value, size_t sizeBytes) {
HIP_INIT_API(hipMemset, dst, value, sizeBytes);
CHECK_STREAM_CAPTURING();
HIP_RETURN(ihipMemset(dst, value, sizeof(int8_t), sizeBytes, nullptr));
HIP_RETURN(hipMemset_common(dst, value, sizeBytes));
}
hipError_t hipMemsetAsync(void* dst, int value, size_t sizeBytes, hipStream_t stream) {
@@ -2490,12 +2581,24 @@ hipError_t ihipMemset3D(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent
return hipSuccess;
}
hipError_t hipMemset2D_common(void* dst, size_t pitch, int value, size_t width,
size_t height, hipStream_t stream=nullptr) {
CHECK_STREAM_CAPTURING();
return ihipMemset3D({dst, pitch, width, height}, value, {width, height, 1}, stream);
}
hipError_t hipMemset2D_spt(void* dst, size_t pitch, int value, size_t width, size_t height) {
HIP_INIT_API(hipMemset2D, dst, pitch, value, width, height);
hipStream_t stream = getPerThreadDefaultStream();
HIP_RETURN(hipMemset2D_common(dst, pitch, value, width, height, stream));
}
hipError_t hipMemset2D(void* dst, size_t pitch, int value, size_t width, size_t height) {
HIP_INIT_API(hipMemset2D, dst, pitch, value, width, height);
CHECK_STREAM_CAPTURING();
HIP_RETURN(ihipMemset3D({dst, pitch, width, height}, value, {width, height, 1}, nullptr));
HIP_RETURN(hipMemset2D_common(dst, pitch, value, width, height));
}
hipError_t hipMemset2DAsync(void* dst, size_t pitch, int value,
size_t width, size_t height, hipStream_t stream) {
HIP_INIT_API(hipMemset2DAsync, dst, pitch, value, width, height, stream);
@@ -2505,10 +2608,20 @@ hipError_t hipMemset2DAsync(void* dst, size_t pitch, int value,
HIP_RETURN(ihipMemset3D({dst, pitch, width, height}, value, {width, height, 1}, stream, true));
}
hipError_t hipMemset3D_common(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent, hipStream_t stream=nullptr) {
CHECK_STREAM_CAPTURING();
return ihipMemset3D(pitchedDevPtr, value, extent, stream);
}
hipError_t hipMemset3D(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent) {
HIP_INIT_API(hipMemset3D, pitchedDevPtr, value, extent);
CHECK_STREAM_CAPTURING();
HIP_RETURN(ihipMemset3D(pitchedDevPtr, value, extent, nullptr));
HIP_RETURN(hipMemset3D_common(pitchedDevPtr, value, extent));
}
hipError_t hipMemset3D_spt(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent) {
HIP_INIT_API(hipMemset3D, pitchedDevPtr, value, extent);
hipStream_t stream = getPerThreadDefaultStream();
HIP_RETURN(hipMemset3D_common(pitchedDevPtr, value, extent,stream));
}
hipError_t hipMemset3DAsync(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent, hipStream_t stream) {
@@ -2970,14 +3083,25 @@ hipError_t hipMemcpyArrayToArray(hipArray_t dst, size_t wOffsetDst, size_t hOffs
HIP_RETURN_DURATION(ihipMemcpy2DArrayToArray(dst, wOffsetDst, hOffsetDst, src, wOffsetSrc, hOffsetSrc, width, height, kind, nullptr));
}
hipError_t hipMemcpy2DFromArray(void* dst, size_t dpitch, hipArray_const_t src, size_t wOffsetSrc, size_t hOffset, size_t width, size_t height, hipMemcpyKind kind) {
HIP_INIT_API(hipMemcpy2DFromArray, dst, dpitch, src, wOffsetSrc, hOffset, width, height, kind);
hipError_t hipMemcpy2DFromArray_common(void* dst, size_t dpitch, hipArray_const_t src,
size_t wOffsetSrc, size_t hOffset, size_t width,
size_t height, hipMemcpyKind kind, hipStream_t stream=nullptr) {
CHECK_STREAM_CAPTURING();
if (dpitch == 0) {
HIP_RETURN(hipErrorInvalidPitchValue);
}
return ihipMemcpy2DFromArray(dst, dpitch, src, wOffsetSrc, hOffset, width, height, kind, stream);
}
HIP_RETURN_DURATION(ihipMemcpy2DFromArray(dst, dpitch, src, wOffsetSrc, hOffset, width, height, kind, nullptr));
hipError_t hipMemcpy2DFromArray(void* dst, size_t dpitch, hipArray_const_t src, size_t wOffsetSrc, size_t hOffset, size_t width, size_t height, hipMemcpyKind kind) {
HIP_INIT_API(hipMemcpy2DFromArray, dst, dpitch, src, wOffsetSrc, hOffset, width, height, kind);
HIP_RETURN_DURATION(hipMemcpy2DFromArray_common(dst, dpitch, src, wOffsetSrc, hOffset, width, height, kind));
}
hipError_t hipMemcpy2DFromArray_spt(void* dst, size_t dpitch, hipArray_const_t src, size_t wOffsetSrc, size_t hOffset, size_t width, size_t height, hipMemcpyKind kind) {
HIP_INIT_API(hipMemcpy2DFromArray, dst, dpitch, src, wOffsetSrc, hOffset, width, height, kind);
hipStream_t stream = getPerThreadDefaultStream();
HIP_RETURN_DURATION(hipMemcpy2DFromArray_common(dst, dpitch, src, wOffsetSrc, hOffset, width, height, kind, stream));
}
hipError_t hipMemcpy2DFromArrayAsync(void* dst, size_t dpitch, hipArray_const_t src, size_t wOffsetSrc, size_t hOffsetSrc, size_t width, size_t height, hipMemcpyKind kind, hipStream_t stream) {
+49 -11
Просмотреть файл
@@ -486,6 +486,18 @@ hipError_t hipModuleLaunchKernelExt(hipFunction_t f, uint32_t globalWorkSizeX,
sharedMemBytes, hStream, kernelParams, extra, startEvent, stopEvent));
}
extern "C" hipError_t hipLaunchKernel_common(const void *hostFunction,
dim3 gridDim,
dim3 blockDim,
void** args,
size_t sharedMemBytes,
hipStream_t stream)
{
STREAM_CAPTURE(hipLaunchKernel, stream, hostFunction, gridDim, blockDim, args, sharedMemBytes);
return ihipLaunchKernel(hostFunction, gridDim, blockDim, args, sharedMemBytes, stream,
nullptr, nullptr, 0);
}
extern "C" hipError_t hipLaunchKernel(const void *hostFunction,
dim3 gridDim,
dim3 blockDim,
@@ -493,10 +505,20 @@ extern "C" hipError_t hipLaunchKernel(const void *hostFunction,
size_t sharedMemBytes,
hipStream_t stream)
{
HIP_INIT_API(hipLaunchKernel, hostFunction, gridDim, blockDim, args, sharedMemBytes, stream);
STREAM_CAPTURE(hipLaunchKernel, stream, hostFunction, gridDim, blockDim, args, sharedMemBytes);
HIP_RETURN(ihipLaunchKernel(hostFunction, gridDim, blockDim, args, sharedMemBytes, stream,
nullptr, nullptr, 0));
HIP_INIT_API(hipLaunchKernel, hostFunction, gridDim, blockDim, args, sharedMemBytes, stream);
HIP_RETURN(hipLaunchKernel_common(hostFunction, gridDim, blockDim, args, sharedMemBytes, stream));
}
extern "C" hipError_t hipLaunchKernel_spt(const void *hostFunction,
dim3 gridDim,
dim3 blockDim,
void** args,
size_t sharedMemBytes,
hipStream_t stream)
{
HIP_INIT_API(hipLaunchKernel, hostFunction, gridDim, blockDim, args, sharedMemBytes, stream);
PER_THREAD_DEFAULT_STREAM(stream);
HIP_RETURN(hipLaunchKernel_common(hostFunction, gridDim, blockDim, args, sharedMemBytes, stream));
}
extern "C" hipError_t hipExtLaunchKernel(const void* hostFunction,
@@ -513,13 +535,10 @@ extern "C" hipError_t hipExtLaunchKernel(const void* hostFunction,
HIP_RETURN(ihipLaunchKernel(hostFunction, gridDim, blockDim, args, sharedMemBytes, stream, startEvent, stopEvent, flags));
}
hipError_t hipLaunchCooperativeKernel(const void* f,
hipError_t hipLaunchCooperativeKernel_common(const void* f,
dim3 gridDim, dim3 blockDim,
void **kernelParams, uint32_t sharedMemBytes, hipStream_t hStream)
{
HIP_INIT_API(hipLaunchCooperativeKernel, f, gridDim, blockDim,
sharedMemBytes, hStream);
if (!hip::isValid(hStream)) {
HIP_RETURN(hipErrorInvalidValue);
}
@@ -533,14 +552,33 @@ hipError_t hipLaunchCooperativeKernel(const void* f,
if (globalWorkSizeX > std::numeric_limits<uint32_t>::max() ||
globalWorkSizeY > std::numeric_limits<uint32_t>::max() ||
globalWorkSizeZ > std::numeric_limits<uint32_t>::max()) {
HIP_RETURN(hipErrorInvalidConfiguration);
return hipErrorInvalidConfiguration;
}
HIP_RETURN(ihipModuleLaunchKernel(func, static_cast<uint32_t>(globalWorkSizeX),
return ihipModuleLaunchKernel(func, static_cast<uint32_t>(globalWorkSizeX),
static_cast<uint32_t>(globalWorkSizeY),
static_cast<uint32_t>(globalWorkSizeZ),
blockDim.x, blockDim.y, blockDim.z,
sharedMemBytes, hStream, kernelParams, nullptr, nullptr, nullptr, 0,
amd::NDRangeKernelCommand::CooperativeGroups));
amd::NDRangeKernelCommand::CooperativeGroups);
}
hipError_t hipLaunchCooperativeKernel(const void* f,
dim3 gridDim, dim3 blockDim,
void **kernelParams, uint32_t sharedMemBytes, hipStream_t hStream)
{
HIP_INIT_API(hipLaunchCooperativeKernel, f, gridDim, blockDim,
sharedMemBytes, hStream);
HIP_RETURN(hipLaunchCooperativeKernel_common(f,gridDim, blockDim, kernelParams, sharedMemBytes, hStream));
}
hipError_t hipLaunchCooperativeKernel_spt(const void* f,
dim3 gridDim, dim3 blockDim,
void **kernelParams, uint32_t sharedMemBytes, hipStream_t hStream)
{
HIP_INIT_API(hipLaunchCooperativeKernel, f, gridDim, blockDim,
sharedMemBytes, hStream);
PER_THREAD_DEFAULT_STREAM(hStream);
HIP_RETURN(hipLaunchCooperativeKernel_common(f, gridDim, blockDim, kernelParams, sharedMemBytes, hStream));
}
hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsList,
+126 -56
Просмотреть файл
@@ -294,26 +294,45 @@ static hipError_t ihipStreamCreate(hipStream_t* stream,
// ================================================================================================
class stream_per_thread {
private:
hipStream_t m_stream;
std::vector<hipStream_t> m_streams;
public:
stream_per_thread():m_stream(nullptr) {}
stream_per_thread() {
m_streams.resize(g_devices.size());
for (auto &stream : m_streams) {
stream = nullptr;
}
}
stream_per_thread(const stream_per_thread& ) = delete;
void operator=(const stream_per_thread& ) = delete;
~stream_per_thread() {
if (m_stream != nullptr && hip::isValid(m_stream)) {
delete reinterpret_cast<hip::Stream*>(m_stream);
m_stream = nullptr;
for (auto &stream:m_streams) {
if (stream != nullptr && hip::isValid(stream)) {
delete reinterpret_cast<hip::Stream*>(stream);
stream = nullptr;
}
}
}
hipStream_t& get() {
hipStream_t get() {
hip::Device* device = hip::getCurrentDevice();
int currDev = device->deviceId();
// This is to make sure m_streams is not empty
if (m_streams.empty()) {
m_streams.resize(g_devices.size());
for (auto &stream : m_streams) {
stream = nullptr;
}
}
// There is a scenario where hipResetDevice destroys stream per thread
// hence isValid check is required to make sure only valid stream is used
if (m_stream == nullptr || !hip::isValid(m_stream)) {
hipError_t err = ihipStreamCreate(&m_stream, hipStreamDefault, hip::Stream::Priority::Normal);
assert(err == hipSuccess);
if (m_streams[currDev] == nullptr || !hip::isValid(m_streams[currDev])) {
hipError_t status = ihipStreamCreate(&m_streams[currDev], hipStreamDefault,
hip::Stream::Priority::Normal);
if (status != hipSuccess) {
DevLogError("Stream creation failed\n");
}
}
return m_stream;
return m_streams[currDev];
}
};
thread_local stream_per_thread streamPerThreadObj;
@@ -325,6 +344,15 @@ void getStreamPerThread(hipStream_t& stream) {
}
}
// ================================================================================================
hipStream_t getPerThreadDefaultStream() {
// Function to get per thread default stream
// More about the usecases yet to come
hipStream_t stream = hipStreamPerThread;
getStreamPerThread(stream);
return stream;
}
// ================================================================================================
hipError_t hipStreamCreateWithFlags(hipStream_t *stream, unsigned int flags) {
HIP_INIT_API(hipStreamCreateWithFlags, stream, flags);
@@ -381,44 +409,53 @@ hipError_t hipDeviceGetStreamPriorityRange(int* leastPriority, int* greatestPrio
}
// ================================================================================================
hipError_t hipStreamGetFlags(hipStream_t stream, unsigned int* flags) {
HIP_INIT_API(hipStreamGetFlags, stream, flags);
if (flags != nullptr) {
if (stream == nullptr) {
// hipStreamDefault
*flags = 0;
} else {
if (!hip::isValid(stream)) {
HIP_RETURN(hipErrorContextIsDestroyed);
}
*flags = reinterpret_cast<hip::Stream*>(stream)->Flags();
hipError_t hipStreamGetFlags_common(hipStream_t stream, unsigned int* flags) {
if ((flags != nullptr) && (stream != nullptr)) {
if (!hip::isValid(stream)) {
return hipErrorContextIsDestroyed;
}
*flags = reinterpret_cast<hip::Stream*>(stream)->Flags();
} else {
HIP_RETURN(hipErrorInvalidValue);
return hipErrorInvalidValue;
}
HIP_RETURN(hipSuccess);
return hipSuccess;
}
// ================================================================================================
hipError_t hipStreamGetFlags(hipStream_t stream, unsigned int* flags) {
HIP_INIT_API(hipStreamGetFlags, stream, flags);
HIP_RETURN(hipStreamGetFlags_common(stream, flags));
}
// ================================================================================================
hipError_t hipStreamGetFlags_spt(hipStream_t stream, unsigned int* flags) {
HIP_INIT_API(hipStreamGetFlags, stream, flags);
PER_THREAD_DEFAULT_STREAM(stream);
HIP_RETURN(hipStreamGetFlags_common(stream, flags));
}
// ================================================================================================
hipError_t hipStreamSynchronize_common(hipStream_t stream) {
if (!hip::isValid(stream)) {
HIP_RETURN(hipErrorContextIsDestroyed);
}
// Wait for the current host queue
hip::getQueue(stream)->finish();
return hipSuccess;
}
// ================================================================================================
hipError_t hipStreamSynchronize(hipStream_t stream) {
HIP_INIT_API(hipStreamSynchronize, stream);
HIP_RETURN(hipStreamSynchronize_common(stream));
}
if (!hip::isValid(stream)) {
HIP_RETURN(hipErrorContextIsDestroyed);
}
// Wait for the current host queue
hip::getQueue(stream)->finish();
// Make sure runtime releases memory for all memory pools on the device,
// associated with the queue
auto hip_stream = reinterpret_cast<hip::Stream*>(stream);
auto device = (hip_stream == nullptr) ? hip::getCurrentDevice() : hip_stream->GetDevice();
device->ReleaseFreedMemory(hip_stream);
HIP_RETURN(hipSuccess);
// ================================================================================================
hipError_t hipStreamSynchronize_spt(hipStream_t stream) {
HIP_INIT_API(hipStreamSynchronize, stream);
PER_THREAD_DEFAULT_STREAM(stream);
HIP_RETURN(hipStreamSynchronize_common(stream));
}
// ================================================================================================
@@ -463,33 +500,42 @@ void WaitThenDecrementSignal(hipStream_t stream, hipError_t status, void* user_d
}
// ================================================================================================
hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int flags) {
HIP_INIT_API(hipStreamWaitEvent, stream, event, flags);
hipError_t hipStreamWaitEvent_common(hipStream_t stream, hipEvent_t event, unsigned int flags) {
EVENT_CAPTURE(hipStreamWaitEvent, event, stream, flags);
if (event == nullptr) {
HIP_RETURN(hipErrorInvalidHandle);
return hipErrorInvalidHandle;
}
if (flags != 0) {
HIP_RETURN(hipErrorInvalidValue);
return hipErrorInvalidValue;
}
if (!hip::isValid(stream)) {
HIP_RETURN(hipErrorContextIsDestroyed);
return hipErrorContextIsDestroyed;
}
hip::Event* e = reinterpret_cast<hip::Event*>(event);
HIP_RETURN(e->streamWait(stream, flags));
return e->streamWait(stream, flags);
}
// ================================================================================================
hipError_t hipStreamQuery(hipStream_t stream) {
HIP_INIT_API(hipStreamQuery, stream);
hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int flags) {
HIP_INIT_API(hipStreamWaitEvent, stream, event, flags);
HIP_RETURN(hipStreamWaitEvent_common(stream, event, flags));
}
// ================================================================================================
hipError_t hipStreamWaitEvent_spt(hipStream_t stream, hipEvent_t event, unsigned int flags) {
HIP_INIT_API(hipStreamWaitEvent, stream, event, flags);
PER_THREAD_DEFAULT_STREAM(stream);
HIP_RETURN(hipStreamWaitEvent_common(stream, event, flags));
}
// ================================================================================================
hipError_t hipStreamQuery_common(hipStream_t stream) {
if (!hip::isValid(stream)) {
HIP_RETURN(hipErrorContextIsDestroyed);
return hipErrorContextIsDestroyed;
}
amd::HostQueue* hostQueue = hip::getQueue(stream);
@@ -497,7 +543,7 @@ hipError_t hipStreamQuery(hipStream_t stream) {
amd::Command* command = hostQueue->getLastQueuedCommand(true);
if (command == nullptr) {
// Nothing was submitted to the queue
HIP_RETURN(hipSuccess);
return hipSuccess;
}
amd::Event& event = command->event();
@@ -511,7 +557,20 @@ hipError_t hipStreamQuery(hipStream_t stream) {
}
hipError_t status = ready ? hipSuccess : hipErrorNotReady;
command->release();
HIP_RETURN(status);
return status;
}
// ================================================================================================
hipError_t hipStreamQuery(hipStream_t stream) {
HIP_INIT_API(hipStreamQuery, stream);
HIP_RETURN(hipStreamQuery_common(stream));
}
// ================================================================================================
hipError_t hipStreamQuery_spt(hipStream_t stream) {
HIP_INIT_API(hipStreamQuery, stream);
PER_THREAD_DEFAULT_STREAM(stream);
HIP_RETURN(hipStreamQuery_common(stream));
}
// ================================================================================================
@@ -586,24 +645,35 @@ hipError_t hipExtStreamCreateWithCUMask(hipStream_t* stream, uint32_t cuMaskSize
}
// ================================================================================================
hipError_t hipStreamGetPriority(hipStream_t stream, int* priority) {
HIP_INIT_API(hipStreamGetPriority, stream, priority);
hipError_t hipStreamGetPriority_common(hipStream_t stream, int* priority) {
if ((priority != nullptr) && (stream == nullptr)) {
*priority = 0;
HIP_RETURN(hipSuccess);
return hipSuccess;
}
if ((priority != nullptr) && (stream != nullptr)) {
if (!hip::isValid(stream)) {
HIP_RETURN(hipErrorContextIsDestroyed);
return hipErrorContextIsDestroyed;
}
*priority = static_cast<int>(reinterpret_cast<hip::Stream*>(stream)->GetPriority());
} else {
HIP_RETURN(hipErrorInvalidValue);
return hipErrorInvalidValue;
}
HIP_RETURN(hipSuccess);
return hipSuccess;
}
// ================================================================================================
hipError_t hipStreamGetPriority(hipStream_t stream, int* priority) {
HIP_INIT_API(hipStreamGetPriority, stream, priority);
HIP_RETURN(hipStreamGetPriority_common(stream, priority));
}
// ================================================================================================
hipError_t hipStreamGetPriority_spt(hipStream_t stream, int* priority) {
HIP_INIT_API(hipStreamGetPriority, stream, priority);
PER_THREAD_DEFAULT_STREAM(stream);
HIP_RETURN(hipStreamGetPriority_common(stream, priority));
}
// ================================================================================================