diff --git a/include/hip/hcc_detail/hip_runtime.h b/include/hip/hcc_detail/hip_runtime.h index 370ac2abbb..d3211ed3f5 100644 --- a/include/hip/hcc_detail/hip_runtime.h +++ b/include/hip/hcc_detail/hip_runtime.h @@ -53,7 +53,7 @@ THE SOFTWARE. // define HIP_ENABLE_PRINTF to enable printf #ifdef HIP_ENABLE_PRINTF #define HCC_ENABLE_ACCELERATOR_PRINTF 1 -#endif +#endif //--- // Remainder of this file only compiles with HCC @@ -481,7 +481,7 @@ do {\ type* var = \ (type*)__get_dynamicgroupbaseptr(); \ -#define HIP_DYNAMIC_SHARED_ATTRIBUTE +#define HIP_DYNAMIC_SHARED_ATTRIBUTE diff --git a/src/hip_event.cpp b/src/hip_event.cpp index 58d764bc44..3664e88d2b 100644 --- a/src/hip_event.cpp +++ b/src/hip_event.cpp @@ -31,12 +31,9 @@ THE SOFTWARE. ihipEvent_t::ihipEvent_t(unsigned flags) + : _criticalData(this) { - _state = hipEventStatusCreated; - _stream = NULL; _flags = flags; - _timestamp = 0; - _type = hipEventTypeIndependent; }; @@ -45,56 +42,45 @@ ihipEvent_t::ihipEvent_t(unsigned flags) void ihipEvent_t::attachToCompletionFuture(const hc::completion_future *cf, hipStream_t stream, ihipEventType_t eventType) { - _marker = *cf; - _type = eventType; - _stream = stream; - _state = hipEventStatusRecording; + LockedAccessor_EventCrit_t crit(_criticalData); + crit->_eventData.marker(*cf); + crit->_eventData._type = eventType; + crit->_eventData._stream = stream; + crit->_eventData._state = hipEventStatusRecording; } -void ihipEvent_t::refereshEventStatus() +std::pair +ihipEvent_t::refreshEventStatus() { - bool isReady0 = locked_isReady(); - bool isReady1; - int val = 0; - if (_state == hipEventStatusRecording) { - // TODO - use completion-future functions to obtain ticks and timestamps: - hsa_signal_t *sig = static_cast (_marker.get_native_handle()); - isReady1 = locked_isReady(); - if (sig) { - val = hsa_signal_load_acquire(*sig); - if (val == 0) { + auto ecd = locked_copyCrit(); + if (ecd._state == hipEventStatusRecording) { + bool isReady1 = ecd._stream->locked_eventIsReady(this); + if (isReady1) { + LockedAccessor_EventCrit_t eCrit(_criticalData); - if ((_type == hipEventTypeIndependent) || (_type == hipEventTypeStopCommand)) { - _timestamp = _marker.get_end_tick(); - } else if (_type == hipEventTypeStartCommand) { - _timestamp = _marker.get_begin_tick(); - } else { - assert(0); // TODO - move to debug assert - _timestamp = 0; - } - - _state = hipEventStatusComplete; + if ((eCrit->_eventData._type == hipEventTypeIndependent) || + (eCrit->_eventData._type == hipEventTypeStopCommand)) { + eCrit->_eventData._timestamp = eCrit->_eventData.marker().get_end_tick(); + } else if (eCrit->_eventData._type == hipEventTypeStartCommand) { + eCrit->_eventData._timestamp = eCrit->_eventData.marker().get_begin_tick(); + } else { + eCrit->_eventData._timestamp = 0; + assert(0); // TODO - move to debug assert } + + eCrit->_eventData._state = hipEventStatusComplete; + + return std::pair (eCrit->_eventData._state, eCrit->_eventData._timestamp); } - } + } - if (_state != hipEventStatusComplete) { - //printf (" not ready isReady0=%d val=%d isReady1=%d\n", isReady0, val, isReady1); - } + // Not complete path here: + return std::pair (ecd._state, ecd._timestamp); } -bool ihipEvent_t::locked_isReady() -{ - return _stream->locked_eventIsReady(this); -} - -void ihipEvent_t::locked_waitComplete(hc::hcWaitMode waitMode) -{ - return _stream->locked_eventWaitComplete(this, waitMode); -} hipError_t ihipEventCreate(hipEvent_t* event, unsigned flags) @@ -136,33 +122,44 @@ hipError_t hipEventCreate(hipEvent_t* event) return ihipLogStatus(ihipEventCreate(event, 0)); } + hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream) { HIP_INIT_SPECIAL_API(TRACE_SYNC, event, stream); - if (event && event->_state != hipEventStatusUnitialized) { + auto ecd = event->locked_copyCrit(); + + if (event && ecd._state != hipEventStatusUnitialized) { stream = ihipSyncAndResolveStream(stream); - event->_stream = stream; - if (HIP_SYNC_NULL_STREAM && stream->isDefaultStream()) { - // TODO-HIP_SYNC_NULL_STREAM : can remove this code when HIP_SYNC_NULL_STREAM = 0 - + // // If default stream , then wait on all queues. ihipCtx_t *ctx = ihipGetTlsDefaultCtx(); ctx->locked_syncDefaultStream(true, true); - event->_timestamp = hc::get_system_ticks(); - event->_state = hipEventStatusComplete; + { + LockedAccessor_EventCrit_t eCrit(event->criticalData()); + eCrit->_eventData.marker(hc::completion_future()); // reset event + eCrit->_eventData._stream = stream; + eCrit->_eventData._timestamp = hc::get_system_ticks(); + eCrit->_eventData._state = hipEventStatusComplete; + } return ihipLogStatus(hipSuccess); } else { - // Clear timestamps - event->_timestamp = 0; - // Record the event in the stream: - stream->locked_recordEvent(event); - event->_state = hipEventStatusRecording; + // Keep a copy outside the critical section so we lock stream first, then event - to avoid deadlock + hc::completion_future cf = stream->locked_recordEvent(event); + + { + LockedAccessor_EventCrit_t eCrit(event->criticalData()); + eCrit->_eventData.marker(cf); + eCrit->_eventData._stream = stream; + eCrit->_eventData._timestamp = 0; + eCrit->_eventData._state = hipEventStatusRecording; + } + return ihipLogStatus(hipSuccess); } } else { @@ -170,15 +167,13 @@ hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream) } } + hipError_t hipEventDestroy(hipEvent_t event) { HIP_INIT_API(event); if (event) { - event->_state = hipEventStatusUnitialized; - delete event; - event = NULL; return ihipLogStatus(hipSuccess); } else { @@ -193,20 +188,24 @@ hipError_t hipEventSynchronize(hipEvent_t event) if (!(event->_flags & hipEventReleaseToSystem)) { tprintf(DB_WARN, "hipEventSynchronize on event without system-scope fence ; consider creating with hipEventReleaseToSystem\n"); } + auto ecd = event->locked_copyCrit(); if (event) { - if (event->_state == hipEventStatusUnitialized) { + if (ecd._state == hipEventStatusUnitialized) { return ihipLogStatus(hipErrorInvalidResourceHandle); - } else if (event->_state == hipEventStatusCreated ) { + } else if (ecd._state == hipEventStatusCreated ) { // Created but not actually recorded on any device: return ihipLogStatus(hipSuccess); - } else if (HIP_SYNC_NULL_STREAM && (event->_stream->isDefaultStream() )) { + } else if (HIP_SYNC_NULL_STREAM && (ecd._stream->isDefaultStream() )) { auto *ctx = ihipGetTlsDefaultCtx(); // TODO-HIP_SYNC_NULL_STREAM - can remove this code ctx->locked_syncDefaultStream(true, true); return ihipLogStatus(hipSuccess); } else { - event->locked_waitComplete((event->_flags & hipEventBlockingSync) ? hc::hcWaitModeBlocked : hc::hcWaitModeActive); + ecd._stream->locked_eventWaitComplete( + ecd.marker(), + (event->_flags & hipEventBlockingSync) ? + hc::hcWaitModeBlocked : hc::hcWaitModeActive); return ihipLogStatus(hipSuccess); } @@ -223,44 +222,50 @@ hipError_t hipEventElapsedTime(float *ms, hipEvent_t start, hipEvent_t stop) *ms = 0.0f; - if ((start == nullptr) || - (start->_flags & hipEventDisableTiming) || - (start->_state == hipEventStatusUnitialized) || (start->_state == hipEventStatusCreated) || - (stop == nullptr) || - (stop->_flags & hipEventDisableTiming) || - ( stop->_state == hipEventStatusUnitialized) || ( stop->_state == hipEventStatusCreated)) { - - // Both events must be at least recorded else return hipErrorInvalidResourceHandle - + if ((start == nullptr) || (stop == nullptr)) { status = hipErrorInvalidResourceHandle; - } else { - // Refresh status, if still recording... - start->refereshEventStatus(); - stop->refereshEventStatus(); + + auto startEcd = start->locked_copyCrit(); + auto stopEcd = stop->locked_copyCrit(); - if ((start->_state == hipEventStatusComplete) && (stop->_state == hipEventStatusComplete)) { - // Common case, we have good information for both events. + if ((start->_flags & hipEventDisableTiming) || + (startEcd._state == hipEventStatusUnitialized) || (startEcd._state == hipEventStatusCreated) || + (stop->_flags & hipEventDisableTiming) || + (stopEcd._state == hipEventStatusUnitialized) || (stopEcd._state == hipEventStatusCreated)) { - int64_t tickDiff = (stop->timestamp() - start->timestamp()); + // Both events must be at least recorded else return hipErrorInvalidResourceHandle + + status = hipErrorInvalidResourceHandle; - uint64_t freqHz; - hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &freqHz); - if (freqHz) { - *ms = ((double)(tickDiff) / (double)(freqHz)) * 1000.0f; - status = hipSuccess; } else { - * ms = 0.0f; - status = hipErrorInvalidValue; - } + // Refresh status, if still recording... + + auto startStatus = start->refreshEventStatus(); // pair < state, timestamp > + auto stopStatus = stop->refreshEventStatus(); // pair < state, timestamp > + + if ((startStatus.first == hipEventStatusComplete) && (stopStatus.first == hipEventStatusComplete)) { + // Common case, we have good information for both events. 'second" is the timestamp: + int64_t tickDiff = (stopStatus.second - startStatus.second); + + uint64_t freqHz; + hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &freqHz); + if (freqHz) { + *ms = ((double)(tickDiff) / (double)(freqHz)) * 1000.0f; + status = hipSuccess; + } else { + * ms = 0.0f; + status = hipErrorInvalidValue; + } - } else if ((start->_state == hipEventStatusRecording) || - (stop->_state == hipEventStatusRecording)) { + } else if ((startStatus.first == hipEventStatusRecording) || + (stopStatus.first == hipEventStatusRecording)) { - status = hipErrorNotReady; - } else { + status = hipErrorNotReady; + } else { assert(0); + } } } @@ -275,7 +280,9 @@ hipError_t hipEventQuery(hipEvent_t event) tprintf(DB_WARN, "hipEventQuery on event without system-scope fence ; consider creating with hipEventReleaseToSystem\n"); } - if ((event->_state == hipEventStatusRecording) && !event->locked_isReady()) { + auto ecd = event->locked_copyCrit(); + + if ((ecd._state == hipEventStatusRecording) && !ecd._stream->locked_eventIsReady(event)) { return ihipLogStatus(hipErrorNotReady); } else { return ihipLogStatus(hipSuccess); diff --git a/src/hip_hcc.cpp b/src/hip_hcc.cpp index 7b59e96975..2d2d2745fe 100644 --- a/src/hip_hcc.cpp +++ b/src/hip_hcc.cpp @@ -339,12 +339,11 @@ void ihipStream_t::locked_wait() // Causes current stream to wait for specified event to complete: // Note this does not provide any kind of host serialization. -void ihipStream_t::locked_streamWaitEvent(hipEvent_t event) +void ihipStream_t::locked_streamWaitEvent(ihipEventData_t &ecd) { LockedAccessor_StreamCrit_t crit(_criticalData); - - crit->_av.create_blocking_marker(event->marker(), hc::accelerator_scope); + crit->_av.create_blocking_marker(ecd.marker(), hc::accelerator_scope); } @@ -352,24 +351,28 @@ void ihipStream_t::locked_streamWaitEvent(hipEvent_t event) // Note this does not provide any kind of host serialization. bool ihipStream_t::locked_eventIsReady(hipEvent_t event) { + // Event query that returns "Complete" may cause HCC to manipulate // internal queue state so lock the stream's queue here. - LockedAccessor_StreamCrit_t crit(_criticalData); + LockedAccessor_StreamCrit_t scrit(_criticalData); - return (event->marker().is_ready()); + LockedAccessor_EventCrit_t ecrit(event->criticalData()); + + return (ecrit->_eventData.marker().is_ready()); } -void ihipStream_t::locked_eventWaitComplete(hipEvent_t event, hc::hcWaitMode waitMode) +// Waiting on event can cause HCC to reclaim stream resources - so need to lock the stream. +void ihipStream_t::locked_eventWaitComplete(hc::completion_future &marker, hc::hcWaitMode waitMode) { LockedAccessor_StreamCrit_t crit(_criticalData); - event->marker().wait(waitMode); + marker.wait(waitMode); } // Create a marker in this stream. // Save state in the event so it can track the status of the event. -void ihipStream_t::locked_recordEvent(hipEvent_t event) +hc::completion_future ihipStream_t::locked_recordEvent(hipEvent_t event) { // Lock the stream to prevent simultaneous access LockedAccessor_StreamCrit_t crit(_criticalData); @@ -385,7 +388,7 @@ void ihipStream_t::locked_recordEvent(hipEvent_t event) scopeFlag = HIP_EVENT_SYS_RELEASE ? hc::system_scope : hc::accelerator_scope; } - event->marker(crit->_av.create_marker(scopeFlag)); + return crit->_av.create_marker(scopeFlag); }; //============================================================================= diff --git a/src/hip_hcc_internal.h b/src/hip_hcc_internal.h index 32fc4686fa..6b51f5c202 100644 --- a/src/hip_hcc_internal.h +++ b/src/hip_hcc_internal.h @@ -137,6 +137,7 @@ extern std::vector g_dbStopTriggers; class ihipStream_t; class ihipDevice_t; class ihipCtx_t; +struct ihipEventData_t; // Color defs for debug messages: #define KNRM "\x1B[0m" @@ -152,10 +153,12 @@ extern const char *API_COLOR; extern const char *API_COLOR_END; -// If set, thread-safety is enforced on all stream functions. -// Stream functions will acquire a mutex before entering critical sections. -#define STREAM_THREAD_SAFE 1 +// If set, thread-safety is enforced on all event/stream/ctx/device functions. +// Can disable for performance or functional experiments - in this case +// the code uses a dummy "no-op" mutex. +#define EVENT_THREAD_SAFE 1 +#define STREAM_THREAD_SAFE 1 #define CTX_THREAD_SAFE 1 @@ -390,6 +393,12 @@ class FakeMutex void unlock() { } }; +#if EVENT_THREAD_SAFE +typedef std::mutex EventMutex; +#else +#warning "Stream thread-safe disabled" +typedef FakeMutex EventMutex; +#endif #if STREAM_THREAD_SAFE typedef std::mutex StreamMutex; @@ -540,11 +549,11 @@ public: hc::accelerator_view* locked_getAv() { LockedAccessor_StreamCrit_t crit(_criticalData); return &(crit->_av); }; - void locked_streamWaitEvent(hipEvent_t event); - void locked_recordEvent(hipEvent_t event); + void locked_streamWaitEvent(ihipEventData_t & event); + hc::completion_future locked_recordEvent(hipEvent_t event); bool locked_eventIsReady(hipEvent_t event); - void locked_eventWaitComplete(hipEvent_t event, hc::hcWaitMode waitMode); + void locked_eventWaitComplete(hc::completion_future &marker, hc::hcWaitMode waitMode); ihipStreamCritical_t &criticalData() { return _criticalData; }; @@ -628,32 +637,76 @@ enum ihipEventType_t { hipEventTypeStopCommand, }; + +struct ihipEventData_t +{ + ihipEventData_t() { + _state = hipEventStatusCreated; + _stream = NULL; + _timestamp = 0; + _type = hipEventTypeIndependent; + }; + + void marker(const hc::completion_future & marker) { _marker = marker; }; + hc::completion_future & marker() { return _marker; } + uint64_t timestamp() const { return _timestamp; } ; + ihipEventType_t type() const { return _type; }; + + ihipEventType_t _type; + hipEventStatus_t _state; + hipStream_t _stream; // Stream where the event is recorded. Null stream is resolved to actual stream when recorded + uint64_t _timestamp; // store timestamp, may be set on host or by marker. +private: + hc::completion_future _marker; +}; + + +//============================================================================= +//class ihipEventCriticalBase_t +template +class ihipEventCriticalBase_t : LockedBase +{ +public: + ihipEventCriticalBase_t(const ihipEvent_t *parentEvent) : + _parent(parentEvent) + {} + ~ihipEventCriticalBase_t() {}; + + // Keep data in structure so it can be easily copied into snapshots + // (used to reduce lock contention and preserve correct lock order) + ihipEventData_t _eventData; + +private: + const ihipEvent_t *_parent; + friend class LockedAccessor; +}; + +typedef ihipEventCriticalBase_t ihipEventCritical_t; + +typedef LockedAccessor LockedAccessor_EventCrit_t; + // internal hip event structure. class ihipEvent_t { public: ihipEvent_t(unsigned flags); void attachToCompletionFuture(const hc::completion_future *cf, hipStream_t stream, ihipEventType_t eventType); - void refereshEventStatus(); - hc::completion_future & marker() { return _marker; } - void marker(hc::completion_future cf) { _marker = cf; }; + std::pair refreshEventStatus(); // returns pair - bool locked_isReady(); - void locked_waitComplete(hc::hcWaitMode waitMode); - uint64_t timestamp() const { return _timestamp; } ; - ihipEventType_t type() const { return _type; }; + // Return a copy of the critical state. The critical data is locked during the copy. + ihipEventData_t locked_copyCrit() { + LockedAccessor_EventCrit_t crit(_criticalData); + return _criticalData._eventData; + }; + + ihipEventCritical_t &criticalData() { return _criticalData; }; public: - hipEventStatus_t _state; - - hipStream_t _stream; // Stream where the event is recorded. Null stream is resolved to actual stream when recorded unsigned _flags; - private: - hc::completion_future _marker; - ihipEventType_t _type; - uint64_t _timestamp; // store timestamp, may be set on host or by marker. + ihipEventCritical_t _criticalData; + friend hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream); } ; @@ -671,7 +724,6 @@ public: }; ~ihipDeviceCriticalBase_t() { - } // Contexts: diff --git a/src/hip_memory.cpp b/src/hip_memory.cpp index 5a4b5f4b4e..8a5225d499 100644 --- a/src/hip_memory.cpp +++ b/src/hip_memory.cpp @@ -69,7 +69,7 @@ int sharePtr(void *ptr, ihipCtx_t *ctx, bool shareWithAll, unsigned hipFlags) if (shareWithAll) { hsa_status_t s = hsa_amd_agents_allow_access(g_deviceCnt+1, g_allAgents, NULL, ptr); - tprintf (DB_MEM, " allow access to CPU + all %d GPUs (shareWithAll)\n", g_deviceCnt); + tprintf (DB_MEM, " allow access to CPU + all %d GPUs (shareWithAll)\n", g_deviceCnt); if (s != HSA_STATUS_SUCCESS) { ret = -1; } @@ -126,7 +126,7 @@ void * allocAndSharePtr(const char *msg, size_t sizeBytes, ihipCtx_t *ctx, bool if (HIP_INIT_ALLOC != -1) { // TODO , dont' call HIP API directly here: hipMemset(ptr, HIP_INIT_ALLOC, sizeBytes); - } + } if (ptr != nullptr) { int r = sharePtr(ptr, ctx, shareWithAll, hipFlags); @@ -255,7 +255,7 @@ hipError_t hipMalloc(void** ptr, size_t sizeBytes) hip_status = hipErrorMemoryAllocation; } - } + } return ihipLogStatus(hip_status); @@ -288,10 +288,10 @@ hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags) } - const unsigned supportedFlags = hipHostMallocPortable - | hipHostMallocMapped - | hipHostMallocWriteCombined - | hipHostMallocCoherent + const unsigned supportedFlags = hipHostMallocPortable + | hipHostMallocMapped + | hipHostMallocWriteCombined + | hipHostMallocCoherent | hipHostMallocNonCoherent; @@ -304,7 +304,7 @@ hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags) hip_status = hipErrorInvalidValue; } else { auto device = ctx->getWriteableDevice(); - + unsigned amFlags = 0; if (flags & hipHostMallocCoherent) { amFlags = amHostCoherent; @@ -585,7 +585,7 @@ hipError_t hipMalloc3DArray(hipArray_t *array, hsa_ext_image_data_info_t imageInfo; hsa_status_t status = hsa_ext_image_data_get_info(*agent, &imageDescriptor, permission, &imageInfo); size_t alignment = imageInfo.alignment <= allocGranularity ? 0 : imageInfo.alignment; - + *ptr = hip_internal::allocAndSharePtr("device_array", allocSize, ctx, false, am_flags, 0, alignment); if (size && (*ptr == NULL)) { diff --git a/src/hip_module.cpp b/src/hip_module.cpp index 7799ad86c8..38411f2347 100644 --- a/src/hip_module.cpp +++ b/src/hip_module.cpp @@ -27,6 +27,7 @@ THE SOFTWARE. #include #include #include +#include #include #include @@ -613,6 +614,125 @@ hipError_t hipHccModuleLaunchKernel(hipFunction_t f, sharedMemBytes, hStream, kernelParams, extra, startEvent, stopEvent)); } +namespace +{ + struct Agent_global { + std::string name; + hipDeviceptr_t address; + std::uint32_t byte_cnt; + }; + + inline + void* address(hsa_executable_symbol_t x) + { + void* r = nullptr; + hsa_executable_symbol_get_info( + x, HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ADDRESS, &r); + + return r; + } + + inline + std::string name(hsa_executable_symbol_t x) + { + uint32_t sz = 0u; + hsa_executable_symbol_get_info( + x, HSA_EXECUTABLE_SYMBOL_INFO_NAME_LENGTH, &sz); + + std::string r(sz, '\0'); + hsa_executable_symbol_get_info( + x, HSA_EXECUTABLE_SYMBOL_INFO_NAME, &r.front()); + + return r; + } + + inline + std::uint32_t size(hsa_executable_symbol_t x) + { + std::uint32_t r = 0; + hsa_executable_symbol_get_info( + x, HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_SIZE, &r); + + return r; + } + + inline + void track(const Agent_global& x) + { + tprintf( + DB_MEM, + " add variable '%s' with ptr=%p size=%u to tracker\n", + x.name.c_str(), + x.address, + x.byte_cnt); + + auto device = ihipGetTlsDefaultCtx()->getWriteableDevice(); + + hc::AmPointerInfo ptr_info( + nullptr, + x.address, + x.address, + x.byte_cnt, + device->_acc, + true, + false); + hc::am_memtracker_add(x.address, ptr_info); + hc::am_memtracker_update(x.address, device->_deviceId, 0u); + } + + template> + inline + hsa_status_t copy_agent_global_variables( + hsa_executable_t, hsa_agent_t, hsa_executable_symbol_t x, void* out) + { + assert(out); + + hsa_symbol_kind_t t = {}; + hsa_executable_symbol_get_info(x, HSA_EXECUTABLE_SYMBOL_INFO_TYPE, &t); + + if (t == HSA_SYMBOL_KIND_VARIABLE) { + static_cast(out)->push_back( + Agent_global{name(x), address(x), size(x)}); + + track(static_cast(out)->back()); + } + + return HSA_STATUS_SUCCESS; + } + + inline + hsa_agent_t this_agent() + { + auto ctx = ihipGetTlsDefaultCtx(); + + if (!ctx) throw std::runtime_error{"No active HIP context."}; + + auto device = ctx->getDevice(); + + if (!device) throw std::runtime_error{"No device available for HIP."}; + + ihipDevice_t *currentDevice = ihipGetDevice(device->_deviceId); + + if (!currentDevice) { + throw std::runtime_error{"No active device for HIP"}; + } + + return currentDevice->_hsaAgent; + } + + inline + std::vector read_agent_globals(hipModule_t hmodule) + { + std::vector r; + + + hsa_executable_iterate_agent_symbols( + hmodule->executable, this_agent(), copy_agent_global_variables, &r); + + return r; + } +} + hipError_t hipModuleGetGlobal(hipDeviceptr_t *dptr, size_t *bytes, hipModule_t hmod, const char* name) { @@ -625,11 +745,37 @@ hipError_t hipModuleGetGlobal(hipDeviceptr_t *dptr, size_t *bytes, return ihipLogStatus(hipErrorNotInitialized); } else{ - hipFunction_t func; - ret = ihipModuleGetSymbol(&func, hmod, name); - *bytes = PrintSymbolSizes(hmod->ptr, name) + sizeof(amd_kernel_code_t); - *dptr = reinterpret_cast(func->_object); - return ihipLogStatus(ret); + static std::unordered_map< + hipModule_t, std::vector> agent_globals; + + // TODO: this is not particularly robust. + if (agent_globals.count(hmod) == 0) { + static std::mutex mtx; + std::lock_guard lck{mtx}; + + if (agent_globals.count(hmod) == 0) { + agent_globals.emplace(hmod, read_agent_globals(hmod)); + } + } + + // TODO: This is unsafe iff some other emplacement triggers rehashing. + // It will have to be properly fleshed out in the future. + const auto it0 = agent_globals.find(hmod); + if (it0 == agent_globals.cend()) { + throw std::runtime_error{"agent_globals data structure corrupted."}; + } + + const auto it1 = std::find_if( + it0->second.cbegin(), + it0->second.cend(), + [=](const Agent_global& x) { return x.name == name; }); + + if (it1 == it0->second.cend()) return ihipLogStatus(hipErrorNotFound); + + *dptr = it1->address; + *bytes = it1->byte_cnt; + + return ihipLogStatus(hipSuccess); } } diff --git a/src/hip_stream.cpp b/src/hip_stream.cpp index 0cb5e732ab..dab31dad62 100644 --- a/src/hip_stream.cpp +++ b/src/hip_stream.cpp @@ -93,18 +93,23 @@ hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int hipError_t e = hipSuccess; + auto ecd = event->locked_copyCrit(); + if (event == nullptr) { e = hipErrorInvalidResourceHandle; - } else if (event->_state != hipEventStatusUnitialized) { + } else if ((ecd._state != hipEventStatusUnitialized) && + (ecd._state != hipEventStatusCreated)) { if (HIP_SYNC_STREAM_WAIT || (HIP_SYNC_NULL_STREAM && (stream == 0))) { // conservative wait on host for the specified event to complete: - event->locked_waitComplete((event->_flags & hipEventBlockingSync) ? hc::hcWaitModeBlocked : hc::hcWaitModeActive); + // return _stream->locked_eventWaitComplete(this, waitMode); + // + ecd._stream->locked_eventWaitComplete(ecd.marker(), (event->_flags & hipEventBlockingSync) ? hc::hcWaitModeBlocked : hc::hcWaitModeActive); } else { stream = ihipSyncAndResolveStream(stream); // This will use create_blocking_marker to wait on the specified queue. - stream->locked_streamWaitEvent(event); + stream->locked_streamWaitEvent(ecd); } } // else event not recorded, return immediately and don't create marker. diff --git a/tests/README.md b/tests/README.md index 27cde7c534..a9638ba95f 100644 --- a/tests/README.md +++ b/tests/README.md @@ -75,6 +75,18 @@ RUN: %t CMAKE_TEST_NAME EXCLUDE_HIP_PLATFORM +``` +cmake_command: refers to any of the commands supported by ```cmake -E``` as specified in the [cmake documentation](https://cmake.org/cmake/help/latest/manual/cmake.1.html#command-line-tool-mode). Note that the commands are limited by the version of cmake the user is running. +options_to_cmake_command: refers to the arguments supported by the specific cmake_command. The arguments are parsed by HIT to replace special markers. The markers supported by HIT are: +%S: Refers to the source directory containing the current source file. +%B: Refers to the build directory for the current cmake project i.e. CMAKE_CURRENT_BINARY_DIR. + + ### Running tests: ``` ctest diff --git a/tests/hit/HIT.cmake b/tests/hit/HIT.cmake index fd0001214e..82e8508dcd 100644 --- a/tests/hit/HIT.cmake +++ b/tests/hit/HIT.cmake @@ -155,6 +155,20 @@ macro(HIT_ADD_FILES _dir _label _parent) endif() endforeach() + # Run cmake commands + execute_process(COMMAND ${HIP_SRC_PATH}/tests/hit/parser --cmakeCMDs ${file} + OUTPUT_VARIABLE _contents + ERROR_QUIET + WORKING_DIRECTORY ${_dir} + OUTPUT_STRIP_TRAILING_WHITESPACE) + string(REGEX REPLACE "\n" ";" _contents "${_contents}") + string(REGEX REPLACE "%S" ${_dir} _contents "${_contents}") + string(REGEX REPLACE "%B" ${CMAKE_CURRENT_BINARY_DIR} _contents "${_contents}") + foreach(_cmd ${_contents}) + string(REGEX REPLACE " " ";" _cmd "${_cmd}") + execute_process(COMMAND ${CMAKE_COMMAND} -E ${_cmd}) + endforeach() + # Add tests execute_process(COMMAND ${HIP_SRC_PATH}/tests/hit/parser --runCMDs ${file} OUTPUT_VARIABLE _contents diff --git a/tests/hit/parser b/tests/hit/parser index 3d851752e4..f77bd524f3 100755 --- a/tests/hit/parser +++ b/tests/hit/parser @@ -8,7 +8,7 @@ use File::Spec; sub parse_file { my $file = shift; (my $exe = $file) =~ s/\.[^.]+$//g; - my (@buildCMDs, @runCMDs, @runNamedCMDs); + my (@buildCMDs, @runCMDs, @runNamedCMDs, @cmakeCMDs); if (open (SOURCE, '<:encoding(UTF-8)', "$file")) { while () { my $line=$_; @@ -36,10 +36,17 @@ sub parse_file { $line =~ s/\R//g; # Remove line endings push @runNamedCMDs, $line; } + # Look for CMAKECMD instructions + if ($line =~ /^ \* CMAKECMD:/) { + $line =~ s/^ \* CMAKECMD: //g; # Remove " * CMAKECMD: " + # Substitute %S -> srcdir and %B -> builddir happens in cmake + $line =~ s/\R//g; # Remove line endings + push @cmakeCMDs, $line; + } } close(SOURCE); } - return (\@buildCMDs, \@runCMDs, \@runNamedCMDs); + return (\@buildCMDs, \@runCMDs, \@runNamedCMDs, \@cmakeCMDs); } # Exit if no arguments specified @@ -53,8 +60,9 @@ my @options = (); my $retBuildCMDs = 0; my $retRunCMDs = 0; my $retRunNamedCMDs = 0; +my $retCmakeCMDs = 0; foreach $arg (@ARGV) { - if ($retBuildCMDs or $retRunCMDs or $retRunNamedCMDs) { + if ($retBuildCMDs or $retRunCMDs or $retRunNamedCMDs or $retCmakeCMDs) { push (@options, $arg); } if ($arg eq '--buildCMDs') { @@ -66,18 +74,21 @@ foreach $arg (@ARGV) { if ($arg eq '--runNamedCMDs') { $retRunNamedCMDs = 1; } + if ($arg eq '--cmakeCMDs') { + $retCmakeCMDs = 1; + } } # Atleast one command needs to be specified -if (($retBuildCMDs eq 0) and ($retRunCMDs eq 0) and ($retRunNamedCMDs eq 0)) { - die "Usage: $0 <--buildCMDs|--runCMDs|--runNamedCMDs> FILENAMEs\n"; +if (($retBuildCMDs eq 0) and ($retRunCMDs eq 0) and ($retRunNamedCMDs eq 0) and ($retCmakeCMDs eq 0)) { + die "Usage: $0 <--buildCMDs|--runCMDs|--runNamedCMDs|--cmakeCMDs> FILENAMEs\n"; } # Iterate over input files foreach $file (@options) { # Convert absolute path to path relative to working directory my $relfile = File::Spec->abs2rel($file); - my ($buildCMDs, $runCMDs, $runNamedCMDs) = parse_file("$relfile"); + my ($buildCMDs, $runCMDs, $runNamedCMDs, $cmakeCMDs) = parse_file("$relfile"); if ($retBuildCMDs) { # print "BuildCMDs:\n"; print "$_\n" for @$buildCMDs; @@ -90,6 +101,10 @@ foreach $file (@options) { # print "RunNamedCMDs:\n"; print "$_\n" for @$runNamedCMDs; } + if ($retCmakeCMDs) { + # print "CmakeCMDs:\n"; + print "$_\n" for @$cmakeCMDs; + } } # vim: ts=4:sw=4:expandtab:smartindent diff --git a/tests/src/runtimeApi/event/record_event.cpp b/tests/src/runtimeApi/event/record_event.cpp index bd8a3ada8e..b9653bf522 100644 --- a/tests/src/runtimeApi/event/record_event.cpp +++ b/tests/src/runtimeApi/event/record_event.cpp @@ -52,7 +52,7 @@ void test(unsigned testMask, int *C_d, int *C_h, int64_t numElements, hipStream_ if (!(testMask & p_tests)) { return; } - printf ("\ntest 0x%3x: stream=%p waitStart=%d syncMode=%s\n", + printf ("\ntest 0x%3x: stream=%p waitStart=%d syncMode=%s\n", testMask, stream, waitStart, syncModeString(syncMode)); size_t sizeBytes = numElements * sizeof(int); @@ -85,8 +85,8 @@ void test(unsigned testMask, int *C_d, int *C_h, int64_t numElements, hipStream_ HIPCHECK(hipEventSynchronize(start)); } - - hipError_t expectedStopError = hipSuccess; + + hipError_t expectedStopError = hipSuccess; // How to wait for the events to finish: switch (syncMode) { @@ -97,12 +97,12 @@ void test(unsigned testMask, int *C_d, int *C_h, int64_t numElements, hipStream_ HIPCHECK(hipStreamSynchronize(stream)); // wait for recording to finish... break; case syncStopEvent: - HIPCHECK(hipEventSynchronize(stop)); + HIPCHECK(hipEventSynchronize(stop)); break; default: assert(0); }; - + float t; @@ -111,25 +111,25 @@ void test(unsigned testMask, int *C_d, int *C_h, int64_t numElements, hipStream_ failed ("start event not in expected state, was %d=%s\n", e, hipGetErrorName(e)); } - if (e == hipSuccess) + if (e == hipSuccess) assert (t==0.0f); - + // stop usually ready unless we skipped the synchronization (syncNone) HIPCHECK_API(hipEventElapsedTime(&t, stop, stop), expectedStopError); - if (e == hipSuccess) + if (e == hipSuccess) assert (t==0.0f); e = hipEventElapsedTime(&t, start, stop); HIPCHECK_API(e, expectedStopError); - if (expectedStopError == hipSuccess) + if (expectedStopError == hipSuccess) assert (t>0.0f); printf ("time=%6.2f error=%s\n", t, hipGetErrorName(e)); e = hipEventElapsedTime(&t, stop, start); HIPCHECK_API(e, expectedStopError); - if (expectedStopError == hipSuccess) + if (expectedStopError == hipSuccess) assert (t<0.0f); printf ("negtime=%6.2f error=%s\n", t, hipGetErrorName(e)); diff --git a/tests/src/runtimeApi/memory/hipMemcpy.cpp b/tests/src/runtimeApi/memory/hipMemcpy.cpp index e8e803e44c..d8438fa848 100644 --- a/tests/src/runtimeApi/memory/hipMemcpy.cpp +++ b/tests/src/runtimeApi/memory/hipMemcpy.cpp @@ -58,7 +58,7 @@ public: void offset(int offset) { _offset = offset; }; int offset() const { return _offset; }; - + private: T * _A_d; T* _B_d; @@ -72,7 +72,7 @@ private: template DeviceMemory::DeviceMemory(size_t numElements) - : _maxNumElements(numElements), + : _maxNumElements(numElements), _offset(0) { T ** np = nullptr; @@ -93,7 +93,7 @@ DeviceMemory::~DeviceMemory () HipTest::freeArrays (_A_d, _B_d, _C_d, np, np, np, 0); HIPCHECK (hipFree(_C_dd)); - + _C_dd = NULL; }; @@ -125,7 +125,7 @@ public: T * A_hh; T* B_hh; - bool _usePinnedHost; + bool _usePinnedHost; private: size_t _maxNumElements; @@ -165,11 +165,11 @@ HostMemory::HostMemory(size_t numElements, bool usePinnedHost) template void -HostMemory::reset(size_t numElements, bool full) +HostMemory::reset(size_t numElements, bool full) { // Initialize the host data: for (size_t i=0; i void memcpytest2(DeviceMemory *dmem, HostMemory *hmem, size_t numElements, bool useHostToHost, bool useDeviceToDevice, bool useMemkindDefault) { size_t sizeElements = numElements * sizeof(T); - printf ("test: %s<%s> size=%lu (%6.2fMB) usePinnedHost:%d, useHostToHost:%d, useDeviceToDevice:%d, useMemkindDefault:%d, offsets:dev:%+d host:+%d\n", - __func__, + printf ("test: %s<%s> size=%lu (%6.2fMB) usePinnedHost:%d, useHostToHost:%d, useDeviceToDevice:%d, useMemkindDefault:%d, offsets:dev:%+d host:+%d\n", + __func__, TYPENAME(T), sizeElements, sizeElements/1024.0/1024.0, hmem->_usePinnedHost, useHostToHost, useDeviceToDevice, useMemkindDefault, @@ -273,8 +273,8 @@ void memcpytest2_for_type(size_t numElements) { printSep(); - DeviceMemory memD(numElements); - HostMemory memU(numElements, 0/*usePinnedHost*/); + DeviceMemory memD(numElements); + HostMemory memU(numElements, 0/*usePinnedHost*/); HostMemory memP(numElements, 1/*usePinnedHost*/); for (int usePinnedHost =0; usePinnedHost<=1; usePinnedHost++) { @@ -307,11 +307,11 @@ void memcpytest2_sizes(size_t maxElem=0) maxElem = free/sizeof(T)/8; } - printf (" device#%d: hipMemGetInfo: free=%zu (%4.2fMB) total=%zu (%4.2fMB) maxSize=%6.1fMB\n", + printf (" device#%d: hipMemGetInfo: free=%zu (%4.2fMB) total=%zu (%4.2fMB) maxSize=%6.1fMB\n", deviceId, free, (float)(free/1024.0/1024.0), total, (float)(total/1024.0/1024.0), maxElem*sizeof(T)/1024.0/1024.0); HIPCHECK ( hipDeviceReset() ); - DeviceMemory memD(maxElem); - HostMemory memU(maxElem, 0/*usePinnedHost*/); + DeviceMemory memD(maxElem); + HostMemory memU(maxElem, 0/*usePinnedHost*/); HostMemory memP(maxElem, 1/*usePinnedHost*/); for (size_t elem=1; elem<=maxElem; elem*=2) { @@ -336,11 +336,11 @@ void memcpytest2_offsets(size_t maxElem, bool devOffsets, bool hostOffsets) HIPCHECK(hipMemGetInfo(&free, &total)); - printf (" device#%d: hipMemGetInfo: free=%zu (%4.2fMB) total=%zu (%4.2fMB) maxSize=%6.1fMB\n", + printf (" device#%d: hipMemGetInfo: free=%zu (%4.2fMB) total=%zu (%4.2fMB) maxSize=%6.1fMB\n", deviceId, free, (float)(free/1024.0/1024.0), total, (float)(total/1024.0/1024.0), maxElem*sizeof(T)/1024.0/1024.0); HIPCHECK ( hipDeviceReset() ); - DeviceMemory memD(maxElem); - HostMemory memU(maxElem, 0/*usePinnedHost*/); + DeviceMemory memD(maxElem); + HostMemory memU(maxElem, 0/*usePinnedHost*/); HostMemory memP(maxElem, 1/*usePinnedHost*/); size_t elem = maxElem / 2; @@ -380,16 +380,16 @@ void multiThread_1(bool serialize, bool usePinnedHost) { printSep(); printf ("test: %s<%s> serialize=%d usePinnedHost=%d\n", __func__, TYPENAME(T), serialize, usePinnedHost); - DeviceMemory memD(N); - HostMemory mem1(N, usePinnedHost); - HostMemory mem2(N, usePinnedHost); + DeviceMemory memD(N); + HostMemory mem1(N, usePinnedHost); + HostMemory mem2(N, usePinnedHost); std::thread t1 (memcpytest2, &memD, &mem1, N, 0,0,0); if (serialize) { t1.join(); } - + std::thread t2 (memcpytest2,&memD, &mem2, N, 0,0,0); if (serialize) { t2.join(); @@ -427,21 +427,21 @@ int main(int argc, char *argv[]) // Some tests around the 64KB boundary which have historically shown issues: printf ("\n\n=== tests&0x2 (64KB boundary)\n"); size_t maxElem = 32*1024*1024; - DeviceMemory memD(maxElem); - HostMemory memU(maxElem, 0/*usePinnedHost*/); - HostMemory memP(maxElem, 0/*usePinnedHost*/); + DeviceMemory memD(maxElem); + HostMemory memU(maxElem, 0/*usePinnedHost*/); + HostMemory memP(maxElem, 0/*usePinnedHost*/); // These all pass: - memcpytest2(&memD, &memP, 15*1024*1024, 0, 0, 0); - memcpytest2(&memD, &memP, 16*1024*1024, 0, 0, 0); - memcpytest2(&memD, &memP, 16*1024*1024+16*1024, 0, 0, 0); + memcpytest2(&memD, &memP, 15*1024*1024, 0, 0, 0); + memcpytest2(&memD, &memP, 16*1024*1024, 0, 0, 0); + memcpytest2(&memD, &memP, 16*1024*1024+16*1024, 0, 0, 0); // Just over 64MB: - memcpytest2(&memD, &memP, 16*1024*1024+512*1024, 0, 0, 0); - memcpytest2(&memD, &memP, 17*1024*1024+1024, 0, 0, 0); - memcpytest2(&memD, &memP, 32*1024*1024, 0, 0, 0); - memcpytest2(&memD, &memU, 32*1024*1024, 0, 0, 0); - memcpytest2(&memD, &memP, 32*1024*1024, 1, 1, 0); - memcpytest2(&memD, &memP, 32*1024*1024, 1, 1, 0); + memcpytest2(&memD, &memP, 16*1024*1024+512*1024, 0, 0, 0); + memcpytest2(&memD, &memP, 17*1024*1024+1024, 0, 0, 0); + memcpytest2(&memD, &memP, 32*1024*1024, 0, 0, 0); + memcpytest2(&memD, &memU, 32*1024*1024, 0, 0, 0); + memcpytest2(&memD, &memP, 32*1024*1024, 1, 1, 0); + memcpytest2(&memD, &memP, 32*1024*1024, 1, 1, 0); } @@ -464,7 +464,7 @@ int main(int argc, char *argv[]) // Simplest cases: serialize the threads, and also used pinned memory: // This verifies that the sub-calls to memcpytest2 are correct. - multiThread_1(true, true); + multiThread_1(true, true); // Serialize, but use unpinned memory to stress the unpinned memory xfer path. multiThread_1(true, false); diff --git a/tests/src/runtimeApi/multiThread/hipMultiThreadStreams1.cpp b/tests/src/runtimeApi/multiThread/hipMultiThreadStreams1.cpp index 4f73b67ad7..9b2e749cf9 100644 --- a/tests/src/runtimeApi/multiThread/hipMultiThreadStreams1.cpp +++ b/tests/src/runtimeApi/multiThread/hipMultiThreadStreams1.cpp @@ -41,8 +41,8 @@ void printSep() // Designed to stress a small number of simple smoke tests template< - typename T=float, - class P=HipTest::Unpinned, + typename T=float, + class P=HipTest::Unpinned, class C=HipTest::Memcpy > void simpleVectorAdd(size_t numElements, int iters, hipStream_t stream) diff --git a/tests/src/runtimeApi/stream/hipNullStream.cpp b/tests/src/runtimeApi/stream/hipNullStream.cpp index b610315608..af5547e3fb 100644 --- a/tests/src/runtimeApi/stream/hipNullStream.cpp +++ b/tests/src/runtimeApi/stream/hipNullStream.cpp @@ -119,7 +119,7 @@ void Streamer::reset() { HipTest::setDefaultData(_numElements, _A_h, _B_h, _C_h); H2D(); - + } @@ -238,7 +238,7 @@ int main(int argc, char *argv[]) nullStreamer->D2H(); HIPCHECK(hipDeviceSynchronize()); - HipTest::checkTest(expected_H, nullStreamer->_C_h, numElements); + HipTest::checkTest(expected_H, nullStreamer->_C_h, numElements); } } @@ -263,7 +263,7 @@ int main(int argc, char *argv[]) HIPCHECK(hipDeviceSynchronize()); - HipTest::checkTest(expected_H, nullStreamer->_C_h, numElements); + HipTest::checkTest(expected_H, nullStreamer->_C_h, numElements); } } @@ -289,10 +289,10 @@ int main(int argc, char *argv[]) // Copy with stream1, this could go async if the streamSync doesn't synchronize ALL the streams. HIPCHECK(hipMemcpyAsync(streamers[0]->_C_h, streamers[0]->_C_d, streamers[0]->_numElements*sizeof(int), hipMemcpyDeviceToHost, streamers[1]->_stream)); - + HIPCHECK(hipDeviceSynchronize()); - HipTest::checkTest(expected_H, streamers[0]->_C_h, numElements); + HipTest::checkTest(expected_H, streamers[0]->_C_h, numElements); } diff --git a/tests/src/runtimeApi/stream/hipStreamSync2.cpp b/tests/src/runtimeApi/stream/hipStreamSync2.cpp index c6a58ce7d4..4c49d80c05 100644 --- a/tests/src/runtimeApi/stream/hipStreamSync2.cpp +++ b/tests/src/runtimeApi/stream/hipStreamSync2.cpp @@ -59,23 +59,23 @@ const char *syncModeString(int syncMode) { void test(unsigned testMask, int *C_d, int *C_h, int64_t numElements, SyncMode syncMode, bool expectMismatch) { - // This test sends a long-running kernel to the null stream, then tests to see if the + // This test sends a long-running kernel to the null stream, then tests to see if the // specified synchronization technique is effective. // - // Some syncMode are not expected to correctly sync (for example "syncNone"). in these + // Some syncMode are not expected to correctly sync (for example "syncNone"). in these // cases the test sets expectMismatch and the check logic below will attempt to ensure that // the undesired synchronization did not occur - ie ensure the kernel is still running and did // not yet update the stop event. This can be tricky since if the kernel runs fast enough it - // may complete before the check. To prevent this, the addCountReverse has a count parameter - // which causes it to loop repeatedly, and the results are checked in reverse order. + // may complete before the check. To prevent this, the addCountReverse has a count parameter + // which causes it to loop repeatedly, and the results are checked in reverse order. // // Tests with expectMismatch=true should ensure the kernel finishes correctly. This results // are checked and we test to make sure stop event has completed. - + if (!(testMask & p_tests)) { return; } - printf ("\ntest 0x%02x: syncMode=%s expectMismatch=%d\n", + printf ("\ntest 0x%02x: syncMode=%s expectMismatch=%d\n", testMask, syncModeString(syncMode), expectMismatch); size_t sizeBytes = numElements * sizeof(int); @@ -98,7 +98,7 @@ void test(unsigned testMask, int *C_d, int *C_h, int64_t numElements, SyncMode s unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, numElements); // Launch kernel into null stream, should result in C_h == count. hipLaunchKernelGGL(HipTest::addCountReverse , dim3(blocks), dim3(threadsPerBlock), 0, 0 /*stream*/, C_d, C_h, numElements, count); - HIPCHECK(hipEventRecord(stop, 0/*default*/)); + HIPCHECK(hipEventRecord(stop, 0/*default*/)); switch (syncMode) { case syncNone: @@ -108,18 +108,18 @@ void test(unsigned testMask, int *C_d, int *C_h, int64_t numElements, SyncMode s break; case syncOtherStream: // Does this synchronize with the null stream? - HIPCHECK(hipStreamSynchronize(otherStream)); + HIPCHECK(hipStreamSynchronize(otherStream)); break; case syncMarkerThenOtherStream: case syncMarkerThenOtherNonBlockingStream: - - // this may wait for NULL stream depending hipStreamNonBlocking flag above - HIPCHECK(hipEventRecord(otherStreamEvent, otherStream)); - HIPCHECK(hipStreamSynchronize(otherStream)); + // this may wait for NULL stream depending hipStreamNonBlocking flag above + HIPCHECK(hipEventRecord(otherStreamEvent, otherStream)); + + HIPCHECK(hipStreamSynchronize(otherStream)); break; case syncDevice: - HIPCHECK(hipDeviceSynchronize()); + HIPCHECK(hipDeviceSynchronize()); break; default: assert(0); @@ -197,7 +197,7 @@ void runTests(int64_t numElements) int main(int argc, char *argv[]) { // Can' destroy the default stream:// TODO - move to another test - HIPCHECK_API(hipStreamDestroy(0), hipErrorInvalidResourceHandle); + HIPCHECK_API(hipStreamDestroy(0), hipErrorInvalidResourceHandle); HipTest::parseStandardArguments(argc, argv, true /*failOnUndefinedArg*/); diff --git a/tests/src/runtimeApi/stream/hipStreamWaitEvent.cpp b/tests/src/runtimeApi/stream/hipStreamWaitEvent.cpp index f5b1b79550..cf463be76a 100644 --- a/tests/src/runtimeApi/stream/hipStreamWaitEvent.cpp +++ b/tests/src/runtimeApi/stream/hipStreamWaitEvent.cpp @@ -88,7 +88,7 @@ private: template Streamer::Streamer(int deviceId, T * A_d, size_t numElements, int commandType) : - _preA_d(NULL), + _preA_d(NULL), _A_d(A_d), _deviceId(deviceId), _numElements(numElements), @@ -239,7 +239,7 @@ size_t Streamer::check(int streamerNum, T initValue, T expectedOffset, bool e return _mismatchCount; } - + //--- //Parse arguments specific to this test. @@ -300,7 +300,7 @@ void checkAll(int initValue, std::vector &streamers, std::vector< for (int i=0; iexpectedAdd(); - + mismatchCount += streamers[i]->check(i+1, initValue, expected, expectPass); } @@ -330,7 +330,7 @@ void checkAll(int initValue, std::vector &streamers, std::vector< void sync_none(void) {}; -void sync_allDevices(int numDevices) +void sync_allDevices(int numDevices) { for (int d=0; d streamers) +void sync_queryAllUntilComplete(std::vector streamers) { for (int i=streamers.size()-1; i>=0; i--) { streamers[i]->queryUntilComplete(); @@ -347,7 +347,7 @@ void sync_queryAllUntilComplete(std::vector streamers) } -void sync_streamWaitEvent(hipEvent_t lastEvent, int sideDeviceId, hipStream_t sideStream, bool waitHere) +void sync_streamWaitEvent(hipEvent_t lastEvent, int sideDeviceId, hipStream_t sideStream, bool waitHere) { HIPCHECK(hipSetDevice(sideDeviceId)); @@ -389,7 +389,7 @@ int main(int argc, char *argv[]) initArray_h[i] = initValue; } HIPCHECK(hipMemcpy(initArray_d, initArray_h, sizeElements, hipMemcpyHostToDevice)); - + int numDevices; HIPCHECK(hipGetDeviceCount(&numDevices)); @@ -414,7 +414,7 @@ int main(int argc, char *argv[]) // A sideband stream channel that is independent from above. - // Used to check to ensure the WaitEvent or other synchronization is working correctly since by default sideStream is + // Used to check to ensure the WaitEvent or other synchronization is working correctly since by default sideStream is // asynchronous wrt the other streams. std::vector sideStreams; for (int d=0; d Test 0x1000 simple null stream tests\n"); + printf ("==> Test 0x1000 simple null stream tests\n"); // try some null stream: hipStreamQuery(0); @@ -463,7 +463,7 @@ int main(int argc, char *argv[]) HIPCHECK(hipEventRecord(e1, s1)) HIPCHECK(hipStreamWaitEvent(hipStream_t(0), e1, 0/*flags*/)); - + HIPCHECK(hipStreamDestroy(s1)); HIPCHECK(hipEventDestroy(e1)); } @@ -476,11 +476,11 @@ int main(int argc, char *argv[]) HIPCHECK(hipEventRecord(e1, hipStream_t(0))) HIPCHECK(hipStreamWaitEvent(s1, e1, 0/*flags*/)); - + HIPCHECK(hipStreamDestroy(s1)); HIPCHECK(hipEventDestroy(e1)); } - + }