Merge 'master' into 'amd-master'

Change-Id: If635fe33b97998b22c4a00c0e9a5e041ef332d82
Этот коммит содержится в:
Jenkins
2017-11-10 04:48:30 -06:00
родитель 3e7a60d69e 31bcb59f62
Коммит e2d81a6038
16 изменённых файлов: 475 добавлений и 221 удалений
+2 -2
Просмотреть файл
@@ -53,7 +53,7 @@ THE SOFTWARE.
// define HIP_ENABLE_PRINTF to enable printf
#ifdef HIP_ENABLE_PRINTF
#define HCC_ENABLE_ACCELERATOR_PRINTF 1
#endif
#endif
//---
// Remainder of this file only compiles with HCC
@@ -481,7 +481,7 @@ do {\
type* var = \
(type*)__get_dynamicgroupbaseptr(); \
#define HIP_DYNAMIC_SHARED_ATTRIBUTE
#define HIP_DYNAMIC_SHARED_ATTRIBUTE
+97 -90
Просмотреть файл
@@ -31,12 +31,9 @@ THE SOFTWARE.
ihipEvent_t::ihipEvent_t(unsigned flags)
: _criticalData(this)
{
_state = hipEventStatusCreated;
_stream = NULL;
_flags = flags;
_timestamp = 0;
_type = hipEventTypeIndependent;
};
@@ -45,56 +42,45 @@ ihipEvent_t::ihipEvent_t(unsigned flags)
void ihipEvent_t::attachToCompletionFuture(const hc::completion_future *cf,
hipStream_t stream, ihipEventType_t eventType)
{
_marker = *cf;
_type = eventType;
_stream = stream;
_state = hipEventStatusRecording;
LockedAccessor_EventCrit_t crit(_criticalData);
crit->_eventData.marker(*cf);
crit->_eventData._type = eventType;
crit->_eventData._stream = stream;
crit->_eventData._state = hipEventStatusRecording;
}
void ihipEvent_t::refereshEventStatus()
std::pair<hipEventStatus_t, uint64_t>
ihipEvent_t::refreshEventStatus()
{
bool isReady0 = locked_isReady();
bool isReady1;
int val = 0;
if (_state == hipEventStatusRecording) {
// TODO - use completion-future functions to obtain ticks and timestamps:
hsa_signal_t *sig = static_cast<hsa_signal_t*> (_marker.get_native_handle());
isReady1 = locked_isReady();
if (sig) {
val = hsa_signal_load_acquire(*sig);
if (val == 0) {
auto ecd = locked_copyCrit();
if (ecd._state == hipEventStatusRecording) {
bool isReady1 = ecd._stream->locked_eventIsReady(this);
if (isReady1) {
LockedAccessor_EventCrit_t eCrit(_criticalData);
if ((_type == hipEventTypeIndependent) || (_type == hipEventTypeStopCommand)) {
_timestamp = _marker.get_end_tick();
} else if (_type == hipEventTypeStartCommand) {
_timestamp = _marker.get_begin_tick();
} else {
assert(0); // TODO - move to debug assert
_timestamp = 0;
}
_state = hipEventStatusComplete;
if ((eCrit->_eventData._type == hipEventTypeIndependent) ||
(eCrit->_eventData._type == hipEventTypeStopCommand)) {
eCrit->_eventData._timestamp = eCrit->_eventData.marker().get_end_tick();
} else if (eCrit->_eventData._type == hipEventTypeStartCommand) {
eCrit->_eventData._timestamp = eCrit->_eventData.marker().get_begin_tick();
} else {
eCrit->_eventData._timestamp = 0;
assert(0); // TODO - move to debug assert
}
eCrit->_eventData._state = hipEventStatusComplete;
return std::pair<hipEventStatus_t, uint64_t> (eCrit->_eventData._state, eCrit->_eventData._timestamp);
}
}
}
if (_state != hipEventStatusComplete) {
//printf (" not ready isReady0=%d val=%d isReady1=%d\n", isReady0, val, isReady1);
}
// Not complete path here:
return std::pair<hipEventStatus_t, uint64_t> (ecd._state, ecd._timestamp);
}
bool ihipEvent_t::locked_isReady()
{
return _stream->locked_eventIsReady(this);
}
void ihipEvent_t::locked_waitComplete(hc::hcWaitMode waitMode)
{
return _stream->locked_eventWaitComplete(this, waitMode);
}
hipError_t ihipEventCreate(hipEvent_t* event, unsigned flags)
@@ -136,33 +122,44 @@ hipError_t hipEventCreate(hipEvent_t* event)
return ihipLogStatus(ihipEventCreate(event, 0));
}
hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream)
{
HIP_INIT_SPECIAL_API(TRACE_SYNC, event, stream);
if (event && event->_state != hipEventStatusUnitialized) {
auto ecd = event->locked_copyCrit();
if (event && ecd._state != hipEventStatusUnitialized) {
stream = ihipSyncAndResolveStream(stream);
event->_stream = stream;
if (HIP_SYNC_NULL_STREAM && stream->isDefaultStream()) {
// TODO-HIP_SYNC_NULL_STREAM : can remove this code when HIP_SYNC_NULL_STREAM = 0
//
// If default stream , then wait on all queues.
ihipCtx_t *ctx = ihipGetTlsDefaultCtx();
ctx->locked_syncDefaultStream(true, true);
event->_timestamp = hc::get_system_ticks();
event->_state = hipEventStatusComplete;
{
LockedAccessor_EventCrit_t eCrit(event->criticalData());
eCrit->_eventData.marker(hc::completion_future()); // reset event
eCrit->_eventData._stream = stream;
eCrit->_eventData._timestamp = hc::get_system_ticks();
eCrit->_eventData._state = hipEventStatusComplete;
}
return ihipLogStatus(hipSuccess);
} else {
// Clear timestamps
event->_timestamp = 0;
// Record the event in the stream:
stream->locked_recordEvent(event);
event->_state = hipEventStatusRecording;
// Keep a copy outside the critical section so we lock stream first, then event - to avoid deadlock
hc::completion_future cf = stream->locked_recordEvent(event);
{
LockedAccessor_EventCrit_t eCrit(event->criticalData());
eCrit->_eventData.marker(cf);
eCrit->_eventData._stream = stream;
eCrit->_eventData._timestamp = 0;
eCrit->_eventData._state = hipEventStatusRecording;
}
return ihipLogStatus(hipSuccess);
}
} else {
@@ -170,15 +167,13 @@ hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream)
}
}
hipError_t hipEventDestroy(hipEvent_t event)
{
HIP_INIT_API(event);
if (event) {
event->_state = hipEventStatusUnitialized;
delete event;
event = NULL;
return ihipLogStatus(hipSuccess);
} else {
@@ -193,20 +188,24 @@ hipError_t hipEventSynchronize(hipEvent_t event)
if (!(event->_flags & hipEventReleaseToSystem)) {
tprintf(DB_WARN, "hipEventSynchronize on event without system-scope fence ; consider creating with hipEventReleaseToSystem\n");
}
auto ecd = event->locked_copyCrit();
if (event) {
if (event->_state == hipEventStatusUnitialized) {
if (ecd._state == hipEventStatusUnitialized) {
return ihipLogStatus(hipErrorInvalidResourceHandle);
} else if (event->_state == hipEventStatusCreated ) {
} else if (ecd._state == hipEventStatusCreated ) {
// Created but not actually recorded on any device:
return ihipLogStatus(hipSuccess);
} else if (HIP_SYNC_NULL_STREAM && (event->_stream->isDefaultStream() )) {
} else if (HIP_SYNC_NULL_STREAM && (ecd._stream->isDefaultStream() )) {
auto *ctx = ihipGetTlsDefaultCtx();
// TODO-HIP_SYNC_NULL_STREAM - can remove this code
ctx->locked_syncDefaultStream(true, true);
return ihipLogStatus(hipSuccess);
} else {
event->locked_waitComplete((event->_flags & hipEventBlockingSync) ? hc::hcWaitModeBlocked : hc::hcWaitModeActive);
ecd._stream->locked_eventWaitComplete(
ecd.marker(),
(event->_flags & hipEventBlockingSync) ?
hc::hcWaitModeBlocked : hc::hcWaitModeActive);
return ihipLogStatus(hipSuccess);
}
@@ -223,44 +222,50 @@ hipError_t hipEventElapsedTime(float *ms, hipEvent_t start, hipEvent_t stop)
*ms = 0.0f;
if ((start == nullptr) ||
(start->_flags & hipEventDisableTiming) ||
(start->_state == hipEventStatusUnitialized) || (start->_state == hipEventStatusCreated) ||
(stop == nullptr) ||
(stop->_flags & hipEventDisableTiming) ||
( stop->_state == hipEventStatusUnitialized) || ( stop->_state == hipEventStatusCreated)) {
// Both events must be at least recorded else return hipErrorInvalidResourceHandle
if ((start == nullptr) || (stop == nullptr)) {
status = hipErrorInvalidResourceHandle;
} else {
// Refresh status, if still recording...
start->refereshEventStatus();
stop->refereshEventStatus();
auto startEcd = start->locked_copyCrit();
auto stopEcd = stop->locked_copyCrit();
if ((start->_state == hipEventStatusComplete) && (stop->_state == hipEventStatusComplete)) {
// Common case, we have good information for both events.
if ((start->_flags & hipEventDisableTiming) ||
(startEcd._state == hipEventStatusUnitialized) || (startEcd._state == hipEventStatusCreated) ||
(stop->_flags & hipEventDisableTiming) ||
(stopEcd._state == hipEventStatusUnitialized) || (stopEcd._state == hipEventStatusCreated)) {
int64_t tickDiff = (stop->timestamp() - start->timestamp());
// Both events must be at least recorded else return hipErrorInvalidResourceHandle
status = hipErrorInvalidResourceHandle;
uint64_t freqHz;
hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &freqHz);
if (freqHz) {
*ms = ((double)(tickDiff) / (double)(freqHz)) * 1000.0f;
status = hipSuccess;
} else {
* ms = 0.0f;
status = hipErrorInvalidValue;
}
// Refresh status, if still recording...
auto startStatus = start->refreshEventStatus(); // pair < state, timestamp >
auto stopStatus = stop->refreshEventStatus(); // pair < state, timestamp >
if ((startStatus.first == hipEventStatusComplete) && (stopStatus.first == hipEventStatusComplete)) {
// Common case, we have good information for both events. 'second" is the timestamp:
int64_t tickDiff = (stopStatus.second - startStatus.second);
uint64_t freqHz;
hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &freqHz);
if (freqHz) {
*ms = ((double)(tickDiff) / (double)(freqHz)) * 1000.0f;
status = hipSuccess;
} else {
* ms = 0.0f;
status = hipErrorInvalidValue;
}
} else if ((start->_state == hipEventStatusRecording) ||
(stop->_state == hipEventStatusRecording)) {
} else if ((startStatus.first == hipEventStatusRecording) ||
(stopStatus.first == hipEventStatusRecording)) {
status = hipErrorNotReady;
} else {
status = hipErrorNotReady;
} else {
assert(0);
}
}
}
@@ -275,7 +280,9 @@ hipError_t hipEventQuery(hipEvent_t event)
tprintf(DB_WARN, "hipEventQuery on event without system-scope fence ; consider creating with hipEventReleaseToSystem\n");
}
if ((event->_state == hipEventStatusRecording) && !event->locked_isReady()) {
auto ecd = event->locked_copyCrit();
if ((ecd._state == hipEventStatusRecording) && !ecd._stream->locked_eventIsReady(event)) {
return ihipLogStatus(hipErrorNotReady);
} else {
return ihipLogStatus(hipSuccess);
+12 -9
Просмотреть файл
@@ -339,12 +339,11 @@ void ihipStream_t::locked_wait()
// Causes current stream to wait for specified event to complete:
// Note this does not provide any kind of host serialization.
void ihipStream_t::locked_streamWaitEvent(hipEvent_t event)
void ihipStream_t::locked_streamWaitEvent(ihipEventData_t &ecd)
{
LockedAccessor_StreamCrit_t crit(_criticalData);
crit->_av.create_blocking_marker(event->marker(), hc::accelerator_scope);
crit->_av.create_blocking_marker(ecd.marker(), hc::accelerator_scope);
}
@@ -352,24 +351,28 @@ void ihipStream_t::locked_streamWaitEvent(hipEvent_t event)
// Note this does not provide any kind of host serialization.
bool ihipStream_t::locked_eventIsReady(hipEvent_t event)
{
// Event query that returns "Complete" may cause HCC to manipulate
// internal queue state so lock the stream's queue here.
LockedAccessor_StreamCrit_t crit(_criticalData);
LockedAccessor_StreamCrit_t scrit(_criticalData);
return (event->marker().is_ready());
LockedAccessor_EventCrit_t ecrit(event->criticalData());
return (ecrit->_eventData.marker().is_ready());
}
void ihipStream_t::locked_eventWaitComplete(hipEvent_t event, hc::hcWaitMode waitMode)
// Waiting on event can cause HCC to reclaim stream resources - so need to lock the stream.
void ihipStream_t::locked_eventWaitComplete(hc::completion_future &marker, hc::hcWaitMode waitMode)
{
LockedAccessor_StreamCrit_t crit(_criticalData);
event->marker().wait(waitMode);
marker.wait(waitMode);
}
// Create a marker in this stream.
// Save state in the event so it can track the status of the event.
void ihipStream_t::locked_recordEvent(hipEvent_t event)
hc::completion_future ihipStream_t::locked_recordEvent(hipEvent_t event)
{
// Lock the stream to prevent simultaneous access
LockedAccessor_StreamCrit_t crit(_criticalData);
@@ -385,7 +388,7 @@ void ihipStream_t::locked_recordEvent(hipEvent_t event)
scopeFlag = HIP_EVENT_SYS_RELEASE ? hc::system_scope : hc::accelerator_scope;
}
event->marker(crit->_av.create_marker(scopeFlag));
return crit->_av.create_marker(scopeFlag);
};
//=============================================================================
+73 -21
Просмотреть файл
@@ -137,6 +137,7 @@ extern std::vector<ProfTrigger> g_dbStopTriggers;
class ihipStream_t;
class ihipDevice_t;
class ihipCtx_t;
struct ihipEventData_t;
// Color defs for debug messages:
#define KNRM "\x1B[0m"
@@ -152,10 +153,12 @@ extern const char *API_COLOR;
extern const char *API_COLOR_END;
// If set, thread-safety is enforced on all stream functions.
// Stream functions will acquire a mutex before entering critical sections.
#define STREAM_THREAD_SAFE 1
// If set, thread-safety is enforced on all event/stream/ctx/device functions.
// Can disable for performance or functional experiments - in this case
// the code uses a dummy "no-op" mutex.
#define EVENT_THREAD_SAFE 1
#define STREAM_THREAD_SAFE 1
#define CTX_THREAD_SAFE 1
@@ -390,6 +393,12 @@ class FakeMutex
void unlock() { }
};
#if EVENT_THREAD_SAFE
typedef std::mutex EventMutex;
#else
#warning "Stream thread-safe disabled"
typedef FakeMutex EventMutex;
#endif
#if STREAM_THREAD_SAFE
typedef std::mutex StreamMutex;
@@ -540,11 +549,11 @@ public:
hc::accelerator_view* locked_getAv() { LockedAccessor_StreamCrit_t crit(_criticalData); return &(crit->_av); };
void locked_streamWaitEvent(hipEvent_t event);
void locked_recordEvent(hipEvent_t event);
void locked_streamWaitEvent(ihipEventData_t & event);
hc::completion_future locked_recordEvent(hipEvent_t event);
bool locked_eventIsReady(hipEvent_t event);
void locked_eventWaitComplete(hipEvent_t event, hc::hcWaitMode waitMode);
void locked_eventWaitComplete(hc::completion_future &marker, hc::hcWaitMode waitMode);
ihipStreamCritical_t &criticalData() { return _criticalData; };
@@ -628,32 +637,76 @@ enum ihipEventType_t {
hipEventTypeStopCommand,
};
struct ihipEventData_t
{
ihipEventData_t() {
_state = hipEventStatusCreated;
_stream = NULL;
_timestamp = 0;
_type = hipEventTypeIndependent;
};
void marker(const hc::completion_future & marker) { _marker = marker; };
hc::completion_future & marker() { return _marker; }
uint64_t timestamp() const { return _timestamp; } ;
ihipEventType_t type() const { return _type; };
ihipEventType_t _type;
hipEventStatus_t _state;
hipStream_t _stream; // Stream where the event is recorded. Null stream is resolved to actual stream when recorded
uint64_t _timestamp; // store timestamp, may be set on host or by marker.
private:
hc::completion_future _marker;
};
//=============================================================================
//class ihipEventCriticalBase_t
template <typename MUTEX_TYPE>
class ihipEventCriticalBase_t : LockedBase<MUTEX_TYPE>
{
public:
ihipEventCriticalBase_t(const ihipEvent_t *parentEvent) :
_parent(parentEvent)
{}
~ihipEventCriticalBase_t() {};
// Keep data in structure so it can be easily copied into snapshots
// (used to reduce lock contention and preserve correct lock order)
ihipEventData_t _eventData;
private:
const ihipEvent_t *_parent;
friend class LockedAccessor<ihipEventCriticalBase_t>;
};
typedef ihipEventCriticalBase_t<EventMutex> ihipEventCritical_t;
typedef LockedAccessor<ihipEventCritical_t> LockedAccessor_EventCrit_t;
// internal hip event structure.
class ihipEvent_t {
public:
ihipEvent_t(unsigned flags);
void attachToCompletionFuture(const hc::completion_future *cf, hipStream_t stream, ihipEventType_t eventType);
void refereshEventStatus();
hc::completion_future & marker() { return _marker; }
void marker(hc::completion_future cf) { _marker = cf; };
std::pair<hipEventStatus_t, uint64_t> refreshEventStatus(); // returns pair <state, timestamp>
bool locked_isReady();
void locked_waitComplete(hc::hcWaitMode waitMode);
uint64_t timestamp() const { return _timestamp; } ;
ihipEventType_t type() const { return _type; };
// Return a copy of the critical state. The critical data is locked during the copy.
ihipEventData_t locked_copyCrit() {
LockedAccessor_EventCrit_t crit(_criticalData);
return _criticalData._eventData;
};
ihipEventCritical_t &criticalData() { return _criticalData; };
public:
hipEventStatus_t _state;
hipStream_t _stream; // Stream where the event is recorded. Null stream is resolved to actual stream when recorded
unsigned _flags;
private:
hc::completion_future _marker;
ihipEventType_t _type;
uint64_t _timestamp; // store timestamp, may be set on host or by marker.
ihipEventCritical_t _criticalData;
friend hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream);
} ;
@@ -671,7 +724,6 @@ public:
};
~ihipDeviceCriticalBase_t() {
}
// Contexts:
+9 -9
Просмотреть файл
@@ -69,7 +69,7 @@ int sharePtr(void *ptr, ihipCtx_t *ctx, bool shareWithAll, unsigned hipFlags)
if (shareWithAll) {
hsa_status_t s = hsa_amd_agents_allow_access(g_deviceCnt+1, g_allAgents, NULL, ptr);
tprintf (DB_MEM, " allow access to CPU + all %d GPUs (shareWithAll)\n", g_deviceCnt);
tprintf (DB_MEM, " allow access to CPU + all %d GPUs (shareWithAll)\n", g_deviceCnt);
if (s != HSA_STATUS_SUCCESS) {
ret = -1;
}
@@ -126,7 +126,7 @@ void * allocAndSharePtr(const char *msg, size_t sizeBytes, ihipCtx_t *ctx, bool
if (HIP_INIT_ALLOC != -1) {
// TODO , dont' call HIP API directly here:
hipMemset(ptr, HIP_INIT_ALLOC, sizeBytes);
}
}
if (ptr != nullptr) {
int r = sharePtr(ptr, ctx, shareWithAll, hipFlags);
@@ -255,7 +255,7 @@ hipError_t hipMalloc(void** ptr, size_t sizeBytes)
hip_status = hipErrorMemoryAllocation;
}
}
}
return ihipLogStatus(hip_status);
@@ -288,10 +288,10 @@ hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags)
}
const unsigned supportedFlags = hipHostMallocPortable
| hipHostMallocMapped
| hipHostMallocWriteCombined
| hipHostMallocCoherent
const unsigned supportedFlags = hipHostMallocPortable
| hipHostMallocMapped
| hipHostMallocWriteCombined
| hipHostMallocCoherent
| hipHostMallocNonCoherent;
@@ -304,7 +304,7 @@ hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags)
hip_status = hipErrorInvalidValue;
} else {
auto device = ctx->getWriteableDevice();
unsigned amFlags = 0;
if (flags & hipHostMallocCoherent) {
amFlags = amHostCoherent;
@@ -585,7 +585,7 @@ hipError_t hipMalloc3DArray(hipArray_t *array,
hsa_ext_image_data_info_t imageInfo;
hsa_status_t status = hsa_ext_image_data_get_info(*agent, &imageDescriptor, permission, &imageInfo);
size_t alignment = imageInfo.alignment <= allocGranularity ? 0 : imageInfo.alignment;
*ptr = hip_internal::allocAndSharePtr("device_array", allocSize, ctx, false, am_flags, 0, alignment);
if (size && (*ptr == NULL)) {
+151 -5
Просмотреть файл
@@ -27,6 +27,7 @@ THE SOFTWARE.
#include <memory>
#include <mutex>
#include <string>
#include <unordered_map>
#include <vector>
#include <map>
@@ -613,6 +614,125 @@ hipError_t hipHccModuleLaunchKernel(hipFunction_t f,
sharedMemBytes, hStream, kernelParams, extra, startEvent, stopEvent));
}
namespace
{
struct Agent_global {
std::string name;
hipDeviceptr_t address;
std::uint32_t byte_cnt;
};
inline
void* address(hsa_executable_symbol_t x)
{
void* r = nullptr;
hsa_executable_symbol_get_info(
x, HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ADDRESS, &r);
return r;
}
inline
std::string name(hsa_executable_symbol_t x)
{
uint32_t sz = 0u;
hsa_executable_symbol_get_info(
x, HSA_EXECUTABLE_SYMBOL_INFO_NAME_LENGTH, &sz);
std::string r(sz, '\0');
hsa_executable_symbol_get_info(
x, HSA_EXECUTABLE_SYMBOL_INFO_NAME, &r.front());
return r;
}
inline
std::uint32_t size(hsa_executable_symbol_t x)
{
std::uint32_t r = 0;
hsa_executable_symbol_get_info(
x, HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_SIZE, &r);
return r;
}
inline
void track(const Agent_global& x)
{
tprintf(
DB_MEM,
" add variable '%s' with ptr=%p size=%u to tracker\n",
x.name.c_str(),
x.address,
x.byte_cnt);
auto device = ihipGetTlsDefaultCtx()->getWriteableDevice();
hc::AmPointerInfo ptr_info(
nullptr,
x.address,
x.address,
x.byte_cnt,
device->_acc,
true,
false);
hc::am_memtracker_add(x.address, ptr_info);
hc::am_memtracker_update(x.address, device->_deviceId, 0u);
}
template<typename Container = std::vector<Agent_global>>
inline
hsa_status_t copy_agent_global_variables(
hsa_executable_t, hsa_agent_t, hsa_executable_symbol_t x, void* out)
{
assert(out);
hsa_symbol_kind_t t = {};
hsa_executable_symbol_get_info(x, HSA_EXECUTABLE_SYMBOL_INFO_TYPE, &t);
if (t == HSA_SYMBOL_KIND_VARIABLE) {
static_cast<Container*>(out)->push_back(
Agent_global{name(x), address(x), size(x)});
track(static_cast<Container*>(out)->back());
}
return HSA_STATUS_SUCCESS;
}
inline
hsa_agent_t this_agent()
{
auto ctx = ihipGetTlsDefaultCtx();
if (!ctx) throw std::runtime_error{"No active HIP context."};
auto device = ctx->getDevice();
if (!device) throw std::runtime_error{"No device available for HIP."};
ihipDevice_t *currentDevice = ihipGetDevice(device->_deviceId);
if (!currentDevice) {
throw std::runtime_error{"No active device for HIP"};
}
return currentDevice->_hsaAgent;
}
inline
std::vector<Agent_global> read_agent_globals(hipModule_t hmodule)
{
std::vector<Agent_global> r;
hsa_executable_iterate_agent_symbols(
hmodule->executable, this_agent(), copy_agent_global_variables, &r);
return r;
}
}
hipError_t hipModuleGetGlobal(hipDeviceptr_t *dptr, size_t *bytes,
hipModule_t hmod, const char* name)
{
@@ -625,11 +745,37 @@ hipError_t hipModuleGetGlobal(hipDeviceptr_t *dptr, size_t *bytes,
return ihipLogStatus(hipErrorNotInitialized);
}
else{
hipFunction_t func;
ret = ihipModuleGetSymbol(&func, hmod, name);
*bytes = PrintSymbolSizes(hmod->ptr, name) + sizeof(amd_kernel_code_t);
*dptr = reinterpret_cast<void*>(func->_object);
return ihipLogStatus(ret);
static std::unordered_map<
hipModule_t, std::vector<Agent_global>> agent_globals;
// TODO: this is not particularly robust.
if (agent_globals.count(hmod) == 0) {
static std::mutex mtx;
std::lock_guard<std::mutex> lck{mtx};
if (agent_globals.count(hmod) == 0) {
agent_globals.emplace(hmod, read_agent_globals(hmod));
}
}
// TODO: This is unsafe iff some other emplacement triggers rehashing.
// It will have to be properly fleshed out in the future.
const auto it0 = agent_globals.find(hmod);
if (it0 == agent_globals.cend()) {
throw std::runtime_error{"agent_globals data structure corrupted."};
}
const auto it1 = std::find_if(
it0->second.cbegin(),
it0->second.cend(),
[=](const Agent_global& x) { return x.name == name; });
if (it1 == it0->second.cend()) return ihipLogStatus(hipErrorNotFound);
*dptr = it1->address;
*bytes = it1->byte_cnt;
return ihipLogStatus(hipSuccess);
}
}
+8 -3
Просмотреть файл
@@ -93,18 +93,23 @@ hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int
hipError_t e = hipSuccess;
auto ecd = event->locked_copyCrit();
if (event == nullptr) {
e = hipErrorInvalidResourceHandle;
} else if (event->_state != hipEventStatusUnitialized) {
} else if ((ecd._state != hipEventStatusUnitialized) &&
(ecd._state != hipEventStatusCreated)) {
if (HIP_SYNC_STREAM_WAIT || (HIP_SYNC_NULL_STREAM && (stream == 0))) {
// conservative wait on host for the specified event to complete:
event->locked_waitComplete((event->_flags & hipEventBlockingSync) ? hc::hcWaitModeBlocked : hc::hcWaitModeActive);
// return _stream->locked_eventWaitComplete(this, waitMode);
//
ecd._stream->locked_eventWaitComplete(ecd.marker(), (event->_flags & hipEventBlockingSync) ? hc::hcWaitModeBlocked : hc::hcWaitModeActive);
} else {
stream = ihipSyncAndResolveStream(stream);
// This will use create_blocking_marker to wait on the specified queue.
stream->locked_streamWaitEvent(event);
stream->locked_streamWaitEvent(ecd);
}
} // else event not recorded, return immediately and don't create marker.
+12
Просмотреть файл
@@ -75,6 +75,18 @@ RUN: %t CMAKE_TEST_NAME <arguments_to_test_executable> EXCLUDE_HIP_PLATFORM <hcc
```
#### CMAKECMD command
The supported syntax for the CMAKECMD command is:
```
CMAKECMD: <cmake_command> <options_to_cmake_command>
```
cmake_command: refers to any of the commands supported by ```cmake -E``` as specified in the [cmake documentation](https://cmake.org/cmake/help/latest/manual/cmake.1.html#command-line-tool-mode). Note that the commands are limited by the version of cmake the user is running.
options_to_cmake_command: refers to the arguments supported by the specific cmake_command. The arguments are parsed by HIT to replace special markers. The markers supported by HIT are:
%S: Refers to the source directory containing the current source file.
%B: Refers to the build directory for the current cmake project i.e. CMAKE_CURRENT_BINARY_DIR.
### Running tests:
```
ctest
+14
Просмотреть файл
@@ -155,6 +155,20 @@ macro(HIT_ADD_FILES _dir _label _parent)
endif()
endforeach()
# Run cmake commands
execute_process(COMMAND ${HIP_SRC_PATH}/tests/hit/parser --cmakeCMDs ${file}
OUTPUT_VARIABLE _contents
ERROR_QUIET
WORKING_DIRECTORY ${_dir}
OUTPUT_STRIP_TRAILING_WHITESPACE)
string(REGEX REPLACE "\n" ";" _contents "${_contents}")
string(REGEX REPLACE "%S" ${_dir} _contents "${_contents}")
string(REGEX REPLACE "%B" ${CMAKE_CURRENT_BINARY_DIR} _contents "${_contents}")
foreach(_cmd ${_contents})
string(REGEX REPLACE " " ";" _cmd "${_cmd}")
execute_process(COMMAND ${CMAKE_COMMAND} -E ${_cmd})
endforeach()
# Add tests
execute_process(COMMAND ${HIP_SRC_PATH}/tests/hit/parser --runCMDs ${file}
OUTPUT_VARIABLE _contents
+21 -6
Просмотреть файл
@@ -8,7 +8,7 @@ use File::Spec;
sub parse_file {
my $file = shift;
(my $exe = $file) =~ s/\.[^.]+$//g;
my (@buildCMDs, @runCMDs, @runNamedCMDs);
my (@buildCMDs, @runCMDs, @runNamedCMDs, @cmakeCMDs);
if (open (SOURCE, '<:encoding(UTF-8)', "$file")) {
while (<SOURCE>) {
my $line=$_;
@@ -36,10 +36,17 @@ sub parse_file {
$line =~ s/\R//g; # Remove line endings
push @runNamedCMDs, $line;
}
# Look for CMAKECMD instructions
if ($line =~ /^ \* CMAKECMD:/) {
$line =~ s/^ \* CMAKECMD: //g; # Remove " * CMAKECMD: "
# Substitute %S -> srcdir and %B -> builddir happens in cmake
$line =~ s/\R//g; # Remove line endings
push @cmakeCMDs, $line;
}
}
close(SOURCE);
}
return (\@buildCMDs, \@runCMDs, \@runNamedCMDs);
return (\@buildCMDs, \@runCMDs, \@runNamedCMDs, \@cmakeCMDs);
}
# Exit if no arguments specified
@@ -53,8 +60,9 @@ my @options = ();
my $retBuildCMDs = 0;
my $retRunCMDs = 0;
my $retRunNamedCMDs = 0;
my $retCmakeCMDs = 0;
foreach $arg (@ARGV) {
if ($retBuildCMDs or $retRunCMDs or $retRunNamedCMDs) {
if ($retBuildCMDs or $retRunCMDs or $retRunNamedCMDs or $retCmakeCMDs) {
push (@options, $arg);
}
if ($arg eq '--buildCMDs') {
@@ -66,18 +74,21 @@ foreach $arg (@ARGV) {
if ($arg eq '--runNamedCMDs') {
$retRunNamedCMDs = 1;
}
if ($arg eq '--cmakeCMDs') {
$retCmakeCMDs = 1;
}
}
# Atleast one command needs to be specified
if (($retBuildCMDs eq 0) and ($retRunCMDs eq 0) and ($retRunNamedCMDs eq 0)) {
die "Usage: $0 <--buildCMDs|--runCMDs|--runNamedCMDs> FILENAMEs\n";
if (($retBuildCMDs eq 0) and ($retRunCMDs eq 0) and ($retRunNamedCMDs eq 0) and ($retCmakeCMDs eq 0)) {
die "Usage: $0 <--buildCMDs|--runCMDs|--runNamedCMDs|--cmakeCMDs> FILENAMEs\n";
}
# Iterate over input files
foreach $file (@options) {
# Convert absolute path to path relative to working directory
my $relfile = File::Spec->abs2rel($file);
my ($buildCMDs, $runCMDs, $runNamedCMDs) = parse_file("$relfile");
my ($buildCMDs, $runCMDs, $runNamedCMDs, $cmakeCMDs) = parse_file("$relfile");
if ($retBuildCMDs) {
# print "BuildCMDs:\n";
print "$_\n" for @$buildCMDs;
@@ -90,6 +101,10 @@ foreach $file (@options) {
# print "RunNamedCMDs:\n";
print "$_\n" for @$runNamedCMDs;
}
if ($retCmakeCMDs) {
# print "CmakeCMDs:\n";
print "$_\n" for @$cmakeCMDs;
}
}
# vim: ts=4:sw=4:expandtab:smartindent
+10 -10
Просмотреть файл
@@ -52,7 +52,7 @@ void test(unsigned testMask, int *C_d, int *C_h, int64_t numElements, hipStream_
if (!(testMask & p_tests)) {
return;
}
printf ("\ntest 0x%3x: stream=%p waitStart=%d syncMode=%s\n",
printf ("\ntest 0x%3x: stream=%p waitStart=%d syncMode=%s\n",
testMask, stream, waitStart, syncModeString(syncMode));
size_t sizeBytes = numElements * sizeof(int);
@@ -85,8 +85,8 @@ void test(unsigned testMask, int *C_d, int *C_h, int64_t numElements, hipStream_
HIPCHECK(hipEventSynchronize(start));
}
hipError_t expectedStopError = hipSuccess;
hipError_t expectedStopError = hipSuccess;
// How to wait for the events to finish:
switch (syncMode) {
@@ -97,12 +97,12 @@ void test(unsigned testMask, int *C_d, int *C_h, int64_t numElements, hipStream_
HIPCHECK(hipStreamSynchronize(stream)); // wait for recording to finish...
break;
case syncStopEvent:
HIPCHECK(hipEventSynchronize(stop));
HIPCHECK(hipEventSynchronize(stop));
break;
default:
assert(0);
};
float t;
@@ -111,25 +111,25 @@ void test(unsigned testMask, int *C_d, int *C_h, int64_t numElements, hipStream_
failed ("start event not in expected state, was %d=%s\n", e, hipGetErrorName(e));
}
if (e == hipSuccess)
if (e == hipSuccess)
assert (t==0.0f);
// stop usually ready unless we skipped the synchronization (syncNone)
HIPCHECK_API(hipEventElapsedTime(&t, stop, stop), expectedStopError);
if (e == hipSuccess)
if (e == hipSuccess)
assert (t==0.0f);
e = hipEventElapsedTime(&t, start, stop);
HIPCHECK_API(e, expectedStopError);
if (expectedStopError == hipSuccess)
if (expectedStopError == hipSuccess)
assert (t>0.0f);
printf ("time=%6.2f error=%s\n", t, hipGetErrorName(e));
e = hipEventElapsedTime(&t, stop, start);
HIPCHECK_API(e, expectedStopError);
if (expectedStopError == hipSuccess)
if (expectedStopError == hipSuccess)
assert (t<0.0f);
printf ("negtime=%6.2f error=%s\n", t, hipGetErrorName(e));
+33 -33
Просмотреть файл
@@ -58,7 +58,7 @@ public:
void offset(int offset) { _offset = offset; };
int offset() const { return _offset; };
private:
T * _A_d;
T* _B_d;
@@ -72,7 +72,7 @@ private:
template<typename T>
DeviceMemory<T>::DeviceMemory(size_t numElements)
: _maxNumElements(numElements),
: _maxNumElements(numElements),
_offset(0)
{
T ** np = nullptr;
@@ -93,7 +93,7 @@ DeviceMemory<T>::~DeviceMemory ()
HipTest::freeArrays (_A_d, _B_d, _C_d, np, np, np, 0);
HIPCHECK (hipFree(_C_dd));
_C_dd = NULL;
};
@@ -125,7 +125,7 @@ public:
T * A_hh;
T* B_hh;
bool _usePinnedHost;
bool _usePinnedHost;
private:
size_t _maxNumElements;
@@ -165,11 +165,11 @@ HostMemory<T>::HostMemory(size_t numElements, bool usePinnedHost)
template<typename T>
void
HostMemory<T>::reset(size_t numElements, bool full)
HostMemory<T>::reset(size_t numElements, bool full)
{
// Initialize the host data:
for (size_t i=0; i<numElements; i++) {
(A_hh)[i] = 1097.0 + i;
(A_hh)[i] = 1097.0 + i;
(B_hh)[i] = 1492.0 + i; // Phi
if (full) {
@@ -213,8 +213,8 @@ template <typename T>
void memcpytest2(DeviceMemory<T> *dmem, HostMemory<T> *hmem, size_t numElements, bool useHostToHost, bool useDeviceToDevice, bool useMemkindDefault)
{
size_t sizeElements = numElements * sizeof(T);
printf ("test: %s<%s> size=%lu (%6.2fMB) usePinnedHost:%d, useHostToHost:%d, useDeviceToDevice:%d, useMemkindDefault:%d, offsets:dev:%+d host:+%d\n",
__func__,
printf ("test: %s<%s> size=%lu (%6.2fMB) usePinnedHost:%d, useHostToHost:%d, useDeviceToDevice:%d, useMemkindDefault:%d, offsets:dev:%+d host:+%d\n",
__func__,
TYPENAME(T),
sizeElements, sizeElements/1024.0/1024.0,
hmem->_usePinnedHost, useHostToHost, useDeviceToDevice, useMemkindDefault,
@@ -273,8 +273,8 @@ void memcpytest2_for_type(size_t numElements)
{
printSep();
DeviceMemory<T> memD(numElements);
HostMemory<T> memU(numElements, 0/*usePinnedHost*/);
DeviceMemory<T> memD(numElements);
HostMemory<T> memU(numElements, 0/*usePinnedHost*/);
HostMemory<T> memP(numElements, 1/*usePinnedHost*/);
for (int usePinnedHost =0; usePinnedHost<=1; usePinnedHost++) {
@@ -307,11 +307,11 @@ void memcpytest2_sizes(size_t maxElem=0)
maxElem = free/sizeof(T)/8;
}
printf (" device#%d: hipMemGetInfo: free=%zu (%4.2fMB) total=%zu (%4.2fMB) maxSize=%6.1fMB\n",
printf (" device#%d: hipMemGetInfo: free=%zu (%4.2fMB) total=%zu (%4.2fMB) maxSize=%6.1fMB\n",
deviceId, free, (float)(free/1024.0/1024.0), total, (float)(total/1024.0/1024.0), maxElem*sizeof(T)/1024.0/1024.0);
HIPCHECK ( hipDeviceReset() );
DeviceMemory<T> memD(maxElem);
HostMemory<T> memU(maxElem, 0/*usePinnedHost*/);
DeviceMemory<T> memD(maxElem);
HostMemory<T> memU(maxElem, 0/*usePinnedHost*/);
HostMemory<T> memP(maxElem, 1/*usePinnedHost*/);
for (size_t elem=1; elem<=maxElem; elem*=2) {
@@ -336,11 +336,11 @@ void memcpytest2_offsets(size_t maxElem, bool devOffsets, bool hostOffsets)
HIPCHECK(hipMemGetInfo(&free, &total));
printf (" device#%d: hipMemGetInfo: free=%zu (%4.2fMB) total=%zu (%4.2fMB) maxSize=%6.1fMB\n",
printf (" device#%d: hipMemGetInfo: free=%zu (%4.2fMB) total=%zu (%4.2fMB) maxSize=%6.1fMB\n",
deviceId, free, (float)(free/1024.0/1024.0), total, (float)(total/1024.0/1024.0), maxElem*sizeof(T)/1024.0/1024.0);
HIPCHECK ( hipDeviceReset() );
DeviceMemory<T> memD(maxElem);
HostMemory<T> memU(maxElem, 0/*usePinnedHost*/);
DeviceMemory<T> memD(maxElem);
HostMemory<T> memU(maxElem, 0/*usePinnedHost*/);
HostMemory<T> memP(maxElem, 1/*usePinnedHost*/);
size_t elem = maxElem / 2;
@@ -380,16 +380,16 @@ void multiThread_1(bool serialize, bool usePinnedHost)
{
printSep();
printf ("test: %s<%s> serialize=%d usePinnedHost=%d\n", __func__, TYPENAME(T), serialize, usePinnedHost);
DeviceMemory<T> memD(N);
HostMemory<T> mem1(N, usePinnedHost);
HostMemory<T> mem2(N, usePinnedHost);
DeviceMemory<T> memD(N);
HostMemory<T> mem1(N, usePinnedHost);
HostMemory<T> mem2(N, usePinnedHost);
std::thread t1 (memcpytest2<T>, &memD, &mem1, N, 0,0,0);
if (serialize) {
t1.join();
}
std::thread t2 (memcpytest2<T>,&memD, &mem2, N, 0,0,0);
if (serialize) {
t2.join();
@@ -427,21 +427,21 @@ int main(int argc, char *argv[])
// Some tests around the 64KB boundary which have historically shown issues:
printf ("\n\n=== tests&0x2 (64KB boundary)\n");
size_t maxElem = 32*1024*1024;
DeviceMemory<float> memD(maxElem);
HostMemory<float> memU(maxElem, 0/*usePinnedHost*/);
HostMemory<float> memP(maxElem, 0/*usePinnedHost*/);
DeviceMemory<float> memD(maxElem);
HostMemory<float> memU(maxElem, 0/*usePinnedHost*/);
HostMemory<float> memP(maxElem, 0/*usePinnedHost*/);
// These all pass:
memcpytest2<float>(&memD, &memP, 15*1024*1024, 0, 0, 0);
memcpytest2<float>(&memD, &memP, 16*1024*1024, 0, 0, 0);
memcpytest2<float>(&memD, &memP, 16*1024*1024+16*1024, 0, 0, 0);
memcpytest2<float>(&memD, &memP, 15*1024*1024, 0, 0, 0);
memcpytest2<float>(&memD, &memP, 16*1024*1024, 0, 0, 0);
memcpytest2<float>(&memD, &memP, 16*1024*1024+16*1024, 0, 0, 0);
// Just over 64MB:
memcpytest2<float>(&memD, &memP, 16*1024*1024+512*1024, 0, 0, 0);
memcpytest2<float>(&memD, &memP, 17*1024*1024+1024, 0, 0, 0);
memcpytest2<float>(&memD, &memP, 32*1024*1024, 0, 0, 0);
memcpytest2<float>(&memD, &memU, 32*1024*1024, 0, 0, 0);
memcpytest2<float>(&memD, &memP, 32*1024*1024, 1, 1, 0);
memcpytest2<float>(&memD, &memP, 32*1024*1024, 1, 1, 0);
memcpytest2<float>(&memD, &memP, 16*1024*1024+512*1024, 0, 0, 0);
memcpytest2<float>(&memD, &memP, 17*1024*1024+1024, 0, 0, 0);
memcpytest2<float>(&memD, &memP, 32*1024*1024, 0, 0, 0);
memcpytest2<float>(&memD, &memU, 32*1024*1024, 0, 0, 0);
memcpytest2<float>(&memD, &memP, 32*1024*1024, 1, 1, 0);
memcpytest2<float>(&memD, &memP, 32*1024*1024, 1, 1, 0);
}
@@ -464,7 +464,7 @@ int main(int argc, char *argv[])
// Simplest cases: serialize the threads, and also used pinned memory:
// This verifies that the sub-calls to memcpytest2 are correct.
multiThread_1<float>(true, true);
multiThread_1<float>(true, true);
// Serialize, but use unpinned memory to stress the unpinned memory xfer path.
multiThread_1<float>(true, false);
+2 -2
Просмотреть файл
@@ -41,8 +41,8 @@ void printSep()
// Designed to stress a small number of simple smoke tests
template<
typename T=float,
class P=HipTest::Unpinned,
typename T=float,
class P=HipTest::Unpinned,
class C=HipTest::Memcpy
>
void simpleVectorAdd(size_t numElements, int iters, hipStream_t stream)
+5 -5
Просмотреть файл
@@ -119,7 +119,7 @@ void Streamer<T>::reset()
{
HipTest::setDefaultData(_numElements, _A_h, _B_h, _C_h);
H2D();
}
@@ -238,7 +238,7 @@ int main(int argc, char *argv[])
nullStreamer->D2H();
HIPCHECK(hipDeviceSynchronize());
HipTest::checkTest(expected_H, nullStreamer->_C_h, numElements);
HipTest::checkTest(expected_H, nullStreamer->_C_h, numElements);
}
}
@@ -263,7 +263,7 @@ int main(int argc, char *argv[])
HIPCHECK(hipDeviceSynchronize());
HipTest::checkTest(expected_H, nullStreamer->_C_h, numElements);
HipTest::checkTest(expected_H, nullStreamer->_C_h, numElements);
}
}
@@ -289,10 +289,10 @@ int main(int argc, char *argv[])
// Copy with stream1, this could go async if the streamSync doesn't synchronize ALL the streams.
HIPCHECK(hipMemcpyAsync(streamers[0]->_C_h, streamers[0]->_C_d, streamers[0]->_numElements*sizeof(int), hipMemcpyDeviceToHost, streamers[1]->_stream));
HIPCHECK(hipDeviceSynchronize());
HipTest::checkTest(expected_H, streamers[0]->_C_h, numElements);
HipTest::checkTest(expected_H, streamers[0]->_C_h, numElements);
}
+14 -14
Просмотреть файл
@@ -59,23 +59,23 @@ const char *syncModeString(int syncMode) {
void test(unsigned testMask, int *C_d, int *C_h, int64_t numElements, SyncMode syncMode, bool expectMismatch)
{
// This test sends a long-running kernel to the null stream, then tests to see if the
// This test sends a long-running kernel to the null stream, then tests to see if the
// specified synchronization technique is effective.
//
// Some syncMode are not expected to correctly sync (for example "syncNone"). in these
// Some syncMode are not expected to correctly sync (for example "syncNone"). in these
// cases the test sets expectMismatch and the check logic below will attempt to ensure that
// the undesired synchronization did not occur - ie ensure the kernel is still running and did
// not yet update the stop event. This can be tricky since if the kernel runs fast enough it
// may complete before the check. To prevent this, the addCountReverse has a count parameter
// which causes it to loop repeatedly, and the results are checked in reverse order.
// may complete before the check. To prevent this, the addCountReverse has a count parameter
// which causes it to loop repeatedly, and the results are checked in reverse order.
//
// Tests with expectMismatch=true should ensure the kernel finishes correctly. This results
// are checked and we test to make sure stop event has completed.
if (!(testMask & p_tests)) {
return;
}
printf ("\ntest 0x%02x: syncMode=%s expectMismatch=%d\n",
printf ("\ntest 0x%02x: syncMode=%s expectMismatch=%d\n",
testMask, syncModeString(syncMode), expectMismatch);
size_t sizeBytes = numElements * sizeof(int);
@@ -98,7 +98,7 @@ void test(unsigned testMask, int *C_d, int *C_h, int64_t numElements, SyncMode s
unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, numElements);
// Launch kernel into null stream, should result in C_h == count.
hipLaunchKernelGGL(HipTest::addCountReverse , dim3(blocks), dim3(threadsPerBlock), 0, 0 /*stream*/, C_d, C_h, numElements, count);
HIPCHECK(hipEventRecord(stop, 0/*default*/));
HIPCHECK(hipEventRecord(stop, 0/*default*/));
switch (syncMode) {
case syncNone:
@@ -108,18 +108,18 @@ void test(unsigned testMask, int *C_d, int *C_h, int64_t numElements, SyncMode s
break;
case syncOtherStream:
// Does this synchronize with the null stream?
HIPCHECK(hipStreamSynchronize(otherStream));
HIPCHECK(hipStreamSynchronize(otherStream));
break;
case syncMarkerThenOtherStream:
case syncMarkerThenOtherNonBlockingStream:
// this may wait for NULL stream depending hipStreamNonBlocking flag above
HIPCHECK(hipEventRecord(otherStreamEvent, otherStream));
HIPCHECK(hipStreamSynchronize(otherStream));
// this may wait for NULL stream depending hipStreamNonBlocking flag above
HIPCHECK(hipEventRecord(otherStreamEvent, otherStream));
HIPCHECK(hipStreamSynchronize(otherStream));
break;
case syncDevice:
HIPCHECK(hipDeviceSynchronize());
HIPCHECK(hipDeviceSynchronize());
break;
default:
assert(0);
@@ -197,7 +197,7 @@ void runTests(int64_t numElements)
int main(int argc, char *argv[])
{
// Can' destroy the default stream:// TODO - move to another test
HIPCHECK_API(hipStreamDestroy(0), hipErrorInvalidResourceHandle);
HIPCHECK_API(hipStreamDestroy(0), hipErrorInvalidResourceHandle);
HipTest::parseStandardArguments(argc, argv, true /*failOnUndefinedArg*/);
+12 -12
Просмотреть файл
@@ -88,7 +88,7 @@ private:
template <typename T>
Streamer<T>::Streamer(int deviceId, T * A_d, size_t numElements, int commandType) :
_preA_d(NULL),
_preA_d(NULL),
_A_d(A_d),
_deviceId(deviceId),
_numElements(numElements),
@@ -239,7 +239,7 @@ size_t Streamer<T>::check(int streamerNum, T initValue, T expectedOffset, bool e
return _mismatchCount;
}
//---
//Parse arguments specific to this test.
@@ -300,7 +300,7 @@ void checkAll(int initValue, std::vector<IntStreamer *> &streamers, std::vector<
for (int i=0; i<streamers.size(); i++) {
expected += streamers[i]->expectedAdd();
mismatchCount += streamers[i]->check(i+1, initValue, expected, expectPass);
}
@@ -330,7 +330,7 @@ void checkAll(int initValue, std::vector<IntStreamer *> &streamers, std::vector<
void sync_none(void) {};
void sync_allDevices(int numDevices)
void sync_allDevices(int numDevices)
{
for (int d=0; d<numDevices; d++) {
HIPCHECK(hipSetDevice(d));
@@ -339,7 +339,7 @@ void sync_allDevices(int numDevices)
}
void sync_queryAllUntilComplete(std::vector<IntStreamer *> streamers)
void sync_queryAllUntilComplete(std::vector<IntStreamer *> streamers)
{
for (int i=streamers.size()-1; i>=0; i--) {
streamers[i]->queryUntilComplete();
@@ -347,7 +347,7 @@ void sync_queryAllUntilComplete(std::vector<IntStreamer *> streamers)
}
void sync_streamWaitEvent(hipEvent_t lastEvent, int sideDeviceId, hipStream_t sideStream, bool waitHere)
void sync_streamWaitEvent(hipEvent_t lastEvent, int sideDeviceId, hipStream_t sideStream, bool waitHere)
{
HIPCHECK(hipSetDevice(sideDeviceId));
@@ -389,7 +389,7 @@ int main(int argc, char *argv[])
initArray_h[i] = initValue;
}
HIPCHECK(hipMemcpy(initArray_d, initArray_h, sizeElements, hipMemcpyHostToDevice));
int numDevices;
HIPCHECK(hipGetDeviceCount(&numDevices));
@@ -414,7 +414,7 @@ int main(int argc, char *argv[])
// A sideband stream channel that is independent from above.
// Used to check to ensure the WaitEvent or other synchronization is working correctly since by default sideStream is
// Used to check to ensure the WaitEvent or other synchronization is working correctly since by default sideStream is
// asynchronous wrt the other streams.
std::vector<hipStream_t> sideStreams;
for (int d=0; d<numDevices; d++) {
@@ -446,7 +446,7 @@ int main(int argc, char *argv[])
if (p_tests & 0x1000) {
printf ("==> Test 0x1000 simple null stream tests\n");
printf ("==> Test 0x1000 simple null stream tests\n");
// try some null stream:
hipStreamQuery(0);
@@ -463,7 +463,7 @@ int main(int argc, char *argv[])
HIPCHECK(hipEventRecord(e1, s1))
HIPCHECK(hipStreamWaitEvent(hipStream_t(0), e1, 0/*flags*/));
HIPCHECK(hipStreamDestroy(s1));
HIPCHECK(hipEventDestroy(e1));
}
@@ -476,11 +476,11 @@ int main(int argc, char *argv[])
HIPCHECK(hipEventRecord(e1, hipStream_t(0)))
HIPCHECK(hipStreamWaitEvent(s1, e1, 0/*flags*/));
HIPCHECK(hipStreamDestroy(s1));
HIPCHECK(hipEventDestroy(e1));
}
}