2
0

Fixed copyright and header names

Change-Id: Id595c65ea3b7289e87be4c42db5d8a31905a4fdd


[ROCm/hip commit: 1ef7222c3a]
Este cometimento está contido em:
Aditya Atluri
2017-03-31 12:40:29 -05:00
ascendente 7635e0a57e
cometimento fcb4331a6a
14 ficheiros modificados com 54 adições e 912 eliminações
+2 -2
Ver ficheiro
@@ -1,5 +1,5 @@
/*
Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
@@ -26,7 +26,7 @@ THE SOFTWARE.
#include "helpers.hpp"
#include "hc.hpp"
#include "hcc_acc.h"
#include "hip_hcc.h"
#include <stdexcept>
#include <type_traits>
-3
Ver ficheiro
@@ -464,9 +464,6 @@ __device__ static inline __half2 h2trunc(const __half2 h) {
return a;
}
#endif
#if __clang_major__ == 3
+2
Ver ficheiro
@@ -284,3 +284,5 @@ static inline hipError_t hipHostMalloc( T** ptr, size_t size, unsigned int flags
return hipHostMalloc((void**)ptr, size, flags);
}
#endif
#endif
+5 -2
Ver ficheiro
@@ -1,5 +1,5 @@
/*
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
@@ -20,6 +20,9 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#pragma once
#ifndef HIP_INCLUDE_HIP_NVCC_DETAIL_CHANNEL_DESCRIPTOR_H
#define HIP_INCLUDE_HIP_NVCC_DETAIL_CHANNEL_DESCRIPTOR_H
#include"channel_descriptor.h"
#endif
+3 -3
Ver ficheiro
@@ -1,5 +1,5 @@
/*
Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
@@ -20,8 +20,8 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#ifndef HIPCOMPLEX_H
#define HIPCOMPLEX_H
#ifndef HIP_INCLUDE_HIP_NVCC_DETAIL_HIP_COMPLEX_H
#define HIP_INCLUDE_HIP_NVCC_DETAIL_HIP_COMPLEX_H
#include"cuComplex.h"
+5 -2
Ver ficheiro
@@ -1,5 +1,5 @@
/*
Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
@@ -20,7 +20,8 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#pragma once
#ifndef HIP_INCLUDE_HIP_NVCC_DETAIL_HIP_RUNTIME_H
#define HIP_INCLUDE_HIP_NVCC_DETAIL_HIP_RUNTIME_H
#include <cuda_runtime.h>
@@ -105,3 +106,5 @@ kernelName<<<numblocks,numthreads,memperblock,streamId>>>(0, ##__VA_ARGS__);\
#define HIP_DYNAMIC_SHARED_ATTRIBUTE
#endif
#endif
+3 -2
Ver ficheiro
@@ -1,5 +1,5 @@
/*
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
@@ -20,7 +20,8 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#pragma once
#ifndef HIP_INCLUDE_HIP_NVCC_DETAIL_HIP_RUNTIME_API_H
#define HIP_INCLUDE_HIP_NVCC_DETAIL_HIP_RUNTIME_API_H
#include <cuda_runtime_api.h>
#include <cuda.h>
+24 -2
Ver ficheiro
@@ -1,5 +1,27 @@
#ifndef HIP_TEXTURE_H
#define HIP_TEXTURE_H
/*
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#ifndef HIP_INCLUDE_HIP_NVCC_DETAIL_HIP_TEXTURE_H
#define HIP_INCLUDE_HIP_NVCC_DETAIL_HIP_TEXTURE_H
#include <texture_types.h>
+2 -2
Ver ficheiro
@@ -1,5 +1,5 @@
/*
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
@@ -20,7 +20,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include"hip/hip_fp16.h"
#include"hip/hcc_detail/hip_fp16.h"
struct hipHalfHolder{
union {
-885
Ver ficheiro
@@ -1,885 +0,0 @@
/*
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#ifndef HIP_HCC_H
#define HIP_HCC_H
#include <hc.hpp>
#include <hsa/hsa.h>
#include "hsa/hsa_ext_amd.h"
#include "hip/hip_runtime.h"
#include "hip_util.h"
#include "env.h"
#if defined(__HCC__) && (__hcc_workweek__ < 16354)
#error("This version of HIP requires a newer version of HCC.");
#endif
#define USE_IPC 1
//---
// Environment variables:
// Intended to distinguish whether an environment variable should be visible only in debug mode, or in debug+release.
//static const int debug = 0;
extern const int release;
// TODO - this blocks both kernels and memory ops. Perhaps should have separate env var for kernels?
extern int HIP_LAUNCH_BLOCKING;
extern int HIP_API_BLOCKING;
extern int HIP_PRINT_ENV;
extern int HIP_PROFILE_API;
//extern int HIP_TRACE_API;
extern int HIP_ATP;
extern int HIP_DB;
extern int HIP_STAGING_SIZE; /* size of staging buffers, in KB */
extern int HIP_STREAM_SIGNALS; /* number of signals to allocate at stream creation */
extern int HIP_VISIBLE_DEVICES; /* Contains a comma-separated sequence of GPU identifiers */
extern int HIP_FORCE_P2P_HOST;
extern int HIP_COHERENT_HOST_ALLOC;
//---
// Chicken bits for disabling functionality to work around potential issues:
extern int HIP_SYNC_HOST_ALLOC;
// TODO - remove when this is standard behavior.
extern int HCC_OPT_FLUSH;
// Class to assign a short TID to each new thread, for HIP debugging purposes.
class TidInfo {
public:
TidInfo() ;
int tid() const { return _shortTid; };
uint64_t incApiSeqNum() { return ++_apiSeqNum; };
uint64_t apiSeqNum() const { return _apiSeqNum; };
private:
int _shortTid;
// monotonically increasing API sequence number for this threa.
uint64_t _apiSeqNum;
};
struct ProfTrigger {
static const uint64_t MAX_TRIGGER = std::numeric_limits<uint64_t>::max();
void print (int tid) {
std::cout << "Enabling tracing for ";
for (auto iter=_profTrigger.begin(); iter != _profTrigger.end(); iter++) {
std::cout << "tid:" << tid << "." << *iter << ",";
}
std::cout << "\n";
};
uint64_t nextTrigger() { return _profTrigger.empty() ? MAX_TRIGGER : _profTrigger.back(); };
void add(uint64_t trigger) { _profTrigger.push_back(trigger); };
void sort() { std::sort (_profTrigger.begin(), _profTrigger.end(), std::greater<int>()); };
private:
std::vector<uint64_t> _profTrigger;
};
//---
//Extern tls
extern thread_local hipError_t tls_lastHipError;
extern thread_local TidInfo tls_tidInfo;
extern std::vector<ProfTrigger> g_dbStartTriggers;
extern std::vector<ProfTrigger> g_dbStopTriggers;
//---
//Forward defs:
class ihipStream_t;
class ihipDevice_t;
class ihipCtx_t;
// Color defs for debug messages:
#define KNRM "\x1B[0m"
#define KRED "\x1B[31m"
#define KGRN "\x1B[32m"
#define KYEL "\x1B[33m"
#define KBLU "\x1B[34m"
#define KMAG "\x1B[35m"
#define KCYN "\x1B[36m"
#define KWHT "\x1B[37m"
extern const char *API_COLOR;
extern const char *API_COLOR_END;
// If set, thread-safety is enforced on all stream functions.
// Stream functions will acquire a mutex before entering critical sections.
#define STREAM_THREAD_SAFE 1
#define CTX_THREAD_SAFE 1
#define DEVICE_THREAD_SAFE 1
// Compile debug trace mode - this prints debug messages to stderr when env var HIP_DB is set.
// May be set to 0 to remove debug if checks - possible code size and performance difference?
#define COMPILE_HIP_DB 1
// Compile HIP tracing capability.
// 0x1 = print a string at function entry with arguments.
// 0x2 = prints a simple message with function name + return code when function exits.
// 0x3 = print both.
// Must be enabled at runtime with HIP_TRACE_API
#define COMPILE_HIP_TRACE_API 0x3
// Compile code that generates trace markers for CodeXL ATP at HIP function begin/end.
// ATP is standard CodeXL format that includes timestamps for kernels, HSA RT APIs, and HIP APIs.
#ifndef COMPILE_HIP_ATP_MARKER
#define COMPILE_HIP_ATP_MARKER 0
#endif
// Compile support for trace markers that are displayed on CodeXL GUI at start/stop of each function boundary.
// TODO - currently we print the trace message at the beginning. if we waited, we could also tls_tidInfo return codes, and any values returned
// through ptr-to-args (ie the pointers allocated by hipMalloc).
#if COMPILE_HIP_ATP_MARKER
#include "CXLActivityLogger.h"
#define MARKER_BEGIN(markerName,group) amdtBeginMarker(markerName, group, nullptr);
#define MARKER_END() amdtEndMarker();
#define RESUME_PROFILING amdtResumeProfiling(AMDT_ALL_PROFILING);
#define STOP_PROFILING amdtStopProfiling(AMDT_ALL_PROFILING);
#else
// Swallow scoped markers:
#define MARKER_BEGIN(markerName,group)
#define MARKER_END()
#define RESUME_PROFILING
#define STOP_PROFILING
#endif
//---
//HIP Trace modes
#define TRACE_ALL 0 // 0x1
#define TRACE_CMD 1 // 0x2
#define TRACE_MEM 2 // 0x4
//---
//HIP_DB Debug flags:
#define DB_API 0 /* 0x01 - shortcut to enable HIP_TRACE_API on single switch */
#define DB_SYNC 1 /* 0x02 - trace synchronization pieces */
#define DB_MEM 2 /* 0x04 - trace memory allocation / deallocation */
#define DB_COPY 3 /* 0x08 - trace memory copy and peer commands. . */
#define DB_MAX_FLAG 4
// When adding a new debug flag, also add to the char name table below.
//
//
struct DbName {
const char *_color;
const char *_shortName;
};
// This table must be kept in-sync with the defines above.
static const DbName dbName [] =
{
{KGRN, "api"}, // not used,
{KYEL, "sync"},
{KCYN, "mem"},
{KMAG, "copy"},
};
#if COMPILE_HIP_DB
#define tprintf(trace_level, ...) {\
if (HIP_DB & (1<<(trace_level))) {\
char msgStr[1000];\
snprintf(msgStr, 2000, __VA_ARGS__);\
fprintf (stderr, " %ship-%s tid:%d:%s%s", dbName[trace_level]._color, dbName[trace_level]._shortName, tls_tidInfo.tid(), msgStr, KNRM); \
}\
}
#else
/* Compile to empty code */
#define tprintf(trace_level, ...)
#endif
//---
extern void recordApiTrace(std::string *fullStr, const std::string &apiStr);
#if COMPILE_HIP_ATP_MARKER || (COMPILE_HIP_TRACE_API & 0x1)
#define API_TRACE(forceTrace, ...)\
{\
tls_tidInfo.incApiSeqNum();\
if (forceTrace || (HIP_PROFILE_API || (COMPILE_HIP_DB && (HIP_TRACE_API & (1<<TRACE_ALL))))) {\
std::string apiStr = std::string(__func__) + " (" + ToString(__VA_ARGS__) + ')';\
std::string fullStr;\
recordApiTrace(&fullStr, apiStr);\
if (HIP_PROFILE_API == 0x1) {MARKER_BEGIN(__func__, "HIP") }\
else if (HIP_PROFILE_API == 0x2) {MARKER_BEGIN(fullStr.c_str(), "HIP"); }\
}\
}
#else
// Swallow API_TRACE
#define API_TRACE(IS_CMD, ...)\
tls_tidInfo.incApiSeqNum();
#endif
// Just initialize the HIP runtime, but don't log any trace information.
#define HIP_INIT()\
std::call_once(hip_initialized, ihipInit);\
ihipCtxStackUpdate();
#define HIP_SET_DEVICE()\
ihipDeviceSetState();
// This macro should be called at the beginning of every HIP API.
// It initializes the hip runtime (exactly once), and
// generates a trace string that can be output to stderr or to ATP file.
#define HIP_INIT_API(...) \
HIP_INIT()\
API_TRACE(0, __VA_ARGS__);
// Like above, but will trace with DB_CMD.
// Replace HIP_INIT_API with this call inside important APIs that launch work on the GPU:
// kernel launches, copy commands, memory sets, etc.
#define HIP_INIT_CMD_API(...) \
HIP_INIT()\
API_TRACE((HIP_TRACE_API&(1<<TRACE_CMD)), __VA_ARGS__);
// This macro should be called at the end of every HIP API, and only at the end of top-level hip APIS (not internal hip)
// It has dual function: logs the last error returned for use by hipGetLastError,
// and also prints the closing message when the debug trace is enabled.
#define ihipLogStatus(hipStatus) \
({\
hipError_t localHipStatus = hipStatus; /*local copy so hipStatus only evaluated once*/ \
tls_lastHipError = localHipStatus;\
\
if ((COMPILE_HIP_TRACE_API & 0x2) && HIP_TRACE_API & (1<<TRACE_ALL)) {\
fprintf(stderr, " %ship-api tid:%d.%lu %-30s ret=%2d (%s)>>%s\n", (localHipStatus == 0) ? API_COLOR:KRED, tls_tidInfo.tid(),tls_tidInfo.apiSeqNum(), __func__, localHipStatus, ihipErrorString(localHipStatus), API_COLOR_END);\
}\
if (HIP_PROFILE_API) { MARKER_END(); }\
localHipStatus;\
})
class ihipException : public std::exception
{
public:
ihipException(hipError_t e) : _code(e) {};
hipError_t _code;
};
#ifdef __cplusplus
extern "C" {
#endif
#ifdef __cplusplus
}
#endif
const hipStream_t hipStreamNull = 0x0;
/**
* HIP IPC Handle Size
*/
#define HIP_IPC_RESERVED_SIZE 24
class ihipIpcMemHandle_t
{
public:
#if USE_IPC
hsa_amd_ipc_memory_t ipc_handle; ///< ipc memory handle on ROCr
#endif
size_t psize;
char reserved[HIP_IPC_RESERVED_SIZE];
};
class ihipModule_t {
public:
hsa_executable_t executable;
hsa_code_object_t object;
std::string fileName;
void *ptr;
size_t size;
std::list<hipFunction_t> funcTrack;
ihipModule_t() : executable(), object(), fileName(), ptr(nullptr), size(0) {}
};
//---
// Used to remove lock, for performance or stimulating bugs.
class FakeMutex
{
public:
void lock() { }
bool try_lock() {return true; }
void unlock() { }
};
#if STREAM_THREAD_SAFE
typedef std::mutex StreamMutex;
#else
#warning "Stream thread-safe disabled"
typedef FakeMutex StreamMutex;
#endif
// Pair Device and Ctx together, these could also be toggled separately if desired.
#if CTX_THREAD_SAFE
typedef std::mutex CtxMutex;
#else
typedef FakeMutex CtxMutex;
#warning "Ctx thread-safe disabled"
#endif
#if DEVICE_THREAD_SAFE
typedef std::mutex DeviceMutex;
#else
typedef FakeMutex DeviceMutex;
#warning "Device thread-safe disabled"
#endif
//
//---
// Protects access to the member _data with a lock acquired on contruction/destruction.
// T must contain a _mutex field which meets the BasicLockable requirements (lock/unlock)
template<typename T>
class LockedAccessor
{
public:
LockedAccessor(T &criticalData, bool autoUnlock=true) :
_criticalData(&criticalData),
_autoUnlock(autoUnlock)
{
tprintf(DB_SYNC, "locking criticalData=%p for %s..\n", _criticalData, ToString(_criticalData->_parent).c_str());
_criticalData->_mutex.lock();
};
~LockedAccessor()
{
if (_autoUnlock) {
tprintf(DB_SYNC, "auto-unlocking criticalData=%p for %s...\n", _criticalData, ToString(_criticalData->_parent).c_str());
_criticalData->_mutex.unlock();
}
}
void unlock()
{
tprintf(DB_SYNC, "unlocking criticalData=%p for %s...\n", _criticalData, ToString(_criticalData->_parent).c_str());
_criticalData->_mutex.unlock();
}
// Syntactic sugar so -> can be used to get the underlying type.
T *operator->() { return _criticalData; };
private:
T *_criticalData;
bool _autoUnlock;
};
template <typename MUTEX_TYPE>
struct LockedBase {
// Experts-only interface for explicit locking.
// Most uses should use the lock-accessor.
void lock() { _mutex.lock(); }
void unlock() { _mutex.unlock(); }
bool try_lock() { return _mutex.try_lock(); }
MUTEX_TYPE _mutex;
};
template <typename MUTEX_TYPE>
class ihipStreamCriticalBase_t : public LockedBase<MUTEX_TYPE>
{
public:
ihipStreamCriticalBase_t(ihipStream_t *parentStream, hc::accelerator_view av) :
_kernelCnt(0),
_av(av),
_hasQueue(true),
_parent(parentStream)
{
};
~ihipStreamCriticalBase_t() {
}
ihipStreamCriticalBase_t<StreamMutex> * mlock() { LockedBase<MUTEX_TYPE>::lock(); return this;};
void munlock() {
tprintf(DB_SYNC, "munlocking criticalData=%p for %s...\n", this, ToString(this->_parent).c_str());
LockedBase<MUTEX_TYPE>::unlock();
};
ihipStreamCriticalBase_t<StreamMutex> * mtry_lock() {
bool gotLock = LockedBase<MUTEX_TYPE>::try_lock() ;
tprintf(DB_SYNC, "mtry_locking=%d criticalData=%p for %s...\n", gotLock, this, ToString(this->_parent).c_str());
return gotLock ? this: nullptr;
};
public:
ihipStream_t * _parent;
uint32_t _kernelCnt; // Count of inflight kernels in this stream. Reset at ::wait().
hc::accelerator_view _av;
// True if the stream has an allocated queue (accelerato_view) for its use:
// Always true at ihipStream creation but queue may later be stolen.
// This acts as a valid bit for the _av.
bool _hasQueue;
private:
};
// if HIP code needs to acquire locks for both ihipCtx_t and ihipStream_t, it should first acquire the lock
// for the ihipCtx_t and then for the individual streams. The locks should not be acquired in reverse order
// or deadlock may occur. In some cases, it may be possible to reduce the range where the locks must be held.
// HIP routines should avoid acquiring and releasing the same lock during the execution of a single HIP API.
// Another option is to use try_lock in the innermost lock query.
typedef ihipStreamCriticalBase_t<StreamMutex> ihipStreamCritical_t;
typedef LockedAccessor<ihipStreamCritical_t> LockedAccessor_StreamCrit_t;
//---
// Internal stream structure.
class ihipStream_t {
public:
enum ScheduleMode {Auto, Spin, Yield};
typedef uint64_t SeqNum_t ;
// TODOD -make av a reference to avoid shared_ptr overhead?
ihipStream_t(ihipCtx_t *ctx, hc::accelerator_view av, unsigned int flags);
~ihipStream_t();
// kind is hipMemcpyKind
void locked_copySync (void* dst, const void* src, size_t sizeBytes, unsigned kind, bool resolveOn = true);
void locked_copyAsync(void* dst, const void* src, size_t sizeBytes, unsigned kind);
void lockedSymbolCopySync(hc::accelerator &acc, void *dst, void* src, size_t sizeBytes, size_t offset, unsigned kind);
void lockedSymbolCopyAsync(hc::accelerator &acc, void *dst, void* src, size_t sizeBytes, size_t offset, unsigned kind);
//---
// Member functions that begin with locked_ are thread-safe accessors - these acquire / release the critical mutex.
LockedAccessor_StreamCrit_t lockopen_preKernelCommand();
void lockclose_postKernelCommand(const char *kernelName, hc::accelerator_view *av);
void locked_wait();
hc::accelerator_view* locked_getAv() { LockedAccessor_StreamCrit_t crit(_criticalData); return &(crit->_av); };
void locked_waitEvent(hipEvent_t event);
void locked_recordEvent(hipEvent_t event);
//---
// Use this if we already have the stream critical data mutex:
void wait(LockedAccessor_StreamCrit_t &crit);
void launchModuleKernel(hc::accelerator_view av, hsa_signal_t signal,
uint32_t blockDimX, uint32_t blockDimY, uint32_t blockDimZ,
uint32_t gridDimX, uint32_t gridDimY, uint32_t gridDimZ,
uint32_t groupSegmentSize, uint32_t sharedMemBytes,
void *kernarg, size_t kernSize, uint64_t kernel);
//-- Non-racy accessors:
// These functions access fields set at initialization time and are non-racy (so do not acquire mutex)
const ihipDevice_t * getDevice() const;
ihipCtx_t * getCtx() const;
void ensureHaveQueue(LockedAccessor_StreamCrit_t &streamCrit);
public:
//---
//Public member vars - these are set at initialization and never change:
SeqNum_t _id; // monotonic sequence ID
unsigned _flags;
private:
// The unsigned return is hipMemcpyKind
unsigned resolveMemcpyDirection(bool srcInDeviceMem, bool dstInDeviceMem);
void resolveHcMemcpyDirection(unsigned hipMemKind,
const hc::AmPointerInfo *dstPtrInfo, const hc::AmPointerInfo *srcPtrInfo,
hc::hcCommandKind *hcCopyDir,
ihipCtx_t **copyDevice,
bool *forceUnpinnedCopy);
bool canSeeMemory(const ihipCtx_t *thisCtx, const hc::AmPointerInfo *dstInfo, const hc::AmPointerInfo *srcInfo);
void addSymbolPtrToTracker(hc::accelerator& acc, void* ptr, size_t sizeBytes);
public: // TODO - move private
// Critical Data - MUST be accessed through LockedAccessor_StreamCrit_t
ihipStreamCritical_t _criticalData;
private: // Data
std::mutex _hasQueueLock;
ihipCtx_t *_ctx; // parent context that owns this stream.
// Friends:
friend std::ostream& operator<<(std::ostream& os, const ihipStream_t& s);
friend hipError_t hipStreamQuery(hipStream_t);
ScheduleMode _scheduleMode;
};
//----
// Internal event structure:
enum hipEventStatus_t {
hipEventStatusUnitialized = 0, // event is unutilized, must be "Created" before use.
hipEventStatusCreated = 1,
hipEventStatusRecording = 2, // event has been enqueued to record something.
hipEventStatusRecorded = 3, // event has been recorded - timestamps are valid.
} ;
// internal hip event structure.
struct ihipEvent_t {
hipEventStatus_t _state;
hipStream_t _stream; // Stream where the event is recorded, or NULL if all streams.
unsigned _flags;
hc::completion_future _marker;
uint64_t _timestamp; // store timestamp, may be set on host or by marker.
} ;
//=============================================================================
//class ihipDeviceCriticalBase_t
template <typename MUTEX_TYPE>
class ihipDeviceCriticalBase_t : LockedBase<MUTEX_TYPE>
{
public:
ihipDeviceCriticalBase_t(ihipDevice_t *parentDevice) :
_parent(parentDevice)
{
};
~ihipDeviceCriticalBase_t() {
}
// Contexts:
void addContext(ihipCtx_t *ctx);
void removeContext(ihipCtx_t *ctx);
std::list<ihipCtx_t*> &ctxs() { return _ctxs; };
const std::list<ihipCtx_t*> &const_ctxs() const { return _ctxs; };
int getcount() {return _ctxCount;};
friend class LockedAccessor<ihipDeviceCriticalBase_t>;
private:
ihipDevice_t *_parent;
//--- Context Tracker:
std::list< ihipCtx_t* > _ctxs; // contexts associated with this device across all threads.
int _ctxCount;
};
typedef ihipDeviceCriticalBase_t<DeviceMutex> ihipDeviceCritical_t;
typedef LockedAccessor<ihipDeviceCritical_t> LockedAccessor_DeviceCrit_t;
//----
// Properties of the HIP device.
// Multiple contexts can point to same device.
class ihipDevice_t
{
public:
ihipDevice_t(unsigned deviceId, unsigned deviceCnt, hc::accelerator &acc);
~ihipDevice_t();
// Accessors:
ihipCtx_t *getPrimaryCtx() const { return _primaryCtx; };
void locked_removeContext(ihipCtx_t *c);
void locked_reset();
ihipDeviceCritical_t &criticalData() { return _criticalData; };
public:
unsigned _deviceId; // device ID
hc::accelerator _acc;
hsa_agent_t _hsaAgent; // hsa agent handle
//! Number of compute units supported by the device:
unsigned _computeUnits;
hipDeviceProp_t _props; // saved device properties.
// TODO - report this through device properties, base on HCC API call.
int _isLargeBar;
ihipCtx_t *_primaryCtx;
int _state; //1 if device is set otherwise 0
private:
hipError_t initProperties(hipDeviceProp_t* prop);
private:
ihipDeviceCritical_t _criticalData;
};
//=============================================================================
//=============================================================================
//class ihipCtxCriticalBase_t
template <typename MUTEX_TYPE>
class ihipCtxCriticalBase_t : LockedBase<MUTEX_TYPE>
{
public:
ihipCtxCriticalBase_t(ihipCtx_t *parentCtx, unsigned deviceCnt) :
_parent(parentCtx),
_peerCnt(0)
{
_peerAgents = new hsa_agent_t[deviceCnt];
};
~ihipCtxCriticalBase_t() {
if (_peerAgents != nullptr) {
delete _peerAgents;
_peerAgents = nullptr;
}
_peerCnt = 0;
}
// Streams:
void addStream(ihipStream_t *stream);
std::list<ihipStream_t*> &streams() { return _streams; };
const std::list<ihipStream_t*> &const_streams() const { return _streams; };
// Peer Accessor classes:
bool isPeerWatcher(const ihipCtx_t *peer); // returns True if peer has access to memory physically located on this device.
bool addPeerWatcher(const ihipCtx_t *thisCtx, ihipCtx_t *peer);
bool removePeerWatcher(const ihipCtx_t *thisCtx, ihipCtx_t *peer);
void resetPeerWatchers(ihipCtx_t *thisDevice);
void printPeerWatchers(FILE *f) const;
uint32_t peerCnt() const { return _peerCnt; };
hsa_agent_t *peerAgents() const { return _peerAgents; };
// TODO - move private
std::list<ihipCtx_t*> _peers; // list of enabled peer devices.
friend class LockedAccessor<ihipCtxCriticalBase_t>;
private:
ihipCtx_t * _parent;
//--- Stream Tracker:
std::list< ihipStream_t* > _streams; // streams associated with this device.
//--- Peer Tracker:
// These reflect the currently Enabled set of peers for this GPU:
// Enabled peers have permissions to access the memory physically allocated on this device.
// Note the peers always contain the self agent for easy interfacing with HSA APIs.
uint32_t _peerCnt; // number of enabled peers
hsa_agent_t *_peerAgents; // efficient packed array of enabled agents (to use for allocations.)
private:
void recomputePeerAgents();
};
// Note Mutex type Real/Fake selected based on CtxMutex
typedef ihipCtxCriticalBase_t<CtxMutex> ihipCtxCritical_t;
// This type is used by functions that need access to the critical device structures.
typedef LockedAccessor<ihipCtxCritical_t> LockedAccessor_CtxCrit_t;
//=============================================================================
//=============================================================================
//class ihipCtx_t:
// A HIP CTX (context) points at one of the existing devices and contains the streams,
// peer-to-peer mappings, creation flags. Multiple contexts can point to the same
// device.
//
class ihipCtx_t
{
public: // Functions:
ihipCtx_t(ihipDevice_t *device, unsigned deviceCnt, unsigned flags); // note: calls constructor for _criticalData
~ihipCtx_t();
// Functions which read or write the critical data are named locked_.
// (might be better called "locking_"
// ihipCtx_t does not use recursive locks so the ihip implementation must avoid calling a locked_ function from within a locked_ function.
// External functions which call several locked_ functions will acquire and release the lock for each function. if this occurs in
// performance-sensitive code we may want to refactor by adding non-locked functions and creating a new locked_ member function to call them all.
void locked_removeStream(ihipStream_t *s);
void locked_reset();
void locked_waitAllStreams();
void locked_syncDefaultStream(bool waitOnSelf);
// Will allocate a queue and assign it to the needyStream:
hc::accelerator_view stealActiveQueue(LockedAccessor_CtxCrit_t &ctxCrit, ihipStream_t *needyStream);
hc::accelerator_view createOrStealQueue(LockedAccessor_CtxCrit_t &ctxCrit);
ihipCtxCritical_t &criticalData() { return _criticalData; };
const ihipDevice_t *getDevice() const { return _device; };
int getDeviceNum() const { return _device->_deviceId; };
// TODO - review uses of getWriteableDevice(), can these be converted to getDevice()
ihipDevice_t *getWriteableDevice() const { return _device; };
std::string toString() const;
public: // Data
// The NULL stream is used if no other stream is specified.
// Default stream has special synchronization properties with other streams.
ihipStream_t *_defaultStream;
// Flags specified when the context is created:
unsigned _ctxFlags;
private:
ihipDevice_t *_device;
private: // Critical data, protected with locked access:
// Members of _protected data MUST be accessed through the LockedAccessor.
// Search for LockedAccessor<ihipCtxCritical_t> for examples; do not access _criticalData directly.
ihipCtxCritical_t _criticalData;
};
//=================================================================================================
// Global variable definition:
extern std::once_flag hip_initialized;
extern unsigned g_deviceCnt;
extern hsa_agent_t g_cpu_agent ; // the CPU agent.
//=================================================================================================
// Extern functions:
extern void ihipInit();
extern const char *ihipErrorString(hipError_t);
extern ihipCtx_t *ihipGetTlsDefaultCtx();
extern void ihipSetTlsDefaultCtx(ihipCtx_t *ctx);
extern hipError_t ihipSynchronize(void);
extern void ihipCtxStackUpdate();
extern hipError_t ihipDeviceSetState();
extern ihipDevice_t *ihipGetDevice(int);
ihipCtx_t * ihipGetPrimaryCtx(unsigned deviceIndex);
extern void ihipSetTs(hipEvent_t e);
hipStream_t ihipSyncAndResolveStream(hipStream_t);
// Stream printf functions:
inline std::ostream& operator<<(std::ostream& os, const ihipStream_t& s)
{
os << "stream:";
os << s.getDevice()->_deviceId;;
os << '.';
os << s._id;
return os;
}
inline std::ostream & operator<<(std::ostream& os, const dim3& s)
{
os << '{';
os << s.x;
os << ',';
os << s.y;
os << ',';
os << s.z;
os << '}';
return os;
}
inline std::ostream & operator<<(std::ostream& os, const gl_dim3& s)
{
os << '{';
os << s.x;
os << ',';
os << s.y;
os << ',';
os << s.z;
os << '}';
return os;
}
// Stream printf functions:
inline std::ostream& operator<<(std::ostream& os, const hipEvent_t& e)
{
os << "event:" << std::hex << static_cast<void*> (e);
return os;
}
inline std::ostream& operator<<(std::ostream& os, const ihipCtx_t* c)
{
os << "ctx:" << static_cast<const void*> (c)
<< ".dev:" << c->getDevice()->_deviceId;
return os;
}
// Helper functions that are used across src files:
namespace hip_internal {
hipError_t memcpyAsync (void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind, hipStream_t stream);
};
#endif
+3 -3
Ver ficheiro
@@ -1,5 +1,5 @@
/*
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
@@ -20,8 +20,8 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#ifndef HIP_HCC_H
#define HIP_HCC_H
#ifndef HIP_SRC_HIP_HCC_INTERNAL_H
#define HIP_SRC_HIP_HCC_INTERNAL_H
#include <hc.hpp>
#include <hsa/hsa.h>
+1 -1
Ver ficheiro
@@ -1,5 +1,5 @@
/*
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
+3 -3
Ver ficheiro
@@ -1,5 +1,5 @@
/*
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
@@ -20,8 +20,8 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#ifndef HIP_UTIL_H
#define HIP_UTIL_H
#ifndef HIP_INCLUDE_HCC_DETAIL_HIP_UTIL_H
#define HIP_INCLUDE_HCC_DETAIL_HIP_UTIL_H
#include <assert.h>
#include <stdint.h>
+1 -2
Ver ficheiro
@@ -30,7 +30,7 @@ THE SOFTWARE.
#include <stdio.h>
#include <iostream>
#include "hip/hip_runtime.h"
#include "hip/hcc.h"
#include "hip/hip_hcc.h"
#include "test_common.h"
#define CHECK(error) \
@@ -61,4 +61,3 @@ int main(int argc, char *argv[])
passed();
};