Merge branch 'amd-develop' into amd-master
Change-Id: I4d12b7ad56d743ea52e97a100f93818ee0fd916c
[ROCm/clr commit: a0ceaef5d4]
Этот коммит содержится в:
@@ -402,61 +402,54 @@ Code should not assume a warp size of 32 or 64. See [Warp Cross-Lane Functions]
|
||||
|
||||
## memcpyToSymbol
|
||||
|
||||
HIP support for hipMemCpyToSymbol is under-development. This feature allows a kernel
|
||||
HIP support for hipMemCpyToSymbol is complete. This feature allows a kernel
|
||||
to define a device-side data symbol which can be accessed on the host side. The symbol
|
||||
can be in __constant or device space. As a workaround, programs can pass the symbol
|
||||
as an argument to the kernel, and use standard hipMemcpy routines to initialize it.
|
||||
can be in __constant or device space.
|
||||
|
||||
For example:
|
||||
|
||||
Device Code:
|
||||
```
|
||||
#include<hip_runtime_api.h>
|
||||
#include<hip_runtime.h>
|
||||
#include<hip/hip_runtime.h>
|
||||
#include<hip/hip_runtime_api.h>
|
||||
#include<iostream>
|
||||
|
||||
#ifdef __HIP_PLATFORM_HCC__
|
||||
__global__ void Inc(hipLaunchParm lp, float *Ad, float *Out)
|
||||
#endif
|
||||
#ifdef __HIP_PLATFORM_NVCC__
|
||||
__constant__ float Ad[1024];
|
||||
__global__ void Inc(hipLaunchParm lp, float *Out)
|
||||
#endif
|
||||
#define HIP_ASSERT(status) \
|
||||
assert(status == hipSuccess)
|
||||
|
||||
#define LEN 512
|
||||
#define SIZE 2048
|
||||
|
||||
__constant__ int Value[LEN];
|
||||
|
||||
__global__ void Get(hipLaunchParm lp, int *Ad)
|
||||
{
|
||||
int tx = hipThreadIdx_x;
|
||||
Out[tx] = Ad[tx] + 1.0f;
|
||||
int tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
|
||||
Ad[tid] = Value[tid];
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
float *A, *Ad;
|
||||
float *Out, *Outd;
|
||||
A = new float[1024];
|
||||
Out = new float[1024];
|
||||
|
||||
for(uint32_t i=0;i<1024;i++)
|
||||
int *A, *B, *Ad;
|
||||
A = new int[LEN];
|
||||
B = new int[LEN];
|
||||
for(unsigned i=0;i<LEN;i++)
|
||||
{
|
||||
A[i] = 1.0f*i;
|
||||
Out[i] = 0.0f;
|
||||
A[i] = -1*i;
|
||||
B[i] = 0;
|
||||
}
|
||||
|
||||
hipMalloc((void**)&Ad, 1024*sizeof(float));
|
||||
hipMalloc((void**)&Outd, 1024*sizeof(float));
|
||||
HIP_ASSERT(hipMalloc((void**)&Ad, SIZE));
|
||||
|
||||
hipMemcpy(Outd, Out, 1024*sizeof(float), hipMemcpyHostToDevice);
|
||||
HIP_ASSERT(hipMemcpyToSymbol(HIP_SYMBOL(Value), A, SIZE, 0, hipMemcpyHostToDevice));
|
||||
hipLaunchKernel(Get, dim3(1,1,1), dim3(LEN,1,1), 0, 0, Ad);
|
||||
HIP_ASSERT(hipMemcpy(B, Ad, SIZE, hipMemcpyDeviceToHost));
|
||||
|
||||
#ifdef __HIP_PLATFORM_HCC__
|
||||
assert(hipSuccess == hipMemcpy(Ad, A, 1024*sizeof(float), hipMemcpyHostToDevice));
|
||||
hipLaunchKernel(Inc, dim3(1,1,1), dim3(1024,1,1), 0, 0, Ad, Outd);
|
||||
#endif
|
||||
#ifdef __HIP_PLATFORM_NVCC__
|
||||
assert(hipSuccess == hipMemcpyToSymbol(Ad, A, 1024*sizeof(float)));
|
||||
hipLaunchKernel(Inc, dim3(1,1,1), dim3(1024,1,1), 0, 0, Outd);
|
||||
#endif
|
||||
|
||||
hipMemcpy(Out, Outd, 1024*sizeof(float), hipMemcpyDeviceToHost);
|
||||
std::cout<<Out[10]<<" "<<A[10]<<std::endl;
|
||||
assert(Out[10] - A[10] == 1.0f);
|
||||
for(unsigned i=0;i<LEN;i++)
|
||||
{
|
||||
assert(A[i] == B[i]);
|
||||
}
|
||||
std::cout<<"Passed"<<std::endl;
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
@@ -1985,23 +1985,33 @@ void HipifyPPCallbacks::handleEndSource() {
|
||||
// Set up the command line options
|
||||
static cl::OptionCategory ToolTemplateCategory("CUDA to HIP source translator options");
|
||||
|
||||
static cl::opt<std::string> OutputFilename("o", cl::desc("Output filename"),
|
||||
cl::value_desc("filename"), cl::cat(ToolTemplateCategory));
|
||||
static cl::opt<std::string> OutputFilename("o",
|
||||
cl::desc("Output filename"),
|
||||
cl::value_desc("filename"),
|
||||
cl::cat(ToolTemplateCategory));
|
||||
|
||||
static cl::opt<bool>
|
||||
Inplace("inplace",
|
||||
cl::desc("Modify input file inplace, replacing input with hipified "
|
||||
"output, save backup in .prehip file. "),
|
||||
cl::value_desc("inplace"));
|
||||
static cl::opt<bool> Inplace("inplace",
|
||||
cl::desc("Modify input file inplace, replacing input with hipified "
|
||||
"output, save backup in .prehip file"),
|
||||
cl::value_desc("inplace"),
|
||||
cl::cat(ToolTemplateCategory));
|
||||
|
||||
static cl::opt<bool>
|
||||
NoOutput("no-output",
|
||||
cl::desc("don't write any translated output to stdout"),
|
||||
cl::value_desc("no-output"));
|
||||
static cl::opt<bool> NoOutput("no-output",
|
||||
cl::desc("Don't write any translated output to stdout"),
|
||||
cl::value_desc("no-output"),
|
||||
cl::cat(ToolTemplateCategory));
|
||||
|
||||
static cl::opt<bool>
|
||||
PrintStats("print-stats", cl::desc("print the command-line, like a header"),
|
||||
cl::value_desc("print-stats"));
|
||||
static cl::opt<bool> PrintStats("print-stats",
|
||||
cl::desc("Print translation statistics"),
|
||||
cl::value_desc("print-stats"),
|
||||
cl::cat(ToolTemplateCategory));
|
||||
|
||||
static cl::opt<bool> N("n",
|
||||
cl::desc("Combines -no-output and -print-stats options"),
|
||||
cl::value_desc("n"),
|
||||
cl::cat(ToolTemplateCategory));
|
||||
|
||||
static cl::extrahelp CommonHelp(CommonOptionsParser::HelpMessage);
|
||||
|
||||
void addAllMatchers(ast_matchers::MatchFinder &Finder, Cuda2HipCallback *Callback) {
|
||||
Finder.addMatcher(callExpr(isExpansionInMainFile(),
|
||||
@@ -2086,6 +2096,9 @@ int main(int argc, const char **argv) {
|
||||
CommonOptionsParser OptionsParser(argc, argv, ToolTemplateCategory, llvm::cl::Required);
|
||||
std::vector<std::string> fileSources = OptionsParser.getSourcePathList();
|
||||
std::string dst = OutputFilename;
|
||||
if (N) {
|
||||
NoOutput = PrintStats = true;
|
||||
}
|
||||
if (dst.empty()) {
|
||||
dst = fileSources[0];
|
||||
if (!Inplace) {
|
||||
|
||||
@@ -99,7 +99,7 @@ enum hipLimit_t
|
||||
#define hipDeviceScheduleSpin 0x1 ///< Dedicate a CPU core to spin-wait. Provides lowest latency, but burns a CPU core and may consume more power.
|
||||
#define hipDeviceScheduleYield 0x2 ///< Yield the CPU to the operating system when waiting. May increase latency, but lowers power and is friendlier to other threads in the system.
|
||||
#define hipDeviceScheduleBlockingSync 0x4
|
||||
#define hipDeviceScheduleMask 0x7
|
||||
#define hipDeviceScheduleMask 0x7
|
||||
|
||||
#define hipDeviceMapHost 0x8
|
||||
#define hipDeviceLmemResizeToMax 0x16
|
||||
@@ -385,7 +385,7 @@ hipError_t hipDeviceSetSharedMemConfig ( hipSharedMemConfig config );
|
||||
*
|
||||
* @param [in] flags
|
||||
*
|
||||
* The schedule flags impact how HIP waits for the completion of a command running on a device.
|
||||
* The schedule flags impact how HIP waits for the completion of a command running on a device.
|
||||
* hipDeviceScheduleSpin : HIP runtime will actively spin in the thread which submitted the work until the command completes. This offers the lowest latency, but will consume a CPU core and may increase power.
|
||||
* hipDeviceScheduleYield : The HIP runtime will yield the CPU to system so that other tasks can use it. This may increase latency to detect the completion but will consume less power and is friendlier to other tasks in the system.
|
||||
* hipDeviceScheduleBlockingSync : On ROCm platform, this is a synonym for hipDeviceScheduleYield.
|
||||
@@ -393,7 +393,7 @@ hipError_t hipDeviceSetSharedMemConfig ( hipSharedMemConfig config );
|
||||
*
|
||||
*
|
||||
* hipDeviceMapHost : Allow mapping host memory. On ROCM, this is always allowed and the flag is ignored.
|
||||
* hipDeviceLmemResizeToMax : @warning ROCm silently ignores this flag.
|
||||
* hipDeviceLmemResizeToMax : @warning ROCm silently ignores this flag.
|
||||
*
|
||||
* @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorSetOnActiveProcess
|
||||
*
|
||||
@@ -568,14 +568,14 @@ hipError_t hipStreamQuery(hipStream_t stream);
|
||||
* @brief Wait for all commands in stream to complete.
|
||||
*
|
||||
* @param[in] stream stream identifier.
|
||||
*
|
||||
*
|
||||
* @return #hipSuccess, #hipErrorInvalidResourceHandle
|
||||
*
|
||||
* If the null stream is specified, this command blocks until all
|
||||
* This command honors the hipDeviceLaunchBlocking flag, which controls whether the wait is active or blocking.
|
||||
* This command is host-synchronous : the host will block until the stream is empty.
|
||||
*
|
||||
* @see hipStreamCreate, hipStreamCreateWithFlags, hipStreamWaitEvent, hipStreamDestroy
|
||||
* @see hipStreamCreate, hipStreamCreateWithFlags, hipStreamWaitEvent, hipStreamDestroy
|
||||
*
|
||||
*/
|
||||
hipError_t hipStreamSynchronize(hipStream_t stream);
|
||||
@@ -594,7 +594,7 @@ hipError_t hipStreamSynchronize(hipStream_t stream);
|
||||
* All future work submitted to @p stream will wait until @p event reports completion before beginning execution.
|
||||
* This function is host-asynchronous and the function may return before the wait has completed.
|
||||
*
|
||||
* @see hipStreamCreate, hipStreamCreateWithFlags, hipStreamSynchronize, hipStreamDestroy
|
||||
* @see hipStreamCreate, hipStreamCreateWithFlags, hipStreamSynchronize, hipStreamDestroy
|
||||
*
|
||||
*/
|
||||
hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int flags);
|
||||
@@ -612,10 +612,31 @@ hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int
|
||||
*
|
||||
* Return flags associated with this stream in *@p flags.
|
||||
*
|
||||
* @see hipStreamCreateWithFlags
|
||||
* @see hipStreamCreateWithFlags
|
||||
*/
|
||||
hipError_t hipStreamGetFlags(hipStream_t stream, unsigned int *flags);
|
||||
|
||||
/**
|
||||
* Stream CallBack struct
|
||||
*/
|
||||
typedef void(* hipStreamCallback_t)(hipStream_t stream, hipError_t status, void* userData);
|
||||
|
||||
/**
|
||||
* @brief Adds a callback to be called on the host after all currently enqueued
|
||||
* items in the stream have completed. For each
|
||||
* cudaStreamAddCallback call, a callback will be executed exactly once.
|
||||
* The callback will block later work in the stream until it is finished.
|
||||
* @param[in] stream - Stream to add callback to
|
||||
* @param[in] callback - The function to call once preceding stream operations are complete
|
||||
* @param[in] userData - User specified data to be passed to the callback function
|
||||
* @param[in] flags - Reserved for future use, must be 0
|
||||
* @return #hipSuccess, #hipErrorInvalidResourceHandle, #hipErrorNotSupported
|
||||
*
|
||||
* @see hipStreamCreate, hipStreamCreateWithFlags, hipStreamQuery, hipStreamSynchronize, hipStreamWaitEvent, hipStreamDestroy
|
||||
*
|
||||
*/
|
||||
hipError_t hipStreamAddCallback(hipStream_t stream, hipStreamCallback_t callback, void *userData, unsigned int flags);
|
||||
|
||||
|
||||
// end doxygen Stream
|
||||
/**
|
||||
@@ -637,11 +658,11 @@ hipError_t hipStreamGetFlags(hipStream_t stream, unsigned int *flags);
|
||||
*
|
||||
* @param[in,out] event Returns the newly created event.
|
||||
* @param[in] flags Flags to control event behavior. Valid values are #hipEventDefault, #hipEventBlockingSync, #hipEventDisableTiming, #hipEventInterprocess
|
||||
|
||||
|
||||
* #hipEventDefault : Default flag. The event will use active synchronization and will support timing. Blocking synchronization provides lowest possible latency at the expense of dedicating a CPU to poll on the eevent.
|
||||
* #hipEventBlockingSync : The event will use blocking synchronization : if hipEventSynchronize is called on this event, the thread will block until the event completes. This can increase latency for the synchroniation but can result in lower power and more resources for other CPU threads.
|
||||
* #hipEventDisableTiming : Disable recording of timing information. On ROCM platform, timing information is always recorded and this flag has no performance benefit.
|
||||
|
||||
|
||||
* @warning On HCC platform, hipEventInterprocess support is under development. Use of this flag will return an error.
|
||||
*
|
||||
* @returns #hipSuccess, #hipErrorInitializationError, #hipErrorInvalidValue, #hipErrorLaunchFailure, #hipErrorMemoryAllocation
|
||||
@@ -1098,7 +1119,7 @@ hipError_t hipMemcpyToSymbol(const char* symbolName, const void *src, size_t siz
|
||||
|
||||
|
||||
/**
|
||||
* @brief Copies @p sizeBytes bytes from the memory area pointed to by @p src to the memory area pointed to by @p offset bytes from the start of symbol @p symbol
|
||||
* @brief Copies @p sizeBytes bytes from the memory area pointed to by @p src to the memory area pointed to by @p offset bytes from the start of symbol @p symbol
|
||||
*
|
||||
* The memory areas may not overlap. Symbol can either be a variable that resides in global or constant memory space, or it can be a character string,
|
||||
* naming a variable that resides in global or constant memory space. Kind can be either hipMemcpyHostToDevice or hipMemcpyDeviceToDevice
|
||||
@@ -1405,9 +1426,9 @@ hipError_t hipCtxGetDevice(hipDevice_t *device);
|
||||
/**
|
||||
* @brief Returns the approximate HIP api version.
|
||||
*
|
||||
* @param [in] ctx Context to check
|
||||
* @param [in] ctx Context to check
|
||||
* @param [out] apiVersion
|
||||
*
|
||||
*
|
||||
* @return #hipSuccess
|
||||
*
|
||||
* @warning The HIP feature set does not correspond to an exact CUDA SDK api revision.
|
||||
@@ -1435,7 +1456,7 @@ hipError_t hipCtxGetCacheConfig ( hipFuncCache *cacheConfig );
|
||||
|
||||
/**
|
||||
* @brief Set L1/Shared cache partition.
|
||||
*
|
||||
*
|
||||
* @param [in] cacheConfiguration
|
||||
*
|
||||
* @return #hipSuccess
|
||||
@@ -1581,7 +1602,7 @@ hipError_t hipDeviceTotalMem (size_t *bytes,hipDevice_t device);
|
||||
|
||||
/**
|
||||
* @brief Returns the approximate HIP driver version.
|
||||
*
|
||||
*
|
||||
* @param [out] driverVersion
|
||||
*
|
||||
* @returns #hipSuccess, #hipErrorInavlidValue
|
||||
|
||||
@@ -47,7 +47,7 @@ THE SOFTWARE.
|
||||
*/
|
||||
// _restrict is supported by the compiler
|
||||
#define __shared__ tile_static
|
||||
#define __constant__ __attribute__((address_space(2)))
|
||||
#define __constant__ __attribute__((address_space(1)))
|
||||
|
||||
#else
|
||||
// Non-HCC compiler
|
||||
|
||||
@@ -198,7 +198,7 @@ typedef enum hipError_t {
|
||||
hipErrorInvalidDevice = 1010, ///< DeviceID must be in range 0...#compute-devices.
|
||||
hipErrorInvalidValue = 1011, ///< One or more of the parameters passed to the API call is NULL or not in an acceptable range.
|
||||
hipErrorInvalidDevicePointer = 1017, ///< Invalid Device Pointer
|
||||
hipErrorInvalidMemcpyDirection = 1021, ///< Invalid memory copy direction
|
||||
hipErrorInvalidMemcpyDirection = 1021, ///< Invalid memory copy direction
|
||||
hipErrorUnknown = 1030, ///< Unknown error.
|
||||
hipErrorInvalidResourceHandle = 1033, ///< Resource handle (hipEvent_t or hipStream_t) invalid.
|
||||
hipErrorNotReady = 1034, ///< Indicates that asynchronous operations enqueued earlier are not ready. This is not actually an error, but is used to distinguish from hipSuccess (which indicates completion). APIs that return this error include hipEventQuery and hipStreamQuery.
|
||||
|
||||
@@ -164,6 +164,11 @@ inline static cudaMemcpyKind hipMemcpyKindToCudaMemcpyKind(hipMemcpyKind kind) {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Stream CallBack struct
|
||||
*/
|
||||
typedef void(* hipStreamCallback_t)(hipStream_t stream, hipError_t status, void* userData);
|
||||
|
||||
inline static hipError_t hipInit(unsigned int flags)
|
||||
{
|
||||
return hipCUResultTohipError(cuInit(flags));
|
||||
@@ -578,6 +583,11 @@ inline static hipError_t hipStreamQuery(hipStream_t stream)
|
||||
return hipCUDAErrorTohipError(cudaStreamQuery(stream));
|
||||
}
|
||||
|
||||
inline static hipError_t hipStreamAddCallback(hipStream_t stream, hipStreamCallback_t callback, void *userData, unsigned int flags)
|
||||
{
|
||||
return hipCUDAErrorTohipError(cudaStreamAddCallback(cudaStream_t stream,
|
||||
cudaStreamCallback_t callback, void *userData, unsigned int flags));
|
||||
}
|
||||
|
||||
inline static hipError_t hipDriverGetVersion(int *driverVersion)
|
||||
{
|
||||
@@ -741,7 +751,7 @@ inline static hipError_t hipDeviceGetPCIBusId(int *pciBusId,int len,hipDevice_t
|
||||
|
||||
inline static hipError_t hipDeviceGetLimit(size_t *pValue, hipLimit_t limit)
|
||||
{
|
||||
return hipCUDAErrorTohipError(cudaDeviceGetLimit(pValue, limit);
|
||||
return hipCUDAErrorTohipError(cudaDeviceGetLimit(pValue, limit));
|
||||
}
|
||||
|
||||
inline static hipError_t hipDeviceTotalMem(size_t *bytes,hipDevice_t device)
|
||||
|
||||
@@ -1120,6 +1120,8 @@ void ihipInit()
|
||||
HIP_TRACE_API = 1;
|
||||
}
|
||||
|
||||
|
||||
|
||||
READ_ENV_I(release, HIP_TRACE_API, 0, "Trace each HIP API call. Print function name and return code to stderr as program executes.");
|
||||
READ_ENV_S(release, HIP_TRACE_API_COLOR, 0, "Color to use for HIP_API. None/Red/Green/Yellow/Blue/Magenta/Cyan/White");
|
||||
READ_ENV_I(release, HIP_ATP_MARKER, 0, "Add HIP function begin/end to ATP file generated with CodeXL");
|
||||
@@ -1143,6 +1145,22 @@ void ihipInit()
|
||||
fprintf (stderr, "warning: env var HIP_ATP_MARKER=0x%x but COMPILE_HIP_ATP_MARKER=0. (perhaps enable COMPILE_HIP_DB in src code before compiling?)", HIP_ATP_MARKER);
|
||||
}
|
||||
|
||||
if (HIP_DB) {
|
||||
fprintf (stderr, "HIP_DB=0x%x [", HIP_DB);
|
||||
bool first=true;
|
||||
for (int i=0; i<DB_MAX_BITPOS; i++) {
|
||||
if (HIP_DB & (1<<i)) {
|
||||
if (first) {
|
||||
fprintf (stderr, "%s%s%s", dbName[i]._color, dbName[i]._shortName, KNRM);
|
||||
} else {
|
||||
fprintf (stderr, "+%s%s%s", dbName[i]._color, dbName[i]._shortName, KNRM);
|
||||
};
|
||||
first=false;
|
||||
};
|
||||
}
|
||||
fprintf (stderr, "]\n");
|
||||
}
|
||||
|
||||
std::transform(HIP_TRACE_API_COLOR.begin(), HIP_TRACE_API_COLOR.end(), HIP_TRACE_API_COLOR.begin(), ::tolower);
|
||||
|
||||
if (HIP_TRACE_API_COLOR == "none") {
|
||||
@@ -1497,6 +1515,21 @@ bool ihipStream_t::canSeePeerMemory(const ihipCtx_t *thisCtx, ihipCtx_t *dstCtx,
|
||||
};
|
||||
|
||||
|
||||
#define CASE_STRING(X) case X: return #X ;break;
|
||||
|
||||
const char* memcpyStr(unsigned memKind)
|
||||
{
|
||||
switch (memKind) {
|
||||
CASE_STRING(hipMemcpyHostToHost);
|
||||
CASE_STRING(hipMemcpyHostToDevice);
|
||||
CASE_STRING(hipMemcpyDeviceToHost);
|
||||
CASE_STRING(hipMemcpyDeviceToDevice);
|
||||
CASE_STRING(hipMemcpyDefault);
|
||||
default : return ("unknown memcpyKind");
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Resolve hipMemcpyDefault to a known type.
|
||||
// TODO - review why is this so complicated, does this need srcTracked and dstTracked?
|
||||
@@ -1553,6 +1586,7 @@ void ihipStream_t::locked_copySync(void* dst, const void* src, size_t sizeBytes,
|
||||
case hipMemcpyHostToDevice: hcCopyDir = hc::hcMemcpyHostToDevice; break;
|
||||
case hipMemcpyDeviceToHost: hcCopyDir = hc::hcMemcpyDeviceToHost; break;
|
||||
case hipMemcpyDeviceToDevice: hcCopyDir = hc::hcMemcpyDeviceToDevice; break;
|
||||
default: throw ihipException(hipErrorRuntimeOther);
|
||||
};
|
||||
|
||||
|
||||
@@ -1569,6 +1603,8 @@ void ihipStream_t::locked_copySync(void* dst, const void* src, size_t sizeBytes,
|
||||
}
|
||||
};
|
||||
|
||||
tprintf (DB_COPY1, "locked_copy dir=%s dst=%p src=%p sz=%zu\n", memcpyStr(kind), src, dst, sizeBytes);
|
||||
|
||||
{
|
||||
LockedAccessor_StreamCrit_t crit (_criticalData);
|
||||
#if DISABLE_COPY_EXT
|
||||
@@ -1588,11 +1624,12 @@ void ihipStream_t::locked_copyAsync(void* dst, const void* src, size_t sizeBytes
|
||||
const ihipCtx_t *ctx = this->getCtx();
|
||||
|
||||
if ((ctx == nullptr) || (ctx->getDevice() == nullptr)) {
|
||||
tprintf (DB_COPY1, "locked_copyAsync bad ctx or device\n");
|
||||
throw ihipException(hipErrorInvalidDevice);
|
||||
}
|
||||
|
||||
if (kind == hipMemcpyHostToHost) {
|
||||
tprintf (DB_COPY2, "Asyc: H2H with memcpy");
|
||||
tprintf (DB_COPY1, "locked_copyAsync: H2H with memcpy");
|
||||
|
||||
// TODO - consider if we want to perhaps use the GPU SDMA engines anyway, to avoid the host-side sync here and keep everything flowing on the GPU.
|
||||
/* As this is a CPU op, we need to wait until all
|
||||
@@ -1613,10 +1650,14 @@ void ihipStream_t::locked_copyAsync(void* dst, const void* src, size_t sizeBytes
|
||||
|
||||
|
||||
bool copyEngineCanSeeSrcAndDest = true;
|
||||
if (kind == hipMemcpyDeviceToDevice) {
|
||||
if ((kind == hipMemcpyDeviceToDevice) ||
|
||||
((kind == hipMemcpyDefault) && srcTracked && dstTracked)) {
|
||||
copyEngineCanSeeSrcAndDest = canSeePeerMemory(ctx, ihipGetPrimaryCtx(dstPtrInfo._appId), ihipGetPrimaryCtx(srcPtrInfo._appId));
|
||||
}
|
||||
|
||||
tprintf (DB_COPY1, "locked_copyAsync: async memcpy dstTracked=%d srcTracked=%d copyEngineCanSeeSrcAndDest=%d\n",
|
||||
dstTracked, srcTracked, copyEngineCanSeeSrcAndDest);
|
||||
|
||||
|
||||
// "tracked" really indicates if the pointer's virtual address is available in the GPU address space.
|
||||
// If both pointers are not tracked, we need to fall back to a sync copy.
|
||||
@@ -1637,6 +1678,7 @@ void ihipStream_t::locked_copyAsync(void* dst, const void* src, size_t sizeBytes
|
||||
}
|
||||
|
||||
} else {
|
||||
// TODO - call copy_ext directly here?
|
||||
locked_copySync(dst, src, sizeBytes, kind);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -190,16 +190,23 @@ extern const char *API_COLOR_END;
|
||||
#define DB_COPY1 3 /* 0x08 - trace memory copy commands. . */
|
||||
#define DB_SIGNAL 4 /* 0x10 - trace signal pool commands */
|
||||
#define DB_COPY2 5 /* 0x20 - trace memory copy commands. Detailed. */
|
||||
#define DB_MAX_BITPOS 5
|
||||
// When adding a new debug flag, also add to the char name table below.
|
||||
//
|
||||
|
||||
static const char *dbName [] =
|
||||
struct DbName {
|
||||
const char *_color;
|
||||
const char *_shortName;
|
||||
};
|
||||
|
||||
static const DbName dbName [] =
|
||||
{
|
||||
KNRM "hip-api", // not used,
|
||||
KYEL "hip-sync",
|
||||
KCYN "hip-mem",
|
||||
KMAG "hip-copy1",
|
||||
KRED "hip-signal",
|
||||
KNRM "hip-copy2",
|
||||
{KGRN, "api"}, // not used,
|
||||
{KYEL, "sync"},
|
||||
{KCYN, "mem"},
|
||||
{KMAG, "copy1"},
|
||||
{KRED, "signal"},
|
||||
{KNRM, "copy2"},
|
||||
};
|
||||
|
||||
|
||||
@@ -210,7 +217,7 @@ static const char *dbName [] =
|
||||
char msgStr[1000];\
|
||||
snprintf(msgStr, 2000, __VA_ARGS__);\
|
||||
COMPUTE_TID_STR\
|
||||
fprintf (stderr, " %s%s:%s%s", dbName[trace_level], tid_ss.str().c_str(), msgStr, KNRM); \
|
||||
fprintf (stderr, " %ship-%s%s:%s%s", dbName[trace_level]._color, dbName[trace_level]._shortName, tid_ss.str().c_str(), msgStr, KNRM); \
|
||||
}\
|
||||
}
|
||||
#else
|
||||
|
||||
@@ -198,4 +198,13 @@ hipError_t hipStreamGetFlags(hipStream_t stream, unsigned int *flags)
|
||||
}
|
||||
|
||||
|
||||
|
||||
//---
|
||||
hipError_t hipStreamAddCallback(hipStream_t stream, hipStreamCallback_t callback, void *userData, unsigned int flags)
|
||||
{
|
||||
HIP_INIT_API(stream, callback, userData, flags);
|
||||
hipError_t e = hipSuccess;
|
||||
//--- explicitly synchronize stream to add callback routines
|
||||
hipStreamSynchronize(stream);
|
||||
callback(stream, e, userData);
|
||||
return ihipLogStatus(e);
|
||||
}
|
||||
|
||||
@@ -0,0 +1,59 @@
|
||||
/*
|
||||
Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include<hip/hip_runtime.h>
|
||||
#include<hip/hip_runtime_api.h>
|
||||
#include<iostream>
|
||||
|
||||
#define HIP_ASSERT(status) \
|
||||
assert(status == hipSuccess)
|
||||
|
||||
#define LEN 512
|
||||
#define SIZE 2048
|
||||
|
||||
__constant__ int Value[LEN];
|
||||
|
||||
__global__ void Get(hipLaunchParm lp, int *Ad)
|
||||
{
|
||||
int tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
|
||||
Ad[tid] = Value[tid];
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
int *A, *B, *Ad;
|
||||
A = new int[LEN];
|
||||
B = new int[LEN];
|
||||
for(unsigned i=0;i<LEN;i++)
|
||||
{
|
||||
A[i] = -1*i;
|
||||
B[i] = 0;
|
||||
}
|
||||
|
||||
HIP_ASSERT(hipMalloc((void**)&Ad, SIZE));
|
||||
|
||||
HIP_ASSERT(hipMemcpyToSymbol(HIP_SYMBOL(Value), A, SIZE, 0, hipMemcpyHostToDevice));
|
||||
hipLaunchKernel(Get, dim3(1,1,1), dim3(LEN,1,1), 0, 0, Ad);
|
||||
HIP_ASSERT(hipMemcpy(B, Ad, SIZE, hipMemcpyDeviceToHost));
|
||||
|
||||
for(unsigned i=0;i<LEN;i++)
|
||||
{
|
||||
assert(A[i] == B[i]);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,60 @@
|
||||
/*
|
||||
Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/* HIT_START
|
||||
* BUILD: %t %s ../../test_common.cpp
|
||||
* RUN: %t
|
||||
* HIT_END
|
||||
*/
|
||||
|
||||
// Test under-development. Call hipStreamAddCallback function and see if it works as expected.
|
||||
|
||||
#include "hip/hip_runtime.h"
|
||||
#include "test_common.h"
|
||||
#define HIPRT_CB
|
||||
|
||||
class CallbackClass
|
||||
{
|
||||
public:
|
||||
static void HIPRT_CB Callback(hipStream_t stream, hipError_t status, void *userData);
|
||||
|
||||
private:
|
||||
void callbackFunc(hipError_t status);
|
||||
};
|
||||
|
||||
void HIPRT_CB CallbackClass::Callback(hipStream_t stream, hipError_t status, void *userData)
|
||||
{
|
||||
CallbackClass* obj = (CallbackClass*) userData;
|
||||
obj->callbackFunc(status);
|
||||
}
|
||||
|
||||
void CallbackClass::callbackFunc(hipError_t status)
|
||||
{
|
||||
HIPASSERT(status==hipSuccess);
|
||||
}
|
||||
|
||||
int main(){
|
||||
hipStream_t mystream;
|
||||
HIPCHECK(hipStreamCreate(&mystream));
|
||||
CallbackClass* obj = new CallbackClass;
|
||||
HIPCHECK(hipStreamAddCallback(mystream, CallbackClass::Callback, obj, 0));
|
||||
HIPCHECK(hipStreamAddCallback(NULL, CallbackClass::Callback, obj, 0));
|
||||
|
||||
passed();
|
||||
}
|
||||
Ссылка в новой задаче
Block a user