From d4a090e7fbeacedb0e042f543102d61363441023 Mon Sep 17 00:00:00 2001 From: Aditya Atluri Date: Thu, 20 Oct 2016 09:57:53 -0500 Subject: [PATCH 1/8] Added support for constant memory 1. Added support for constant memory 2. Added test which uses memcpytosymbol for constant memory 3. Corrected code error on nvcc path Change-Id: I2ab69f516832bf7a037132ac81273ea6f5107401 [ROCm/clr commit: 3a1c8f92596a38bb6d7ebab9dee5cb3864353908] --- .../include/hip/hcc_detail/host_defines.h | 2 +- .../include/hip/nvcc_detail/hip_runtime_api.h | 2 +- .../tests/src/kernel/hipTestConstant.cpp | 59 +++++++++++++++++++ 3 files changed, 61 insertions(+), 2 deletions(-) create mode 100644 projects/clr/hipamd/tests/src/kernel/hipTestConstant.cpp diff --git a/projects/clr/hipamd/include/hip/hcc_detail/host_defines.h b/projects/clr/hipamd/include/hip/hcc_detail/host_defines.h index cc22b39ea7..906c39421e 100644 --- a/projects/clr/hipamd/include/hip/hcc_detail/host_defines.h +++ b/projects/clr/hipamd/include/hip/hcc_detail/host_defines.h @@ -47,7 +47,7 @@ THE SOFTWARE. */ // _restrict is supported by the compiler #define __shared__ tile_static -#define __constant__ __attribute__((address_space(2))) +#define __constant__ __attribute__((address_space(1))) #else // Non-HCC compiler diff --git a/projects/clr/hipamd/include/hip/nvcc_detail/hip_runtime_api.h b/projects/clr/hipamd/include/hip/nvcc_detail/hip_runtime_api.h index f7d67e6662..d27b937b60 100644 --- a/projects/clr/hipamd/include/hip/nvcc_detail/hip_runtime_api.h +++ b/projects/clr/hipamd/include/hip/nvcc_detail/hip_runtime_api.h @@ -741,7 +741,7 @@ inline static hipError_t hipDeviceGetPCIBusId(int *pciBusId,int len,hipDevice_t inline static hipError_t hipDeviceGetLimit(size_t *pValue, hipLimit_t limit) { - return hipCUDAErrorTohipError(cudaDeviceGetLimit(pValue, limit); + return hipCUDAErrorTohipError(cudaDeviceGetLimit(pValue, limit)); } inline static hipError_t hipDeviceTotalMem(size_t *bytes,hipDevice_t device) diff --git a/projects/clr/hipamd/tests/src/kernel/hipTestConstant.cpp b/projects/clr/hipamd/tests/src/kernel/hipTestConstant.cpp new file mode 100644 index 0000000000..f86e8ace4f --- /dev/null +++ b/projects/clr/hipamd/tests/src/kernel/hipTestConstant.cpp @@ -0,0 +1,59 @@ +/* +Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include +#include +#include + +#define HIP_ASSERT(status) \ + assert(status == hipSuccess) + +#define LEN 512 +#define SIZE 2048 + +__constant__ int Value[LEN]; + +__global__ void Get(hipLaunchParm lp, int *Ad) +{ + int tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; + Ad[tid] = Value[tid]; +} + +int main() +{ + int *A, *B, *Ad; + A = new int[LEN]; + B = new int[LEN]; + for(unsigned i=0;i Date: Thu, 20 Oct 2016 09:45:59 -0500 Subject: [PATCH 2/8] Fix P2P for async Also improve HIP debug message: Add more DB_COPY1 messages. memcpyStr, expand HIP_DB bitmask. [ROCm/clr commit: 714968cdc62b47be847458b70b2b1281c2a588de] --- projects/clr/hipamd/src/hip_hcc.cpp | 46 +++++++++++++++++++++++++++-- projects/clr/hipamd/src/hip_hcc.h | 23 ++++++++++----- 2 files changed, 59 insertions(+), 10 deletions(-) diff --git a/projects/clr/hipamd/src/hip_hcc.cpp b/projects/clr/hipamd/src/hip_hcc.cpp index fd3292a6f4..7fa25334bc 100644 --- a/projects/clr/hipamd/src/hip_hcc.cpp +++ b/projects/clr/hipamd/src/hip_hcc.cpp @@ -1120,6 +1120,8 @@ void ihipInit() HIP_TRACE_API = 1; } + + READ_ENV_I(release, HIP_TRACE_API, 0, "Trace each HIP API call. Print function name and return code to stderr as program executes."); READ_ENV_S(release, HIP_TRACE_API_COLOR, 0, "Color to use for HIP_API. None/Red/Green/Yellow/Blue/Magenta/Cyan/White"); READ_ENV_I(release, HIP_ATP_MARKER, 0, "Add HIP function begin/end to ATP file generated with CodeXL"); @@ -1143,6 +1145,22 @@ void ihipInit() fprintf (stderr, "warning: env var HIP_ATP_MARKER=0x%x but COMPILE_HIP_ATP_MARKER=0. (perhaps enable COMPILE_HIP_DB in src code before compiling?)", HIP_ATP_MARKER); } + if (HIP_DB) { + fprintf (stderr, "HIP_DB=0x%x [", HIP_DB); + bool first=true; + for (int i=0; igetCtx(); if ((ctx == nullptr) || (ctx->getDevice() == nullptr)) { + tprintf (DB_COPY1, "locked_copyAsync bad ctx or device\n"); throw ihipException(hipErrorInvalidDevice); } if (kind == hipMemcpyHostToHost) { - tprintf (DB_COPY2, "Asyc: H2H with memcpy"); + tprintf (DB_COPY1, "locked_copyAsync: H2H with memcpy"); // TODO - consider if we want to perhaps use the GPU SDMA engines anyway, to avoid the host-side sync here and keep everything flowing on the GPU. /* As this is a CPU op, we need to wait until all @@ -1613,10 +1650,14 @@ void ihipStream_t::locked_copyAsync(void* dst, const void* src, size_t sizeBytes bool copyEngineCanSeeSrcAndDest = true; - if (kind == hipMemcpyDeviceToDevice) { + if ((kind == hipMemcpyDeviceToDevice) || + ((kind == hipMemcpyDefault) && srcTracked && dstTracked)) { copyEngineCanSeeSrcAndDest = canSeePeerMemory(ctx, ihipGetPrimaryCtx(dstPtrInfo._appId), ihipGetPrimaryCtx(srcPtrInfo._appId)); } + tprintf (DB_COPY1, "locked_copyAsync: async memcpy dstTracked=%d srcTracked=%d copyEngineCanSeeSrcAndDest=%d\n", + dstTracked, srcTracked, copyEngineCanSeeSrcAndDest); + // "tracked" really indicates if the pointer's virtual address is available in the GPU address space. // If both pointers are not tracked, we need to fall back to a sync copy. @@ -1637,6 +1678,7 @@ void ihipStream_t::locked_copyAsync(void* dst, const void* src, size_t sizeBytes } } else { + // TODO - call copy_ext directly here? locked_copySync(dst, src, sizeBytes, kind); } } diff --git a/projects/clr/hipamd/src/hip_hcc.h b/projects/clr/hipamd/src/hip_hcc.h index 4e05450b19..bb06e23c67 100644 --- a/projects/clr/hipamd/src/hip_hcc.h +++ b/projects/clr/hipamd/src/hip_hcc.h @@ -190,16 +190,23 @@ extern const char *API_COLOR_END; #define DB_COPY1 3 /* 0x08 - trace memory copy commands. . */ #define DB_SIGNAL 4 /* 0x10 - trace signal pool commands */ #define DB_COPY2 5 /* 0x20 - trace memory copy commands. Detailed. */ +#define DB_MAX_BITPOS 5 // When adding a new debug flag, also add to the char name table below. +// -static const char *dbName [] = +struct DbName { + const char *_color; + const char *_shortName; +}; + +static const DbName dbName [] = { - KNRM "hip-api", // not used, - KYEL "hip-sync", - KCYN "hip-mem", - KMAG "hip-copy1", - KRED "hip-signal", - KNRM "hip-copy2", + {KGRN, "api"}, // not used, + {KYEL, "sync"}, + {KCYN, "mem"}, + {KMAG, "copy1"}, + {KRED, "signal"}, + {KNRM, "copy2"}, }; @@ -210,7 +217,7 @@ static const char *dbName [] = char msgStr[1000];\ snprintf(msgStr, 2000, __VA_ARGS__);\ COMPUTE_TID_STR\ - fprintf (stderr, " %s%s:%s%s", dbName[trace_level], tid_ss.str().c_str(), msgStr, KNRM); \ + fprintf (stderr, " %ship-%s%s:%s%s", dbName[trace_level]._color, dbName[trace_level]._shortName, tid_ss.str().c_str(), msgStr, KNRM); \ }\ } #else From 62c000d2fa72b12c40d7fb95dbd86a2da9ea9277 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Thu, 20 Oct 2016 18:29:24 +0300 Subject: [PATCH 3/8] [HIPIFY] Add missing options to tool's -help Also HelpMessage with a description of the common command-line options related to the compilation database and input files is added. [ROCm/clr commit: d9b2ed07c3f4b171e6255a3c7df76a8526325229] --- .../clr/hipamd/hipify-clang/src/Cuda2Hip.cpp | 41 ++++++++++++------- 1 file changed, 27 insertions(+), 14 deletions(-) diff --git a/projects/clr/hipamd/hipify-clang/src/Cuda2Hip.cpp b/projects/clr/hipamd/hipify-clang/src/Cuda2Hip.cpp index d2be3c242d..aaf1a38a81 100644 --- a/projects/clr/hipamd/hipify-clang/src/Cuda2Hip.cpp +++ b/projects/clr/hipamd/hipify-clang/src/Cuda2Hip.cpp @@ -1985,23 +1985,33 @@ void HipifyPPCallbacks::handleEndSource() { // Set up the command line options static cl::OptionCategory ToolTemplateCategory("CUDA to HIP source translator options"); -static cl::opt OutputFilename("o", cl::desc("Output filename"), - cl::value_desc("filename"), cl::cat(ToolTemplateCategory)); +static cl::opt OutputFilename("o", + cl::desc("Output filename"), + cl::value_desc("filename"), + cl::cat(ToolTemplateCategory)); -static cl::opt - Inplace("inplace", - cl::desc("Modify input file inplace, replacing input with hipified " - "output, save backup in .prehip file. "), - cl::value_desc("inplace")); +static cl::opt Inplace("inplace", + cl::desc("Modify input file inplace, replacing input with hipified " + "output, save backup in .prehip file"), + cl::value_desc("inplace"), + cl::cat(ToolTemplateCategory)); -static cl::opt - NoOutput("no-output", - cl::desc("don't write any translated output to stdout"), - cl::value_desc("no-output")); +static cl::opt NoOutput("no-output", + cl::desc("Don't write any translated output to stdout"), + cl::value_desc("no-output"), + cl::cat(ToolTemplateCategory)); -static cl::opt - PrintStats("print-stats", cl::desc("print the command-line, like a header"), - cl::value_desc("print-stats")); +static cl::opt PrintStats("print-stats", + cl::desc("Print translation statisitics"), + cl::value_desc("print-stats"), + cl::cat(ToolTemplateCategory)); + +static cl::opt N("n", + cl::desc("Combines -no-output and -print-stats options"), + cl::value_desc("n"), + cl::cat(ToolTemplateCategory)); + +static cl::extrahelp CommonHelp(CommonOptionsParser::HelpMessage); void addAllMatchers(ast_matchers::MatchFinder &Finder, Cuda2HipCallback *Callback) { Finder.addMatcher(callExpr(isExpansionInMainFile(), @@ -2086,6 +2096,9 @@ int main(int argc, const char **argv) { CommonOptionsParser OptionsParser(argc, argv, ToolTemplateCategory, llvm::cl::Required); std::vector fileSources = OptionsParser.getSourcePathList(); std::string dst = OutputFilename; + if (N) { + NoOutput = PrintStats = true; + } if (dst.empty()) { dst = fileSources[0]; if (!Inplace) { From 2a3dd02bbb8a618d0f516d9c614bea47b6e7162d Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Thu, 20 Oct 2016 18:57:52 +0300 Subject: [PATCH 4/8] [HIPIFY] Fix typo in option's help description [ROCm/clr commit: 4831ac9f7e2eec6505d3f50a21816e2c7266c0d5] --- projects/clr/hipamd/hipify-clang/src/Cuda2Hip.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/projects/clr/hipamd/hipify-clang/src/Cuda2Hip.cpp b/projects/clr/hipamd/hipify-clang/src/Cuda2Hip.cpp index aaf1a38a81..33e38b2c4b 100644 --- a/projects/clr/hipamd/hipify-clang/src/Cuda2Hip.cpp +++ b/projects/clr/hipamd/hipify-clang/src/Cuda2Hip.cpp @@ -2002,7 +2002,7 @@ static cl::opt NoOutput("no-output", cl::cat(ToolTemplateCategory)); static cl::opt PrintStats("print-stats", - cl::desc("Print translation statisitics"), + cl::desc("Print translation statistics"), cl::value_desc("print-stats"), cl::cat(ToolTemplateCategory)); From 32ee434c5efa7a4c448a336e6500d06b8600cd44 Mon Sep 17 00:00:00 2001 From: Aditya Atluri Date: Thu, 20 Oct 2016 14:05:43 -0500 Subject: [PATCH 5/8] changed docs to update support for memcpyToSymbol Change-Id: I63169cb10e64033a92dafd46930f499cdf145a8d [ROCm/clr commit: c39b40db5827b824d27d46255d9a160f782a19c6] --- .../hipamd/docs/markdown/hip_porting_guide.md | 67 +++++++++---------- 1 file changed, 30 insertions(+), 37 deletions(-) diff --git a/projects/clr/hipamd/docs/markdown/hip_porting_guide.md b/projects/clr/hipamd/docs/markdown/hip_porting_guide.md index cc964bc62a..621726ee5f 100644 --- a/projects/clr/hipamd/docs/markdown/hip_porting_guide.md +++ b/projects/clr/hipamd/docs/markdown/hip_porting_guide.md @@ -402,61 +402,54 @@ Code should not assume a warp size of 32 or 64. See [Warp Cross-Lane Functions] ## memcpyToSymbol -HIP support for hipMemCpyToSymbol is under-development. This feature allows a kernel +HIP support for hipMemCpyToSymbol is complete. This feature allows a kernel to define a device-side data symbol which can be accessed on the host side. The symbol -can be in __constant or device space. As a workaround, programs can pass the symbol -as an argument to the kernel, and use standard hipMemcpy routines to initialize it. +can be in __constant or device space. For example: Device Code: ``` -#include -#include +#include +#include #include -#ifdef __HIP_PLATFORM_HCC__ -__global__ void Inc(hipLaunchParm lp, float *Ad, float *Out) -#endif -#ifdef __HIP_PLATFORM_NVCC__ -__constant__ float Ad[1024]; -__global__ void Inc(hipLaunchParm lp, float *Out) -#endif +#define HIP_ASSERT(status) \ + assert(status == hipSuccess) + +#define LEN 512 +#define SIZE 2048 + +__constant__ int Value[LEN]; + +__global__ void Get(hipLaunchParm lp, int *Ad) { - int tx = hipThreadIdx_x; - Out[tx] = Ad[tx] + 1.0f; + int tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; + Ad[tid] = Value[tid]; } int main() { - float *A, *Ad; - float *Out, *Outd; - A = new float[1024]; - Out = new float[1024]; - - for(uint32_t i=0;i<1024;i++) + int *A, *B, *Ad; + A = new int[LEN]; + B = new int[LEN]; + for(unsigned i=0;i Date: Sat, 22 Oct 2016 23:59:39 -0500 Subject: [PATCH 6/8] Add workaround for hipStreamAddCallback function: call stream synchronize on host and then add execute the call back function Change-Id: If361f8e053949904b19b9e09245d267f05e29f7b [ROCm/clr commit: 7a0375854a467bf0c6f5ba3b777e9b08de53f6ae] --- .../include/hip/hcc_detail/hip_runtime_api.h | 49 +++++++++++++------ .../clr/hipamd/include/hip/hip_runtime_api.h | 2 +- .../include/hip/nvcc_detail/hip_runtime_api.h | 10 ++++ projects/clr/hipamd/src/hip_stream.cpp | 11 ++++- 4 files changed, 56 insertions(+), 16 deletions(-) diff --git a/projects/clr/hipamd/include/hip/hcc_detail/hip_runtime_api.h b/projects/clr/hipamd/include/hip/hcc_detail/hip_runtime_api.h index 1a40f6bd21..8c575eedc0 100644 --- a/projects/clr/hipamd/include/hip/hcc_detail/hip_runtime_api.h +++ b/projects/clr/hipamd/include/hip/hcc_detail/hip_runtime_api.h @@ -99,7 +99,7 @@ enum hipLimit_t #define hipDeviceScheduleSpin 0x1 ///< Dedicate a CPU core to spin-wait. Provides lowest latency, but burns a CPU core and may consume more power. #define hipDeviceScheduleYield 0x2 ///< Yield the CPU to the operating system when waiting. May increase latency, but lowers power and is friendlier to other threads in the system. #define hipDeviceScheduleBlockingSync 0x4 -#define hipDeviceScheduleMask 0x7 +#define hipDeviceScheduleMask 0x7 #define hipDeviceMapHost 0x8 #define hipDeviceLmemResizeToMax 0x16 @@ -385,7 +385,7 @@ hipError_t hipDeviceSetSharedMemConfig ( hipSharedMemConfig config ); * * @param [in] flags * - * The schedule flags impact how HIP waits for the completion of a command running on a device. + * The schedule flags impact how HIP waits for the completion of a command running on a device. * hipDeviceScheduleSpin : HIP runtime will actively spin in the thread which submitted the work until the command completes. This offers the lowest latency, but will consume a CPU core and may increase power. * hipDeviceScheduleYield : The HIP runtime will yield the CPU to system so that other tasks can use it. This may increase latency to detect the completion but will consume less power and is friendlier to other tasks in the system. * hipDeviceScheduleBlockingSync : On ROCm platform, this is a synonym for hipDeviceScheduleYield. @@ -393,7 +393,7 @@ hipError_t hipDeviceSetSharedMemConfig ( hipSharedMemConfig config ); * * * hipDeviceMapHost : Allow mapping host memory. On ROCM, this is always allowed and the flag is ignored. - * hipDeviceLmemResizeToMax : @warning ROCm silently ignores this flag. + * hipDeviceLmemResizeToMax : @warning ROCm silently ignores this flag. * * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorSetOnActiveProcess * @@ -568,14 +568,14 @@ hipError_t hipStreamQuery(hipStream_t stream); * @brief Wait for all commands in stream to complete. * * @param[in] stream stream identifier. - * + * * @return #hipSuccess, #hipErrorInvalidResourceHandle * * If the null stream is specified, this command blocks until all * This command honors the hipDeviceLaunchBlocking flag, which controls whether the wait is active or blocking. * This command is host-synchronous : the host will block until the stream is empty. * - * @see hipStreamCreate, hipStreamCreateWithFlags, hipStreamWaitEvent, hipStreamDestroy + * @see hipStreamCreate, hipStreamCreateWithFlags, hipStreamWaitEvent, hipStreamDestroy * */ hipError_t hipStreamSynchronize(hipStream_t stream); @@ -594,7 +594,7 @@ hipError_t hipStreamSynchronize(hipStream_t stream); * All future work submitted to @p stream will wait until @p event reports completion before beginning execution. * This function is host-asynchronous and the function may return before the wait has completed. * - * @see hipStreamCreate, hipStreamCreateWithFlags, hipStreamSynchronize, hipStreamDestroy + * @see hipStreamCreate, hipStreamCreateWithFlags, hipStreamSynchronize, hipStreamDestroy * */ hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int flags); @@ -612,10 +612,31 @@ hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int * * Return flags associated with this stream in *@p flags. * - * @see hipStreamCreateWithFlags + * @see hipStreamCreateWithFlags */ hipError_t hipStreamGetFlags(hipStream_t stream, unsigned int *flags); +/** + * Stream CallBack struct + */ +typedef void(* hipStreamCallback_t)(hipStream_t stream, hipError_t status, void* userData); + +/** + * @brief Adds a callback to be called on the host after all currently enqueued + * items in the stream have completed. For each + * cudaStreamAddCallback call, a callback will be executed exactly once. + * The callback will block later work in the stream until it is finished. + * @param[in] stream - Stream to add callback to + * @param[in] callback - The function to call once preceding stream operations are complete + * @param[in] userData - User specified data to be passed to the callback function + * @param[in] flags - Reserved for future use, must be 0 + * @return #hipSuccess, #hipErrorInvalidResourceHandle, #hipErrorNotSupported + * + * @see hipStreamCreate, hipStreamCreateWithFlags, hipStreamQuery, hipStreamSynchronize, hipStreamWaitEvent, hipStreamDestroy + * + */ +hipError_t hipStreamAddCallback(hipStream_t stream, hipStreamCallback_t callback, void *userData, unsigned int flags); + // end doxygen Stream /** @@ -637,11 +658,11 @@ hipError_t hipStreamGetFlags(hipStream_t stream, unsigned int *flags); * * @param[in,out] event Returns the newly created event. * @param[in] flags Flags to control event behavior. Valid values are #hipEventDefault, #hipEventBlockingSync, #hipEventDisableTiming, #hipEventInterprocess - + * #hipEventDefault : Default flag. The event will use active synchronization and will support timing. Blocking synchronization provides lowest possible latency at the expense of dedicating a CPU to poll on the eevent. * #hipEventBlockingSync : The event will use blocking synchronization : if hipEventSynchronize is called on this event, the thread will block until the event completes. This can increase latency for the synchroniation but can result in lower power and more resources for other CPU threads. * #hipEventDisableTiming : Disable recording of timing information. On ROCM platform, timing information is always recorded and this flag has no performance benefit. - + * @warning On HCC platform, hipEventInterprocess support is under development. Use of this flag will return an error. * * @returns #hipSuccess, #hipErrorInitializationError, #hipErrorInvalidValue, #hipErrorLaunchFailure, #hipErrorMemoryAllocation @@ -1098,7 +1119,7 @@ hipError_t hipMemcpyToSymbol(const char* symbolName, const void *src, size_t siz /** - * @brief Copies @p sizeBytes bytes from the memory area pointed to by @p src to the memory area pointed to by @p offset bytes from the start of symbol @p symbol + * @brief Copies @p sizeBytes bytes from the memory area pointed to by @p src to the memory area pointed to by @p offset bytes from the start of symbol @p symbol * * The memory areas may not overlap. Symbol can either be a variable that resides in global or constant memory space, or it can be a character string, * naming a variable that resides in global or constant memory space. Kind can be either hipMemcpyHostToDevice or hipMemcpyDeviceToDevice @@ -1405,9 +1426,9 @@ hipError_t hipCtxGetDevice(hipDevice_t *device); /** * @brief Returns the approximate HIP api version. * - * @param [in] ctx Context to check + * @param [in] ctx Context to check * @param [out] apiVersion - * + * * @return #hipSuccess * * @warning The HIP feature set does not correspond to an exact CUDA SDK api revision. @@ -1435,7 +1456,7 @@ hipError_t hipCtxGetCacheConfig ( hipFuncCache *cacheConfig ); /** * @brief Set L1/Shared cache partition. - * + * * @param [in] cacheConfiguration * * @return #hipSuccess @@ -1581,7 +1602,7 @@ hipError_t hipDeviceTotalMem (size_t *bytes,hipDevice_t device); /** * @brief Returns the approximate HIP driver version. - * + * * @param [out] driverVersion * * @returns #hipSuccess, #hipErrorInavlidValue diff --git a/projects/clr/hipamd/include/hip/hip_runtime_api.h b/projects/clr/hipamd/include/hip/hip_runtime_api.h index 884cb0c649..5a8dd44e61 100644 --- a/projects/clr/hipamd/include/hip/hip_runtime_api.h +++ b/projects/clr/hipamd/include/hip/hip_runtime_api.h @@ -198,7 +198,7 @@ typedef enum hipError_t { hipErrorInvalidDevice = 1010, ///< DeviceID must be in range 0...#compute-devices. hipErrorInvalidValue = 1011, ///< One or more of the parameters passed to the API call is NULL or not in an acceptable range. hipErrorInvalidDevicePointer = 1017, ///< Invalid Device Pointer - hipErrorInvalidMemcpyDirection = 1021, ///< Invalid memory copy direction + hipErrorInvalidMemcpyDirection = 1021, ///< Invalid memory copy direction hipErrorUnknown = 1030, ///< Unknown error. hipErrorInvalidResourceHandle = 1033, ///< Resource handle (hipEvent_t or hipStream_t) invalid. hipErrorNotReady = 1034, ///< Indicates that asynchronous operations enqueued earlier are not ready. This is not actually an error, but is used to distinguish from hipSuccess (which indicates completion). APIs that return this error include hipEventQuery and hipStreamQuery. diff --git a/projects/clr/hipamd/include/hip/nvcc_detail/hip_runtime_api.h b/projects/clr/hipamd/include/hip/nvcc_detail/hip_runtime_api.h index d27b937b60..a632e57f97 100644 --- a/projects/clr/hipamd/include/hip/nvcc_detail/hip_runtime_api.h +++ b/projects/clr/hipamd/include/hip/nvcc_detail/hip_runtime_api.h @@ -164,6 +164,11 @@ inline static cudaMemcpyKind hipMemcpyKindToCudaMemcpyKind(hipMemcpyKind kind) { } } +/** + * Stream CallBack struct + */ +typedef void(* hipStreamCallback_t)(hipStream_t stream, hipError_t status, void* userData); + inline static hipError_t hipInit(unsigned int flags) { return hipCUResultTohipError(cuInit(flags)); @@ -578,6 +583,11 @@ inline static hipError_t hipStreamQuery(hipStream_t stream) return hipCUDAErrorTohipError(cudaStreamQuery(stream)); } +inline static hipError_t hipStreamAddCallback(hipStream_t stream, hipStreamCallback_t callback, void *userData, unsigned int flags) +{ + return hipCUDAErrorTohipError(cudaStreamAddCallback(cudaStream_t stream, + cudaStreamCallback_t callback, void *userData, unsigned int flags)); +} inline static hipError_t hipDriverGetVersion(int *driverVersion) { diff --git a/projects/clr/hipamd/src/hip_stream.cpp b/projects/clr/hipamd/src/hip_stream.cpp index 3b1d6af038..8350035357 100644 --- a/projects/clr/hipamd/src/hip_stream.cpp +++ b/projects/clr/hipamd/src/hip_stream.cpp @@ -198,4 +198,13 @@ hipError_t hipStreamGetFlags(hipStream_t stream, unsigned int *flags) } - +//--- +hipError_t hipStreamAddCallback(hipStream_t stream, hipStreamCallback_t callback, void *userData, unsigned int flags) +{ + HIP_INIT_API(stream, callback, userData, flags); + hipError_t e = hipSuccess; + //--- explicitly synchronize stream to add callback routines + hipStreamSynchronize(stream); + callback(stream, e, userData); + return ihipLogStatus(e); +} From 57fab304c7c620cb31c9defbf3bc411499cfd30b Mon Sep 17 00:00:00 2001 From: pensun Date: Sun, 23 Oct 2016 12:18:09 -0500 Subject: [PATCH 7/8] Add direct test for hipStreamAddCallback workaround Change-Id: I890574eb30a76922888846882986edd0dc345309 [ROCm/clr commit: ad09270d7baca8daea858294ac93235ced962aa2] --- .../stream/hipStreamAddCallback.cpp | 95 +++++++++++++++++++ 1 file changed, 95 insertions(+) create mode 100644 projects/clr/hipamd/tests/src/runtimeApi/stream/hipStreamAddCallback.cpp diff --git a/projects/clr/hipamd/tests/src/runtimeApi/stream/hipStreamAddCallback.cpp b/projects/clr/hipamd/tests/src/runtimeApi/stream/hipStreamAddCallback.cpp new file mode 100644 index 0000000000..d6169fc806 --- /dev/null +++ b/projects/clr/hipamd/tests/src/runtimeApi/stream/hipStreamAddCallback.cpp @@ -0,0 +1,95 @@ +/* +Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/* HIT_START + * BUILD: %t %s ../../test_common.cpp + * RUN: %t + * HIT_END + */ + +// Test under-development. Call hipStreamAddCallback function and see if it works as expected. + +#include "hip/hip_runtime.h" +#include "test_common.h" +#define HIPRT_CB +const int NN = 1 << 21; + +__global__ void kernel(hipLaunchParm lp, float *x, float *y, int n){ + int tid = hipThreadIdx_x; + if(tid < 1){ + for(int i=0;icallbackFunc(status); +} + +void CallbackClass::callbackFunc(hipError_t status) +{ + HIPASSERT(status==hipSuccess); +} + +int main(){ + const int num_streams = 8; + hipStream_t streams[num_streams]; + /* float *data[num_streams], *yd, *xd;*/ + //float y = 1.0f, x = 1.0f; + //HIPCHECK(hipMalloc((void**)&yd, sizeof(float))); + //HIPCHECK(hipMalloc((void**)&xd, sizeof(float))); + //HIPCHECK(hipMemcpy(yd, &y, sizeof(float), hipMemcpyHostToDevice)); + //HIPCHECK(hipMemcpy(xd, &x, sizeof(float), hipMemcpyHostToDevice)); + //for(int i=0;i Date: Sun, 23 Oct 2016 12:36:02 -0500 Subject: [PATCH 8/8] Modify hipStreamAddCallback test case to consider both NULL stream and stream Change-Id: I8468c988f0f01c7d3f3176a01469d69bf7a68237 [ROCm/clr commit: 4a93beb3b067a0b14bf554527aa093925d40bc78] --- .../stream/hipStreamAddCallback.cpp | 45 +++---------------- 1 file changed, 5 insertions(+), 40 deletions(-) diff --git a/projects/clr/hipamd/tests/src/runtimeApi/stream/hipStreamAddCallback.cpp b/projects/clr/hipamd/tests/src/runtimeApi/stream/hipStreamAddCallback.cpp index d6169fc806..07a64a194d 100644 --- a/projects/clr/hipamd/tests/src/runtimeApi/stream/hipStreamAddCallback.cpp +++ b/projects/clr/hipamd/tests/src/runtimeApi/stream/hipStreamAddCallback.cpp @@ -28,21 +28,6 @@ THE SOFTWARE. #include "hip/hip_runtime.h" #include "test_common.h" #define HIPRT_CB -const int NN = 1 << 21; - -__global__ void kernel(hipLaunchParm lp, float *x, float *y, int n){ - int tid = hipThreadIdx_x; - if(tid < 1){ - for(int i=0;i