From 1cead6a4cd478ab74bdf42a5e2b3e58dfbf066ef Mon Sep 17 00:00:00 2001 From: Aditya Atluri Date: Fri, 31 Mar 2017 12:11:34 -0500 Subject: [PATCH] added new api hipHccModuleLaunchKernel 1. hipHccModuleLaunchKernel is same as hipModuleLaunchKernel with OpenCL workitem model 2. Added copy right 3. Fixed header naming Change-Id: I6a7c35a3566e2f8d3f5056613e34193775d4b236 --- hipamd/include/hip/channel_descriptor.h | 7 +- hipamd/include/hip/device_functions.h | 9 +- hipamd/include/hip/driver_types.h | 13 +- .../hip/hcc_detail/channel_descriptor.h | 6 +- hipamd/include/hip/hcc_detail/hcc_acc.h | 13 + hipamd/include/hip/hcc_detail/hip_hcc.h | 62 ++ hipamd/include/hip/hip_common.h | 7 +- hipamd/include/hip/hip_complex.h | 7 +- hipamd/include/hip/hip_fp16.h | 7 +- hipamd/include/hip/{hcc.h => hip_hcc.h} | 8 +- hipamd/include/hip/hip_profile.h | 7 +- hipamd/include/hip/hip_runtime.h | 7 +- hipamd/include/hip/hip_runtime_api.h | 5 +- hipamd/include/hip/hip_texture.h | 8 +- hipamd/include/hip/hip_vector_types.h | 7 +- hipamd/include/hip/math_functions.h | 7 +- hipamd/include/hip/texture_types.h | 10 +- .../samples/0_Intro/module_api/runKernel.cpp | 18 +- .../0_Intro/module_api/vcpy_kernel.cpp | 3 +- hipamd/src/env.cpp | 24 +- hipamd/src/hip_context.cpp | 4 +- hipamd/src/hip_device.cpp | 4 +- hipamd/src/hip_error.cpp | 4 +- hipamd/src/hip_event.cpp | 4 +- hipamd/src/hip_hcc.cpp | 8 +- hipamd/src/hip_hcc_internal.h | 885 ++++++++++++++++++ hipamd/src/hip_memory.cpp | 12 +- hipamd/src/hip_module.cpp | 65 +- hipamd/src/hip_peer.cpp | 4 +- hipamd/src/hip_stream.cpp | 4 +- 30 files changed, 1138 insertions(+), 91 deletions(-) create mode 100644 hipamd/include/hip/hcc_detail/hip_hcc.h rename hipamd/include/hip/{hcc.h => hip_hcc.h} (85%) create mode 100644 hipamd/src/hip_hcc_internal.h diff --git a/hipamd/include/hip/channel_descriptor.h b/hipamd/include/hip/channel_descriptor.h index af8875e256..b8e750b079 100644 --- a/hipamd/include/hip/channel_descriptor.h +++ b/hipamd/include/hip/channel_descriptor.h @@ -1,5 +1,5 @@ /* -Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -20,7 +20,8 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#pragma once +#ifndef HIP_INCLUDE_HIP_CHANNEL_DESCRIPTOR_H +#define HIP_INCLUDE_HIP_CHANNEL_DESCRIPTOR_H // Some standard header files, these are included by hc.hpp and so want to make them avail on both // paths to provide a consistent include env and avoid "missing symbol" errors that only appears @@ -34,3 +35,5 @@ THE SOFTWARE. #else #error("Must define exactly one of __HIP_PLATFORM_HCC__ or __HIP_PLATFORM_NVCC__"); #endif + +#endif diff --git a/hipamd/include/hip/device_functions.h b/hipamd/include/hip/device_functions.h index 24211b7d2d..aae6775d48 100644 --- a/hipamd/include/hip/device_functions.h +++ b/hipamd/include/hip/device_functions.h @@ -1,13 +1,16 @@ /* -Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. + Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE @@ -17,8 +20,8 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#ifndef HIP_DEVICE_FUNCTIONS_H -#define HIP_DEVICE_FUNCTIONS_H +#ifndef HIP_INCLUDE_HIP_DEVICE_FUNCTIONS_H +#define HIP_INCLUDE_HIP_DEVICE_FUNCTIONS_H #include diff --git a/hipamd/include/hip/driver_types.h b/hipamd/include/hip/driver_types.h index a4010d6b4e..5d06457dd5 100644 --- a/hipamd/include/hip/driver_types.h +++ b/hipamd/include/hip/driver_types.h @@ -1,22 +1,27 @@ /* -Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. + Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#pragma once +#ifndef HIP_INCLUDE_HIP_DRIVER_TYPES_H +#define HIP_INCLUDE_HIP_DRIVER_TYPES_H #include @@ -27,3 +32,5 @@ THE SOFTWARE. #else #error("Must define exactly one of __HIP_PLATFORM_HCC__ or __HIP_PLATFORM_NVCC__"); #endif + +#endif diff --git a/hipamd/include/hip/hcc_detail/channel_descriptor.h b/hipamd/include/hip/hcc_detail/channel_descriptor.h index 85689438e2..4be023f6ca 100644 --- a/hipamd/include/hip/hcc_detail/channel_descriptor.h +++ b/hipamd/include/hip/hcc_detail/channel_descriptor.h @@ -1,5 +1,5 @@ /* -Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -20,8 +20,8 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#ifndef HIP_HCC_DETAIL_CHANNEL_DESCRIPTOR_H -#define HIP_HCC_DETAIL_CHANNEL_DESCRIPTOR_H +#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_CHANNEL_DESCRIPTOR_H +#define HIP_INCLUDE_HIP_HCC_DETAIL_CHANNEL_DESCRIPTOR_H #include #include diff --git a/hipamd/include/hip/hcc_detail/hcc_acc.h b/hipamd/include/hip/hcc_detail/hcc_acc.h index c36acc52f5..962f795d6f 100644 --- a/hipamd/include/hip/hcc_detail/hcc_acc.h +++ b/hipamd/include/hip/hcc_detail/hcc_acc.h @@ -40,6 +40,19 @@ hipError_t hipHccGetAccelerator(int deviceId, hc::accelerator *acc); * @return #hipSuccess */ hipError_t hipHccGetAcceleratorView(hipStream_t stream, hc::accelerator_view **av); + + +hipError_t hipHccModuleLaunchKernel(hipFunction_t f, + uint32_t globalWorkSizeX, + uint32_t globalWorkSizeY, + uint32_t globalWorkSizeZ, + uint32_t localWorkSizeX, + uint32_t localWorkSizeY, + uint32_t localWorkSizeZ, + size_t sharedMemBytes, + hipStream_t hStream, + void **kernelParams, + void **extra); #endif #endif diff --git a/hipamd/include/hip/hcc_detail/hip_hcc.h b/hipamd/include/hip/hcc_detail/hip_hcc.h new file mode 100644 index 0000000000..645e980376 --- /dev/null +++ b/hipamd/include/hip/hcc_detail/hip_hcc.h @@ -0,0 +1,62 @@ +/* +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_HIP_HCC_H +#define HIP_INCLUDE_HIP_HCC_DETAIL_HIP_HCC_H + +#include "hip/hip_runtime_api.h" + +#if __cplusplus +#ifdef __HCC__ +#include +/** + * @brief Return hc::accelerator associated with the specified deviceId + * @return #hipSuccess, #hipErrorInvalidDevice + */ +hipError_t hipHccGetAccelerator(int deviceId, hc::accelerator *acc); + +/** + * @brief Return hc::accelerator_view associated with the specified stream + * + * If stream is 0, the accelerator_view for the default stream is returned. + * @return #hipSuccess + */ +hipError_t hipHccGetAcceleratorView(hipStream_t stream, hc::accelerator_view **av); + + +#endif // #ifdef __HCC__ + +hipError_t hipHccModuleLaunchKernel(hipFunction_t f, + uint32_t globalWorkSizeX, + uint32_t globalWorkSizeY, + uint32_t globalWorkSizeZ, + uint32_t localWorkSizeX, + uint32_t localWorkSizeY, + uint32_t localWorkSizeZ, + size_t sharedMemBytes, + hipStream_t hStream, + void **kernelParams, + void **extra); + +#endif // #if __cplusplus + +#endif // diff --git a/hipamd/include/hip/hip_common.h b/hipamd/include/hip/hip_common.h index 6317a792ee..da8ec4a55d 100644 --- a/hipamd/include/hip/hip_common.h +++ b/hipamd/include/hip/hip_common.h @@ -1,5 +1,5 @@ /* -Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -20,7 +20,8 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#pragma once +#ifndef HIP_INCLUDE_HIP_HIP_COMMON_H +#define HIP_INCLUDE_HIP_HIP_COMMON_H // Common code included at start of every hip file. // Auto enable __HIP_PLATFORM_HCC__ if compiling with HCC @@ -73,3 +74,5 @@ THE SOFTWARE. #define __HIP_ARCH_HAS_3DGRID__ (0) #define __HIP_ARCH_HAS_DYNAMIC_PARALLEL__ (0) #endif + +#endif diff --git a/hipamd/include/hip/hip_complex.h b/hipamd/include/hip/hip_complex.h index ea15137894..dc691be480 100644 --- a/hipamd/include/hip/hip_complex.h +++ b/hipamd/include/hip/hip_complex.h @@ -1,5 +1,5 @@ /* -Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -20,7 +20,8 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#pragma once +#ifndef HIP_INCLUDE_HIP_HIP_COMPLEX_H +#define HIP_INCLUDE_HIP_HIP_COMPLEX_H #include @@ -31,3 +32,5 @@ THE SOFTWARE. #else #error("Must define exactly one of __HIP_PLATFORM_HCC__ or __HIP_PLATFORM_NVCC__"); #endif + +#endif diff --git a/hipamd/include/hip/hip_fp16.h b/hipamd/include/hip/hip_fp16.h index 2f64c1a143..0e002d9396 100644 --- a/hipamd/include/hip/hip_fp16.h +++ b/hipamd/include/hip/hip_fp16.h @@ -1,5 +1,5 @@ /* -Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -20,7 +20,8 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#pragma once +#ifdef HIP_INCLUDE_HIP_HIP_FP16_H +#define HIP_INCLUDE_HIP_HIP_FP16_H #include @@ -31,3 +32,5 @@ THE SOFTWARE. #else #error("Must define exactly one of __HIP_PLATFORM_HCC__ or __HIP_PLATFORM_NVCC__"); #endif + +#endif diff --git a/hipamd/include/hip/hcc.h b/hipamd/include/hip/hip_hcc.h similarity index 85% rename from hipamd/include/hip/hcc.h rename to hipamd/include/hip/hip_hcc.h index 9b8a649412..3407a311bd 100644 --- a/hipamd/include/hip/hcc.h +++ b/hipamd/include/hip/hip_hcc.h @@ -1,5 +1,5 @@ /* -Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -20,11 +20,11 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#ifndef HIP_HCC_H -#define HIP_HCC_H +#ifndef HIP_INCLUDE_HIP_HIP_HCC_H +#define HIP_INCLUDE_HIP_HIP_HCC_H #if defined(__HIP_PLATFORM_HCC__) && !defined (__HIP_PLATFORM_NVCC__) -#include "hip/hcc_detail/hcc_acc.h" +#include "hip/hcc_detail/hip_hcc.h" #endif #endif diff --git a/hipamd/include/hip/hip_profile.h b/hipamd/include/hip/hip_profile.h index e621ae8c79..389f334c74 100644 --- a/hipamd/include/hip/hip_profile.h +++ b/hipamd/include/hip/hip_profile.h @@ -1,5 +1,5 @@ /* -Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -20,7 +20,8 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#pragma once +#ifndef HIP_INCLUDE_HIP_HIP_PROFILE_H +#define HIP_INCLUDE_HIP_HIP_PROFILE_H #if not defined (ENABLE_HIP_PROFILE) #define ENABLE_HIP_PROFILE 1 @@ -36,3 +37,5 @@ THE SOFTWARE. #define HIP_BEGIN_MARKER(markerName, group) #define HIP_END_MARKER() #endif + +#endif diff --git a/hipamd/include/hip/hip_runtime.h b/hipamd/include/hip/hip_runtime.h index 9bc45f300d..fba4d46d8f 100644 --- a/hipamd/include/hip/hip_runtime.h +++ b/hipamd/include/hip/hip_runtime.h @@ -1,5 +1,5 @@ /* -Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -33,7 +33,8 @@ THE SOFTWARE. //! hip_runtime.h : includes everything in hip_api.h, plus math builtins and kernel launch macros. //! hip_runtime_api.h : Defines HIP API. This is a C header file and does not use any C++ features. -#pragma once +#ifndef HIP_INCLUDE_HIP_HIP_RUNTIME_H +#define HIP_INCLUDE_HIP_HIP_RUNTIME_H // Some standard header files, these are included by hc.hpp and so want to make them avail on both // paths to provide a consistent include env and avoid "missing symbol" errors that only appears @@ -61,3 +62,5 @@ THE SOFTWARE. #include #include + +#endif diff --git a/hipamd/include/hip/hip_runtime_api.h b/hipamd/include/hip/hip_runtime_api.h index 818c0b7c34..5715be0599 100644 --- a/hipamd/include/hip/hip_runtime_api.h +++ b/hipamd/include/hip/hip_runtime_api.h @@ -1,5 +1,5 @@ /* -Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -27,7 +27,8 @@ THE SOFTWARE. * This file can be compiled with a standard compiler. */ -#pragma once +#ifndef HIP_INCLUDE_HIP_HIP_RUNTIME_API_H +#define HIP_INCLUDE_HIP_HIP_RUNTIME_API_H #include // for getDeviceProp diff --git a/hipamd/include/hip/hip_texture.h b/hipamd/include/hip/hip_texture.h index 66ec4a6ca1..a15c5a1016 100644 --- a/hipamd/include/hip/hip_texture.h +++ b/hipamd/include/hip/hip_texture.h @@ -1,5 +1,5 @@ /* -Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -20,10 +20,8 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ - - -#ifndef HIP_HIP_TEXTURE_H -#define HIP_HIP_TEXTURE_H +#ifndef HIP_INCLUDE_HIP_HIP_TEXTURE_H +#define HIP_INCLUDE_HIP_HIP_TEXTURE_H #if defined(__HIP_PLATFORM_HCC__) && !defined (__HIP_PLATFORM_NVCC__) #include diff --git a/hipamd/include/hip/hip_vector_types.h b/hipamd/include/hip/hip_vector_types.h index 33827e4d96..1d3d6b92f6 100644 --- a/hipamd/include/hip/hip_vector_types.h +++ b/hipamd/include/hip/hip_vector_types.h @@ -1,5 +1,5 @@ /* -Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -22,7 +22,8 @@ THE SOFTWARE. //! hip_vector_types.h : Defines the HIP vector types. -#pragma once +#ifndef HIP_INCLUDE_HIP_HIP_VECTOR_TYPES_H +#define HIP_INCLUDE_HIP_HIP_VECTOR_TYPES_H #include @@ -36,3 +37,5 @@ THE SOFTWARE. #else #error("Must define exactly one of __HIP_PLATFORM_HCC__ or __HIP_PLATFORM_NVCC__"); #endif + +#endif diff --git a/hipamd/include/hip/math_functions.h b/hipamd/include/hip/math_functions.h index ebcdc26749..6f47b5e0e2 100644 --- a/hipamd/include/hip/math_functions.h +++ b/hipamd/include/hip/math_functions.h @@ -1,5 +1,5 @@ /* -Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -20,7 +20,8 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#pragma once +#ifndef HIP_INCLUDE_HIP_MATH_FUNCTIONS_H +#define HIP_INCLUDE_HIP_MATH_FUNCTIONS_H // Some standard header files, these are included by hc.hpp and so want to make them avail on both // paths to provide a consistent include env and avoid "missing symbol" errors that only appears @@ -34,3 +35,5 @@ THE SOFTWARE. #else #error("Must define exactly one of __HIP_PLATFORM_HCC__ or __HIP_PLATFORM_NVCC__"); #endif + +#endif diff --git a/hipamd/include/hip/texture_types.h b/hipamd/include/hip/texture_types.h index 2561e12eb5..ca6101cf79 100644 --- a/hipamd/include/hip/texture_types.h +++ b/hipamd/include/hip/texture_types.h @@ -1,13 +1,16 @@ /* -Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. + Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE @@ -17,7 +20,8 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#pragma once +#ifndef HIP_INCLUDE_HIP_TEXTURE_TYPES_H +#define HIP_INCLUDE_HIP_TEXTURE_TYPES_H #include @@ -28,3 +32,5 @@ THE SOFTWARE. #else #error("Must define exactly one of __HIP_PLATFORM_HCC__ or __HIP_PLATFORM_NVCC__"); #endif + +#endif diff --git a/hipamd/samples/0_Intro/module_api/runKernel.cpp b/hipamd/samples/0_Intro/module_api/runKernel.cpp index 201892a4a1..e7d54beb54 100644 --- a/hipamd/samples/0_Intro/module_api/runKernel.cpp +++ b/hipamd/samples/0_Intro/module_api/runKernel.cpp @@ -1,5 +1,5 @@ /* -Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -22,9 +22,10 @@ THE SOFTWARE. #include "hip/hip_runtime.h" #include "hip/hip_runtime_api.h" -#include -#include -#include +#include +#include +#include +#include #define LEN 64 #define SIZE LEN<<2 @@ -43,10 +44,10 @@ int main(){ B[i] = 0.0f; } - hipInit(0); - hipDevice_t device; - hipCtx_t context; - hipDeviceGet(&device, 0); + hipInit(0); + hipDevice_t device; + hipCtx_t context; + hipDeviceGet(&device, 0); hipCtxCreate(&context, 0, device); hipMalloc((void**)&Ad, SIZE); @@ -101,6 +102,7 @@ int main(){ hipModuleLaunchKernel(Function, 1, 1, 1, LEN, 1, 1, 0, 0, NULL, (void**)&config); hipMemcpyDtoH(B, Bd, SIZE); + int mismatchCount = 0; for(uint32_t i=0;i #include "hip/hip_runtime.h" -#include "hip_hcc.h" +#include "hip_hcc_internal.h" #include "trace_helper.h" // Stack of contexts diff --git a/hipamd/src/hip_device.cpp b/hipamd/src/hip_device.cpp index d3c68e6fdf..88d94411e8 100644 --- a/hipamd/src/hip_device.cpp +++ b/hipamd/src/hip_device.cpp @@ -1,5 +1,5 @@ /* -Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -21,7 +21,7 @@ THE SOFTWARE. */ #include "hip/hip_runtime.h" -#include "hip_hcc.h" +#include "hip_hcc_internal.h" #include "trace_helper.h" #include "device_util.h" diff --git a/hipamd/src/hip_error.cpp b/hipamd/src/hip_error.cpp index 4c14ba4156..21d5b6aa85 100644 --- a/hipamd/src/hip_error.cpp +++ b/hipamd/src/hip_error.cpp @@ -1,5 +1,5 @@ /* -Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -21,7 +21,7 @@ THE SOFTWARE. */ #include "hip/hip_runtime.h" -#include "hip_hcc.h" +#include "hip_hcc_internal.h" #include "trace_helper.h" //------------------------------------------------------------------------------------------------- diff --git a/hipamd/src/hip_event.cpp b/hipamd/src/hip_event.cpp index 5a0ed9d8f8..d44f201db5 100644 --- a/hipamd/src/hip_event.cpp +++ b/hipamd/src/hip_event.cpp @@ -1,5 +1,5 @@ /* -Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -21,7 +21,7 @@ THE SOFTWARE. */ #include "hip/hip_runtime.h" -#include "hip_hcc.h" +#include "hip_hcc_internal.h" #include "trace_helper.h" //------------------------------------------------------------------------------------------------- diff --git a/hipamd/src/hip_hcc.cpp b/hipamd/src/hip_hcc.cpp index e422a6d4db..374840f91f 100644 --- a/hipamd/src/hip_hcc.cpp +++ b/hipamd/src/hip_hcc.cpp @@ -1,5 +1,5 @@ /* -Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -43,7 +43,7 @@ THE SOFTWARE. #include "hsa/hsa_ext_amd.h" #include "hip/hip_runtime.h" -#include "hip_hcc.h" +#include "hip_hcc_internal.h" #include "trace_helper.h" #include "env.h" @@ -762,7 +762,7 @@ hipError_t ihipDevice_t::initProperties(hipDeviceProp_t* prop) err = hsa_agent_get_info(_hsaAgent, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_PRODUCT_NAME, &(prop->name)); char archName[256]; err = hsa_agent_get_info(_hsaAgent, HSA_AGENT_INFO_NAME, &archName); - + if(strcmp(archName,"gfx701")==0){ prop->gcnArch = 701; } @@ -1805,7 +1805,7 @@ void ihipStream_t::resolveHcMemcpyDirection(unsigned hipMemKind, void printPointerInfo(unsigned dbFlag, const char *tag, const void *ptr, const hc::AmPointerInfo &ptrInfo) { tprintf (dbFlag, " %s=%p baseHost=%p baseDev=%p sz=%zu home_dev=%d tracked=%d isDevMem=%d registered=%d\n", - tag, ptr, + tag, ptr, ptrInfo._hostPointer, ptrInfo._devicePointer, ptrInfo._sizeBytes, ptrInfo._appId, ptrInfo._sizeBytes != 0, ptrInfo._isInDeviceMem, !ptrInfo._isAmManaged); } diff --git a/hipamd/src/hip_hcc_internal.h b/hipamd/src/hip_hcc_internal.h new file mode 100644 index 0000000000..245f154305 --- /dev/null +++ b/hipamd/src/hip_hcc_internal.h @@ -0,0 +1,885 @@ +/* +Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#ifndef HIP_HCC_H +#define HIP_HCC_H + +#include +#include +#include "hsa/hsa_ext_amd.h" + +#include "hip/hip_runtime.h" +#include "hip_util.h" +#include "env.h" + + +#if defined(__HCC__) && (__hcc_workweek__ < 16354) +#error("This version of HIP requires a newer version of HCC."); +#endif + +#define USE_IPC 1 + +//--- +// Environment variables: + +// Intended to distinguish whether an environment variable should be visible only in debug mode, or in debug+release. +//static const int debug = 0; +extern const int release; + +// TODO - this blocks both kernels and memory ops. Perhaps should have separate env var for kernels? +extern int HIP_LAUNCH_BLOCKING; +extern int HIP_API_BLOCKING; + +extern int HIP_PRINT_ENV; +extern int HIP_PROFILE_API; +//extern int HIP_TRACE_API; +extern int HIP_ATP; +extern int HIP_DB; +extern int HIP_STAGING_SIZE; /* size of staging buffers, in KB */ +extern int HIP_STREAM_SIGNALS; /* number of signals to allocate at stream creation */ +extern int HIP_VISIBLE_DEVICES; /* Contains a comma-separated sequence of GPU identifiers */ +extern int HIP_FORCE_P2P_HOST; + +extern int HIP_COHERENT_HOST_ALLOC; + + +//--- +// Chicken bits for disabling functionality to work around potential issues: +extern int HIP_SYNC_HOST_ALLOC; + +// TODO - remove when this is standard behavior. +extern int HCC_OPT_FLUSH; + + +// Class to assign a short TID to each new thread, for HIP debugging purposes. +class TidInfo { +public: + + TidInfo() ; + + int tid() const { return _shortTid; }; + uint64_t incApiSeqNum() { return ++_apiSeqNum; }; + uint64_t apiSeqNum() const { return _apiSeqNum; }; + +private: + int _shortTid; + + // monotonically increasing API sequence number for this threa. + uint64_t _apiSeqNum; +}; + +struct ProfTrigger { + + static const uint64_t MAX_TRIGGER = std::numeric_limits::max(); + + void print (int tid) { + std::cout << "Enabling tracing for "; + for (auto iter=_profTrigger.begin(); iter != _profTrigger.end(); iter++) { + std::cout << "tid:" << tid << "." << *iter << ","; + } + std::cout << "\n"; + }; + + uint64_t nextTrigger() { return _profTrigger.empty() ? MAX_TRIGGER : _profTrigger.back(); }; + void add(uint64_t trigger) { _profTrigger.push_back(trigger); }; + void sort() { std::sort (_profTrigger.begin(), _profTrigger.end(), std::greater()); }; +private: + std::vector _profTrigger; +}; + + + +//--- +//Extern tls +extern thread_local hipError_t tls_lastHipError; +extern thread_local TidInfo tls_tidInfo; + +extern std::vector g_dbStartTriggers; +extern std::vector g_dbStopTriggers; + +//--- +//Forward defs: +class ihipStream_t; +class ihipDevice_t; +class ihipCtx_t; + +// Color defs for debug messages: +#define KNRM "\x1B[0m" +#define KRED "\x1B[31m" +#define KGRN "\x1B[32m" +#define KYEL "\x1B[33m" +#define KBLU "\x1B[34m" +#define KMAG "\x1B[35m" +#define KCYN "\x1B[36m" +#define KWHT "\x1B[37m" + +extern const char *API_COLOR; +extern const char *API_COLOR_END; + + +// If set, thread-safety is enforced on all stream functions. +// Stream functions will acquire a mutex before entering critical sections. +#define STREAM_THREAD_SAFE 1 + + +#define CTX_THREAD_SAFE 1 + +#define DEVICE_THREAD_SAFE 1 + + +// Compile debug trace mode - this prints debug messages to stderr when env var HIP_DB is set. +// May be set to 0 to remove debug if checks - possible code size and performance difference? +#define COMPILE_HIP_DB 1 + + +// Compile HIP tracing capability. +// 0x1 = print a string at function entry with arguments. +// 0x2 = prints a simple message with function name + return code when function exits. +// 0x3 = print both. +// Must be enabled at runtime with HIP_TRACE_API +#define COMPILE_HIP_TRACE_API 0x3 + + +// Compile code that generates trace markers for CodeXL ATP at HIP function begin/end. +// ATP is standard CodeXL format that includes timestamps for kernels, HSA RT APIs, and HIP APIs. +#ifndef COMPILE_HIP_ATP_MARKER +#define COMPILE_HIP_ATP_MARKER 0 +#endif + + + + +// Compile support for trace markers that are displayed on CodeXL GUI at start/stop of each function boundary. +// TODO - currently we print the trace message at the beginning. if we waited, we could also tls_tidInfo return codes, and any values returned +// through ptr-to-args (ie the pointers allocated by hipMalloc). +#if COMPILE_HIP_ATP_MARKER +#include "CXLActivityLogger.h" +#define MARKER_BEGIN(markerName,group) amdtBeginMarker(markerName, group, nullptr); +#define MARKER_END() amdtEndMarker(); +#define RESUME_PROFILING amdtResumeProfiling(AMDT_ALL_PROFILING); +#define STOP_PROFILING amdtStopProfiling(AMDT_ALL_PROFILING); +#else +// Swallow scoped markers: +#define MARKER_BEGIN(markerName,group) +#define MARKER_END() +#define RESUME_PROFILING +#define STOP_PROFILING +#endif + + +//--- +//HIP Trace modes +#define TRACE_ALL 0 // 0x1 +#define TRACE_CMD 1 // 0x2 +#define TRACE_MEM 2 // 0x4 + + +//--- +//HIP_DB Debug flags: +#define DB_API 0 /* 0x01 - shortcut to enable HIP_TRACE_API on single switch */ +#define DB_SYNC 1 /* 0x02 - trace synchronization pieces */ +#define DB_MEM 2 /* 0x04 - trace memory allocation / deallocation */ +#define DB_COPY 3 /* 0x08 - trace memory copy and peer commands. . */ +#define DB_MAX_FLAG 4 +// When adding a new debug flag, also add to the char name table below. +// +// + +struct DbName { + const char *_color; + const char *_shortName; +}; + +// This table must be kept in-sync with the defines above. +static const DbName dbName [] = +{ + {KGRN, "api"}, // not used, + {KYEL, "sync"}, + {KCYN, "mem"}, + {KMAG, "copy"}, +}; + + + +#if COMPILE_HIP_DB +#define tprintf(trace_level, ...) {\ + if (HIP_DB & (1<<(trace_level))) {\ + char msgStr[1000];\ + snprintf(msgStr, 2000, __VA_ARGS__);\ + fprintf (stderr, " %ship-%s tid:%d:%s%s", dbName[trace_level]._color, dbName[trace_level]._shortName, tls_tidInfo.tid(), msgStr, KNRM); \ + }\ +} +#else +/* Compile to empty code */ +#define tprintf(trace_level, ...) +#endif + + + + +//--- +extern void recordApiTrace(std::string *fullStr, const std::string &apiStr); + +#if COMPILE_HIP_ATP_MARKER || (COMPILE_HIP_TRACE_API & 0x1) +#define API_TRACE(forceTrace, ...)\ +{\ + tls_tidInfo.incApiSeqNum();\ + if (forceTrace || (HIP_PROFILE_API || (COMPILE_HIP_DB && (HIP_TRACE_API & (1<>%s\n", (localHipStatus == 0) ? API_COLOR:KRED, tls_tidInfo.tid(),tls_tidInfo.apiSeqNum(), __func__, localHipStatus, ihipErrorString(localHipStatus), API_COLOR_END);\ + }\ + if (HIP_PROFILE_API) { MARKER_END(); }\ + localHipStatus;\ + }) + + + + + + +class ihipException : public std::exception +{ +public: + ihipException(hipError_t e) : _code(e) {}; + + hipError_t _code; +}; + + +#ifdef __cplusplus +extern "C" { +#endif + + +#ifdef __cplusplus +} +#endif + +const hipStream_t hipStreamNull = 0x0; + + +/** + * HIP IPC Handle Size + */ +#define HIP_IPC_RESERVED_SIZE 24 +class ihipIpcMemHandle_t +{ +public: +#if USE_IPC + hsa_amd_ipc_memory_t ipc_handle; ///< ipc memory handle on ROCr +#endif + size_t psize; + char reserved[HIP_IPC_RESERVED_SIZE]; +}; + + +class ihipModule_t { +public: + hsa_executable_t executable; + hsa_code_object_t object; + std::string fileName; + void *ptr; + size_t size; + std::list funcTrack; + ihipModule_t() : executable(), object(), fileName(), ptr(nullptr), size(0) {} +}; + + +//--- +// Used to remove lock, for performance or stimulating bugs. +class FakeMutex +{ + public: + void lock() { } + bool try_lock() {return true; } + void unlock() { } +}; + + +#if STREAM_THREAD_SAFE +typedef std::mutex StreamMutex; +#else +#warning "Stream thread-safe disabled" +typedef FakeMutex StreamMutex; +#endif + +// Pair Device and Ctx together, these could also be toggled separately if desired. +#if CTX_THREAD_SAFE +typedef std::mutex CtxMutex; +#else +typedef FakeMutex CtxMutex; +#warning "Ctx thread-safe disabled" +#endif + +#if DEVICE_THREAD_SAFE +typedef std::mutex DeviceMutex; +#else +typedef FakeMutex DeviceMutex; +#warning "Device thread-safe disabled" +#endif + +// +//--- +// Protects access to the member _data with a lock acquired on contruction/destruction. +// T must contain a _mutex field which meets the BasicLockable requirements (lock/unlock) +template +class LockedAccessor +{ +public: + LockedAccessor(T &criticalData, bool autoUnlock=true) : + _criticalData(&criticalData), + _autoUnlock(autoUnlock) + + { + tprintf(DB_SYNC, "locking criticalData=%p for %s..\n", _criticalData, ToString(_criticalData->_parent).c_str()); + _criticalData->_mutex.lock(); + }; + + ~LockedAccessor() + { + if (_autoUnlock) { + tprintf(DB_SYNC, "auto-unlocking criticalData=%p for %s...\n", _criticalData, ToString(_criticalData->_parent).c_str()); + _criticalData->_mutex.unlock(); + } + } + + void unlock() + { + tprintf(DB_SYNC, "unlocking criticalData=%p for %s...\n", _criticalData, ToString(_criticalData->_parent).c_str()); + _criticalData->_mutex.unlock(); + } + + // Syntactic sugar so -> can be used to get the underlying type. + T *operator->() { return _criticalData; }; + +private: + T *_criticalData; + bool _autoUnlock; +}; + + +template +struct LockedBase { + + // Experts-only interface for explicit locking. + // Most uses should use the lock-accessor. + void lock() { _mutex.lock(); } + void unlock() { _mutex.unlock(); } + bool try_lock() { return _mutex.try_lock(); } + + MUTEX_TYPE _mutex; +}; + + +template +class ihipStreamCriticalBase_t : public LockedBase +{ +public: + ihipStreamCriticalBase_t(ihipStream_t *parentStream, hc::accelerator_view av) : + _kernelCnt(0), + _av(av), + _hasQueue(true), + _parent(parentStream) + { + }; + + ~ihipStreamCriticalBase_t() { + } + + ihipStreamCriticalBase_t * mlock() { LockedBase::lock(); return this;}; + + void munlock() { + tprintf(DB_SYNC, "munlocking criticalData=%p for %s...\n", this, ToString(this->_parent).c_str()); + LockedBase::unlock(); + }; + + ihipStreamCriticalBase_t * mtry_lock() { + bool gotLock = LockedBase::try_lock() ; + tprintf(DB_SYNC, "mtry_locking=%d criticalData=%p for %s...\n", gotLock, this, ToString(this->_parent).c_str()); + return gotLock ? this: nullptr; + }; + +public: + ihipStream_t * _parent; + uint32_t _kernelCnt; // Count of inflight kernels in this stream. Reset at ::wait(). + + hc::accelerator_view _av; + + // True if the stream has an allocated queue (accelerato_view) for its use: + // Always true at ihipStream creation but queue may later be stolen. + // This acts as a valid bit for the _av. + bool _hasQueue; +private: +}; + + +// if HIP code needs to acquire locks for both ihipCtx_t and ihipStream_t, it should first acquire the lock +// for the ihipCtx_t and then for the individual streams. The locks should not be acquired in reverse order +// or deadlock may occur. In some cases, it may be possible to reduce the range where the locks must be held. +// HIP routines should avoid acquiring and releasing the same lock during the execution of a single HIP API. +// Another option is to use try_lock in the innermost lock query. + + +typedef ihipStreamCriticalBase_t ihipStreamCritical_t; +typedef LockedAccessor LockedAccessor_StreamCrit_t; + +//--- +// Internal stream structure. +class ihipStream_t { +public: + enum ScheduleMode {Auto, Spin, Yield}; + typedef uint64_t SeqNum_t ; + + // TODOD -make av a reference to avoid shared_ptr overhead? + ihipStream_t(ihipCtx_t *ctx, hc::accelerator_view av, unsigned int flags); + ~ihipStream_t(); + + // kind is hipMemcpyKind + void locked_copySync (void* dst, const void* src, size_t sizeBytes, unsigned kind, bool resolveOn = true); + void locked_copyAsync(void* dst, const void* src, size_t sizeBytes, unsigned kind); + + void lockedSymbolCopySync(hc::accelerator &acc, void *dst, void* src, size_t sizeBytes, size_t offset, unsigned kind); + void lockedSymbolCopyAsync(hc::accelerator &acc, void *dst, void* src, size_t sizeBytes, size_t offset, unsigned kind); + + //--- + // Member functions that begin with locked_ are thread-safe accessors - these acquire / release the critical mutex. + LockedAccessor_StreamCrit_t lockopen_preKernelCommand(); + void lockclose_postKernelCommand(const char *kernelName, hc::accelerator_view *av); + + + void locked_wait(); + + hc::accelerator_view* locked_getAv() { LockedAccessor_StreamCrit_t crit(_criticalData); return &(crit->_av); }; + + void locked_waitEvent(hipEvent_t event); + void locked_recordEvent(hipEvent_t event); + + + //--- + + // Use this if we already have the stream critical data mutex: + void wait(LockedAccessor_StreamCrit_t &crit); + + void launchModuleKernel(hc::accelerator_view av, hsa_signal_t signal, + uint32_t blockDimX, uint32_t blockDimY, uint32_t blockDimZ, + uint32_t gridDimX, uint32_t gridDimY, uint32_t gridDimZ, + uint32_t groupSegmentSize, uint32_t sharedMemBytes, + void *kernarg, size_t kernSize, uint64_t kernel); + + + + //-- Non-racy accessors: + // These functions access fields set at initialization time and are non-racy (so do not acquire mutex) + const ihipDevice_t * getDevice() const; + ihipCtx_t * getCtx() const; + + void ensureHaveQueue(LockedAccessor_StreamCrit_t &streamCrit); + +public: + //--- + //Public member vars - these are set at initialization and never change: + SeqNum_t _id; // monotonic sequence ID + unsigned _flags; + + +private: + + + // The unsigned return is hipMemcpyKind + unsigned resolveMemcpyDirection(bool srcInDeviceMem, bool dstInDeviceMem); + void resolveHcMemcpyDirection(unsigned hipMemKind, + const hc::AmPointerInfo *dstPtrInfo, const hc::AmPointerInfo *srcPtrInfo, + hc::hcCommandKind *hcCopyDir, + ihipCtx_t **copyDevice, + bool *forceUnpinnedCopy); + + bool canSeeMemory(const ihipCtx_t *thisCtx, const hc::AmPointerInfo *dstInfo, const hc::AmPointerInfo *srcInfo); + + void addSymbolPtrToTracker(hc::accelerator& acc, void* ptr, size_t sizeBytes); + +public: // TODO - move private + // Critical Data - MUST be accessed through LockedAccessor_StreamCrit_t + ihipStreamCritical_t _criticalData; + +private: // Data + + std::mutex _hasQueueLock; + + ihipCtx_t *_ctx; // parent context that owns this stream. + + // Friends: + friend std::ostream& operator<<(std::ostream& os, const ihipStream_t& s); + friend hipError_t hipStreamQuery(hipStream_t); + + ScheduleMode _scheduleMode; +}; + + + +//---- +// Internal event structure: +enum hipEventStatus_t { + hipEventStatusUnitialized = 0, // event is unutilized, must be "Created" before use. + hipEventStatusCreated = 1, + hipEventStatusRecording = 2, // event has been enqueued to record something. + hipEventStatusRecorded = 3, // event has been recorded - timestamps are valid. +} ; + + +// internal hip event structure. +struct ihipEvent_t { + hipEventStatus_t _state; + + hipStream_t _stream; // Stream where the event is recorded, or NULL if all streams. + unsigned _flags; + + hc::completion_future _marker; + uint64_t _timestamp; // store timestamp, may be set on host or by marker. +} ; + + + +//============================================================================= +//class ihipDeviceCriticalBase_t +template +class ihipDeviceCriticalBase_t : LockedBase +{ +public: + ihipDeviceCriticalBase_t(ihipDevice_t *parentDevice) : + _parent(parentDevice) + { + }; + + ~ihipDeviceCriticalBase_t() { + + } + + // Contexts: + void addContext(ihipCtx_t *ctx); + void removeContext(ihipCtx_t *ctx); + std::list &ctxs() { return _ctxs; }; + const std::list &const_ctxs() const { return _ctxs; }; + int getcount() {return _ctxCount;}; + friend class LockedAccessor; +private: + ihipDevice_t *_parent; + + //--- Context Tracker: + std::list< ihipCtx_t* > _ctxs; // contexts associated with this device across all threads. + + int _ctxCount; +}; + +typedef ihipDeviceCriticalBase_t ihipDeviceCritical_t; + +typedef LockedAccessor LockedAccessor_DeviceCrit_t; + +//---- +// Properties of the HIP device. +// Multiple contexts can point to same device. +class ihipDevice_t +{ +public: + ihipDevice_t(unsigned deviceId, unsigned deviceCnt, hc::accelerator &acc); + ~ihipDevice_t(); + + // Accessors: + ihipCtx_t *getPrimaryCtx() const { return _primaryCtx; }; + void locked_removeContext(ihipCtx_t *c); + void locked_reset(); + ihipDeviceCritical_t &criticalData() { return _criticalData; }; +public: + unsigned _deviceId; // device ID + + hc::accelerator _acc; + hsa_agent_t _hsaAgent; // hsa agent handle + + //! Number of compute units supported by the device: + unsigned _computeUnits; + hipDeviceProp_t _props; // saved device properties. + + // TODO - report this through device properties, base on HCC API call. + int _isLargeBar; + + ihipCtx_t *_primaryCtx; + + int _state; //1 if device is set otherwise 0 + +private: + hipError_t initProperties(hipDeviceProp_t* prop); +private: + ihipDeviceCritical_t _criticalData; +}; +//============================================================================= + + + +//============================================================================= +//class ihipCtxCriticalBase_t +template +class ihipCtxCriticalBase_t : LockedBase +{ +public: + ihipCtxCriticalBase_t(ihipCtx_t *parentCtx, unsigned deviceCnt) : + _parent(parentCtx), + _peerCnt(0) + { + _peerAgents = new hsa_agent_t[deviceCnt]; + }; + + ~ihipCtxCriticalBase_t() { + if (_peerAgents != nullptr) { + delete _peerAgents; + _peerAgents = nullptr; + } + _peerCnt = 0; + } + + // Streams: + void addStream(ihipStream_t *stream); + std::list &streams() { return _streams; }; + const std::list &const_streams() const { return _streams; }; + + + + // Peer Accessor classes: + bool isPeerWatcher(const ihipCtx_t *peer); // returns True if peer has access to memory physically located on this device. + bool addPeerWatcher(const ihipCtx_t *thisCtx, ihipCtx_t *peer); + bool removePeerWatcher(const ihipCtx_t *thisCtx, ihipCtx_t *peer); + void resetPeerWatchers(ihipCtx_t *thisDevice); + void printPeerWatchers(FILE *f) const; + + uint32_t peerCnt() const { return _peerCnt; }; + hsa_agent_t *peerAgents() const { return _peerAgents; }; + + + // TODO - move private + std::list _peers; // list of enabled peer devices. + + friend class LockedAccessor; +private: + ihipCtx_t * _parent; + + //--- Stream Tracker: + std::list< ihipStream_t* > _streams; // streams associated with this device. + + + //--- Peer Tracker: + // These reflect the currently Enabled set of peers for this GPU: + // Enabled peers have permissions to access the memory physically allocated on this device. + // Note the peers always contain the self agent for easy interfacing with HSA APIs. + uint32_t _peerCnt; // number of enabled peers + hsa_agent_t *_peerAgents; // efficient packed array of enabled agents (to use for allocations.) +private: + void recomputePeerAgents(); +}; +// Note Mutex type Real/Fake selected based on CtxMutex +typedef ihipCtxCriticalBase_t ihipCtxCritical_t; + +// This type is used by functions that need access to the critical device structures. +typedef LockedAccessor LockedAccessor_CtxCrit_t; +//============================================================================= + + +//============================================================================= +//class ihipCtx_t: +// A HIP CTX (context) points at one of the existing devices and contains the streams, +// peer-to-peer mappings, creation flags. Multiple contexts can point to the same +// device. +// +class ihipCtx_t +{ +public: // Functions: + ihipCtx_t(ihipDevice_t *device, unsigned deviceCnt, unsigned flags); // note: calls constructor for _criticalData + ~ihipCtx_t(); + + // Functions which read or write the critical data are named locked_. + // (might be better called "locking_" + // ihipCtx_t does not use recursive locks so the ihip implementation must avoid calling a locked_ function from within a locked_ function. + // External functions which call several locked_ functions will acquire and release the lock for each function. if this occurs in + // performance-sensitive code we may want to refactor by adding non-locked functions and creating a new locked_ member function to call them all. + void locked_removeStream(ihipStream_t *s); + void locked_reset(); + void locked_waitAllStreams(); + void locked_syncDefaultStream(bool waitOnSelf); + + // Will allocate a queue and assign it to the needyStream: + hc::accelerator_view stealActiveQueue(LockedAccessor_CtxCrit_t &ctxCrit, ihipStream_t *needyStream); + hc::accelerator_view createOrStealQueue(LockedAccessor_CtxCrit_t &ctxCrit); + + ihipCtxCritical_t &criticalData() { return _criticalData; }; + + const ihipDevice_t *getDevice() const { return _device; }; + int getDeviceNum() const { return _device->_deviceId; }; + + // TODO - review uses of getWriteableDevice(), can these be converted to getDevice() + ihipDevice_t *getWriteableDevice() const { return _device; }; + + std::string toString() const; + +public: // Data + // The NULL stream is used if no other stream is specified. + // Default stream has special synchronization properties with other streams. + ihipStream_t *_defaultStream; + + // Flags specified when the context is created: + unsigned _ctxFlags; + +private: + ihipDevice_t *_device; + + +private: // Critical data, protected with locked access: + // Members of _protected data MUST be accessed through the LockedAccessor. + // Search for LockedAccessor for examples; do not access _criticalData directly. + ihipCtxCritical_t _criticalData; + +}; + + + +//================================================================================================= +// Global variable definition: +extern std::once_flag hip_initialized; +extern unsigned g_deviceCnt; +extern hsa_agent_t g_cpu_agent ; // the CPU agent. + +//================================================================================================= +// Extern functions: +extern void ihipInit(); +extern const char *ihipErrorString(hipError_t); +extern ihipCtx_t *ihipGetTlsDefaultCtx(); +extern void ihipSetTlsDefaultCtx(ihipCtx_t *ctx); +extern hipError_t ihipSynchronize(void); +extern void ihipCtxStackUpdate(); +extern hipError_t ihipDeviceSetState(); + +extern ihipDevice_t *ihipGetDevice(int); +ihipCtx_t * ihipGetPrimaryCtx(unsigned deviceIndex); + +extern void ihipSetTs(hipEvent_t e); + + +hipStream_t ihipSyncAndResolveStream(hipStream_t); + +// Stream printf functions: +inline std::ostream& operator<<(std::ostream& os, const ihipStream_t& s) +{ + os << "stream:"; + os << s.getDevice()->_deviceId;; + os << '.'; + os << s._id; + return os; +} + +inline std::ostream & operator<<(std::ostream& os, const dim3& s) +{ + os << '{'; + os << s.x; + os << ','; + os << s.y; + os << ','; + os << s.z; + os << '}'; + return os; +} + +inline std::ostream & operator<<(std::ostream& os, const gl_dim3& s) +{ + os << '{'; + os << s.x; + os << ','; + os << s.y; + os << ','; + os << s.z; + os << '}'; + return os; +} + +// Stream printf functions: +inline std::ostream& operator<<(std::ostream& os, const hipEvent_t& e) +{ + os << "event:" << std::hex << static_cast (e); + return os; +} + +inline std::ostream& operator<<(std::ostream& os, const ihipCtx_t* c) +{ + os << "ctx:" << static_cast (c) + << ".dev:" << c->getDevice()->_deviceId; + return os; +} + + +// Helper functions that are used across src files: +namespace hip_internal { + hipError_t memcpyAsync (void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind, hipStream_t stream); +}; + + +#endif diff --git a/hipamd/src/hip_memory.cpp b/hipamd/src/hip_memory.cpp index 4684287076..805fc9efc0 100644 --- a/hipamd/src/hip_memory.cpp +++ b/hipamd/src/hip_memory.cpp @@ -1,5 +1,5 @@ /* -Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -25,7 +25,7 @@ THE SOFTWARE. #include "hsa/hsa_ext_amd.h" #include "hip/hip_runtime.h" -#include "hip_hcc.h" +#include "hip_hcc_internal.h" #include "trace_helper.h" #include "hip/hcc_detail/hip_texture.h" #include @@ -261,11 +261,11 @@ hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags) auto device = ctx->getWriteableDevice(); unsigned amFlags = HIP_COHERENT_HOST_ALLOC ? amHostCoherent : amHostPinned; - *ptr = hip_internal::allocAndSharePtr(HIP_COHERENT_HOST_ALLOC ? "finegrained_host":"pinned_host", + *ptr = hip_internal::allocAndSharePtr(HIP_COHERENT_HOST_ALLOC ? "finegrained_host":"pinned_host", sizeBytes, ctx, amFlags, flags); if(sizeBytes && (*ptr == NULL)){ hip_status = hipErrorMemoryAllocation; - } + } } } @@ -314,7 +314,7 @@ hipError_t hipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t height if (sizeBytes && (*ptr == NULL)) { hip_status = hipErrorMemoryAllocation; - } + } } else { hip_status = hipErrorMemoryAllocation; } @@ -372,7 +372,7 @@ hipError_t hipMallocArray(hipArray** array, const hipChannelFormatDesc* desc, *ptr = hip_internal::allocAndSharePtr("device_array", allocSize, ctx, am_flags, 0); if (size && (*ptr == NULL)) { hip_status = hipErrorMemoryAllocation; - } + } } else { hip_status = hipErrorMemoryAllocation; diff --git a/hipamd/src/hip_module.cpp b/hipamd/src/hip_module.cpp index c4b6cb8e08..554e13387a 100644 --- a/hipamd/src/hip_module.cpp +++ b/hipamd/src/hip_module.cpp @@ -1,5 +1,5 @@ /* -Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -27,15 +27,13 @@ THE SOFTWARE. #include #include #include -#include "AMDGPUPTNote.h" -#include "AMDGPURuntimeMetadata.h" #include "hsa/hsa.h" #include "hsa/hsa_ext_amd.h" #include "hsa/amd_hsa_kernel_code.h" #include "hip/hip_runtime.h" -#include "hip_hcc.h" +#include "hip_hcc_internal.h" #include "trace_helper.h" //TODO Use Pool APIs from HCC to get memory regions. @@ -365,16 +363,12 @@ hipError_t hipModuleGetFunction(hipFunction_t *hfunc, hipModule_t hmod, } -hipError_t hipModuleLaunchKernel(hipFunction_t f, - uint32_t gridDimX, uint32_t gridDimY, uint32_t gridDimZ, - uint32_t blockDimX, uint32_t blockDimY, uint32_t blockDimZ, - uint32_t sharedMemBytes, hipStream_t hStream, +hipError_t ihipModuleLaunchKernel(hipFunction_t f, + uint32_t globalWorkSizeX, uint32_t globalWorkSizeY, uint32_t globalWorkSizeZ, + uint32_t localWorkSizeX, uint32_t localWorkSizeY, uint32_t localWorkSizeZ, + size_t sharedMemBytes, hipStream_t hStream, void **kernelParams, void **extra) { - HIP_INIT_API(f, gridDimX, gridDimY, gridDimZ, - blockDimX, blockDimY, blockDimZ, - sharedMemBytes, hStream, - kernelParams, extra); auto ctx = ihipGetTlsDefaultCtx(); hipError_t ret = hipSuccess; @@ -420,7 +414,7 @@ hipError_t hipModuleLaunchKernel(hipFunction_t f, */ grid_launch_parm lp; lp.dynamic_group_mem_bytes = sharedMemBytes; // TODO - this should be part of preLaunchKernel. - hStream = ihipPreLaunchKernel(hStream, dim3(gridDimX, gridDimY, gridDimZ), dim3(blockDimX, blockDimY, blockDimZ), &lp, f->_name.c_str()); + hStream = ihipPreLaunchKernel(hStream, dim3(globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ), dim3(localWorkSizeX, localWorkSizeY, localWorkSizeZ), &lp, f->_name.c_str()); hsa_kernel_dispatch_packet_t aql; @@ -430,12 +424,12 @@ hipError_t hipModuleLaunchKernel(hipFunction_t f, //aql.completion_signal._handle = 0; //aql.kernarg_address = 0; - aql.workgroup_size_x = blockDimX; - aql.workgroup_size_y = blockDimY; - aql.workgroup_size_z = blockDimZ; - aql.grid_size_x = blockDimX * gridDimX; - aql.grid_size_y = blockDimY * gridDimY; - aql.grid_size_z = blockDimZ * gridDimZ; + aql.workgroup_size_x = localWorkSizeX; + aql.workgroup_size_y = localWorkSizeY; + aql.workgroup_size_z = localWorkSizeZ; + aql.grid_size_x = globalWorkSizeX; + aql.grid_size_y = globalWorkSizeY; + aql.grid_size_z = globalWorkSizeZ; aql.group_segment_size = f->_groupSegmentSize + sharedMemBytes; aql.private_segment_size = f->_privateSegmentSize; aql.kernel_object = f->_object; @@ -459,9 +453,40 @@ hipError_t hipModuleLaunchKernel(hipFunction_t f, ihipPostLaunchKernel(f->_name.c_str(), hStream, lp); } - return ihipLogStatus(ret); + return ret; } +hipError_t hipModuleLaunchKernel(hipFunction_t f, + uint32_t gridDimX, uint32_t gridDimY, uint32_t gridDimZ, + uint32_t blockDimX, uint32_t blockDimY, uint32_t blockDimZ, + uint32_t sharedMemBytes, hipStream_t hStream, + void **kernelParams, void **extra) +{ + HIP_INIT_API(f, gridDimX, gridDimY, gridDimZ, + blockDimX, blockDimY, blockDimZ, + sharedMemBytes, hStream, + kernelParams, extra); + return ihipLogStatus(ihipModuleLaunchKernel(f, + blockDimX * gridDimX, blockDimY * gridDimY, gridDimZ * blockDimZ, + blockDimX, blockDimY, blockDimZ, + sharedMemBytes, hStream, kernelParams, extra)); +} + + +hipError_t hipHccModuleLaunchKernel(hipFunction_t f, + uint32_t globalWorkSizeX, uint32_t globalWorkSizeY, uint32_t globalWorkSizeZ, + uint32_t localWorkSizeX, uint32_t localWorkSizeY, uint32_t localWorkSizeZ, + size_t sharedMemBytes, hipStream_t hStream, + void **kernelParams, void **extra) +{ + HIP_INIT_API(f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ, + localWorkSizeX, localWorkSizeY, localWorkSizeZ, + sharedMemBytes, hStream, + kernelParams, extra); + return ihipLogStatus(ihipModuleLaunchKernel(f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ, + localWorkSizeX, localWorkSizeY, localWorkSizeZ, + sharedMemBytes, hStream, kernelParams, extra)); +} hipError_t hipModuleGetGlobal(hipDeviceptr_t *dptr, size_t *bytes, hipModule_t hmod, const char* name) diff --git a/hipamd/src/hip_peer.cpp b/hipamd/src/hip_peer.cpp index e57665be0c..984110a6b5 100644 --- a/hipamd/src/hip_peer.cpp +++ b/hipamd/src/hip_peer.cpp @@ -1,5 +1,5 @@ /* -Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -23,7 +23,7 @@ THE SOFTWARE. #include #include "hip/hip_runtime.h" -#include "hip_hcc.h" +#include "hip_hcc_internal.h" #include "trace_helper.h" diff --git a/hipamd/src/hip_stream.cpp b/hipamd/src/hip_stream.cpp index 594fb6e860..d7f8717725 100644 --- a/hipamd/src/hip_stream.cpp +++ b/hipamd/src/hip_stream.cpp @@ -1,5 +1,5 @@ /* -Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -21,7 +21,7 @@ THE SOFTWARE. */ #include "hip/hip_runtime.h" -#include "hip_hcc.h" +#include "hip_hcc_internal.h" #include "trace_helper.h"