added new api hipHccModuleLaunchKernel
1. hipHccModuleLaunchKernel is same as hipModuleLaunchKernel with OpenCL workitem model
2. Added copy right
3. Fixed header naming
Change-Id: I6a7c35a3566e2f8d3f5056613e34193775d4b236
[ROCm/clr commit: 1cead6a4cd]
Этот коммит содержится в:
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
@@ -20,7 +20,8 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#ifndef HIP_INCLUDE_HIP_CHANNEL_DESCRIPTOR_H
|
||||
#define HIP_INCLUDE_HIP_CHANNEL_DESCRIPTOR_H
|
||||
|
||||
// Some standard header files, these are included by hc.hpp and so want to make them avail on both
|
||||
// paths to provide a consistent include env and avoid "missing symbol" errors that only appears
|
||||
@@ -34,3 +35,5 @@ THE SOFTWARE.
|
||||
#else
|
||||
#error("Must define exactly one of __HIP_PLATFORM_HCC__ or __HIP_PLATFORM_NVCC__");
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
@@ -1,13 +1,16 @@
|
||||
/*
|
||||
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
@@ -17,8 +20,8 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef HIP_DEVICE_FUNCTIONS_H
|
||||
#define HIP_DEVICE_FUNCTIONS_H
|
||||
#ifndef HIP_INCLUDE_HIP_DEVICE_FUNCTIONS_H
|
||||
#define HIP_INCLUDE_HIP_DEVICE_FUNCTIONS_H
|
||||
|
||||
#include<hip/hip_common.h>
|
||||
|
||||
|
||||
@@ -1,22 +1,27 @@
|
||||
/*
|
||||
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#ifndef HIP_INCLUDE_HIP_DRIVER_TYPES_H
|
||||
#define HIP_INCLUDE_HIP_DRIVER_TYPES_H
|
||||
|
||||
#include <hip/hip_common.h>
|
||||
|
||||
@@ -27,3 +32,5 @@ THE SOFTWARE.
|
||||
#else
|
||||
#error("Must define exactly one of __HIP_PLATFORM_HCC__ or __HIP_PLATFORM_NVCC__");
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
@@ -20,8 +20,8 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef HIP_HCC_DETAIL_CHANNEL_DESCRIPTOR_H
|
||||
#define HIP_HCC_DETAIL_CHANNEL_DESCRIPTOR_H
|
||||
#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_CHANNEL_DESCRIPTOR_H
|
||||
#define HIP_INCLUDE_HIP_HCC_DETAIL_CHANNEL_DESCRIPTOR_H
|
||||
|
||||
#include<hip/hcc_detail/driver_types.h>
|
||||
#include<hip/hcc_detail/hip_vector_types.h>
|
||||
|
||||
@@ -40,6 +40,19 @@ hipError_t hipHccGetAccelerator(int deviceId, hc::accelerator *acc);
|
||||
* @return #hipSuccess
|
||||
*/
|
||||
hipError_t hipHccGetAcceleratorView(hipStream_t stream, hc::accelerator_view **av);
|
||||
|
||||
|
||||
hipError_t hipHccModuleLaunchKernel(hipFunction_t f,
|
||||
uint32_t globalWorkSizeX,
|
||||
uint32_t globalWorkSizeY,
|
||||
uint32_t globalWorkSizeZ,
|
||||
uint32_t localWorkSizeX,
|
||||
uint32_t localWorkSizeY,
|
||||
uint32_t localWorkSizeZ,
|
||||
size_t sharedMemBytes,
|
||||
hipStream_t hStream,
|
||||
void **kernelParams,
|
||||
void **extra);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
@@ -0,0 +1,62 @@
|
||||
/*
|
||||
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_HIP_HCC_H
|
||||
#define HIP_INCLUDE_HIP_HCC_DETAIL_HIP_HCC_H
|
||||
|
||||
#include "hip/hip_runtime_api.h"
|
||||
|
||||
#if __cplusplus
|
||||
#ifdef __HCC__
|
||||
#include <hc.hpp>
|
||||
/**
|
||||
* @brief Return hc::accelerator associated with the specified deviceId
|
||||
* @return #hipSuccess, #hipErrorInvalidDevice
|
||||
*/
|
||||
hipError_t hipHccGetAccelerator(int deviceId, hc::accelerator *acc);
|
||||
|
||||
/**
|
||||
* @brief Return hc::accelerator_view associated with the specified stream
|
||||
*
|
||||
* If stream is 0, the accelerator_view for the default stream is returned.
|
||||
* @return #hipSuccess
|
||||
*/
|
||||
hipError_t hipHccGetAcceleratorView(hipStream_t stream, hc::accelerator_view **av);
|
||||
|
||||
|
||||
#endif // #ifdef __HCC__
|
||||
|
||||
hipError_t hipHccModuleLaunchKernel(hipFunction_t f,
|
||||
uint32_t globalWorkSizeX,
|
||||
uint32_t globalWorkSizeY,
|
||||
uint32_t globalWorkSizeZ,
|
||||
uint32_t localWorkSizeX,
|
||||
uint32_t localWorkSizeY,
|
||||
uint32_t localWorkSizeZ,
|
||||
size_t sharedMemBytes,
|
||||
hipStream_t hStream,
|
||||
void **kernelParams,
|
||||
void **extra);
|
||||
|
||||
#endif // #if __cplusplus
|
||||
|
||||
#endif //
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
@@ -20,7 +20,8 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#ifndef HIP_INCLUDE_HIP_HIP_COMMON_H
|
||||
#define HIP_INCLUDE_HIP_HIP_COMMON_H
|
||||
|
||||
// Common code included at start of every hip file.
|
||||
// Auto enable __HIP_PLATFORM_HCC__ if compiling with HCC
|
||||
@@ -73,3 +74,5 @@ THE SOFTWARE.
|
||||
#define __HIP_ARCH_HAS_3DGRID__ (0)
|
||||
#define __HIP_ARCH_HAS_DYNAMIC_PARALLEL__ (0)
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
@@ -20,7 +20,8 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#ifndef HIP_INCLUDE_HIP_HIP_COMPLEX_H
|
||||
#define HIP_INCLUDE_HIP_HIP_COMPLEX_H
|
||||
|
||||
#include <hip/hip_common.h>
|
||||
|
||||
@@ -31,3 +32,5 @@ THE SOFTWARE.
|
||||
#else
|
||||
#error("Must define exactly one of __HIP_PLATFORM_HCC__ or __HIP_PLATFORM_NVCC__");
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
@@ -20,7 +20,8 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#ifdef HIP_INCLUDE_HIP_HIP_FP16_H
|
||||
#define HIP_INCLUDE_HIP_HIP_FP16_H
|
||||
|
||||
#include <hip/hip_common.h>
|
||||
|
||||
@@ -31,3 +32,5 @@ THE SOFTWARE.
|
||||
#else
|
||||
#error("Must define exactly one of __HIP_PLATFORM_HCC__ or __HIP_PLATFORM_NVCC__");
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
+4
-4
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
@@ -20,11 +20,11 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef HIP_HCC_H
|
||||
#define HIP_HCC_H
|
||||
#ifndef HIP_INCLUDE_HIP_HIP_HCC_H
|
||||
#define HIP_INCLUDE_HIP_HIP_HCC_H
|
||||
|
||||
#if defined(__HIP_PLATFORM_HCC__) && !defined (__HIP_PLATFORM_NVCC__)
|
||||
#include "hip/hcc_detail/hcc_acc.h"
|
||||
#include "hip/hcc_detail/hip_hcc.h"
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
@@ -20,7 +20,8 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#ifndef HIP_INCLUDE_HIP_HIP_PROFILE_H
|
||||
#define HIP_INCLUDE_HIP_HIP_PROFILE_H
|
||||
|
||||
#if not defined (ENABLE_HIP_PROFILE)
|
||||
#define ENABLE_HIP_PROFILE 1
|
||||
@@ -36,3 +37,5 @@ THE SOFTWARE.
|
||||
#define HIP_BEGIN_MARKER(markerName, group)
|
||||
#define HIP_END_MARKER()
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
@@ -33,7 +33,8 @@ THE SOFTWARE.
|
||||
//! hip_runtime.h : includes everything in hip_api.h, plus math builtins and kernel launch macros.
|
||||
//! hip_runtime_api.h : Defines HIP API. This is a C header file and does not use any C++ features.
|
||||
|
||||
#pragma once
|
||||
#ifndef HIP_INCLUDE_HIP_HIP_RUNTIME_H
|
||||
#define HIP_INCLUDE_HIP_HIP_RUNTIME_H
|
||||
|
||||
// Some standard header files, these are included by hc.hpp and so want to make them avail on both
|
||||
// paths to provide a consistent include env and avoid "missing symbol" errors that only appears
|
||||
@@ -61,3 +62,5 @@ THE SOFTWARE.
|
||||
|
||||
#include <hip/hip_runtime_api.h>
|
||||
#include <hip/hip_vector_types.h>
|
||||
|
||||
#endif
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
@@ -27,7 +27,8 @@ THE SOFTWARE.
|
||||
* This file can be compiled with a standard compiler.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#ifndef HIP_INCLUDE_HIP_HIP_RUNTIME_API_H
|
||||
#define HIP_INCLUDE_HIP_HIP_RUNTIME_API_H
|
||||
|
||||
|
||||
#include <string.h> // for getDeviceProp
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
@@ -20,10 +20,8 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
|
||||
#ifndef HIP_HIP_TEXTURE_H
|
||||
#define HIP_HIP_TEXTURE_H
|
||||
#ifndef HIP_INCLUDE_HIP_HIP_TEXTURE_H
|
||||
#define HIP_INCLUDE_HIP_HIP_TEXTURE_H
|
||||
|
||||
#if defined(__HIP_PLATFORM_HCC__) && !defined (__HIP_PLATFORM_NVCC__)
|
||||
#include <hip/hcc_detail/hip_texture.h>
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
@@ -22,7 +22,8 @@ THE SOFTWARE.
|
||||
|
||||
//! hip_vector_types.h : Defines the HIP vector types.
|
||||
|
||||
#pragma once
|
||||
#ifndef HIP_INCLUDE_HIP_HIP_VECTOR_TYPES_H
|
||||
#define HIP_INCLUDE_HIP_HIP_VECTOR_TYPES_H
|
||||
|
||||
#include <hip/hip_common.h>
|
||||
|
||||
@@ -36,3 +37,5 @@ THE SOFTWARE.
|
||||
#else
|
||||
#error("Must define exactly one of __HIP_PLATFORM_HCC__ or __HIP_PLATFORM_NVCC__");
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
@@ -20,7 +20,8 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#ifndef HIP_INCLUDE_HIP_MATH_FUNCTIONS_H
|
||||
#define HIP_INCLUDE_HIP_MATH_FUNCTIONS_H
|
||||
|
||||
// Some standard header files, these are included by hc.hpp and so want to make them avail on both
|
||||
// paths to provide a consistent include env and avoid "missing symbol" errors that only appears
|
||||
@@ -34,3 +35,5 @@ THE SOFTWARE.
|
||||
#else
|
||||
#error("Must define exactly one of __HIP_PLATFORM_HCC__ or __HIP_PLATFORM_NVCC__");
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
@@ -1,13 +1,16 @@
|
||||
/*
|
||||
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
@@ -17,7 +20,8 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#ifndef HIP_INCLUDE_HIP_TEXTURE_TYPES_H
|
||||
#define HIP_INCLUDE_HIP_TEXTURE_TYPES_H
|
||||
|
||||
#include <hip/hip_common.h>
|
||||
|
||||
@@ -28,3 +32,5 @@ THE SOFTWARE.
|
||||
#else
|
||||
#error("Must define exactly one of __HIP_PLATFORM_HCC__ or __HIP_PLATFORM_NVCC__");
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
@@ -22,9 +22,10 @@ THE SOFTWARE.
|
||||
|
||||
#include "hip/hip_runtime.h"
|
||||
#include "hip/hip_runtime_api.h"
|
||||
#include<iostream>
|
||||
#include<fstream>
|
||||
#include<vector>
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <vector>
|
||||
#include <hip/hip_hcc.h>
|
||||
|
||||
#define LEN 64
|
||||
#define SIZE LEN<<2
|
||||
@@ -43,10 +44,10 @@ int main(){
|
||||
B[i] = 0.0f;
|
||||
}
|
||||
|
||||
hipInit(0);
|
||||
hipDevice_t device;
|
||||
hipCtx_t context;
|
||||
hipDeviceGet(&device, 0);
|
||||
hipInit(0);
|
||||
hipDevice_t device;
|
||||
hipCtx_t context;
|
||||
hipDeviceGet(&device, 0);
|
||||
hipCtxCreate(&context, 0, device);
|
||||
|
||||
hipMalloc((void**)&Ad, SIZE);
|
||||
@@ -101,6 +102,7 @@ int main(){
|
||||
hipModuleLaunchKernel(Function, 1, 1, 1, LEN, 1, 1, 0, 0, NULL, (void**)&config);
|
||||
|
||||
hipMemcpyDtoH(B, Bd, SIZE);
|
||||
|
||||
int mismatchCount = 0;
|
||||
for(uint32_t i=0;i<LEN;i++){
|
||||
if (A[i] != B[i]) {
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
@@ -27,4 +27,3 @@ extern "C" __global__ void hello_world(hipLaunchParm lp, float *a, float *b)
|
||||
int tx = hipThreadIdx_x;
|
||||
b[tx] = a[tx];
|
||||
}
|
||||
|
||||
|
||||
@@ -1,4 +1,26 @@
|
||||
#include "hip_hcc.h"
|
||||
/*
|
||||
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "hip_hcc_internal.h"
|
||||
#include "trace_helper.h"
|
||||
#include "env.h"
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
@@ -26,7 +26,7 @@ THE SOFTWARE.
|
||||
#include <stack>
|
||||
|
||||
#include "hip/hip_runtime.h"
|
||||
#include "hip_hcc.h"
|
||||
#include "hip_hcc_internal.h"
|
||||
#include "trace_helper.h"
|
||||
|
||||
// Stack of contexts
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
@@ -21,7 +21,7 @@ THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "hip/hip_runtime.h"
|
||||
#include "hip_hcc.h"
|
||||
#include "hip_hcc_internal.h"
|
||||
#include "trace_helper.h"
|
||||
#include "device_util.h"
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
@@ -21,7 +21,7 @@ THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "hip/hip_runtime.h"
|
||||
#include "hip_hcc.h"
|
||||
#include "hip_hcc_internal.h"
|
||||
#include "trace_helper.h"
|
||||
|
||||
//-------------------------------------------------------------------------------------------------
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
@@ -21,7 +21,7 @@ THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "hip/hip_runtime.h"
|
||||
#include "hip_hcc.h"
|
||||
#include "hip_hcc_internal.h"
|
||||
#include "trace_helper.h"
|
||||
|
||||
//-------------------------------------------------------------------------------------------------
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
@@ -43,7 +43,7 @@ THE SOFTWARE.
|
||||
#include "hsa/hsa_ext_amd.h"
|
||||
|
||||
#include "hip/hip_runtime.h"
|
||||
#include "hip_hcc.h"
|
||||
#include "hip_hcc_internal.h"
|
||||
#include "trace_helper.h"
|
||||
#include "env.h"
|
||||
|
||||
@@ -762,7 +762,7 @@ hipError_t ihipDevice_t::initProperties(hipDeviceProp_t* prop)
|
||||
err = hsa_agent_get_info(_hsaAgent, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_PRODUCT_NAME, &(prop->name));
|
||||
char archName[256];
|
||||
err = hsa_agent_get_info(_hsaAgent, HSA_AGENT_INFO_NAME, &archName);
|
||||
|
||||
|
||||
if(strcmp(archName,"gfx701")==0){
|
||||
prop->gcnArch = 701;
|
||||
}
|
||||
@@ -1805,7 +1805,7 @@ void ihipStream_t::resolveHcMemcpyDirection(unsigned hipMemKind,
|
||||
void printPointerInfo(unsigned dbFlag, const char *tag, const void *ptr, const hc::AmPointerInfo &ptrInfo)
|
||||
{
|
||||
tprintf (dbFlag, " %s=%p baseHost=%p baseDev=%p sz=%zu home_dev=%d tracked=%d isDevMem=%d registered=%d\n",
|
||||
tag, ptr,
|
||||
tag, ptr,
|
||||
ptrInfo._hostPointer, ptrInfo._devicePointer, ptrInfo._sizeBytes,
|
||||
ptrInfo._appId, ptrInfo._sizeBytes != 0, ptrInfo._isInDeviceMem, !ptrInfo._isAmManaged);
|
||||
}
|
||||
|
||||
@@ -0,0 +1,885 @@
|
||||
/*
|
||||
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef HIP_HCC_H
|
||||
#define HIP_HCC_H
|
||||
|
||||
#include <hc.hpp>
|
||||
#include <hsa/hsa.h>
|
||||
#include "hsa/hsa_ext_amd.h"
|
||||
|
||||
#include "hip/hip_runtime.h"
|
||||
#include "hip_util.h"
|
||||
#include "env.h"
|
||||
|
||||
|
||||
#if defined(__HCC__) && (__hcc_workweek__ < 16354)
|
||||
#error("This version of HIP requires a newer version of HCC.");
|
||||
#endif
|
||||
|
||||
#define USE_IPC 1
|
||||
|
||||
//---
|
||||
// Environment variables:
|
||||
|
||||
// Intended to distinguish whether an environment variable should be visible only in debug mode, or in debug+release.
|
||||
//static const int debug = 0;
|
||||
extern const int release;
|
||||
|
||||
// TODO - this blocks both kernels and memory ops. Perhaps should have separate env var for kernels?
|
||||
extern int HIP_LAUNCH_BLOCKING;
|
||||
extern int HIP_API_BLOCKING;
|
||||
|
||||
extern int HIP_PRINT_ENV;
|
||||
extern int HIP_PROFILE_API;
|
||||
//extern int HIP_TRACE_API;
|
||||
extern int HIP_ATP;
|
||||
extern int HIP_DB;
|
||||
extern int HIP_STAGING_SIZE; /* size of staging buffers, in KB */
|
||||
extern int HIP_STREAM_SIGNALS; /* number of signals to allocate at stream creation */
|
||||
extern int HIP_VISIBLE_DEVICES; /* Contains a comma-separated sequence of GPU identifiers */
|
||||
extern int HIP_FORCE_P2P_HOST;
|
||||
|
||||
extern int HIP_COHERENT_HOST_ALLOC;
|
||||
|
||||
|
||||
//---
|
||||
// Chicken bits for disabling functionality to work around potential issues:
|
||||
extern int HIP_SYNC_HOST_ALLOC;
|
||||
|
||||
// TODO - remove when this is standard behavior.
|
||||
extern int HCC_OPT_FLUSH;
|
||||
|
||||
|
||||
// Class to assign a short TID to each new thread, for HIP debugging purposes.
|
||||
class TidInfo {
|
||||
public:
|
||||
|
||||
TidInfo() ;
|
||||
|
||||
int tid() const { return _shortTid; };
|
||||
uint64_t incApiSeqNum() { return ++_apiSeqNum; };
|
||||
uint64_t apiSeqNum() const { return _apiSeqNum; };
|
||||
|
||||
private:
|
||||
int _shortTid;
|
||||
|
||||
// monotonically increasing API sequence number for this threa.
|
||||
uint64_t _apiSeqNum;
|
||||
};
|
||||
|
||||
struct ProfTrigger {
|
||||
|
||||
static const uint64_t MAX_TRIGGER = std::numeric_limits<uint64_t>::max();
|
||||
|
||||
void print (int tid) {
|
||||
std::cout << "Enabling tracing for ";
|
||||
for (auto iter=_profTrigger.begin(); iter != _profTrigger.end(); iter++) {
|
||||
std::cout << "tid:" << tid << "." << *iter << ",";
|
||||
}
|
||||
std::cout << "\n";
|
||||
};
|
||||
|
||||
uint64_t nextTrigger() { return _profTrigger.empty() ? MAX_TRIGGER : _profTrigger.back(); };
|
||||
void add(uint64_t trigger) { _profTrigger.push_back(trigger); };
|
||||
void sort() { std::sort (_profTrigger.begin(), _profTrigger.end(), std::greater<int>()); };
|
||||
private:
|
||||
std::vector<uint64_t> _profTrigger;
|
||||
};
|
||||
|
||||
|
||||
|
||||
//---
|
||||
//Extern tls
|
||||
extern thread_local hipError_t tls_lastHipError;
|
||||
extern thread_local TidInfo tls_tidInfo;
|
||||
|
||||
extern std::vector<ProfTrigger> g_dbStartTriggers;
|
||||
extern std::vector<ProfTrigger> g_dbStopTriggers;
|
||||
|
||||
//---
|
||||
//Forward defs:
|
||||
class ihipStream_t;
|
||||
class ihipDevice_t;
|
||||
class ihipCtx_t;
|
||||
|
||||
// Color defs for debug messages:
|
||||
#define KNRM "\x1B[0m"
|
||||
#define KRED "\x1B[31m"
|
||||
#define KGRN "\x1B[32m"
|
||||
#define KYEL "\x1B[33m"
|
||||
#define KBLU "\x1B[34m"
|
||||
#define KMAG "\x1B[35m"
|
||||
#define KCYN "\x1B[36m"
|
||||
#define KWHT "\x1B[37m"
|
||||
|
||||
extern const char *API_COLOR;
|
||||
extern const char *API_COLOR_END;
|
||||
|
||||
|
||||
// If set, thread-safety is enforced on all stream functions.
|
||||
// Stream functions will acquire a mutex before entering critical sections.
|
||||
#define STREAM_THREAD_SAFE 1
|
||||
|
||||
|
||||
#define CTX_THREAD_SAFE 1
|
||||
|
||||
#define DEVICE_THREAD_SAFE 1
|
||||
|
||||
|
||||
// Compile debug trace mode - this prints debug messages to stderr when env var HIP_DB is set.
|
||||
// May be set to 0 to remove debug if checks - possible code size and performance difference?
|
||||
#define COMPILE_HIP_DB 1
|
||||
|
||||
|
||||
// Compile HIP tracing capability.
|
||||
// 0x1 = print a string at function entry with arguments.
|
||||
// 0x2 = prints a simple message with function name + return code when function exits.
|
||||
// 0x3 = print both.
|
||||
// Must be enabled at runtime with HIP_TRACE_API
|
||||
#define COMPILE_HIP_TRACE_API 0x3
|
||||
|
||||
|
||||
// Compile code that generates trace markers for CodeXL ATP at HIP function begin/end.
|
||||
// ATP is standard CodeXL format that includes timestamps for kernels, HSA RT APIs, and HIP APIs.
|
||||
#ifndef COMPILE_HIP_ATP_MARKER
|
||||
#define COMPILE_HIP_ATP_MARKER 0
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
||||
// Compile support for trace markers that are displayed on CodeXL GUI at start/stop of each function boundary.
|
||||
// TODO - currently we print the trace message at the beginning. if we waited, we could also tls_tidInfo return codes, and any values returned
|
||||
// through ptr-to-args (ie the pointers allocated by hipMalloc).
|
||||
#if COMPILE_HIP_ATP_MARKER
|
||||
#include "CXLActivityLogger.h"
|
||||
#define MARKER_BEGIN(markerName,group) amdtBeginMarker(markerName, group, nullptr);
|
||||
#define MARKER_END() amdtEndMarker();
|
||||
#define RESUME_PROFILING amdtResumeProfiling(AMDT_ALL_PROFILING);
|
||||
#define STOP_PROFILING amdtStopProfiling(AMDT_ALL_PROFILING);
|
||||
#else
|
||||
// Swallow scoped markers:
|
||||
#define MARKER_BEGIN(markerName,group)
|
||||
#define MARKER_END()
|
||||
#define RESUME_PROFILING
|
||||
#define STOP_PROFILING
|
||||
#endif
|
||||
|
||||
|
||||
//---
|
||||
//HIP Trace modes
|
||||
#define TRACE_ALL 0 // 0x1
|
||||
#define TRACE_CMD 1 // 0x2
|
||||
#define TRACE_MEM 2 // 0x4
|
||||
|
||||
|
||||
//---
|
||||
//HIP_DB Debug flags:
|
||||
#define DB_API 0 /* 0x01 - shortcut to enable HIP_TRACE_API on single switch */
|
||||
#define DB_SYNC 1 /* 0x02 - trace synchronization pieces */
|
||||
#define DB_MEM 2 /* 0x04 - trace memory allocation / deallocation */
|
||||
#define DB_COPY 3 /* 0x08 - trace memory copy and peer commands. . */
|
||||
#define DB_MAX_FLAG 4
|
||||
// When adding a new debug flag, also add to the char name table below.
|
||||
//
|
||||
//
|
||||
|
||||
struct DbName {
|
||||
const char *_color;
|
||||
const char *_shortName;
|
||||
};
|
||||
|
||||
// This table must be kept in-sync with the defines above.
|
||||
static const DbName dbName [] =
|
||||
{
|
||||
{KGRN, "api"}, // not used,
|
||||
{KYEL, "sync"},
|
||||
{KCYN, "mem"},
|
||||
{KMAG, "copy"},
|
||||
};
|
||||
|
||||
|
||||
|
||||
#if COMPILE_HIP_DB
|
||||
#define tprintf(trace_level, ...) {\
|
||||
if (HIP_DB & (1<<(trace_level))) {\
|
||||
char msgStr[1000];\
|
||||
snprintf(msgStr, 2000, __VA_ARGS__);\
|
||||
fprintf (stderr, " %ship-%s tid:%d:%s%s", dbName[trace_level]._color, dbName[trace_level]._shortName, tls_tidInfo.tid(), msgStr, KNRM); \
|
||||
}\
|
||||
}
|
||||
#else
|
||||
/* Compile to empty code */
|
||||
#define tprintf(trace_level, ...)
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
||||
//---
|
||||
extern void recordApiTrace(std::string *fullStr, const std::string &apiStr);
|
||||
|
||||
#if COMPILE_HIP_ATP_MARKER || (COMPILE_HIP_TRACE_API & 0x1)
|
||||
#define API_TRACE(forceTrace, ...)\
|
||||
{\
|
||||
tls_tidInfo.incApiSeqNum();\
|
||||
if (forceTrace || (HIP_PROFILE_API || (COMPILE_HIP_DB && (HIP_TRACE_API & (1<<TRACE_ALL))))) {\
|
||||
std::string apiStr = std::string(__func__) + " (" + ToString(__VA_ARGS__) + ')';\
|
||||
std::string fullStr;\
|
||||
recordApiTrace(&fullStr, apiStr);\
|
||||
if (HIP_PROFILE_API == 0x1) {MARKER_BEGIN(__func__, "HIP") }\
|
||||
else if (HIP_PROFILE_API == 0x2) {MARKER_BEGIN(fullStr.c_str(), "HIP"); }\
|
||||
}\
|
||||
}
|
||||
#else
|
||||
// Swallow API_TRACE
|
||||
#define API_TRACE(IS_CMD, ...)\
|
||||
tls_tidInfo.incApiSeqNum();
|
||||
#endif
|
||||
|
||||
|
||||
// Just initialize the HIP runtime, but don't log any trace information.
|
||||
#define HIP_INIT()\
|
||||
std::call_once(hip_initialized, ihipInit);\
|
||||
ihipCtxStackUpdate();
|
||||
#define HIP_SET_DEVICE()\
|
||||
ihipDeviceSetState();
|
||||
|
||||
|
||||
|
||||
// This macro should be called at the beginning of every HIP API.
|
||||
// It initializes the hip runtime (exactly once), and
|
||||
// generates a trace string that can be output to stderr or to ATP file.
|
||||
#define HIP_INIT_API(...) \
|
||||
HIP_INIT()\
|
||||
API_TRACE(0, __VA_ARGS__);
|
||||
|
||||
|
||||
// Like above, but will trace with DB_CMD.
|
||||
// Replace HIP_INIT_API with this call inside important APIs that launch work on the GPU:
|
||||
// kernel launches, copy commands, memory sets, etc.
|
||||
#define HIP_INIT_CMD_API(...) \
|
||||
HIP_INIT()\
|
||||
API_TRACE((HIP_TRACE_API&(1<<TRACE_CMD)), __VA_ARGS__);
|
||||
|
||||
// This macro should be called at the end of every HIP API, and only at the end of top-level hip APIS (not internal hip)
|
||||
// It has dual function: logs the last error returned for use by hipGetLastError,
|
||||
// and also prints the closing message when the debug trace is enabled.
|
||||
#define ihipLogStatus(hipStatus) \
|
||||
({\
|
||||
hipError_t localHipStatus = hipStatus; /*local copy so hipStatus only evaluated once*/ \
|
||||
tls_lastHipError = localHipStatus;\
|
||||
\
|
||||
if ((COMPILE_HIP_TRACE_API & 0x2) && HIP_TRACE_API & (1<<TRACE_ALL)) {\
|
||||
fprintf(stderr, " %ship-api tid:%d.%lu %-30s ret=%2d (%s)>>%s\n", (localHipStatus == 0) ? API_COLOR:KRED, tls_tidInfo.tid(),tls_tidInfo.apiSeqNum(), __func__, localHipStatus, ihipErrorString(localHipStatus), API_COLOR_END);\
|
||||
}\
|
||||
if (HIP_PROFILE_API) { MARKER_END(); }\
|
||||
localHipStatus;\
|
||||
})
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
class ihipException : public std::exception
|
||||
{
|
||||
public:
|
||||
ihipException(hipError_t e) : _code(e) {};
|
||||
|
||||
hipError_t _code;
|
||||
};
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
const hipStream_t hipStreamNull = 0x0;
|
||||
|
||||
|
||||
/**
|
||||
* HIP IPC Handle Size
|
||||
*/
|
||||
#define HIP_IPC_RESERVED_SIZE 24
|
||||
class ihipIpcMemHandle_t
|
||||
{
|
||||
public:
|
||||
#if USE_IPC
|
||||
hsa_amd_ipc_memory_t ipc_handle; ///< ipc memory handle on ROCr
|
||||
#endif
|
||||
size_t psize;
|
||||
char reserved[HIP_IPC_RESERVED_SIZE];
|
||||
};
|
||||
|
||||
|
||||
class ihipModule_t {
|
||||
public:
|
||||
hsa_executable_t executable;
|
||||
hsa_code_object_t object;
|
||||
std::string fileName;
|
||||
void *ptr;
|
||||
size_t size;
|
||||
std::list<hipFunction_t> funcTrack;
|
||||
ihipModule_t() : executable(), object(), fileName(), ptr(nullptr), size(0) {}
|
||||
};
|
||||
|
||||
|
||||
//---
|
||||
// Used to remove lock, for performance or stimulating bugs.
|
||||
class FakeMutex
|
||||
{
|
||||
public:
|
||||
void lock() { }
|
||||
bool try_lock() {return true; }
|
||||
void unlock() { }
|
||||
};
|
||||
|
||||
|
||||
#if STREAM_THREAD_SAFE
|
||||
typedef std::mutex StreamMutex;
|
||||
#else
|
||||
#warning "Stream thread-safe disabled"
|
||||
typedef FakeMutex StreamMutex;
|
||||
#endif
|
||||
|
||||
// Pair Device and Ctx together, these could also be toggled separately if desired.
|
||||
#if CTX_THREAD_SAFE
|
||||
typedef std::mutex CtxMutex;
|
||||
#else
|
||||
typedef FakeMutex CtxMutex;
|
||||
#warning "Ctx thread-safe disabled"
|
||||
#endif
|
||||
|
||||
#if DEVICE_THREAD_SAFE
|
||||
typedef std::mutex DeviceMutex;
|
||||
#else
|
||||
typedef FakeMutex DeviceMutex;
|
||||
#warning "Device thread-safe disabled"
|
||||
#endif
|
||||
|
||||
//
|
||||
//---
|
||||
// Protects access to the member _data with a lock acquired on contruction/destruction.
|
||||
// T must contain a _mutex field which meets the BasicLockable requirements (lock/unlock)
|
||||
template<typename T>
|
||||
class LockedAccessor
|
||||
{
|
||||
public:
|
||||
LockedAccessor(T &criticalData, bool autoUnlock=true) :
|
||||
_criticalData(&criticalData),
|
||||
_autoUnlock(autoUnlock)
|
||||
|
||||
{
|
||||
tprintf(DB_SYNC, "locking criticalData=%p for %s..\n", _criticalData, ToString(_criticalData->_parent).c_str());
|
||||
_criticalData->_mutex.lock();
|
||||
};
|
||||
|
||||
~LockedAccessor()
|
||||
{
|
||||
if (_autoUnlock) {
|
||||
tprintf(DB_SYNC, "auto-unlocking criticalData=%p for %s...\n", _criticalData, ToString(_criticalData->_parent).c_str());
|
||||
_criticalData->_mutex.unlock();
|
||||
}
|
||||
}
|
||||
|
||||
void unlock()
|
||||
{
|
||||
tprintf(DB_SYNC, "unlocking criticalData=%p for %s...\n", _criticalData, ToString(_criticalData->_parent).c_str());
|
||||
_criticalData->_mutex.unlock();
|
||||
}
|
||||
|
||||
// Syntactic sugar so -> can be used to get the underlying type.
|
||||
T *operator->() { return _criticalData; };
|
||||
|
||||
private:
|
||||
T *_criticalData;
|
||||
bool _autoUnlock;
|
||||
};
|
||||
|
||||
|
||||
template <typename MUTEX_TYPE>
|
||||
struct LockedBase {
|
||||
|
||||
// Experts-only interface for explicit locking.
|
||||
// Most uses should use the lock-accessor.
|
||||
void lock() { _mutex.lock(); }
|
||||
void unlock() { _mutex.unlock(); }
|
||||
bool try_lock() { return _mutex.try_lock(); }
|
||||
|
||||
MUTEX_TYPE _mutex;
|
||||
};
|
||||
|
||||
|
||||
template <typename MUTEX_TYPE>
|
||||
class ihipStreamCriticalBase_t : public LockedBase<MUTEX_TYPE>
|
||||
{
|
||||
public:
|
||||
ihipStreamCriticalBase_t(ihipStream_t *parentStream, hc::accelerator_view av) :
|
||||
_kernelCnt(0),
|
||||
_av(av),
|
||||
_hasQueue(true),
|
||||
_parent(parentStream)
|
||||
{
|
||||
};
|
||||
|
||||
~ihipStreamCriticalBase_t() {
|
||||
}
|
||||
|
||||
ihipStreamCriticalBase_t<StreamMutex> * mlock() { LockedBase<MUTEX_TYPE>::lock(); return this;};
|
||||
|
||||
void munlock() {
|
||||
tprintf(DB_SYNC, "munlocking criticalData=%p for %s...\n", this, ToString(this->_parent).c_str());
|
||||
LockedBase<MUTEX_TYPE>::unlock();
|
||||
};
|
||||
|
||||
ihipStreamCriticalBase_t<StreamMutex> * mtry_lock() {
|
||||
bool gotLock = LockedBase<MUTEX_TYPE>::try_lock() ;
|
||||
tprintf(DB_SYNC, "mtry_locking=%d criticalData=%p for %s...\n", gotLock, this, ToString(this->_parent).c_str());
|
||||
return gotLock ? this: nullptr;
|
||||
};
|
||||
|
||||
public:
|
||||
ihipStream_t * _parent;
|
||||
uint32_t _kernelCnt; // Count of inflight kernels in this stream. Reset at ::wait().
|
||||
|
||||
hc::accelerator_view _av;
|
||||
|
||||
// True if the stream has an allocated queue (accelerato_view) for its use:
|
||||
// Always true at ihipStream creation but queue may later be stolen.
|
||||
// This acts as a valid bit for the _av.
|
||||
bool _hasQueue;
|
||||
private:
|
||||
};
|
||||
|
||||
|
||||
// if HIP code needs to acquire locks for both ihipCtx_t and ihipStream_t, it should first acquire the lock
|
||||
// for the ihipCtx_t and then for the individual streams. The locks should not be acquired in reverse order
|
||||
// or deadlock may occur. In some cases, it may be possible to reduce the range where the locks must be held.
|
||||
// HIP routines should avoid acquiring and releasing the same lock during the execution of a single HIP API.
|
||||
// Another option is to use try_lock in the innermost lock query.
|
||||
|
||||
|
||||
typedef ihipStreamCriticalBase_t<StreamMutex> ihipStreamCritical_t;
|
||||
typedef LockedAccessor<ihipStreamCritical_t> LockedAccessor_StreamCrit_t;
|
||||
|
||||
//---
|
||||
// Internal stream structure.
|
||||
class ihipStream_t {
|
||||
public:
|
||||
enum ScheduleMode {Auto, Spin, Yield};
|
||||
typedef uint64_t SeqNum_t ;
|
||||
|
||||
// TODOD -make av a reference to avoid shared_ptr overhead?
|
||||
ihipStream_t(ihipCtx_t *ctx, hc::accelerator_view av, unsigned int flags);
|
||||
~ihipStream_t();
|
||||
|
||||
// kind is hipMemcpyKind
|
||||
void locked_copySync (void* dst, const void* src, size_t sizeBytes, unsigned kind, bool resolveOn = true);
|
||||
void locked_copyAsync(void* dst, const void* src, size_t sizeBytes, unsigned kind);
|
||||
|
||||
void lockedSymbolCopySync(hc::accelerator &acc, void *dst, void* src, size_t sizeBytes, size_t offset, unsigned kind);
|
||||
void lockedSymbolCopyAsync(hc::accelerator &acc, void *dst, void* src, size_t sizeBytes, size_t offset, unsigned kind);
|
||||
|
||||
//---
|
||||
// Member functions that begin with locked_ are thread-safe accessors - these acquire / release the critical mutex.
|
||||
LockedAccessor_StreamCrit_t lockopen_preKernelCommand();
|
||||
void lockclose_postKernelCommand(const char *kernelName, hc::accelerator_view *av);
|
||||
|
||||
|
||||
void locked_wait();
|
||||
|
||||
hc::accelerator_view* locked_getAv() { LockedAccessor_StreamCrit_t crit(_criticalData); return &(crit->_av); };
|
||||
|
||||
void locked_waitEvent(hipEvent_t event);
|
||||
void locked_recordEvent(hipEvent_t event);
|
||||
|
||||
|
||||
//---
|
||||
|
||||
// Use this if we already have the stream critical data mutex:
|
||||
void wait(LockedAccessor_StreamCrit_t &crit);
|
||||
|
||||
void launchModuleKernel(hc::accelerator_view av, hsa_signal_t signal,
|
||||
uint32_t blockDimX, uint32_t blockDimY, uint32_t blockDimZ,
|
||||
uint32_t gridDimX, uint32_t gridDimY, uint32_t gridDimZ,
|
||||
uint32_t groupSegmentSize, uint32_t sharedMemBytes,
|
||||
void *kernarg, size_t kernSize, uint64_t kernel);
|
||||
|
||||
|
||||
|
||||
//-- Non-racy accessors:
|
||||
// These functions access fields set at initialization time and are non-racy (so do not acquire mutex)
|
||||
const ihipDevice_t * getDevice() const;
|
||||
ihipCtx_t * getCtx() const;
|
||||
|
||||
void ensureHaveQueue(LockedAccessor_StreamCrit_t &streamCrit);
|
||||
|
||||
public:
|
||||
//---
|
||||
//Public member vars - these are set at initialization and never change:
|
||||
SeqNum_t _id; // monotonic sequence ID
|
||||
unsigned _flags;
|
||||
|
||||
|
||||
private:
|
||||
|
||||
|
||||
// The unsigned return is hipMemcpyKind
|
||||
unsigned resolveMemcpyDirection(bool srcInDeviceMem, bool dstInDeviceMem);
|
||||
void resolveHcMemcpyDirection(unsigned hipMemKind,
|
||||
const hc::AmPointerInfo *dstPtrInfo, const hc::AmPointerInfo *srcPtrInfo,
|
||||
hc::hcCommandKind *hcCopyDir,
|
||||
ihipCtx_t **copyDevice,
|
||||
bool *forceUnpinnedCopy);
|
||||
|
||||
bool canSeeMemory(const ihipCtx_t *thisCtx, const hc::AmPointerInfo *dstInfo, const hc::AmPointerInfo *srcInfo);
|
||||
|
||||
void addSymbolPtrToTracker(hc::accelerator& acc, void* ptr, size_t sizeBytes);
|
||||
|
||||
public: // TODO - move private
|
||||
// Critical Data - MUST be accessed through LockedAccessor_StreamCrit_t
|
||||
ihipStreamCritical_t _criticalData;
|
||||
|
||||
private: // Data
|
||||
|
||||
std::mutex _hasQueueLock;
|
||||
|
||||
ihipCtx_t *_ctx; // parent context that owns this stream.
|
||||
|
||||
// Friends:
|
||||
friend std::ostream& operator<<(std::ostream& os, const ihipStream_t& s);
|
||||
friend hipError_t hipStreamQuery(hipStream_t);
|
||||
|
||||
ScheduleMode _scheduleMode;
|
||||
};
|
||||
|
||||
|
||||
|
||||
//----
|
||||
// Internal event structure:
|
||||
enum hipEventStatus_t {
|
||||
hipEventStatusUnitialized = 0, // event is unutilized, must be "Created" before use.
|
||||
hipEventStatusCreated = 1,
|
||||
hipEventStatusRecording = 2, // event has been enqueued to record something.
|
||||
hipEventStatusRecorded = 3, // event has been recorded - timestamps are valid.
|
||||
} ;
|
||||
|
||||
|
||||
// internal hip event structure.
|
||||
struct ihipEvent_t {
|
||||
hipEventStatus_t _state;
|
||||
|
||||
hipStream_t _stream; // Stream where the event is recorded, or NULL if all streams.
|
||||
unsigned _flags;
|
||||
|
||||
hc::completion_future _marker;
|
||||
uint64_t _timestamp; // store timestamp, may be set on host or by marker.
|
||||
} ;
|
||||
|
||||
|
||||
|
||||
//=============================================================================
|
||||
//class ihipDeviceCriticalBase_t
|
||||
template <typename MUTEX_TYPE>
|
||||
class ihipDeviceCriticalBase_t : LockedBase<MUTEX_TYPE>
|
||||
{
|
||||
public:
|
||||
ihipDeviceCriticalBase_t(ihipDevice_t *parentDevice) :
|
||||
_parent(parentDevice)
|
||||
{
|
||||
};
|
||||
|
||||
~ihipDeviceCriticalBase_t() {
|
||||
|
||||
}
|
||||
|
||||
// Contexts:
|
||||
void addContext(ihipCtx_t *ctx);
|
||||
void removeContext(ihipCtx_t *ctx);
|
||||
std::list<ihipCtx_t*> &ctxs() { return _ctxs; };
|
||||
const std::list<ihipCtx_t*> &const_ctxs() const { return _ctxs; };
|
||||
int getcount() {return _ctxCount;};
|
||||
friend class LockedAccessor<ihipDeviceCriticalBase_t>;
|
||||
private:
|
||||
ihipDevice_t *_parent;
|
||||
|
||||
//--- Context Tracker:
|
||||
std::list< ihipCtx_t* > _ctxs; // contexts associated with this device across all threads.
|
||||
|
||||
int _ctxCount;
|
||||
};
|
||||
|
||||
typedef ihipDeviceCriticalBase_t<DeviceMutex> ihipDeviceCritical_t;
|
||||
|
||||
typedef LockedAccessor<ihipDeviceCritical_t> LockedAccessor_DeviceCrit_t;
|
||||
|
||||
//----
|
||||
// Properties of the HIP device.
|
||||
// Multiple contexts can point to same device.
|
||||
class ihipDevice_t
|
||||
{
|
||||
public:
|
||||
ihipDevice_t(unsigned deviceId, unsigned deviceCnt, hc::accelerator &acc);
|
||||
~ihipDevice_t();
|
||||
|
||||
// Accessors:
|
||||
ihipCtx_t *getPrimaryCtx() const { return _primaryCtx; };
|
||||
void locked_removeContext(ihipCtx_t *c);
|
||||
void locked_reset();
|
||||
ihipDeviceCritical_t &criticalData() { return _criticalData; };
|
||||
public:
|
||||
unsigned _deviceId; // device ID
|
||||
|
||||
hc::accelerator _acc;
|
||||
hsa_agent_t _hsaAgent; // hsa agent handle
|
||||
|
||||
//! Number of compute units supported by the device:
|
||||
unsigned _computeUnits;
|
||||
hipDeviceProp_t _props; // saved device properties.
|
||||
|
||||
// TODO - report this through device properties, base on HCC API call.
|
||||
int _isLargeBar;
|
||||
|
||||
ihipCtx_t *_primaryCtx;
|
||||
|
||||
int _state; //1 if device is set otherwise 0
|
||||
|
||||
private:
|
||||
hipError_t initProperties(hipDeviceProp_t* prop);
|
||||
private:
|
||||
ihipDeviceCritical_t _criticalData;
|
||||
};
|
||||
//=============================================================================
|
||||
|
||||
|
||||
|
||||
//=============================================================================
|
||||
//class ihipCtxCriticalBase_t
|
||||
template <typename MUTEX_TYPE>
|
||||
class ihipCtxCriticalBase_t : LockedBase<MUTEX_TYPE>
|
||||
{
|
||||
public:
|
||||
ihipCtxCriticalBase_t(ihipCtx_t *parentCtx, unsigned deviceCnt) :
|
||||
_parent(parentCtx),
|
||||
_peerCnt(0)
|
||||
{
|
||||
_peerAgents = new hsa_agent_t[deviceCnt];
|
||||
};
|
||||
|
||||
~ihipCtxCriticalBase_t() {
|
||||
if (_peerAgents != nullptr) {
|
||||
delete _peerAgents;
|
||||
_peerAgents = nullptr;
|
||||
}
|
||||
_peerCnt = 0;
|
||||
}
|
||||
|
||||
// Streams:
|
||||
void addStream(ihipStream_t *stream);
|
||||
std::list<ihipStream_t*> &streams() { return _streams; };
|
||||
const std::list<ihipStream_t*> &const_streams() const { return _streams; };
|
||||
|
||||
|
||||
|
||||
// Peer Accessor classes:
|
||||
bool isPeerWatcher(const ihipCtx_t *peer); // returns True if peer has access to memory physically located on this device.
|
||||
bool addPeerWatcher(const ihipCtx_t *thisCtx, ihipCtx_t *peer);
|
||||
bool removePeerWatcher(const ihipCtx_t *thisCtx, ihipCtx_t *peer);
|
||||
void resetPeerWatchers(ihipCtx_t *thisDevice);
|
||||
void printPeerWatchers(FILE *f) const;
|
||||
|
||||
uint32_t peerCnt() const { return _peerCnt; };
|
||||
hsa_agent_t *peerAgents() const { return _peerAgents; };
|
||||
|
||||
|
||||
// TODO - move private
|
||||
std::list<ihipCtx_t*> _peers; // list of enabled peer devices.
|
||||
|
||||
friend class LockedAccessor<ihipCtxCriticalBase_t>;
|
||||
private:
|
||||
ihipCtx_t * _parent;
|
||||
|
||||
//--- Stream Tracker:
|
||||
std::list< ihipStream_t* > _streams; // streams associated with this device.
|
||||
|
||||
|
||||
//--- Peer Tracker:
|
||||
// These reflect the currently Enabled set of peers for this GPU:
|
||||
// Enabled peers have permissions to access the memory physically allocated on this device.
|
||||
// Note the peers always contain the self agent for easy interfacing with HSA APIs.
|
||||
uint32_t _peerCnt; // number of enabled peers
|
||||
hsa_agent_t *_peerAgents; // efficient packed array of enabled agents (to use for allocations.)
|
||||
private:
|
||||
void recomputePeerAgents();
|
||||
};
|
||||
// Note Mutex type Real/Fake selected based on CtxMutex
|
||||
typedef ihipCtxCriticalBase_t<CtxMutex> ihipCtxCritical_t;
|
||||
|
||||
// This type is used by functions that need access to the critical device structures.
|
||||
typedef LockedAccessor<ihipCtxCritical_t> LockedAccessor_CtxCrit_t;
|
||||
//=============================================================================
|
||||
|
||||
|
||||
//=============================================================================
|
||||
//class ihipCtx_t:
|
||||
// A HIP CTX (context) points at one of the existing devices and contains the streams,
|
||||
// peer-to-peer mappings, creation flags. Multiple contexts can point to the same
|
||||
// device.
|
||||
//
|
||||
class ihipCtx_t
|
||||
{
|
||||
public: // Functions:
|
||||
ihipCtx_t(ihipDevice_t *device, unsigned deviceCnt, unsigned flags); // note: calls constructor for _criticalData
|
||||
~ihipCtx_t();
|
||||
|
||||
// Functions which read or write the critical data are named locked_.
|
||||
// (might be better called "locking_"
|
||||
// ihipCtx_t does not use recursive locks so the ihip implementation must avoid calling a locked_ function from within a locked_ function.
|
||||
// External functions which call several locked_ functions will acquire and release the lock for each function. if this occurs in
|
||||
// performance-sensitive code we may want to refactor by adding non-locked functions and creating a new locked_ member function to call them all.
|
||||
void locked_removeStream(ihipStream_t *s);
|
||||
void locked_reset();
|
||||
void locked_waitAllStreams();
|
||||
void locked_syncDefaultStream(bool waitOnSelf);
|
||||
|
||||
// Will allocate a queue and assign it to the needyStream:
|
||||
hc::accelerator_view stealActiveQueue(LockedAccessor_CtxCrit_t &ctxCrit, ihipStream_t *needyStream);
|
||||
hc::accelerator_view createOrStealQueue(LockedAccessor_CtxCrit_t &ctxCrit);
|
||||
|
||||
ihipCtxCritical_t &criticalData() { return _criticalData; };
|
||||
|
||||
const ihipDevice_t *getDevice() const { return _device; };
|
||||
int getDeviceNum() const { return _device->_deviceId; };
|
||||
|
||||
// TODO - review uses of getWriteableDevice(), can these be converted to getDevice()
|
||||
ihipDevice_t *getWriteableDevice() const { return _device; };
|
||||
|
||||
std::string toString() const;
|
||||
|
||||
public: // Data
|
||||
// The NULL stream is used if no other stream is specified.
|
||||
// Default stream has special synchronization properties with other streams.
|
||||
ihipStream_t *_defaultStream;
|
||||
|
||||
// Flags specified when the context is created:
|
||||
unsigned _ctxFlags;
|
||||
|
||||
private:
|
||||
ihipDevice_t *_device;
|
||||
|
||||
|
||||
private: // Critical data, protected with locked access:
|
||||
// Members of _protected data MUST be accessed through the LockedAccessor.
|
||||
// Search for LockedAccessor<ihipCtxCritical_t> for examples; do not access _criticalData directly.
|
||||
ihipCtxCritical_t _criticalData;
|
||||
|
||||
};
|
||||
|
||||
|
||||
|
||||
//=================================================================================================
|
||||
// Global variable definition:
|
||||
extern std::once_flag hip_initialized;
|
||||
extern unsigned g_deviceCnt;
|
||||
extern hsa_agent_t g_cpu_agent ; // the CPU agent.
|
||||
|
||||
//=================================================================================================
|
||||
// Extern functions:
|
||||
extern void ihipInit();
|
||||
extern const char *ihipErrorString(hipError_t);
|
||||
extern ihipCtx_t *ihipGetTlsDefaultCtx();
|
||||
extern void ihipSetTlsDefaultCtx(ihipCtx_t *ctx);
|
||||
extern hipError_t ihipSynchronize(void);
|
||||
extern void ihipCtxStackUpdate();
|
||||
extern hipError_t ihipDeviceSetState();
|
||||
|
||||
extern ihipDevice_t *ihipGetDevice(int);
|
||||
ihipCtx_t * ihipGetPrimaryCtx(unsigned deviceIndex);
|
||||
|
||||
extern void ihipSetTs(hipEvent_t e);
|
||||
|
||||
|
||||
hipStream_t ihipSyncAndResolveStream(hipStream_t);
|
||||
|
||||
// Stream printf functions:
|
||||
inline std::ostream& operator<<(std::ostream& os, const ihipStream_t& s)
|
||||
{
|
||||
os << "stream:";
|
||||
os << s.getDevice()->_deviceId;;
|
||||
os << '.';
|
||||
os << s._id;
|
||||
return os;
|
||||
}
|
||||
|
||||
inline std::ostream & operator<<(std::ostream& os, const dim3& s)
|
||||
{
|
||||
os << '{';
|
||||
os << s.x;
|
||||
os << ',';
|
||||
os << s.y;
|
||||
os << ',';
|
||||
os << s.z;
|
||||
os << '}';
|
||||
return os;
|
||||
}
|
||||
|
||||
inline std::ostream & operator<<(std::ostream& os, const gl_dim3& s)
|
||||
{
|
||||
os << '{';
|
||||
os << s.x;
|
||||
os << ',';
|
||||
os << s.y;
|
||||
os << ',';
|
||||
os << s.z;
|
||||
os << '}';
|
||||
return os;
|
||||
}
|
||||
|
||||
// Stream printf functions:
|
||||
inline std::ostream& operator<<(std::ostream& os, const hipEvent_t& e)
|
||||
{
|
||||
os << "event:" << std::hex << static_cast<void*> (e);
|
||||
return os;
|
||||
}
|
||||
|
||||
inline std::ostream& operator<<(std::ostream& os, const ihipCtx_t* c)
|
||||
{
|
||||
os << "ctx:" << static_cast<const void*> (c)
|
||||
<< ".dev:" << c->getDevice()->_deviceId;
|
||||
return os;
|
||||
}
|
||||
|
||||
|
||||
// Helper functions that are used across src files:
|
||||
namespace hip_internal {
|
||||
hipError_t memcpyAsync (void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind, hipStream_t stream);
|
||||
};
|
||||
|
||||
|
||||
#endif
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
@@ -25,7 +25,7 @@ THE SOFTWARE.
|
||||
#include "hsa/hsa_ext_amd.h"
|
||||
|
||||
#include "hip/hip_runtime.h"
|
||||
#include "hip_hcc.h"
|
||||
#include "hip_hcc_internal.h"
|
||||
#include "trace_helper.h"
|
||||
#include "hip/hcc_detail/hip_texture.h"
|
||||
#include <hc_am.hpp>
|
||||
@@ -261,11 +261,11 @@ hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags)
|
||||
auto device = ctx->getWriteableDevice();
|
||||
unsigned amFlags = HIP_COHERENT_HOST_ALLOC ? amHostCoherent : amHostPinned;
|
||||
|
||||
*ptr = hip_internal::allocAndSharePtr(HIP_COHERENT_HOST_ALLOC ? "finegrained_host":"pinned_host",
|
||||
*ptr = hip_internal::allocAndSharePtr(HIP_COHERENT_HOST_ALLOC ? "finegrained_host":"pinned_host",
|
||||
sizeBytes, ctx, amFlags, flags);
|
||||
if(sizeBytes && (*ptr == NULL)){
|
||||
hip_status = hipErrorMemoryAllocation;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -314,7 +314,7 @@ hipError_t hipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t height
|
||||
|
||||
if (sizeBytes && (*ptr == NULL)) {
|
||||
hip_status = hipErrorMemoryAllocation;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
hip_status = hipErrorMemoryAllocation;
|
||||
}
|
||||
@@ -372,7 +372,7 @@ hipError_t hipMallocArray(hipArray** array, const hipChannelFormatDesc* desc,
|
||||
*ptr = hip_internal::allocAndSharePtr("device_array", allocSize, ctx, am_flags, 0);
|
||||
if (size && (*ptr == NULL)) {
|
||||
hip_status = hipErrorMemoryAllocation;
|
||||
}
|
||||
}
|
||||
|
||||
} else {
|
||||
hip_status = hipErrorMemoryAllocation;
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
@@ -27,15 +27,13 @@ THE SOFTWARE.
|
||||
#include <elf.h>
|
||||
#include <gelf.h>
|
||||
#include <map>
|
||||
#include "AMDGPUPTNote.h"
|
||||
#include "AMDGPURuntimeMetadata.h"
|
||||
|
||||
#include "hsa/hsa.h"
|
||||
#include "hsa/hsa_ext_amd.h"
|
||||
#include "hsa/amd_hsa_kernel_code.h"
|
||||
|
||||
#include "hip/hip_runtime.h"
|
||||
#include "hip_hcc.h"
|
||||
#include "hip_hcc_internal.h"
|
||||
#include "trace_helper.h"
|
||||
|
||||
//TODO Use Pool APIs from HCC to get memory regions.
|
||||
@@ -365,16 +363,12 @@ hipError_t hipModuleGetFunction(hipFunction_t *hfunc, hipModule_t hmod,
|
||||
}
|
||||
|
||||
|
||||
hipError_t hipModuleLaunchKernel(hipFunction_t f,
|
||||
uint32_t gridDimX, uint32_t gridDimY, uint32_t gridDimZ,
|
||||
uint32_t blockDimX, uint32_t blockDimY, uint32_t blockDimZ,
|
||||
uint32_t sharedMemBytes, hipStream_t hStream,
|
||||
hipError_t ihipModuleLaunchKernel(hipFunction_t f,
|
||||
uint32_t globalWorkSizeX, uint32_t globalWorkSizeY, uint32_t globalWorkSizeZ,
|
||||
uint32_t localWorkSizeX, uint32_t localWorkSizeY, uint32_t localWorkSizeZ,
|
||||
size_t sharedMemBytes, hipStream_t hStream,
|
||||
void **kernelParams, void **extra)
|
||||
{
|
||||
HIP_INIT_API(f, gridDimX, gridDimY, gridDimZ,
|
||||
blockDimX, blockDimY, blockDimZ,
|
||||
sharedMemBytes, hStream,
|
||||
kernelParams, extra);
|
||||
|
||||
auto ctx = ihipGetTlsDefaultCtx();
|
||||
hipError_t ret = hipSuccess;
|
||||
@@ -420,7 +414,7 @@ hipError_t hipModuleLaunchKernel(hipFunction_t f,
|
||||
*/
|
||||
grid_launch_parm lp;
|
||||
lp.dynamic_group_mem_bytes = sharedMemBytes; // TODO - this should be part of preLaunchKernel.
|
||||
hStream = ihipPreLaunchKernel(hStream, dim3(gridDimX, gridDimY, gridDimZ), dim3(blockDimX, blockDimY, blockDimZ), &lp, f->_name.c_str());
|
||||
hStream = ihipPreLaunchKernel(hStream, dim3(globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ), dim3(localWorkSizeX, localWorkSizeY, localWorkSizeZ), &lp, f->_name.c_str());
|
||||
|
||||
|
||||
hsa_kernel_dispatch_packet_t aql;
|
||||
@@ -430,12 +424,12 @@ hipError_t hipModuleLaunchKernel(hipFunction_t f,
|
||||
//aql.completion_signal._handle = 0;
|
||||
//aql.kernarg_address = 0;
|
||||
|
||||
aql.workgroup_size_x = blockDimX;
|
||||
aql.workgroup_size_y = blockDimY;
|
||||
aql.workgroup_size_z = blockDimZ;
|
||||
aql.grid_size_x = blockDimX * gridDimX;
|
||||
aql.grid_size_y = blockDimY * gridDimY;
|
||||
aql.grid_size_z = blockDimZ * gridDimZ;
|
||||
aql.workgroup_size_x = localWorkSizeX;
|
||||
aql.workgroup_size_y = localWorkSizeY;
|
||||
aql.workgroup_size_z = localWorkSizeZ;
|
||||
aql.grid_size_x = globalWorkSizeX;
|
||||
aql.grid_size_y = globalWorkSizeY;
|
||||
aql.grid_size_z = globalWorkSizeZ;
|
||||
aql.group_segment_size = f->_groupSegmentSize + sharedMemBytes;
|
||||
aql.private_segment_size = f->_privateSegmentSize;
|
||||
aql.kernel_object = f->_object;
|
||||
@@ -459,9 +453,40 @@ hipError_t hipModuleLaunchKernel(hipFunction_t f,
|
||||
ihipPostLaunchKernel(f->_name.c_str(), hStream, lp);
|
||||
}
|
||||
|
||||
return ihipLogStatus(ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
hipError_t hipModuleLaunchKernel(hipFunction_t f,
|
||||
uint32_t gridDimX, uint32_t gridDimY, uint32_t gridDimZ,
|
||||
uint32_t blockDimX, uint32_t blockDimY, uint32_t blockDimZ,
|
||||
uint32_t sharedMemBytes, hipStream_t hStream,
|
||||
void **kernelParams, void **extra)
|
||||
{
|
||||
HIP_INIT_API(f, gridDimX, gridDimY, gridDimZ,
|
||||
blockDimX, blockDimY, blockDimZ,
|
||||
sharedMemBytes, hStream,
|
||||
kernelParams, extra);
|
||||
return ihipLogStatus(ihipModuleLaunchKernel(f,
|
||||
blockDimX * gridDimX, blockDimY * gridDimY, gridDimZ * blockDimZ,
|
||||
blockDimX, blockDimY, blockDimZ,
|
||||
sharedMemBytes, hStream, kernelParams, extra));
|
||||
}
|
||||
|
||||
|
||||
hipError_t hipHccModuleLaunchKernel(hipFunction_t f,
|
||||
uint32_t globalWorkSizeX, uint32_t globalWorkSizeY, uint32_t globalWorkSizeZ,
|
||||
uint32_t localWorkSizeX, uint32_t localWorkSizeY, uint32_t localWorkSizeZ,
|
||||
size_t sharedMemBytes, hipStream_t hStream,
|
||||
void **kernelParams, void **extra)
|
||||
{
|
||||
HIP_INIT_API(f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ,
|
||||
localWorkSizeX, localWorkSizeY, localWorkSizeZ,
|
||||
sharedMemBytes, hStream,
|
||||
kernelParams, extra);
|
||||
return ihipLogStatus(ihipModuleLaunchKernel(f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ,
|
||||
localWorkSizeX, localWorkSizeY, localWorkSizeZ,
|
||||
sharedMemBytes, hStream, kernelParams, extra));
|
||||
}
|
||||
|
||||
hipError_t hipModuleGetGlobal(hipDeviceptr_t *dptr, size_t *bytes,
|
||||
hipModule_t hmod, const char* name)
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
@@ -23,7 +23,7 @@ THE SOFTWARE.
|
||||
#include <hc_am.hpp>
|
||||
|
||||
#include "hip/hip_runtime.h"
|
||||
#include "hip_hcc.h"
|
||||
#include "hip_hcc_internal.h"
|
||||
#include "trace_helper.h"
|
||||
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
@@ -21,7 +21,7 @@ THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "hip/hip_runtime.h"
|
||||
#include "hip_hcc.h"
|
||||
#include "hip_hcc_internal.h"
|
||||
#include "trace_helper.h"
|
||||
|
||||
|
||||
|
||||
Ссылка в новой задаче
Block a user