Merge 'master' into 'amd-master'
Change-Id: I18e9dcf3007abff98f52a93487607725b2b95a4f
This commit is contained in:
@@ -50,10 +50,16 @@ THE SOFTWARE.
|
||||
#include <hip/hip_runtime_api.h>
|
||||
|
||||
|
||||
// define HIP_ENABLE_PRINTF to enable printf
|
||||
#ifdef HIP_ENABLE_PRINTF
|
||||
#define HCC_ENABLE_ACCELERATOR_PRINTF 1
|
||||
#endif
|
||||
|
||||
//---
|
||||
// Remainder of this file only compiles with HCC
|
||||
#if defined __HCC__
|
||||
#include <grid_launch.h>
|
||||
#include "hc_printf.hpp"
|
||||
//TODO-HCC-GL - change this to typedef.
|
||||
//typedef grid_launch_parm hipLaunchParm ;
|
||||
|
||||
@@ -108,13 +114,12 @@ extern int HIP_TRACE_API;
|
||||
#if defined(__HCC_ACCELERATOR__) && (__HCC_ACCELERATOR__ != 0)
|
||||
// Device compile and not host compile:
|
||||
|
||||
//TODO-HCC enable __HIP_ARCH_HAS_ATOMICS__ when HCC supports these.
|
||||
// 32-bit Atomics:
|
||||
#define __HIP_ARCH_HAS_GLOBAL_INT32_ATOMICS__ (1)
|
||||
#define __HIP_ARCH_HAS_GLOBAL_FLOAT_ATOMIC_EXCH__ (1)
|
||||
#define __HIP_ARCH_HAS_SHARED_INT32_ATOMICS__ (1)
|
||||
#define __HIP_ARCH_HAS_SHARED_FLOAT_ATOMIC_EXCH__ (1)
|
||||
#define __HIP_ARCH_HAS_FLOAT_ATOMIC_ADD__ (0)
|
||||
#define __HIP_ARCH_HAS_FLOAT_ATOMIC_ADD__ (1)
|
||||
|
||||
// 64-bit Atomics:
|
||||
#define __HIP_ARCH_HAS_GLOBAL_INT64_ATOMICS__ (1)
|
||||
@@ -420,6 +425,20 @@ static inline __device__ void* memset(void* ptr, int val, size_t size)
|
||||
}
|
||||
|
||||
|
||||
#ifdef __HCC_ACCELERATOR__
|
||||
|
||||
#ifdef HC_FEATURE_PRINTF
|
||||
template <typename... All>
|
||||
static inline __device__ void printf(const char* format, All... all) {
|
||||
hc::printf(format, all...);
|
||||
}
|
||||
#else
|
||||
template <typename... All>
|
||||
static inline __device__ void printf(const char* format, All... all) { }
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#define __syncthreads() hc_barrier(CLK_LOCAL_MEM_FENCE)
|
||||
|
||||
|
||||
+17
-19
@@ -616,7 +616,7 @@ void ihipDevice_t::locked_reset()
|
||||
|
||||
//FIXME - Calling am_memtracker_reset is really bad since it destroyed all buffers allocated by the HCC runtime as well
|
||||
//such as the printf buffer. Re-initialze the printf buffer as a workaround for now.
|
||||
#if (__hcc_workweek__ >= 17423)
|
||||
#ifdef HC_FEATURE_PRINTF
|
||||
Kalmar::getContext()->initPrintfBuffer();
|
||||
#endif
|
||||
};
|
||||
@@ -700,26 +700,25 @@ int checkAccess(hsa_agent_t agent, hsa_amd_memory_pool_t pool)
|
||||
return access;
|
||||
}
|
||||
|
||||
hsa_status_t get_region_info(hsa_region_t region, void* data)
|
||||
hsa_status_t get_pool_info(hsa_amd_memory_pool_t pool, void* data)
|
||||
{
|
||||
hsa_status_t err;
|
||||
hipDeviceProp_t* p_prop = reinterpret_cast<hipDeviceProp_t*>(data);
|
||||
uint32_t region_segment;
|
||||
|
||||
// Get region segment
|
||||
err = hsa_region_get_info(region, HSA_REGION_INFO_SEGMENT, ®ion_segment);
|
||||
// Get pool segment
|
||||
err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, ®ion_segment);
|
||||
ErrorCheck(err);
|
||||
|
||||
switch(region_segment) {
|
||||
case HSA_REGION_SEGMENT_READONLY:
|
||||
err = hsa_region_get_info(region, HSA_REGION_INFO_SIZE, &(p_prop->totalConstMem)); break;
|
||||
/* case HSA_REGION_SEGMENT_PRIVATE:
|
||||
cout<<"PRIVATE"<<endl; private segment cannot be queried */
|
||||
err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SIZE, &(p_prop->totalConstMem)); break;
|
||||
case HSA_REGION_SEGMENT_GROUP:
|
||||
err = hsa_region_get_info(region, HSA_REGION_INFO_SIZE, &(p_prop->sharedMemPerBlock)); break;
|
||||
err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SIZE, &(p_prop->sharedMemPerBlock));
|
||||
break;
|
||||
default: break;
|
||||
}
|
||||
return HSA_STATUS_SUCCESS;
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
@@ -750,11 +749,7 @@ hipError_t ihipDevice_t::initProperties(hipDeviceProp_t* prop)
|
||||
hipError_t e = hipSuccess;
|
||||
hsa_status_t err;
|
||||
|
||||
// Set some defaults in case we don't find the appropriate regions:
|
||||
prop->totalGlobalMem = 0;
|
||||
prop->totalConstMem = 0;
|
||||
prop-> maxThreadsPerMultiProcessor = 0;
|
||||
prop->regsPerBlock = 0;
|
||||
memset(prop, 0, sizeof(hipDeviceProp_t));
|
||||
|
||||
if (_hsaAgent.handle == -1) {
|
||||
return hipErrorInvalidDevice;
|
||||
@@ -854,15 +849,18 @@ hipError_t ihipDevice_t::initProperties(hipDeviceProp_t* prop)
|
||||
prop-> maxThreadsPerMultiProcessor = prop->warpSize*max_waves_per_cu;
|
||||
|
||||
// Get memory properties
|
||||
err = hsa_agent_iterate_regions(_hsaAgent, get_region_info, prop);
|
||||
err = hsa_amd_agent_iterate_memory_pools(_hsaAgent, get_pool_info, prop);
|
||||
if (err == HSA_STATUS_INFO_BREAK) {
|
||||
err = HSA_STATUS_SUCCESS;
|
||||
}
|
||||
DeviceErrorCheck(err);
|
||||
|
||||
// Get the size of the region we are using for Accelerator Memory allocations:
|
||||
// Get the size of the pool we are using for Accelerator Memory allocations:
|
||||
hsa_region_t *am_region = static_cast<hsa_region_t*>(_acc.get_hsa_am_region());
|
||||
err = hsa_region_get_info(*am_region, HSA_REGION_INFO_SIZE, &prop->totalGlobalMem);
|
||||
DeviceErrorCheck(err);
|
||||
// maxSharedMemoryPerMultiProcessor should be as the same as group memory size.
|
||||
// Group memory will not be paged out, so, the physical memory size is the total shared memory size, and also equal to the group region size.
|
||||
// Group memory will not be paged out, so, the physical memory size is the total shared memory size, and also equal to the group pool size.
|
||||
prop->maxSharedMemoryPerMultiProcessor = prop->totalGlobalMem;
|
||||
|
||||
// Get Max memory clock frequency
|
||||
@@ -882,7 +880,7 @@ hipError_t ihipDevice_t::initProperties(hipDeviceProp_t* prop)
|
||||
prop->arch.hasGlobalFloatAtomicExch = 1;
|
||||
prop->arch.hasSharedInt32Atomics = 1;
|
||||
prop->arch.hasSharedFloatAtomicExch = 1;
|
||||
prop->arch.hasFloatAtomicAdd = 0;
|
||||
prop->arch.hasFloatAtomicAdd = 1; // supported with CAS loop, but is supported
|
||||
prop->arch.hasGlobalInt64Atomics = 1;
|
||||
prop->arch.hasSharedInt64Atomics = 1;
|
||||
prop->arch.hasDoubles = 1;
|
||||
@@ -890,7 +888,7 @@ hipError_t ihipDevice_t::initProperties(hipDeviceProp_t* prop)
|
||||
prop->arch.hasWarpBallot = 1;
|
||||
prop->arch.hasWarpShuffle = 1;
|
||||
prop->arch.hasFunnelShift = 0; // TODO-hcc
|
||||
prop->arch.hasThreadFenceSystem = 0; // TODO-hcc
|
||||
prop->arch.hasThreadFenceSystem = 1;
|
||||
prop->arch.hasSyncThreadsExt = 0; // TODO-hcc
|
||||
prop->arch.hasSurfaceFuncs = 0; // TODO-hcc
|
||||
prop->arch.has3dGrid = 1;
|
||||
|
||||
@@ -0,0 +1,38 @@
|
||||
/*
|
||||
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/* HIT_START
|
||||
* BUILD: %t %s ../test_common.cpp
|
||||
* RUN: %t
|
||||
* HIT_END
|
||||
*/
|
||||
|
||||
#define HIP_ENABLE_PRINTF
|
||||
|
||||
#include"test_common.h"
|
||||
|
||||
__global__ void run_printf(hipLaunchParm lp){
|
||||
printf("Hello World\n");
|
||||
}
|
||||
|
||||
int main(){
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(run_printf), dim3(1), dim3(1), 0, 0);
|
||||
hipDeviceSynchronize();
|
||||
passed();
|
||||
}
|
||||
Reference in New Issue
Block a user