Merge branch 'privatestaging' into p2p
Conflicts:
include/hcc_detail/hip_hcc.h
src/hip_hcc.cpp
[ROCm/clr commit: 8d26dfcde3]
This commit is contained in:
@@ -451,7 +451,7 @@ private:
|
||||
void waitCopy(LockedAccessor_StreamCrit_t &crit, ihipSignal_t *signal);
|
||||
|
||||
// The unsigned return is hipMemcpyKind
|
||||
unsigned resolveMemcpyDirection(bool srcInDeviceMem, bool dstInDeviceMem);
|
||||
unsigned resolveMemcpyDirection(bool srcTracked, bool dstTracked, bool srcInDeviceMem, bool dstInDeviceMem);
|
||||
void setAsyncCopyAgents(unsigned kind, ihipCommand_t *commandType, hsa_agent_t *srcAgent, hsa_agent_t *dstAgent);
|
||||
|
||||
unsigned _device_index; // index into the g_device array
|
||||
|
||||
@@ -401,6 +401,13 @@ __device__ int __all( int input);
|
||||
__device__ int __any( int input);
|
||||
__device__ unsigned long long int __ballot( int input);
|
||||
|
||||
// __ldg function
|
||||
template <typename T>
|
||||
__device__ __forceinline__ T __ldg( const T * addr)
|
||||
{
|
||||
return *addr;
|
||||
}
|
||||
|
||||
// warp shuffle functions
|
||||
#ifdef __cplusplus
|
||||
|
||||
|
||||
@@ -1150,22 +1150,30 @@ void ihipSetTs(hipEvent_t e)
|
||||
|
||||
|
||||
// Resolve hipMemcpyDefault to a known type.
|
||||
unsigned ihipStream_t::resolveMemcpyDirection(bool srcInDeviceMem, bool dstInDeviceMem)
|
||||
unsigned ihipStream_t::resolveMemcpyDirection(bool srcTracked, bool dstTracked, bool srcInDeviceMem, bool dstInDeviceMem)
|
||||
{
|
||||
hipMemcpyKind kind = hipMemcpyDefault;
|
||||
|
||||
if (!srcInDeviceMem && !dstInDeviceMem) {
|
||||
if(!srcTracked && !dstTracked)
|
||||
{
|
||||
kind = hipMemcpyHostToHost;
|
||||
} else if (!srcInDeviceMem && dstInDeviceMem) {
|
||||
kind = hipMemcpyHostToDevice;
|
||||
} else if (srcInDeviceMem && !dstInDeviceMem) {
|
||||
kind = hipMemcpyDeviceToHost;
|
||||
} else if (srcInDeviceMem && dstInDeviceMem) {
|
||||
kind = hipMemcpyDeviceToDevice;
|
||||
}
|
||||
if(!srcTracked && dstTracked)
|
||||
{
|
||||
if(dstInDeviceMem) { kind = hipMemcpyHostToDevice; }
|
||||
else{ kind = hipMemcpyHostToHost; }
|
||||
}
|
||||
if (srcTracked && !dstTracked) {
|
||||
if(srcInDeviceMem) { kind = hipMemcpyDeviceToHost; }
|
||||
else { kind = hipMemcpyHostToHost; }
|
||||
}
|
||||
if (srcTracked && dstTracked) {
|
||||
if(srcInDeviceMem && dstInDeviceMem) { kind = hipMemcpyDeviceToDevice; }
|
||||
if(srcInDeviceMem && !dstInDeviceMem) { kind = hipMemcpyDeviceToHost; }
|
||||
if(!srcInDeviceMem && !dstInDeviceMem) { kind = hipMemcpyHostToHost; }
|
||||
if(!srcInDeviceMem && dstInDeviceMem) { kind = hipMemcpyHostToDevice; }
|
||||
}
|
||||
|
||||
assert (kind != hipMemcpyDefault);
|
||||
|
||||
return kind;
|
||||
}
|
||||
|
||||
@@ -1208,12 +1216,11 @@ void ihipStream_t::copySync(LockedAccessor_StreamCrit_t &crit, void* dst, const
|
||||
bool dstTracked = (hc::am_memtracker_getinfo(&dstPtrInfo, dst) == AM_SUCCESS);
|
||||
bool srcTracked = (hc::am_memtracker_getinfo(&srcPtrInfo, src) == AM_SUCCESS);
|
||||
|
||||
|
||||
// Resolve default to a specific Kind so we know which algorithm to use:
|
||||
if (kind == hipMemcpyDefault) {
|
||||
bool srcInDeviceMem = (srcTracked && srcPtrInfo._isInDeviceMem);
|
||||
bool dstInDeviceMem = (dstTracked && dstPtrInfo._isInDeviceMem);
|
||||
kind = resolveMemcpyDirection(srcInDeviceMem, dstInDeviceMem);
|
||||
kind = resolveMemcpyDirection(srcTracked, dstTracked, srcPtrInfo._isInDeviceMem, dstPtrInfo._isInDeviceMem);
|
||||
};
|
||||
|
||||
hsa_signal_t depSignal;
|
||||
@@ -1229,7 +1236,7 @@ void ihipStream_t::copySync(LockedAccessor_StreamCrit_t &crit, void* dst, const
|
||||
#endif
|
||||
}
|
||||
|
||||
if ((kind == hipMemcpyHostToDevice) && (!srcTracked)) {
|
||||
if (kind == hipMemcpyHostToDevice) {
|
||||
int depSignalCnt = preCopyCommand(crit, NULL, &depSignal, ihipCommandCopyH2D);
|
||||
if (HIP_STAGING_BUFFERS) {
|
||||
tprintf(DB_COPY1, "D2H && !dstTracked: staged copy H2D dst=%p src=%p sz=%zu\n", dst, src, sizeBytes);
|
||||
@@ -1251,7 +1258,7 @@ void ihipStream_t::copySync(LockedAccessor_StreamCrit_t &crit, void* dst, const
|
||||
hc::am_copy(dst, src, sizeBytes);
|
||||
#endif
|
||||
}
|
||||
} else if ((kind == hipMemcpyDeviceToHost) && (!dstTracked)) {
|
||||
} else if (kind == hipMemcpyDeviceToHost) {
|
||||
int depSignalCnt = preCopyCommand(crit, NULL, &depSignal, ihipCommandCopyD2H);
|
||||
if (HIP_STAGING_BUFFERS) {
|
||||
tprintf(DB_COPY1, "D2H && !dstTracked: staged copy D2H dst=%p src=%p sz=%zu\n", dst, src, sizeBytes);
|
||||
@@ -1374,7 +1381,7 @@ void ihipStream_t::copyAsync(void* dst, const void* src, size_t sizeBytes, unsig
|
||||
if (kind == hipMemcpyDefault) {
|
||||
bool srcInDeviceMem = (srcTracked && srcPtrInfo._isInDeviceMem);
|
||||
bool dstInDeviceMem = (dstTracked && dstPtrInfo._isInDeviceMem);
|
||||
kind = resolveMemcpyDirection(srcInDeviceMem, dstInDeviceMem);
|
||||
kind = resolveMemcpyDirection(srcTracked, dstTracked, srcPtrInfo._isInDeviceMem, dstPtrInfo._isInDeviceMem);
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -156,10 +156,10 @@ hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags)
|
||||
if(device){
|
||||
if(flags == hipHostMallocDefault){
|
||||
*ptr = hc::am_alloc(sizeBytes, device->_acc, amHostPinned);
|
||||
if(sizeBytes && (*ptr == NULL)){
|
||||
if(sizeBytes < 1 && (*ptr == NULL)){
|
||||
hip_status = hipErrorMemoryAllocation;
|
||||
}else{
|
||||
hc::am_memtracker_update(*ptr, device->_device_index, 0);
|
||||
hc::am_memtracker_update(*ptr, device->_device_index, amHostPinned);
|
||||
}
|
||||
tprintf(DB_MEM, " %s: pinned ptr=%p\n", __func__, *ptr);
|
||||
} else if(flags & hipHostMallocMapped){
|
||||
|
||||
@@ -41,7 +41,7 @@ hipError_t hipDeviceCanAccessPeer (int* canAccessPeer, int deviceId, int peerDe
|
||||
*canAccessPeer = 0;
|
||||
} else {
|
||||
#if USE_PEER_TO_PEER>=2
|
||||
*canAccessPeer = peerDevice->_acc.get_is_peer(thisDevice->_acc);
|
||||
*canAccessPeer = peerDevice->_acc.get_is_peer(thisDevice->_acc);
|
||||
#else
|
||||
*canAccessPeer = 0;
|
||||
#endif
|
||||
@@ -68,10 +68,11 @@ hipError_t hipDeviceDisablePeerAccess (int peerDeviceId)
|
||||
auto peerDevice = ihipGetDevice(peerDeviceId);
|
||||
if ((thisDevice != NULL) && (peerDevice != NULL)) {
|
||||
#if USE_PEER_TO_PEER>=2
|
||||
bool canAccessPeer = peerDevice->_acc.get_is_peer(thisDevice->_acc);
|
||||
bool canAccessPeer = peerDevice->_acc.get_is_peer(thisDevice->_acc);
|
||||
#else
|
||||
bool canAccessPeer = 0;
|
||||
#endif
|
||||
|
||||
if (! canAccessPeer) {
|
||||
err = hipErrorInvalidDevice; // P2P not allowed between these devices.
|
||||
} else if (thisDevice == peerDevice) {
|
||||
|
||||
@@ -145,6 +145,7 @@ make_hip_executable (hip_popc hip_popc.cpp)
|
||||
make_hip_executable (hip_clz hip_clz.cpp)
|
||||
make_hip_executable (hip_brev hip_brev.cpp)
|
||||
make_hip_executable (hip_ffs hip_ffs.cpp)
|
||||
make_hip_executable (hip_ldg hip_ldg.cpp)
|
||||
make_hip_executable (hipGetDeviceAttribute hipGetDeviceAttribute.cpp)
|
||||
make_hip_executable (hipEnvVar hipEnvVar.cpp)
|
||||
make_hip_executable (hipEnvVarDriver hipEnvVarDriver.cpp)
|
||||
@@ -175,7 +176,7 @@ make_hip_executable (hipFuncGetDevice hipFuncGetDevice.cpp)
|
||||
make_hip_executable (hipFuncSetDevice hipFuncSetDevice.cpp)
|
||||
make_hip_executable (hipFuncDeviceSynchronize hipFuncDeviceSynchronize.cpp)
|
||||
make_hip_executable (hipPeerToPeer_simple hipPeerToPeer_simple.cpp)
|
||||
|
||||
make_hip_executable (hipMemcpyAll hipMemcpyAll.cpp)
|
||||
make_hip_executable (hipMultiThreadDevice hipMultiThreadDevice.cpp)
|
||||
|
||||
make_test(hip_ballot " " )
|
||||
@@ -184,6 +185,7 @@ make_test(hip_popc " " )
|
||||
make_test(hip_brev " " )
|
||||
make_test(hip_clz " " )
|
||||
make_test(hip_ffs " " )
|
||||
make_test(hip_ldg " " )
|
||||
make_test(hipEventRecord --iterations 10)
|
||||
make_test(hipMemset " " )
|
||||
make_test(hipMemset --N 10 --memsetval 0x42 ) # small copy, just 10 bytes.
|
||||
@@ -191,6 +193,7 @@ make_test(hipMemset --N 10013 --memsetval 0x5a ) # oddball size.
|
||||
make_test(hipMemset --N 256M --memsetval 0xa6 ) # big copy
|
||||
make_test(hipGridLaunch " " )
|
||||
make_test(hipEnvVarDriver " " )
|
||||
make_test(hipMemcpyAll " ")
|
||||
#TODO -reenable
|
||||
#make_test(hipPointerAttrib " " )
|
||||
#make_test(hipMultiThreadStreams1 " " ) Fails if 0x3 specified, passes otherwise.
|
||||
|
||||
@@ -0,0 +1,128 @@
|
||||
/*
|
||||
Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
#include<hip_runtime.h>
|
||||
#include<iostream>
|
||||
#include<assert.h>
|
||||
#include"test_common.h"
|
||||
|
||||
#define len 1024*1024
|
||||
#define size len * sizeof(float)
|
||||
|
||||
template<typename T>
|
||||
void hmemset(T *ptr, T value)
|
||||
{
|
||||
for(int i=0;i<len;i++){
|
||||
ptr[i] = value;
|
||||
}
|
||||
}
|
||||
|
||||
int main(){
|
||||
|
||||
int num;
|
||||
hipGetDeviceCount(&num);
|
||||
if(num < 2)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
float *h0, *h1;
|
||||
float *ph0, *ph1;
|
||||
float *d0, *d1;
|
||||
h0 = new float[len];
|
||||
h1 = new float[len];
|
||||
hmemset(h0, 1.0f);
|
||||
int gpu0 = 0, gpu1 = 1;
|
||||
hipSetDevice(gpu0);
|
||||
hipHostMalloc((void**)&ph0, size);
|
||||
hipMalloc(&d0, size);
|
||||
hipSetDevice(gpu1);
|
||||
hipHostMalloc((void**)&ph1, size);
|
||||
hipMalloc(&d1, size);
|
||||
hipSetDevice(gpu0);
|
||||
|
||||
|
||||
|
||||
hipMemcpy(h1, h0, size, hipMemcpyDefault);
|
||||
hipMemcpy(ph0, h1, size, hipMemcpyDefault);
|
||||
hipMemcpy(ph1, ph0, size, hipMemcpyDefault);
|
||||
assert(h0[0] == ph1[0]);
|
||||
hmemset(ph1, 0.0f);
|
||||
hipMemcpy(h0, ph1, size, hipMemcpyDefault);
|
||||
assert(h0[0] == 0.0f);
|
||||
|
||||
|
||||
|
||||
|
||||
hipSetDevice(gpu0);
|
||||
hmemset(ph0, 2.0f);
|
||||
hipMemcpy(d0, ph0, size, hipMemcpyDefault);
|
||||
hipMemcpy(h0, d0, size, hipMemcpyDefault);
|
||||
|
||||
assert(h0[0] == ph0[0]);
|
||||
hmemset(h0, 3.0f);
|
||||
hipMemcpy(d0, h0, size, hipMemcpyDefault);
|
||||
|
||||
hipMemcpy(ph0, d0, size, hipMemcpyDefault);
|
||||
|
||||
assert(h0[0] == ph0[0]);
|
||||
|
||||
hipSetDevice(gpu1);
|
||||
hmemset(ph1, 2.0f);
|
||||
hipMemcpy(d1, ph1, size, hipMemcpyDefault);
|
||||
|
||||
hipMemcpy(h1, d1, size, hipMemcpyDefault);
|
||||
|
||||
assert(h1[0] == ph1[0]);
|
||||
hmemset(h1, 3.0f);
|
||||
hipMemcpy(d1, h1, size, hipMemcpyDefault);
|
||||
|
||||
hipMemcpy(ph1, d1, size, hipMemcpyDefault);
|
||||
|
||||
assert(h1[0] == ph1[0]);
|
||||
|
||||
hipSetDevice(gpu0);
|
||||
hmemset(ph0, 4.0f);
|
||||
hipMemcpy(d0, ph0, size, hipMemcpyDefault);
|
||||
|
||||
hipMemcpy(ph0, d0, size, hipMemcpyDefault);
|
||||
|
||||
hipMemcpy(h0, d0, size, hipMemcpyDefault);
|
||||
|
||||
assert(ph0[0] == 4.0f);
|
||||
assert(h0[0] == 4.0f);
|
||||
|
||||
hipSetDevice(gpu1);
|
||||
hmemset(ph1, 5.0f);
|
||||
hipMemcpy(d1, ph1, size, hipMemcpyDefault);
|
||||
|
||||
hipMemcpy(ph1, d1, size, hipMemcpyDefault);
|
||||
|
||||
hipMemcpy(h1, d1, size, hipMemcpyDefault);
|
||||
|
||||
assert(ph1[0] == 5.0f);
|
||||
assert(h1[0] == 5.0f);
|
||||
|
||||
hipSetDevice(gpu0);
|
||||
hipMemcpy(d0, ph1, size, hipMemcpyDefault);
|
||||
|
||||
hipMemcpy(d1, d0, size, hipMemcpyDefault);
|
||||
passed();
|
||||
}
|
||||
@@ -0,0 +1,150 @@
|
||||
/*
|
||||
Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include <algorithm>
|
||||
#include <stdlib.h>
|
||||
#include<iostream>
|
||||
#include "hip_runtime.h"
|
||||
|
||||
|
||||
#define HIP_ASSERT(x) (assert((x)==hipSuccess))
|
||||
|
||||
|
||||
#define WIDTH 1024
|
||||
#define HEIGHT 1024
|
||||
|
||||
#define NUM (WIDTH*HEIGHT)
|
||||
|
||||
#define THREADS_PER_BLOCK_X 16
|
||||
#define THREADS_PER_BLOCK_Y 16
|
||||
#define THREADS_PER_BLOCK_Z 1
|
||||
|
||||
__global__ void
|
||||
vectoradd_float(hipLaunchParm lp,
|
||||
float* a, const float* bm, const float* cm, int width, int height)
|
||||
|
||||
{
|
||||
int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
|
||||
int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y;
|
||||
|
||||
int i = y * width + x;
|
||||
if ( i < (width * height)) {
|
||||
a[i] = __ldg(&bm[i]) + __ldg(&cm[i]);
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
#if 0
|
||||
__kernel__ void vectoradd_float(float* a, const float* b, const float* c, int width, int height) {
|
||||
|
||||
|
||||
int x = blockDimX * blockIdx.x + threadIdx.x;
|
||||
int y = blockDimY * blockIdy.y + threadIdx.y;
|
||||
|
||||
int i = y * width + x;
|
||||
if ( i < (width * height)) {
|
||||
a[i] = b[i] + c[i];
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
using namespace std;
|
||||
|
||||
int main() {
|
||||
|
||||
float* hostA;
|
||||
float* hostB;
|
||||
float* hostC;
|
||||
|
||||
float* deviceA;
|
||||
float* deviceB;
|
||||
float* deviceC;
|
||||
|
||||
hipDeviceProp_t devProp;
|
||||
hipGetDeviceProperties(&devProp, 0);
|
||||
cout << " System minor " << devProp.minor << endl;
|
||||
cout << " System major " << devProp.major << endl;
|
||||
cout << " agent prop name " << devProp.name << endl;
|
||||
|
||||
|
||||
|
||||
cout << "__ldg " << endl ;
|
||||
|
||||
|
||||
int i;
|
||||
int errors;
|
||||
|
||||
hostA = (float*)malloc(NUM * sizeof(float));
|
||||
hostB = (float*)malloc(NUM * sizeof(float));
|
||||
hostC = (float*)malloc(NUM * sizeof(float));
|
||||
|
||||
// initialize the input data
|
||||
for (i = 0; i < NUM; i++) {
|
||||
hostB[i] = (float)i;
|
||||
hostC[i] = (float)i*100.0f;
|
||||
}
|
||||
|
||||
HIP_ASSERT(hipMalloc((void**)&deviceA, NUM * sizeof(float)));
|
||||
HIP_ASSERT(hipMalloc((void**)&deviceB, NUM * sizeof(float)));
|
||||
HIP_ASSERT(hipMalloc((void**)&deviceC, NUM * sizeof(float)));
|
||||
|
||||
HIP_ASSERT(hipMemcpy(deviceB, hostB, NUM*sizeof(float), hipMemcpyHostToDevice));
|
||||
HIP_ASSERT(hipMemcpy(deviceC, hostC, NUM*sizeof(float), hipMemcpyHostToDevice));
|
||||
|
||||
|
||||
hipLaunchKernel(vectoradd_float,
|
||||
dim3(WIDTH/THREADS_PER_BLOCK_X, HEIGHT/THREADS_PER_BLOCK_Y),
|
||||
dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y),
|
||||
0, 0,
|
||||
deviceA ,deviceB ,deviceC ,WIDTH ,HEIGHT);
|
||||
|
||||
|
||||
HIP_ASSERT(hipMemcpy(hostA, deviceA, NUM*sizeof(float), hipMemcpyDeviceToHost));
|
||||
|
||||
// verify the results
|
||||
errors = 0;
|
||||
for (i = 0; i < NUM; i++) {
|
||||
if (hostA[i] != (hostB[i] + hostC[i])) {
|
||||
errors++;
|
||||
}
|
||||
}
|
||||
if (errors!=0) {
|
||||
printf("FAILED: %d errors\n",errors);
|
||||
} else {
|
||||
printf ("PASSED!\n");
|
||||
}
|
||||
|
||||
HIP_ASSERT(hipFree(deviceA));
|
||||
HIP_ASSERT(hipFree(deviceB));
|
||||
HIP_ASSERT(hipFree(deviceC));
|
||||
|
||||
free(hostA);
|
||||
free(hostB);
|
||||
free(hostC);
|
||||
|
||||
//hipResetDefaultAccelerator();
|
||||
|
||||
return errors;
|
||||
}
|
||||
Reference in New Issue
Block a user