@@ -101,6 +101,7 @@ if(HIP_PLATFORM STREQUAL "hcc")
|
||||
src/hip_device.cpp
|
||||
src/hip_error.cpp
|
||||
src/hip_event.cpp
|
||||
src/hip_ldg.cpp
|
||||
src/hip_memory.cpp
|
||||
src/hip_peer.cpp
|
||||
src/hip_stream.cpp
|
||||
|
||||
@@ -200,7 +200,7 @@ if ($needHipHcc) {
|
||||
if ($HIP_USE_SHARED_LIBRARY) {
|
||||
$HIPLDFLAGS .= " -L$HIP_PATH/lib -Wl,--rpath=$HIP_PATH/lib -lhip_hcc";
|
||||
} else {
|
||||
$HIPLDFLAGS .= " $HIP_PATH/lib/device_util.cpp.o $HIP_PATH/lib/hip_device.cpp.o $HIP_PATH/lib/hip_error.cpp.o $HIP_PATH/lib/hip_event.cpp.o $HIP_PATH/lib/hip_hcc.cpp.o $HIP_PATH/lib/hip_memory.cpp.o $HIP_PATH/lib/hip_peer.cpp.o $HIP_PATH/lib/hip_stream.cpp.o $HIP_PATH/lib/staging_buffer.cpp.o";
|
||||
$HIPLDFLAGS .= " $HIP_PATH/lib/device_util.cpp.o $HIP_PATH/lib/hip_device.cpp.o $HIP_PATH/lib/hip_error.cpp.o $HIP_PATH/lib/hip_event.cpp.o $HIP_PATH/lib/hip_hcc.cpp.o $HIP_PATH/lib/hip_memory.cpp.o $HIP_PATH/lib/hip_peer.cpp.o $HIP_PATH/lib/hip_stream.cpp.o $HIP_PATH/lib/staging_buffer.cpp.o $HIP_PATH/lib/hip_ldg.cpp.o";
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,39 @@
|
||||
#ifndef HIP_LDG_H
|
||||
#define HIP_LDG_H
|
||||
|
||||
#if __HCC__
|
||||
#include"hip_vector_types.h"
|
||||
#include"host_defines.h"
|
||||
__device__ char __ldg(const char* );
|
||||
__device__ signed char __ldg(const signed char* );
|
||||
__device__ short __ldg(const short* );
|
||||
__device__ int __ldg(const int* );
|
||||
__device__ long __ldg(const long* );
|
||||
__device__ long long __ldg(const long long* );
|
||||
__device__ char2 __ldg(const char2* );
|
||||
__device__ char4 __ldg(const char4* );
|
||||
__device__ short2 __ldg(const short2* );
|
||||
__device__ short4 __ldg(const short4* );
|
||||
__device__ int2 __ldg(const int2* );
|
||||
__device__ int4 __ldg(const int4* );
|
||||
__device__ longlong2 __ldg(const longlong2* );
|
||||
__device__ unsigned char __ldg(const unsigned char* );
|
||||
__device__ unsigned short __ldg(const unsigned short* );
|
||||
__device__ unsigned int __ldg(const unsigned int* );
|
||||
__device__ unsigned long __ldg(const unsigned long* );
|
||||
__device__ unsigned long long __ldg(const unsigned long long* );
|
||||
__device__ uchar2 __ldg(const uchar2* );
|
||||
__device__ uchar4 __ldg(const uchar4* );
|
||||
__device__ ushort2 __ldg(const ushort2* );
|
||||
__device__ uint2 __ldg(const uint2* );
|
||||
__device__ uint4 __ldg(const uint4* );
|
||||
__device__ ulonglong2 __ldg(const ulonglong2* );
|
||||
__device__ float __ldg(const float* );
|
||||
__device__ double __ldg(const double* );
|
||||
__device__ float2 __ldg(const float2* );
|
||||
__device__ float4 __ldg(const float4* );
|
||||
__device__ double2 __ldg(const double2* );
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@@ -56,6 +56,7 @@ extern int HIP_TRACE_API;
|
||||
#define hipLaunchParm grid_launch_parm
|
||||
#ifdef __cplusplus
|
||||
#include <hcc_detail/hip_texture.h>
|
||||
#include <hcc_detail/hip_ldg.h>
|
||||
#endif
|
||||
#include <hcc_detail/host_defines.h>
|
||||
// TODO-HCC remove old definitions ; ~1602 hcc supports __HCC_ACCELERATOR__ define.
|
||||
|
||||
@@ -1236,12 +1236,12 @@ void ihipStream_t::copySync(LockedAccessor_StreamCrit_t &crit, void* dst, const
|
||||
|
||||
bool dstTracked = (hc::am_memtracker_getinfo(&dstPtrInfo, dst) == AM_SUCCESS);
|
||||
bool srcTracked = (hc::am_memtracker_getinfo(&srcPtrInfo, src) == AM_SUCCESS);
|
||||
bool srcInDeviceMem = srcPtrInfo._isInDeviceMem;
|
||||
bool dstInDeviceMem = dstPtrInfo._isInDeviceMem;
|
||||
|
||||
// Resolve default to a specific Kind so we know which algorithm to use:
|
||||
if (kind == hipMemcpyDefault) {
|
||||
bool srcInDeviceMem = (srcTracked && srcPtrInfo._isInDeviceMem);
|
||||
bool dstInDeviceMem = (dstTracked && dstPtrInfo._isInDeviceMem);
|
||||
kind = resolveMemcpyDirection(srcTracked, dstTracked, srcPtrInfo._isInDeviceMem, dstPtrInfo._isInDeviceMem);
|
||||
kind = resolveMemcpyDirection(srcTracked, dstTracked, srcInDeviceMem, dstInDeviceMem);
|
||||
};
|
||||
|
||||
hsa_signal_t depSignal;
|
||||
@@ -1259,26 +1259,26 @@ void ihipStream_t::copySync(LockedAccessor_StreamCrit_t &crit, void* dst, const
|
||||
|
||||
if (kind == hipMemcpyHostToDevice) {
|
||||
int depSignalCnt = preCopyCommand(crit, NULL, &depSignal, ihipCommandCopyH2D);
|
||||
if (HIP_STAGING_BUFFERS) {
|
||||
tprintf(DB_COPY1, "D2H && !dstTracked: staged copy H2D dst=%p src=%p sz=%zu\n", dst, src, sizeBytes);
|
||||
if (HIP_STAGING_BUFFERS) {
|
||||
tprintf(DB_COPY1, "D2H && !dstTracked: staged copy H2D dst=%p src=%p sz=%zu\n", dst, src, sizeBytes);
|
||||
|
||||
if (HIP_PININPLACE) {
|
||||
device->_staging_buffer[0]->CopyHostToDevicePinInPlace(dst, src, sizeBytes, depSignalCnt ? &depSignal : NULL);
|
||||
} else {
|
||||
device->_staging_buffer[0]->CopyHostToDevice(dst, src, sizeBytes, depSignalCnt ? &depSignal : NULL);
|
||||
}
|
||||
if (HIP_PININPLACE) {
|
||||
device->_staging_buffer[0]->CopyHostToDevicePinInPlace(dst, src, sizeBytes, depSignalCnt ? &depSignal : NULL);
|
||||
} else {
|
||||
device->_staging_buffer[0]->CopyHostToDevice(dst, src, sizeBytes, depSignalCnt ? &depSignal : NULL);
|
||||
}
|
||||
|
||||
// The copy waits for inputs and then completes before returning so can reset queue to empty:
|
||||
this->wait(crit, true);
|
||||
} else {
|
||||
// TODO - remove, slow path.
|
||||
tprintf(DB_COPY1, "H2D && ! srcTracked: am_copy dst=%p src=%p sz=%zu\n", dst, src, sizeBytes);
|
||||
// The copy waits for inputs and then completes before returning so can reset queue to empty:
|
||||
this->wait(crit, true);
|
||||
} else {
|
||||
// TODO - remove, slow path.
|
||||
tprintf(DB_COPY1, "H2D && ! srcTracked: am_copy dst=%p src=%p sz=%zu\n", dst, src, sizeBytes);
|
||||
#if USE_AV_COPY
|
||||
_av.copy(src,dst,sizeBytes);
|
||||
_av.copy(src,dst,sizeBytes);
|
||||
#else
|
||||
hc::am_copy(dst, src, sizeBytes);
|
||||
hc::am_copy(dst, src, sizeBytes);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
} else if (kind == hipMemcpyDeviceToHost) {
|
||||
int depSignalCnt = preCopyCommand(crit, NULL, &depSignal, ihipCommandCopyD2H);
|
||||
if (HIP_STAGING_BUFFERS) {
|
||||
|
||||
@@ -0,0 +1,42 @@
|
||||
#include"hcc_detail/hip_ldg.h"
|
||||
|
||||
__device__ char __ldg(const char* ptr)
|
||||
{
|
||||
return *ptr;
|
||||
}
|
||||
|
||||
__device__ signed char __ldg(const signed char* ptr)
|
||||
{
|
||||
return ptr[0];
|
||||
}
|
||||
|
||||
__device__ short __ldg(const short* ptr)
|
||||
{
|
||||
return ptr[0];
|
||||
}
|
||||
|
||||
__device__ int __ldg(const int* ptr)
|
||||
{
|
||||
return ptr[0];
|
||||
}
|
||||
|
||||
__device__ long long __ldg(const long long* ptr)
|
||||
{
|
||||
return ptr[0];
|
||||
}
|
||||
|
||||
|
||||
__device__ int2 __ldg(const int2* ptr)
|
||||
{
|
||||
return ptr[0];
|
||||
}
|
||||
|
||||
__device__ int4 __ldg(const int4* ptr)
|
||||
{
|
||||
return ptr[0];
|
||||
}
|
||||
|
||||
__device__ float __ldg(const float* ptr)
|
||||
{
|
||||
return ptr[0];
|
||||
}
|
||||
@@ -9,13 +9,13 @@ OBJECTS=$(SOURCES:.cpp=.o)
|
||||
EXECUTABLE=hipMemset
|
||||
|
||||
$(EXECUTABLE): $(HIP_DEPS) $(OBJECTS)
|
||||
$(HCC) $(HLDFLAGS) $(OBJECTS) -o $@
|
||||
$(HCC) $(HLDFLAGS) $(OBJECTS) -o $@
|
||||
|
||||
.cpp.o:
|
||||
$(HCC) $(HCFLAGS) -c $< -o $@
|
||||
@$(CC) -MM -MT $@ $(CFLAGS) -c $< > $(@:.o=.d)
|
||||
$(HCC) $(HCFLAGS) -c $< -o $@
|
||||
@$(CC) -MM -MT $@ $(CFLAGS) -c $< > $(@:.o=.d)
|
||||
|
||||
clean: hip_clean
|
||||
rm -rf $(EXECUTABLE) $(OBJECTS)
|
||||
rm -rf $(EXECUTABLE) $(OBJECTS)
|
||||
|
||||
include $(HIP_PATH)/examples/common/hip.epilogue.make
|
||||
|
||||
@@ -30,18 +30,19 @@ THE SOFTWARE.
|
||||
#define HIP_ASSERT(x) (assert((x)==hipSuccess))
|
||||
|
||||
|
||||
#define WIDTH 1024
|
||||
#define HEIGHT 1024
|
||||
#define WIDTH 8
|
||||
#define HEIGHT 8
|
||||
|
||||
#define NUM (WIDTH*HEIGHT)
|
||||
|
||||
#define THREADS_PER_BLOCK_X 16
|
||||
#define THREADS_PER_BLOCK_Y 16
|
||||
#define THREADS_PER_BLOCK_X 8
|
||||
#define THREADS_PER_BLOCK_Y 8
|
||||
#define THREADS_PER_BLOCK_Z 1
|
||||
|
||||
template<typename T>
|
||||
__global__ void
|
||||
vectoradd_float(hipLaunchParm lp,
|
||||
float* a, const float* bm, const float* cm, int width, int height)
|
||||
T* a, const T* bm, const T* cm, int width, int height)
|
||||
|
||||
{
|
||||
int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
|
||||
@@ -72,46 +73,35 @@ __kernel__ void vectoradd_float(float* a, const float* b, const float* c, int wi
|
||||
|
||||
using namespace std;
|
||||
|
||||
int main() {
|
||||
|
||||
float* hostA;
|
||||
float* hostB;
|
||||
float* hostC;
|
||||
|
||||
float* deviceA;
|
||||
float* deviceB;
|
||||
float* deviceC;
|
||||
|
||||
hipDeviceProp_t devProp;
|
||||
hipGetDeviceProperties(&devProp, 0);
|
||||
cout << " System minor " << devProp.minor << endl;
|
||||
cout << " System major " << devProp.major << endl;
|
||||
cout << " agent prop name " << devProp.name << endl;
|
||||
|
||||
|
||||
|
||||
cout << "__ldg " << endl ;
|
||||
template<typename T>
|
||||
bool dataTypesRun(){
|
||||
T* hostA;
|
||||
T* hostB;
|
||||
T* hostC;
|
||||
|
||||
T* deviceA;
|
||||
T* deviceB;
|
||||
T* deviceC;
|
||||
|
||||
int i;
|
||||
int errors;
|
||||
|
||||
hostA = (float*)malloc(NUM * sizeof(float));
|
||||
hostB = (float*)malloc(NUM * sizeof(float));
|
||||
hostC = (float*)malloc(NUM * sizeof(float));
|
||||
hostA = (T*)malloc(NUM * sizeof(T));
|
||||
hostB = (T*)malloc(NUM * sizeof(T));
|
||||
hostC = (T*)malloc(NUM * sizeof(T));
|
||||
|
||||
// initialize the input data
|
||||
for (i = 0; i < NUM; i++) {
|
||||
hostB[i] = (float)i;
|
||||
hostC[i] = (float)i*100.0f;
|
||||
hostB[i] = (T)i;
|
||||
hostC[i] = (T)i;
|
||||
}
|
||||
|
||||
HIP_ASSERT(hipMalloc((void**)&deviceA, NUM * sizeof(float)));
|
||||
HIP_ASSERT(hipMalloc((void**)&deviceB, NUM * sizeof(float)));
|
||||
HIP_ASSERT(hipMalloc((void**)&deviceC, NUM * sizeof(float)));
|
||||
HIP_ASSERT(hipMalloc((void**)&deviceA, NUM * sizeof(T)));
|
||||
HIP_ASSERT(hipMalloc((void**)&deviceB, NUM * sizeof(T)));
|
||||
HIP_ASSERT(hipMalloc((void**)&deviceC, NUM * sizeof(T)));
|
||||
|
||||
HIP_ASSERT(hipMemcpy(deviceB, hostB, NUM*sizeof(float), hipMemcpyHostToDevice));
|
||||
HIP_ASSERT(hipMemcpy(deviceC, hostC, NUM*sizeof(float), hipMemcpyHostToDevice));
|
||||
HIP_ASSERT(hipMemcpy(deviceB, hostB, NUM*sizeof(T), hipMemcpyHostToDevice));
|
||||
HIP_ASSERT(hipMemcpy(deviceC, hostC, NUM*sizeof(T), hipMemcpyHostToDevice));
|
||||
|
||||
|
||||
hipLaunchKernel(vectoradd_float,
|
||||
@@ -121,8 +111,9 @@ int main() {
|
||||
deviceA ,deviceB ,deviceC ,WIDTH ,HEIGHT);
|
||||
|
||||
|
||||
HIP_ASSERT(hipMemcpy(hostA, deviceA, NUM*sizeof(float), hipMemcpyDeviceToHost));
|
||||
HIP_ASSERT(hipMemcpy(hostA, deviceA, NUM*sizeof(T), hipMemcpyDeviceToHost));
|
||||
|
||||
bool ret = false;
|
||||
// verify the results
|
||||
errors = 0;
|
||||
for (i = 0; i < NUM; i++) {
|
||||
@@ -132,8 +123,10 @@ int main() {
|
||||
}
|
||||
if (errors!=0) {
|
||||
printf("FAILED: %d errors\n",errors);
|
||||
ret = false;
|
||||
} else {
|
||||
printf ("PASSED!\n");
|
||||
ret = true;
|
||||
}
|
||||
|
||||
HIP_ASSERT(hipFree(deviceA));
|
||||
@@ -144,6 +137,25 @@ int main() {
|
||||
free(hostB);
|
||||
free(hostC);
|
||||
|
||||
return ret;
|
||||
|
||||
}
|
||||
|
||||
|
||||
int main() {
|
||||
|
||||
hipDeviceProp_t devProp;
|
||||
hipGetDeviceProperties(&devProp, 0);
|
||||
cout << " System minor " << devProp.minor << endl;
|
||||
cout << " System major " << devProp.major << endl;
|
||||
cout << " agent prop name " << devProp.name << endl;
|
||||
|
||||
int errors;
|
||||
errors = dataTypesRun<char>();
|
||||
errors = dataTypesRun<signed char>();
|
||||
errors = dataTypesRun<short>();
|
||||
errors = dataTypesRun<int>();
|
||||
cout << "__ldg " << endl ;
|
||||
//hipResetDefaultAccelerator();
|
||||
|
||||
return errors;
|
||||
|
||||
Criar uma nova questão referindo esta
Bloquear um utilizador