SWDEV-286322 - Fix EOL in files
Change-Id: I1746e80aceeeaa4fc5df65c858f4816d99010186
Bu işleme şunda yer alıyor:
@@ -1,308 +1,308 @@
|
||||
/*
|
||||
Copyright (c) 2015-present Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/* HIT_START
|
||||
* BUILD: %t %s ../../src/test_common.cpp EXCLUDE_HIP_PLATFORM nvidia
|
||||
* TEST: %t
|
||||
* HIT_END
|
||||
*/
|
||||
|
||||
#include <iostream>
|
||||
#include <chrono>
|
||||
#include "test_common.h"
|
||||
#include <hip/hip_vector_types.h>
|
||||
#include <vector>
|
||||
|
||||
using namespace std;
|
||||
|
||||
#define NUM_TYPES 3
|
||||
vector<string> types= {"float", "float2", "float4"};
|
||||
vector<unsigned int> typeSizes = {4, 8, 16};
|
||||
|
||||
#define NUM_SIZES 12
|
||||
vector<unsigned int> sizes = {1, 2, 4, 8, 16, 32,
|
||||
64, 128, 256, 512, 1024, 2048};
|
||||
|
||||
#define NUM_BUFS 6
|
||||
#define MAX_BUFS (1 << (NUM_BUFS - 1))
|
||||
|
||||
template <typename T>
|
||||
__global__ void sampleRate(T * outBuffer, unsigned int inBufSize, unsigned int writeIt,
|
||||
T **inBuffer, int numBufs) {
|
||||
|
||||
uint gid = (blockIdx.x * blockDim.x + threadIdx.x);
|
||||
uint inputIdx = gid % inBufSize;
|
||||
|
||||
T tmp = (T)0.0f;
|
||||
for(int i = 0; i < numBufs; i++) {
|
||||
tmp += *(*(inBuffer+i)+inputIdx);
|
||||
}
|
||||
|
||||
if (writeIt*(unsigned int)tmp.x) {
|
||||
outBuffer[gid] = tmp;
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
__global__ void sampleRateFloat(T * outBuffer, unsigned int inBufSize, unsigned int writeIt,
|
||||
T ** inBuffer, int numBufs) {
|
||||
|
||||
uint gid = (blockIdx.x * blockDim.x + threadIdx.x);
|
||||
uint inputIdx = gid % inBufSize;
|
||||
|
||||
T tmp = (T)0.0f;
|
||||
|
||||
for(int i = 0; i < numBufs; i++) {
|
||||
tmp += *((*inBuffer+i)+inputIdx);
|
||||
}
|
||||
|
||||
if (writeIt*(unsigned int)tmp) {
|
||||
outBuffer[gid] = tmp;
|
||||
}
|
||||
};
|
||||
|
||||
class hipPerfSampleRate {
|
||||
public:
|
||||
hipPerfSampleRate();
|
||||
~hipPerfSampleRate();
|
||||
|
||||
void open(void);
|
||||
void run(unsigned int testCase);
|
||||
void close(void);
|
||||
|
||||
// array of funtion pointers
|
||||
typedef void (hipPerfSampleRate::*funPtr)(void * outBuffer, unsigned int
|
||||
inBufSize, unsigned int writeIt, void **inBuffer, int numBufs, int grids, int blocks,
|
||||
int threads_per_block);
|
||||
|
||||
// Wrappers
|
||||
void float_kernel(void * outBuffer, unsigned int
|
||||
inBufSize, unsigned int writeIt, void **inBuffer, int numBufs, int grids, int blocks,
|
||||
int threads_per_block);
|
||||
|
||||
void float2_kernel(void * outBuffer, unsigned int
|
||||
inBufSize, unsigned int writeIt, void **inBuffer, int numBufs, int grids, int blocks,
|
||||
int threads_per_block);
|
||||
|
||||
void float4_kernel(void * outBuffer, unsigned int
|
||||
inBufSize, unsigned int writeIt, void **inBuffer, int numBufs, int grids, int blocks,
|
||||
int threads_per_block);
|
||||
|
||||
private:
|
||||
void setData(void *ptr, unsigned int value);
|
||||
void checkData(uint *ptr);
|
||||
|
||||
unsigned int width_;
|
||||
unsigned int bufSize_;
|
||||
unsigned long long totalIters = 0;
|
||||
int numCUs;
|
||||
|
||||
unsigned int outBufSize_;
|
||||
static const unsigned int MAX_ITERATIONS = 25;
|
||||
unsigned int numBufs_;
|
||||
unsigned int typeIdx_;
|
||||
};
|
||||
|
||||
|
||||
hipPerfSampleRate::hipPerfSampleRate() {}
|
||||
|
||||
hipPerfSampleRate::~hipPerfSampleRate() {}
|
||||
|
||||
void hipPerfSampleRate::open(void) {
|
||||
|
||||
int nGpu = 0;
|
||||
HIPCHECK(hipGetDeviceCount(&nGpu));
|
||||
if (nGpu < 1) {
|
||||
std::cout << "info: didn't find any GPU! skipping the test!\n";
|
||||
passed();
|
||||
return;
|
||||
}
|
||||
|
||||
int deviceId = 0;
|
||||
hipDeviceProp_t props = {0};
|
||||
props = {0};
|
||||
HIPCHECK(hipSetDevice(deviceId));
|
||||
HIPCHECK(hipGetDeviceProperties(&props, deviceId));
|
||||
std::cout << "info: running on bus " << "0x" << props.pciBusID << " " << props.name
|
||||
<< " with " << props.multiProcessorCount << " CUs" << " and device id: " << deviceId
|
||||
<< std::endl;
|
||||
numCUs = props.multiProcessorCount;
|
||||
}
|
||||
|
||||
|
||||
void hipPerfSampleRate::close() {
|
||||
|
||||
}
|
||||
|
||||
|
||||
// Wrappers for the kernel launches
|
||||
void hipPerfSampleRate::float_kernel(void * outBuffer, unsigned int inBufSize,
|
||||
unsigned int writeIt, void **inBuffer,
|
||||
int numBufs, int grids, int blocks, int threads_per_block) {
|
||||
|
||||
hipLaunchKernelGGL(sampleRateFloat<float>, dim3(grids, grids, grids), dim3 (blocks), 0, 0,
|
||||
(float*)outBuffer, inBufSize, writeIt, (float**)inBuffer, numBufs);
|
||||
|
||||
}
|
||||
|
||||
void hipPerfSampleRate::float2_kernel(void * outBuffer, unsigned int inBufSize,
|
||||
unsigned int writeIt, void **inBuffer,
|
||||
int grids, int blocks, int threads_per_block, int numBufs) {
|
||||
|
||||
hipLaunchKernelGGL(sampleRate<float2>, dim3(grids, grids, grids), dim3(blocks), 0, 0,
|
||||
(float2 *)outBuffer, inBufSize, writeIt, (float2**)inBuffer, numBufs);
|
||||
}
|
||||
|
||||
void hipPerfSampleRate::float4_kernel(void * outBuffer, unsigned int inBufSize,
|
||||
unsigned int writeIt, void **inBuffer,
|
||||
int grids, int blocks, int threads_per_block, int numBufs) {
|
||||
|
||||
hipLaunchKernelGGL(sampleRate<float4>, dim3(grids, grids, grids), dim3(blocks), 0, 0,
|
||||
(float4 *) outBuffer, inBufSize, writeIt, (float4**)inBuffer, numBufs);
|
||||
}
|
||||
|
||||
void hipPerfSampleRate::run(unsigned int test) {
|
||||
|
||||
funPtr p[] = {&hipPerfSampleRate::float_kernel, &hipPerfSampleRate::float2_kernel,
|
||||
&hipPerfSampleRate::float4_kernel};
|
||||
|
||||
// We compute a square domain
|
||||
width_ = sizes[test % NUM_SIZES];
|
||||
typeIdx_ = (test / NUM_SIZES) % NUM_TYPES;
|
||||
bufSize_ = width_ * width_ * typeSizes[typeIdx_];
|
||||
numBufs_ = (1 << (test / (NUM_SIZES * NUM_TYPES)));
|
||||
|
||||
void * hOutPtr;
|
||||
void * dOutPtr;
|
||||
void * hInPtr[numBufs_];
|
||||
void ** dPtr;
|
||||
void * dInPtr[numBufs_];
|
||||
|
||||
outBufSize_ =
|
||||
sizes[NUM_SIZES - 1] * sizes[NUM_SIZES - 1] * typeSizes[NUM_TYPES - 1];
|
||||
|
||||
// Allocate memory on the host and device
|
||||
HIPCHECK(hipHostMalloc((void **)&hOutPtr, outBufSize_, hipHostMallocDefault));
|
||||
setData((void *)hOutPtr, 0xdeadbeef);
|
||||
HIPCHECK(hipMalloc((uint **)&dOutPtr, outBufSize_));
|
||||
|
||||
// Allocate 2D array in Device
|
||||
hipMalloc((void **)&dPtr, numBufs_* sizeof(void *));
|
||||
|
||||
for (uint i = 0; i < numBufs_; i++) {
|
||||
HIPCHECK(hipHostMalloc((void **)&hInPtr[i], bufSize_, hipHostMallocDefault));
|
||||
HIPCHECK(hipMalloc((uint **)&dInPtr[i], bufSize_));
|
||||
setData(hInPtr[i], 0x3f800000);
|
||||
}
|
||||
|
||||
// Populate array of pointers with array addresses
|
||||
hipMemcpy(dPtr, dInPtr, numBufs_* sizeof(void *), hipMemcpyHostToDevice);
|
||||
|
||||
// Copy memory from host to device
|
||||
for (uint i = 0; i < numBufs_; i++) {
|
||||
HIPCHECK(hipMemcpy(dInPtr[i], hInPtr[i], bufSize_, hipMemcpyHostToDevice));
|
||||
}
|
||||
|
||||
HIPCHECK(hipMemcpy(dOutPtr, hOutPtr, outBufSize_, hipMemcpyHostToDevice));
|
||||
|
||||
// Prepare kernel launch parameters
|
||||
// outBufSize_/sizeof(uint) - Grid size in 3D
|
||||
int grids = 64;
|
||||
int blocks = 64;
|
||||
int threads_per_block = 1;
|
||||
|
||||
unsigned int maxIter = MAX_ITERATIONS * (MAX_BUFS / numBufs_);
|
||||
unsigned int sizeDW = width_ * width_;
|
||||
unsigned int writeIt = 0;
|
||||
|
||||
int idx = 0;
|
||||
|
||||
if (!types[typeIdx_].compare("float")) {
|
||||
idx = 0;
|
||||
}
|
||||
else if(!types[typeIdx_].compare("float2")) {
|
||||
idx = 1;
|
||||
}
|
||||
else if(!types[typeIdx_].compare("float4")) {
|
||||
idx = 2;
|
||||
}
|
||||
|
||||
|
||||
// Time the kernel execution
|
||||
auto all_start = std::chrono::steady_clock::now();
|
||||
for (uint i = 0; i < maxIter; i++) {
|
||||
(this->*p[idx]) ((void *)dOutPtr, sizeDW, writeIt, dPtr, numBufs_, grids, blocks,
|
||||
threads_per_block);
|
||||
}
|
||||
|
||||
hipDeviceSynchronize();
|
||||
auto all_end = std::chrono::steady_clock::now();
|
||||
std::chrono::duration<double> all_kernel_time = all_end - all_start;
|
||||
|
||||
double perf = ((double)outBufSize_ * numBufs_ * (double)maxIter * (double)(1e-09)) /
|
||||
all_kernel_time.count();
|
||||
|
||||
cout << "Domain " << sizes[NUM_SIZES - 1] << "x"<< sizes[NUM_SIZES - 1] << " bufs "
|
||||
<< numBufs_ << " " << types[typeIdx_] << " " << width_<<"x"<<width_<< " (GB/s) "
|
||||
<< perf << endl;
|
||||
|
||||
HIPCHECK(hipFree(dOutPtr));
|
||||
|
||||
// Free host and device memory
|
||||
for (uint i = 0; i < numBufs_; i++) {
|
||||
HIPCHECK(hipFree(hInPtr[i]));
|
||||
HIPCHECK(hipFree(dInPtr[i]));
|
||||
}
|
||||
|
||||
HIPCHECK(hipFree(hOutPtr));
|
||||
HIPCHECK(hipFree(dPtr));
|
||||
}
|
||||
|
||||
|
||||
void hipPerfSampleRate::setData(void *ptr, unsigned int value) {
|
||||
unsigned int *ptr2 = (unsigned int *)ptr;
|
||||
for (unsigned int i = 0; i < bufSize_ / sizeof(unsigned int); i++) {
|
||||
ptr2[i] = value;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void hipPerfSampleRate::checkData(uint *ptr) {
|
||||
for (unsigned int i = 0; i < outBufSize_ / sizeof(float); i++) {
|
||||
if (ptr[i] != (float)numBufs_) {
|
||||
cout << "Data validation failed at "<< i << " Got "<< ptr[i] << ", expected "
|
||||
<< (float)numBufs_;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
hipPerfSampleRate sampleTypes;
|
||||
|
||||
sampleTypes.open();
|
||||
|
||||
for (unsigned int testCase = 0; testCase < 216 ; testCase+=36) {
|
||||
sampleTypes.run(testCase);
|
||||
}
|
||||
|
||||
|
||||
passed();
|
||||
}
|
||||
/*
|
||||
Copyright (c) 2015-present Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/* HIT_START
|
||||
* BUILD: %t %s ../../src/test_common.cpp EXCLUDE_HIP_PLATFORM nvidia
|
||||
* TEST: %t
|
||||
* HIT_END
|
||||
*/
|
||||
|
||||
#include <iostream>
|
||||
#include <chrono>
|
||||
#include "test_common.h"
|
||||
#include <hip/hip_vector_types.h>
|
||||
#include <vector>
|
||||
|
||||
using namespace std;
|
||||
|
||||
#define NUM_TYPES 3
|
||||
vector<string> types= {"float", "float2", "float4"};
|
||||
vector<unsigned int> typeSizes = {4, 8, 16};
|
||||
|
||||
#define NUM_SIZES 12
|
||||
vector<unsigned int> sizes = {1, 2, 4, 8, 16, 32,
|
||||
64, 128, 256, 512, 1024, 2048};
|
||||
|
||||
#define NUM_BUFS 6
|
||||
#define MAX_BUFS (1 << (NUM_BUFS - 1))
|
||||
|
||||
template <typename T>
|
||||
__global__ void sampleRate(T * outBuffer, unsigned int inBufSize, unsigned int writeIt,
|
||||
T **inBuffer, int numBufs) {
|
||||
|
||||
uint gid = (blockIdx.x * blockDim.x + threadIdx.x);
|
||||
uint inputIdx = gid % inBufSize;
|
||||
|
||||
T tmp = (T)0.0f;
|
||||
for(int i = 0; i < numBufs; i++) {
|
||||
tmp += *(*(inBuffer+i)+inputIdx);
|
||||
}
|
||||
|
||||
if (writeIt*(unsigned int)tmp.x) {
|
||||
outBuffer[gid] = tmp;
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
__global__ void sampleRateFloat(T * outBuffer, unsigned int inBufSize, unsigned int writeIt,
|
||||
T ** inBuffer, int numBufs) {
|
||||
|
||||
uint gid = (blockIdx.x * blockDim.x + threadIdx.x);
|
||||
uint inputIdx = gid % inBufSize;
|
||||
|
||||
T tmp = (T)0.0f;
|
||||
|
||||
for(int i = 0; i < numBufs; i++) {
|
||||
tmp += *((*inBuffer+i)+inputIdx);
|
||||
}
|
||||
|
||||
if (writeIt*(unsigned int)tmp) {
|
||||
outBuffer[gid] = tmp;
|
||||
}
|
||||
};
|
||||
|
||||
class hipPerfSampleRate {
|
||||
public:
|
||||
hipPerfSampleRate();
|
||||
~hipPerfSampleRate();
|
||||
|
||||
void open(void);
|
||||
void run(unsigned int testCase);
|
||||
void close(void);
|
||||
|
||||
// array of funtion pointers
|
||||
typedef void (hipPerfSampleRate::*funPtr)(void * outBuffer, unsigned int
|
||||
inBufSize, unsigned int writeIt, void **inBuffer, int numBufs, int grids, int blocks,
|
||||
int threads_per_block);
|
||||
|
||||
// Wrappers
|
||||
void float_kernel(void * outBuffer, unsigned int
|
||||
inBufSize, unsigned int writeIt, void **inBuffer, int numBufs, int grids, int blocks,
|
||||
int threads_per_block);
|
||||
|
||||
void float2_kernel(void * outBuffer, unsigned int
|
||||
inBufSize, unsigned int writeIt, void **inBuffer, int numBufs, int grids, int blocks,
|
||||
int threads_per_block);
|
||||
|
||||
void float4_kernel(void * outBuffer, unsigned int
|
||||
inBufSize, unsigned int writeIt, void **inBuffer, int numBufs, int grids, int blocks,
|
||||
int threads_per_block);
|
||||
|
||||
private:
|
||||
void setData(void *ptr, unsigned int value);
|
||||
void checkData(uint *ptr);
|
||||
|
||||
unsigned int width_;
|
||||
unsigned int bufSize_;
|
||||
unsigned long long totalIters = 0;
|
||||
int numCUs;
|
||||
|
||||
unsigned int outBufSize_;
|
||||
static const unsigned int MAX_ITERATIONS = 25;
|
||||
unsigned int numBufs_;
|
||||
unsigned int typeIdx_;
|
||||
};
|
||||
|
||||
|
||||
hipPerfSampleRate::hipPerfSampleRate() {}
|
||||
|
||||
hipPerfSampleRate::~hipPerfSampleRate() {}
|
||||
|
||||
void hipPerfSampleRate::open(void) {
|
||||
|
||||
int nGpu = 0;
|
||||
HIPCHECK(hipGetDeviceCount(&nGpu));
|
||||
if (nGpu < 1) {
|
||||
std::cout << "info: didn't find any GPU! skipping the test!\n";
|
||||
passed();
|
||||
return;
|
||||
}
|
||||
|
||||
int deviceId = 0;
|
||||
hipDeviceProp_t props = {0};
|
||||
props = {0};
|
||||
HIPCHECK(hipSetDevice(deviceId));
|
||||
HIPCHECK(hipGetDeviceProperties(&props, deviceId));
|
||||
std::cout << "info: running on bus " << "0x" << props.pciBusID << " " << props.name
|
||||
<< " with " << props.multiProcessorCount << " CUs" << " and device id: " << deviceId
|
||||
<< std::endl;
|
||||
numCUs = props.multiProcessorCount;
|
||||
}
|
||||
|
||||
|
||||
void hipPerfSampleRate::close() {
|
||||
|
||||
}
|
||||
|
||||
|
||||
// Wrappers for the kernel launches
|
||||
void hipPerfSampleRate::float_kernel(void * outBuffer, unsigned int inBufSize,
|
||||
unsigned int writeIt, void **inBuffer,
|
||||
int numBufs, int grids, int blocks, int threads_per_block) {
|
||||
|
||||
hipLaunchKernelGGL(sampleRateFloat<float>, dim3(grids, grids, grids), dim3 (blocks), 0, 0,
|
||||
(float*)outBuffer, inBufSize, writeIt, (float**)inBuffer, numBufs);
|
||||
|
||||
}
|
||||
|
||||
void hipPerfSampleRate::float2_kernel(void * outBuffer, unsigned int inBufSize,
|
||||
unsigned int writeIt, void **inBuffer,
|
||||
int grids, int blocks, int threads_per_block, int numBufs) {
|
||||
|
||||
hipLaunchKernelGGL(sampleRate<float2>, dim3(grids, grids, grids), dim3(blocks), 0, 0,
|
||||
(float2 *)outBuffer, inBufSize, writeIt, (float2**)inBuffer, numBufs);
|
||||
}
|
||||
|
||||
void hipPerfSampleRate::float4_kernel(void * outBuffer, unsigned int inBufSize,
|
||||
unsigned int writeIt, void **inBuffer,
|
||||
int grids, int blocks, int threads_per_block, int numBufs) {
|
||||
|
||||
hipLaunchKernelGGL(sampleRate<float4>, dim3(grids, grids, grids), dim3(blocks), 0, 0,
|
||||
(float4 *) outBuffer, inBufSize, writeIt, (float4**)inBuffer, numBufs);
|
||||
}
|
||||
|
||||
void hipPerfSampleRate::run(unsigned int test) {
|
||||
|
||||
funPtr p[] = {&hipPerfSampleRate::float_kernel, &hipPerfSampleRate::float2_kernel,
|
||||
&hipPerfSampleRate::float4_kernel};
|
||||
|
||||
// We compute a square domain
|
||||
width_ = sizes[test % NUM_SIZES];
|
||||
typeIdx_ = (test / NUM_SIZES) % NUM_TYPES;
|
||||
bufSize_ = width_ * width_ * typeSizes[typeIdx_];
|
||||
numBufs_ = (1 << (test / (NUM_SIZES * NUM_TYPES)));
|
||||
|
||||
void * hOutPtr;
|
||||
void * dOutPtr;
|
||||
void * hInPtr[numBufs_];
|
||||
void ** dPtr;
|
||||
void * dInPtr[numBufs_];
|
||||
|
||||
outBufSize_ =
|
||||
sizes[NUM_SIZES - 1] * sizes[NUM_SIZES - 1] * typeSizes[NUM_TYPES - 1];
|
||||
|
||||
// Allocate memory on the host and device
|
||||
HIPCHECK(hipHostMalloc((void **)&hOutPtr, outBufSize_, hipHostMallocDefault));
|
||||
setData((void *)hOutPtr, 0xdeadbeef);
|
||||
HIPCHECK(hipMalloc((uint **)&dOutPtr, outBufSize_));
|
||||
|
||||
// Allocate 2D array in Device
|
||||
hipMalloc((void **)&dPtr, numBufs_* sizeof(void *));
|
||||
|
||||
for (uint i = 0; i < numBufs_; i++) {
|
||||
HIPCHECK(hipHostMalloc((void **)&hInPtr[i], bufSize_, hipHostMallocDefault));
|
||||
HIPCHECK(hipMalloc((uint **)&dInPtr[i], bufSize_));
|
||||
setData(hInPtr[i], 0x3f800000);
|
||||
}
|
||||
|
||||
// Populate array of pointers with array addresses
|
||||
hipMemcpy(dPtr, dInPtr, numBufs_* sizeof(void *), hipMemcpyHostToDevice);
|
||||
|
||||
// Copy memory from host to device
|
||||
for (uint i = 0; i < numBufs_; i++) {
|
||||
HIPCHECK(hipMemcpy(dInPtr[i], hInPtr[i], bufSize_, hipMemcpyHostToDevice));
|
||||
}
|
||||
|
||||
HIPCHECK(hipMemcpy(dOutPtr, hOutPtr, outBufSize_, hipMemcpyHostToDevice));
|
||||
|
||||
// Prepare kernel launch parameters
|
||||
// outBufSize_/sizeof(uint) - Grid size in 3D
|
||||
int grids = 64;
|
||||
int blocks = 64;
|
||||
int threads_per_block = 1;
|
||||
|
||||
unsigned int maxIter = MAX_ITERATIONS * (MAX_BUFS / numBufs_);
|
||||
unsigned int sizeDW = width_ * width_;
|
||||
unsigned int writeIt = 0;
|
||||
|
||||
int idx = 0;
|
||||
|
||||
if (!types[typeIdx_].compare("float")) {
|
||||
idx = 0;
|
||||
}
|
||||
else if(!types[typeIdx_].compare("float2")) {
|
||||
idx = 1;
|
||||
}
|
||||
else if(!types[typeIdx_].compare("float4")) {
|
||||
idx = 2;
|
||||
}
|
||||
|
||||
|
||||
// Time the kernel execution
|
||||
auto all_start = std::chrono::steady_clock::now();
|
||||
for (uint i = 0; i < maxIter; i++) {
|
||||
(this->*p[idx]) ((void *)dOutPtr, sizeDW, writeIt, dPtr, numBufs_, grids, blocks,
|
||||
threads_per_block);
|
||||
}
|
||||
|
||||
hipDeviceSynchronize();
|
||||
auto all_end = std::chrono::steady_clock::now();
|
||||
std::chrono::duration<double> all_kernel_time = all_end - all_start;
|
||||
|
||||
double perf = ((double)outBufSize_ * numBufs_ * (double)maxIter * (double)(1e-09)) /
|
||||
all_kernel_time.count();
|
||||
|
||||
cout << "Domain " << sizes[NUM_SIZES - 1] << "x"<< sizes[NUM_SIZES - 1] << " bufs "
|
||||
<< numBufs_ << " " << types[typeIdx_] << " " << width_<<"x"<<width_<< " (GB/s) "
|
||||
<< perf << endl;
|
||||
|
||||
HIPCHECK(hipFree(dOutPtr));
|
||||
|
||||
// Free host and device memory
|
||||
for (uint i = 0; i < numBufs_; i++) {
|
||||
HIPCHECK(hipFree(hInPtr[i]));
|
||||
HIPCHECK(hipFree(dInPtr[i]));
|
||||
}
|
||||
|
||||
HIPCHECK(hipFree(hOutPtr));
|
||||
HIPCHECK(hipFree(dPtr));
|
||||
}
|
||||
|
||||
|
||||
void hipPerfSampleRate::setData(void *ptr, unsigned int value) {
|
||||
unsigned int *ptr2 = (unsigned int *)ptr;
|
||||
for (unsigned int i = 0; i < bufSize_ / sizeof(unsigned int); i++) {
|
||||
ptr2[i] = value;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void hipPerfSampleRate::checkData(uint *ptr) {
|
||||
for (unsigned int i = 0; i < outBufSize_ / sizeof(float); i++) {
|
||||
if (ptr[i] != (float)numBufs_) {
|
||||
cout << "Data validation failed at "<< i << " Got "<< ptr[i] << ", expected "
|
||||
<< (float)numBufs_;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
hipPerfSampleRate sampleTypes;
|
||||
|
||||
sampleTypes.open();
|
||||
|
||||
for (unsigned int testCase = 0; testCase < 216 ; testCase+=36) {
|
||||
sampleTypes.run(testCase);
|
||||
}
|
||||
|
||||
|
||||
passed();
|
||||
}
|
||||
|
||||
@@ -1,289 +1,289 @@
|
||||
/*
|
||||
Copyright (c) 2015-present Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/* HIT_START
|
||||
* BUILD: %t %s ../../src/test_common.cpp EXCLUDE_HIP_PLATFORM nvidia
|
||||
* TEST: %t
|
||||
* HIT_END
|
||||
*/
|
||||
|
||||
#include <iostream>
|
||||
#include <chrono>
|
||||
#include "test_common.h"
|
||||
|
||||
typedef struct {
|
||||
double x;
|
||||
double y;
|
||||
double width;
|
||||
} coordRec;
|
||||
|
||||
static coordRec coords[] = {
|
||||
{0.0, 0.0, 0.00001}, // All black
|
||||
};
|
||||
|
||||
static unsigned int numCoords = sizeof(coords) / sizeof(coordRec);
|
||||
|
||||
__global__ void mandelbrot(uint *out, uint width, float xPos, float yPos, float xStep,
|
||||
float yStep, uint maxIter) {
|
||||
|
||||
int tid = (blockIdx.x * blockDim.x + threadIdx.x);
|
||||
int i = tid % width;
|
||||
int j = tid / width;
|
||||
float x0 = (float)(xPos + xStep*i);
|
||||
float y0 = (float)(yPos + yStep*j);
|
||||
|
||||
float x = x0;
|
||||
float y = y0;
|
||||
|
||||
uint iter = 0;
|
||||
float tmp;
|
||||
for (iter = 0; (x*x + y*y <= 4.0f) && (iter < maxIter); iter++) {
|
||||
tmp = x;
|
||||
x = fma(-y,y,fma(x,x,x0));
|
||||
y = fma(2.0f*tmp,y,y0);
|
||||
}
|
||||
|
||||
out[tid] = iter;
|
||||
};
|
||||
|
||||
class hipPerfDeviceConcurrency {
|
||||
public:
|
||||
hipPerfDeviceConcurrency();
|
||||
~hipPerfDeviceConcurrency();
|
||||
|
||||
void setNumGpus(unsigned int num) {
|
||||
numDevices = num;
|
||||
}
|
||||
unsigned int getNumGpus() {
|
||||
return numDevices;
|
||||
}
|
||||
|
||||
void open(void);
|
||||
void close(void);
|
||||
void run(unsigned int testCase, int numGpus);
|
||||
|
||||
private:
|
||||
void setData(void *ptr, unsigned int value);
|
||||
void checkData(uint *ptr);
|
||||
|
||||
unsigned int numDevices;
|
||||
unsigned int width_;
|
||||
unsigned int bufSize;
|
||||
unsigned int coordIdx;
|
||||
unsigned long long totalIters = 0;
|
||||
};
|
||||
|
||||
|
||||
hipPerfDeviceConcurrency::hipPerfDeviceConcurrency() {}
|
||||
|
||||
hipPerfDeviceConcurrency::~hipPerfDeviceConcurrency() {}
|
||||
|
||||
void hipPerfDeviceConcurrency::open(void) {
|
||||
|
||||
|
||||
int nGpu = 0;
|
||||
HIPCHECK(hipGetDeviceCount(&nGpu));
|
||||
setNumGpus(nGpu);
|
||||
if (nGpu < 1) {
|
||||
std::cout << "info: didn't find any GPU! skipping the test!\n";
|
||||
passed();
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
void hipPerfDeviceConcurrency::close() {
|
||||
}
|
||||
|
||||
void hipPerfDeviceConcurrency::run(unsigned int testCase, int numGpus) {
|
||||
|
||||
|
||||
static int deviceId;
|
||||
uint * hPtr[numGpus];
|
||||
uint * dPtr[numGpus];
|
||||
hipStream_t streams[numGpus];
|
||||
int numCUs[numGpus];
|
||||
unsigned int maxIter[numGpus];
|
||||
unsigned long long expectedIters[numGpus];
|
||||
|
||||
int threads, threads_per_block, blocks;
|
||||
float xStep, yStep, xPos, yPos;
|
||||
|
||||
for(int i = 0; i < numGpus; i++) {
|
||||
|
||||
if(testCase != 0) {
|
||||
deviceId = i;
|
||||
}
|
||||
|
||||
HIPCHECK(hipSetDevice(deviceId));
|
||||
|
||||
hipDeviceProp_t props = {0};
|
||||
HIPCHECK(hipGetDeviceProperties(&props, i));
|
||||
|
||||
if (testCase != 0) {
|
||||
std::cout << "info: running on bus " << "0x" << props.pciBusID << " " << props.name
|
||||
<< " with " << props.multiProcessorCount << " CUs" << " and device ID: "
|
||||
<< i << std::endl;
|
||||
}
|
||||
|
||||
numCUs[i] = props.multiProcessorCount;
|
||||
int clkFrequency = 0;
|
||||
HIPCHECK(hipDeviceGetAttribute(&clkFrequency, hipDeviceAttributeClockRate, i));
|
||||
|
||||
clkFrequency =(unsigned int)clkFrequency/1000;
|
||||
|
||||
// Maximum iteration count
|
||||
// maxIter = 8388608 * (engine_clock / 1000).serial execution
|
||||
maxIter[i] = (unsigned int)(((8388608 * ((float)clkFrequency / 1000)) * numCUs[i]) / 128);
|
||||
maxIter[i] = (maxIter[i] + 15) & ~15;
|
||||
|
||||
// Width is divisible by 4 because the mandelbrot kernel processes 4 pixels at once.
|
||||
width_ = 256;
|
||||
|
||||
bufSize = width_ * width_ * sizeof(uint);
|
||||
|
||||
// Create streams for concurrency
|
||||
HIPCHECK(hipStreamCreate(&streams[i]));
|
||||
|
||||
// Allocate memory on the host and device
|
||||
HIPCHECK(hipHostMalloc((void **)&hPtr[i], bufSize, hipHostMallocDefault));
|
||||
setData(hPtr[i], 0xdeadbeef);
|
||||
HIPCHECK(hipMalloc((uint **)&dPtr[i], bufSize))
|
||||
|
||||
// Prepare kernel launch parameters
|
||||
threads = (bufSize/sizeof(uint));
|
||||
threads_per_block = 64;
|
||||
blocks = (threads/threads_per_block) + (threads % threads_per_block);
|
||||
|
||||
coordIdx = testCase % numCoords;
|
||||
xStep = (float)(coords[coordIdx].width / (double)width_);
|
||||
yStep = (float)(-coords[coordIdx].width / (double)width_);
|
||||
xPos = (float)(coords[coordIdx].x - 0.5 * coords[coordIdx].width);
|
||||
yPos = (float)(coords[coordIdx].y + 0.5 * coords[coordIdx].width);
|
||||
|
||||
// Copy memory from host to device
|
||||
HIPCHECK(hipMemcpy(dPtr[i], hPtr[i], bufSize, hipMemcpyHostToDevice));
|
||||
|
||||
}
|
||||
|
||||
// Time the kernel execution
|
||||
auto all_start = std::chrono::steady_clock::now();
|
||||
|
||||
for(int i = 0; i < numGpus; i++) {
|
||||
|
||||
if(testCase != 0) {
|
||||
deviceId = i;
|
||||
}
|
||||
|
||||
HIPCHECK(hipSetDevice(deviceId));
|
||||
|
||||
hipLaunchKernelGGL(mandelbrot, dim3(blocks), dim3(threads_per_block), 0, streams[i],
|
||||
dPtr[i], width_, xPos, yPos, xStep, yStep, maxIter[i]);
|
||||
|
||||
}
|
||||
|
||||
for(int i = 0; i < numGpus; i++) {
|
||||
HIPCHECK(hipStreamSynchronize(0));
|
||||
}
|
||||
|
||||
|
||||
auto all_end = std::chrono::steady_clock::now();
|
||||
std::chrono::duration<double> all_kernel_time = all_end - all_start;
|
||||
|
||||
for(int i = 0; i < numGpus; i++) {
|
||||
|
||||
if(testCase != 0) {
|
||||
deviceId = i;
|
||||
}
|
||||
HIPCHECK(hipSetDevice(deviceId));
|
||||
|
||||
// Copy data back from device to the host
|
||||
HIPCHECK(hipMemcpy(hPtr[i], dPtr[i], bufSize, hipMemcpyDeviceToHost));
|
||||
|
||||
checkData(hPtr[i]);
|
||||
expectedIters[i] = width_ * width_ * (unsigned long long) maxIter[i];
|
||||
|
||||
if (testCase != 0) {
|
||||
checkData(hPtr[i]);
|
||||
if(totalIters != expectedIters[i]) {
|
||||
std::cout << "Incorrect iteration count detected" << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
HIPCHECK(hipStreamDestroy(streams[i]));
|
||||
|
||||
// Free host and device memory
|
||||
HIPCHECK(hipFree(hPtr[i]));
|
||||
HIPCHECK(hipFree(dPtr[i]));
|
||||
}
|
||||
|
||||
if (testCase != 0) {
|
||||
std::cout << '\n' << "Measured time for kernel computation on " << numGpus << " device (s): "
|
||||
<< all_kernel_time.count() << " (s) " << '\n' << std::endl;
|
||||
}
|
||||
|
||||
if(testCase == 0) {
|
||||
deviceId++;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
void hipPerfDeviceConcurrency::setData(void *ptr, unsigned int value) {
|
||||
unsigned int *ptr2 = (unsigned int *)ptr;
|
||||
for (unsigned int i = 0; i < width_ * width_ ; i++) {
|
||||
ptr2[i] = value;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void hipPerfDeviceConcurrency::checkData(uint *ptr) {
|
||||
totalIters = 0;
|
||||
for (unsigned int i = 0; i < width_ * width_; i++) {
|
||||
totalIters += ptr[i];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
hipPerfDeviceConcurrency deviceConcurrency;
|
||||
|
||||
deviceConcurrency.open();
|
||||
|
||||
int nGpu = deviceConcurrency.getNumGpus();
|
||||
|
||||
// testCase = 0 refers to warmup kernel run
|
||||
int testCase = 0;
|
||||
|
||||
for (int i = 0; i < nGpu; i++) {
|
||||
// Warm-up kernel on all devices
|
||||
deviceConcurrency.run(testCase, 1);
|
||||
}
|
||||
|
||||
// Time for kernel on 1 device
|
||||
deviceConcurrency.run(++testCase, 1);
|
||||
|
||||
// Time for kernel on all available devices
|
||||
deviceConcurrency.run(++testCase, nGpu);
|
||||
|
||||
passed();
|
||||
}
|
||||
/*
|
||||
Copyright (c) 2015-present Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/* HIT_START
|
||||
* BUILD: %t %s ../../src/test_common.cpp EXCLUDE_HIP_PLATFORM nvidia
|
||||
* TEST: %t
|
||||
* HIT_END
|
||||
*/
|
||||
|
||||
#include <iostream>
|
||||
#include <chrono>
|
||||
#include "test_common.h"
|
||||
|
||||
typedef struct {
|
||||
double x;
|
||||
double y;
|
||||
double width;
|
||||
} coordRec;
|
||||
|
||||
static coordRec coords[] = {
|
||||
{0.0, 0.0, 0.00001}, // All black
|
||||
};
|
||||
|
||||
static unsigned int numCoords = sizeof(coords) / sizeof(coordRec);
|
||||
|
||||
__global__ void mandelbrot(uint *out, uint width, float xPos, float yPos, float xStep,
|
||||
float yStep, uint maxIter) {
|
||||
|
||||
int tid = (blockIdx.x * blockDim.x + threadIdx.x);
|
||||
int i = tid % width;
|
||||
int j = tid / width;
|
||||
float x0 = (float)(xPos + xStep*i);
|
||||
float y0 = (float)(yPos + yStep*j);
|
||||
|
||||
float x = x0;
|
||||
float y = y0;
|
||||
|
||||
uint iter = 0;
|
||||
float tmp;
|
||||
for (iter = 0; (x*x + y*y <= 4.0f) && (iter < maxIter); iter++) {
|
||||
tmp = x;
|
||||
x = fma(-y,y,fma(x,x,x0));
|
||||
y = fma(2.0f*tmp,y,y0);
|
||||
}
|
||||
|
||||
out[tid] = iter;
|
||||
};
|
||||
|
||||
class hipPerfDeviceConcurrency {
|
||||
public:
|
||||
hipPerfDeviceConcurrency();
|
||||
~hipPerfDeviceConcurrency();
|
||||
|
||||
void setNumGpus(unsigned int num) {
|
||||
numDevices = num;
|
||||
}
|
||||
unsigned int getNumGpus() {
|
||||
return numDevices;
|
||||
}
|
||||
|
||||
void open(void);
|
||||
void close(void);
|
||||
void run(unsigned int testCase, int numGpus);
|
||||
|
||||
private:
|
||||
void setData(void *ptr, unsigned int value);
|
||||
void checkData(uint *ptr);
|
||||
|
||||
unsigned int numDevices;
|
||||
unsigned int width_;
|
||||
unsigned int bufSize;
|
||||
unsigned int coordIdx;
|
||||
unsigned long long totalIters = 0;
|
||||
};
|
||||
|
||||
|
||||
hipPerfDeviceConcurrency::hipPerfDeviceConcurrency() {}
|
||||
|
||||
hipPerfDeviceConcurrency::~hipPerfDeviceConcurrency() {}
|
||||
|
||||
void hipPerfDeviceConcurrency::open(void) {
|
||||
|
||||
|
||||
int nGpu = 0;
|
||||
HIPCHECK(hipGetDeviceCount(&nGpu));
|
||||
setNumGpus(nGpu);
|
||||
if (nGpu < 1) {
|
||||
std::cout << "info: didn't find any GPU! skipping the test!\n";
|
||||
passed();
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
void hipPerfDeviceConcurrency::close() {
|
||||
}
|
||||
|
||||
void hipPerfDeviceConcurrency::run(unsigned int testCase, int numGpus) {
|
||||
|
||||
|
||||
static int deviceId;
|
||||
uint * hPtr[numGpus];
|
||||
uint * dPtr[numGpus];
|
||||
hipStream_t streams[numGpus];
|
||||
int numCUs[numGpus];
|
||||
unsigned int maxIter[numGpus];
|
||||
unsigned long long expectedIters[numGpus];
|
||||
|
||||
int threads, threads_per_block, blocks;
|
||||
float xStep, yStep, xPos, yPos;
|
||||
|
||||
for(int i = 0; i < numGpus; i++) {
|
||||
|
||||
if(testCase != 0) {
|
||||
deviceId = i;
|
||||
}
|
||||
|
||||
HIPCHECK(hipSetDevice(deviceId));
|
||||
|
||||
hipDeviceProp_t props = {0};
|
||||
HIPCHECK(hipGetDeviceProperties(&props, i));
|
||||
|
||||
if (testCase != 0) {
|
||||
std::cout << "info: running on bus " << "0x" << props.pciBusID << " " << props.name
|
||||
<< " with " << props.multiProcessorCount << " CUs" << " and device ID: "
|
||||
<< i << std::endl;
|
||||
}
|
||||
|
||||
numCUs[i] = props.multiProcessorCount;
|
||||
int clkFrequency = 0;
|
||||
HIPCHECK(hipDeviceGetAttribute(&clkFrequency, hipDeviceAttributeClockRate, i));
|
||||
|
||||
clkFrequency =(unsigned int)clkFrequency/1000;
|
||||
|
||||
// Maximum iteration count
|
||||
// maxIter = 8388608 * (engine_clock / 1000).serial execution
|
||||
maxIter[i] = (unsigned int)(((8388608 * ((float)clkFrequency / 1000)) * numCUs[i]) / 128);
|
||||
maxIter[i] = (maxIter[i] + 15) & ~15;
|
||||
|
||||
// Width is divisible by 4 because the mandelbrot kernel processes 4 pixels at once.
|
||||
width_ = 256;
|
||||
|
||||
bufSize = width_ * width_ * sizeof(uint);
|
||||
|
||||
// Create streams for concurrency
|
||||
HIPCHECK(hipStreamCreate(&streams[i]));
|
||||
|
||||
// Allocate memory on the host and device
|
||||
HIPCHECK(hipHostMalloc((void **)&hPtr[i], bufSize, hipHostMallocDefault));
|
||||
setData(hPtr[i], 0xdeadbeef);
|
||||
HIPCHECK(hipMalloc((uint **)&dPtr[i], bufSize))
|
||||
|
||||
// Prepare kernel launch parameters
|
||||
threads = (bufSize/sizeof(uint));
|
||||
threads_per_block = 64;
|
||||
blocks = (threads/threads_per_block) + (threads % threads_per_block);
|
||||
|
||||
coordIdx = testCase % numCoords;
|
||||
xStep = (float)(coords[coordIdx].width / (double)width_);
|
||||
yStep = (float)(-coords[coordIdx].width / (double)width_);
|
||||
xPos = (float)(coords[coordIdx].x - 0.5 * coords[coordIdx].width);
|
||||
yPos = (float)(coords[coordIdx].y + 0.5 * coords[coordIdx].width);
|
||||
|
||||
// Copy memory from host to device
|
||||
HIPCHECK(hipMemcpy(dPtr[i], hPtr[i], bufSize, hipMemcpyHostToDevice));
|
||||
|
||||
}
|
||||
|
||||
// Time the kernel execution
|
||||
auto all_start = std::chrono::steady_clock::now();
|
||||
|
||||
for(int i = 0; i < numGpus; i++) {
|
||||
|
||||
if(testCase != 0) {
|
||||
deviceId = i;
|
||||
}
|
||||
|
||||
HIPCHECK(hipSetDevice(deviceId));
|
||||
|
||||
hipLaunchKernelGGL(mandelbrot, dim3(blocks), dim3(threads_per_block), 0, streams[i],
|
||||
dPtr[i], width_, xPos, yPos, xStep, yStep, maxIter[i]);
|
||||
|
||||
}
|
||||
|
||||
for(int i = 0; i < numGpus; i++) {
|
||||
HIPCHECK(hipStreamSynchronize(0));
|
||||
}
|
||||
|
||||
|
||||
auto all_end = std::chrono::steady_clock::now();
|
||||
std::chrono::duration<double> all_kernel_time = all_end - all_start;
|
||||
|
||||
for(int i = 0; i < numGpus; i++) {
|
||||
|
||||
if(testCase != 0) {
|
||||
deviceId = i;
|
||||
}
|
||||
HIPCHECK(hipSetDevice(deviceId));
|
||||
|
||||
// Copy data back from device to the host
|
||||
HIPCHECK(hipMemcpy(hPtr[i], dPtr[i], bufSize, hipMemcpyDeviceToHost));
|
||||
|
||||
checkData(hPtr[i]);
|
||||
expectedIters[i] = width_ * width_ * (unsigned long long) maxIter[i];
|
||||
|
||||
if (testCase != 0) {
|
||||
checkData(hPtr[i]);
|
||||
if(totalIters != expectedIters[i]) {
|
||||
std::cout << "Incorrect iteration count detected" << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
HIPCHECK(hipStreamDestroy(streams[i]));
|
||||
|
||||
// Free host and device memory
|
||||
HIPCHECK(hipFree(hPtr[i]));
|
||||
HIPCHECK(hipFree(dPtr[i]));
|
||||
}
|
||||
|
||||
if (testCase != 0) {
|
||||
std::cout << '\n' << "Measured time for kernel computation on " << numGpus << " device (s): "
|
||||
<< all_kernel_time.count() << " (s) " << '\n' << std::endl;
|
||||
}
|
||||
|
||||
if(testCase == 0) {
|
||||
deviceId++;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
void hipPerfDeviceConcurrency::setData(void *ptr, unsigned int value) {
|
||||
unsigned int *ptr2 = (unsigned int *)ptr;
|
||||
for (unsigned int i = 0; i < width_ * width_ ; i++) {
|
||||
ptr2[i] = value;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void hipPerfDeviceConcurrency::checkData(uint *ptr) {
|
||||
totalIters = 0;
|
||||
for (unsigned int i = 0; i < width_ * width_; i++) {
|
||||
totalIters += ptr[i];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
hipPerfDeviceConcurrency deviceConcurrency;
|
||||
|
||||
deviceConcurrency.open();
|
||||
|
||||
int nGpu = deviceConcurrency.getNumGpus();
|
||||
|
||||
// testCase = 0 refers to warmup kernel run
|
||||
int testCase = 0;
|
||||
|
||||
for (int i = 0; i < nGpu; i++) {
|
||||
// Warm-up kernel on all devices
|
||||
deviceConcurrency.run(testCase, 1);
|
||||
}
|
||||
|
||||
// Time for kernel on 1 device
|
||||
deviceConcurrency.run(++testCase, 1);
|
||||
|
||||
// Time for kernel on all available devices
|
||||
deviceConcurrency.run(++testCase, nGpu);
|
||||
|
||||
passed();
|
||||
}
|
||||
|
||||
@@ -1,157 +1,157 @@
|
||||
#pragma once
|
||||
|
||||
#ifdef __unix__
|
||||
|
||||
#include <string>
|
||||
#include <atomic>
|
||||
#include <unistd.h>
|
||||
#include <fcntl.h>
|
||||
#include <sys/mman.h>
|
||||
|
||||
template <typename T>
|
||||
struct Shmem {
|
||||
std::atomic<T> handle_;
|
||||
std::atomic<int> done_counter_;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct ShmemMeta {
|
||||
std::string shmem_name_;
|
||||
int shmem_fd_;
|
||||
Shmem<T>* shmem_;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
class MultiProcess {
|
||||
public:
|
||||
MultiProcess(size_t num_proc) : num_proc_(num_proc) {}
|
||||
~MultiProcess();
|
||||
|
||||
void DebugInfo(pid_t pid);
|
||||
|
||||
pid_t SpawnProcess(bool debug_bkpt);
|
||||
bool CreateShmem();
|
||||
|
||||
bool WriteHandleToShmem(T ipc_handle);
|
||||
bool WaitTillAllChildReads();
|
||||
|
||||
bool ReadHandleFromShmem(T& ipc_handle);
|
||||
bool NotifyParentDone();
|
||||
|
||||
private:
|
||||
const size_t num_proc_;
|
||||
bool debug_proc_;
|
||||
ShmemMeta<T> shmem_meta_obj_;
|
||||
};
|
||||
|
||||
// Template Implementations
|
||||
template <typename T>
|
||||
MultiProcess<T>::~MultiProcess() {
|
||||
if(munmap(shmem_meta_obj_.shmem_, sizeof(Shmem<T>)) < 0) {
|
||||
std::cout<<"Error Unmapping shared memory "<<std::endl;
|
||||
exit(0);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void MultiProcess<T>::DebugInfo(pid_t pid) {
|
||||
const int delay = 1;
|
||||
|
||||
if (pid == 0) {
|
||||
std::cout<<" Child Process with ID: "<<getpid()<<std::endl;
|
||||
} else {
|
||||
std::cout<<" Parent Process with ID: "<<getpid()<<std::endl;
|
||||
}
|
||||
|
||||
volatile int flag = 0;
|
||||
while (!flag) {
|
||||
sleep(delay);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
pid_t MultiProcess<T>::SpawnProcess(bool debug_bkpt) {
|
||||
if (num_proc_ < 0) {
|
||||
std::cout<<"Num Process cannot be less than 1"<<std::endl;
|
||||
return -1;
|
||||
}
|
||||
|
||||
pid_t pid;
|
||||
for (size_t proc_idx = 0; proc_idx < num_proc_; ++proc_idx) {
|
||||
pid = fork();
|
||||
if (pid < 0) {
|
||||
std::cout<<"Fork Failed"<<std::endl;
|
||||
assert(false);
|
||||
} else if (pid == 0) {
|
||||
//Child Process, so break
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (debug_bkpt) {
|
||||
DebugInfo(pid);
|
||||
}
|
||||
|
||||
return pid;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
bool MultiProcess<T>::CreateShmem() {
|
||||
if (num_proc_ < 0) {
|
||||
std::cout<<"Num Process cannot be less than 1"<<std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
char name_template[] = "/tmp/eventXXXXX";
|
||||
int temp_fd = mkstemp(name_template);
|
||||
shmem_meta_obj_.shmem_name_ = name_template;
|
||||
shmem_meta_obj_.shmem_name_.replace(0, 5, "/hip_");
|
||||
shmem_meta_obj_.shmem_fd_ = shm_open(shmem_meta_obj_.shmem_name_.c_str(),
|
||||
O_RDWR | O_CREAT, 0777);
|
||||
|
||||
if (ftruncate(shmem_meta_obj_.shmem_fd_, sizeof(ShmemMeta<T>)) != 0) {
|
||||
std::cout<<"Cannot FTruncate "<<std::endl;
|
||||
exit(0);
|
||||
}
|
||||
|
||||
shmem_meta_obj_.shmem_ = (Shmem<T>*)mmap(0, sizeof(Shmem<T>), PROT_READ | PROT_WRITE,
|
||||
MAP_SHARED, shmem_meta_obj_.shmem_fd_, 0);
|
||||
memset(&shmem_meta_obj_.shmem_->handle_, 0x00, sizeof(T));
|
||||
shmem_meta_obj_.shmem_->done_counter_ = -1;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
bool MultiProcess<T>::WriteHandleToShmem(T ipc_handle) {
|
||||
memcpy(&shmem_meta_obj_.shmem_->handle_, &ipc_handle, sizeof(T));
|
||||
shmem_meta_obj_.shmem_->done_counter_ = 0;
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
bool MultiProcess<T>::WaitTillAllChildReads() {
|
||||
size_t write_count = 0;
|
||||
while (shmem_meta_obj_.shmem_->done_counter_ != num_proc_) {
|
||||
++write_count;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
bool MultiProcess<T>::ReadHandleFromShmem(T& ipc_handle) {
|
||||
size_t read_count = 0;
|
||||
while (shmem_meta_obj_.shmem_->done_counter_ == -1) {
|
||||
++read_count;
|
||||
}
|
||||
memcpy(&ipc_handle, &shmem_meta_obj_.shmem_->handle_, sizeof(T));
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
bool MultiProcess<T>::NotifyParentDone() {
|
||||
++shmem_meta_obj_.shmem_->done_counter_;
|
||||
return true;
|
||||
}
|
||||
|
||||
#endif /* __unix__ */
|
||||
#pragma once
|
||||
|
||||
#ifdef __unix__
|
||||
|
||||
#include <string>
|
||||
#include <atomic>
|
||||
#include <unistd.h>
|
||||
#include <fcntl.h>
|
||||
#include <sys/mman.h>
|
||||
|
||||
template <typename T>
|
||||
struct Shmem {
|
||||
std::atomic<T> handle_;
|
||||
std::atomic<int> done_counter_;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct ShmemMeta {
|
||||
std::string shmem_name_;
|
||||
int shmem_fd_;
|
||||
Shmem<T>* shmem_;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
class MultiProcess {
|
||||
public:
|
||||
MultiProcess(size_t num_proc) : num_proc_(num_proc) {}
|
||||
~MultiProcess();
|
||||
|
||||
void DebugInfo(pid_t pid);
|
||||
|
||||
pid_t SpawnProcess(bool debug_bkpt);
|
||||
bool CreateShmem();
|
||||
|
||||
bool WriteHandleToShmem(T ipc_handle);
|
||||
bool WaitTillAllChildReads();
|
||||
|
||||
bool ReadHandleFromShmem(T& ipc_handle);
|
||||
bool NotifyParentDone();
|
||||
|
||||
private:
|
||||
const size_t num_proc_;
|
||||
bool debug_proc_;
|
||||
ShmemMeta<T> shmem_meta_obj_;
|
||||
};
|
||||
|
||||
// Template Implementations
|
||||
template <typename T>
|
||||
MultiProcess<T>::~MultiProcess() {
|
||||
if(munmap(shmem_meta_obj_.shmem_, sizeof(Shmem<T>)) < 0) {
|
||||
std::cout<<"Error Unmapping shared memory "<<std::endl;
|
||||
exit(0);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void MultiProcess<T>::DebugInfo(pid_t pid) {
|
||||
const int delay = 1;
|
||||
|
||||
if (pid == 0) {
|
||||
std::cout<<" Child Process with ID: "<<getpid()<<std::endl;
|
||||
} else {
|
||||
std::cout<<" Parent Process with ID: "<<getpid()<<std::endl;
|
||||
}
|
||||
|
||||
volatile int flag = 0;
|
||||
while (!flag) {
|
||||
sleep(delay);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
pid_t MultiProcess<T>::SpawnProcess(bool debug_bkpt) {
|
||||
if (num_proc_ < 0) {
|
||||
std::cout<<"Num Process cannot be less than 1"<<std::endl;
|
||||
return -1;
|
||||
}
|
||||
|
||||
pid_t pid;
|
||||
for (size_t proc_idx = 0; proc_idx < num_proc_; ++proc_idx) {
|
||||
pid = fork();
|
||||
if (pid < 0) {
|
||||
std::cout<<"Fork Failed"<<std::endl;
|
||||
assert(false);
|
||||
} else if (pid == 0) {
|
||||
//Child Process, so break
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (debug_bkpt) {
|
||||
DebugInfo(pid);
|
||||
}
|
||||
|
||||
return pid;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
bool MultiProcess<T>::CreateShmem() {
|
||||
if (num_proc_ < 0) {
|
||||
std::cout<<"Num Process cannot be less than 1"<<std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
char name_template[] = "/tmp/eventXXXXX";
|
||||
int temp_fd = mkstemp(name_template);
|
||||
shmem_meta_obj_.shmem_name_ = name_template;
|
||||
shmem_meta_obj_.shmem_name_.replace(0, 5, "/hip_");
|
||||
shmem_meta_obj_.shmem_fd_ = shm_open(shmem_meta_obj_.shmem_name_.c_str(),
|
||||
O_RDWR | O_CREAT, 0777);
|
||||
|
||||
if (ftruncate(shmem_meta_obj_.shmem_fd_, sizeof(ShmemMeta<T>)) != 0) {
|
||||
std::cout<<"Cannot FTruncate "<<std::endl;
|
||||
exit(0);
|
||||
}
|
||||
|
||||
shmem_meta_obj_.shmem_ = (Shmem<T>*)mmap(0, sizeof(Shmem<T>), PROT_READ | PROT_WRITE,
|
||||
MAP_SHARED, shmem_meta_obj_.shmem_fd_, 0);
|
||||
memset(&shmem_meta_obj_.shmem_->handle_, 0x00, sizeof(T));
|
||||
shmem_meta_obj_.shmem_->done_counter_ = -1;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
bool MultiProcess<T>::WriteHandleToShmem(T ipc_handle) {
|
||||
memcpy(&shmem_meta_obj_.shmem_->handle_, &ipc_handle, sizeof(T));
|
||||
shmem_meta_obj_.shmem_->done_counter_ = 0;
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
bool MultiProcess<T>::WaitTillAllChildReads() {
|
||||
size_t write_count = 0;
|
||||
while (shmem_meta_obj_.shmem_->done_counter_ != num_proc_) {
|
||||
++write_count;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
bool MultiProcess<T>::ReadHandleFromShmem(T& ipc_handle) {
|
||||
size_t read_count = 0;
|
||||
while (shmem_meta_obj_.shmem_->done_counter_ == -1) {
|
||||
++read_count;
|
||||
}
|
||||
memcpy(&ipc_handle, &shmem_meta_obj_.shmem_->handle_, sizeof(T));
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
bool MultiProcess<T>::NotifyParentDone() {
|
||||
++shmem_meta_obj_.shmem_->done_counter_;
|
||||
return true;
|
||||
}
|
||||
|
||||
#endif /* __unix__ */
|
||||
|
||||
@@ -1,126 +1,126 @@
|
||||
/*
|
||||
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/* HIT_START
|
||||
* BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM nvidia
|
||||
* TEST: %t
|
||||
* HIT_END
|
||||
*/
|
||||
|
||||
#include "test_common.h"
|
||||
#include "MultiProcess.h"
|
||||
|
||||
void multi_process(int num_process, bool debug_process) {
|
||||
|
||||
#ifdef __unix__
|
||||
|
||||
float *A_h, *B_h, *C_h;
|
||||
float *A_d, *B_d, *C_d;
|
||||
hipEvent_t start, stop;
|
||||
size_t Nbytes = N * sizeof(float);
|
||||
|
||||
MultiProcess<hipIpcEventHandle_t>* mProcess = new MultiProcess<hipIpcEventHandle_t>(num_process);
|
||||
mProcess->CreateShmem();
|
||||
pid_t pid = mProcess->SpawnProcess(debug_process);
|
||||
|
||||
// Parent Process
|
||||
if (pid != 0) {
|
||||
|
||||
unsigned blocks = (N + threadsPerBlock - 1) / threadsPerBlock;
|
||||
if (blocks > 1024) blocks = 1024;
|
||||
if (blocks == 0) blocks = 1;
|
||||
|
||||
printf("N=%zu (A+B+C= %6.1f MB total) blocks=%u threadsPerBlock=%u iterations=%d\n", N,
|
||||
((double)3 * N * sizeof(float)) / 1024 / 1024, blocks, threadsPerBlock, iterations);
|
||||
printf("iterations=%d\n", iterations);
|
||||
|
||||
HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N);
|
||||
|
||||
// NULL stream check:
|
||||
HIPCHECK(hipEventCreateWithFlags(&start, hipEventDisableTiming|hipEventInterprocess));
|
||||
HIPCHECK(hipEventCreateWithFlags(&stop, hipEventDisableTiming|hipEventInterprocess));
|
||||
|
||||
|
||||
HIPCHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
|
||||
HIPCHECK(hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice));
|
||||
|
||||
|
||||
for (int i = 0; i < iterations; i++) {
|
||||
//--- START TIMED REGION
|
||||
long long hostStart = HipTest::get_time();
|
||||
// Record the start event
|
||||
HIPCHECK(hipEventRecord(start, NULL));
|
||||
|
||||
hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0,
|
||||
static_cast<const float*>(A_d), static_cast<const float*>(B_d), C_d, N);
|
||||
|
||||
|
||||
HIPCHECK(hipEventRecord(stop, NULL));
|
||||
HIPCHECK(hipEventSynchronize(stop));
|
||||
HIPCHECK(hipEventQuery(stop));
|
||||
long long hostStop = HipTest::get_time();
|
||||
//--- STOP TIMED REGION
|
||||
|
||||
float eventMs = 1.0f;
|
||||
// should fail
|
||||
HIPASSERT(hipSuccess != hipEventElapsedTime(&eventMs, start, stop));
|
||||
float hostMs = HipTest::elapsed_time(hostStart, hostStop);
|
||||
|
||||
printf("host_time (gettimeofday) =%6.3fms\n", hostMs);
|
||||
printf("kernel_time (hipEventElapsedTime) =%6.3fms\n", eventMs);
|
||||
printf("\n");
|
||||
|
||||
}
|
||||
|
||||
hipIpcEventHandle_t ipc_handle;
|
||||
HIPCHECK(hipIpcGetEventHandle(&ipc_handle, start));
|
||||
|
||||
mProcess->WriteHandleToShmem(ipc_handle);
|
||||
mProcess->WaitTillAllChildReads();
|
||||
|
||||
} else {
|
||||
hipEvent_t ipc_event;
|
||||
hipIpcEventHandle_t ipc_handle;
|
||||
mProcess->ReadHandleFromShmem(ipc_handle);
|
||||
HIPCHECK(hipIpcOpenEventHandle(&ipc_event, ipc_handle));
|
||||
|
||||
HIPCHECK(hipEventSynchronize(ipc_event));
|
||||
HIPCHECK(hipEventDestroy(ipc_event));
|
||||
mProcess->NotifyParentDone();
|
||||
}
|
||||
|
||||
if (pid != 0) {
|
||||
HIPCHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
|
||||
printf("check:\n");
|
||||
HipTest::checkVectorADD(A_h, B_h, C_h, N, true);
|
||||
|
||||
HIPCHECK(hipEventDestroy(start));
|
||||
HIPCHECK(hipEventDestroy(stop));
|
||||
delete mProcess;
|
||||
}
|
||||
|
||||
#endif /* __unix__ */
|
||||
|
||||
}
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
HipTest::parseStandardArguments(argc, argv, true);
|
||||
multi_process((N < 64) ? N : 64, debug_test);
|
||||
passed();
|
||||
}
|
||||
/*
|
||||
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/* HIT_START
|
||||
* BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM nvidia
|
||||
* TEST: %t
|
||||
* HIT_END
|
||||
*/
|
||||
|
||||
#include "test_common.h"
|
||||
#include "MultiProcess.h"
|
||||
|
||||
void multi_process(int num_process, bool debug_process) {
|
||||
|
||||
#ifdef __unix__
|
||||
|
||||
float *A_h, *B_h, *C_h;
|
||||
float *A_d, *B_d, *C_d;
|
||||
hipEvent_t start, stop;
|
||||
size_t Nbytes = N * sizeof(float);
|
||||
|
||||
MultiProcess<hipIpcEventHandle_t>* mProcess = new MultiProcess<hipIpcEventHandle_t>(num_process);
|
||||
mProcess->CreateShmem();
|
||||
pid_t pid = mProcess->SpawnProcess(debug_process);
|
||||
|
||||
// Parent Process
|
||||
if (pid != 0) {
|
||||
|
||||
unsigned blocks = (N + threadsPerBlock - 1) / threadsPerBlock;
|
||||
if (blocks > 1024) blocks = 1024;
|
||||
if (blocks == 0) blocks = 1;
|
||||
|
||||
printf("N=%zu (A+B+C= %6.1f MB total) blocks=%u threadsPerBlock=%u iterations=%d\n", N,
|
||||
((double)3 * N * sizeof(float)) / 1024 / 1024, blocks, threadsPerBlock, iterations);
|
||||
printf("iterations=%d\n", iterations);
|
||||
|
||||
HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N);
|
||||
|
||||
// NULL stream check:
|
||||
HIPCHECK(hipEventCreateWithFlags(&start, hipEventDisableTiming|hipEventInterprocess));
|
||||
HIPCHECK(hipEventCreateWithFlags(&stop, hipEventDisableTiming|hipEventInterprocess));
|
||||
|
||||
|
||||
HIPCHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
|
||||
HIPCHECK(hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice));
|
||||
|
||||
|
||||
for (int i = 0; i < iterations; i++) {
|
||||
//--- START TIMED REGION
|
||||
long long hostStart = HipTest::get_time();
|
||||
// Record the start event
|
||||
HIPCHECK(hipEventRecord(start, NULL));
|
||||
|
||||
hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0,
|
||||
static_cast<const float*>(A_d), static_cast<const float*>(B_d), C_d, N);
|
||||
|
||||
|
||||
HIPCHECK(hipEventRecord(stop, NULL));
|
||||
HIPCHECK(hipEventSynchronize(stop));
|
||||
HIPCHECK(hipEventQuery(stop));
|
||||
long long hostStop = HipTest::get_time();
|
||||
//--- STOP TIMED REGION
|
||||
|
||||
float eventMs = 1.0f;
|
||||
// should fail
|
||||
HIPASSERT(hipSuccess != hipEventElapsedTime(&eventMs, start, stop));
|
||||
float hostMs = HipTest::elapsed_time(hostStart, hostStop);
|
||||
|
||||
printf("host_time (gettimeofday) =%6.3fms\n", hostMs);
|
||||
printf("kernel_time (hipEventElapsedTime) =%6.3fms\n", eventMs);
|
||||
printf("\n");
|
||||
|
||||
}
|
||||
|
||||
hipIpcEventHandle_t ipc_handle;
|
||||
HIPCHECK(hipIpcGetEventHandle(&ipc_handle, start));
|
||||
|
||||
mProcess->WriteHandleToShmem(ipc_handle);
|
||||
mProcess->WaitTillAllChildReads();
|
||||
|
||||
} else {
|
||||
hipEvent_t ipc_event;
|
||||
hipIpcEventHandle_t ipc_handle;
|
||||
mProcess->ReadHandleFromShmem(ipc_handle);
|
||||
HIPCHECK(hipIpcOpenEventHandle(&ipc_event, ipc_handle));
|
||||
|
||||
HIPCHECK(hipEventSynchronize(ipc_event));
|
||||
HIPCHECK(hipEventDestroy(ipc_event));
|
||||
mProcess->NotifyParentDone();
|
||||
}
|
||||
|
||||
if (pid != 0) {
|
||||
HIPCHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
|
||||
printf("check:\n");
|
||||
HipTest::checkVectorADD(A_h, B_h, C_h, N, true);
|
||||
|
||||
HIPCHECK(hipEventDestroy(start));
|
||||
HIPCHECK(hipEventDestroy(stop));
|
||||
delete mProcess;
|
||||
}
|
||||
|
||||
#endif /* __unix__ */
|
||||
|
||||
}
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
HipTest::parseStandardArguments(argc, argv, true);
|
||||
multi_process((N < 64) ? N : 64, debug_test);
|
||||
passed();
|
||||
}
|
||||
|
||||
@@ -1,103 +1,103 @@
|
||||
/*
|
||||
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/* HIT_START
|
||||
* BUILD: %t %s ../test_common.cpp
|
||||
* TEST: %t
|
||||
* HIT_END
|
||||
*/
|
||||
|
||||
#include "test_common.h"
|
||||
#include "MultiProcess.h"
|
||||
|
||||
#define NUM_ELEMS 1024
|
||||
#define OFFSET 128
|
||||
|
||||
void multi_process(int num_process, bool debug_process) {
|
||||
|
||||
#ifdef __unix__
|
||||
|
||||
int* ipc_dptr = nullptr;
|
||||
int* ipc_hptr = nullptr;
|
||||
int* ipc_out_dptr = nullptr;
|
||||
int* ipc_out_hptr = nullptr;
|
||||
int* ipc_offset_dptr = nullptr;
|
||||
|
||||
MultiProcess<hipIpcMemHandle_t>* mProcess = new MultiProcess<hipIpcMemHandle_t>(num_process);
|
||||
mProcess->CreateShmem();
|
||||
pid_t pid = mProcess->SpawnProcess(debug_process);
|
||||
|
||||
// Parent Process
|
||||
if (pid != 0) {
|
||||
hipIpcMemHandle_t ipc_handle;
|
||||
memset(&ipc_handle, 0x00, sizeof(hipIpcMemHandle_t));
|
||||
|
||||
HIPCHECK(hipMalloc((void**)&ipc_dptr, NUM_ELEMS * sizeof(int)));
|
||||
// Add offset to the dev_ptr
|
||||
ipc_offset_dptr = ipc_dptr + OFFSET;
|
||||
// Get handle for the offsetted device_ptr
|
||||
HIPCHECK(hipIpcGetMemHandle(&ipc_handle, ipc_offset_dptr));
|
||||
|
||||
ipc_hptr = new int[NUM_ELEMS];
|
||||
for (size_t idx = 0; idx < NUM_ELEMS; ++idx) {
|
||||
ipc_hptr[idx] = idx;
|
||||
}
|
||||
|
||||
HIPCHECK(hipMemset(ipc_dptr, 0x00, (NUM_ELEMS * sizeof(int))));
|
||||
HIPCHECK(hipMemcpy(ipc_dptr, ipc_hptr, (NUM_ELEMS * sizeof(int)), hipMemcpyHostToDevice));
|
||||
|
||||
mProcess->WriteHandleToShmem(ipc_handle);
|
||||
|
||||
mProcess->WaitTillAllChildReads();
|
||||
|
||||
} else {
|
||||
ipc_out_hptr = new int[NUM_ELEMS];
|
||||
memset(ipc_out_hptr, 0x00, (NUM_ELEMS * sizeof(int)));
|
||||
|
||||
hipIpcMemHandle_t ipc_handle;
|
||||
mProcess->ReadHandleFromShmem(ipc_handle);
|
||||
// Open handle to get dev_ptr
|
||||
HIPCHECK(hipIpcOpenMemHandle((void**)&ipc_out_dptr, ipc_handle, hipIpcMemLazyEnablePeerAccess));
|
||||
|
||||
HIPCHECK(hipMemcpy(ipc_out_hptr, ipc_out_dptr, (NUM_ELEMS * sizeof(int)),
|
||||
hipMemcpyDeviceToHost));
|
||||
for (size_t idx = 0; idx < NUM_ELEMS; ++idx) {
|
||||
if (ipc_out_hptr[idx] != idx) {
|
||||
std::cout<<"Failing @ idx: "<< idx << std::endl;
|
||||
}
|
||||
}
|
||||
mProcess->NotifyParentDone();
|
||||
HIPCHECK(hipIpcCloseMemHandle(ipc_out_dptr));
|
||||
delete[] ipc_out_hptr;
|
||||
}
|
||||
|
||||
if (pid != 0) {
|
||||
delete mProcess;
|
||||
}
|
||||
|
||||
#endif /* __unix__ */
|
||||
|
||||
}
|
||||
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
HipTest::parseStandardArguments(argc, argv, true);
|
||||
multi_process((N < 64) ? N : 64, debug_test);
|
||||
passed();
|
||||
}
|
||||
/*
|
||||
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/* HIT_START
|
||||
* BUILD: %t %s ../test_common.cpp
|
||||
* TEST: %t
|
||||
* HIT_END
|
||||
*/
|
||||
|
||||
#include "test_common.h"
|
||||
#include "MultiProcess.h"
|
||||
|
||||
#define NUM_ELEMS 1024
|
||||
#define OFFSET 128
|
||||
|
||||
void multi_process(int num_process, bool debug_process) {
|
||||
|
||||
#ifdef __unix__
|
||||
|
||||
int* ipc_dptr = nullptr;
|
||||
int* ipc_hptr = nullptr;
|
||||
int* ipc_out_dptr = nullptr;
|
||||
int* ipc_out_hptr = nullptr;
|
||||
int* ipc_offset_dptr = nullptr;
|
||||
|
||||
MultiProcess<hipIpcMemHandle_t>* mProcess = new MultiProcess<hipIpcMemHandle_t>(num_process);
|
||||
mProcess->CreateShmem();
|
||||
pid_t pid = mProcess->SpawnProcess(debug_process);
|
||||
|
||||
// Parent Process
|
||||
if (pid != 0) {
|
||||
hipIpcMemHandle_t ipc_handle;
|
||||
memset(&ipc_handle, 0x00, sizeof(hipIpcMemHandle_t));
|
||||
|
||||
HIPCHECK(hipMalloc((void**)&ipc_dptr, NUM_ELEMS * sizeof(int)));
|
||||
// Add offset to the dev_ptr
|
||||
ipc_offset_dptr = ipc_dptr + OFFSET;
|
||||
// Get handle for the offsetted device_ptr
|
||||
HIPCHECK(hipIpcGetMemHandle(&ipc_handle, ipc_offset_dptr));
|
||||
|
||||
ipc_hptr = new int[NUM_ELEMS];
|
||||
for (size_t idx = 0; idx < NUM_ELEMS; ++idx) {
|
||||
ipc_hptr[idx] = idx;
|
||||
}
|
||||
|
||||
HIPCHECK(hipMemset(ipc_dptr, 0x00, (NUM_ELEMS * sizeof(int))));
|
||||
HIPCHECK(hipMemcpy(ipc_dptr, ipc_hptr, (NUM_ELEMS * sizeof(int)), hipMemcpyHostToDevice));
|
||||
|
||||
mProcess->WriteHandleToShmem(ipc_handle);
|
||||
|
||||
mProcess->WaitTillAllChildReads();
|
||||
|
||||
} else {
|
||||
ipc_out_hptr = new int[NUM_ELEMS];
|
||||
memset(ipc_out_hptr, 0x00, (NUM_ELEMS * sizeof(int)));
|
||||
|
||||
hipIpcMemHandle_t ipc_handle;
|
||||
mProcess->ReadHandleFromShmem(ipc_handle);
|
||||
// Open handle to get dev_ptr
|
||||
HIPCHECK(hipIpcOpenMemHandle((void**)&ipc_out_dptr, ipc_handle, hipIpcMemLazyEnablePeerAccess));
|
||||
|
||||
HIPCHECK(hipMemcpy(ipc_out_hptr, ipc_out_dptr, (NUM_ELEMS * sizeof(int)),
|
||||
hipMemcpyDeviceToHost));
|
||||
for (size_t idx = 0; idx < NUM_ELEMS; ++idx) {
|
||||
if (ipc_out_hptr[idx] != idx) {
|
||||
std::cout<<"Failing @ idx: "<< idx << std::endl;
|
||||
}
|
||||
}
|
||||
mProcess->NotifyParentDone();
|
||||
HIPCHECK(hipIpcCloseMemHandle(ipc_out_dptr));
|
||||
delete[] ipc_out_hptr;
|
||||
}
|
||||
|
||||
if (pid != 0) {
|
||||
delete mProcess;
|
||||
}
|
||||
|
||||
#endif /* __unix__ */
|
||||
|
||||
}
|
||||
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
HipTest::parseStandardArguments(argc, argv, true);
|
||||
multi_process((N < 64) ? N : 64, debug_test);
|
||||
passed();
|
||||
}
|
||||
|
||||
@@ -1,49 +1,49 @@
|
||||
/*
|
||||
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/* HIT_START
|
||||
* BUILD: %t %s ../../test_common.cpp
|
||||
* TEST: %t
|
||||
* HIT_END
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include "hip/hip_runtime.h"
|
||||
#include "test_common.h"
|
||||
|
||||
int main(void) {
|
||||
hipDeviceProp_t prop;
|
||||
int dev;
|
||||
|
||||
hipGetDevice(&dev);
|
||||
printf("ID of current HIP device: %d\n", dev);
|
||||
|
||||
memset(&prop, 0, sizeof(hipDeviceProp_t));
|
||||
prop.major = 1;
|
||||
prop.minor = 3;
|
||||
hipChooseDevice(&dev, &prop);
|
||||
printf("ID of hip device closest to revision 1.3: %d\n", dev);
|
||||
|
||||
hipSetDevice(dev);
|
||||
|
||||
passed();
|
||||
}
|
||||
/*
|
||||
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/* HIT_START
|
||||
* BUILD: %t %s ../../test_common.cpp
|
||||
* TEST: %t
|
||||
* HIT_END
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include "hip/hip_runtime.h"
|
||||
#include "test_common.h"
|
||||
|
||||
int main(void) {
|
||||
hipDeviceProp_t prop;
|
||||
int dev;
|
||||
|
||||
hipGetDevice(&dev);
|
||||
printf("ID of current HIP device: %d\n", dev);
|
||||
|
||||
memset(&prop, 0, sizeof(hipDeviceProp_t));
|
||||
prop.major = 1;
|
||||
prop.minor = 3;
|
||||
hipChooseDevice(&dev, &prop);
|
||||
printf("ID of hip device closest to revision 1.3: %d\n", dev);
|
||||
|
||||
hipSetDevice(dev);
|
||||
|
||||
passed();
|
||||
}
|
||||
|
||||
@@ -1,161 +1,161 @@
|
||||
/* HIT_START
|
||||
* BUILD: %t %s ../test_common.cpp
|
||||
* TEST: %t
|
||||
* HIT_END
|
||||
*/
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <vector>
|
||||
|
||||
#include <hip/hip_runtime.h>
|
||||
#include "test_common.h"
|
||||
|
||||
// Height Width Vector
|
||||
std::vector<unsigned int> hw_vector = {2048, 1024, 512, 256, 64};
|
||||
std::vector<unsigned int> mip_vector = {8, 4, 2, 1};
|
||||
|
||||
__global__ void tex2DKernel(float* outputData, hipTextureObject_t textureObject, int width,
|
||||
int height, float level) {
|
||||
int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
outputData[y * width + x] = tex2DLod<float>(textureObject, x, y, level);
|
||||
}
|
||||
|
||||
bool runMipMapTest(unsigned int width, unsigned int height, unsigned int mipmap_level) {
|
||||
bool testResult = true;
|
||||
|
||||
printf("Width: %u Height: %u mip: %u \n", width, height, mipmap_level);
|
||||
|
||||
// Create new width & height to be tested
|
||||
unsigned int orig_width = width;
|
||||
unsigned int orig_height = height;
|
||||
width /= pow(2, mipmap_level);
|
||||
height /= pow(2, mipmap_level);
|
||||
unsigned int size = width * height * sizeof(float);
|
||||
|
||||
|
||||
float* hData = (float*)malloc(size);
|
||||
memset(hData, 0, size);
|
||||
for (int i = 0; i < height; i++) {
|
||||
for (int j = 0; j < width; j++) {
|
||||
hData[i * width + j] = i * width + j;
|
||||
}
|
||||
}
|
||||
printf("hData: ");
|
||||
for (int i = 0; i < 64; i++) {
|
||||
printf("%f ", hData[i]);
|
||||
if (i % width == 0) {
|
||||
printf("\n");
|
||||
}
|
||||
}
|
||||
printf("\n");
|
||||
|
||||
hipChannelFormatDesc channelDesc = hipCreateChannelDesc(32, 0, 0, 0, hipChannelFormatKindFloat);
|
||||
HIP_ARRAY3D_DESCRIPTOR mipmapped_array_desc;
|
||||
memset(&mipmapped_array_desc, 0x00, sizeof(HIP_ARRAY3D_DESCRIPTOR));
|
||||
mipmapped_array_desc.Width = orig_width;
|
||||
mipmapped_array_desc.Height = orig_height;
|
||||
mipmapped_array_desc.Depth = 0;
|
||||
mipmapped_array_desc.Format = HIP_AD_FORMAT_FLOAT;
|
||||
mipmapped_array_desc.NumChannels = ((channelDesc.x != 0) + (channelDesc.y != 0)
|
||||
+ (channelDesc.z != 0) + (channelDesc.w != 0));
|
||||
mipmapped_array_desc.Flags = 0;
|
||||
|
||||
|
||||
hipMipmappedArray* mip_array_ptr;
|
||||
hipMipmappedArrayCreate(&mip_array_ptr, &mipmapped_array_desc, 2 * mipmap_level);
|
||||
|
||||
hipArray *hipArray = nullptr;
|
||||
HIPCHECK(hipMipmappedArrayGetLevel(&hipArray, mip_array_ptr, mipmap_level));
|
||||
HIPCHECK(hipMemcpyToArray(hipArray, 0, 0, hData, size, hipMemcpyHostToDevice));
|
||||
|
||||
hipResourceDesc resDesc;
|
||||
memset(&resDesc, 0, sizeof(resDesc));
|
||||
resDesc.resType = hipResourceTypeArray;
|
||||
resDesc.res.array.array = hipArray;
|
||||
|
||||
// Specify texture object parameters
|
||||
hipTextureDesc texDesc;
|
||||
memset(&texDesc, 0, sizeof(texDesc));
|
||||
texDesc.addressMode[0] = hipAddressModeWrap;
|
||||
texDesc.addressMode[1] = hipAddressModeWrap;
|
||||
texDesc.filterMode = hipFilterModePoint;
|
||||
texDesc.readMode = hipReadModeElementType;
|
||||
texDesc.normalizedCoords = 0;
|
||||
|
||||
// Create texture object
|
||||
hipTextureObject_t textureObject = 0;
|
||||
hipCreateTextureObject(&textureObject, &resDesc, &texDesc, NULL);
|
||||
|
||||
float* dData = NULL;
|
||||
hipMalloc((void**)&dData, size);
|
||||
|
||||
dim3 dimBlock(16, 16, 1);
|
||||
dim3 dimGrid(width / dimBlock.x, height / dimBlock.y, 1);
|
||||
|
||||
hipLaunchKernelGGL(tex2DKernel, dim3(dimGrid), dim3(dimBlock), 0, 0, dData, textureObject,
|
||||
width, height, (2 * mipmap_level));
|
||||
|
||||
hipDeviceSynchronize();
|
||||
|
||||
float* hOutputData = (float*)malloc(size);
|
||||
memset(hOutputData, 0, size);
|
||||
hipMemcpy(hOutputData, dData, size, hipMemcpyDeviceToHost);
|
||||
|
||||
printf("dData: ");
|
||||
for (int i = 0; i < 64; i++) {
|
||||
printf("%f ", hOutputData[i]);
|
||||
if (i % width == 0) {
|
||||
printf("\n");
|
||||
}
|
||||
}
|
||||
printf("\n");
|
||||
for (int i = 0; i < height; i++) {
|
||||
for (int j = 0; j < width; j++) {
|
||||
if (hData[i * width + j] != hOutputData[i * width + j]) {
|
||||
printf("Difference [ %d %d ]:%f ----%f\n", i, j, hData[i * width + j],
|
||||
hOutputData[i * width + j]);
|
||||
testResult = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
hipDestroyTextureObject(textureObject);
|
||||
hipFree(dData);
|
||||
hipFreeArray(hipArray);
|
||||
return testResult;
|
||||
}
|
||||
|
||||
|
||||
bool runTest(int argc, char** argv) {
|
||||
bool testResult = true;
|
||||
|
||||
for (auto& hw: hw_vector) {
|
||||
for (auto& mip: mip_vector) {
|
||||
if ((hw / static_cast<int>(pow (2,(mip * 2)))) > 0) {
|
||||
testResult |= runMipMapTest(hw, hw, mip);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
printf("\n");
|
||||
return testResult;
|
||||
}
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
bool testResult = true;
|
||||
|
||||
#ifdef _WIN32
|
||||
testResult = runTest(argc, argv);
|
||||
#else
|
||||
std::cout<<"Mipmaps are Supported only on windows, skipping the test"<<std::endl;
|
||||
#endif
|
||||
|
||||
if (testResult) {
|
||||
passed();
|
||||
} else {
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
}
|
||||
|
||||
/* HIT_START
|
||||
* BUILD: %t %s ../test_common.cpp
|
||||
* TEST: %t
|
||||
* HIT_END
|
||||
*/
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <vector>
|
||||
|
||||
#include <hip/hip_runtime.h>
|
||||
#include "test_common.h"
|
||||
|
||||
// Height Width Vector
|
||||
std::vector<unsigned int> hw_vector = {2048, 1024, 512, 256, 64};
|
||||
std::vector<unsigned int> mip_vector = {8, 4, 2, 1};
|
||||
|
||||
__global__ void tex2DKernel(float* outputData, hipTextureObject_t textureObject, int width,
|
||||
int height, float level) {
|
||||
int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
outputData[y * width + x] = tex2DLod<float>(textureObject, x, y, level);
|
||||
}
|
||||
|
||||
bool runMipMapTest(unsigned int width, unsigned int height, unsigned int mipmap_level) {
|
||||
bool testResult = true;
|
||||
|
||||
printf("Width: %u Height: %u mip: %u \n", width, height, mipmap_level);
|
||||
|
||||
// Create new width & height to be tested
|
||||
unsigned int orig_width = width;
|
||||
unsigned int orig_height = height;
|
||||
width /= pow(2, mipmap_level);
|
||||
height /= pow(2, mipmap_level);
|
||||
unsigned int size = width * height * sizeof(float);
|
||||
|
||||
|
||||
float* hData = (float*)malloc(size);
|
||||
memset(hData, 0, size);
|
||||
for (int i = 0; i < height; i++) {
|
||||
for (int j = 0; j < width; j++) {
|
||||
hData[i * width + j] = i * width + j;
|
||||
}
|
||||
}
|
||||
printf("hData: ");
|
||||
for (int i = 0; i < 64; i++) {
|
||||
printf("%f ", hData[i]);
|
||||
if (i % width == 0) {
|
||||
printf("\n");
|
||||
}
|
||||
}
|
||||
printf("\n");
|
||||
|
||||
hipChannelFormatDesc channelDesc = hipCreateChannelDesc(32, 0, 0, 0, hipChannelFormatKindFloat);
|
||||
HIP_ARRAY3D_DESCRIPTOR mipmapped_array_desc;
|
||||
memset(&mipmapped_array_desc, 0x00, sizeof(HIP_ARRAY3D_DESCRIPTOR));
|
||||
mipmapped_array_desc.Width = orig_width;
|
||||
mipmapped_array_desc.Height = orig_height;
|
||||
mipmapped_array_desc.Depth = 0;
|
||||
mipmapped_array_desc.Format = HIP_AD_FORMAT_FLOAT;
|
||||
mipmapped_array_desc.NumChannels = ((channelDesc.x != 0) + (channelDesc.y != 0)
|
||||
+ (channelDesc.z != 0) + (channelDesc.w != 0));
|
||||
mipmapped_array_desc.Flags = 0;
|
||||
|
||||
|
||||
hipMipmappedArray* mip_array_ptr;
|
||||
hipMipmappedArrayCreate(&mip_array_ptr, &mipmapped_array_desc, 2 * mipmap_level);
|
||||
|
||||
hipArray *hipArray = nullptr;
|
||||
HIPCHECK(hipMipmappedArrayGetLevel(&hipArray, mip_array_ptr, mipmap_level));
|
||||
HIPCHECK(hipMemcpyToArray(hipArray, 0, 0, hData, size, hipMemcpyHostToDevice));
|
||||
|
||||
hipResourceDesc resDesc;
|
||||
memset(&resDesc, 0, sizeof(resDesc));
|
||||
resDesc.resType = hipResourceTypeArray;
|
||||
resDesc.res.array.array = hipArray;
|
||||
|
||||
// Specify texture object parameters
|
||||
hipTextureDesc texDesc;
|
||||
memset(&texDesc, 0, sizeof(texDesc));
|
||||
texDesc.addressMode[0] = hipAddressModeWrap;
|
||||
texDesc.addressMode[1] = hipAddressModeWrap;
|
||||
texDesc.filterMode = hipFilterModePoint;
|
||||
texDesc.readMode = hipReadModeElementType;
|
||||
texDesc.normalizedCoords = 0;
|
||||
|
||||
// Create texture object
|
||||
hipTextureObject_t textureObject = 0;
|
||||
hipCreateTextureObject(&textureObject, &resDesc, &texDesc, NULL);
|
||||
|
||||
float* dData = NULL;
|
||||
hipMalloc((void**)&dData, size);
|
||||
|
||||
dim3 dimBlock(16, 16, 1);
|
||||
dim3 dimGrid(width / dimBlock.x, height / dimBlock.y, 1);
|
||||
|
||||
hipLaunchKernelGGL(tex2DKernel, dim3(dimGrid), dim3(dimBlock), 0, 0, dData, textureObject,
|
||||
width, height, (2 * mipmap_level));
|
||||
|
||||
hipDeviceSynchronize();
|
||||
|
||||
float* hOutputData = (float*)malloc(size);
|
||||
memset(hOutputData, 0, size);
|
||||
hipMemcpy(hOutputData, dData, size, hipMemcpyDeviceToHost);
|
||||
|
||||
printf("dData: ");
|
||||
for (int i = 0; i < 64; i++) {
|
||||
printf("%f ", hOutputData[i]);
|
||||
if (i % width == 0) {
|
||||
printf("\n");
|
||||
}
|
||||
}
|
||||
printf("\n");
|
||||
for (int i = 0; i < height; i++) {
|
||||
for (int j = 0; j < width; j++) {
|
||||
if (hData[i * width + j] != hOutputData[i * width + j]) {
|
||||
printf("Difference [ %d %d ]:%f ----%f\n", i, j, hData[i * width + j],
|
||||
hOutputData[i * width + j]);
|
||||
testResult = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
hipDestroyTextureObject(textureObject);
|
||||
hipFree(dData);
|
||||
hipFreeArray(hipArray);
|
||||
return testResult;
|
||||
}
|
||||
|
||||
|
||||
bool runTest(int argc, char** argv) {
|
||||
bool testResult = true;
|
||||
|
||||
for (auto& hw: hw_vector) {
|
||||
for (auto& mip: mip_vector) {
|
||||
if ((hw / static_cast<int>(pow (2,(mip * 2)))) > 0) {
|
||||
testResult |= runMipMapTest(hw, hw, mip);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
printf("\n");
|
||||
return testResult;
|
||||
}
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
bool testResult = true;
|
||||
|
||||
#ifdef _WIN32
|
||||
testResult = runTest(argc, argv);
|
||||
#else
|
||||
std::cout<<"Mipmaps are Supported only on windows, skipping the test"<<std::endl;
|
||||
#endif
|
||||
|
||||
if (testResult) {
|
||||
passed();
|
||||
} else {
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Yeni konuda referans
Bir kullanıcı engelle