SWDEV-286322 - Fix EOL in files

Change-Id: I1746e80aceeeaa4fc5df65c858f4816d99010186
Bu işleme şunda yer alıyor:
Julia Jiang
2021-05-19 14:48:53 -04:00
ebeveyn 93bd76672e
işleme e1a640bfc2
7 değiştirilmiş dosya ile 1193 ekleme ve 1193 silme
+308 -308
Dosyayı Görüntüle
@@ -1,308 +1,308 @@
/*
Copyright (c) 2015-present Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/* HIT_START
* BUILD: %t %s ../../src/test_common.cpp EXCLUDE_HIP_PLATFORM nvidia
* TEST: %t
* HIT_END
*/
#include <iostream>
#include <chrono>
#include "test_common.h"
#include <hip/hip_vector_types.h>
#include <vector>
using namespace std;
#define NUM_TYPES 3
vector<string> types= {"float", "float2", "float4"};
vector<unsigned int> typeSizes = {4, 8, 16};
#define NUM_SIZES 12
vector<unsigned int> sizes = {1, 2, 4, 8, 16, 32,
64, 128, 256, 512, 1024, 2048};
#define NUM_BUFS 6
#define MAX_BUFS (1 << (NUM_BUFS - 1))
template <typename T>
__global__ void sampleRate(T * outBuffer, unsigned int inBufSize, unsigned int writeIt,
T **inBuffer, int numBufs) {
uint gid = (blockIdx.x * blockDim.x + threadIdx.x);
uint inputIdx = gid % inBufSize;
T tmp = (T)0.0f;
for(int i = 0; i < numBufs; i++) {
tmp += *(*(inBuffer+i)+inputIdx);
}
if (writeIt*(unsigned int)tmp.x) {
outBuffer[gid] = tmp;
}
};
template <typename T>
__global__ void sampleRateFloat(T * outBuffer, unsigned int inBufSize, unsigned int writeIt,
T ** inBuffer, int numBufs) {
uint gid = (blockIdx.x * blockDim.x + threadIdx.x);
uint inputIdx = gid % inBufSize;
T tmp = (T)0.0f;
for(int i = 0; i < numBufs; i++) {
tmp += *((*inBuffer+i)+inputIdx);
}
if (writeIt*(unsigned int)tmp) {
outBuffer[gid] = tmp;
}
};
class hipPerfSampleRate {
public:
hipPerfSampleRate();
~hipPerfSampleRate();
void open(void);
void run(unsigned int testCase);
void close(void);
// array of funtion pointers
typedef void (hipPerfSampleRate::*funPtr)(void * outBuffer, unsigned int
inBufSize, unsigned int writeIt, void **inBuffer, int numBufs, int grids, int blocks,
int threads_per_block);
// Wrappers
void float_kernel(void * outBuffer, unsigned int
inBufSize, unsigned int writeIt, void **inBuffer, int numBufs, int grids, int blocks,
int threads_per_block);
void float2_kernel(void * outBuffer, unsigned int
inBufSize, unsigned int writeIt, void **inBuffer, int numBufs, int grids, int blocks,
int threads_per_block);
void float4_kernel(void * outBuffer, unsigned int
inBufSize, unsigned int writeIt, void **inBuffer, int numBufs, int grids, int blocks,
int threads_per_block);
private:
void setData(void *ptr, unsigned int value);
void checkData(uint *ptr);
unsigned int width_;
unsigned int bufSize_;
unsigned long long totalIters = 0;
int numCUs;
unsigned int outBufSize_;
static const unsigned int MAX_ITERATIONS = 25;
unsigned int numBufs_;
unsigned int typeIdx_;
};
hipPerfSampleRate::hipPerfSampleRate() {}
hipPerfSampleRate::~hipPerfSampleRate() {}
void hipPerfSampleRate::open(void) {
int nGpu = 0;
HIPCHECK(hipGetDeviceCount(&nGpu));
if (nGpu < 1) {
std::cout << "info: didn't find any GPU! skipping the test!\n";
passed();
return;
}
int deviceId = 0;
hipDeviceProp_t props = {0};
props = {0};
HIPCHECK(hipSetDevice(deviceId));
HIPCHECK(hipGetDeviceProperties(&props, deviceId));
std::cout << "info: running on bus " << "0x" << props.pciBusID << " " << props.name
<< " with " << props.multiProcessorCount << " CUs" << " and device id: " << deviceId
<< std::endl;
numCUs = props.multiProcessorCount;
}
void hipPerfSampleRate::close() {
}
// Wrappers for the kernel launches
void hipPerfSampleRate::float_kernel(void * outBuffer, unsigned int inBufSize,
unsigned int writeIt, void **inBuffer,
int numBufs, int grids, int blocks, int threads_per_block) {
hipLaunchKernelGGL(sampleRateFloat<float>, dim3(grids, grids, grids), dim3 (blocks), 0, 0,
(float*)outBuffer, inBufSize, writeIt, (float**)inBuffer, numBufs);
}
void hipPerfSampleRate::float2_kernel(void * outBuffer, unsigned int inBufSize,
unsigned int writeIt, void **inBuffer,
int grids, int blocks, int threads_per_block, int numBufs) {
hipLaunchKernelGGL(sampleRate<float2>, dim3(grids, grids, grids), dim3(blocks), 0, 0,
(float2 *)outBuffer, inBufSize, writeIt, (float2**)inBuffer, numBufs);
}
void hipPerfSampleRate::float4_kernel(void * outBuffer, unsigned int inBufSize,
unsigned int writeIt, void **inBuffer,
int grids, int blocks, int threads_per_block, int numBufs) {
hipLaunchKernelGGL(sampleRate<float4>, dim3(grids, grids, grids), dim3(blocks), 0, 0,
(float4 *) outBuffer, inBufSize, writeIt, (float4**)inBuffer, numBufs);
}
void hipPerfSampleRate::run(unsigned int test) {
funPtr p[] = {&hipPerfSampleRate::float_kernel, &hipPerfSampleRate::float2_kernel,
&hipPerfSampleRate::float4_kernel};
// We compute a square domain
width_ = sizes[test % NUM_SIZES];
typeIdx_ = (test / NUM_SIZES) % NUM_TYPES;
bufSize_ = width_ * width_ * typeSizes[typeIdx_];
numBufs_ = (1 << (test / (NUM_SIZES * NUM_TYPES)));
void * hOutPtr;
void * dOutPtr;
void * hInPtr[numBufs_];
void ** dPtr;
void * dInPtr[numBufs_];
outBufSize_ =
sizes[NUM_SIZES - 1] * sizes[NUM_SIZES - 1] * typeSizes[NUM_TYPES - 1];
// Allocate memory on the host and device
HIPCHECK(hipHostMalloc((void **)&hOutPtr, outBufSize_, hipHostMallocDefault));
setData((void *)hOutPtr, 0xdeadbeef);
HIPCHECK(hipMalloc((uint **)&dOutPtr, outBufSize_));
// Allocate 2D array in Device
hipMalloc((void **)&dPtr, numBufs_* sizeof(void *));
for (uint i = 0; i < numBufs_; i++) {
HIPCHECK(hipHostMalloc((void **)&hInPtr[i], bufSize_, hipHostMallocDefault));
HIPCHECK(hipMalloc((uint **)&dInPtr[i], bufSize_));
setData(hInPtr[i], 0x3f800000);
}
// Populate array of pointers with array addresses
hipMemcpy(dPtr, dInPtr, numBufs_* sizeof(void *), hipMemcpyHostToDevice);
// Copy memory from host to device
for (uint i = 0; i < numBufs_; i++) {
HIPCHECK(hipMemcpy(dInPtr[i], hInPtr[i], bufSize_, hipMemcpyHostToDevice));
}
HIPCHECK(hipMemcpy(dOutPtr, hOutPtr, outBufSize_, hipMemcpyHostToDevice));
// Prepare kernel launch parameters
// outBufSize_/sizeof(uint) - Grid size in 3D
int grids = 64;
int blocks = 64;
int threads_per_block = 1;
unsigned int maxIter = MAX_ITERATIONS * (MAX_BUFS / numBufs_);
unsigned int sizeDW = width_ * width_;
unsigned int writeIt = 0;
int idx = 0;
if (!types[typeIdx_].compare("float")) {
idx = 0;
}
else if(!types[typeIdx_].compare("float2")) {
idx = 1;
}
else if(!types[typeIdx_].compare("float4")) {
idx = 2;
}
// Time the kernel execution
auto all_start = std::chrono::steady_clock::now();
for (uint i = 0; i < maxIter; i++) {
(this->*p[idx]) ((void *)dOutPtr, sizeDW, writeIt, dPtr, numBufs_, grids, blocks,
threads_per_block);
}
hipDeviceSynchronize();
auto all_end = std::chrono::steady_clock::now();
std::chrono::duration<double> all_kernel_time = all_end - all_start;
double perf = ((double)outBufSize_ * numBufs_ * (double)maxIter * (double)(1e-09)) /
all_kernel_time.count();
cout << "Domain " << sizes[NUM_SIZES - 1] << "x"<< sizes[NUM_SIZES - 1] << " bufs "
<< numBufs_ << " " << types[typeIdx_] << " " << width_<<"x"<<width_<< " (GB/s) "
<< perf << endl;
HIPCHECK(hipFree(dOutPtr));
// Free host and device memory
for (uint i = 0; i < numBufs_; i++) {
HIPCHECK(hipFree(hInPtr[i]));
HIPCHECK(hipFree(dInPtr[i]));
}
HIPCHECK(hipFree(hOutPtr));
HIPCHECK(hipFree(dPtr));
}
void hipPerfSampleRate::setData(void *ptr, unsigned int value) {
unsigned int *ptr2 = (unsigned int *)ptr;
for (unsigned int i = 0; i < bufSize_ / sizeof(unsigned int); i++) {
ptr2[i] = value;
}
}
void hipPerfSampleRate::checkData(uint *ptr) {
for (unsigned int i = 0; i < outBufSize_ / sizeof(float); i++) {
if (ptr[i] != (float)numBufs_) {
cout << "Data validation failed at "<< i << " Got "<< ptr[i] << ", expected "
<< (float)numBufs_;
break;
}
}
}
int main(int argc, char* argv[]) {
hipPerfSampleRate sampleTypes;
sampleTypes.open();
for (unsigned int testCase = 0; testCase < 216 ; testCase+=36) {
sampleTypes.run(testCase);
}
passed();
}
/*
Copyright (c) 2015-present Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/* HIT_START
* BUILD: %t %s ../../src/test_common.cpp EXCLUDE_HIP_PLATFORM nvidia
* TEST: %t
* HIT_END
*/
#include <iostream>
#include <chrono>
#include "test_common.h"
#include <hip/hip_vector_types.h>
#include <vector>
using namespace std;
#define NUM_TYPES 3
vector<string> types= {"float", "float2", "float4"};
vector<unsigned int> typeSizes = {4, 8, 16};
#define NUM_SIZES 12
vector<unsigned int> sizes = {1, 2, 4, 8, 16, 32,
64, 128, 256, 512, 1024, 2048};
#define NUM_BUFS 6
#define MAX_BUFS (1 << (NUM_BUFS - 1))
template <typename T>
__global__ void sampleRate(T * outBuffer, unsigned int inBufSize, unsigned int writeIt,
T **inBuffer, int numBufs) {
uint gid = (blockIdx.x * blockDim.x + threadIdx.x);
uint inputIdx = gid % inBufSize;
T tmp = (T)0.0f;
for(int i = 0; i < numBufs; i++) {
tmp += *(*(inBuffer+i)+inputIdx);
}
if (writeIt*(unsigned int)tmp.x) {
outBuffer[gid] = tmp;
}
};
template <typename T>
__global__ void sampleRateFloat(T * outBuffer, unsigned int inBufSize, unsigned int writeIt,
T ** inBuffer, int numBufs) {
uint gid = (blockIdx.x * blockDim.x + threadIdx.x);
uint inputIdx = gid % inBufSize;
T tmp = (T)0.0f;
for(int i = 0; i < numBufs; i++) {
tmp += *((*inBuffer+i)+inputIdx);
}
if (writeIt*(unsigned int)tmp) {
outBuffer[gid] = tmp;
}
};
class hipPerfSampleRate {
public:
hipPerfSampleRate();
~hipPerfSampleRate();
void open(void);
void run(unsigned int testCase);
void close(void);
// array of funtion pointers
typedef void (hipPerfSampleRate::*funPtr)(void * outBuffer, unsigned int
inBufSize, unsigned int writeIt, void **inBuffer, int numBufs, int grids, int blocks,
int threads_per_block);
// Wrappers
void float_kernel(void * outBuffer, unsigned int
inBufSize, unsigned int writeIt, void **inBuffer, int numBufs, int grids, int blocks,
int threads_per_block);
void float2_kernel(void * outBuffer, unsigned int
inBufSize, unsigned int writeIt, void **inBuffer, int numBufs, int grids, int blocks,
int threads_per_block);
void float4_kernel(void * outBuffer, unsigned int
inBufSize, unsigned int writeIt, void **inBuffer, int numBufs, int grids, int blocks,
int threads_per_block);
private:
void setData(void *ptr, unsigned int value);
void checkData(uint *ptr);
unsigned int width_;
unsigned int bufSize_;
unsigned long long totalIters = 0;
int numCUs;
unsigned int outBufSize_;
static const unsigned int MAX_ITERATIONS = 25;
unsigned int numBufs_;
unsigned int typeIdx_;
};
hipPerfSampleRate::hipPerfSampleRate() {}
hipPerfSampleRate::~hipPerfSampleRate() {}
void hipPerfSampleRate::open(void) {
int nGpu = 0;
HIPCHECK(hipGetDeviceCount(&nGpu));
if (nGpu < 1) {
std::cout << "info: didn't find any GPU! skipping the test!\n";
passed();
return;
}
int deviceId = 0;
hipDeviceProp_t props = {0};
props = {0};
HIPCHECK(hipSetDevice(deviceId));
HIPCHECK(hipGetDeviceProperties(&props, deviceId));
std::cout << "info: running on bus " << "0x" << props.pciBusID << " " << props.name
<< " with " << props.multiProcessorCount << " CUs" << " and device id: " << deviceId
<< std::endl;
numCUs = props.multiProcessorCount;
}
void hipPerfSampleRate::close() {
}
// Wrappers for the kernel launches
void hipPerfSampleRate::float_kernel(void * outBuffer, unsigned int inBufSize,
unsigned int writeIt, void **inBuffer,
int numBufs, int grids, int blocks, int threads_per_block) {
hipLaunchKernelGGL(sampleRateFloat<float>, dim3(grids, grids, grids), dim3 (blocks), 0, 0,
(float*)outBuffer, inBufSize, writeIt, (float**)inBuffer, numBufs);
}
void hipPerfSampleRate::float2_kernel(void * outBuffer, unsigned int inBufSize,
unsigned int writeIt, void **inBuffer,
int grids, int blocks, int threads_per_block, int numBufs) {
hipLaunchKernelGGL(sampleRate<float2>, dim3(grids, grids, grids), dim3(blocks), 0, 0,
(float2 *)outBuffer, inBufSize, writeIt, (float2**)inBuffer, numBufs);
}
void hipPerfSampleRate::float4_kernel(void * outBuffer, unsigned int inBufSize,
unsigned int writeIt, void **inBuffer,
int grids, int blocks, int threads_per_block, int numBufs) {
hipLaunchKernelGGL(sampleRate<float4>, dim3(grids, grids, grids), dim3(blocks), 0, 0,
(float4 *) outBuffer, inBufSize, writeIt, (float4**)inBuffer, numBufs);
}
void hipPerfSampleRate::run(unsigned int test) {
funPtr p[] = {&hipPerfSampleRate::float_kernel, &hipPerfSampleRate::float2_kernel,
&hipPerfSampleRate::float4_kernel};
// We compute a square domain
width_ = sizes[test % NUM_SIZES];
typeIdx_ = (test / NUM_SIZES) % NUM_TYPES;
bufSize_ = width_ * width_ * typeSizes[typeIdx_];
numBufs_ = (1 << (test / (NUM_SIZES * NUM_TYPES)));
void * hOutPtr;
void * dOutPtr;
void * hInPtr[numBufs_];
void ** dPtr;
void * dInPtr[numBufs_];
outBufSize_ =
sizes[NUM_SIZES - 1] * sizes[NUM_SIZES - 1] * typeSizes[NUM_TYPES - 1];
// Allocate memory on the host and device
HIPCHECK(hipHostMalloc((void **)&hOutPtr, outBufSize_, hipHostMallocDefault));
setData((void *)hOutPtr, 0xdeadbeef);
HIPCHECK(hipMalloc((uint **)&dOutPtr, outBufSize_));
// Allocate 2D array in Device
hipMalloc((void **)&dPtr, numBufs_* sizeof(void *));
for (uint i = 0; i < numBufs_; i++) {
HIPCHECK(hipHostMalloc((void **)&hInPtr[i], bufSize_, hipHostMallocDefault));
HIPCHECK(hipMalloc((uint **)&dInPtr[i], bufSize_));
setData(hInPtr[i], 0x3f800000);
}
// Populate array of pointers with array addresses
hipMemcpy(dPtr, dInPtr, numBufs_* sizeof(void *), hipMemcpyHostToDevice);
// Copy memory from host to device
for (uint i = 0; i < numBufs_; i++) {
HIPCHECK(hipMemcpy(dInPtr[i], hInPtr[i], bufSize_, hipMemcpyHostToDevice));
}
HIPCHECK(hipMemcpy(dOutPtr, hOutPtr, outBufSize_, hipMemcpyHostToDevice));
// Prepare kernel launch parameters
// outBufSize_/sizeof(uint) - Grid size in 3D
int grids = 64;
int blocks = 64;
int threads_per_block = 1;
unsigned int maxIter = MAX_ITERATIONS * (MAX_BUFS / numBufs_);
unsigned int sizeDW = width_ * width_;
unsigned int writeIt = 0;
int idx = 0;
if (!types[typeIdx_].compare("float")) {
idx = 0;
}
else if(!types[typeIdx_].compare("float2")) {
idx = 1;
}
else if(!types[typeIdx_].compare("float4")) {
idx = 2;
}
// Time the kernel execution
auto all_start = std::chrono::steady_clock::now();
for (uint i = 0; i < maxIter; i++) {
(this->*p[idx]) ((void *)dOutPtr, sizeDW, writeIt, dPtr, numBufs_, grids, blocks,
threads_per_block);
}
hipDeviceSynchronize();
auto all_end = std::chrono::steady_clock::now();
std::chrono::duration<double> all_kernel_time = all_end - all_start;
double perf = ((double)outBufSize_ * numBufs_ * (double)maxIter * (double)(1e-09)) /
all_kernel_time.count();
cout << "Domain " << sizes[NUM_SIZES - 1] << "x"<< sizes[NUM_SIZES - 1] << " bufs "
<< numBufs_ << " " << types[typeIdx_] << " " << width_<<"x"<<width_<< " (GB/s) "
<< perf << endl;
HIPCHECK(hipFree(dOutPtr));
// Free host and device memory
for (uint i = 0; i < numBufs_; i++) {
HIPCHECK(hipFree(hInPtr[i]));
HIPCHECK(hipFree(dInPtr[i]));
}
HIPCHECK(hipFree(hOutPtr));
HIPCHECK(hipFree(dPtr));
}
void hipPerfSampleRate::setData(void *ptr, unsigned int value) {
unsigned int *ptr2 = (unsigned int *)ptr;
for (unsigned int i = 0; i < bufSize_ / sizeof(unsigned int); i++) {
ptr2[i] = value;
}
}
void hipPerfSampleRate::checkData(uint *ptr) {
for (unsigned int i = 0; i < outBufSize_ / sizeof(float); i++) {
if (ptr[i] != (float)numBufs_) {
cout << "Data validation failed at "<< i << " Got "<< ptr[i] << ", expected "
<< (float)numBufs_;
break;
}
}
}
int main(int argc, char* argv[]) {
hipPerfSampleRate sampleTypes;
sampleTypes.open();
for (unsigned int testCase = 0; testCase < 216 ; testCase+=36) {
sampleTypes.run(testCase);
}
passed();
}
+289 -289
Dosyayı Görüntüle
@@ -1,289 +1,289 @@
/*
Copyright (c) 2015-present Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/* HIT_START
* BUILD: %t %s ../../src/test_common.cpp EXCLUDE_HIP_PLATFORM nvidia
* TEST: %t
* HIT_END
*/
#include <iostream>
#include <chrono>
#include "test_common.h"
typedef struct {
double x;
double y;
double width;
} coordRec;
static coordRec coords[] = {
{0.0, 0.0, 0.00001}, // All black
};
static unsigned int numCoords = sizeof(coords) / sizeof(coordRec);
__global__ void mandelbrot(uint *out, uint width, float xPos, float yPos, float xStep,
float yStep, uint maxIter) {
int tid = (blockIdx.x * blockDim.x + threadIdx.x);
int i = tid % width;
int j = tid / width;
float x0 = (float)(xPos + xStep*i);
float y0 = (float)(yPos + yStep*j);
float x = x0;
float y = y0;
uint iter = 0;
float tmp;
for (iter = 0; (x*x + y*y <= 4.0f) && (iter < maxIter); iter++) {
tmp = x;
x = fma(-y,y,fma(x,x,x0));
y = fma(2.0f*tmp,y,y0);
}
out[tid] = iter;
};
class hipPerfDeviceConcurrency {
public:
hipPerfDeviceConcurrency();
~hipPerfDeviceConcurrency();
void setNumGpus(unsigned int num) {
numDevices = num;
}
unsigned int getNumGpus() {
return numDevices;
}
void open(void);
void close(void);
void run(unsigned int testCase, int numGpus);
private:
void setData(void *ptr, unsigned int value);
void checkData(uint *ptr);
unsigned int numDevices;
unsigned int width_;
unsigned int bufSize;
unsigned int coordIdx;
unsigned long long totalIters = 0;
};
hipPerfDeviceConcurrency::hipPerfDeviceConcurrency() {}
hipPerfDeviceConcurrency::~hipPerfDeviceConcurrency() {}
void hipPerfDeviceConcurrency::open(void) {
int nGpu = 0;
HIPCHECK(hipGetDeviceCount(&nGpu));
setNumGpus(nGpu);
if (nGpu < 1) {
std::cout << "info: didn't find any GPU! skipping the test!\n";
passed();
}
}
void hipPerfDeviceConcurrency::close() {
}
void hipPerfDeviceConcurrency::run(unsigned int testCase, int numGpus) {
static int deviceId;
uint * hPtr[numGpus];
uint * dPtr[numGpus];
hipStream_t streams[numGpus];
int numCUs[numGpus];
unsigned int maxIter[numGpus];
unsigned long long expectedIters[numGpus];
int threads, threads_per_block, blocks;
float xStep, yStep, xPos, yPos;
for(int i = 0; i < numGpus; i++) {
if(testCase != 0) {
deviceId = i;
}
HIPCHECK(hipSetDevice(deviceId));
hipDeviceProp_t props = {0};
HIPCHECK(hipGetDeviceProperties(&props, i));
if (testCase != 0) {
std::cout << "info: running on bus " << "0x" << props.pciBusID << " " << props.name
<< " with " << props.multiProcessorCount << " CUs" << " and device ID: "
<< i << std::endl;
}
numCUs[i] = props.multiProcessorCount;
int clkFrequency = 0;
HIPCHECK(hipDeviceGetAttribute(&clkFrequency, hipDeviceAttributeClockRate, i));
clkFrequency =(unsigned int)clkFrequency/1000;
// Maximum iteration count
// maxIter = 8388608 * (engine_clock / 1000).serial execution
maxIter[i] = (unsigned int)(((8388608 * ((float)clkFrequency / 1000)) * numCUs[i]) / 128);
maxIter[i] = (maxIter[i] + 15) & ~15;
// Width is divisible by 4 because the mandelbrot kernel processes 4 pixels at once.
width_ = 256;
bufSize = width_ * width_ * sizeof(uint);
// Create streams for concurrency
HIPCHECK(hipStreamCreate(&streams[i]));
// Allocate memory on the host and device
HIPCHECK(hipHostMalloc((void **)&hPtr[i], bufSize, hipHostMallocDefault));
setData(hPtr[i], 0xdeadbeef);
HIPCHECK(hipMalloc((uint **)&dPtr[i], bufSize))
// Prepare kernel launch parameters
threads = (bufSize/sizeof(uint));
threads_per_block = 64;
blocks = (threads/threads_per_block) + (threads % threads_per_block);
coordIdx = testCase % numCoords;
xStep = (float)(coords[coordIdx].width / (double)width_);
yStep = (float)(-coords[coordIdx].width / (double)width_);
xPos = (float)(coords[coordIdx].x - 0.5 * coords[coordIdx].width);
yPos = (float)(coords[coordIdx].y + 0.5 * coords[coordIdx].width);
// Copy memory from host to device
HIPCHECK(hipMemcpy(dPtr[i], hPtr[i], bufSize, hipMemcpyHostToDevice));
}
// Time the kernel execution
auto all_start = std::chrono::steady_clock::now();
for(int i = 0; i < numGpus; i++) {
if(testCase != 0) {
deviceId = i;
}
HIPCHECK(hipSetDevice(deviceId));
hipLaunchKernelGGL(mandelbrot, dim3(blocks), dim3(threads_per_block), 0, streams[i],
dPtr[i], width_, xPos, yPos, xStep, yStep, maxIter[i]);
}
for(int i = 0; i < numGpus; i++) {
HIPCHECK(hipStreamSynchronize(0));
}
auto all_end = std::chrono::steady_clock::now();
std::chrono::duration<double> all_kernel_time = all_end - all_start;
for(int i = 0; i < numGpus; i++) {
if(testCase != 0) {
deviceId = i;
}
HIPCHECK(hipSetDevice(deviceId));
// Copy data back from device to the host
HIPCHECK(hipMemcpy(hPtr[i], dPtr[i], bufSize, hipMemcpyDeviceToHost));
checkData(hPtr[i]);
expectedIters[i] = width_ * width_ * (unsigned long long) maxIter[i];
if (testCase != 0) {
checkData(hPtr[i]);
if(totalIters != expectedIters[i]) {
std::cout << "Incorrect iteration count detected" << std::endl;
}
}
HIPCHECK(hipStreamDestroy(streams[i]));
// Free host and device memory
HIPCHECK(hipFree(hPtr[i]));
HIPCHECK(hipFree(dPtr[i]));
}
if (testCase != 0) {
std::cout << '\n' << "Measured time for kernel computation on " << numGpus << " device (s): "
<< all_kernel_time.count() << " (s) " << '\n' << std::endl;
}
if(testCase == 0) {
deviceId++;
}
}
void hipPerfDeviceConcurrency::setData(void *ptr, unsigned int value) {
unsigned int *ptr2 = (unsigned int *)ptr;
for (unsigned int i = 0; i < width_ * width_ ; i++) {
ptr2[i] = value;
}
}
void hipPerfDeviceConcurrency::checkData(uint *ptr) {
totalIters = 0;
for (unsigned int i = 0; i < width_ * width_; i++) {
totalIters += ptr[i];
}
}
int main(int argc, char* argv[]) {
hipPerfDeviceConcurrency deviceConcurrency;
deviceConcurrency.open();
int nGpu = deviceConcurrency.getNumGpus();
// testCase = 0 refers to warmup kernel run
int testCase = 0;
for (int i = 0; i < nGpu; i++) {
// Warm-up kernel on all devices
deviceConcurrency.run(testCase, 1);
}
// Time for kernel on 1 device
deviceConcurrency.run(++testCase, 1);
// Time for kernel on all available devices
deviceConcurrency.run(++testCase, nGpu);
passed();
}
/*
Copyright (c) 2015-present Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/* HIT_START
* BUILD: %t %s ../../src/test_common.cpp EXCLUDE_HIP_PLATFORM nvidia
* TEST: %t
* HIT_END
*/
#include <iostream>
#include <chrono>
#include "test_common.h"
typedef struct {
double x;
double y;
double width;
} coordRec;
static coordRec coords[] = {
{0.0, 0.0, 0.00001}, // All black
};
static unsigned int numCoords = sizeof(coords) / sizeof(coordRec);
__global__ void mandelbrot(uint *out, uint width, float xPos, float yPos, float xStep,
float yStep, uint maxIter) {
int tid = (blockIdx.x * blockDim.x + threadIdx.x);
int i = tid % width;
int j = tid / width;
float x0 = (float)(xPos + xStep*i);
float y0 = (float)(yPos + yStep*j);
float x = x0;
float y = y0;
uint iter = 0;
float tmp;
for (iter = 0; (x*x + y*y <= 4.0f) && (iter < maxIter); iter++) {
tmp = x;
x = fma(-y,y,fma(x,x,x0));
y = fma(2.0f*tmp,y,y0);
}
out[tid] = iter;
};
class hipPerfDeviceConcurrency {
public:
hipPerfDeviceConcurrency();
~hipPerfDeviceConcurrency();
void setNumGpus(unsigned int num) {
numDevices = num;
}
unsigned int getNumGpus() {
return numDevices;
}
void open(void);
void close(void);
void run(unsigned int testCase, int numGpus);
private:
void setData(void *ptr, unsigned int value);
void checkData(uint *ptr);
unsigned int numDevices;
unsigned int width_;
unsigned int bufSize;
unsigned int coordIdx;
unsigned long long totalIters = 0;
};
hipPerfDeviceConcurrency::hipPerfDeviceConcurrency() {}
hipPerfDeviceConcurrency::~hipPerfDeviceConcurrency() {}
void hipPerfDeviceConcurrency::open(void) {
int nGpu = 0;
HIPCHECK(hipGetDeviceCount(&nGpu));
setNumGpus(nGpu);
if (nGpu < 1) {
std::cout << "info: didn't find any GPU! skipping the test!\n";
passed();
}
}
void hipPerfDeviceConcurrency::close() {
}
void hipPerfDeviceConcurrency::run(unsigned int testCase, int numGpus) {
static int deviceId;
uint * hPtr[numGpus];
uint * dPtr[numGpus];
hipStream_t streams[numGpus];
int numCUs[numGpus];
unsigned int maxIter[numGpus];
unsigned long long expectedIters[numGpus];
int threads, threads_per_block, blocks;
float xStep, yStep, xPos, yPos;
for(int i = 0; i < numGpus; i++) {
if(testCase != 0) {
deviceId = i;
}
HIPCHECK(hipSetDevice(deviceId));
hipDeviceProp_t props = {0};
HIPCHECK(hipGetDeviceProperties(&props, i));
if (testCase != 0) {
std::cout << "info: running on bus " << "0x" << props.pciBusID << " " << props.name
<< " with " << props.multiProcessorCount << " CUs" << " and device ID: "
<< i << std::endl;
}
numCUs[i] = props.multiProcessorCount;
int clkFrequency = 0;
HIPCHECK(hipDeviceGetAttribute(&clkFrequency, hipDeviceAttributeClockRate, i));
clkFrequency =(unsigned int)clkFrequency/1000;
// Maximum iteration count
// maxIter = 8388608 * (engine_clock / 1000).serial execution
maxIter[i] = (unsigned int)(((8388608 * ((float)clkFrequency / 1000)) * numCUs[i]) / 128);
maxIter[i] = (maxIter[i] + 15) & ~15;
// Width is divisible by 4 because the mandelbrot kernel processes 4 pixels at once.
width_ = 256;
bufSize = width_ * width_ * sizeof(uint);
// Create streams for concurrency
HIPCHECK(hipStreamCreate(&streams[i]));
// Allocate memory on the host and device
HIPCHECK(hipHostMalloc((void **)&hPtr[i], bufSize, hipHostMallocDefault));
setData(hPtr[i], 0xdeadbeef);
HIPCHECK(hipMalloc((uint **)&dPtr[i], bufSize))
// Prepare kernel launch parameters
threads = (bufSize/sizeof(uint));
threads_per_block = 64;
blocks = (threads/threads_per_block) + (threads % threads_per_block);
coordIdx = testCase % numCoords;
xStep = (float)(coords[coordIdx].width / (double)width_);
yStep = (float)(-coords[coordIdx].width / (double)width_);
xPos = (float)(coords[coordIdx].x - 0.5 * coords[coordIdx].width);
yPos = (float)(coords[coordIdx].y + 0.5 * coords[coordIdx].width);
// Copy memory from host to device
HIPCHECK(hipMemcpy(dPtr[i], hPtr[i], bufSize, hipMemcpyHostToDevice));
}
// Time the kernel execution
auto all_start = std::chrono::steady_clock::now();
for(int i = 0; i < numGpus; i++) {
if(testCase != 0) {
deviceId = i;
}
HIPCHECK(hipSetDevice(deviceId));
hipLaunchKernelGGL(mandelbrot, dim3(blocks), dim3(threads_per_block), 0, streams[i],
dPtr[i], width_, xPos, yPos, xStep, yStep, maxIter[i]);
}
for(int i = 0; i < numGpus; i++) {
HIPCHECK(hipStreamSynchronize(0));
}
auto all_end = std::chrono::steady_clock::now();
std::chrono::duration<double> all_kernel_time = all_end - all_start;
for(int i = 0; i < numGpus; i++) {
if(testCase != 0) {
deviceId = i;
}
HIPCHECK(hipSetDevice(deviceId));
// Copy data back from device to the host
HIPCHECK(hipMemcpy(hPtr[i], dPtr[i], bufSize, hipMemcpyDeviceToHost));
checkData(hPtr[i]);
expectedIters[i] = width_ * width_ * (unsigned long long) maxIter[i];
if (testCase != 0) {
checkData(hPtr[i]);
if(totalIters != expectedIters[i]) {
std::cout << "Incorrect iteration count detected" << std::endl;
}
}
HIPCHECK(hipStreamDestroy(streams[i]));
// Free host and device memory
HIPCHECK(hipFree(hPtr[i]));
HIPCHECK(hipFree(dPtr[i]));
}
if (testCase != 0) {
std::cout << '\n' << "Measured time for kernel computation on " << numGpus << " device (s): "
<< all_kernel_time.count() << " (s) " << '\n' << std::endl;
}
if(testCase == 0) {
deviceId++;
}
}
void hipPerfDeviceConcurrency::setData(void *ptr, unsigned int value) {
unsigned int *ptr2 = (unsigned int *)ptr;
for (unsigned int i = 0; i < width_ * width_ ; i++) {
ptr2[i] = value;
}
}
void hipPerfDeviceConcurrency::checkData(uint *ptr) {
totalIters = 0;
for (unsigned int i = 0; i < width_ * width_; i++) {
totalIters += ptr[i];
}
}
int main(int argc, char* argv[]) {
hipPerfDeviceConcurrency deviceConcurrency;
deviceConcurrency.open();
int nGpu = deviceConcurrency.getNumGpus();
// testCase = 0 refers to warmup kernel run
int testCase = 0;
for (int i = 0; i < nGpu; i++) {
// Warm-up kernel on all devices
deviceConcurrency.run(testCase, 1);
}
// Time for kernel on 1 device
deviceConcurrency.run(++testCase, 1);
// Time for kernel on all available devices
deviceConcurrency.run(++testCase, nGpu);
passed();
}
+157 -157
Dosyayı Görüntüle
@@ -1,157 +1,157 @@
#pragma once
#ifdef __unix__
#include <string>
#include <atomic>
#include <unistd.h>
#include <fcntl.h>
#include <sys/mman.h>
template <typename T>
struct Shmem {
std::atomic<T> handle_;
std::atomic<int> done_counter_;
};
template <typename T>
struct ShmemMeta {
std::string shmem_name_;
int shmem_fd_;
Shmem<T>* shmem_;
};
template <typename T>
class MultiProcess {
public:
MultiProcess(size_t num_proc) : num_proc_(num_proc) {}
~MultiProcess();
void DebugInfo(pid_t pid);
pid_t SpawnProcess(bool debug_bkpt);
bool CreateShmem();
bool WriteHandleToShmem(T ipc_handle);
bool WaitTillAllChildReads();
bool ReadHandleFromShmem(T& ipc_handle);
bool NotifyParentDone();
private:
const size_t num_proc_;
bool debug_proc_;
ShmemMeta<T> shmem_meta_obj_;
};
// Template Implementations
template <typename T>
MultiProcess<T>::~MultiProcess() {
if(munmap(shmem_meta_obj_.shmem_, sizeof(Shmem<T>)) < 0) {
std::cout<<"Error Unmapping shared memory "<<std::endl;
exit(0);
}
}
template <typename T>
void MultiProcess<T>::DebugInfo(pid_t pid) {
const int delay = 1;
if (pid == 0) {
std::cout<<" Child Process with ID: "<<getpid()<<std::endl;
} else {
std::cout<<" Parent Process with ID: "<<getpid()<<std::endl;
}
volatile int flag = 0;
while (!flag) {
sleep(delay);
}
}
template <typename T>
pid_t MultiProcess<T>::SpawnProcess(bool debug_bkpt) {
if (num_proc_ < 0) {
std::cout<<"Num Process cannot be less than 1"<<std::endl;
return -1;
}
pid_t pid;
for (size_t proc_idx = 0; proc_idx < num_proc_; ++proc_idx) {
pid = fork();
if (pid < 0) {
std::cout<<"Fork Failed"<<std::endl;
assert(false);
} else if (pid == 0) {
//Child Process, so break
break;
}
}
if (debug_bkpt) {
DebugInfo(pid);
}
return pid;
}
template <typename T>
bool MultiProcess<T>::CreateShmem() {
if (num_proc_ < 0) {
std::cout<<"Num Process cannot be less than 1"<<std::endl;
return false;
}
char name_template[] = "/tmp/eventXXXXX";
int temp_fd = mkstemp(name_template);
shmem_meta_obj_.shmem_name_ = name_template;
shmem_meta_obj_.shmem_name_.replace(0, 5, "/hip_");
shmem_meta_obj_.shmem_fd_ = shm_open(shmem_meta_obj_.shmem_name_.c_str(),
O_RDWR | O_CREAT, 0777);
if (ftruncate(shmem_meta_obj_.shmem_fd_, sizeof(ShmemMeta<T>)) != 0) {
std::cout<<"Cannot FTruncate "<<std::endl;
exit(0);
}
shmem_meta_obj_.shmem_ = (Shmem<T>*)mmap(0, sizeof(Shmem<T>), PROT_READ | PROT_WRITE,
MAP_SHARED, shmem_meta_obj_.shmem_fd_, 0);
memset(&shmem_meta_obj_.shmem_->handle_, 0x00, sizeof(T));
shmem_meta_obj_.shmem_->done_counter_ = -1;
return true;
}
template <typename T>
bool MultiProcess<T>::WriteHandleToShmem(T ipc_handle) {
memcpy(&shmem_meta_obj_.shmem_->handle_, &ipc_handle, sizeof(T));
shmem_meta_obj_.shmem_->done_counter_ = 0;
return true;
}
template <typename T>
bool MultiProcess<T>::WaitTillAllChildReads() {
size_t write_count = 0;
while (shmem_meta_obj_.shmem_->done_counter_ != num_proc_) {
++write_count;
}
return true;
}
template <typename T>
bool MultiProcess<T>::ReadHandleFromShmem(T& ipc_handle) {
size_t read_count = 0;
while (shmem_meta_obj_.shmem_->done_counter_ == -1) {
++read_count;
}
memcpy(&ipc_handle, &shmem_meta_obj_.shmem_->handle_, sizeof(T));
return true;
}
template <typename T>
bool MultiProcess<T>::NotifyParentDone() {
++shmem_meta_obj_.shmem_->done_counter_;
return true;
}
#endif /* __unix__ */
#pragma once
#ifdef __unix__
#include <string>
#include <atomic>
#include <unistd.h>
#include <fcntl.h>
#include <sys/mman.h>
template <typename T>
struct Shmem {
std::atomic<T> handle_;
std::atomic<int> done_counter_;
};
template <typename T>
struct ShmemMeta {
std::string shmem_name_;
int shmem_fd_;
Shmem<T>* shmem_;
};
template <typename T>
class MultiProcess {
public:
MultiProcess(size_t num_proc) : num_proc_(num_proc) {}
~MultiProcess();
void DebugInfo(pid_t pid);
pid_t SpawnProcess(bool debug_bkpt);
bool CreateShmem();
bool WriteHandleToShmem(T ipc_handle);
bool WaitTillAllChildReads();
bool ReadHandleFromShmem(T& ipc_handle);
bool NotifyParentDone();
private:
const size_t num_proc_;
bool debug_proc_;
ShmemMeta<T> shmem_meta_obj_;
};
// Template Implementations
template <typename T>
MultiProcess<T>::~MultiProcess() {
if(munmap(shmem_meta_obj_.shmem_, sizeof(Shmem<T>)) < 0) {
std::cout<<"Error Unmapping shared memory "<<std::endl;
exit(0);
}
}
template <typename T>
void MultiProcess<T>::DebugInfo(pid_t pid) {
const int delay = 1;
if (pid == 0) {
std::cout<<" Child Process with ID: "<<getpid()<<std::endl;
} else {
std::cout<<" Parent Process with ID: "<<getpid()<<std::endl;
}
volatile int flag = 0;
while (!flag) {
sleep(delay);
}
}
template <typename T>
pid_t MultiProcess<T>::SpawnProcess(bool debug_bkpt) {
if (num_proc_ < 0) {
std::cout<<"Num Process cannot be less than 1"<<std::endl;
return -1;
}
pid_t pid;
for (size_t proc_idx = 0; proc_idx < num_proc_; ++proc_idx) {
pid = fork();
if (pid < 0) {
std::cout<<"Fork Failed"<<std::endl;
assert(false);
} else if (pid == 0) {
//Child Process, so break
break;
}
}
if (debug_bkpt) {
DebugInfo(pid);
}
return pid;
}
template <typename T>
bool MultiProcess<T>::CreateShmem() {
if (num_proc_ < 0) {
std::cout<<"Num Process cannot be less than 1"<<std::endl;
return false;
}
char name_template[] = "/tmp/eventXXXXX";
int temp_fd = mkstemp(name_template);
shmem_meta_obj_.shmem_name_ = name_template;
shmem_meta_obj_.shmem_name_.replace(0, 5, "/hip_");
shmem_meta_obj_.shmem_fd_ = shm_open(shmem_meta_obj_.shmem_name_.c_str(),
O_RDWR | O_CREAT, 0777);
if (ftruncate(shmem_meta_obj_.shmem_fd_, sizeof(ShmemMeta<T>)) != 0) {
std::cout<<"Cannot FTruncate "<<std::endl;
exit(0);
}
shmem_meta_obj_.shmem_ = (Shmem<T>*)mmap(0, sizeof(Shmem<T>), PROT_READ | PROT_WRITE,
MAP_SHARED, shmem_meta_obj_.shmem_fd_, 0);
memset(&shmem_meta_obj_.shmem_->handle_, 0x00, sizeof(T));
shmem_meta_obj_.shmem_->done_counter_ = -1;
return true;
}
template <typename T>
bool MultiProcess<T>::WriteHandleToShmem(T ipc_handle) {
memcpy(&shmem_meta_obj_.shmem_->handle_, &ipc_handle, sizeof(T));
shmem_meta_obj_.shmem_->done_counter_ = 0;
return true;
}
template <typename T>
bool MultiProcess<T>::WaitTillAllChildReads() {
size_t write_count = 0;
while (shmem_meta_obj_.shmem_->done_counter_ != num_proc_) {
++write_count;
}
return true;
}
template <typename T>
bool MultiProcess<T>::ReadHandleFromShmem(T& ipc_handle) {
size_t read_count = 0;
while (shmem_meta_obj_.shmem_->done_counter_ == -1) {
++read_count;
}
memcpy(&ipc_handle, &shmem_meta_obj_.shmem_->handle_, sizeof(T));
return true;
}
template <typename T>
bool MultiProcess<T>::NotifyParentDone() {
++shmem_meta_obj_.shmem_->done_counter_;
return true;
}
#endif /* __unix__ */
+126 -126
Dosyayı Görüntüle
@@ -1,126 +1,126 @@
/*
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/* HIT_START
* BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM nvidia
* TEST: %t
* HIT_END
*/
#include "test_common.h"
#include "MultiProcess.h"
void multi_process(int num_process, bool debug_process) {
#ifdef __unix__
float *A_h, *B_h, *C_h;
float *A_d, *B_d, *C_d;
hipEvent_t start, stop;
size_t Nbytes = N * sizeof(float);
MultiProcess<hipIpcEventHandle_t>* mProcess = new MultiProcess<hipIpcEventHandle_t>(num_process);
mProcess->CreateShmem();
pid_t pid = mProcess->SpawnProcess(debug_process);
// Parent Process
if (pid != 0) {
unsigned blocks = (N + threadsPerBlock - 1) / threadsPerBlock;
if (blocks > 1024) blocks = 1024;
if (blocks == 0) blocks = 1;
printf("N=%zu (A+B+C= %6.1f MB total) blocks=%u threadsPerBlock=%u iterations=%d\n", N,
((double)3 * N * sizeof(float)) / 1024 / 1024, blocks, threadsPerBlock, iterations);
printf("iterations=%d\n", iterations);
HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N);
// NULL stream check:
HIPCHECK(hipEventCreateWithFlags(&start, hipEventDisableTiming|hipEventInterprocess));
HIPCHECK(hipEventCreateWithFlags(&stop, hipEventDisableTiming|hipEventInterprocess));
HIPCHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
HIPCHECK(hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice));
for (int i = 0; i < iterations; i++) {
//--- START TIMED REGION
long long hostStart = HipTest::get_time();
// Record the start event
HIPCHECK(hipEventRecord(start, NULL));
hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0,
static_cast<const float*>(A_d), static_cast<const float*>(B_d), C_d, N);
HIPCHECK(hipEventRecord(stop, NULL));
HIPCHECK(hipEventSynchronize(stop));
HIPCHECK(hipEventQuery(stop));
long long hostStop = HipTest::get_time();
//--- STOP TIMED REGION
float eventMs = 1.0f;
// should fail
HIPASSERT(hipSuccess != hipEventElapsedTime(&eventMs, start, stop));
float hostMs = HipTest::elapsed_time(hostStart, hostStop);
printf("host_time (gettimeofday) =%6.3fms\n", hostMs);
printf("kernel_time (hipEventElapsedTime) =%6.3fms\n", eventMs);
printf("\n");
}
hipIpcEventHandle_t ipc_handle;
HIPCHECK(hipIpcGetEventHandle(&ipc_handle, start));
mProcess->WriteHandleToShmem(ipc_handle);
mProcess->WaitTillAllChildReads();
} else {
hipEvent_t ipc_event;
hipIpcEventHandle_t ipc_handle;
mProcess->ReadHandleFromShmem(ipc_handle);
HIPCHECK(hipIpcOpenEventHandle(&ipc_event, ipc_handle));
HIPCHECK(hipEventSynchronize(ipc_event));
HIPCHECK(hipEventDestroy(ipc_event));
mProcess->NotifyParentDone();
}
if (pid != 0) {
HIPCHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
printf("check:\n");
HipTest::checkVectorADD(A_h, B_h, C_h, N, true);
HIPCHECK(hipEventDestroy(start));
HIPCHECK(hipEventDestroy(stop));
delete mProcess;
}
#endif /* __unix__ */
}
int main(int argc, char* argv[]) {
HipTest::parseStandardArguments(argc, argv, true);
multi_process((N < 64) ? N : 64, debug_test);
passed();
}
/*
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/* HIT_START
* BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM nvidia
* TEST: %t
* HIT_END
*/
#include "test_common.h"
#include "MultiProcess.h"
void multi_process(int num_process, bool debug_process) {
#ifdef __unix__
float *A_h, *B_h, *C_h;
float *A_d, *B_d, *C_d;
hipEvent_t start, stop;
size_t Nbytes = N * sizeof(float);
MultiProcess<hipIpcEventHandle_t>* mProcess = new MultiProcess<hipIpcEventHandle_t>(num_process);
mProcess->CreateShmem();
pid_t pid = mProcess->SpawnProcess(debug_process);
// Parent Process
if (pid != 0) {
unsigned blocks = (N + threadsPerBlock - 1) / threadsPerBlock;
if (blocks > 1024) blocks = 1024;
if (blocks == 0) blocks = 1;
printf("N=%zu (A+B+C= %6.1f MB total) blocks=%u threadsPerBlock=%u iterations=%d\n", N,
((double)3 * N * sizeof(float)) / 1024 / 1024, blocks, threadsPerBlock, iterations);
printf("iterations=%d\n", iterations);
HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N);
// NULL stream check:
HIPCHECK(hipEventCreateWithFlags(&start, hipEventDisableTiming|hipEventInterprocess));
HIPCHECK(hipEventCreateWithFlags(&stop, hipEventDisableTiming|hipEventInterprocess));
HIPCHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
HIPCHECK(hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice));
for (int i = 0; i < iterations; i++) {
//--- START TIMED REGION
long long hostStart = HipTest::get_time();
// Record the start event
HIPCHECK(hipEventRecord(start, NULL));
hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0,
static_cast<const float*>(A_d), static_cast<const float*>(B_d), C_d, N);
HIPCHECK(hipEventRecord(stop, NULL));
HIPCHECK(hipEventSynchronize(stop));
HIPCHECK(hipEventQuery(stop));
long long hostStop = HipTest::get_time();
//--- STOP TIMED REGION
float eventMs = 1.0f;
// should fail
HIPASSERT(hipSuccess != hipEventElapsedTime(&eventMs, start, stop));
float hostMs = HipTest::elapsed_time(hostStart, hostStop);
printf("host_time (gettimeofday) =%6.3fms\n", hostMs);
printf("kernel_time (hipEventElapsedTime) =%6.3fms\n", eventMs);
printf("\n");
}
hipIpcEventHandle_t ipc_handle;
HIPCHECK(hipIpcGetEventHandle(&ipc_handle, start));
mProcess->WriteHandleToShmem(ipc_handle);
mProcess->WaitTillAllChildReads();
} else {
hipEvent_t ipc_event;
hipIpcEventHandle_t ipc_handle;
mProcess->ReadHandleFromShmem(ipc_handle);
HIPCHECK(hipIpcOpenEventHandle(&ipc_event, ipc_handle));
HIPCHECK(hipEventSynchronize(ipc_event));
HIPCHECK(hipEventDestroy(ipc_event));
mProcess->NotifyParentDone();
}
if (pid != 0) {
HIPCHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
printf("check:\n");
HipTest::checkVectorADD(A_h, B_h, C_h, N, true);
HIPCHECK(hipEventDestroy(start));
HIPCHECK(hipEventDestroy(stop));
delete mProcess;
}
#endif /* __unix__ */
}
int main(int argc, char* argv[]) {
HipTest::parseStandardArguments(argc, argv, true);
multi_process((N < 64) ? N : 64, debug_test);
passed();
}
+103 -103
Dosyayı Görüntüle
@@ -1,103 +1,103 @@
/*
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/* HIT_START
* BUILD: %t %s ../test_common.cpp
* TEST: %t
* HIT_END
*/
#include "test_common.h"
#include "MultiProcess.h"
#define NUM_ELEMS 1024
#define OFFSET 128
void multi_process(int num_process, bool debug_process) {
#ifdef __unix__
int* ipc_dptr = nullptr;
int* ipc_hptr = nullptr;
int* ipc_out_dptr = nullptr;
int* ipc_out_hptr = nullptr;
int* ipc_offset_dptr = nullptr;
MultiProcess<hipIpcMemHandle_t>* mProcess = new MultiProcess<hipIpcMemHandle_t>(num_process);
mProcess->CreateShmem();
pid_t pid = mProcess->SpawnProcess(debug_process);
// Parent Process
if (pid != 0) {
hipIpcMemHandle_t ipc_handle;
memset(&ipc_handle, 0x00, sizeof(hipIpcMemHandle_t));
HIPCHECK(hipMalloc((void**)&ipc_dptr, NUM_ELEMS * sizeof(int)));
// Add offset to the dev_ptr
ipc_offset_dptr = ipc_dptr + OFFSET;
// Get handle for the offsetted device_ptr
HIPCHECK(hipIpcGetMemHandle(&ipc_handle, ipc_offset_dptr));
ipc_hptr = new int[NUM_ELEMS];
for (size_t idx = 0; idx < NUM_ELEMS; ++idx) {
ipc_hptr[idx] = idx;
}
HIPCHECK(hipMemset(ipc_dptr, 0x00, (NUM_ELEMS * sizeof(int))));
HIPCHECK(hipMemcpy(ipc_dptr, ipc_hptr, (NUM_ELEMS * sizeof(int)), hipMemcpyHostToDevice));
mProcess->WriteHandleToShmem(ipc_handle);
mProcess->WaitTillAllChildReads();
} else {
ipc_out_hptr = new int[NUM_ELEMS];
memset(ipc_out_hptr, 0x00, (NUM_ELEMS * sizeof(int)));
hipIpcMemHandle_t ipc_handle;
mProcess->ReadHandleFromShmem(ipc_handle);
// Open handle to get dev_ptr
HIPCHECK(hipIpcOpenMemHandle((void**)&ipc_out_dptr, ipc_handle, hipIpcMemLazyEnablePeerAccess));
HIPCHECK(hipMemcpy(ipc_out_hptr, ipc_out_dptr, (NUM_ELEMS * sizeof(int)),
hipMemcpyDeviceToHost));
for (size_t idx = 0; idx < NUM_ELEMS; ++idx) {
if (ipc_out_hptr[idx] != idx) {
std::cout<<"Failing @ idx: "<< idx << std::endl;
}
}
mProcess->NotifyParentDone();
HIPCHECK(hipIpcCloseMemHandle(ipc_out_dptr));
delete[] ipc_out_hptr;
}
if (pid != 0) {
delete mProcess;
}
#endif /* __unix__ */
}
int main(int argc, char* argv[]) {
HipTest::parseStandardArguments(argc, argv, true);
multi_process((N < 64) ? N : 64, debug_test);
passed();
}
/*
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/* HIT_START
* BUILD: %t %s ../test_common.cpp
* TEST: %t
* HIT_END
*/
#include "test_common.h"
#include "MultiProcess.h"
#define NUM_ELEMS 1024
#define OFFSET 128
void multi_process(int num_process, bool debug_process) {
#ifdef __unix__
int* ipc_dptr = nullptr;
int* ipc_hptr = nullptr;
int* ipc_out_dptr = nullptr;
int* ipc_out_hptr = nullptr;
int* ipc_offset_dptr = nullptr;
MultiProcess<hipIpcMemHandle_t>* mProcess = new MultiProcess<hipIpcMemHandle_t>(num_process);
mProcess->CreateShmem();
pid_t pid = mProcess->SpawnProcess(debug_process);
// Parent Process
if (pid != 0) {
hipIpcMemHandle_t ipc_handle;
memset(&ipc_handle, 0x00, sizeof(hipIpcMemHandle_t));
HIPCHECK(hipMalloc((void**)&ipc_dptr, NUM_ELEMS * sizeof(int)));
// Add offset to the dev_ptr
ipc_offset_dptr = ipc_dptr + OFFSET;
// Get handle for the offsetted device_ptr
HIPCHECK(hipIpcGetMemHandle(&ipc_handle, ipc_offset_dptr));
ipc_hptr = new int[NUM_ELEMS];
for (size_t idx = 0; idx < NUM_ELEMS; ++idx) {
ipc_hptr[idx] = idx;
}
HIPCHECK(hipMemset(ipc_dptr, 0x00, (NUM_ELEMS * sizeof(int))));
HIPCHECK(hipMemcpy(ipc_dptr, ipc_hptr, (NUM_ELEMS * sizeof(int)), hipMemcpyHostToDevice));
mProcess->WriteHandleToShmem(ipc_handle);
mProcess->WaitTillAllChildReads();
} else {
ipc_out_hptr = new int[NUM_ELEMS];
memset(ipc_out_hptr, 0x00, (NUM_ELEMS * sizeof(int)));
hipIpcMemHandle_t ipc_handle;
mProcess->ReadHandleFromShmem(ipc_handle);
// Open handle to get dev_ptr
HIPCHECK(hipIpcOpenMemHandle((void**)&ipc_out_dptr, ipc_handle, hipIpcMemLazyEnablePeerAccess));
HIPCHECK(hipMemcpy(ipc_out_hptr, ipc_out_dptr, (NUM_ELEMS * sizeof(int)),
hipMemcpyDeviceToHost));
for (size_t idx = 0; idx < NUM_ELEMS; ++idx) {
if (ipc_out_hptr[idx] != idx) {
std::cout<<"Failing @ idx: "<< idx << std::endl;
}
}
mProcess->NotifyParentDone();
HIPCHECK(hipIpcCloseMemHandle(ipc_out_dptr));
delete[] ipc_out_hptr;
}
if (pid != 0) {
delete mProcess;
}
#endif /* __unix__ */
}
int main(int argc, char* argv[]) {
HipTest::parseStandardArguments(argc, argv, true);
multi_process((N < 64) ? N : 64, debug_test);
passed();
}
+49 -49
Dosyayı Görüntüle
@@ -1,49 +1,49 @@
/*
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/* HIT_START
* BUILD: %t %s ../../test_common.cpp
* TEST: %t
* HIT_END
*/
#include <stdio.h>
#include "hip/hip_runtime.h"
#include "test_common.h"
int main(void) {
hipDeviceProp_t prop;
int dev;
hipGetDevice(&dev);
printf("ID of current HIP device: %d\n", dev);
memset(&prop, 0, sizeof(hipDeviceProp_t));
prop.major = 1;
prop.minor = 3;
hipChooseDevice(&dev, &prop);
printf("ID of hip device closest to revision 1.3: %d\n", dev);
hipSetDevice(dev);
passed();
}
/*
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/* HIT_START
* BUILD: %t %s ../../test_common.cpp
* TEST: %t
* HIT_END
*/
#include <stdio.h>
#include "hip/hip_runtime.h"
#include "test_common.h"
int main(void) {
hipDeviceProp_t prop;
int dev;
hipGetDevice(&dev);
printf("ID of current HIP device: %d\n", dev);
memset(&prop, 0, sizeof(hipDeviceProp_t));
prop.major = 1;
prop.minor = 3;
hipChooseDevice(&dev, &prop);
printf("ID of hip device closest to revision 1.3: %d\n", dev);
hipSetDevice(dev);
passed();
}
+161 -161
Dosyayı Görüntüle
@@ -1,161 +1,161 @@
/* HIT_START
* BUILD: %t %s ../test_common.cpp
* TEST: %t
* HIT_END
*/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <vector>
#include <hip/hip_runtime.h>
#include "test_common.h"
// Height Width Vector
std::vector<unsigned int> hw_vector = {2048, 1024, 512, 256, 64};
std::vector<unsigned int> mip_vector = {8, 4, 2, 1};
__global__ void tex2DKernel(float* outputData, hipTextureObject_t textureObject, int width,
int height, float level) {
int x = blockIdx.x * blockDim.x + threadIdx.x;
int y = blockIdx.y * blockDim.y + threadIdx.y;
outputData[y * width + x] = tex2DLod<float>(textureObject, x, y, level);
}
bool runMipMapTest(unsigned int width, unsigned int height, unsigned int mipmap_level) {
bool testResult = true;
printf("Width: %u Height: %u mip: %u \n", width, height, mipmap_level);
// Create new width & height to be tested
unsigned int orig_width = width;
unsigned int orig_height = height;
width /= pow(2, mipmap_level);
height /= pow(2, mipmap_level);
unsigned int size = width * height * sizeof(float);
float* hData = (float*)malloc(size);
memset(hData, 0, size);
for (int i = 0; i < height; i++) {
for (int j = 0; j < width; j++) {
hData[i * width + j] = i * width + j;
}
}
printf("hData: ");
for (int i = 0; i < 64; i++) {
printf("%f ", hData[i]);
if (i % width == 0) {
printf("\n");
}
}
printf("\n");
hipChannelFormatDesc channelDesc = hipCreateChannelDesc(32, 0, 0, 0, hipChannelFormatKindFloat);
HIP_ARRAY3D_DESCRIPTOR mipmapped_array_desc;
memset(&mipmapped_array_desc, 0x00, sizeof(HIP_ARRAY3D_DESCRIPTOR));
mipmapped_array_desc.Width = orig_width;
mipmapped_array_desc.Height = orig_height;
mipmapped_array_desc.Depth = 0;
mipmapped_array_desc.Format = HIP_AD_FORMAT_FLOAT;
mipmapped_array_desc.NumChannels = ((channelDesc.x != 0) + (channelDesc.y != 0)
+ (channelDesc.z != 0) + (channelDesc.w != 0));
mipmapped_array_desc.Flags = 0;
hipMipmappedArray* mip_array_ptr;
hipMipmappedArrayCreate(&mip_array_ptr, &mipmapped_array_desc, 2 * mipmap_level);
hipArray *hipArray = nullptr;
HIPCHECK(hipMipmappedArrayGetLevel(&hipArray, mip_array_ptr, mipmap_level));
HIPCHECK(hipMemcpyToArray(hipArray, 0, 0, hData, size, hipMemcpyHostToDevice));
hipResourceDesc resDesc;
memset(&resDesc, 0, sizeof(resDesc));
resDesc.resType = hipResourceTypeArray;
resDesc.res.array.array = hipArray;
// Specify texture object parameters
hipTextureDesc texDesc;
memset(&texDesc, 0, sizeof(texDesc));
texDesc.addressMode[0] = hipAddressModeWrap;
texDesc.addressMode[1] = hipAddressModeWrap;
texDesc.filterMode = hipFilterModePoint;
texDesc.readMode = hipReadModeElementType;
texDesc.normalizedCoords = 0;
// Create texture object
hipTextureObject_t textureObject = 0;
hipCreateTextureObject(&textureObject, &resDesc, &texDesc, NULL);
float* dData = NULL;
hipMalloc((void**)&dData, size);
dim3 dimBlock(16, 16, 1);
dim3 dimGrid(width / dimBlock.x, height / dimBlock.y, 1);
hipLaunchKernelGGL(tex2DKernel, dim3(dimGrid), dim3(dimBlock), 0, 0, dData, textureObject,
width, height, (2 * mipmap_level));
hipDeviceSynchronize();
float* hOutputData = (float*)malloc(size);
memset(hOutputData, 0, size);
hipMemcpy(hOutputData, dData, size, hipMemcpyDeviceToHost);
printf("dData: ");
for (int i = 0; i < 64; i++) {
printf("%f ", hOutputData[i]);
if (i % width == 0) {
printf("\n");
}
}
printf("\n");
for (int i = 0; i < height; i++) {
for (int j = 0; j < width; j++) {
if (hData[i * width + j] != hOutputData[i * width + j]) {
printf("Difference [ %d %d ]:%f ----%f\n", i, j, hData[i * width + j],
hOutputData[i * width + j]);
testResult = false;
break;
}
}
}
hipDestroyTextureObject(textureObject);
hipFree(dData);
hipFreeArray(hipArray);
return testResult;
}
bool runTest(int argc, char** argv) {
bool testResult = true;
for (auto& hw: hw_vector) {
for (auto& mip: mip_vector) {
if ((hw / static_cast<int>(pow (2,(mip * 2)))) > 0) {
testResult |= runMipMapTest(hw, hw, mip);
}
}
}
printf("\n");
return testResult;
}
int main(int argc, char** argv) {
bool testResult = true;
#ifdef _WIN32
testResult = runTest(argc, argv);
#else
std::cout<<"Mipmaps are Supported only on windows, skipping the test"<<std::endl;
#endif
if (testResult) {
passed();
} else {
exit(EXIT_FAILURE);
}
}
/* HIT_START
* BUILD: %t %s ../test_common.cpp
* TEST: %t
* HIT_END
*/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <vector>
#include <hip/hip_runtime.h>
#include "test_common.h"
// Height Width Vector
std::vector<unsigned int> hw_vector = {2048, 1024, 512, 256, 64};
std::vector<unsigned int> mip_vector = {8, 4, 2, 1};
__global__ void tex2DKernel(float* outputData, hipTextureObject_t textureObject, int width,
int height, float level) {
int x = blockIdx.x * blockDim.x + threadIdx.x;
int y = blockIdx.y * blockDim.y + threadIdx.y;
outputData[y * width + x] = tex2DLod<float>(textureObject, x, y, level);
}
bool runMipMapTest(unsigned int width, unsigned int height, unsigned int mipmap_level) {
bool testResult = true;
printf("Width: %u Height: %u mip: %u \n", width, height, mipmap_level);
// Create new width & height to be tested
unsigned int orig_width = width;
unsigned int orig_height = height;
width /= pow(2, mipmap_level);
height /= pow(2, mipmap_level);
unsigned int size = width * height * sizeof(float);
float* hData = (float*)malloc(size);
memset(hData, 0, size);
for (int i = 0; i < height; i++) {
for (int j = 0; j < width; j++) {
hData[i * width + j] = i * width + j;
}
}
printf("hData: ");
for (int i = 0; i < 64; i++) {
printf("%f ", hData[i]);
if (i % width == 0) {
printf("\n");
}
}
printf("\n");
hipChannelFormatDesc channelDesc = hipCreateChannelDesc(32, 0, 0, 0, hipChannelFormatKindFloat);
HIP_ARRAY3D_DESCRIPTOR mipmapped_array_desc;
memset(&mipmapped_array_desc, 0x00, sizeof(HIP_ARRAY3D_DESCRIPTOR));
mipmapped_array_desc.Width = orig_width;
mipmapped_array_desc.Height = orig_height;
mipmapped_array_desc.Depth = 0;
mipmapped_array_desc.Format = HIP_AD_FORMAT_FLOAT;
mipmapped_array_desc.NumChannels = ((channelDesc.x != 0) + (channelDesc.y != 0)
+ (channelDesc.z != 0) + (channelDesc.w != 0));
mipmapped_array_desc.Flags = 0;
hipMipmappedArray* mip_array_ptr;
hipMipmappedArrayCreate(&mip_array_ptr, &mipmapped_array_desc, 2 * mipmap_level);
hipArray *hipArray = nullptr;
HIPCHECK(hipMipmappedArrayGetLevel(&hipArray, mip_array_ptr, mipmap_level));
HIPCHECK(hipMemcpyToArray(hipArray, 0, 0, hData, size, hipMemcpyHostToDevice));
hipResourceDesc resDesc;
memset(&resDesc, 0, sizeof(resDesc));
resDesc.resType = hipResourceTypeArray;
resDesc.res.array.array = hipArray;
// Specify texture object parameters
hipTextureDesc texDesc;
memset(&texDesc, 0, sizeof(texDesc));
texDesc.addressMode[0] = hipAddressModeWrap;
texDesc.addressMode[1] = hipAddressModeWrap;
texDesc.filterMode = hipFilterModePoint;
texDesc.readMode = hipReadModeElementType;
texDesc.normalizedCoords = 0;
// Create texture object
hipTextureObject_t textureObject = 0;
hipCreateTextureObject(&textureObject, &resDesc, &texDesc, NULL);
float* dData = NULL;
hipMalloc((void**)&dData, size);
dim3 dimBlock(16, 16, 1);
dim3 dimGrid(width / dimBlock.x, height / dimBlock.y, 1);
hipLaunchKernelGGL(tex2DKernel, dim3(dimGrid), dim3(dimBlock), 0, 0, dData, textureObject,
width, height, (2 * mipmap_level));
hipDeviceSynchronize();
float* hOutputData = (float*)malloc(size);
memset(hOutputData, 0, size);
hipMemcpy(hOutputData, dData, size, hipMemcpyDeviceToHost);
printf("dData: ");
for (int i = 0; i < 64; i++) {
printf("%f ", hOutputData[i]);
if (i % width == 0) {
printf("\n");
}
}
printf("\n");
for (int i = 0; i < height; i++) {
for (int j = 0; j < width; j++) {
if (hData[i * width + j] != hOutputData[i * width + j]) {
printf("Difference [ %d %d ]:%f ----%f\n", i, j, hData[i * width + j],
hOutputData[i * width + j]);
testResult = false;
break;
}
}
}
hipDestroyTextureObject(textureObject);
hipFree(dData);
hipFreeArray(hipArray);
return testResult;
}
bool runTest(int argc, char** argv) {
bool testResult = true;
for (auto& hw: hw_vector) {
for (auto& mip: mip_vector) {
if ((hw / static_cast<int>(pow (2,(mip * 2)))) > 0) {
testResult |= runMipMapTest(hw, hw, mip);
}
}
}
printf("\n");
return testResult;
}
int main(int argc, char** argv) {
bool testResult = true;
#ifdef _WIN32
testResult = runTest(argc, argv);
#else
std::cout<<"Mipmaps are Supported only on windows, skipping the test"<<std::endl;
#endif
if (testResult) {
passed();
} else {
exit(EXIT_FAILURE);
}
}