SWDEV-286322 - clean up trailing space (#2361)

Change-Id: I03c07e67a8d1fa1a874718ffba43eb396c2aa05c
This commit is contained in:
Julia Jiang
2021-09-24 06:57:51 -04:00
zatwierdzone przez GitHub
rodzic abe851ad75
commit 44581b4d3c
14 zmienionych plików z 85 dodań i 85 usunięć
@@ -38,9 +38,9 @@ THE SOFTWARE.
// Kernel that takes no arguments and has an empty body.
// NOTE(review): presumably launched by the timing loops below so that only
// launch/dispatch overhead is measured - confirm against the full file.
__global__ void EmptyKernel() { }
void print_timing(std::string test, const std::array<float, TOTAL_RUN_COUNT> &results, int batch = 1) {
float total_us = 0.0f, mean_us = 0.0f, stddev_us = 0.0f;
// skip warm-up runs
auto start_iter = std::next(results.begin(), WARMUP_RUN_COUNT);
auto end_iter = results.end();
@@ -48,7 +48,7 @@ void print_timing(std::string test, const std::array<float, TOTAL_RUN_COUNT> &re
// mean
std::for_each(start_iter, end_iter, [&](const float &run_ms) {
total_us += (run_ms * 1000) / batch;
});
});
mean_us = total_us / TIMING_RUN_COUNT;
// stddev
@@ -63,18 +63,18 @@ void print_timing(std::string test, const std::array<float, TOTAL_RUN_COUNT> &re
printf("\n %s: %.1f us, std: %.1f us\n", test.c_str(), mean_us, stddev_us);
}
int main() {
int main() {
hipStream_t stream0 = 0;
hipDevice_t device;
hipDeviceGet(&device, 0);
hipCtx_t context;
hipCtxCreate(&context, 0, device);
hipCtx_t context;
hipCtxCreate(&context, 0, device);
hipModule_t module;
hipFunction_t function;
hipModuleLoad(&module, FILE_NAME);
hipModuleGetFunction(&function, module, KERNEL_NAME);
void* params = nullptr;
std::array<float, TOTAL_RUN_COUNT> results;
hipEvent_t start, stop;
hipEventCreate(&start);
@@ -83,7 +83,7 @@ int main() {
/************************************************************************************/
/* HIP kernel launch enqueue rate: */
/* Measure time taken to enqueue a kernel on the GPU */
/************************************************************************************/
/************************************************************************************/
// Timing hipModuleLaunchKernel
for (auto i = 0; i < TOTAL_RUN_COUNT; ++i) {
@@ -104,8 +104,8 @@ int main() {
print_timing("hipLaunchKernelGGL enqueue rate", results);
/***********************************************************************************/
/* Single dispatch execution latency using HIP events: */
/* Measures latency to start & finish executing a kernel with GPU-scope visibility */
/* Single dispatch execution latency using HIP events: */
/* Measures latency to start & finish executing a kernel with GPU-scope visibility */
/***********************************************************************************/
//Timing around the dispatch
@@ -120,7 +120,7 @@ int main() {
/*********************************************************************************/
/* Batch dispatch execution latency using HIP events: */
/* Measures latency to start & finish executing each dispatch in a batch */
/* Measures latency to start & finish executing each dispatch in a batch */
/*********************************************************************************/
for (auto i = 0; i < TOTAL_RUN_COUNT; ++i) {
@@ -29,23 +29,23 @@ THE SOFTWARE.
// Device (Kernel) function
__global__ void multiply(float* C, float* A, float* B, int N){
    // Flat index of this thread, computed from its block and thread coordinates.
    const int idx = threadIdx.x + blockIdx.x*blockDim.x;

    // Threads whose index is not below N have no element to process.
    if (idx >= N){
        return;
    }

    // Element-wise product of the two inputs.
    C[idx] = A[idx] * B[idx];
}
// CPU implementation
// Host reference: writes C[i] = A[i] * B[i] for every i in [0, N).
//   C - output array, must hold at least N floats
//   A - first input array, at least N floats
//   B - second input array, at least N floats
//   N - number of elements to process; zero or negative values do nothing
void multiplyCPU(float* C, float* A, float* B, int N){
    // The index is signed to match N. With an unsigned index a negative N
    // would be converted to a huge unsigned bound and the loop would run far
    // past the end of the arrays.
    for(int i=0; i<N; i++){
        C[i] = A[i] * B[i];
    }
}
void launchKernel(float* C, float* A, float* B, bool manual){
hipDeviceProp_t devProp;
HIP_CHECK(hipGetDeviceProperties(&devProp, 0));
@@ -59,9 +59,9 @@ void launchKernel(float* C, float* A, float* B, bool manual){
int mingridSize = 0;
int gridSize = 0;
int blockSize = 0;
if (manual){
blockSize = threadsperblock;
blockSize = threadsperblock;
gridSize = blocks;
std::cout << std::endl << "Manual Configuration with block size " << blockSize << std::endl;
}
@@ -69,15 +69,15 @@ void launchKernel(float* C, float* A, float* B, bool manual){
HIP_CHECK(hipOccupancyMaxPotentialBlockSize(&mingridSize, &blockSize, multiply, 0, 0));
std::cout << std::endl << "Automatic Configuation based on hipOccupancyMaxPotentialBlockSize " << std::endl;
std::cout << "Suggested blocksize is " << blockSize << ", Minimum gridsize is " << mingridSize << std::endl;
gridSize = (NUM/blockSize)+1;
gridSize = (NUM/blockSize)+1;
}
// Record the start event
HIP_CHECK(hipEventRecord(start, NULL));
HIP_CHECK(hipEventRecord(start, NULL));
// Launching the Kernel from Host
hipLaunchKernelGGL(multiply, dim3(gridSize), dim3(blockSize), 0, 0, C, A, B, NUM);
// Record the stop event
HIP_CHECK(hipEventRecord(stop, NULL));
HIP_CHECK(hipEventSynchronize(stop));
@@ -88,7 +88,7 @@ void launchKernel(float* C, float* A, float* B, bool manual){
//Calculate Occupancy
int numBlock = 0;
HIP_CHECK(hipOccupancyMaxActiveBlocksPerMultiprocessor(&numBlock, multiply, blockSize, 0));
if(devProp.maxThreadsPerMultiProcessor){
std::cout << "Theoretical Occupancy is " << (double)numBlock* blockSize/devProp.maxThreadsPerMultiProcessor * 100 << "%" << std::endl;
}
@@ -106,26 +106,26 @@ int main() {
C0 = (float *)malloc(NUM * sizeof(float));
C1 = (float *)malloc(NUM * sizeof(float));
cpuC = (float *)malloc(NUM * sizeof(float));
for(i=0; i< NUM; i++){
A[i] = i;
B[i] = i;
}
// allocate the memory on the device side
// allocate the memory on the device side
HIP_CHECK(hipMalloc((void**)&Ad, NUM * sizeof(float)));
HIP_CHECK(hipMalloc((void**)&Bd, NUM * sizeof(float)));
HIP_CHECK(hipMalloc((void**)&C0d, NUM * sizeof(float)));
HIP_CHECK(hipMalloc((void**)&C1d, NUM * sizeof(float)));
// Memory transfer from host to device
HIP_CHECK(hipMemcpy(Ad,A,NUM * sizeof(float), hipMemcpyHostToDevice));
HIP_CHECK(hipMemcpy(Bd,B,NUM * sizeof(float), hipMemcpyHostToDevice));
//Kernel launch with manual/default block size
launchKernel(C0d, Ad, Bd, 1);
//Kernel launch with the block size suggested by hipOccupancyMaxPotentialBlockSize
//Kernel launch with the block size suggested by hipOccupancyMaxPotentialBlockSize
launchKernel(C1d, Ad, Bd, 0);
// Memory transfer from device to host
@@ -137,26 +137,26 @@ int main() {
//verify the results
double eps = 1.0E-6;
for (i = 0; i < NUM; i++) {
if (std::abs(C0[i] - cpuC[i]) > eps) {
errors++;
}
}
if (errors != 0){
printf("\nManual Test FAILED: %d errors\n", errors);
errors=0;
} else {
printf("\nManual Test PASSED!\n");
}
for (i = 0; i < NUM; i++) {
if (std::abs(C1[i] - cpuC[i]) > eps) {
errors++;
}
}
if (errors != 0){
printf("\n Automatic Test FAILED: %d errors\n", errors);
} else {
@@ -36,11 +36,11 @@ int main(){
e = hipMemcpyFromSymbol(S, HIP_SYMBOL(Sd), SIZE, 0, hipMemcpyDeviceToHost);
HIPASSERT(e==hipErrorInvalidSymbol);
e = hipMemcpyFromSymbol(S, NULL, SIZE, 0, hipMemcpyDeviceToHost);
HIPASSERT(e==hipErrorInvalidSymbol);
HIPCHECK(hipFree(Sd));
HIPCHECK(hipFree(Sd));
passed();
}
@@ -39,11 +39,11 @@ int main(){
e = hipMemcpyFromSymbolAsync(S, HIP_SYMBOL(Sd), SIZE, 0, hipMemcpyDeviceToHost, stream);
HIPASSERT(e==hipErrorInvalidSymbol);
e = hipMemcpyFromSymbolAsync(S, NULL, SIZE, 0, hipMemcpyDeviceToHost, stream);
HIPASSERT(e==hipErrorInvalidSymbol);
HIPCHECK(hipFree(Sd));
HIPCHECK(hipFree(Sd));
passed();
}
@@ -36,11 +36,11 @@ int main(){
e = hipMemcpyToSymbol(HIP_SYMBOL(Sd), S, SIZE, 0, hipMemcpyHostToDevice);
HIPASSERT(e==hipErrorInvalidSymbol);
e = hipMemcpyToSymbol(NULL, S, SIZE, 0, hipMemcpyHostToDevice);
HIPASSERT(e==hipErrorInvalidSymbol);
HIPCHECK(hipFree(Sd));
HIPCHECK(hipFree(Sd));
passed();
}
@@ -31,7 +31,7 @@ int main(){
void *Sd;
hipError_t e;
char S[SIZE]="This is not a device symbol";
HIPCHECK(hipMalloc(&Sd,SIZE));
hipStream_t stream;
@@ -39,11 +39,11 @@ int main(){
e = hipMemcpyToSymbolAsync(HIP_SYMBOL(Sd), S, SIZE, 0, hipMemcpyHostToDevice, stream);
HIPASSERT(e==hipErrorInvalidSymbol);
e = hipMemcpyToSymbolAsync(NULL, S, SIZE, 0, hipMemcpyHostToDevice, stream);
HIPASSERT(e==hipErrorInvalidSymbol);
HIPCHECK(hipFree(Sd));
HIPCHECK(hipFree(Sd));
passed();
}
+2 -2
Wyświetl plik
@@ -1,4 +1,4 @@
/*
/*
Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
@@ -32,7 +32,7 @@ int main(){
e = hipMemcpy(0, str, SIZE, hipMemcpyHostToDevice);
HIPASSERT(e==hipErrorInvalidValue);
e = hipMemcpy(NULL, str, SIZE, hipMemcpyHostToDevice);
HIPASSERT(e==hipErrorInvalidValue);
+8 -8
Wyświetl plik
@@ -45,7 +45,7 @@ void HipClassTests::TestForOverride(void){
0,
0,
result_ecd);
HipClassTests::VerifyResult(result_ech,result_ecd);
HipClassTests::FreeMem(result_ech,result_ecd);
}
@@ -70,13 +70,13 @@ void HipClassTests::TestForOverload(void){
0,
0,
result_ecd);
HipClassTests::VerifyResult(result_ech,result_ecd);
HipClassTests::FreeMem(result_ech,result_ecd);
}
#endif
#ifdef ENABLE_FRIEND_TEST
#ifdef ENABLE_FRIEND_TEST
// check for friend
__global__ void
friendClassKernel(bool* result_ecd){
@@ -106,7 +106,7 @@ void HipClassTests::TestForEmptyClass(void){
0,
0,
result_ecd);
HipClassTests::VerifyResult(result_ech,result_ecd);
HipClassTests::FreeMem(result_ech,result_ecd);
}
@@ -157,7 +157,7 @@ void HipClassTests::TestForClassSize(void){
0,
0,
result_ecd);
HipClassTests::VerifyResult(result_ech,result_ecd);
HipClassTests::FreeMem(result_ech,result_ecd);
}
@@ -217,7 +217,7 @@ void HipClassTests::TestForPassByValue(void){
HipClassTests::VerifyResult(result_ech,result_ecd);
HipClassTests::FreeMem(result_ech,result_ecd);
}
// check obj created with hipMalloc
__global__ void
mallocObjKernel(testPassByValue *obj, bool* result_ecd) {
@@ -292,7 +292,7 @@ bool* HipClassTests::AllocateHostMemory(void){
}
bool* HipClassTests::AllocateDeviceMemory(void){
bool* result_ecd;
bool* result_ecd;
HIPCHECK(hipMalloc(&result_ecd,
NBOOL));
HIPCHECK(hipMemset(result_ecd,
@@ -351,5 +351,5 @@ int main(){
#ifdef ENABLE_DESTRUCTOR_TEST
classTests.TestForConsrtDesrt();
test_passed(TestForConsrtDesrt);
#endif
#endif
}
+6 -6
Wyświetl plik
@@ -55,17 +55,17 @@ __host__ __device__ void testOperations(float &fa, float &fb) {
hip_bfloat16 bf_a(fa);
hip_bfloat16 bf_b(fb);
float fc = float(bf_a);
float fd = float(bf_b);
float fd = float(bf_b);
assert(testRelativeAccuracy(fa, bf_a));
assert(testRelativeAccuracy(fb, bf_b));
assert(testRelativeAccuracy(fc + fd, bf_a + bf_b));
//when checked as above for add, operation sub fails on GPU
//when checked as above for add, operation sub fails on GPU
assert(hip_bfloat16(fc - fd) == (bf_a - bf_b));
assert(testRelativeAccuracy(fc * fd, bf_a * bf_b));
assert(testRelativeAccuracy(fc / fd, bf_a / bf_b));
hip_bfloat16 bf_opNegate = -bf_a;
assert(bf_opNegate == -bf_a);
@@ -75,7 +75,7 @@ __host__ __device__ void testOperations(float &fa, float &fb) {
bf_x--;
++bf_x;
--bf_x;
//hip_bfloat16 is converted to float and then inc/decremented, hence check with reduced precision
//hip_bfloat16 is converted to float and then inc/decremented, hence check with reduced precision
assert(testRelativeAccuracy(bf_x,bf_a));
bf_x = bf_a;
@@ -95,7 +95,7 @@ __host__ __device__ void testOperations(float &fa, float &fb) {
if (isnan(bf_rounded)) {
assert(isnan(bf_rounded) || isinf(bf_rounded));
}
}
}
__global__ void testOperationsGPU(float* d_a, float* d_b)
{
@@ -126,7 +126,7 @@ int main(){
hipLaunchKernelGGL(testOperationsGPU, 1, SIZE, 0, 0, d_fa, d_fb);
hipDeviceSynchronize();
cout<<"Device bfloat16 Operations Successful!!"<<endl;
cout<<"Device bfloat16 Operations Successful!!"<<endl;
delete[] h_fa;
delete[] h_fb;
+14 -14
Wyświetl plik
@@ -56,21 +56,21 @@ __global__ void kernel_lgamma_double(double *input, double *output) {
void check_lgamma_double() {
using datatype_t = double;
const int NUM_INPUTS = 8;
auto memsize = NUM_INPUTS * sizeof(datatype_t);
// allocate memories
datatype_t *inputCPU = (datatype_t *) malloc(memsize);
datatype_t *outputCPU = (datatype_t *) malloc(memsize);
datatype_t *inputGPU = nullptr; hipMalloc((void**)&inputGPU, memsize);
datatype_t *outputGPU = nullptr; hipMalloc((void**)&outputGPU, memsize);
// populate input
for (int i=0; i<NUM_INPUTS; i++) {
inputCPU[i] = -3.5 + i;
}
// copy inputs to device
hipMemcpy(inputGPU, inputCPU, memsize, hipMemcpyHostToDevice);
@@ -84,13 +84,13 @@ void check_lgamma_double() {
for (int i=0; i<NUM_INPUTS; i++) {
CHECK_LGAMMA_DOUBLE(inputCPU[i], outputCPU[i], lgamma(inputCPU[i]));
}
// free memories
hipFree(inputGPU);
hipFree(outputGPU);
free(inputCPU);
free(outputCPU);
// done
return;
}
@@ -102,15 +102,15 @@ void check_abs_int64() {
const int NUM_INPUTS = 8;
auto memsize = NUM_INPUTS * sizeof(datatype_t);
// allocate memories
datatype_t *inputCPU = (datatype_t *) malloc(memsize);
datatype_t *outputCPU = (datatype_t *) malloc(memsize);
datatype_t *inputGPU = nullptr; hipMalloc((void**)&inputGPU, memsize);
datatype_t *outputGPU = nullptr; hipMalloc((void**)&outputGPU, memsize);
// populate input
inputCPU[0] = -81985529216486895ll;
inputCPU[0] = -81985529216486895ll;
inputCPU[1] = 81985529216486895ll;
inputCPU[2] = -1250999896491ll;
inputCPU[3] = 1250999896491ll;
@@ -118,7 +118,7 @@ void check_abs_int64() {
inputCPU[5] = 19088743ll;
inputCPU[6] = -291ll;
inputCPU[7] = 291ll;
// copy inputs to device
hipMemcpy(inputGPU, inputCPU, memsize, hipMemcpyHostToDevice);
@@ -137,17 +137,17 @@ void check_abs_int64() {
CHECK_ABS_INT64(inputCPU[5], outputCPU[5], outputCPU[5]);
CHECK_ABS_INT64(inputCPU[6], outputCPU[6], outputCPU[7]);
CHECK_ABS_INT64(inputCPU[7], outputCPU[7], outputCPU[7]);
// free memories
hipFree(inputGPU);
hipFree(outputGPU);
free(inputCPU);
free(outputCPU);
// done
return;
}
template<class T, class F>
__global__ void kernel_simple(F f, T *out) {
@@ -191,7 +191,7 @@ int main(int argc, char* argv[]) {
check_abs_int64();
// check_lgamma_double();
test_fp16();
test_pown();
+4 -4
Wyświetl plik
@@ -82,7 +82,7 @@ __device__ __host__ complex<FloatT> calc(complex<FloatT> A,
return A * B;
case CK_div:
return A / B;
ONE_ARG(abs)
ONE_ARG(arg)
ONE_ARG(sin)
@@ -111,7 +111,7 @@ void test() {
hipMalloc((void**)&Ad, sizeof(ComplexT)*LEN);
hipMalloc((void**)&Bd, sizeof(ComplexT)*LEN);
hipMalloc((void**)&Cd, sizeof(ComplexT)*LEN);
for (uint32_t i = 0; i < LEN; i++) {
A[i] = ComplexT((i + 1) * 1.0f, (i + 2) * 1.0f);
B[i] = A[i];
@@ -119,7 +119,7 @@ void test() {
}
hipMemcpy(Ad, A, sizeof(ComplexT)*LEN, hipMemcpyHostToDevice);
hipMemcpy(Bd, B, sizeof(ComplexT)*LEN, hipMemcpyHostToDevice);
// Run kernel for a calculation kind and verify by comparing with host
// calculation result. Returns false if fails.
auto test_fun = [&](enum CalcKind CK) {
@@ -145,7 +145,7 @@ void test() {
}
return true;
};
#define OP(x) assert(test_fun(CK_##x));
ALL_FUN
#undef OP
+1 -1
Wyświetl plik
@@ -84,7 +84,7 @@ void kernel_hisnan(__half* input, int* output) {
}
// Each thread takes its index from threadIdx.x, applies __hisinf to
// input[index] and stores the result in the int array output at the same
// index.
__global__
void kernel_hisinf(__half* input, int* output) {
void kernel_hisinf(__half* input, int* output) {
int tx = threadIdx.x;
output[tx] = __hisinf(input[tx]);
}
+1 -1
Wyświetl plik
@@ -41,7 +41,7 @@ THE SOFTWARE.
private:
int a;
};
static __global__ void kernel(int* Ad) {
int tid = threadIdx.x + blockIdx.x * blockDim.x;
new(Ad+tid) A();
+2 -2
Wyświetl plik
@@ -41,7 +41,7 @@ int readHipEnvVar(string flags, char* buff){
std::cout << "\nFinding hipEnvVar in " << directed_dir << "...\n";
FILE* directed_in = popen((directed_dir + flags).c_str(), "r");
if(fgets(buff, 512, directed_in) == NULL){
std::cout << "Finding hipEnvVar in " << dir << "...\n";
FILE* in = popen((dir + flags).c_str(), "r");
@@ -74,7 +74,7 @@ int getDeviceNumber(bool print_err=true) {
}
// Query the current device ID remotely to hipEnvVar
void getDevicePCIBusNumRemote(int deviceID, char* pciBusID) {
void getDevicePCIBusNumRemote(int deviceID, char* pciBusID) {
std::this_thread::sleep_for(std::chrono::milliseconds(10));
if (readHipEnvVar((" -d " + std::to_string(deviceID)), pciBusID)){
std::cerr << "The system cannot find hipEnvVar\n";