Merge branch 'rocm-rel-1.5'
Change-Id: Ib2318f9c0d01a1bc8be2fcb172a3075e82851877
[ROCm/hip-tests commit: fa64db5171]
This commit is contained in:
@@ -11,10 +11,6 @@ HIPCC=$(HIP_PATH)/bin/hipcc
|
||||
ifeq (${HIP_PLATFORM}, nvcc)
|
||||
HIPCC_FLAGS = -gencode=arch=compute_20,code=sm_20
|
||||
endif
|
||||
ifeq (${HIP_PLATFORM}, hcc)
|
||||
HIPCC_FLAGS = -stdlib=libc++
|
||||
endif
|
||||
|
||||
|
||||
EXE=bit_extract
|
||||
|
||||
@@ -24,4 +20,3 @@ $(EXE): bit_extract.cpp
|
||||
|
||||
clean:
|
||||
rm -f *.o $(EXE)
|
||||
|
||||
|
||||
@@ -37,7 +37,7 @@ THE SOFTWARE.
|
||||
}\
|
||||
}
|
||||
|
||||
void __global__
|
||||
__global__ void
|
||||
bit_extract_kernel(hipLaunchParm lp, uint32_t *C_d, const uint32_t *A_d, size_t N)
|
||||
{
|
||||
size_t offset = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x);
|
||||
@@ -45,7 +45,7 @@ bit_extract_kernel(hipLaunchParm lp, uint32_t *C_d, const uint32_t *A_d, size_t
|
||||
|
||||
for (size_t i=offset; i<N; i+=stride) {
|
||||
#ifdef __HIP_PLATFORM_HCC__
|
||||
C_d[i] = hc::__bitextract_u32(A_d[i], 8, 4);
|
||||
C_d[i] = hc::__bitextract_u32(A_d[i], 8, 4);
|
||||
#else /* defined __HIP_PLATFORM_NVCC__ or other path */
|
||||
C_d[i] = ((A_d[i] & 0xf00) >> 8);
|
||||
#endif
|
||||
@@ -73,7 +73,7 @@ int main(int argc, char *argv[])
|
||||
C_h = (uint32_t*)malloc(Nbytes);
|
||||
CHECK(C_h == 0 ? hipErrorMemoryAllocation : hipSuccess );
|
||||
|
||||
for (size_t i=0; i<N; i++)
|
||||
for (size_t i=0; i<N; i++)
|
||||
{
|
||||
A_h[i] = i;
|
||||
}
|
||||
|
||||
@@ -5,14 +5,19 @@ endif
|
||||
HIPCC=$(HIP_PATH)/bin/hipcc
|
||||
HIP_PLATFORM=$(shell $(HIP_PATH)/bin/hipconfig --compiler)
|
||||
|
||||
all: vcpy_kernel.code runKernel.hip.out
|
||||
all: vcpy_kernel.code runKernel.hip.out defaultDriver.hip.out
|
||||
|
||||
runKernel.hip.out: runKernel.cpp
|
||||
$(HIPCC) $(HIPCC_FLAGS) $< -o $@
|
||||
|
||||
launchKernelHcc.hip.out: launchKernelHcc.cpp
|
||||
$(HIPCC) $(HIPCC_FLAGS) $< -o $@
|
||||
|
||||
defaultDriver.hip.out: defaultDriver.cpp
|
||||
$(HIPCC) $(HIPCC_FLAGS) $< -o $@
|
||||
|
||||
vcpy_kernel.code: vcpy_kernel.cpp
|
||||
$(HIPCC) --genco $(GENCO_FLAGS) $^ -o $@
|
||||
$(HIPCC) --genco $(GENCO_FLAGS) $^ -o $@
|
||||
|
||||
clean:
|
||||
rm -f *.code *.out
|
||||
|
||||
|
||||
@@ -0,0 +1,89 @@
|
||||
/*
|
||||
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "hip/hip_runtime.h"
|
||||
#include "hip/hip_runtime_api.h"
|
||||
#include<iostream>
|
||||
#include<fstream>
|
||||
#include<vector>
|
||||
|
||||
#define LEN 64
|
||||
#define SIZE LEN<<2
|
||||
|
||||
#define fileName "test.co"
|
||||
#define kernel_name "vadd"
|
||||
|
||||
int main(){
|
||||
float *A, *B, *C;
|
||||
hipDeviceptr_t Ad, Bd, Cd;
|
||||
A = new float[LEN];
|
||||
B = new float[LEN];
|
||||
C = new float[LEN];
|
||||
|
||||
for(uint32_t i=0;i<LEN;i++){
|
||||
A[i] = i*1.0f;
|
||||
B[i] = 1.0f;
|
||||
C[i] = 0.0f;
|
||||
}
|
||||
|
||||
hipInit(0);
|
||||
hipDevice_t device;
|
||||
hipCtx_t context;
|
||||
hipDeviceGet(&device, 0);
|
||||
hipCtxCreate(&context, 0, device);
|
||||
|
||||
hipMalloc((void**)&Ad, SIZE);
|
||||
hipMalloc((void**)&Bd, SIZE);
|
||||
hipMalloc((void**)&Cd, SIZE);
|
||||
|
||||
hipMemcpyHtoD(Ad, A, SIZE);
|
||||
hipMemcpyHtoD(Bd, B, SIZE);
|
||||
hipMemcpyHtoD(Cd, C, SIZE);
|
||||
|
||||
hipModule_t Module;
|
||||
hipFunction_t Function;
|
||||
hipModuleLoad(&Module, fileName);
|
||||
hipModuleGetFunction(&Function, Module, kernel_name);
|
||||
|
||||
int n = LEN;
|
||||
void * args[4] = {&Ad, &Bd, &Cd, &n};
|
||||
|
||||
hipModuleLaunchKernel(Function, 1, 1, 1, LEN, 1, 1, 0, 0, args, nullptr);
|
||||
|
||||
hipMemcpyDtoH(C, Cd, SIZE);
|
||||
int mismatchCount = 0;
|
||||
for(uint32_t i=0;i<LEN;i++){
|
||||
if (A[i] + B[i] != C[i]) {
|
||||
mismatchCount++;
|
||||
std::cout<<"error: mismatch " << A[i]<<" + "<<B[i]<<" != "<<C[i]<<std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
if (mismatchCount == 0) {
|
||||
std::cout << "PASSED!\n";
|
||||
} else {
|
||||
std::cout << "FAILED!\n";
|
||||
};
|
||||
|
||||
hipCtxDestroy(context);
|
||||
return 0;
|
||||
}
|
||||
@@ -0,0 +1,112 @@
|
||||
/*
|
||||
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
|
||||
#include "hip/hip_runtime.h"
|
||||
#include "hip/hip_runtime_api.h"
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <vector>
|
||||
|
||||
#ifdef __HIP_PLATFORM_HCC__
|
||||
#include <hip/hip_hcc.h>
|
||||
#endif
|
||||
|
||||
#define LEN 64
|
||||
#define SIZE LEN<<2
|
||||
|
||||
#define fileName "vcpy_kernel.code"
|
||||
#define kernel_name "hello_world"
|
||||
|
||||
#define HIP_CHECK(status) \
|
||||
if(status != hipSuccess) {std::cout<<"Got Status: "<<status<<" at Line: "<<__LINE__<<std::endl;exit(0);}
|
||||
|
||||
int main(){
|
||||
float *A, *B;
|
||||
hipDeviceptr_t Ad, Bd;
|
||||
A = new float[LEN];
|
||||
B = new float[LEN];
|
||||
|
||||
for(uint32_t i=0;i<LEN;i++){
|
||||
A[i] = i*1.0f;
|
||||
B[i] = 0.0f;
|
||||
}
|
||||
|
||||
hipInit(0);
|
||||
hipDevice_t device;
|
||||
hipCtx_t context;
|
||||
hipDeviceGet(&device, 0);
|
||||
hipCtxCreate(&context, 0, device);
|
||||
|
||||
hipMalloc((void**)&Ad, SIZE);
|
||||
hipMalloc((void**)&Bd, SIZE);
|
||||
|
||||
hipMemcpyHtoD(Ad, A, SIZE);
|
||||
hipMemcpyHtoD(Bd, B, SIZE);
|
||||
hipModule_t Module;
|
||||
hipFunction_t Function;
|
||||
HIP_CHECK(hipModuleLoad(&Module, fileName));
|
||||
HIP_CHECK(hipModuleGetFunction(&Function, Module, kernel_name));
|
||||
|
||||
uint32_t len = LEN;
|
||||
uint32_t one = 1;
|
||||
|
||||
struct {
|
||||
void * _Ad;
|
||||
void * _Bd;
|
||||
} args;
|
||||
|
||||
args._Ad = Ad;
|
||||
args._Bd = Bd;
|
||||
|
||||
|
||||
|
||||
size_t size = sizeof(args);
|
||||
|
||||
void *config[] = {
|
||||
HIP_LAUNCH_PARAM_BUFFER_POINTER, &args,
|
||||
HIP_LAUNCH_PARAM_BUFFER_SIZE, &size,
|
||||
HIP_LAUNCH_PARAM_END
|
||||
};
|
||||
|
||||
HIP_CHECK(hipHccModuleLaunchKernel(Function, LEN, 1, 1, LEN, 1, 1, 0, 0, NULL, (void**)&config));
|
||||
|
||||
hipMemcpyDtoH(B, Bd, SIZE);
|
||||
|
||||
int mismatchCount = 0;
|
||||
for(uint32_t i=0;i<LEN;i++){
|
||||
if (A[i] != B[i]) {
|
||||
mismatchCount++;
|
||||
std::cout<<"error: mismatch " << A[i]<<" != "<<B[i]<<std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
if (mismatchCount == 0) {
|
||||
std::cout << "PASSED!\n";
|
||||
} else {
|
||||
std::cout << "FAILED!\n";
|
||||
};
|
||||
|
||||
hipCtxDestroy(context);
|
||||
return 0;
|
||||
}
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
@@ -22,9 +22,10 @@ THE SOFTWARE.
|
||||
|
||||
#include "hip/hip_runtime.h"
|
||||
#include "hip/hip_runtime_api.h"
|
||||
#include<iostream>
|
||||
#include<fstream>
|
||||
#include<vector>
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <vector>
|
||||
#include <hip/hip_hcc.h>
|
||||
|
||||
#define LEN 64
|
||||
#define SIZE LEN<<2
|
||||
@@ -32,6 +33,9 @@ THE SOFTWARE.
|
||||
#define fileName "vcpy_kernel.code"
|
||||
#define kernel_name "hello_world"
|
||||
|
||||
#define HIP_CHECK(status) \
|
||||
if(status != hipSuccess) {std::cout<<"Got Status: "<<status<<" at Line: "<<__LINE__<<std::endl;exit(0);}
|
||||
|
||||
int main(){
|
||||
float *A, *B;
|
||||
hipDeviceptr_t Ad, Bd;
|
||||
@@ -43,10 +47,10 @@ int main(){
|
||||
B[i] = 0.0f;
|
||||
}
|
||||
|
||||
hipInit(0);
|
||||
hipDevice_t device;
|
||||
hipCtx_t context;
|
||||
hipDeviceGet(&device, 0);
|
||||
hipInit(0);
|
||||
hipDevice_t device;
|
||||
hipCtx_t context;
|
||||
hipDeviceGet(&device, 0);
|
||||
hipCtxCreate(&context, 0, device);
|
||||
|
||||
hipMalloc((void**)&Ad, SIZE);
|
||||
@@ -56,22 +60,18 @@ int main(){
|
||||
hipMemcpyHtoD(Bd, B, SIZE);
|
||||
hipModule_t Module;
|
||||
hipFunction_t Function;
|
||||
hipModuleLoad(&Module, fileName);
|
||||
hipModuleGetFunction(&Function, Module, kernel_name);
|
||||
HIP_CHECK(hipModuleLoad(&Module, fileName));
|
||||
HIP_CHECK(hipModuleGetFunction(&Function, Module, kernel_name));
|
||||
|
||||
#ifdef __HIP_PLATFORM_HCC__
|
||||
uint32_t len = LEN;
|
||||
uint32_t one = 1;
|
||||
|
||||
struct {
|
||||
uint32_t _hidden[6];
|
||||
void * _Ad;
|
||||
void * _Bd;
|
||||
} args;
|
||||
|
||||
for (int i=0; i<6; i++) {
|
||||
args._hidden[i] = 0;
|
||||
}
|
||||
args._Ad = Ad;
|
||||
args._Bd = Bd;
|
||||
|
||||
@@ -98,9 +98,10 @@ int main(){
|
||||
HIP_LAUNCH_PARAM_END
|
||||
};
|
||||
|
||||
hipModuleLaunchKernel(Function, 1, 1, 1, LEN, 1, 1, 0, 0, NULL, (void**)&config);
|
||||
HIP_CHECK(hipModuleLaunchKernel(Function, 1, 1, 1, LEN, 1, 1, 0, 0, NULL, (void**)&config));
|
||||
|
||||
hipMemcpyDtoH(B, Bd, SIZE);
|
||||
|
||||
int mismatchCount = 0;
|
||||
for(uint32_t i=0;i<LEN;i++){
|
||||
if (A[i] != B[i]) {
|
||||
|
||||
@@ -0,0 +1,12 @@
|
||||
__kernel void memset(char in, __global int* out) {
|
||||
int tx = get_global_id(0);
|
||||
out[tx] = in;
|
||||
}
|
||||
|
||||
|
||||
__kernel void vadd(__global float *Ad, __global float *Bd, __global float *Cd, int N){
|
||||
int tx = get_global_id(0);
|
||||
if(tx < N){
|
||||
Cd[tx] = Ad[tx] + Bd[tx];
|
||||
}
|
||||
}
|
||||
Binair bestand niet weergegeven.
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
@@ -27,4 +27,3 @@ extern "C" __global__ void hello_world(hipLaunchParm lp, float *a, float *b)
|
||||
int tx = hipThreadIdx_x;
|
||||
b[tx] = a[tx];
|
||||
}
|
||||
|
||||
|
||||
@@ -15,5 +15,6 @@ square.hip.out: square.hipref.cpp
|
||||
|
||||
|
||||
|
||||
|
||||
clean:
|
||||
rm -f *.o *.out
|
||||
|
||||
@@ -32,7 +32,7 @@ THE SOFTWARE.
|
||||
}\
|
||||
}
|
||||
|
||||
/*
|
||||
/*
|
||||
* Square each element in the array A and write to array C.
|
||||
*/
|
||||
template <typename T>
|
||||
@@ -58,16 +58,18 @@ int main(int argc, char *argv[])
|
||||
hipDeviceProp_t props;
|
||||
CHECK(hipGetDeviceProperties(&props, 0/*deviceID*/));
|
||||
printf ("info: running on device %s\n", props.name);
|
||||
|
||||
#ifdef __HIP_PLATFORM_HCC__
|
||||
printf ("info: architecture on AMD GPU device is: %d\n",props.gcnArch);
|
||||
#endif
|
||||
printf ("info: allocate host mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0);
|
||||
A_h = (float*)malloc(Nbytes);
|
||||
CHECK(A_h == 0 ? hipErrorMemoryAllocation : hipSuccess );
|
||||
C_h = (float*)malloc(Nbytes);
|
||||
CHECK(C_h == 0 ? hipErrorMemoryAllocation : hipSuccess );
|
||||
// Fill with Phi + i
|
||||
for (size_t i=0; i<N; i++)
|
||||
for (size_t i=0; i<N; i++)
|
||||
{
|
||||
A_h[i] = 1.618f + i;
|
||||
A_h[i] = 1.618f + i;
|
||||
}
|
||||
|
||||
printf ("info: allocate device mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0);
|
||||
@@ -81,7 +83,7 @@ int main(int argc, char *argv[])
|
||||
const unsigned threadsPerBlock = 256;
|
||||
|
||||
printf ("info: launch 'vector_square' kernel\n");
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(vector_square), dim3(blocks), dim3(threadsPerBlock), 0, 0, C_d, A_d, N);
|
||||
hipLaunchKernel(vector_square, dim3(blocks), dim3(threadsPerBlock), 0, 0, C_d, A_d, N);
|
||||
|
||||
printf ("info: copy Device2Host\n");
|
||||
CHECK ( hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
|
||||
|
||||
@@ -6,9 +6,12 @@
|
||||
|
||||
#include "ResultDatabase.h"
|
||||
|
||||
enum MallocMode {MallocPinned, MallocUnpinned, MallocRegistered};
|
||||
|
||||
// Cmdline parms:
|
||||
bool p_verbose = false;
|
||||
bool p_pinned = true;
|
||||
MallocMode p_malloc_mode = MallocPinned;
|
||||
int p_numa_ctl = -1;
|
||||
int p_iterations = 10;
|
||||
int p_beatsperiteration=1;
|
||||
int p_device = 0;
|
||||
@@ -21,7 +24,7 @@ bool p_h2d = true;
|
||||
bool p_d2h = true;
|
||||
bool p_bidir = true;
|
||||
|
||||
|
||||
//#define NO_CHECK
|
||||
|
||||
|
||||
#define CHECK_HIP_ERROR() \
|
||||
@@ -36,6 +39,14 @@ bool p_bidir = true;
|
||||
}
|
||||
|
||||
|
||||
std::string mallocModeString(int mallocMode) {
|
||||
switch (mallocMode) {
|
||||
case MallocPinned : return "pinned";
|
||||
case MallocUnpinned: return "unpinned";
|
||||
case MallocRegistered: return "registered";
|
||||
default: return "mallocmode-UNKNOWN";
|
||||
};
|
||||
};
|
||||
|
||||
// ****************************************************************************
|
||||
int sizeToBytes(int size) {
|
||||
@@ -106,7 +117,7 @@ void RunBenchmark_H2D(ResultDatabase &resultDB)
|
||||
|
||||
// Create some host memory pattern
|
||||
float *hostMem = NULL;
|
||||
if (p_pinned)
|
||||
if (p_malloc_mode == MallocPinned)
|
||||
{
|
||||
hipHostMalloc((void**)&hostMem, sizeof(float) * numMaxFloats);
|
||||
while (hipGetLastError() != hipSuccess)
|
||||
@@ -116,20 +127,33 @@ void RunBenchmark_H2D(ResultDatabase &resultDB)
|
||||
--nSizes;
|
||||
if (nSizes < 1)
|
||||
{
|
||||
std::cerr << "Error: Couldn't allocated any pinned buffer\n";
|
||||
std::cerr << "Error: Couldn't allocate any pinned buffer\n";
|
||||
return;
|
||||
}
|
||||
numMaxFloats = 1024 * (sizes[nSizes-1]) / 4;
|
||||
hipHostMalloc((void**)&hostMem, sizeof(float) * numMaxFloats);
|
||||
}
|
||||
}
|
||||
else
|
||||
else if (p_malloc_mode == MallocUnpinned)
|
||||
{
|
||||
if (p_alignedhost) {
|
||||
hostMem = (float*)aligned_alloc(p_alignedhost, numMaxFloats*sizeof(float));
|
||||
} else {
|
||||
hostMem = new float[numMaxFloats];
|
||||
}
|
||||
}
|
||||
else if (p_malloc_mode == MallocRegistered)
|
||||
{
|
||||
if (p_numa_ctl == -1) {
|
||||
hostMem = (float*)malloc(numMaxFloats*sizeof(float));
|
||||
}
|
||||
|
||||
hipHostRegister(hostMem, numMaxFloats * sizeof(float), 0);
|
||||
CHECK_HIP_ERROR();
|
||||
}
|
||||
else
|
||||
{
|
||||
assert(0);
|
||||
}
|
||||
|
||||
for (int i = 0; i < numMaxFloats; i++)
|
||||
@@ -146,7 +170,7 @@ void RunBenchmark_H2D(ResultDatabase &resultDB)
|
||||
--nSizes;
|
||||
if (nSizes < 1)
|
||||
{
|
||||
std::cerr << "Error: Couldn't allocated any device buffer\n";
|
||||
std::cerr << "Error: Couldn't allocate any device buffer\n";
|
||||
return;
|
||||
}
|
||||
numMaxFloats = 1024 * (sizes[nSizes-1]) / 4;
|
||||
@@ -199,8 +223,8 @@ void RunBenchmark_H2D(ResultDatabase &resultDB)
|
||||
} else {
|
||||
sprintf(sizeStr, "%9s", sizeToString(thisSize).c_str());
|
||||
}
|
||||
resultDB.AddResult(std::string("H2D_Bandwidth") + (p_pinned ? "_Pinned" : "_Unpinned"), sizeStr, "GB/sec", speed);
|
||||
resultDB.AddResult(std::string("H2D_Time") + (p_pinned ? "_Pinned" : "_Unpinned"), sizeStr, "ms", t);
|
||||
resultDB.AddResult(std::string("H2D_Bandwidth") + "_" + mallocModeString(p_malloc_mode), sizeStr, "GB/sec", speed);
|
||||
resultDB.AddResult(std::string("H2D_Time") + mallocModeString(p_malloc_mode), sizeStr, "ms", t);
|
||||
|
||||
if (p_onesize) {
|
||||
break;
|
||||
@@ -212,6 +236,8 @@ void RunBenchmark_H2D(ResultDatabase &resultDB)
|
||||
numMaxFloats = sizeToBytes(p_onesize) / sizeof(float);
|
||||
}
|
||||
|
||||
#ifndef NO_CHECK
|
||||
|
||||
// Check. First reset the host memory, then copy-back result. Then compare against original ref value.
|
||||
for (int i = 0; i < numMaxFloats; i++)
|
||||
{
|
||||
@@ -225,24 +251,36 @@ void RunBenchmark_H2D(ResultDatabase &resultDB)
|
||||
printf ("error: H2D. i=%d reference:%6.f != copyback:%6.2f\n", i, ref, hostMem[i]);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
// Cleanup
|
||||
hipFree((void*)device);
|
||||
CHECK_HIP_ERROR();
|
||||
if (p_pinned)
|
||||
{
|
||||
switch (p_malloc_mode) {
|
||||
case MallocPinned:
|
||||
hipHostFree((void*)hostMem);
|
||||
CHECK_HIP_ERROR();
|
||||
}
|
||||
else
|
||||
{
|
||||
break;
|
||||
|
||||
case MallocUnpinned:
|
||||
if (p_alignedhost) {
|
||||
delete[] hostMem;
|
||||
} else {
|
||||
free(hostMem);
|
||||
}
|
||||
break;
|
||||
|
||||
case MallocRegistered:
|
||||
hipHostUnregister(hostMem);
|
||||
CHECK_HIP_ERROR();
|
||||
free(hostMem);
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
}
|
||||
|
||||
|
||||
hipEventDestroy(start);
|
||||
hipEventDestroy(stop);
|
||||
}
|
||||
@@ -257,38 +295,56 @@ void RunBenchmark_D2H(ResultDatabase &resultDB)
|
||||
// Create some host memory pattern
|
||||
float *hostMem1;
|
||||
float *hostMem2;
|
||||
if (p_pinned)
|
||||
if (p_malloc_mode == MallocPinned)
|
||||
{
|
||||
hipHostMalloc((void**)&hostMem1, sizeof(float)*numMaxFloats);
|
||||
hipError_t err1 = hipGetLastError();
|
||||
hipHostMalloc((void**)&hostMem2, sizeof(float)*numMaxFloats);
|
||||
hipError_t err2 = hipGetLastError();
|
||||
while (err1 != hipSuccess || err2 != hipSuccess)
|
||||
{
|
||||
// free the first buffer if only the second failed
|
||||
if (err1 == hipSuccess)
|
||||
hipHostFree((void*)hostMem1);
|
||||
while (err1 != hipSuccess || err2 != hipSuccess)
|
||||
{
|
||||
// free the first buffer if only the second failed
|
||||
if (err1 == hipSuccess)
|
||||
hipHostFree((void*)hostMem1);
|
||||
|
||||
// drop the size and try again
|
||||
if (p_verbose) std::cout << " - dropping size allocating pinned mem\n";
|
||||
--nSizes;
|
||||
if (nSizes < 1)
|
||||
{
|
||||
std::cerr << "Error: Couldn't allocated any pinned buffer\n";
|
||||
return;
|
||||
}
|
||||
numMaxFloats = 1024 * (sizes[nSizes-1]) / 4;
|
||||
hipHostMalloc((void**)&hostMem1, sizeof(float)*numMaxFloats);
|
||||
err1 = hipGetLastError();
|
||||
hipHostMalloc((void**)&hostMem2, sizeof(float)*numMaxFloats);
|
||||
err2 = hipGetLastError();
|
||||
}
|
||||
}
|
||||
else
|
||||
// drop the size and try again
|
||||
if (p_verbose) std::cout << " - dropping size allocating pinned mem\n";
|
||||
--nSizes;
|
||||
if (nSizes < 1)
|
||||
{
|
||||
std::cerr << "Error: Couldn't allocate any pinned buffer\n";
|
||||
return;
|
||||
}
|
||||
numMaxFloats = 1024 * (sizes[nSizes-1]) / 4;
|
||||
hipHostMalloc((void**)&hostMem1, sizeof(float)*numMaxFloats);
|
||||
err1 = hipGetLastError();
|
||||
hipHostMalloc((void**)&hostMem2, sizeof(float)*numMaxFloats);
|
||||
err2 = hipGetLastError();
|
||||
}
|
||||
}
|
||||
else if (p_malloc_mode == MallocUnpinned)
|
||||
{
|
||||
hostMem1 = new float[numMaxFloats];
|
||||
hostMem2 = new float[numMaxFloats];
|
||||
}
|
||||
else if (p_malloc_mode == MallocRegistered)
|
||||
{
|
||||
if (p_numa_ctl == -1) {
|
||||
hostMem1 = (float*)malloc(numMaxFloats*sizeof(float));
|
||||
hostMem2 = (float*)malloc(numMaxFloats*sizeof(float));
|
||||
}
|
||||
|
||||
hipHostRegister(hostMem1, numMaxFloats * sizeof(float), 0);
|
||||
CHECK_HIP_ERROR();
|
||||
hipHostRegister(hostMem2, numMaxFloats * sizeof(float), 0);
|
||||
CHECK_HIP_ERROR();
|
||||
}
|
||||
else
|
||||
{
|
||||
assert(0);
|
||||
}
|
||||
|
||||
|
||||
for (int i=0; i<numMaxFloats; i++)
|
||||
hostMem1[i] = i % 77;
|
||||
|
||||
@@ -301,7 +357,7 @@ void RunBenchmark_D2H(ResultDatabase &resultDB)
|
||||
--nSizes;
|
||||
if (nSizes < 1)
|
||||
{
|
||||
std::cerr << "Error: Couldn't allocated any device buffer\n";
|
||||
std::cerr << "Error: Couldn't allocate any device buffer\n";
|
||||
return;
|
||||
}
|
||||
numMaxFloats = 1024 * (sizes[nSizes-1]) / 4;
|
||||
@@ -358,8 +414,8 @@ void RunBenchmark_D2H(ResultDatabase &resultDB)
|
||||
} else {
|
||||
sprintf(sizeStr, "%9s", sizeToString(thisSize).c_str());
|
||||
}
|
||||
resultDB.AddResult(std::string("D2H_Bandwidth") + (p_pinned ? "_Pinned" : "_Unpinned"), sizeStr, "GB/sec", speed);
|
||||
resultDB.AddResult(std::string("D2H_Time") + (p_pinned ? "_Pinned" : "_Unpinned"), sizeStr, "ms", t);
|
||||
resultDB.AddResult(std::string("D2H_Bandwidth") +"_" + mallocModeString(p_malloc_mode) , sizeStr, "GB/sec", speed);
|
||||
resultDB.AddResult(std::string("D2H_Time") +"_" + mallocModeString(p_malloc_mode) , sizeStr, "ms", t);
|
||||
if (p_onesize) {
|
||||
break;
|
||||
}
|
||||
@@ -381,20 +437,31 @@ void RunBenchmark_D2H(ResultDatabase &resultDB)
|
||||
// Cleanup
|
||||
hipFree((void*)device);
|
||||
CHECK_HIP_ERROR();
|
||||
if (p_pinned)
|
||||
{
|
||||
|
||||
switch (p_malloc_mode) {
|
||||
case MallocPinned:
|
||||
hipHostFree((void*)hostMem1);
|
||||
CHECK_HIP_ERROR();
|
||||
hipHostFree((void*)hostMem2);
|
||||
CHECK_HIP_ERROR();
|
||||
}
|
||||
else
|
||||
{
|
||||
break;
|
||||
case MallocUnpinned:
|
||||
delete[] hostMem1;
|
||||
delete[] hostMem2;
|
||||
hipEventDestroy(start);
|
||||
hipEventDestroy(stop);
|
||||
break;
|
||||
case MallocRegistered:
|
||||
hipHostUnregister(hostMem1);
|
||||
CHECK_HIP_ERROR();
|
||||
free(hostMem1);
|
||||
hipHostUnregister(hostMem2);
|
||||
free(hostMem2);
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
}
|
||||
|
||||
hipEventDestroy(start);
|
||||
hipEventDestroy(stop);
|
||||
}
|
||||
|
||||
|
||||
@@ -409,7 +476,7 @@ void RunBenchmark_Bidir(ResultDatabase &resultDB)
|
||||
|
||||
// Create some host memory pattern
|
||||
float *hostMem[2] = {NULL, NULL};
|
||||
if (p_pinned)
|
||||
if (p_malloc_mode == MallocPinned)
|
||||
{
|
||||
while (1)
|
||||
{
|
||||
@@ -424,18 +491,34 @@ void RunBenchmark_Bidir(ResultDatabase &resultDB)
|
||||
--nSizes;
|
||||
if (nSizes < 1)
|
||||
{
|
||||
std::cerr << "Error: Couldn't allocated any pinned buffer\n";
|
||||
std::cerr << "Error: Couldn't allocate any pinned buffer\n";
|
||||
return;
|
||||
}
|
||||
numMaxFloats = 1024 * (sizes[nSizes-1]) / 4;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
else if (p_malloc_mode == MallocUnpinned)
|
||||
{
|
||||
hostMem[0] = new float[numMaxFloats];
|
||||
hostMem[1] = new float[numMaxFloats];
|
||||
}
|
||||
else if (p_malloc_mode == MallocRegistered)
|
||||
{
|
||||
if (p_numa_ctl == -1) {
|
||||
hostMem[0] = (float*)malloc(numMaxFloats*sizeof(float));
|
||||
hostMem[1] = (float*)malloc(numMaxFloats*sizeof(float));
|
||||
}
|
||||
hipHostRegister(hostMem[0], numMaxFloats * sizeof(float), 0);
|
||||
CHECK_HIP_ERROR();
|
||||
hipHostRegister(hostMem[1], numMaxFloats * sizeof(float), 0);
|
||||
CHECK_HIP_ERROR();
|
||||
}
|
||||
else
|
||||
{
|
||||
assert(0);
|
||||
}
|
||||
|
||||
|
||||
for (int i = 0; i < numMaxFloats; i++)
|
||||
{
|
||||
@@ -459,7 +542,7 @@ void RunBenchmark_Bidir(ResultDatabase &resultDB)
|
||||
--nSizes;
|
||||
if (nSizes < 1)
|
||||
{
|
||||
std::cerr << "Error: Couldn't allocated any device buffer\n";
|
||||
std::cerr << "Error: Couldn't allocate any device buffer\n";
|
||||
return;
|
||||
}
|
||||
numMaxFloats = 1024 * (sizes[nSizes-1]) / 4;
|
||||
@@ -512,8 +595,8 @@ void RunBenchmark_Bidir(ResultDatabase &resultDB)
|
||||
double speed = (double(sizeToBytes(thisSize)) / (1000*1000)) / t;
|
||||
char sizeStr[256];
|
||||
sprintf(sizeStr, "%9s", sizeToString(thisSize).c_str());
|
||||
resultDB.AddResult(std::string("Bidir_Bandwidth") + (p_pinned ? "_Pinned" : "_Unpinned"), sizeStr, "GB/sec", speed);
|
||||
resultDB.AddResult(std::string("Bidir_Time") + (p_pinned ? "_Pinned" : "_Unpinned"), sizeStr, "ms", t);
|
||||
resultDB.AddResult(std::string("Bidir_Bandwidth") + "_" + mallocModeString(p_malloc_mode), sizeStr, "GB/sec", speed);
|
||||
resultDB.AddResult(std::string("Bidir_Time") + "_" + mallocModeString(p_malloc_mode), sizeStr, "ms", t);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -521,17 +604,27 @@ void RunBenchmark_Bidir(ResultDatabase &resultDB)
|
||||
hipFree((void*)deviceMem[0]);
|
||||
hipFree((void*)deviceMem[1]);
|
||||
CHECK_HIP_ERROR();
|
||||
if (p_pinned)
|
||||
{
|
||||
switch (p_malloc_mode) {
|
||||
case MallocPinned:
|
||||
hipHostFree((void*)hostMem[0]);
|
||||
hipHostFree((void*)hostMem[1]);
|
||||
CHECK_HIP_ERROR();
|
||||
}
|
||||
else
|
||||
{
|
||||
break;
|
||||
case MallocUnpinned:
|
||||
delete[] hostMem[0];
|
||||
delete[] hostMem[1];
|
||||
}
|
||||
break;
|
||||
case MallocRegistered:
|
||||
for (int i=0; i<2; i++) {
|
||||
hipHostUnregister(hostMem[i]);
|
||||
CHECK_HIP_ERROR();
|
||||
free(hostMem[i]);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
};
|
||||
|
||||
hipEventDestroy(start);
|
||||
hipEventDestroy(stop);
|
||||
hipStreamDestroy(stream[0]);
|
||||
@@ -557,7 +650,7 @@ void printConfig() {
|
||||
hipDeviceProp_t props;
|
||||
hipGetDeviceProperties(&props, p_device);
|
||||
|
||||
printf ("Device:%s Mem=%.1fGB #CUs=%d Freq=%.0fMhz Pinned=%s\n", props.name, props.totalGlobalMem/1024.0/1024.0/1024.0, props.multiProcessorCount, props.clockRate/1000.0, p_pinned ? "YES" : "NO");
|
||||
printf ("Device:%s Mem=%.1fGB #CUs=%d Freq=%.0fMhz MallocMode=%s\n", props.name, props.totalGlobalMem/1024.0/1024.0/1024.0, props.multiProcessorCount, props.clockRate/1000.0, mallocModeString(p_malloc_mode).c_str());
|
||||
}
|
||||
|
||||
void help() {
|
||||
@@ -601,7 +694,9 @@ int parseStandardArguments(int argc, char *argv[])
|
||||
failed("Bad onesize argument");
|
||||
}
|
||||
} else if (!strcmp(arg, "--unpinned")) {
|
||||
p_pinned = 0;
|
||||
p_malloc_mode = MallocUnpinned;
|
||||
} else if (!strcmp(arg, "--registered")) {
|
||||
p_malloc_mode = MallocRegistered;
|
||||
} else if (!strcmp(arg, "--h2d")) {
|
||||
p_h2d = true;
|
||||
p_d2h = false;
|
||||
|
||||
@@ -10,9 +10,6 @@ OPT=-O3
|
||||
CXXFLAGS = $(OPT) --std=c++11
|
||||
|
||||
HIP_PLATFORM=$(shell $(HIP_PATH)/bin/hipconfig --platform)
|
||||
ifeq (${HIP_PLATFORM}, hcc)
|
||||
CXXFLAGS += " -stdlib=libc++"
|
||||
endif
|
||||
|
||||
CODE_OBJECTS=nullkernel.hsaco
|
||||
|
||||
|
||||
@@ -1,50 +0,0 @@
|
||||
_ Add AQL kernel.
|
||||
_ Fix &*kernel command so the kernel name/type is an argument not a new command.
|
||||
|
||||
_ Add command to parse only.
|
||||
_ Add regression to parse all the hcm files.
|
||||
|
||||
_ Partition HCC, HIP, HSA, OpenCL commands into separate files.
|
||||
|
||||
|
||||
_ Show time for back-to-back copies.
|
||||
_ Add variables.
|
||||
%loopcnt
|
||||
|
||||
./hipCommander %loopcnt=4
|
||||
|
||||
_ Add datasize command.
|
||||
|
||||
|
||||
_ Add ( ) to parsing.
|
||||
_ Add argument parsing and checking.
|
||||
|
||||
_ Add verbose option to print each step of setup.
|
||||
- print deliniater between setup and run. Add run start message.
|
||||
|
||||
- print sizes of all buffers.
|
||||
- print each command before running.
|
||||
- show start/stop of timer routine.
|
||||
|
||||
_
|
||||
_ Clear documentation on what each oepration does.
|
||||
_ Add time instrumentation for each command.
|
||||
_ Add pcie atomic.
|
||||
|
||||
|
||||
_ Add tests for negative cases, ie endloop w/o opening loop.
|
||||
|
||||
|
||||
README tips
|
||||
---
|
||||
- HIP_API_TRACE combined with -v is useful to track the exact commands generates by hipCommander.
|
||||
|
||||
|
||||
Other ideas:
|
||||
---
|
||||
[ ] Perf guide : stream creation very slow on HCC and should be avoided.
|
||||
|
||||
|
||||
Scratch:
|
||||
|
||||
|
||||
@@ -11,7 +11,6 @@
|
||||
#include <elf.h>
|
||||
#include <hsa/hsa.h>
|
||||
#include <hc.hpp>
|
||||
#include <hip/hcc_detail/hcc_acc.h>
|
||||
#endif
|
||||
|
||||
#include <sys/time.h>
|
||||
@@ -23,7 +22,7 @@ bool g_printedTiming = false;
|
||||
|
||||
// Cmdline parms:
|
||||
int p_device = 0;
|
||||
const char* p_command = "H2D; NullKernel; D2H";
|
||||
const char* p_command = "setstream(1); H2D; NullKernel; D2H;";
|
||||
const char* p_file = nullptr;
|
||||
unsigned p_verbose = 0x0;
|
||||
unsigned p_db = 0x0;
|
||||
|
||||
@@ -0,0 +1,9 @@
|
||||
setstream(1);
|
||||
loop(10); D2H; H2D; streamsync;D2H; H2D; streamsync; endloop(1);
|
||||
loop(10); D2H; H2D; streamsync;D2H; H2D; streamsync; endloop(1);
|
||||
loop(100); D2H; H2D; streamsync;D2H; H2D; streamsync; endloop(1);
|
||||
loop(100); D2H; H2D; streamsync;D2H; H2D; streamsync; endloop(1);
|
||||
loop(1000); D2H; H2D; streamsync;D2H; H2D; streamsync; endloop(1);
|
||||
loop(1000); D2H; H2D; streamsync;D2H; H2D; streamsync; endloop(1);
|
||||
loop(10000); D2H; H2D; streamsync;D2H; H2D; streamsync; endloop(1);
|
||||
loop(10000); D2H; H2D; streamsync;D2H; H2D; streamsync; endloop(1);
|
||||
@@ -0,0 +1,9 @@
|
||||
setstream(1);
|
||||
loop(10); D2H; NullKernel; streamsync;D2H; NullKernel; streamsync; endloop(1);
|
||||
loop(10); D2H; NullKernel; streamsync;D2H; NullKernel; streamsync; endloop(1);
|
||||
loop(100); D2H; NullKernel; streamsync;D2H; NullKernel; streamsync; endloop(1);
|
||||
loop(100); D2H; NullKernel; streamsync;D2H; NullKernel; streamsync; endloop(1);
|
||||
loop(1000); D2H; NullKernel; streamsync;D2H; NullKernel; streamsync; endloop(1);
|
||||
loop(1000); D2H; NullKernel; streamsync;D2H; NullKernel; streamsync; endloop(1);
|
||||
loop(10000); D2H; NullKernel; streamsync;D2H; NullKernel; streamsync; endloop(1);
|
||||
loop(10000); D2H; NullKernel; streamsync;D2H; NullKernel; streamsync; endloop(1);
|
||||
@@ -0,0 +1,9 @@
|
||||
setstream(1);
|
||||
loop(10); D2H; streamsync; H2D; streamsync;D2H; streamsync; H2D; streamsync; endloop(1);
|
||||
loop(10); D2H; streamsync; H2D; streamsync;D2H; streamsync; H2D; streamsync; endloop(1);
|
||||
loop(100); D2H; streamsync; H2D; streamsync;D2H; streamsync; H2D; streamsync; endloop(1);
|
||||
loop(100); D2H; streamsync; H2D; streamsync;D2H; streamsync; H2D; streamsync; endloop(1);
|
||||
loop(1000); D2H; streamsync; H2D; streamsync;D2H; streamsync; H2D; streamsync; endloop(1);
|
||||
loop(1000); D2H; streamsync; H2D; streamsync;D2H; streamsync; H2D; streamsync; endloop(1);
|
||||
loop(10000); D2H; streamsync; H2D; streamsync;D2H; streamsync; H2D; streamsync; endloop(1);
|
||||
loop(10000); D2H; streamsync; H2D; streamsync;D2H; streamsync; H2D; streamsync; endloop(1);
|
||||
@@ -0,0 +1,9 @@
|
||||
setstream(1);
|
||||
loop(10); D2H; streamsync; NullKernel; streamsync;D2H; streamsync; NullKernel; streamsync; endloop(1);
|
||||
loop(10); D2H; streamsync; NullKernel; streamsync;D2H; streamsync; NullKernel; streamsync; endloop(1);
|
||||
loop(100); D2H; streamsync; NullKernel; streamsync;D2H; streamsync; NullKernel; streamsync; endloop(1);
|
||||
loop(100); D2H; streamsync; NullKernel; streamsync;D2H; streamsync; NullKernel; streamsync; endloop(1);
|
||||
loop(1000); D2H; streamsync; NullKernel; streamsync;D2H; streamsync; NullKernel; streamsync; endloop(1);
|
||||
loop(1000); D2H; streamsync; NullKernel; streamsync;D2H; streamsync; NullKernel; streamsync; endloop(1);
|
||||
loop(10000); D2H; streamsync; NullKernel; streamsync;D2H; streamsync; NullKernel; streamsync; endloop(1);
|
||||
loop(10000); D2H; streamsync; NullKernel; streamsync;D2H; streamsync; NullKernel; streamsync; endloop(1);
|
||||
@@ -0,0 +1,9 @@
|
||||
setstream(1);
|
||||
loop(10); H2D; D2H; streamsync;H2D; D2H; streamsync; endloop(1);
|
||||
loop(10); H2D; D2H; streamsync;H2D; D2H; streamsync; endloop(1);
|
||||
loop(100); H2D; D2H; streamsync;H2D; D2H; streamsync; endloop(1);
|
||||
loop(100); H2D; D2H; streamsync;H2D; D2H; streamsync; endloop(1);
|
||||
loop(1000); H2D; D2H; streamsync;H2D; D2H; streamsync; endloop(1);
|
||||
loop(1000); H2D; D2H; streamsync;H2D; D2H; streamsync; endloop(1);
|
||||
loop(10000); H2D; D2H; streamsync;H2D; D2H; streamsync; endloop(1);
|
||||
loop(10000); H2D; D2H; streamsync;H2D; D2H; streamsync; endloop(1);
|
||||
@@ -0,0 +1,9 @@
|
||||
setstream(1);
|
||||
loop(10); H2D; NullKernel; streamsync;H2D; NullKernel; streamsync; endloop(1);
|
||||
loop(10); H2D; NullKernel; streamsync; H2D; NullKernel; streamsync;endloop(1);
|
||||
loop(100); H2D; NullKernel; streamsync; H2D; NullKernel; streamsync;endloop(1);
|
||||
loop(100); H2D; NullKernel; streamsync; H2D; NullKernel; streamsync; endloop(1);
|
||||
loop(1000); H2D; NullKernel; streamsync; H2D; NullKernel; streamsync; endloop(1);
|
||||
loop(1000); H2D; NullKernel; streamsync; H2D; NullKernel; streamsync; endloop(1);
|
||||
loop(10000); H2D; NullKernel; streamsync;H2D; NullKernel; streamsync; endloop(1);
|
||||
loop(10000); H2D; NullKernel; streamsync; H2D; NullKernel; streamsync; endloop(1);
|
||||
@@ -0,0 +1,9 @@
|
||||
setstream(1);
|
||||
loop(10); H2D; streamsync; D2H; streamsync;H2D; streamsync; D2H; streamsync; endloop(1);
|
||||
loop(10); H2D; streamsync; D2H; streamsync;H2D; streamsync; D2H; streamsync; endloop(1);
|
||||
loop(100); H2D; streamsync; D2H; streamsync;H2D; streamsync; D2H; streamsync; endloop(1);
|
||||
loop(100); H2D; streamsync; D2H; streamsync;H2D; streamsync; D2H; streamsync; endloop(1);
|
||||
loop(1000); H2D; streamsync; D2H; streamsync;H2D; streamsync; D2H; streamsync; endloop(1);
|
||||
loop(1000); H2D; streamsync; D2H; streamsync;H2D; streamsync; D2H; streamsync; endloop(1);
|
||||
loop(10000); H2D; streamsync; D2H; streamsync;H2D; streamsync; D2H; streamsync; endloop(1);
|
||||
loop(10000); H2D; streamsync; D2H; streamsync;H2D; streamsync; D2H; streamsync; endloop(1);
|
||||
@@ -0,0 +1,9 @@
|
||||
setstream(1);
|
||||
loop(10); H2D; streamsync; NullKernel; streamsync;H2D; streamsync; NullKernel; streamsync; endloop(1);
|
||||
loop(10); H2D; streamsync; NullKernel; streamsync;H2D; streamsync; NullKernel; streamsync; endloop(1);
|
||||
loop(100); H2D; streamsync; NullKernel; streamsync;H2D; streamsync; NullKernel; streamsync; endloop(1);
|
||||
loop(100); H2D; streamsync; NullKernel; streamsync;H2D; streamsync; NullKernel; streamsync; endloop(1);
|
||||
loop(1000); H2D; streamsync; NullKernel; streamsync;H2D; streamsync; NullKernel; streamsync; endloop(1);
|
||||
loop(1000); H2D; streamsync; NullKernel; streamsync;H2D; streamsync; NullKernel; streamsync; endloop(1);
|
||||
loop(10000); H2D; streamsync; NullKernel; streamsync;H2D; streamsync; NullKernel; streamsync; endloop(1);
|
||||
loop(10000); H2D; streamsync; NullKernel; streamsync;H2D; streamsync; NullKernel; streamsync; endloop(1);
|
||||
@@ -0,0 +1,9 @@
|
||||
setstream(1);
|
||||
loop(10); NullKernel; D2H; streamsync;NullKernel; D2H; streamsync; endloop(1);
|
||||
loop(10); NullKernel; D2H; streamsync;NullKernel; D2H; streamsync; endloop(1);
|
||||
loop(100); NullKernel; D2H; streamsync;NullKernel; D2H; streamsync; endloop(1);
|
||||
loop(100); NullKernel; D2H; streamsync;NullKernel; D2H; streamsync; endloop(1);
|
||||
loop(1000); NullKernel; D2H; streamsync;NullKernel; D2H; streamsync; endloop(1);
|
||||
loop(1000); NullKernel; D2H; streamsync;NullKernel; D2H; streamsync; endloop(1);
|
||||
loop(10000); NullKernel; D2H; streamsync;NullKernel; D2H; streamsync; endloop(1);
|
||||
loop(10000); NullKernel; D2H; streamsync;NullKernel; D2H; streamsync; endloop(1);
|
||||
@@ -0,0 +1,9 @@
|
||||
setstream(1);
|
||||
loop(10); NullKernel; H2D; streamsync;NullKernel; H2D; streamsync; endloop(1);
|
||||
loop(10); NullKernel; H2D; streamsync;NullKernel; H2D; streamsync; endloop(1);
|
||||
loop(100); NullKernel; H2D; streamsync;NullKernel; H2D; streamsync; endloop(1);
|
||||
loop(100); NullKernel; H2D; streamsync;NullKernel; H2D; streamsync; endloop(1);
|
||||
loop(1000); NullKernel; H2D; streamsync;NullKernel; H2D; streamsync; endloop(1);
|
||||
loop(1000); NullKernel; H2D; streamsync;NullKernel; H2D; streamsync; endloop(1);
|
||||
loop(10000); NullKernel; H2D; streamsync;NullKernel; H2D; streamsync; endloop(1);
|
||||
loop(10000); NullKernel; H2D; streamsync;NullKernel; H2D; streamsync; endloop(1);
|
||||
@@ -0,0 +1,9 @@
|
||||
setstream(1);
|
||||
loop(10); NullKernel; streamsync; D2H; streamsync;NullKernel; streamsync; D2H; streamsync; endloop(1);
|
||||
loop(10); NullKernel; streamsync; D2H; streamsync;NullKernel; streamsync; D2H; streamsync; endloop(1);
|
||||
loop(100); NullKernel; streamsync; D2H; streamsync;NullKernel; streamsync; D2H; streamsync; endloop(1);
|
||||
loop(100); NullKernel; streamsync; D2H; streamsync;NullKernel; streamsync; D2H; streamsync; endloop(1);
|
||||
loop(1000); NullKernel; streamsync; D2H; streamsync;NullKernel; streamsync; D2H; streamsync; endloop(1);
|
||||
loop(1000); NullKernel; streamsync; D2H; streamsync;NullKernel; streamsync; D2H; streamsync; endloop(1);
|
||||
loop(10000); NullKernel; streamsync; D2H; streamsync;NullKernel; streamsync; D2H; streamsync; endloop(1);
|
||||
loop(10000); NullKernel; streamsync; D2H; streamsync;NullKernel; streamsync; D2H; streamsync; endloop(1);
|
||||
@@ -0,0 +1,9 @@
|
||||
setstream(1);
|
||||
loop(10); NullKernel; streamsync; H2D; streamsync;NullKernel; streamsync; H2D; streamsync; endloop(1);
|
||||
loop(10); NullKernel; streamsync; H2D; streamsync;NullKernel; streamsync; H2D; streamsync; endloop(1);
|
||||
loop(100); NullKernel; streamsync; H2D; streamsync;NullKernel; streamsync; H2D; streamsync; endloop(1);
|
||||
loop(100); NullKernel; streamsync; H2D; streamsync;NullKernel; streamsync; H2D; streamsync; endloop(1);
|
||||
loop(1000); NullKernel; streamsync; H2D; streamsync;NullKernel; streamsync; H2D; streamsync; endloop(1);
|
||||
loop(1000); NullKernel; streamsync; H2D; streamsync;NullKernel; streamsync; H2D; streamsync; endloop(1);
|
||||
loop(10000); NullKernel; streamsync; H2D; streamsync;NullKernel; streamsync; H2D; streamsync; endloop(1);
|
||||
loop(10000); NullKernel; streamsync; H2D; streamsync;NullKernel; streamsync; H2D; streamsync; endloop(1);
|
||||
@@ -0,0 +1,9 @@
|
||||
setstream(1);
|
||||
loop(10); streamsync; streamsync; endloop(1);
|
||||
loop(10); streamsync; streamsync; endloop(1);
|
||||
loop(100); streamsync; streamsync; endloop(1);
|
||||
loop(100); streamsync; streamsync; endloop(1);
|
||||
loop(1000); streamsync; streamsync; endloop(1);
|
||||
loop(1000); streamsync; streamsync; endloop(1);
|
||||
loop(10000); streamsync; streamsync; endloop(1);
|
||||
loop(10000); streamsync; streamsync; endloop(1);
|
||||
@@ -0,0 +1,10 @@
|
||||
setstream(1);
|
||||
loop(10); D2H; streamsync; D2H; streamsync; endloop(1);
|
||||
loop(10); D2H; streamsync; D2H; streamsync; endloop(1);
|
||||
loop(100); D2H; streamsync; D2H; streamsync; endloop(1);
|
||||
loop(100); D2H; streamsync; D2H; streamsync; endloop(1);
|
||||
loop(1000); D2H;streamsync; D2H; streamsync; endloop(1);
|
||||
loop(1000); D2H; streamsync; D2H; streamsync; endloop(1);
|
||||
loop(1000); D2H; streamsync; D2H; streamsync; endloop(1);
|
||||
loop(10000); D2H; streamsync; D2H; streamsync; endloop(1);
|
||||
loop(10000); D2H; streamsync; D2H; streamsync; endloop(1);
|
||||
@@ -0,0 +1,10 @@
|
||||
setstream(1);
|
||||
loop(10); D2H; D2H; streamsync; endloop(1);
|
||||
loop(10); D2H; D2H; streamsync; endloop(1);
|
||||
loop(100); D2H; D2H; streamsync; endloop(1);
|
||||
loop(100); D2H; D2H; streamsync; endloop(1);
|
||||
loop(1000); D2H; D2H; streamsync; endloop(1);
|
||||
loop(1000); D2H; D2H; streamsync; endloop(1);
|
||||
loop(1000); D2H; D2H; streamsync; endloop(1);
|
||||
loop(10000); D2H; D2H; streamsync; endloop(1);
|
||||
loop(10000); D2H; D2H; streamsync; endloop(1);
|
||||
@@ -0,0 +1,10 @@
|
||||
setstream(1);
|
||||
loop(10); H2D; streamsync; H2D; streamsync; endloop(1);
|
||||
loop(10); H2D; streamsync; H2D; streamsync; endloop(1);
|
||||
loop(100); H2D; streamsync; H2D; streamsync; endloop(1);
|
||||
loop(100); H2D; streamsync; H2D; streamsync; endloop(1);
|
||||
loop(1000); H2D;streamsync; H2D; streamsync; endloop(1);
|
||||
loop(1000); H2D; streamsync; H2D; streamsync; endloop(1);
|
||||
loop(1000); H2D; streamsync; H2D; streamsync; endloop(1);
|
||||
loop(10000); H2D; streamsync; H2D; streamsync; endloop(1);
|
||||
loop(10000); H2D; streamsync; H2D; streamsync; endloop(1);
|
||||
@@ -0,0 +1,9 @@
|
||||
setstream(1);
|
||||
loop(10); H2D; streamsync; NullKernel; streamsync; H2D; streamsync; NullKernel; streamsync;endloop(1);
|
||||
loop(10); H2D; streamsync; NullKernel; streamsync;H2D; streamsync; NullKernel; streamsync; endloop(1);
|
||||
loop(100); H2D; streamsync; NullKernel; streamsync; H2D; streamsync; NullKernel; streamsync;endloop(1);
|
||||
loop(100); H2D; streamsync; NullKernel; streamsync; H2D; streamsync; NullKernel; streamsync;endloop(1);
|
||||
loop(1000); H2D; streamsync; NullKernel; streamsync; H2D; streamsync; NullKernel; streamsync;endloop(1);
|
||||
loop(1000); H2D; streamsync; NullKernel; streamsync; H2D; streamsync; NullKernel; streamsync;endloop(1);
|
||||
loop(10000); H2D; streamsync; NullKernel; streamsync;H2D; streamsync; NullKernel; streamsync; endloop(1);
|
||||
loop(10000); H2D; streamsync; NullKernel; streamsync; H2D; streamsync; NullKernel; streamsync;endloop(1);
|
||||
@@ -0,0 +1,9 @@
|
||||
setstream(1);
|
||||
loop(10); H2D; NullKernel; streamsync;H2D; NullKernel; streamsync; endloop(1);
|
||||
loop(10); H2D; NullKernel; streamsync; H2D; NullKernel; streamsync;endloop(1);
|
||||
loop(100); H2D; NullKernel; streamsync; H2D; NullKernel; streamsync;endloop(1);
|
||||
loop(100); H2D; NullKernel; streamsync; H2D; NullKernel; streamsync;endloop(1);
|
||||
loop(1000); H2D; NullKernel; streamsync;H2D; NullKernel; streamsync; endloop(1);
|
||||
loop(1000); H2D; NullKernel; streamsync;H2D; NullKernel; streamsync; endloop(1);
|
||||
loop(10000); H2D; NullKernel; streamsync;H2D; NullKernel; streamsync; endloop(1);
|
||||
loop(10000); H2D ; NullKernel; streamsync;H2D; NullKernel; streamsync; endloop(1);
|
||||
@@ -0,0 +1,10 @@
|
||||
setstream(1);
|
||||
loop(10); H2D; H2D; streamsync; endloop(1);
|
||||
loop(10); H2D; H2D; streamsync; endloop(1);
|
||||
loop(100); H2D; H2D; streamsync; endloop(1);
|
||||
loop(100); H2D; H2D; streamsync; endloop(1);
|
||||
loop(1000); H2D; H2D; streamsync; endloop(1);
|
||||
loop(1000); H2D; H2D; streamsync; endloop(1);
|
||||
loop(1000); H2D; H2D; streamsync; endloop(1);
|
||||
loop(10000); H2D; H2D; streamsync; endloop(1);
|
||||
loop(10000); H2D; H2D; streamsync; endloop(1);
|
||||
@@ -0,0 +1,10 @@
|
||||
setstream(1);
|
||||
loop(10); NullKernel; streamsync; NullKernel; streamsync; endloop(1);
|
||||
loop(10); NullKernel; streamsync; NullKernel; streamsync; endloop(1);
|
||||
loop(100); NullKernel; streamsync; NullKernel; streamsync; endloop(1);
|
||||
loop(100); NullKernel; streamsync; NullKernel; streamsync; endloop(1);
|
||||
loop(1000); NullKernel; streamsync; NullKernel; streamsync; endloop(1);
|
||||
loop(1000); NullKernel; streamsync; NullKernel; streamsync; endloop(1);
|
||||
loop(1000); NullKernel; streamsync; NullKernel; streamsync; endloop(1);
|
||||
loop(10000); NullKernel; streamsync; NullKernel; streamsync; endloop(1);
|
||||
loop(10000); NullKernel; streamsync; NullKernel; streamsync; endloop(1);
|
||||
@@ -0,0 +1,10 @@
|
||||
setstream(1);
|
||||
loop(10); NullKernel; NullKernel; streamsync; endloop(1);
|
||||
loop(10); NullKernel; NullKernel; streamsync; endloop(1);
|
||||
loop(100); NullKernel; NullKernel; streamsync; endloop(1);
|
||||
loop(100); NullKernel; NullKernel; streamsync; endloop(1);
|
||||
loop(1000); NullKernel; NullKernel; streamsync; endloop(1);
|
||||
loop(1000); NullKernel; NullKernel; streamsync; endloop(1);
|
||||
loop(1000); NullKernel; NullKernel; streamsync; endloop(1);
|
||||
loop(10000); NullKernel; NullKernel; streamsync; endloop(1);
|
||||
loop(10000); NullKernel; NullKernel; streamsync; endloop(1);
|
||||
@@ -0,0 +1,9 @@
|
||||
setstream(1);
|
||||
loop(10); D2H; H2D; streamsync;D2H; H2D; streamsync; D2H; H2D; streamsync; endloop(1);
|
||||
loop(10); D2H; H2D; streamsync;D2H; H2D; streamsync; D2H; H2D; streamsync; endloop(1);
|
||||
loop(100); D2H; H2D; streamsync;D2H; H2D; streamsync; D2H; H2D; streamsync; endloop(1);
|
||||
loop(100); D2H; H2D; streamsync;D2H; H2D; streamsync; D2H; H2D; streamsync; endloop(1);
|
||||
loop(1000); D2H; H2D; streamsync;D2H; H2D; streamsync; D2H; H2D; streamsync; endloop(1);
|
||||
loop(1000); D2H; H2D; streamsync;D2H; H2D; streamsync; D2H; H2D; streamsync; endloop(1);
|
||||
loop(10000); D2H; H2D; streamsync;D2H; H2D; streamsync; D2H; H2D; streamsync; endloop(1);
|
||||
loop(10000); D2H; H2D; streamsync;D2H; H2D; streamsync; D2H; H2D; streamsync; endloop(1);
|
||||
@@ -0,0 +1,9 @@
|
||||
setstream(1);
|
||||
loop(10); D2H; NullKernel; streamsync;D2H; NullKernel; streamsync;streamsync; D2H; NullKernel; streamsync; endloop(1);
|
||||
loop(10); D2H; NullKernel; streamsync;D2H; NullKernel; streamsync;streamsync; D2H; NullKernel; streamsync; endloop(1);
|
||||
loop(100); D2H; NullKernel; streamsync;D2H; NullKernel; streamsync;streamsync; D2H; NullKernel; streamsync; endloop(1);
|
||||
loop(100); D2H; NullKernel; streamsync;D2H; NullKernel; streamsync;streamsync; D2H; NullKernel; streamsync; endloop(1);
|
||||
loop(1000); D2H; NullKernel; streamsync;D2H; NullKernel; streamsync;streamsync; D2H; NullKernel; streamsync; endloop(1);
|
||||
loop(1000); D2H; NullKernel; streamsync;D2H; NullKernel; streamsync;streamsync; D2H; NullKernel; streamsync; endloop(1);
|
||||
loop(10000); D2H; NullKernel; streamsync;D2H; NullKernel; streamsync;streamsync; D2H; NullKernel; streamsync; endloop(1);
|
||||
loop(10000); D2H; NullKernel; streamsync;D2H; NullKernel; streamsync;streamsync; D2H; NullKernel; streamsync; endloop(1);
|
||||
@@ -0,0 +1,9 @@
|
||||
setstream(1);
|
||||
loop(10); D2H; streamsync; H2D; streamsync;D2H; streamsync; H2D; streamsync; D2H; streamsync; H2D;streamsync; endloop(1);
|
||||
loop(10); D2H; streamsync; H2D; streamsync;D2H; streamsync; H2D; streamsync; D2H; streamsync; H2D;streamsync; endloop(1);
|
||||
loop(100); D2H; streamsync; H2D; streamsync;D2H; streamsync; H2D; streamsync; D2H; streamsync; H2D;streamsync; endloop(1);
|
||||
loop(100); D2H; streamsync; H2D; streamsync;D2H; streamsync; H2D; streamsync; D2H; streamsync; H2D;streamsync; endloop(1);
|
||||
loop(1000); D2H; streamsync; H2D; streamsync;D2H; streamsync; H2D; streamsync; D2H; streamsync; H2D;streamsync; endloop(1);
|
||||
loop(1000); D2H; streamsync; H2D; streamsync;D2H; streamsync; H2D; streamsync; D2H; streamsync; H2D;streamsync; endloop(1);
|
||||
loop(10000); D2H; streamsync; H2D; streamsync;D2H; streamsync; H2D; streamsync; D2H; streamsync; H2D;streamsync; endloop(1);
|
||||
loop(10000); D2H; streamsync; H2D; streamsync;D2H; streamsync; H2D; streamsync; D2H; streamsync; H2D;streamsync; endloop(1);
|
||||
@@ -0,0 +1,9 @@
|
||||
setstream(1);
|
||||
loop(10); D2H; streamsync; NullKernel; streamsync;D2H; streamsync; NullKernel; streamsync; D2H; streamsync; NullKernel;streamsync; endloop(1);
|
||||
loop(10); D2H; streamsync; NullKernel; streamsync;D2H; streamsync; NullKernel; streamsync; D2H; streamsync; NullKernel;streamsync; endloop(1);
|
||||
loop(100); D2H; streamsync; NullKernel; streamsync;D2H; streamsync; NullKernel; streamsync; D2H; streamsync; NullKernel;streamsync; endloop(1);
|
||||
loop(100); D2H; streamsync; NullKernel; streamsync;D2H; streamsync; NullKernel; streamsync; D2H; streamsync; NullKernel;streamsync; endloop(1);
|
||||
loop(1000); D2H; streamsync; NullKernel; streamsync;D2H; streamsync; NullKernel; streamsync; D2H; streamsync; NullKernel;streamsync; endloop(1);
|
||||
loop(1000); D2H; streamsync; NullKernel; streamsync;D2H; streamsync; NullKernel; streamsync; D2H; streamsync; NullKernel;streamsync; endloop(1);
|
||||
loop(10000); D2H; streamsync; NullKernel; streamsync;D2H; streamsync; NullKernel; streamsync; D2H; streamsync; NullKernel;streamsync; endloop(1);
|
||||
loop(10000); D2H; streamsync; NullKernel; streamsync;D2H; streamsync; NullKernel; streamsync; D2H; streamsync; NullKernel;streamsync; endloop(1);
|
||||
@@ -0,0 +1,10 @@
|
||||
setstream(1);
|
||||
loop(10); H2D; D2H; streamsync;H2D; D2H; streamsync; H2D; D2H; streamsync; endloop(1);
|
||||
loop(10); H2D; D2H; streamsync;H2D; D2H; streamsync; H2D; D2H; streamsync; endloop(1);
|
||||
loop(100); H2D; D2H; streamsync;H2D; D2H; streamsync; H2D; D2H; streamsync; endloop(1);
|
||||
loop(100); H2D; D2H; streamsync;H2D; D2H; streamsync; H2D; D2H; streamsync; endloop(1);
|
||||
loop(1000); H2D; D2H; streamsync;H2D; D2H; streamsync; H2D; D2H; streamsync; endloop(1);
|
||||
loop(1000); H2D; D2H; streamsync;H2D; D2H; streamsync; H2D; D2H; streamsync; endloop(1);
|
||||
loop(10000); H2D; D2H; streamsync;H2D; D2H; streamsync; H2D; D2H; streamsync; endloop(1);
|
||||
loop(10000); H2D; D2H; streamsync;H2D; D2H; streamsync; H2D; D2H; streamsync; endloop(1);
|
||||
|
||||
@@ -0,0 +1,9 @@
|
||||
setstream(1);
|
||||
loop(10); H2D; NullKernel; streamsync;H2D; NullKernel; streamsync;streamsync; H2D; NullKernel; streamsync; endloop(1);
|
||||
loop(10); H2D; NullKernel; streamsync;H2D; NullKernel; streamsync;streamsync; H2D; NullKernel; streamsync; endloop(1);
|
||||
loop(100); H2D; NullKernel; streamsync;H2D; NullKernel; streamsync;streamsync; H2D; NullKernel; streamsync; endloop(1);
|
||||
loop(100); H2D; NullKernel; streamsync;H2D; NullKernel; streamsync;streamsync; H2D; NullKernel; streamsync; endloop(1);
|
||||
loop(1000); H2D; NullKernel; streamsync;H2D; NullKernel; streamsync;streamsync; H2D; NullKernel; streamsync; endloop(1);
|
||||
loop(1000); H2D; NullKernel; streamsync;H2D; NullKernel; streamsync;streamsync; H2D; NullKernel; streamsync; endloop(1);
|
||||
loop(10000); H2D; NullKernel; streamsync;H2D; NullKernel; streamsync;streamsync; H2D; NullKernel; streamsync; endloop(1);
|
||||
loop(10000); H2D; NullKernel; streamsync;H2D; NullKernel; streamsync;streamsync; H2D; NullKernel; streamsync; endloop(1);
|
||||
@@ -0,0 +1,9 @@
|
||||
setstream(1);
|
||||
loop(10); H2D; streamsync; D2H; streamsync;H2D; streamsync; D2H; streamsync; H2D; streamsync; D2H;streamsync; endloop(1);
|
||||
loop(10); H2D; streamsync; D2H; streamsync;H2D; streamsync; D2H; streamsync; H2D; streamsync; D2H;streamsync; endloop(1);
|
||||
loop(100); H2D; streamsync; D2H; streamsync;H2D; streamsync; D2H; streamsync; H2D; streamsync; D2H;streamsync; endloop(1);
|
||||
loop(100); H2D; streamsync; D2H; streamsync;H2D; streamsync; D2H; streamsync; H2D; streamsync; D2H;streamsync; endloop(1);
|
||||
loop(1000); H2D; streamsync; D2H; streamsync;H2D; streamsync; D2H; streamsync; H2D; streamsync; D2H;streamsync; endloop(1);
|
||||
loop(1000); H2D; streamsync; D2H; streamsync;H2D; streamsync; D2H; streamsync; H2D; streamsync; D2H;streamsync; endloop(1);
|
||||
loop(10000); H2D; streamsync; D2H; streamsync;H2D; streamsync; D2H; streamsync; H2D; streamsync; D2H;streamsync; endloop(1);
|
||||
loop(10000); H2D; streamsync; D2H; streamsync;H2D; streamsync; D2H; streamsync; H2D; streamsync; D2H;streamsync; endloop(1);
|
||||
@@ -0,0 +1,9 @@
|
||||
setstream(1);
|
||||
loop(10); H2D; streamsync; NullKernel; streamsync;H2D; streamsync; NullKernel; streamsync; H2D; streamsync; NullKernel;streamsync; endloop(1);
|
||||
loop(10); H2D; streamsync; NullKernel; streamsync;H2D; streamsync; NullKernel; streamsync; H2D; streamsync; NullKernel;streamsync; endloop(1);
|
||||
loop(100); H2D; streamsync; NullKernel; streamsync;H2D; streamsync; NullKernel; streamsync; H2D; streamsync; NullKernel;streamsync; endloop(1);
|
||||
loop(100); H2D; streamsync; NullKernel; streamsync;H2D; streamsync; NullKernel; streamsync; H2D; streamsync; NullKernel;streamsync; endloop(1);
|
||||
loop(1000); H2D; streamsync; NullKernel; streamsync;H2D; streamsync; NullKernel; streamsync; H2D; streamsync; NullKernel;streamsync; endloop(1);
|
||||
loop(1000); H2D; streamsync; NullKernel; streamsync;H2D; streamsync; NullKernel; streamsync; H2D; streamsync; NullKernel;streamsync; endloop(1);
|
||||
loop(10000); H2D; streamsync; NullKernel; streamsync;H2D; streamsync; NullKernel; streamsync; H2D; streamsync; NullKernel;streamsync; endloop(1);
|
||||
loop(10000); H2D; streamsync; NullKernel; streamsync;H2D; streamsync; NullKernel; streamsync; H2D; streamsync; NullKernel;streamsync; endloop(1);
|
||||
@@ -0,0 +1,9 @@
|
||||
setstream(1);
|
||||
loop(10); NullKernel; D2H; streamsync;NullKernel; D2H; streamsync; NullKernel; D2H; streamsync;endloop(1);
|
||||
loop(10); NullKernel; D2H; streamsync;NullKernel; D2H; streamsync; NullKernel; D2H; streamsync;endloop(1);
|
||||
loop(100); NullKernel; D2H; streamsync;NullKernel; D2H; streamsync; NullKernel; D2H; streamsync;endloop(1);
|
||||
loop(100); NullKernel; D2H; streamsync;NullKernel; D2H; streamsync; NullKernel; D2H; streamsync;endloop(1);
|
||||
loop(1000); NullKernel; D2H; streamsync;NullKernel; D2H; streamsync; NullKernel; D2H; streamsync;endloop(1);
|
||||
loop(1000); NullKernel; D2H; streamsync;NullKernel; D2H; streamsync; NullKernel; D2H; streamsync;endloop(1);
|
||||
loop(10000); NullKernel; D2H; streamsync;NullKernel; D2H; streamsync; NullKernel; D2H; streamsync;endloop(1);
|
||||
loop(10000); NullKernel; D2H; streamsync;NullKernel; D2H; streamsync; NullKernel; D2H; streamsync;endloop(1);
|
||||
@@ -0,0 +1,9 @@
|
||||
setstream(1);
|
||||
loop(10); NullKernel; H2D; streamsync;NullKernel; H2D; streamsync; NullKernel; H2D; streamsync;endloop(1);
|
||||
loop(10); NullKernel; H2D; streamsync;NullKernel; H2D; streamsync; NullKernel; H2D; streamsync;endloop(1);
|
||||
loop(100); NullKernel; H2D; streamsync;NullKernel; H2D; streamsync; NullKernel; H2D; streamsync;endloop(1);
|
||||
loop(100); NullKernel; H2D; streamsync;NullKernel; H2D; streamsync; NullKernel; H2D; streamsync;endloop(1);
|
||||
loop(1000); NullKernel; H2D; streamsync;NullKernel; H2D; streamsync; NullKernel; H2D; streamsync;endloop(1);
|
||||
loop(1000); NullKernel; H2D; streamsync;NullKernel; H2D; streamsync; NullKernel; H2D; streamsync;endloop(1);
|
||||
loop(10000); NullKernel; H2D; streamsync;NullKernel; H2D; streamsync; NullKernel; H2D; streamsync;endloop(1);
|
||||
loop(10000); NullKernel; H2D; streamsync;NullKernel; H2D; streamsync; NullKernel; H2D; streamsync;endloop(1);
|
||||
@@ -0,0 +1,9 @@
|
||||
setstream(1);
|
||||
loop(10); NullKernel; streamsync; D2H; streamsync;NullKernel; streamsync; D2H; streamsync;NullKernel; streamsync; D2H; streamsync; endloop(1);
|
||||
loop(10); NullKernel; streamsync; D2H; streamsync;NullKernel; streamsync; D2H; streamsync;NullKernel; streamsync; D2H; streamsync; endloop(1);
|
||||
loop(100); NullKernel; streamsync; D2H; streamsync;NullKernel; streamsync; D2H; streamsync;NullKernel; streamsync; D2H; streamsync; endloop(1);
|
||||
loop(100); NullKernel; streamsync; D2H; streamsync;NullKernel; streamsync; D2H; streamsync;NullKernel; streamsync; D2H; streamsync; endloop(1);
|
||||
loop(1000); NullKernel; streamsync; D2H; streamsync;NullKernel; streamsync; D2H; streamsync;NullKernel; streamsync; D2H; streamsync; endloop(1);
|
||||
loop(1000); NullKernel; streamsync; D2H; streamsync;NullKernel; streamsync; D2H; streamsync;NullKernel; streamsync; D2H; streamsync; endloop(1);
|
||||
loop(10000); NullKernel; streamsync; D2H; streamsync;NullKernel; streamsync; D2H; streamsync;NullKernel; streamsync; D2H; streamsync; endloop(1);
|
||||
loop(10000); NullKernel; streamsync; D2H; streamsync;NullKernel; streamsync; D2H; streamsync;NullKernel; streamsync; D2H; streamsync; endloop(1);
|
||||
@@ -0,0 +1,9 @@
|
||||
setstream(1);
|
||||
loop(10); NullKernel; streamsync; H2D; streamsync;NullKernel; streamsync; H2D; streamsync;NullKernel; streamsync; H2D; streamsync; endloop(1);
|
||||
loop(10); NullKernel; streamsync; H2D; streamsync;NullKernel; streamsync; H2D; streamsync;NullKernel; streamsync; H2D; streamsync; endloop(1);
|
||||
loop(100); NullKernel; streamsync; H2D; streamsync;NullKernel; streamsync; H2D; streamsync;NullKernel; streamsync; H2D; streamsync; endloop(1);
|
||||
loop(100); NullKernel; streamsync; H2D; streamsync;NullKernel; streamsync; H2D; streamsync;NullKernel; streamsync; H2D; streamsync; endloop(1);
|
||||
loop(1000); NullKernel; streamsync; H2D; streamsync;NullKernel; streamsync; H2D; streamsync;NullKernel; streamsync; H2D; streamsync; endloop(1);
|
||||
loop(1000); NullKernel; streamsync; H2D; streamsync;NullKernel; streamsync; H2D; streamsync;NullKernel; streamsync; H2D; streamsync; endloop(1);
|
||||
loop(10000); NullKernel; streamsync; H2D; streamsync;NullKernel; streamsync; H2D; streamsync;NullKernel; streamsync; H2D; streamsync; endloop(1);
|
||||
loop(10000); NullKernel; streamsync; H2D; streamsync;NullKernel; streamsync; H2D; streamsync;NullKernel; streamsync; H2D; streamsync; endloop(1);
|
||||
@@ -0,0 +1,9 @@
|
||||
setstream(1);
|
||||
loop(10);streamsync; streamsync; streamsync; endloop(1);
|
||||
loop(10);streamsync; streamsync; streamsync; endloop(1);
|
||||
loop(100);streamsync; streamsync; streamsync; endloop(1);
|
||||
loop(100);streamsync; streamsync; streamsync; endloop(1);
|
||||
loop(1000);streamsync; streamsync; streamsync; endloop(1);
|
||||
loop(1000);streamsync; streamsync; streamsync; endloop(1);
|
||||
loop(10000);streamsync; streamsync; streamsync; endloop(1);
|
||||
loop(10000);streamsync; streamsync; streamsync; endloop(1);
|
||||
@@ -0,0 +1,9 @@
|
||||
setstream(1);
|
||||
loop(10); D2H; streamsync; D2H; streamsync; D2H; streamsync; endloop(1);
|
||||
loop(10); D2H; streamsync; D2H; streamsync; D2H; streamsync; endloop(1);
|
||||
loop(100); D2H; streamsync; D2H; streamsync; D2H; streamsync; endloop(1);
|
||||
loop(100); D2H; streamsync; D2H; streamsync; D2H; streamsync; endloop(1);
|
||||
loop(1000); D2H;streamsync; D2H; streamsync; D2H; streamsync; endloop(1);
|
||||
loop(1000); D2H; streamsync; D2H; streamsync; D2H; streamsync; endloop(1);
|
||||
loop(10000); D2H; streamsync; D2H; streamsync; D2H; streamsync; endloop(1);
|
||||
loop(10000); D2H; streamsync; D2H; streamsync; D2H; streamsync; endloop(1);
|
||||
@@ -0,0 +1,9 @@
|
||||
setstream(1);
|
||||
loop(10); D2H; D2H; D2H; streamsync; endloop(1);
|
||||
loop(10); D2H; D2H; D2H; streamsync; endloop(1);
|
||||
loop(100); D2H; D2H; D2H; streamsync; endloop(1);
|
||||
loop(100); D2H; D2H; D2H; streamsync; endloop(1);
|
||||
loop(1000); D2H; D2H; D2H; streamsync; endloop(1);
|
||||
loop(1000); D2H; D2H; D2H; streamsync; endloop(1);
|
||||
loop(10000); D2H; D2H; D2H; streamsync; endloop(1);
|
||||
loop(10000); D2H; D2H; D2H;streamsync; endloop(1);
|
||||
@@ -0,0 +1,10 @@
|
||||
setstream(1);
|
||||
loop(10); H2D; streamsync;H2D;streamsync; H2D; streamsync; endloop(1);
|
||||
loop(10); H2D; streamsync;H2D;streamsync; H2D; streamsync; endloop(1);
|
||||
loop(100); H2D; streamsync;H2D; streamsync;H2D; streamsync; endloop(1);
|
||||
loop(100); H2D;streamsync; H2D; streamsync;H2D; streamsync; endloop(1);
|
||||
loop(1000); H2D;streamsync; H2D;streamsync; H2D; streamsync; endloop(1);
|
||||
loop(1000); H2D;streamsync; H2D; streamsync;H2D; streamsync; endloop(1);
|
||||
loop(1000); H2D;streamsync; H2D; streamsync;H2D; streamsync; endloop(1);
|
||||
loop(10000); H2D;streamsync; H2D; streamsync;H2D; streamsync; endloop(1);
|
||||
loop(10000); H2D;streamsync; H2D;streamsync; H2D; streamsync; endloop(1);
|
||||
@@ -0,0 +1,10 @@
|
||||
setstream(1);
|
||||
loop(10); H2D; H2D; H2D; streamsync; endloop(1);
|
||||
loop(10); H2D; H2D; H2D; streamsync; endloop(1);
|
||||
loop(100); H2D; H2D; H2D; streamsync; endloop(1);
|
||||
loop(100); H2D; H2D; H2D; streamsync; endloop(1);
|
||||
loop(1000); H2D; H2D; H2D; streamsync; endloop(1);
|
||||
loop(1000); H2D; H2D; H2D; streamsync; endloop(1);
|
||||
loop(1000); H2D; H2D; H2D; streamsync; endloop(1);
|
||||
loop(10000); H2D; H2D; H2D; streamsync; endloop(1);
|
||||
loop(10000); H2D; H2D; H2D; streamsync; endloop(1);
|
||||
@@ -0,0 +1,10 @@
|
||||
setstream(1);
|
||||
loop(10); NullKernel; streamsync; NullKernel; streamsync; NullKernel; streamsync; endloop(1);
|
||||
loop(10); NullKernel; streamsync; NullKernel; streamsync; NullKernel; streamsync; endloop(1);
|
||||
loop(100); NullKernel; streamsync; NullKernel; streamsync; NullKernel; streamsync; endloop(1);
|
||||
loop(100); NullKernel; streamsync; NullKernel; streamsync; NullKernel; streamsync; endloop(1);
|
||||
loop(1000); NullKernel; streamsync; NullKernel; streamsync; NullKernel; streamsync; endloop(1);
|
||||
loop(1000); NullKernel; streamsync; NullKernel; streamsync; NullKernel; streamsync; endloop(1);
|
||||
loop(1000); NullKernel; streamsync; NullKernel; streamsync; NullKernel; streamsync; endloop(1);
|
||||
loop(10000); NullKernel; streamsync; NullKernel; streamsync; NullKernel; streamsync; endloop(1);
|
||||
loop(10000); NullKernel; streamsync; NullKernel; streamsync; NullKernel; streamsync; endloop(1);
|
||||
@@ -0,0 +1,10 @@
|
||||
setstream(1);
|
||||
loop(10); NullKernel; NullKernel; NullKernel; streamsync; endloop(1);
|
||||
loop(10); NullKernel; NullKernel; NullKernel; streamsync; endloop(1);
|
||||
loop(100); NullKernel; NullKernel; NullKernel; streamsync; endloop(1);
|
||||
loop(100); NullKernel; NullKernel; NullKernel; streamsync; endloop(1);
|
||||
loop(1000); NullKernel; NullKernel; NullKernel; streamsync; endloop(1);
|
||||
loop(1000); NullKernel; NullKernel; NullKernel; streamsync; endloop(1);
|
||||
loop(1000); NullKernel; NullKernel; NullKernel; streamsync; endloop(1);
|
||||
loop(10000); NullKernel; NullKernel; NullKernel; streamsync; endloop(1);
|
||||
loop(10000); NullKernel; NullKernel; NullKernel; streamsync; endloop(1);
|
||||
@@ -0,0 +1,10 @@
|
||||
setstream(1);
|
||||
loop(10); NullKernel; NullKernel; NullKernel; NullKernel; streamsync; endloop(1);
|
||||
loop(10); NullKernel; NullKernel; NullKernel; NullKernel; streamsync; endloop(1);
|
||||
loop(100); NullKernel; NullKernel; NullKernel; NullKernel;streamsync; endloop(1);
|
||||
loop(100); NullKernel; NullKernel; NullKernel; NullKernel; streamsync; endloop(1);
|
||||
loop(1000); NullKernel; NullKernel; NullKernel; NullKernel; streamsync; endloop(1);
|
||||
loop(1000); NullKernel; NullKernel; NullKernel; NullKernel; streamsync; endloop(1);
|
||||
loop(1000); NullKernel; NullKernel; NullKernel; NullKernel; streamsync; endloop(1);
|
||||
loop(10000); NullKernel; NullKernel; NullKernel; NullKernel; streamsync; endloop(1);
|
||||
loop(10000); NullKernel; NullKernel; NullKernel; NullKernel; streamsync; endloop(1);
|
||||
@@ -0,0 +1,10 @@
|
||||
setstream(1);
|
||||
loop(10); NullKernel; NullKernel; NullKernel; NullKernel;NullKernel; streamsync; endloop(1);
|
||||
loop(10); NullKernel; NullKernel; NullKernel; NullKernel; NullKernel; streamsync; endloop(1);
|
||||
loop(100); NullKernel; NullKernel; NullKernel; NullKernel; NullKernel;streamsync; endloop(1);
|
||||
loop(100); NullKernel; NullKernel; NullKernel; NullKernel; NullKernel; streamsync; endloop(1);
|
||||
loop(1000); NullKernel; NullKernel; NullKernel; NullKernel; NullKernel; streamsync; endloop(1);
|
||||
loop(1000); NullKernel; NullKernel; NullKernel; NullKernel; NullKernel; streamsync; endloop(1);
|
||||
loop(1000); NullKernel; NullKernel; NullKernel; NullKernel; NullKernel; streamsync; endloop(1);
|
||||
loop(10000); NullKernel; NullKernel; NullKernel; NullKernel; NullKernel; streamsync; endloop(1);
|
||||
loop(10000); NullKernel; NullKernel; NullKernel; NullKernel; NullKernel; streamsync; endloop(1);
|
||||
@@ -0,0 +1,10 @@
|
||||
setstream(1);
|
||||
loop(10); NullKernel; NullKernel; NullKernel; NullKernel;NullKernel;NullKernel; streamsync; endloop(1);
|
||||
loop(10); NullKernel; NullKernel; NullKernel; NullKernel; NullKernel; NullKernel;streamsync; endloop(1);
|
||||
loop(100); NullKernel; NullKernel; NullKernel; NullKernel; NullKernel;NullKernel;streamsync; endloop(1);
|
||||
loop(100); NullKernel; NullKernel; NullKernel; NullKernel; NullKernel; NullKernel;streamsync; endloop(1);
|
||||
loop(1000); NullKernel; NullKernel; NullKernel; NullKernel; NullKernel; NullKernel;streamsync; endloop(1);
|
||||
loop(1000); NullKernel; NullKernel; NullKernel; NullKernel; NullKernel; NullKernel;streamsync; endloop(1);
|
||||
loop(1000); NullKernel; NullKernel; NullKernel; NullKernel; NullKernel; NullKernel;streamsync; endloop(1);
|
||||
loop(10000); NullKernel; NullKernel; NullKernel; NullKernel; NullKernel; NullKernel;streamsync; endloop(1);
|
||||
loop(10000); NullKernel; NullKernel; NullKernel; NullKernel; NullKernel; NullKernel;streamsync; endloop(1);
|
||||
@@ -0,0 +1,9 @@
|
||||
setstream(1);
|
||||
loop(10); D2H; streamsync; endloop(1);
|
||||
loop(10); D2H; streamsync; endloop(1);
|
||||
loop(100); D2H; streamsync; endloop(1);
|
||||
loop(100); D2H; streamsync; endloop(1);
|
||||
loop(1000); D2H; streamsync; endloop(1);
|
||||
loop(1000); D2H; streamsync; endloop(1);
|
||||
loop(10000); D2H; streamsync; endloop(1);
|
||||
loop(10000); D2H; streamsync; endloop(1);
|
||||
@@ -0,0 +1,9 @@
|
||||
setstream(1);
|
||||
loop(10); D2H; H2D; streamsync; endloop(1);
|
||||
loop(10); D2H; H2D; streamsync; endloop(1);
|
||||
loop(100); D2H; H2D; streamsync; endloop(1);
|
||||
loop(100); D2H; H2D; streamsync; endloop(1);
|
||||
loop(1000); D2H; H2D; streamsync; endloop(1);
|
||||
loop(1000); D2H; H2D; streamsync; endloop(1);
|
||||
loop(10000); D2H; H2D; streamsync; endloop(1);
|
||||
loop(10000); D2H; H2D; streamsync; endloop(1);
|
||||
@@ -0,0 +1,9 @@
|
||||
setstream(1);
|
||||
loop(10); D2H; NullKernel; streamsync; endloop(1);
|
||||
loop(10); D2H; NullKernel; streamsync; endloop(1);
|
||||
loop(100); D2H; NullKernel; streamsync; endloop(1);
|
||||
loop(100); D2H; NullKernel; streamsync; endloop(1);
|
||||
loop(1000); D2H; NullKernel; streamsync; endloop(1);
|
||||
loop(1000); D2H; NullKernel; streamsync; endloop(1);
|
||||
loop(10000); D2H; NullKernel; streamsync; endloop(1);
|
||||
loop(10000); D2H; NullKernel; streamsync; endloop(1);
|
||||
@@ -0,0 +1,9 @@
|
||||
setstream(1);
|
||||
loop(10); D2H; streamsync; H2D; streamsync; endloop(1);
|
||||
loop(10); D2H; streamsync; H2D; streamsync; endloop(1);
|
||||
loop(100); D2H; streamsync; H2D; streamsync; endloop(1);
|
||||
loop(100); D2H; streamsync; H2D; streamsync; endloop(1);
|
||||
loop(1000); D2H; streamsync; H2D; streamsync; endloop(1);
|
||||
loop(1000); D2H; streamsync; H2D; streamsync; endloop(1);
|
||||
loop(10000); D2H; streamsync; H2D; streamsync; endloop(1);
|
||||
loop(10000); D2H; streamsync; H2D; streamsync; endloop(1);
|
||||
@@ -0,0 +1,9 @@
|
||||
setstream(1);
|
||||
loop(10); D2H; streamsync;NullKernel; streamsync; endloop(1);
|
||||
loop(10); D2H; streamsync;NullKernel; streamsync; endloop(1);
|
||||
loop(100); D2H; streamsync;NullKernel; streamsync; endloop(1);
|
||||
loop(100); D2H; streamsync;NullKernel; streamsync; endloop(1);
|
||||
loop(1000); D2H; streamsync;NullKernel; streamsync; endloop(1);
|
||||
loop(1000); D2H; streamsync;NullKernel; streamsync; endloop(1);
|
||||
loop(10000); D2H; streamsync;NullKernel; streamsync; endloop(1);
|
||||
loop(10000); D2H; streamsync;NullKernel; streamsync; endloop(1);
|
||||
@@ -0,0 +1,10 @@
|
||||
setstream(1);
|
||||
loop(10); H2D; streamsync; endloop(1);
|
||||
loop(10); H2D; streamsync; endloop(1);
|
||||
loop(100); H2D; streamsync; endloop(1);
|
||||
loop(100); H2D; streamsync; endloop(1);
|
||||
loop(1000); H2D; streamsync; endloop(1);
|
||||
loop(1000); H2D; streamsync; endloop(1);
|
||||
loop(1000); H2D; streamsync; endloop(1);
|
||||
loop(10000); H2D; streamsync; endloop(1);
|
||||
loop(10000); H2D; streamsync; endloop(1);
|
||||
@@ -0,0 +1,2 @@
|
||||
setstream(1);
|
||||
loop(10); H2D; streamsync; endloop(1);
|
||||
@@ -0,0 +1,9 @@
|
||||
setstream(1);
|
||||
loop(10); H2D; D2H; streamsync; endloop(1);
|
||||
loop(10); H2D; D2H; streamsync; endloop(1);
|
||||
loop(100); H2D; D2H; streamsync; endloop(1);
|
||||
loop(100); H2D; D2H; streamsync; endloop(1);
|
||||
loop(1000); H2D; D2H; streamsync; endloop(1);
|
||||
loop(1000); H2D; D2H; streamsync; endloop(1);
|
||||
loop(10000); H2D; D2H; streamsync; endloop(1);
|
||||
loop(10000); H2D; D2H; streamsync; endloop(1);
|
||||
@@ -0,0 +1,9 @@
|
||||
setstream(1);
|
||||
loop(10); H2D; NullKernel; streamsync; endloop(1);
|
||||
loop(10); H2D; NullKernel; streamsync; endloop(1);
|
||||
loop(100); H2D; NullKernel; streamsync; endloop(1);
|
||||
loop(100); H2D; NullKernel; streamsync; endloop(1);
|
||||
loop(1000); H2D; NullKernel; streamsync; endloop(1);
|
||||
loop(1000); H2D; NullKernel; streamsync; endloop(1);
|
||||
loop(10000); H2D; NullKernel; streamsync; endloop(1);
|
||||
loop(10000); H2D; NullKernel; streamsync; endloop(1);
|
||||
@@ -0,0 +1,9 @@
|
||||
setstream(1);
|
||||
loop(10);H2D; streamsync; NullKernel; D2H; streamsync;endloop(1);
|
||||
loop(10); H2D; streamsync; NullKernel; D2H; streamsync; endloop(1);
|
||||
loop(100); H2D; streamsync; NullKernel; D2H; streamsync; endloop(1);
|
||||
loop(100); H2D; streamsync; NullKernel; D2H; streamsync; endloop(1);
|
||||
loop(1000); H2D; streamsync; NullKernel; D2H; streamsync; endloop(1);
|
||||
loop(1000); H2D; streamsync; NullKernel; D2H; streamsync; endloop(1);
|
||||
loop(10000); H2D; streamsync; NullKernel; D2H; streamsync; endloop(1);
|
||||
loop(10000); H2D; streamsync; NullKernel; D2H; streamsync; endloop(1);
|
||||
@@ -0,0 +1,9 @@
|
||||
setstream(1);
|
||||
loop(10);H2D; NullKernel; D2H; streamsync;endloop(1);
|
||||
loop(10); H2D; NullKernel; D2H; streamsync; endloop(1);
|
||||
loop(100); H2D; NullKernel; D2H; streamsync; endloop(1);
|
||||
loop(100); H2D; NullKernel; D2H; streamsync; endloop(1);
|
||||
loop(1000); H2D; NullKernel; D2H; streamsync; endloop(1);
|
||||
loop(1000); H2D; NullKernel; D2H; streamsync; endloop(1);
|
||||
loop(10000); H2D; NullKernel; D2H; streamsync; endloop(1);
|
||||
loop(10000); H2D; NullKernel; D2H; streamsync; endloop(1);
|
||||
@@ -0,0 +1,9 @@
|
||||
setstream(1);
|
||||
loop(10); H2D; NullKernel; streamsync; endloop(1);
|
||||
loop(10); H2D; NullKernel; streamsync; endloop(1);
|
||||
loop(100); H2D; NullKernel; streamsync; endloop(1);
|
||||
loop(100); H2D; NullKernel; streamsync; endloop(1);
|
||||
loop(1000); H2D; NullKernel; streamsync; endloop(1);
|
||||
loop(1000); H2D; NullKernel; streamsync; endloop(1);
|
||||
loop(10000); H2D; NullKernel; streamsync; endloop(1);
|
||||
loop(10000); H2D ; NullKernel; streamsync; endloop(1);
|
||||
@@ -0,0 +1,9 @@
|
||||
setstream(1);
|
||||
loop(10); H2D; streamsync; D2H; streamsync; endloop(1);
|
||||
loop(10); H2D; streamsync; D2H; streamsync; endloop(1);
|
||||
loop(100); H2D; streamsync; D2H; streamsync; endloop(1);
|
||||
loop(100); H2D; streamsync; D2H; streamsync; endloop(1);
|
||||
loop(1000); H2D; streamsync; D2H; streamsync; endloop(1);
|
||||
loop(1000); H2D; streamsync; D2H; streamsync; endloop(1);
|
||||
loop(10000); H2D; streamsync; D2H; streamsync; endloop(1);
|
||||
loop(10000); H2D; streamsync; D2H; streamsync; endloop(1);
|
||||
@@ -0,0 +1,9 @@
|
||||
setstream(1);
|
||||
loop(10); H2D; streamsync; NullKernel; streamsync; endloop(1);
|
||||
loop(10); H2D; streamsync; NullKernel; streamsync; endloop(1);
|
||||
loop(100); H2D; streamsync; NullKernel; streamsync; endloop(1);
|
||||
loop(100); H2D; streamsync; NullKernel; streamsync; endloop(1);
|
||||
loop(1000); H2D; streamsync; NullKernel; streamsync; endloop(1);
|
||||
loop(1000); H2D; streamsync; NullKernel; streamsync; endloop(1);
|
||||
loop(10000); H2D; streamsync; NullKernel; streamsync; endloop(1);
|
||||
loop(10000); H2D; streamsync; NullKernel; streamsync; endloop(1);
|
||||
@@ -0,0 +1,9 @@
|
||||
setstream(1);
|
||||
loop(10);H2D; streamsync; NullKernel;streamsync; D2H; streamsync;endloop(1);
|
||||
loop(10); H2D; streamsync; NullKernel; streamsync; D2H; streamsync; endloop(1);
|
||||
loop(100); H2D; streamsync; NullKernel; streamsync; D2H; streamsync; endloop(1);
|
||||
loop(100); H2D; streamsync; NullKernel; streamsync; D2H; streamsync; endloop(1);
|
||||
loop(1000); H2D; streamsync; NullKernel; streamsync; D2H; streamsync; endloop(1);
|
||||
loop(1000); H2D; streamsync; NullKernel; streamsync; D2H; streamsync; endloop(1);
|
||||
loop(10000); H2D; streamsync; NullKernel; streamsync; D2H; streamsync; endloop(1);
|
||||
loop(10000); H2D; streamsync; NullKernel; streamsync; D2H; streamsync; endloop(1);
|
||||
@@ -0,0 +1,10 @@
|
||||
setstream(1);
|
||||
loop(10); NullKernel; streamsync; endloop(1);
|
||||
loop(10); NullKernel; streamsync; endloop(1);
|
||||
loop(100); NullKernel; streamsync; endloop(1);
|
||||
loop(100); NullKernel; streamsync; endloop(1);
|
||||
loop(1000); NullKernel; streamsync; endloop(1);
|
||||
loop(1000); NullKernel; streamsync; endloop(1);
|
||||
loop(1000); NullKernel; streamsync; endloop(1);
|
||||
loop(10000); NullKernel; streamsync; endloop(1);
|
||||
loop(10000); NullKernel; streamsync; endloop(1);
|
||||
@@ -0,0 +1,9 @@
|
||||
setstream(1);
|
||||
loop(10); NullKernel; streamsync; streamsync; endloop(1);
|
||||
loop(10); NullKernel; streamsync; streamsync; endloop(1);
|
||||
loop(100); NullKernel; streamsync; streamsync; endloop(1);
|
||||
loop(100); NullKernel; streamsync; streamsync; endloop(1);
|
||||
loop(1000); NullKernel; streamsync; streamsync; endloop(1);
|
||||
loop(1000); NullKernel; streamsync; streamsync; endloop(1);
|
||||
loop(10000); NullKernel; streamsync; streamsync; endloop(1);
|
||||
loop(10000); NullKernel; streamsync; streamsync; endloop(1);
|
||||
@@ -0,0 +1,9 @@
|
||||
setstream(1);
|
||||
loop(10); NullKernel; D2H; streamsync; endloop(1);
|
||||
loop(10); NullKernel; D2H; streamsync; endloop(1);
|
||||
loop(100); NullKernel; D2H; streamsync; endloop(1);
|
||||
loop(100); NullKernel; D2H; streamsync; endloop(1);
|
||||
loop(1000); NullKernel; D2H; streamsync; endloop(1);
|
||||
loop(1000); NullKernel; D2H; streamsync; endloop(1);
|
||||
loop(10000); NullKernel; D2H; streamsync; endloop(1);
|
||||
loop(10000); NullKernel; D2H; streamsync; endloop(1);
|
||||
@@ -0,0 +1,9 @@
|
||||
setstream(1);
|
||||
loop(10); NullKernel; H2D; streamsync; endloop(1);
|
||||
loop(10); NullKernel; H2D; streamsync; endloop(1);
|
||||
loop(100); NullKernel; H2D; streamsync; endloop(1);
|
||||
loop(100); NullKernel; H2D; streamsync; endloop(1);
|
||||
loop(1000); NullKernel; H2D; streamsync; endloop(1);
|
||||
loop(1000); NullKernel; H2D; streamsync; endloop(1);
|
||||
loop(10000); NullKernel; H2D; streamsync; endloop(1);
|
||||
loop(10000); NullKernel; H2D; streamsync; endloop(1);
|
||||
@@ -0,0 +1,9 @@
|
||||
setstream(1);
|
||||
loop(10); NullKernel; streamsync; D2H; streamsync; endloop(1);
|
||||
loop(10); NullKernel; streamsync; D2H; streamsync; endloop(1);
|
||||
loop(100); NullKernel; streamsync; D2H; streamsync; endloop(1);
|
||||
loop(100); NullKernel; streamsync; D2H; streamsync; endloop(1);
|
||||
loop(1000); NullKernel; streamsync; D2H; streamsync; endloop(1);
|
||||
loop(1000); NullKernel; streamsync; D2H; streamsync; endloop(1);
|
||||
loop(10000); NullKernel; streamsync; D2H; streamsync; endloop(1);
|
||||
loop(10000); NullKernel; streamsync; D2H; streamsync; endloop(1);
|
||||
@@ -0,0 +1,9 @@
|
||||
setstream(1);
|
||||
loop(10); NullKernel; streamsync; H2D; streamsync; endloop(1);
|
||||
loop(10); NullKernel; streamsync; H2D; streamsync; endloop(1);
|
||||
loop(100); NullKernel; streamsync; H2D; streamsync; endloop(1);
|
||||
loop(100); NullKernel; streamsync; H2D; streamsync; endloop(1);
|
||||
loop(1000); NullKernel; streamsync; H2D; streamsync; endloop(1);
|
||||
loop(1000); NullKernel; streamsync; H2D; streamsync; endloop(1);
|
||||
loop(10000); NullKernel; streamsync; H2D; streamsync; endloop(1);
|
||||
loop(10000); NullKernel; streamsync; H2D; streamsync; endloop(1);
|
||||
@@ -0,0 +1,2 @@
|
||||
setstream(1);
|
||||
loop(10);setstream(1);setstream(2);setstream(3);setstream(4);setstream(5);streamsync; endloop(1);
|
||||
@@ -0,0 +1,9 @@
|
||||
setstream(1);
|
||||
loop(10); streamsync; endloop(1);
|
||||
loop(10); streamsync; endloop(1);
|
||||
loop(100); streamsync; endloop(1);
|
||||
loop(100); streamsync; endloop(1);
|
||||
loop(1000); streamsync; endloop(1);
|
||||
loop(1000); streamsync; endloop(1);
|
||||
loop(10000); streamsync; endloop(1);
|
||||
loop(10000); streamsync; endloop(1);
|
||||
@@ -63,6 +63,14 @@ double bytesToGB(size_t s)
|
||||
return (double)s / (1024.0*1024.0*1024.0);
|
||||
}
|
||||
|
||||
#define printLimit(w1, limit, units) \
|
||||
{\
|
||||
size_t val;\
|
||||
cudaDeviceGetLimit(&val, limit);\
|
||||
std::cout << setw(w1) << #limit": " << val << " " << units << std::endl;\
|
||||
}
|
||||
|
||||
|
||||
void printDeviceProp (int deviceId)
|
||||
{
|
||||
using namespace std;
|
||||
@@ -144,6 +152,17 @@ void printDeviceProp (int deviceId)
|
||||
cout << endl;
|
||||
|
||||
|
||||
#ifdef __HIP_PLATFORM_NVCC__
|
||||
// Limits:
|
||||
cout << endl;
|
||||
printLimit(w1, cudaLimitStackSize, "bytes/thread");
|
||||
printLimit(w1, cudaLimitPrintfFifoSize, "bytes/device");
|
||||
printLimit(w1, cudaLimitMallocHeapSize, "bytes/device");
|
||||
printLimit(w1, cudaLimitDevRuntimeSyncDepth, "grids");
|
||||
printLimit(w1, cudaLimitDevRuntimePendingLaunchCount, "launches");
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
||||
cout << endl;
|
||||
|
||||
@@ -3,6 +3,10 @@ ifeq (,$(HIP_PATH))
|
||||
HIP_PATH=../../..
|
||||
endif
|
||||
|
||||
ifeq (gfx701, $(findstring gfx701,$(HCC_AMDGPU_TARGET)))
|
||||
$(error gfx701 is not a supported device for this sample)
|
||||
endif
|
||||
|
||||
HIPCC=$(HIP_PATH)/bin/hipcc
|
||||
|
||||
TARGET=hcc
|
||||
@@ -22,7 +26,7 @@ CXX=$(HIPCC)
|
||||
|
||||
|
||||
$(EXECUTABLE): $(OBJECTS)
|
||||
$(HIPCC) $(OBJECTS) -o $@
|
||||
$(HIPCC) $(OBJECTS) -o $@
|
||||
|
||||
|
||||
test: $(EXECUTABLE)
|
||||
@@ -33,4 +37,3 @@ clean:
|
||||
rm -f $(EXECUTABLE)
|
||||
rm -f $(OBJECTS)
|
||||
rm -f $(HIP_PATH)/src/*.o
|
||||
|
||||
|
||||
@@ -3,6 +3,10 @@ ifeq (,$(HIP_PATH))
|
||||
HIP_PATH=../../..
|
||||
endif
|
||||
|
||||
ifeq (gfx701, $(findstring gfx701,$(HCC_AMDGPU_TARGET)))
|
||||
$(error gfx701 is not a supported device for this sample)
|
||||
endif
|
||||
|
||||
HIPCC=$(HIP_PATH)/bin/hipcc
|
||||
|
||||
TARGET=hcc
|
||||
|
||||
@@ -1,34 +0,0 @@
|
||||
HIP_PATH?= $(wildcard /opt/rocm/hip)
|
||||
ifeq (,$(HIP_PATH))
|
||||
HIP_PATH=../../..
|
||||
endif
|
||||
HIPCC=$(HIP_PATH)/bin/hipcc
|
||||
|
||||
HIPCC_FLAGS += -std=c++11
|
||||
HIP_PLATFORM=$(shell $(HIP_PATH)/bin/hipconfig --platform)
|
||||
ifeq (${HIP_PLATFORM}, nvcc)
|
||||
LIBS = -lcublas
|
||||
endif
|
||||
ifeq (${HIP_PLATFORM}, hcc)
|
||||
HCBLAS_ROOT?= $(wildcard /opt/rocm/hcblas)
|
||||
HIPCC_FLAGS += -stdlib=libc++ -I$(HCBLAS_ROOT)/include
|
||||
LIBS = -L$(HCBLAS_ROOT)/lib -lhipblas -rpath $(HIP_PATH)/lib
|
||||
endif
|
||||
|
||||
|
||||
all: saxpy.hipblas.out
|
||||
|
||||
saxpy.cublas.out : saxpy.cublas.cpp
|
||||
nvcc -std=c++11 -I$(CUDA_HOME)/include saxpy.cublas.cpp -o $@ -L$(CUDA_HOME)/lib64 -lcublas
|
||||
|
||||
# $HIPBLAS_ROOT/bin/hipifyblas ./saxpy.cublas.cpp > ./saxpy.hipblas.cpp
|
||||
# Then review & finish port in saxpy.hipblas.cpp
|
||||
|
||||
saxpy.hipblasref.o: saxpy.hipblasref.cpp
|
||||
$(HIPCC) $(HIPCC_FLAGS) -c $< -o $@
|
||||
|
||||
saxpy.hipblas.out: saxpy.hipblasref.o
|
||||
$(HIPCC) $< -o $@ $(LIBS)
|
||||
|
||||
clean:
|
||||
rm -f *.o *.out
|
||||
@@ -1,94 +0,0 @@
|
||||
|
||||
#include <random>
|
||||
#include <algorithm>
|
||||
#include <iostream>
|
||||
#include <cmath>
|
||||
|
||||
// header file for the GPU API
|
||||
#include <cuda_runtime.h>
|
||||
#include <cublas_v2.h>
|
||||
|
||||
#define N (1024 * 500)
|
||||
|
||||
#define CHECK(cmd) \
|
||||
{\
|
||||
cudaError_t error = cmd; \
|
||||
if (error != cudaSuccess) { \
|
||||
fprintf(stderr, "error: '%s'(%d) at %s:%d\n", cudaGetErrorString(error), error,__FILE__, __LINE__); \
|
||||
exit(EXIT_FAILURE);\
|
||||
}\
|
||||
}
|
||||
|
||||
#define CHECK_BLAS(cmd) \
|
||||
{\
|
||||
cublasStatus_t error = cmd;\
|
||||
if (error != CUBLAS_STATUS_SUCCESS) { \
|
||||
fprintf(stderr, "error: (%d) at %s:%d\n", error,__FILE__, __LINE__); \
|
||||
exit(EXIT_FAILURE);\
|
||||
}\
|
||||
}
|
||||
|
||||
int main() {
|
||||
|
||||
const float a = 100.0f;
|
||||
float x[N];
|
||||
float y[N], y_cpu_res[N], y_gpu_res[N];
|
||||
|
||||
// initialize the input data
|
||||
std::default_random_engine random_gen;
|
||||
std::uniform_real_distribution<float> distribution(-N, N);
|
||||
std::generate_n(x, N, [&]() { return distribution(random_gen); });
|
||||
std::generate_n(y, N, [&]() { return distribution(random_gen); });
|
||||
std::copy_n(y, N, y_cpu_res);
|
||||
|
||||
// Explicit GPU code:
|
||||
|
||||
size_t Nbytes = N*sizeof(float);
|
||||
float *x_gpu, *y_gpu;
|
||||
|
||||
cublasHandle_t handle;
|
||||
|
||||
cudaDeviceProp props;
|
||||
CHECK(cudaGetDeviceProperties(&props, 0/*deviceID*/));
|
||||
printf ("info: running on device %s\n", props.name);
|
||||
|
||||
printf ("info: allocate host mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0);
|
||||
printf ("info: allocate device mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0);
|
||||
CHECK(cudaMalloc(&x_gpu, Nbytes));
|
||||
CHECK(cudaMalloc(&y_gpu, Nbytes));
|
||||
|
||||
// Initialize the blas library
|
||||
CHECK_BLAS ( cublasCreate(&handle));
|
||||
|
||||
// copy n elements from a vector in host memory space to a vector in GPU memory space
|
||||
printf ("info: copy Host2Device\n");
|
||||
CHECK_BLAS ( cublasSetVector(N, sizeof(*x), x, 1, x_gpu, 1));
|
||||
CHECK_BLAS ( cublasSetVector(N, sizeof(*y), y, 1, y_gpu, 1));
|
||||
|
||||
printf ("info: launch 'saxpy' kernel\n");
|
||||
CHECK_BLAS ( cublasSaxpy(handle, N, &a, x_gpu, 1, y_gpu, 1));
|
||||
|
||||
cudaDeviceSynchronize();
|
||||
|
||||
printf ("info: copy Device2Host\n");
|
||||
CHECK_BLAS ( cublasGetVector(N, sizeof(*y_gpu_res), y_gpu, 1, y_gpu_res, 1));
|
||||
|
||||
// CPU implementation of saxpy
|
||||
for (int i = 0; i < N; i++) {
|
||||
y_cpu_res[i] = a * x[i] + y[i];
|
||||
}
|
||||
|
||||
// verify the results
|
||||
int errors = 0;
|
||||
for (int i = 0; i < N; i++) {
|
||||
if (fabs(y_cpu_res[i] - y_gpu_res[i]) > fabs(y_cpu_res[i] * 0.0001f))
|
||||
errors++;
|
||||
}
|
||||
std::cout << errors << " errors" << std::endl;
|
||||
|
||||
CHECK( cudaFree(x_gpu));
|
||||
CHECK( cudaFree(y_gpu));
|
||||
CHECK_BLAS( cublasDestroy(handle));
|
||||
|
||||
return errors;
|
||||
}
|
||||
@@ -1,94 +0,0 @@
|
||||
|
||||
#include <random>
|
||||
#include <algorithm>
|
||||
#include <iostream>
|
||||
#include <cmath>
|
||||
|
||||
// header file for the GPU API
|
||||
#include "hip/hip_runtime.h"
|
||||
#include <hipblas.h>
|
||||
|
||||
#define N (1024 * 500)
|
||||
|
||||
#define CHECK(cmd) \
|
||||
{\
|
||||
hipError_t error = cmd; \
|
||||
if (error != hipSuccess) { \
|
||||
fprintf(stderr, "error: '%s'(%d) at %s:%d\n", hipGetErrorString(error), error,__FILE__, __LINE__); \
|
||||
exit(EXIT_FAILURE);\
|
||||
}\
|
||||
}
|
||||
|
||||
#define CHECK_BLAS(cmd) \
|
||||
{\
|
||||
hipblasStatus_t error = cmd;\
|
||||
if (error != HIPBLAS_STATUS_SUCCESS) { \
|
||||
fprintf(stderr, "error: (%d) at %s:%d\n", error,__FILE__, __LINE__); \
|
||||
exit(EXIT_FAILURE);\
|
||||
}\
|
||||
}
|
||||
|
||||
int main() {
|
||||
|
||||
const float a = 100.0f;
|
||||
float x[N];
|
||||
float y[N], y_cpu_res[N], y_gpu_res[N];
|
||||
|
||||
// initialize the input data
|
||||
std::default_random_engine random_gen;
|
||||
std::uniform_real_distribution<float> distribution(-N, N);
|
||||
std::generate_n(x, N, [&]() { return distribution(random_gen); });
|
||||
std::generate_n(y, N, [&]() { return distribution(random_gen); });
|
||||
std::copy_n(y, N, y_cpu_res);
|
||||
|
||||
// Explicit GPU code:
|
||||
|
||||
size_t Nbytes = N*sizeof(float);
|
||||
float *x_gpu, *y_gpu;
|
||||
|
||||
hipblasHandle_t handle;
|
||||
|
||||
hipDeviceProp_t props;
|
||||
CHECK(hipGetDeviceProperties(&props, 0/*deviceID*/));
|
||||
printf ("info: running on device %s\n", props.name);
|
||||
|
||||
printf ("info: allocate host mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0);
|
||||
printf ("info: allocate device mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0);
|
||||
CHECK(hipMalloc(&x_gpu, Nbytes));
|
||||
CHECK(hipMalloc(&y_gpu, Nbytes));
|
||||
|
||||
// Initialize the blas library
|
||||
CHECK_BLAS ( hipblasCreate(&handle));
|
||||
|
||||
// copy n elements from a vector in host memory space to a vector in GPU memory space
|
||||
printf ("info: copy Host2Device\n");
|
||||
CHECK_BLAS ( hipblasSetVector(N, sizeof(*x), x, 1, x_gpu, 1));
|
||||
CHECK_BLAS ( hipblasSetVector(N, sizeof(*y), y, 1, y_gpu, 1));
|
||||
|
||||
printf ("info: launch 'saxpy' kernel\n");
|
||||
CHECK_BLAS ( hipblasSaxpy(handle, N, &a, x_gpu, 1, y_gpu, 1));
|
||||
|
||||
hipDeviceSynchronize();
|
||||
|
||||
printf ("info: copy Device2Host\n");
|
||||
CHECK_BLAS ( hipblasGetVector(N, sizeof(*y_gpu_res), y_gpu, 1, y_gpu_res, 1));
|
||||
|
||||
// CPU implementation of saxpy
|
||||
for (int i = 0; i < N; i++) {
|
||||
y_cpu_res[i] = a * x[i] + y[i];
|
||||
}
|
||||
|
||||
// verify the results
|
||||
int errors = 0;
|
||||
for (int i = 0; i < N; i++) {
|
||||
if (fabs(y_cpu_res[i] - y_gpu_res[i]) > fabs(y_cpu_res[i] * 0.0001f))
|
||||
errors++;
|
||||
}
|
||||
std::cout << errors << " errors" << std::endl;
|
||||
|
||||
CHECK( hipFree(x_gpu));
|
||||
CHECK( hipFree(y_gpu));
|
||||
CHECK_BLAS( hipblasDestroy(handle));
|
||||
|
||||
return errors;
|
||||
}
|
||||
Verwijs in nieuw issue
Block a user