Merge branch 'privatestaging' of https://github.com/AMDComputeLibraries/HIP-privatestaging into privatestaging

Cette révision appartient à :
pensun
2016-02-28 13:09:46 -06:00
révision 2b9ffefad0
4 fichiers modifiés avec 80 ajouts et 20 suppressions
+14
Voir le fichier
@@ -45,6 +45,20 @@ Verify your can find hipconfig (one of the hip tools in bin dir):
```
> hipconfig -pn
/home/me/HIP
### Using HIP with the AMD Native-GCN compiler.
AMD recently released a direct-to-GCN-ISA target. This compiler generates GCN ISA directly from LLVM, without going through an intermediate compiler
IR such as HSAIL or PTX.
The native GCN target is included with upstream LLVM, and has also been integrated with HCC compiler and can be used to compiler HIP programs for AMD.
Here's how to use it with HIP:
- Follow the instructions here to compile the HCC and native LLVM compiler:
> https://github.com/RadeonOpenCompute/HCC-Native-GCN-ISA/wiki
> (In the make step for HCC, we recommend setting -DCMAKE_INSTALL_PREFIX=/opt/hcc-native)
Set HCC_HOME environment variable before compiling HIP program to point to the native compiler:
> export HCC_HOME=/opt/hcc-native
```
## Examples and Getting Started:
+37 -3
Voir le fichier
@@ -15,6 +15,7 @@ GetOptions(
,"platform|P" => \$p_platform
,"cpp_config|cxx_config|C" => \$p_cpp_config
,"full|f|info" => \$p_full,
,"check" => \$p_check,
,"newline|n" => \$p_newline
);
@@ -25,6 +26,7 @@ if ($p_help) {
print " --compiler, -c : print compiler (hcc or nvcc)\n";
print " --platform, -P : print platform (hcc or nvcc)\n";
print " --full, -f : print full config\n";
print " --check : check configuration\n";
print " --newline, -n : print newline\n";
print " --help, -h : print help message\n";
exit();
@@ -37,6 +39,9 @@ $CUDA_PATH='/usr/local/cuda' unless defined $CUDA_PATH;
$HCC_HOME=$ENV{'HCC_HOME'};
$HCC_HOME='/opt/hcc' unless defined $HCC_HOME;
$HSA_PATH=$ENV{'HSA_PATH'};
$HSA_PATH='/opt/hsa' unless defined $HSA_PATH;
#---
#HIP_PLATFORM controls whether to use NVCC or HCC for compilation:
$HIP_PLATFORM=$ENV{'HIP_PLATFORM'};
@@ -67,7 +72,15 @@ if ($p_cpp_config) {
$printed = 1;
}
if ($p_full) {
if ($p_compiler or $p_platform) {
print $HIP_PLATFORM;
$printed = 1;
}
if (!$printed or $p_full) {
print "== hipconfig\n";
print "HIP_PATH : ", $HIP_PATH, "\n";
print "HIP_PLATFORM : ", $HIP_PLATFORM, "\n";
@@ -76,6 +89,7 @@ if ($p_full) {
{
print "\n" ;
print "== hcc\n";
print ("HSA_PATH : $HSA_PATH\n");
print ("HCC_HOME : $HCC_HOME\n");
system("$HCC_HOME/bin/hcc --version");
print ("HCC-cxxflags: ");
@@ -107,10 +121,30 @@ if ($p_full) {
}
if (!$printed or $p_compiler or $p_platform) {
print $HIP_PLATFORM;
if ($p_check) {
print "\nCheck system installation:\n";
printf ("%-70s", "check hipconfig in PATH...");
if (system ("hipconfig > /dev/null 2>&1") != 0) {
print "FAIL\n";
} else {
printf "good\n";
}
if ($HIP_PLATFORM eq "hcc") {
$LD_LIBRARY_PATH=$ENV{'LD_LIBRARY_PATH'};
printf("%-70s", "check LD_LIBRARY_PATH ($LD_LIBRARY_PATH) contains HSA_PATH ($HSA_PATH)...");
if (index($LD_LIBRARY_PATH, $HSA_PATH) == -1) {
print "FAIL\n";
} else {
printf "good\n";
}
# TODO - check hipcc / nvcc found and executable.
}
}
if ($p_newline) {
print "\n";
}
+1 -1
Voir le fichier
@@ -115,7 +115,7 @@ directory names.
```shell
> hipconverinplace.sh MY_SRC_DIR
> hipconvertinplace.sh MY_SRC_DIR
```
+28 -16
Voir le fichier
@@ -46,7 +46,7 @@ THE SOFTWARE.
#define USE_AM_TRACKER 1 /* >0 = use new AM memory tracker features. */
#define USE_ROCR_V2 0 /* use the ROCR v2 async copy API with dst and src agents */
#define USE_ROCR_V2 1 /* use the ROCR v2 async copy API with dst and src agents */
#if (USE_AM_TRACKER) and (__hcc_workweek__ < 16074)
#error (USE_AM_TRACKER requries HCC version of 16074 or newer)
@@ -498,9 +498,14 @@ inline bool ihipStream_t::preKernelCommand()
this, ihipCommandName[_last_command_type], ihipCommandName[ihipCommandKernel], _last_copy_signal->_sig_id)
} else {
tprintf (TRACE_SYNC, "stream %p switch %s to %s (wait for previous...)\n",
this, ihipCommandName[_last_command_type], ihipCommandName[ihipCommandKernel]);
this->waitAndReclaimOlder(_last_copy_signal);
if (HIP_DISABLE_HW_KERNEL_DEP != -1) {
tprintf (TRACE_SYNC, "stream %p switch %s to %s (wait for previous...)\n",
this, ihipCommandName[_last_command_type], ihipCommandName[ihipCommandKernel]);
this->waitAndReclaimOlder(_last_copy_signal);
} else {
tprintf (TRACE_SYNC, "stream %p switch %s to %s (IGNORE dependency)\n",
this, ihipCommandName[_last_command_type], ihipCommandName[ihipCommandKernel]);
}
}
}
_last_command_type = ihipCommandKernel;
@@ -548,9 +553,15 @@ inline int ihipStream_t::copyCommand(ihipSignal_t *lastCopy, hsa_signal_t *waitS
}
if (HIP_DISABLE_HW_COPY_DEP && needSync) {
// do the wait here on the host, and disable the device-side command resolution.
hsa_signal_wait_acquire(*waitSignal, HSA_SIGNAL_CONDITION_LT, 1, UINT64_MAX, HSA_WAIT_STATE_ACTIVE);
needSync = 0;
if (HIP_DISABLE_HW_COPY_DEP == -1) {
tprintf (TRACE_SYNC, "IGNORE copy dependency\n")
} else {
tprintf (TRACE_SYNC, "HOST-wait for copy dependency\n")
// do the wait here on the host, and disable the device-side command resolution.
hsa_signal_wait_acquire(*waitSignal, HSA_SIGNAL_CONDITION_LT, 1, UINT64_MAX, HSA_WAIT_STATE_ACTIVE);
needSync = 0;
}
}
_last_command_type = copyType;
@@ -937,8 +948,8 @@ void ihipInit()
READ_ENV_I(release, HIP_STREAM_SIGNALS, 0, "Number of signals to allocate when new stream is created (signal pool will grow on demand)");
READ_ENV_I(release, HIP_VISIBLE_DEVICES, CUDA_VISIBLE_DEVICES, "Only devices whose index is present in the secquence are visible to HIP applications and they are enumerated in the order of secquence" );
READ_ENV_I(release, HIP_DISABLE_HW_KERNEL_DEP, 0, "Disable HW dependencies before kernel commands - instead wait for dependency on host.");
READ_ENV_I(release, HIP_DISABLE_HW_COPY_DEP, 0, "Disable HW dependencies before copy commands - instead wait for dependency on host.");
READ_ENV_I(release, HIP_DISABLE_HW_KERNEL_DEP, 0, "Disable HW dependencies before kernel commands - instead wait for dependency on host. -1 means ignore these dependencies. (debug mode)");
READ_ENV_I(release, HIP_DISABLE_HW_COPY_DEP, 0, "Disable HW dependencies before copy commands - instead wait for dependency on host. -1 means ifnore these dependencies (debug mode)");
READ_ENV_I(release, HIP_DISABLE_BIDIR_MEMCPY, 0, "Disable simultaneous H2D memcpy and D2H memcpy to same device");
READ_ENV_I(release, HIP_ONESHOT_COPY_DEP, 0, "If set, only set the copy input dependency for the first copy command in a staged copy. If clear, set the dep for each copy.");
@@ -2262,15 +2273,14 @@ void ihipSyncCopy(ihipStream_t *stream, void* dst, const void* src, size_t sizeB
}
}
hsa_signal_t depSignal;
int depSignalCnt = stream->copyCommand(NULL, &depSignal, ihipCommandCopyH2D);
if ((kind == hipMemcpyHostToDevice) && (srcNotTracked)) {
if (HIP_STAGING_BUFFERS) {
std::lock_guard<std::mutex> l (device->_copy_lock[0]);
//printf ("staged-copy- read dep signals\n");
hsa_signal_t depSignal;
int depSignalCnt = stream->copyCommand(NULL, &depSignal, ihipCommandCopyH2D);
if (HIP_PININPLACE) {
device->_staging_buffer[0]->CopyHostToDevicePinInPlace(dst, src, sizeBytes, depSignalCnt ? &depSignal : NULL);
} else {
@@ -2287,14 +2297,17 @@ void ihipSyncCopy(ihipStream_t *stream, void* dst, const void* src, size_t sizeB
if (HIP_STAGING_BUFFERS) {
std::lock_guard<std::mutex> l (device->_copy_lock[HIP_DISABLE_BIDIR_MEMCPY ? 0:1]);
//printf ("staged-copy- read dep signals\n");
hsa_signal_t depSignal;
int depSignalCnt = stream->copyCommand(NULL, &depSignal, ihipCommandCopyD2H);
device->_staging_buffer[1]->CopyDeviceToHost(dst, src, sizeBytes, depSignalCnt ? &depSignal : NULL);
} else {
// TODO - remove, slow path.
hc::am_copy(dst, src, sizeBytes);
}
} else if (kind == hipMemcpyHostToHost) { // TODO-refactor.
if (depSignalCnt) {
// host waits before doing host memory copy.
hsa_signal_wait_acquire(depSignal, HSA_SIGNAL_CONDITION_LT, 1, UINT64_MAX, HSA_WAIT_STATE_ACTIVE);
}
memcpy(dst, src, sizeBytes);
} else {
@@ -2315,8 +2328,6 @@ void ihipSyncCopy(ihipStream_t *stream, void* dst, const void* src, size_t sizeB
#if USE_ROCR_V2
hsa_signal_t depSignal;
int depSignalCnt = stream->copyCommand(NULL, &depSignal, copyType);
hsa_status_t hsa_status = hsa_amd_memory_async_copy(dst, device->_hsa_agent, src, device->_hsa_agent, sizeBytes, depSignalCnt, depSignalCnt ? &depSignal:0x0, device->_copy_signal);
#else
hsa_status_t hsa_status = hsa_amd_memory_async_copy(dst, src, sizeBytes, device->_hsa_agent, 0, NULL, device->_copy_signal);
@@ -2370,6 +2381,7 @@ hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind
/**
* @result #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidMemcpyDirection, #hipErrorInvalidValue
* @warning on HCC hipMemcpyAsync does not support overlapped H2D and D2H copies.
* @warning on HCC hipMemcpyAsync requires that any host pointers are pinned (ie via the hipMallocHost call).
*/
//---
hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind, hipStream_t stream)