diff --git a/rocclr/device/devkernel.cpp b/rocclr/device/devkernel.cpp
index 6f62239a61..7cb64a6179 100644
--- a/rocclr/device/devkernel.cpp
+++ b/rocclr/device/devkernel.cpp
@@ -674,7 +674,7 @@ void Kernel::FindLocalWorkSize(size_t workDim, const amd::NDRange& gblWorkSize,
   if (workGroupInfo()->compileSize_[0] == 0) {
     // Find the default local workgroup size, if it wasn't specified
     if (lclWorkSize[0] == 0) {
-      if ((dev().settings().overrideLclSet & (1 << (workDim - 1))) == 0) {
+      if ((device().settings().overrideLclSet & (1 << (workDim - 1))) == 0) {
         // Find threads per group
         size_t thrPerGrp = workGroupInfo()->size_;
 
@@ -685,7 +685,7 @@ void Kernel::FindLocalWorkSize(size_t workDim, const amd::NDRange& gblWorkSize,
           // and it's 2 or 3-dimensional workload
           (workDim > 1) && (((gblWorkSize[0] % 16) == 0) && ((gblWorkSize[1] % 16) == 0))) {
           // Use 8x8 workgroup size if kernel has image writes
-          if (flags_.imageWriteEna_ || (thrPerGrp != dev().info().preferredWorkGroupSize_)) {
+          if (flags_.imageWriteEna_ || (thrPerGrp != device().info().preferredWorkGroupSize_)) {
             lclWorkSize[0] = 8;
             lclWorkSize[1] = 8;
           }
@@ -709,7 +709,7 @@ void Kernel::FindLocalWorkSize(size_t workDim, const amd::NDRange& gblWorkSize,
           }
 
           // Assuming DWORD access
-          const uint cacheLineMatch = dev().info().globalMemCacheLineSize_ >> 2;
+          const uint cacheLineMatch = device().info().globalMemCacheLineSize_ >> 2;
 
           // Check if we couldn't find optimal workload
           if (((lclWorkSize.product() % workGroupInfo()->wavefrontSize_) != 0) ||
@@ -1095,7 +1095,7 @@ bool Kernel::GetAttrCodePropMetadata() {
   InitParameters(kernelMetaNode);
 
   // Set the workgroup information for the kernel
-  workGroupInfo_.availableLDSSize_ = dev().info().localMemSizePerCU_;
+  workGroupInfo_.availableLDSSize_ = device().info().localMemSizePerCU_;
   workGroupInfo_.availableSGPRs_ = 104;
   workGroupInfo_.availableVGPRs_ = 256;
 
diff --git a/rocclr/device/devkernel.hpp b/rocclr/device/devkernel.hpp
index d164f76a76..ad97d9a2f7 100644
--- a/rocclr/device/devkernel.hpp
+++ b/rocclr/device/devkernel.hpp
@@ -407,7 +407,7 @@ class Kernel : public amd::HeapObject {
   };
 
   //! Returns GPU device object, associated with this kernel
-  const amd::Device& dev() const { return dev_; }
+  const amd::Device& device() const { return dev_; }
 
   void setVecTypeHint(const std::string& hint) { workGroupInfo_.compileVecTypeHint_ = hint; }
 
diff --git a/rocclr/device/gpu/gpucompiler.cpp b/rocclr/device/gpu/gpucompiler.cpp
index 596095a9bd..d399fd4821 100644
--- a/rocclr/device/gpu/gpucompiler.cpp
+++ b/rocclr/device/gpu/gpucompiler.cpp
@@ -42,7 +42,7 @@ bool NullProgram::compileImpl(const std::string& src,
                               const char** headerIncludeNames, amd::option::Options* options) {
   std::string sourceCode = src;
 
-  if (dev().settings().debugFlags_ & Settings::CheckForILSource) {
+  if (gpuNullDevice().settings().debugFlags_ & Settings::CheckForILSource) {
     size_t inc = sourceCode.find("il_cs_", 0);
     if (inc != std::string::npos) {
       // CL program is an IL program
@@ -55,7 +55,7 @@ bool NullProgram::compileImpl(const std::string& src,
   std::string tempFolder = amd::Os::getTempPath();
   std::string tempFileName = amd::Os::getTempFileName();
 
-  if (dev().settings().debugFlags_ & Settings::StubCLPrograms) {
+  if (gpuNullDevice().settings().debugFlags_ & Settings::StubCLPrograms) {
     std::stringstream fileName;
     std::fstream stubRead;
     // Dump the IL function
@@ -130,7 +130,7 @@ bool NullProgram::compileImpl(const std::string& src,
   }
 
   if (ACL_SUCCESS !=
-      aclInsertSection(dev().amdilCompiler(), bin, sourceCode.c_str(), sourceCode.size(), aclSOURCE)) {
+      aclInsertSection(gpuNullDevice().amdilCompiler(), bin, sourceCode.c_str(), sourceCode.size(), aclSOURCE)) {
     LogWarning("aclInsertSection failed");
     aclBinaryFini(bin);
     return false;
@@ -145,7 +145,7 @@ bool NullProgram::compileImpl(const std::string& src,
   opts << options->origOptionStr.c_str();
 
   if (options->origOptionStr.find("-cl-std=CL") == std::string::npos) {
-    switch (dev().settings().oclVersion_) {
+    switch (gpuNullDevice().settings().oclVersion_) {
       case OpenCL10:
         opts << " -cl-std=CL1.0";
         break;
@@ -179,15 +179,15 @@ bool NullProgram::compileImpl(const std::string& src,
     opts << " -I" << tempFolder;
   }
 
-  if (!dev().settings().imageSupport_) {
+  if (!gpuNullDevice().settings().imageSupport_) {
     opts << " -fno-image-support";
   }
 
-  if (dev().settings().reportFMAF_) {
+  if (gpuNullDevice().settings().reportFMAF_) {
     opts << " -mfast-fmaf";
   }
 
-  if (dev().settings().reportFMA_) {
+  if (gpuNullDevice().settings().reportFMA_) {
     opts << " -mfast-fma";
   }
 
@@ -206,10 +206,10 @@ bool NullProgram::compileImpl(const std::string& src,
     pos = newOpt.find("-fno-bin-llvmir");
   }
 
-  err = aclCompile(dev().amdilCompiler(), bin, newOpt.c_str(), ACL_TYPE_OPENCL, ACL_TYPE_LLVMIR_BINARY,
+  err = aclCompile(gpuNullDevice().amdilCompiler(), bin, newOpt.c_str(), ACL_TYPE_OPENCL, ACL_TYPE_LLVMIR_BINARY,
                    NULL);
 
-  buildLog_ += aclGetCompilerLog(dev().amdilCompiler());
+  buildLog_ += aclGetCompilerLog(gpuNullDevice().amdilCompiler());
 
   if (err != ACL_SUCCESS) {
     LogWarning("aclCompile failed");
@@ -218,7 +218,7 @@ bool NullProgram::compileImpl(const std::string& src,
   }
 
   size_t len = 0;
-  const void* ir = aclExtractSection(dev().amdilCompiler(), bin, &len, aclLLVMIR, &err);
+  const void* ir = aclExtractSection(gpuNullDevice().amdilCompiler(), bin, &len, aclLLVMIR, &err);
   if (err != ACL_SUCCESS) {
     LogWarning("aclExtractSection failed");
     aclBinaryFini(bin);
@@ -284,7 +284,7 @@ int NullProgram::compileBinaryToIL(amd::option::Options* options) {
   }
 
   if (ACL_SUCCESS !=
-      aclInsertSection(dev().amdilCompiler(), bin, llvmBinary_.data(), llvmBinary_.size(), spirFlag)) {
+      aclInsertSection(gpuNullDevice().amdilCompiler(), bin, llvmBinary_.data(), llvmBinary_.size(), spirFlag)) {
     LogWarning("aclInsertSection failed");
     aclBinaryFini(bin);
     return CL_BUILD_PROGRAM_FAILURE;
@@ -294,7 +294,7 @@ int NullProgram::compileBinaryToIL(amd::option::Options* options) {
   std::string optionStr = options->origOptionStr;
   if (options->origOptionStr.find("kernel-arg-alignment") == std::string::npos) {
     char s[256];
-    sprintf(s, " -Wb,-kernel-arg-alignment=%d", dev().info().memBaseAddrAlign_ / 8);
+    sprintf(s, " -Wb,-kernel-arg-alignment=%d", gpuNullDevice().info().memBaseAddrAlign_ / 8);
     optionStr += s;
   }
 
@@ -308,8 +308,8 @@ int NullProgram::compileBinaryToIL(amd::option::Options* options) {
     type = ACL_TYPE_ISA;
   }
 
-  err = aclCompile(dev().amdilCompiler(), bin, optionStr.c_str(), aclTypeBinaryUsed, type, NULL);
-  buildLog_ += aclGetCompilerLog(dev().amdilCompiler());
+  err = aclCompile(gpuNullDevice().amdilCompiler(), bin, optionStr.c_str(), aclTypeBinaryUsed, type, NULL);
+  buildLog_ += aclGetCompilerLog(gpuNullDevice().amdilCompiler());
 
   if (err != ACL_SUCCESS) {
     LogWarning("aclCompile failed");
@@ -332,7 +332,7 @@ int NullProgram::compileBinaryToIL(amd::option::Options* options) {
   }
 
   size_t len = 0;
-  const void* amdil = aclExtractSection(dev().amdilCompiler(), bin, &len, aclCODEGEN, &err);
+  const void* amdil = aclExtractSection(gpuNullDevice().amdilCompiler(), bin, &len, aclCODEGEN, &err);
   if (err != ACL_SUCCESS) {
     LogWarning("aclExtractSection failed");
     aclBinaryFini(bin);
diff --git a/rocclr/device/gpu/gpukernel.cpp b/rocclr/device/gpu/gpukernel.cpp
index 005e5a7b7b..6e26afc454 100644
--- a/rocclr/device/gpu/gpukernel.cpp
+++ b/rocclr/device/gpu/gpukernel.cpp
@@ -3045,7 +3045,7 @@ void HSAILKernel::initHsailArgs(const aclArgData* aclArg) {
 
 HSAILKernel::HSAILKernel(std::string name, HSAILProgram* prog, std::string compileOptions,
                          uint extraArgsNum)
-    : device::Kernel(prog->dev(), name, *prog),
+    : device::Kernel(prog->device(), name, *prog),
       compileOptions_(compileOptions),
       index_(0),
       code_(NULL),
diff --git a/rocclr/device/gpu/gpuprogram.cpp b/rocclr/device/gpu/gpuprogram.cpp
index af3616eddf..0e1cd6bd94 100644
--- a/rocclr/device/gpu/gpuprogram.cpp
+++ b/rocclr/device/gpu/gpuprogram.cpp
@@ -115,7 +115,7 @@ NullKernel* Program::createKernel(const std::string& name, const Kernel::InitDat
 
   *created = false;
   // Create a GPU kernel
-  Kernel* gpuKernel = new Kernel(name, static_cast<const gpu::Device&>(device()), *this, initData);
+  Kernel* gpuKernel = new Kernel(name, gpuDevice(), *this, initData);
 
   if (gpuKernel == NULL) {
     buildLog_ += "new Kernel() failed";
@@ -512,7 +512,7 @@ bool NullProgram::linkImpl(const std::vector<device::Program*>& inputPrograms,
     } else {
       aclTypeUsed = aclLLVMIR;
     }
-    err = aclInsertSection(dev().amdilCompiler(), libs[i], llvmBinaries[i]->data(),
+    err = aclInsertSection(gpuNullDevice().amdilCompiler(), libs[i], llvmBinaries[i]->data(),
                            llvmBinaries[i]->size(), aclTypeUsed);
     if (err != ACL_SUCCESS) {
       LogWarning("aclInsertSection failed");
@@ -529,10 +529,10 @@ bool NullProgram::linkImpl(const std::vector<device::Program*>& inputPrograms,
       unsigned int numLibs = libs.size() - 1;
 
       if (numLibs > 0) {
-        err = aclLink(dev().amdilCompiler(), libs[0], numLibs, &libs[1], ACL_TYPE_LLVMIR_BINARY,
+        err = aclLink(gpuNullDevice().amdilCompiler(), libs[0], numLibs, &libs[1], ACL_TYPE_LLVMIR_BINARY,
                       "-create-library", NULL);
 
-        buildLog_ += aclGetCompilerLog(dev().amdilCompiler());
+        buildLog_ += aclGetCompilerLog(gpuNullDevice().amdilCompiler());
 
         if (err != ACL_SUCCESS) {
           LogWarning("aclLink failed");
@@ -549,7 +549,7 @@ bool NullProgram::linkImpl(const std::vector<device::Program*>& inputPrograms,
       } else {
         aclTypeUsed = aclLLVMIR;
       }
-      const void* llvmir = aclExtractSection(dev().amdilCompiler(), libs[0], &size, aclTypeUsed, &err);
+      const void* llvmir = aclExtractSection(gpuNullDevice().amdilCompiler(), libs[0], &size, aclTypeUsed, &err);
       if (err != ACL_SUCCESS) {
         LogWarning("aclExtractSection failed");
         break;
@@ -1456,13 +1456,13 @@ bool Program::allocGlobalData(const void* globalData, size_t dataSize, uint inde
     // so possible reallocation won't occur twice or
     // another thread could destroy a heap block,
     // while we didn't finish allocation
-    amd::ScopedLock k(dev().lockAsyncOps());
+    amd::ScopedLock k(gpuDevice().lockAsyncOps());
 
     // Allocate memory for the global data store
-    glbData_ = dev().createScratchBuffer(amd::alignUp(dataSize, 0x1000));
+    glbData_ = gpuDevice().createScratchBuffer(amd::alignUp(dataSize, 0x1000));
     dataStore = glbData_;
   } else {
-    dataStore = new Memory(dev(), amd::alignUp(dataSize, ConstBuffer::VectorSize));
+    dataStore = new Memory(gpuDevice(), amd::alignUp(dataSize, ConstBuffer::VectorSize));
 
     // Initialize constant buffer
     if ((dataStore == NULL) || !dataStore->create(Resource::RemoteUSWC)) {
@@ -1478,7 +1478,7 @@ bool Program::allocGlobalData(const void* globalData, size_t dataSize, uint inde
     static const bool Entire = true;
     amd::Coord3D origin(0, 0, 0);
     amd::Coord3D region(dataSize);
-    result = dev().xferMgr().writeBuffer(globalData, *dataStore, origin, region, Entire);
+    result = gpuDevice().xferMgr().writeBuffer(globalData, *dataStore, origin, region, Entire);
   }
 
   return result;
@@ -1505,7 +1505,7 @@ HSAILProgram::HSAILProgram(Device& device, amd::Program& owner)
       maxScratchRegs_(0),
       executable_(NULL),
       loaderContext_(this) {
-  machineTarget_ = dev().hwInfo()->targetName_;
+  machineTarget_ = gpuNullDevice().hwInfo()->targetName_;
   loader_ = amd::hsa::loader::Loader::Create(&loaderContext_);
 }
 
@@ -1517,7 +1517,7 @@ HSAILProgram::HSAILProgram(NullDevice& device, amd::Program& owner)
       executable_(NULL),
       loaderContext_(this) {
   isNull_ = true;
-  machineTarget_ = dev().hwInfo()->targetName_;
+  machineTarget_ = gpuNullDevice().hwInfo()->targetName_;
   loader_ = amd::hsa::loader::Loader::Create(&loaderContext_);
 }
 
@@ -1578,9 +1578,9 @@ bool HSAILProgram::linkImpl(amd::option::Options* options) {
     // 1. if the program is created with binary and contains only hsail text
     case ACL_TYPE_HSAIL_TEXT: {
       std::string curOptions = options->origOptionStr + hsailOptions();
-      errorCode = aclCompile(dev().hsaCompiler(), binaryElf_, curOptions.c_str(),
+      errorCode = aclCompile(gpuNullDevice().hsaCompiler(), binaryElf_, curOptions.c_str(),
                              continueCompileFrom, ACL_TYPE_CG, NULL);
-      buildLog_ += aclGetCompilerLog(dev().hsaCompiler());
+      buildLog_ += aclGetCompilerLog(gpuNullDevice().hsaCompiler());
       if (errorCode != ACL_SUCCESS) {
         buildLog_ += "Error: BRIG code generation failed.\n";
         return false;
@@ -1602,12 +1602,12 @@ bool HSAILProgram::linkImpl(amd::option::Options* options) {
     std::string fin_options(options->origOptionStr + hsailOptions());
     // Append an option so that we can selectively enable a SCOption on CZ
     // whenever IOMMUv2 is enabled.
-    if (dev().settings().svmFineGrainSystem_) {
+    if (gpuNullDevice().settings().svmFineGrainSystem_) {
       fin_options.append(" -sc-xnack-iommu");
     }
-    errorCode = aclCompile(dev().hsaCompiler(), binaryElf_, fin_options.c_str(), ACL_TYPE_CG,
+    errorCode = aclCompile(gpuNullDevice().hsaCompiler(), binaryElf_, fin_options.c_str(), ACL_TYPE_CG,
                            ACL_TYPE_ISA, NULL);
-    buildLog_ += aclGetCompilerLog(dev().hsaCompiler());
+    buildLog_ += aclGetCompilerLog(gpuNullDevice().hsaCompiler());
     if (errorCode != ACL_SUCCESS) {
       buildLog_ += "Error: BRIG finalization to ISA failed.\n";
       return false;
@@ -1625,7 +1625,7 @@ bool HSAILProgram::linkImpl(amd::option::Options* options) {
     size_t size = 0;
     hsa_code_object_t code_object;
     code_object.handle = reinterpret_cast<uint64_t>(
-        aclExtractSection(dev().hsaCompiler(), binaryElf_, &size, aclTEXT, &errorCode));
+        aclExtractSection(gpuNullDevice().hsaCompiler(), binaryElf_, &size, aclTEXT, &errorCode));
     if (errorCode != ACL_SUCCESS) {
       buildLog_ += "Error: Extracting AMD HSA Code Object from binary failed.\n";
       return false;
@@ -1638,14 +1638,14 @@ bool HSAILProgram::linkImpl(amd::option::Options* options) {
   }
   size_t kernelNamesSize = 0;
   errorCode =
-      aclQueryInfo(dev().hsaCompiler(), binaryElf_, RT_KERNEL_NAMES, NULL, NULL, &kernelNamesSize);
+      aclQueryInfo(gpuNullDevice().hsaCompiler(), binaryElf_, RT_KERNEL_NAMES, NULL, NULL, &kernelNamesSize);
   if (errorCode != ACL_SUCCESS) {
     buildLog_ += "Error: Querying of kernel names size from the binary failed.\n";
     return false;
   }
   if (kernelNamesSize > 0) {
     char* kernelNames = new char[kernelNamesSize];
-    errorCode = aclQueryInfo(dev().hsaCompiler(), binaryElf_, RT_KERNEL_NAMES, NULL, kernelNames,
+    errorCode = aclQueryInfo(gpuNullDevice().hsaCompiler(), binaryElf_, RT_KERNEL_NAMES, NULL, kernelNames,
                              &kernelNamesSize);
     if (errorCode != ACL_SUCCESS) {
       buildLog_ += "Error: Querying of kernel names from the binary failed.\n";
@@ -1661,7 +1661,7 @@ bool HSAILProgram::linkImpl(amd::option::Options* options) {
     for (const auto& it : vKernels) {
       std::string kernelName(it);
       std::string openclKernelName = Kernel::openclMangledName(kernelName);
-      errorCode = aclQueryInfo(dev().hsaCompiler(), binaryElf_, RT_NUM_KERNEL_HIDDEN_ARGS,
+      errorCode = aclQueryInfo(gpuNullDevice().hsaCompiler(), binaryElf_, RT_NUM_KERNEL_HIDDEN_ARGS,
                                openclKernelName.c_str(), &md.numHiddenKernelArgs,
                                &sizeOfnumHiddenKernelArgs);
       if (errorCode != ACL_SUCCESS) {
@@ -1698,7 +1698,7 @@ bool HSAILProgram::linkImpl(amd::option::Options* options) {
   }
   // Save the binary in the interface class
   saveBinaryAndSetType(TYPE_EXECUTABLE);
-  buildLog_ += aclGetCompilerLog(dev().hsaCompiler());
+  buildLog_ += aclGetCompilerLog(gpuNullDevice().hsaCompiler());
   return true;
 }
 
@@ -1708,13 +1708,13 @@ std::string HSAILProgram::hsailOptions() {
   std::string hsailOptions;
   // Set options for the standard device specific options
   // All our devices support these options now
-  if (dev().settings().reportFMAF_) {
+  if (gpuNullDevice().settings().reportFMAF_) {
     hsailOptions.append(" -DFP_FAST_FMAF=1");
   }
-  if (dev().settings().reportFMA_) {
+  if (gpuNullDevice().settings().reportFMA_) {
     hsailOptions.append(" -DFP_FAST_FMA=1");
   }
-  if (!dev().settings().singleFpDenorm_) {
+  if (!gpuNullDevice().settings().singleFpDenorm_) {
     hsailOptions.append(" -cl-denorms-are-zero");
   }
 
@@ -1738,7 +1738,7 @@ std::string HSAILProgram::hsailOptions() {
 bool HSAILProgram::allocKernelTable() {
   uint size = kernels().size() * sizeof(size_t);
 
-  kernels_ = new gpu::Memory(dev(), size);
+  kernels_ = new gpu::Memory(gpuDevice(), size);
   // Initialize kernel table
   if ((kernels_ == NULL) || !kernels_->create(Resource::RemoteUSWC)) {
     delete kernels_;
@@ -1767,7 +1767,7 @@ const aclTargetInfo& HSAILProgram::info(const char* str) {
     arch = "hsail64";
   }
   info_ = aclGetTargetInfo(arch.c_str(),
-                           (str && str[0] == '\0' ? dev().hwInfo()->targetName_ : str), &err);
+                           (str && str[0] == '\0' ? gpuNullDevice().hwInfo()->targetName_ : str), &err);
   if (err != ACL_SUCCESS) {
     LogWarning("aclGetTargetInfo failed");
   }
@@ -1867,7 +1867,7 @@ hsa_isa_t ORCAHSALoaderContext::IsaFromName(const char* name) {
 }
 
 bool ORCAHSALoaderContext::IsaSupportedByAgent(hsa_agent_t agent, hsa_isa_t isa) {
-  uint dev_gfxip = program_->dev().hwInfo()->gfxipVersion_;
+  uint dev_gfxip = program_->gpuNullDevice().hwInfo()->gfxipVersion_;
   uint isa_gfxip = isa.handle;
   switch (dev_gfxip) {
     case gfx700:
@@ -1892,7 +1892,7 @@ bool ORCAHSALoaderContext::IsaSupportedByAgent(hsa_agent_t agent, hsa_isa_t isa)
     case gfx602:
     default:
       LogPrintfError("Unsupported gfxip version gfx%d", dev_gfxip);
-      return false;
+    return false;
   }
 }
 
@@ -2024,7 +2024,7 @@ hsa_status_t ORCAHSALoaderContext::SamplerCreate(
       assert(false);
       return HSA_STATUS_ERROR_INVALID_ARGUMENT;
   }
-  gpu::Sampler* sampler = new gpu::Sampler(program_->dev());
+  gpu::Sampler* sampler = new gpu::Sampler(program_->gpuDevice());
   if (!sampler || !sampler->create(state)) {
     delete sampler;
     return HSA_STATUS_ERROR;
@@ -2075,15 +2075,15 @@ void* ORCAHSALoaderContext::GpuMemAlloc(size_t size, size_t align, bool zero) {
     return new char[size];
   }
 
-  gpu::Memory* mem = new gpu::Memory(program_->dev(), amd::alignUp(size, align));
+  gpu::Memory* mem = new gpu::Memory(program_->gpuDevice(), amd::alignUp(size, align));
   if (!mem || !mem->create(gpu::Resource::Local)) {
     delete mem;
     return NULL;
   }
-  assert(program_->dev().xferQueue());
+  assert(program_->gpuDevice().xferQueue());
   if (zero) {
     char pattern = 0;
-    program_->dev().xferMgr().fillBuffer(*mem, &pattern, sizeof(pattern), amd::Coord3D(0),
+    program_->gpuDevice().xferMgr().fillBuffer(*mem, &pattern, sizeof(pattern), amd::Coord3D(0),
                                          amd::Coord3D(size));
   }
   program_->addGlobalStore(mem);
@@ -2102,9 +2102,9 @@ bool ORCAHSALoaderContext::GpuMemCopy(void* dst, size_t offset, const void* src,
     memcpy(reinterpret_cast<address>(dst) + offset, src, size);
     return true;
   }
-  assert(program_->dev().xferQueue());
+  assert(program_->gpuDevice().xferQueue());
   gpu::Memory* mem = reinterpret_cast<gpu::Memory*>(dst);
-  return program_->dev().xferMgr().writeBuffer(src, *mem, amd::Coord3D(offset), amd::Coord3D(size),
+  return program_->gpuDevice().xferMgr().writeBuffer(src, *mem, amd::Coord3D(offset), amd::Coord3D(size),
                                                true);
   return true;
 }
diff --git a/rocclr/device/gpu/gpuprogram.hpp b/rocclr/device/gpu/gpuprogram.hpp
index 17796d89af..05f4569a5c 100644
--- a/rocclr/device/gpu/gpuprogram.hpp
+++ b/rocclr/device/gpu/gpuprogram.hpp
@@ -289,7 +289,7 @@ class NullProgram : public device::Program {
                       );
 
   //! Return a typecasted GPU device
-  gpu::NullDevice& dev() {
+  gpu::NullDevice& gpuNullDevice() {
     return const_cast<gpu::NullDevice&>(static_cast<const gpu::NullDevice&>(device()));
   }
 
@@ -339,10 +339,7 @@ class Program : public NullProgram {
   //! Returns pritnf info array
   const std::vector<device::PrintfInfo>& printfInfo() const { return printf_; }
 
-   //! Return a typecasted GPU device
-  gpu::Device& dev() { return const_cast<gpu::Device&>(static_cast<const gpu::Device&>(device())); }
-
-protected:
+ protected:
  private:
   //! Disable copy constructor
   Program(const Program&);
@@ -350,6 +347,11 @@ protected:
   //! Disable operator=
   Program& operator=(const Program&);
 
+  //! Return a typecasted GPU device. The device must not be the NullDevice.
+  gpu::Device& gpuDevice() {
+    return const_cast<gpu::Device&>(static_cast<const gpu::Device&>(device()));
+  }
+
   HwConstBuffers constBufs_;  //!< Constant buffers for the global store
   gpu::Memory* glbData_;      //!< Global data store
 };
@@ -467,8 +469,15 @@ class HSAILProgram : public device::Program {
 
   const std::vector<Memory*>& globalStores() const { return globalStores_; }
 
-  //! Return a typecasted GPU device
-  gpu::Device& dev() { return const_cast<gpu::Device&>(static_cast<const gpu::Device&>(device())); }
+  //! Return a typecasted GPU null device.
+  gpu::NullDevice& gpuNullDevice() {
+    return const_cast<gpu::NullDevice&>(static_cast<const gpu::NullDevice&>(device()));
+  }
+
+  //! Return a typecasted GPU device. The device must not be the NullDevice.
+  gpu::Device& gpuDevice() {
+    return const_cast<gpu::Device&>(static_cast<const gpu::Device&>(device()));
+  }
 
   //! Returns GPU kernel table
   const Memory* kernelTable() const { return kernels_; }
diff --git a/rocclr/device/pal/palkernel.cpp b/rocclr/device/pal/palkernel.cpp
index b0fdea8eff..7fcdb6b91f 100644
--- a/rocclr/device/pal/palkernel.cpp
+++ b/rocclr/device/pal/palkernel.cpp
@@ -43,8 +43,8 @@ void HSAILKernel::setWorkGroupInfo(const uint32_t privateSegmentSize,
   // Make sure runtime matches HW alignment, which is 256 scratch regs (DWORDs) per wave
   constexpr uint32_t ScratchRegAlignment = 256;
   workGroupInfo_.scratchRegs_ =
-      amd::alignUp((workGroupInfo_.scratchRegs_ * dev().info().wavefrontWidth_),
-                   ScratchRegAlignment) / dev().info().wavefrontWidth_;
+      amd::alignUp((workGroupInfo_.scratchRegs_ * device().info().wavefrontWidth_),
+                   ScratchRegAlignment) / device().info().wavefrontWidth_;
   workGroupInfo_.privateMemSize_ = workGroupInfo_.scratchRegs_ * sizeof(uint32_t);
   workGroupInfo_.localMemSize_ = workGroupInfo_.usedLDSSize_ = groupSegmentSize;
   workGroupInfo_.usedSGPRs_ = numSGPRs;
@@ -52,13 +52,13 @@ void HSAILKernel::setWorkGroupInfo(const uint32_t privateSegmentSize,
   workGroupInfo_.usedVGPRs_ = numVGPRs;
 
   if (!prog().isNull()) {
-    workGroupInfo_.availableLDSSize_ = dev().properties().gfxipProperties.shaderCore.ldsSizePerCu;
+    workGroupInfo_.availableLDSSize_ = palDevice().properties().gfxipProperties.shaderCore.ldsSizePerCu;
     workGroupInfo_.availableSGPRs_ =
-        dev().properties().gfxipProperties.shaderCore.numAvailableSgprs;
+        palDevice().properties().gfxipProperties.shaderCore.numAvailableSgprs;
     workGroupInfo_.availableVGPRs_ =
-        dev().properties().gfxipProperties.shaderCore.numAvailableVgprs;
+        palDevice().properties().gfxipProperties.shaderCore.numAvailableVgprs;
     workGroupInfo_.preferredSizeMultiple_ = workGroupInfo_.wavefrontPerSIMD_ =
-        dev().info().wavefrontWidth_;
+        device().info().wavefrontWidth_;
   } else {
     workGroupInfo_.availableLDSSize_ = 64 * Ki;
     workGroupInfo_.availableSGPRs_ = 104;
@@ -105,7 +105,7 @@ bool HSAILKernel::aqlCreateHWInfo(amd::hsa::loader::Symbol* sym) {
 }
 
 HSAILKernel::HSAILKernel(std::string name, HSAILProgram* prog, std::string compileOptions)
-    : device::Kernel(prog->dev(), name, *prog),
+    : device::Kernel(prog->device(), name, *prog),
       compileOptions_(compileOptions),
       index_(0),
       code_(0),
@@ -128,12 +128,12 @@ bool HSAILKernel::init(amd::hsa::loader::Symbol* sym, bool finalize) {
     options.append(openClKernelName.c_str());
     // Append an option so that we can selectively enable a SCOption on CZ
     // whenever IOMMUv2 is enabled.
-    if (dev().settings().svmFineGrainSystem_) {
+    if (palNullDevice().settings().svmFineGrainSystem_) {
       options.append(" -sc-xnack-iommu");
     }
-    error = aclCompile(dev().compiler(), prog().binaryElf(), options.c_str(), ACL_TYPE_CG,
+    error = aclCompile(palNullDevice().compiler(), prog().binaryElf(), options.c_str(), ACL_TYPE_CG,
                        ACL_TYPE_ISA, nullptr);
-    buildLog_ += aclGetCompilerLog(dev().compiler());
+    buildLog_ += aclGetCompilerLog(palNullDevice().compiler());
     if (error != ACL_SUCCESS) {
       LogError("Failed to finalize kernel");
       return false;
@@ -144,7 +144,7 @@ bool HSAILKernel::init(amd::hsa::loader::Symbol* sym, bool finalize) {
 
   // Pull out metadata from the ELF
   size_t sizeOfArgList;
-  error = aclQueryInfo(dev().compiler(), prog().binaryElf(), RT_ARGUMENT_ARRAY,
+  error = aclQueryInfo(palNullDevice().compiler(), prog().binaryElf(), RT_ARGUMENT_ARRAY,
                        openClKernelName.c_str(), nullptr, &sizeOfArgList);
   if (error != ACL_SUCCESS) {
     return false;
@@ -154,7 +154,7 @@ bool HSAILKernel::init(amd::hsa::loader::Symbol* sym, bool finalize) {
   if (nullptr == aclArgList) {
     return false;
   }
-  error = aclQueryInfo(dev().compiler(), prog().binaryElf(), RT_ARGUMENT_ARRAY,
+  error = aclQueryInfo(palNullDevice().compiler(), prog().binaryElf(), RT_ARGUMENT_ARRAY,
                        openClKernelName.c_str(), aclArgList, &sizeOfArgList);
   if (error != ACL_SUCCESS) {
     return false;
@@ -164,30 +164,30 @@ bool HSAILKernel::init(amd::hsa::loader::Symbol* sym, bool finalize) {
   delete[] aclArgList;
 
   size_t sizeOfWorkGroupSize;
-  error = aclQueryInfo(dev().compiler(), prog().binaryElf(), RT_WORK_GROUP_SIZE,
+  error = aclQueryInfo(palNullDevice().compiler(), prog().binaryElf(), RT_WORK_GROUP_SIZE,
                        openClKernelName.c_str(), nullptr, &sizeOfWorkGroupSize);
   if (error != ACL_SUCCESS) {
     return false;
   }
-  error = aclQueryInfo(dev().compiler(), prog().binaryElf(), RT_WORK_GROUP_SIZE,
+  error = aclQueryInfo(palNullDevice().compiler(), prog().binaryElf(), RT_WORK_GROUP_SIZE,
                        openClKernelName.c_str(), workGroupInfo_.compileSize_, &sizeOfWorkGroupSize);
   if (error != ACL_SUCCESS) {
     return false;
   }
 
   // Copy wavefront size
-  workGroupInfo_.wavefrontSize_ = dev().info().wavefrontWidth_;
+  workGroupInfo_.wavefrontSize_ = device().info().wavefrontWidth_;
   // Find total workgroup size
   if (workGroupInfo_.compileSize_[0] != 0) {
     workGroupInfo_.size_ = workGroupInfo_.compileSize_[0] * workGroupInfo_.compileSize_[1] *
         workGroupInfo_.compileSize_[2];
   } else {
-    workGroupInfo_.size_ = dev().info().preferredWorkGroupSize_;
+    workGroupInfo_.size_ = device().info().preferredWorkGroupSize_;
   }
 
   // Pull out printf metadata from the ELF
   size_t sizeOfPrintfList;
-  error = aclQueryInfo(dev().compiler(), prog().binaryElf(), RT_GPU_PRINTF_ARRAY,
+  error = aclQueryInfo(palNullDevice().compiler(), prog().binaryElf(), RT_GPU_PRINTF_ARRAY,
                        openClKernelName.c_str(), nullptr, &sizeOfPrintfList);
   if (error != ACL_SUCCESS) {
     return false;
@@ -199,7 +199,7 @@ bool HSAILKernel::init(amd::hsa::loader::Symbol* sym, bool finalize) {
     if (nullptr == aclPrintfList) {
       return false;
     }
-    error = aclQueryInfo(dev().compiler(), prog().binaryElf(), RT_GPU_PRINTF_ARRAY,
+    error = aclQueryInfo(palNullDevice().compiler(), prog().binaryElf(), RT_GPU_PRINTF_ARRAY,
                          openClKernelName.c_str(), aclPrintfList, &sizeOfPrintfList);
     if (error != ACL_SUCCESS) {
       return false;
@@ -213,7 +213,7 @@ bool HSAILKernel::init(amd::hsa::loader::Symbol* sym, bool finalize) {
   aclMetadata md;
   md.enqueue_kernel = false;
   size_t sizeOfDeviceEnqueue = sizeof(md.enqueue_kernel);
-  error = aclQueryInfo(dev().compiler(), prog().binaryElf(), RT_DEVICE_ENQUEUE,
+  error = aclQueryInfo(palNullDevice().compiler(), prog().binaryElf(), RT_DEVICE_ENQUEUE,
                        openClKernelName.c_str(), &md.enqueue_kernel, &sizeOfDeviceEnqueue);
   if (error != ACL_SUCCESS) {
     return false;
@@ -222,7 +222,7 @@ bool HSAILKernel::init(amd::hsa::loader::Symbol* sym, bool finalize) {
 
   md.kernel_index = -1;
   size_t sizeOfIndex = sizeof(md.kernel_index);
-  error = aclQueryInfo(dev().compiler(), prog().binaryElf(), RT_KERNEL_INDEX,
+  error = aclQueryInfo(palNullDevice().compiler(), prog().binaryElf(), RT_KERNEL_INDEX,
                        openClKernelName.c_str(), &md.kernel_index, &sizeOfIndex);
   if (error != ACL_SUCCESS) {
     return false;
@@ -230,7 +230,7 @@ bool HSAILKernel::init(amd::hsa::loader::Symbol* sym, bool finalize) {
   index_ = md.kernel_index;
 
   size_t sizeOfWavesPerSimdHint = sizeof(workGroupInfo_.wavesPerSimdHint_);
-  error = aclQueryInfo(dev().compiler(), prog().binaryElf(), RT_WAVES_PER_SIMD_HINT,
+  error = aclQueryInfo(palNullDevice().compiler(), prog().binaryElf(), RT_WAVES_PER_SIMD_HINT,
                        openClKernelName.c_str(), &workGroupInfo_.wavesPerSimdHint_,
                        &sizeOfWavesPerSimdHint);
   if (error != ACL_SUCCESS) {
@@ -240,7 +240,7 @@ bool HSAILKernel::init(amd::hsa::loader::Symbol* sym, bool finalize) {
   waveLimiter_.enable();
 
   size_t sizeOfWorkGroupSizeHint = sizeof(workGroupInfo_.compileSizeHint_);
-  error = aclQueryInfo(dev().compiler(), prog().binaryElf(), RT_WORK_GROUP_SIZE_HINT,
+  error = aclQueryInfo(palNullDevice().compiler(), prog().binaryElf(), RT_WORK_GROUP_SIZE_HINT,
                        openClKernelName.c_str(), workGroupInfo_.compileSizeHint_,
                        &sizeOfWorkGroupSizeHint);
   if (error != ACL_SUCCESS) {
@@ -248,7 +248,7 @@ bool HSAILKernel::init(amd::hsa::loader::Symbol* sym, bool finalize) {
   }
 
   size_t sizeOfVecTypeHint;
-  error = aclQueryInfo(dev().compiler(), prog().binaryElf(), RT_VEC_TYPE_HINT,
+  error = aclQueryInfo(palNullDevice().compiler(), prog().binaryElf(), RT_VEC_TYPE_HINT,
                        openClKernelName.c_str(), NULL, &sizeOfVecTypeHint);
   if (error != ACL_SUCCESS) {
     return false;
@@ -259,7 +259,7 @@ bool HSAILKernel::init(amd::hsa::loader::Symbol* sym, bool finalize) {
     if (NULL == VecTypeHint) {
       return false;
     }
-    error = aclQueryInfo(dev().compiler(), prog().binaryElf(), RT_VEC_TYPE_HINT,
+    error = aclQueryInfo(palNullDevice().compiler(), prog().binaryElf(), RT_VEC_TYPE_HINT,
                          openClKernelName.c_str(), VecTypeHint, &sizeOfVecTypeHint);
     if (error != ACL_SUCCESS) {
       return false;
@@ -273,8 +273,6 @@ bool HSAILKernel::init(amd::hsa::loader::Symbol* sym, bool finalize) {
   return true;
 }
 
-const Device& HSAILKernel::dev() const { return reinterpret_cast<const Device&>(dev_); }
-
 const HSAILProgram& HSAILKernel::prog() const {
   return reinterpret_cast<const HSAILProgram&>(prog_);
 }
@@ -476,7 +474,7 @@ bool LightningKernel::init() {
                    workGroupInfo()->usedSGPRs_, workGroupInfo()->usedVGPRs_);
 
   // Copy wavefront size
-  workGroupInfo_.wavefrontSize_ = dev().info().wavefrontWidth_;
+  workGroupInfo_.wavefrontSize_ = device().info().wavefrontWidth_;
 
   if (workGroupInfo_.size_ == 0) {
     return false;
diff --git a/rocclr/device/pal/palkernel.hpp b/rocclr/device/pal/palkernel.hpp
index c5d067a94d..71b5373c03 100644
--- a/rocclr/device/pal/palkernel.hpp
+++ b/rocclr/device/pal/palkernel.hpp
@@ -68,8 +68,11 @@ class HSAILKernel : public device::Kernel {
   //! finalizes the kernel if needed
   bool init(amd::hsa::loader::Symbol* sym, bool finalize = false);
 
-  //! Returns PAL device object, associated with this kernel
-  const Device& dev() const;
+  //! Returns the PAL device object associated with this kernel; it may be a NullDevice.
+  const NullDevice& palNullDevice() const { return reinterpret_cast<const NullDevice&>(dev_); }
+
+  //! Returns the PAL device object associated with this kernel; it must not be a NullDevice.
+  const Device& palDevice() const { return reinterpret_cast<const Device&>(dev_); }
 
   //! Returns HSA program associated with this kernel
   const HSAILProgram& prog() const;
diff --git a/rocclr/device/pal/palprogram.cpp b/rocclr/device/pal/palprogram.cpp
index 16e2f678e1..76eecb8690 100644
--- a/rocclr/device/pal/palprogram.cpp
+++ b/rocclr/device/pal/palprogram.cpp
@@ -70,8 +70,8 @@ bool Segment::alloc(HSAILProgram& prog, amdgpu_hsa_elf_segment_t segment, size_t
                     bool zero) {
   align = amd::alignUp(align, sizeof(uint32_t));
 
-  amd::Memory* amd_mem_obj = new (prog.dev().context())
-      amd::Buffer(prog.dev().context(), 0, amd::alignUp(size, align),
+  amd::Memory* amd_mem_obj = new (prog.palDevice().context())
+      amd::Buffer(prog.palDevice().context(), 0, amd::alignUp(size, align),
                   // HIP requires SVM allocation for segment code due to possible global variable
                   // access and global variables are a part of code segment with the latest loader
                   amd::IS_HIP ? reinterpret_cast<void*>(1) : nullptr);
@@ -87,11 +87,11 @@ bool Segment::alloc(HSAILProgram& prog, amdgpu_hsa_elf_segment_t segment, size_t
     return false;
   }
 
-  gpuAccess_ = static_cast<pal::Memory*>(amd_mem_obj->getDeviceMemory(prog.dev(), false));
+  gpuAccess_ = static_cast<pal::Memory*>(amd_mem_obj->getDeviceMemory(prog.palDevice(), false));
 
   if (segment == AMDGPU_HSA_SEGMENT_CODE_AGENT) {
     void* ptr = nullptr;
-    cpuAccess_ = new pal::Memory(prog.dev(), amd::alignUp(size, align));
+    cpuAccess_ = new pal::Memory(prog.palDevice(), amd::alignUp(size, align));
     if ((cpuAccess_ == nullptr) || !cpuAccess_->create(pal::Resource::Remote)) {
       delete cpuAccess_;
       cpuAccess_ = nullptr;
@@ -111,8 +111,8 @@ bool Segment::alloc(HSAILProgram& prog, amdgpu_hsa_elf_segment_t segment, size_t
   if ((cpuAccess_ == nullptr) && zero && !prog.isInternal()) {
     uint64_t pattern = 0;
     size_t patternSize = ((size % sizeof(pattern)) == 0) ? sizeof(pattern) : 1;
-    prog.dev().xferMgr().fillBuffer(*gpuAccess_, &pattern, patternSize, amd::Coord3D(0),
-                                    amd::Coord3D(size));
+    prog.palDevice().xferMgr().fillBuffer(*gpuAccess_, &pattern, patternSize, amd::Coord3D(0),
+                                          amd::Coord3D(size));
   }
 
   switch (segment) {
@@ -266,7 +266,7 @@ bool HSAILProgram::setKernels(amd::option::Options* options, void* binary, size_
   }
 
   size_t kernelNamesSize = 0;
-  acl_error errorCode = aclQueryInfo(dev().compiler(), binaryElf_, RT_KERNEL_NAMES, nullptr,
+  acl_error errorCode = aclQueryInfo(palNullDevice().compiler(), binaryElf_, RT_KERNEL_NAMES, nullptr,
                                      nullptr, &kernelNamesSize);
   if (errorCode != ACL_SUCCESS) {
     buildLog_ += "Error: Querying of kernel names size from the binary failed.\n";
@@ -274,7 +274,7 @@ bool HSAILProgram::setKernels(amd::option::Options* options, void* binary, size_
   }
   if (kernelNamesSize > 0) {
     char* kernelNames = new char[kernelNamesSize];
-    errorCode = aclQueryInfo(dev().compiler(), binaryElf_, RT_KERNEL_NAMES, nullptr, kernelNames,
+    errorCode = aclQueryInfo(palNullDevice().compiler(), binaryElf_, RT_KERNEL_NAMES, nullptr, kernelNames,
                              &kernelNamesSize);
     if (errorCode != ACL_SUCCESS) {
       buildLog_ += "Error: Querying of kernel names from the binary failed.\n";
@@ -326,7 +326,7 @@ bool HSAILProgram::createBinary(amd::option::Options* options) { return true; }
 bool HSAILProgram::allocKernelTable() {
   uint size = kernels().size() * sizeof(size_t);
 
-  kernels_ = new pal::Memory(dev(), size);
+  kernels_ = new pal::Memory(palDevice(), size);
   // Initialize kernel table
   if ((kernels_ == nullptr) || !kernels_->create(Resource::RemoteUSWC)) {
     delete kernels_;
@@ -352,7 +352,7 @@ const aclTargetInfo& HSAILProgram::info(const char* str) {
     arch = "hsail64";
   }
   info_ = aclGetTargetInfo(arch.c_str(),
-                           (str && str[0] == '\0' ? dev().hwInfo()->machineTarget_ : str), &err);
+                           (str && str[0] == '\0' ? palNullDevice().hwInfo()->machineTarget_ : str), &err);
   if (err != ACL_SUCCESS) {
     LogWarning("aclGetTargetInfo failed");
   }
@@ -523,9 +523,9 @@ hsa_isa_t PALHSALoaderContext::IsaFromName(const char* name) {
 }
 
 bool PALHSALoaderContext::IsaSupportedByAgent(hsa_agent_t agent, hsa_isa_t isa) {
-  uint32_t gfxipVersion = program_->dev().settings().useLightning_
-      ? program_->dev().hwInfo()->gfxipVersionLC_
-      : program_->dev().hwInfo()->gfxipVersion_;
+  uint32_t gfxipVersion = program_->palNullDevice().settings().useLightning_
+      ? program_->palNullDevice().hwInfo()->gfxipVersionLC_
+      : program_->palNullDevice().hwInfo()->gfxipVersion_;
   uint32_t majorSrc = gfxipVersion / 10;
   uint32_t minorSrc = gfxipVersion % 10;
 
@@ -671,7 +671,7 @@ hsa_status_t PALHSALoaderContext::SamplerCreate(
       assert(false);
       return HSA_STATUS_ERROR_INVALID_ARGUMENT;
   }
-  pal::Sampler* sampler = new pal::Sampler(program_->dev());
+  pal::Sampler* sampler = new pal::Sampler(program_->palDevice());
   if (!sampler || !sampler->create(state)) {
     delete sampler;
     return HSA_STATUS_ERROR;
diff --git a/rocclr/device/pal/palprogram.hpp b/rocclr/device/pal/palprogram.hpp
index 3027107fc8..57aa9cc40e 100644
--- a/rocclr/device/pal/palprogram.hpp
+++ b/rocclr/device/pal/palprogram.hpp
@@ -157,8 +157,15 @@ class HSAILProgram : public device::Program {
 
   const std::vector<Memory*>& globalStores() const { return globalStores_; }
 
-  //! Return a typecasted PAL device
-  pal::Device& dev() { return const_cast<pal::Device&>(static_cast<const pal::Device&>(device())); }
+  //! Return a typecasted PAL null device.
+  pal::NullDevice& palNullDevice() {
+    return const_cast<pal::NullDevice&>(static_cast<const pal::NullDevice&>(device()));
+  }
+
+  //! Return a typecasted PAL device. The device must not be the NullDevice.
+  pal::Device& palDevice() {
+    return const_cast<pal::Device&>(static_cast<const pal::Device&>(device()));
+  }
 
   //! Returns GPU kernel table
   const Memory* kernelTable() const { return kernels_; }
@@ -241,7 +248,7 @@ class LightningProgram : public HSAILProgram {
   LightningProgram(NullDevice& device, amd::Program& owner) : HSAILProgram(device, owner) {
     isLC_ = true;
     isHIP_ = (owner.language() == amd::Program::HIP);
-    machineTarget_ = dev().hwInfo()->machineTargetLC_;
+    machineTarget_ = palNullDevice().hwInfo()->machineTargetLC_;
   }
 
   LightningProgram(Device& device, amd::Program& owner) : HSAILProgram(device, owner) {
diff --git a/rocclr/device/rocm/rockernel.cpp b/rocclr/device/rocm/rockernel.cpp
index 5c5eaca48f..25498a4e12 100644
--- a/rocclr/device/rocm/rockernel.cpp
+++ b/rocclr/device/rocm/rockernel.cpp
@@ -31,7 +31,7 @@ Kernel::Kernel(std::string name, Program* prog, const uint64_t& kernelCodeHandle
                const uint32_t workgroupGroupSegmentByteSize,
                const uint32_t workitemPrivateSegmentByteSize, const uint32_t kernargSegmentByteSize,
                const uint32_t kernargSegmentAlignment)
-    : device::Kernel(prog->dev(), name, *prog) {
+    : device::Kernel(prog->device(), name, *prog) {
   kernelCodeHandle_ = kernelCodeHandle;
   workgroupGroupSegmentByteSize_ = workgroupGroupSegmentByteSize;
   workitemPrivateSegmentByteSize_ = workitemPrivateSegmentByteSize;
@@ -40,7 +40,7 @@ Kernel::Kernel(std::string name, Program* prog, const uint64_t& kernelCodeHandle
 }
 
 Kernel::Kernel(std::string name, Program* prog)
-    : device::Kernel(prog->dev(), name, *prog) {
+    : device::Kernel(prog->device(), name, *prog) {
 }
 
 #if defined(USE_COMGR_LIBRARY)
@@ -57,10 +57,10 @@ bool LightningKernel::init() {
     symbolName_ = name();
   }
   kernargSegmentAlignment_ =
-      amd::alignUp(std::max(kernargSegmentAlignment_, 128u), dev().info().globalMemCacheLineSize_);
+      amd::alignUp(std::max(kernargSegmentAlignment_, 128u), device().info().globalMemCacheLineSize_);
 
   // Set the workgroup information for the kernel
-  workGroupInfo_.availableLDSSize_ = dev().info().localMemSizePerCU_;
+  workGroupInfo_.availableLDSSize_ = device().info().localMemSizePerCU_;
   assert(workGroupInfo_.availableLDSSize_ > 0);
 
   if (!SetAvailableSgprVgpr()) {
@@ -155,7 +155,7 @@ bool LightningKernel::init() {
   workGroupInfo_.usedLDSSize_ = workgroupGroupSegmentByteSize_;
   workGroupInfo_.preferredSizeMultiple_ = wavefront_size;
   workGroupInfo_.usedStackSize_ = 0;
-  workGroupInfo_.wavefrontPerSIMD_ = program()->dev().info().maxWorkItemSizes_[0] / wavefront_size;
+  workGroupInfo_.wavefrontPerSIMD_ = program()->rocDevice().info().maxWorkItemSizes_[0] / wavefront_size;
   workGroupInfo_.wavefrontSize_ = wavefront_size;
   if (workGroupInfo_.size_ == 0) {
     return false;
@@ -181,7 +181,7 @@ bool HSAILKernel::init() {
   hsa_agent_t hsaDevice = program()->hsaDevice();
   // Pull out metadata from the ELF
   size_t sizeOfArgList;
-  aclCompiler* compileHandle = program()->dev().compiler();
+  aclCompiler* compileHandle = program()->rocDevice().compiler();
   std::string openClKernelName("&__OpenCL_" + name() + "_kernel");
   errorCode = aclQueryInfo(compileHandle, program()->binaryElf(), RT_ARGUMENT_ARRAY,
                                          openClKernelName.c_str(), nullptr, &sizeOfArgList);
@@ -202,7 +202,7 @@ bool HSAILKernel::init() {
 
   // Set the workgroup information for the kernel
   memset(&workGroupInfo_, 0, sizeof(workGroupInfo_));
-  workGroupInfo_.availableLDSSize_ = program()->dev().info().localMemSizePerCU_;
+  workGroupInfo_.availableLDSSize_ = program()->rocDevice().info().localMemSizePerCU_;
   assert(workGroupInfo_.availableLDSSize_ > 0);
   workGroupInfo_.availableSGPRs_ = 104;
   workGroupInfo_.availableVGPRs_ = 256;
@@ -250,13 +250,13 @@ bool HSAILKernel::init() {
   }
 
   workGroupInfo_.usedStackSize_ = 0;
-  workGroupInfo_.wavefrontPerSIMD_ = program()->dev().info().maxWorkItemSizes_[0] / wavefront_size;
+  workGroupInfo_.wavefrontPerSIMD_ = program()->rocDevice().info().maxWorkItemSizes_[0] / wavefront_size;
   workGroupInfo_.wavefrontSize_ = wavefront_size;
   if (workGroupInfo_.compileSize_[0] != 0) {
     workGroupInfo_.size_ = workGroupInfo_.compileSize_[0] * workGroupInfo_.compileSize_[1] *
         workGroupInfo_.compileSize_[2];
   } else {
-    workGroupInfo_.size_ = program()->dev().info().preferredWorkGroupSize_;
+    workGroupInfo_.size_ = program()->rocDevice().info().preferredWorkGroupSize_;
   }
 
   // Pull out printf metadata from the ELF
diff --git a/rocclr/device/rocm/rocprogram.cpp b/rocclr/device/rocm/rocprogram.cpp
index 512bf4ee2c..cb766f9aba 100644
--- a/rocclr/device/rocm/rocprogram.cpp
+++ b/rocclr/device/rocm/rocprogram.cpp
@@ -127,7 +127,7 @@ bool Program::initClBinary(char* binaryIn, size_t size) {
 
 bool Program::defineGlobalVar(const char* name, void* dptr) {
   hsa_status_t status = HSA_STATUS_SUCCESS;
-  hsa_agent_t hsa_device = dev().getBackendDevice();
+  hsa_agent_t hsa_device = rocDevice().getBackendDevice();
 
   status = hsa_executable_agent_global_variable_define(hsaExecutable_, hsa_device, name, dptr);
   if (status != HSA_STATUS_SUCCESS) {
@@ -153,7 +153,7 @@ bool Program::createGlobalVarObj(amd::Memory** amd_mem_obj, void** device_pptr,
     return false;
   }
 
-  hsa_device= dev().getBackendDevice();
+  hsa_device = rocDevice().getBackendDevice();
 
   /* Find HSA Symbol by name */
   status = hsa_executable_get_symbol_by_name(hsaExecutable_, global_name, &hsa_device,
@@ -206,7 +206,7 @@ bool Program::createGlobalVarObj(amd::Memory** amd_mem_obj, void** device_pptr,
       return false;
     }
 
-    roc_device = static_cast<const roc::Device*>(&dev());
+    roc_device = &(rocDevice());
     *amd_mem_obj = new(roc_device->context()) amd::Buffer(roc_device->context(), 0, *bytes,
                                                           *device_pptr);
 
@@ -227,7 +227,7 @@ bool Program::createGlobalVarObj(amd::Memory** amd_mem_obj, void** device_pptr,
 }
 
 HSAILProgram::HSAILProgram(roc::NullDevice& device, amd::Program& owner) : roc::Program(device, owner) {
-  machineTarget_ = dev().deviceInfo().machineTarget_;
+  machineTarget_ = rocNullDevice().deviceInfo().machineTarget_;
 }
 
 HSAILProgram::~HSAILProgram() {
@@ -268,7 +268,7 @@ bool HSAILProgram::setKernels(amd::option::Options* options, void* binary, size_
 #if defined(WITH_COMPILER_LIB)
   // Stop compilation if it is an offline device - HSA runtime does not
   // support ISA compiled offline
-  if (!dev().isOnline()) {
+  if (!device().isOnline()) {
     return true;
   }
 
@@ -294,7 +294,7 @@ bool HSAILProgram::setKernels(amd::option::Options* options, void* binary, size_
     return false;
   }
 
-  hsa_agent_t hsaDevice = dev().getBackendDevice();
+  hsa_agent_t hsaDevice = rocDevice().getBackendDevice();
   status = hsa_executable_load_agent_code_object(hsaExecutable_, hsaDevice, hsaCodeObjectReader_,
                                                  nullptr, nullptr);
   if (status != HSA_STATUS_SUCCESS) {
@@ -432,7 +432,7 @@ LightningProgram::LightningProgram(roc::NullDevice& device, amd::Program& owner)
   : roc::Program(device, owner) {
   isLC_ = true;
   isHIP_ = (owner.language() == amd::Program::HIP);
-  machineTarget_ = dev().deviceInfo().machineTargetLC_;
+  machineTarget_ = rocNullDevice().deviceInfo().machineTargetLC_;
 }
 
 bool LightningProgram::createBinary(amd::option::Options* options) {
@@ -477,7 +477,7 @@ bool LightningProgram::setKernels(amd::option::Options* options, void* binary, s
     return false;
   }
 
-  hsa_agent_t agent = dev().getBackendDevice();
+  hsa_agent_t agent = rocDevice().getBackendDevice();
   hsa_status_t status;
 
   status = hsa_executable_create_alt(HSA_PROFILE_FULL, HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT,
diff --git a/rocclr/device/rocm/rocprogram.hpp b/rocclr/device/rocm/rocprogram.hpp
index b33448c881..f8f9691419 100644
--- a/rocclr/device/rocm/rocprogram.hpp
+++ b/rocclr/device/rocm/rocprogram.hpp
@@ -49,10 +49,13 @@ class Program : public device::Program {
   virtual bool initClBinary(char* binaryIn, size_t size);
 
   //! Return a typecasted GPU device
-  const NullDevice& dev() const { return static_cast<const NullDevice&>(device()); }
+  const NullDevice& rocNullDevice() const { return static_cast<const NullDevice&>(device()); }
+
+  //! Return a typecasted GPU device. The device must not be the NullDevice.
+  const Device& rocDevice() const { return static_cast<const Device&>(device()); }
 
   //! Returns the hsaBinary associated with the program
-  hsa_agent_t hsaDevice() const { return dev().getBackendDevice(); }
+  hsa_agent_t hsaDevice() const { return rocNullDevice().getBackendDevice(); }
 
   hsa_executable_t hsaExecutable() const { return hsaExecutable_; }