diff --git a/projects/rocr-runtime/src/debug.c b/projects/rocr-runtime/src/debug.c index 51b6f8b0b9..05a21329b9 100644 --- a/projects/rocr-runtime/src/debug.c +++ b/projects/rocr-runtime/src/debug.c @@ -29,14 +29,14 @@ #include static bool *is_device_debugged; -int debug_get_reg_status(uint32_t node_id, bool* is_debugged); +int debug_get_reg_status(uint32_t node_id, bool *is_debugged); HSAKMT_STATUS init_device_debugging_memory(unsigned int NumNodes) { unsigned int i; is_device_debugged = malloc(NumNodes * sizeof(bool)); - if (is_device_debugged == NULL) + if (!is_device_debugged) return HSAKMT_STATUS_NO_MEMORY; for (i = 0; i < NumNodes; i++) @@ -51,17 +51,14 @@ void destroy_device_debugging_memory(void) free(is_device_debugged); } -HSAKMT_STATUS -HSAKMTAPI -hsaKmtDbgRegister( - HSAuint32 NodeId //IN - ) +HSAKMT_STATUS HSAKMTAPI hsaKmtDbgRegister(HSAuint32 NodeId) { HSAKMT_STATUS result; uint32_t gpu_id; + CHECK_KFD_OPEN(); - if (is_device_debugged == NULL) + if (!is_device_debugged) return HSAKMT_STATUS_NO_MEMORY; result = validate_nodeid(NodeId, &gpu_id); @@ -69,31 +66,28 @@ hsaKmtDbgRegister( return result; struct kfd_ioctl_dbg_register_args args; + memset(&args, 0, sizeof(args)); args.gpu_id = gpu_id; - long err = kmtIoctl(kfd_fd, AMDKFD_IOC_DBG_REGISTER, &args); + + long err = kmtIoctl(kfd_fd, AMDKFD_IOC_DBG_REGISTER, &args); if (err == 0) result = HSAKMT_STATUS_SUCCESS; else result = HSAKMT_STATUS_ERROR; - return (result); + return result; } -/* =============================================================================== */ - -HSAKMT_STATUS -HSAKMTAPI -hsaKmtDbgUnregister( - HSAuint32 NodeId //IN - ) +HSAKMT_STATUS HSAKMTAPI hsaKmtDbgUnregister(HSAuint32 NodeId) { - HSAKMT_STATUS result; uint32_t gpu_id; + HSAKMT_STATUS result; + CHECK_KFD_OPEN(); - if (is_device_debugged == NULL) + if (!is_device_debugged) return HSAKMT_STATUS_NO_MEMORY; result = validate_nodeid(NodeId, &gpu_id); @@ -101,28 +95,22 @@ hsaKmtDbgUnregister( return result; struct 
kfd_ioctl_dbg_unregister_args args; + memset(&args, 0, sizeof(args)); args.gpu_id = gpu_id; - long err = kmtIoctl(kfd_fd, AMDKFD_IOC_DBG_UNREGISTER, &args); - if (err == 0) - result = HSAKMT_STATUS_SUCCESS; - else - result = HSAKMT_STATUS_ERROR; + long err = kmtIoctl(kfd_fd, AMDKFD_IOC_DBG_UNREGISTER, &args); - return (result); + if (err) + return HSAKMT_STATUS_ERROR; + + return HSAKMT_STATUS_SUCCESS; } -/* =============================================================================== */ - -HSAKMT_STATUS -HSAKMTAPI -hsaKmtDbgWavefrontControl( - HSAuint32 NodeId, //IN - HSA_DBG_WAVEOP Operand, //IN - HSA_DBG_WAVEMODE Mode, //IN - HSAuint32 TrapId, //IN - HsaDbgWaveMessage* DbgWaveMsgRing //IN (? - see thunk API doc!) - ) +HSAKMT_STATUS HSAKMTAPI hsaKmtDbgWavefrontControl(HSAuint32 NodeId, + HSA_DBG_WAVEOP Operand, + HSA_DBG_WAVEMODE Mode, + HSAuint32 TrapId, + HsaDbgWaveMessage *DbgWaveMsgRing) { HSAKMT_STATUS result; uint32_t gpu_id; @@ -136,18 +124,14 @@ hsaKmtDbgWavefrontControl( return result; -/* Determine Size of the ioctl buffer */ +/* Determine Size of the ioctl buffer */ + uint32_t buff_size = sizeof(Operand) + sizeof(Mode) + sizeof(TrapId) + + sizeof(DbgWaveMsgRing->DbgWaveMsg) + + sizeof(DbgWaveMsgRing->MemoryVA) + sizeof(*args); - uint32_t buff_size = sizeof(Operand)+ - sizeof(Mode) + sizeof(TrapId) + - sizeof(DbgWaveMsgRing->DbgWaveMsg)+ sizeof(DbgWaveMsgRing->MemoryVA) + sizeof(*args); - - - args = (struct kfd_ioctl_dbg_wave_control_args*) malloc(buff_size); - if (args == NULL) - { + args = (struct kfd_ioctl_dbg_wave_control_args *)malloc(buff_size); + if (!args) return HSAKMT_STATUS_ERROR; - } memset(args, 0, buff_size); @@ -155,67 +139,53 @@ hsaKmtDbgWavefrontControl( args->buf_size_in_bytes = buff_size; /* increment pointer to the start of the non fixed part */ - - unsigned char* run_ptr = (unsigned char*)args + sizeof(*args); + unsigned char *run_ptr = (unsigned char *)args + sizeof(*args); /* save variable content pointer for kfd */ 
args->content_ptr = (uint64_t)run_ptr; /* insert items, and increment pointer accordingly */ - - *((HSA_DBG_WAVEOP*)run_ptr) = Operand; + *((HSA_DBG_WAVEOP *)run_ptr) = Operand; run_ptr += sizeof(Operand); - - *((HSA_DBG_WAVEMODE*)run_ptr) = Mode; + *((HSA_DBG_WAVEMODE *)run_ptr) = Mode; run_ptr += sizeof(Mode); - - *((HSAuint32*)run_ptr) = TrapId; + *((HSAuint32 *)run_ptr) = TrapId; run_ptr += sizeof(TrapId); - *((HsaDbgWaveMessageAMD*)run_ptr) = DbgWaveMsgRing->DbgWaveMsg; - run_ptr += sizeof(DbgWaveMsgRing->DbgWaveMsg); + *((HsaDbgWaveMessageAMD *)run_ptr) = DbgWaveMsgRing->DbgWaveMsg; + run_ptr += sizeof(DbgWaveMsgRing->DbgWaveMsg); - *((void**)run_ptr) = DbgWaveMsgRing->MemoryVA; + *((void **)run_ptr) = DbgWaveMsgRing->MemoryVA; run_ptr += sizeof(DbgWaveMsgRing->MemoryVA); /* send to kernel */ - long err = kmtIoctl(kfd_fd, AMDKFD_IOC_DBG_WAVE_CONTROL, args); - free (args); + free(args); - if (err == 0) - return HSAKMT_STATUS_SUCCESS; - else + if (err) return HSAKMT_STATUS_ERROR; + + return HSAKMT_STATUS_SUCCESS; } - -/* =============================================================================== */ - -HSAKMT_STATUS -HSAKMTAPI -hsaKmtDbgAddressWatch( - HSAuint32 NodeId, //IN - HSAuint32 NumWatchPoints, //IN - HSA_DBG_WATCH_MODE WatchMode[], //IN - void* WatchAddress[], //IN - HSAuint64 WatchMask[], //IN, optional - HsaEvent* WatchEvent[] //IN, optional - ) +HSAKMT_STATUS HSAKMTAPI hsaKmtDbgAddressWatch(HSAuint32 NodeId, + HSAuint32 NumWatchPoints, + HSA_DBG_WATCH_MODE WatchMode[], + void *WatchAddress[], + HSAuint64 WatchMask[], + HsaEvent *WatchEvent[]) { HSAKMT_STATUS result; uint32_t gpu_id; /* determine the size of the watch mask and event buffers - * the value is NULL if and only if no vector data should be attached - * + * the value is NULL if and only if no vector data should be attached */ - - uint32_t watch_mask_items = WatchMask[0] > 0 ? NumWatchPoints:1; - uint32_t watch_event_items = WatchEvent != NULL ? 
NumWatchPoints:0; + uint32_t watch_mask_items = WatchMask[0] > 0 ? NumWatchPoints:1; + uint32_t watch_event_items = WatchEvent != NULL ? NumWatchPoints:0; struct kfd_ioctl_dbg_address_watch_args *args; HSAuint32 i = 0; @@ -229,23 +199,18 @@ hsaKmtDbgAddressWatch( if (NumWatchPoints > MAX_ALLOWED_NUM_POINTS) return HSAKMT_STATUS_INVALID_PARAMETER; -/* Size and structure of the ioctl buffer is dynamic in this case - * Here we calculate the buff size. - */ + /* Size and structure of the ioctl buffer is dynamic in this case + * Here we calculate the buff size. + */ + uint32_t buff_size = sizeof(NumWatchPoints) + + (sizeof(WatchMode[0]) + sizeof(WatchAddress[0])) * + NumWatchPoints + + watch_mask_items * sizeof(HSAuint64) + + watch_event_items * sizeof(HsaEvent *) + sizeof(*args); - uint32_t buff_size =sizeof(NumWatchPoints)+ - ( sizeof(WatchMode[0]) + - sizeof(WatchAddress[0]))*NumWatchPoints + - watch_mask_items*sizeof(HSAuint64) + - watch_event_items*sizeof(HsaEvent*)+ - sizeof(*args); - - - args = (struct kfd_ioctl_dbg_address_watch_args*) malloc(buff_size); - if (args == NULL) - { + args = (struct kfd_ioctl_dbg_address_watch_args *) malloc(buff_size); + if (!args) return HSAKMT_STATUS_ERROR; - } memset(args, 0, buff_size); @@ -254,64 +219,51 @@ hsaKmtDbgAddressWatch( /* increment pointer to the start of the non fixed part */ - - unsigned char* run_ptr = (unsigned char*)args + sizeof(*args); + unsigned char *run_ptr = (unsigned char *)args + sizeof(*args); /* save variable content pointer for kfd */ args->content_ptr = (uint64_t)run_ptr; /* insert items, and increment pointer accordingly */ - *((HSAuint32*)run_ptr) = NumWatchPoints; + *((HSAuint32 *)run_ptr) = NumWatchPoints; run_ptr += sizeof(NumWatchPoints); - for (i=0; i < NumWatchPoints; i++) - { - *((HSA_DBG_WATCH_MODE*)run_ptr) = WatchMode[i]; - run_ptr += sizeof(WatchMode[i]); + for (i = 0; i < NumWatchPoints; i++) { + *((HSA_DBG_WATCH_MODE *)run_ptr) = WatchMode[i]; + run_ptr += sizeof(WatchMode[i]); } - for 
(i=0; i < NumWatchPoints; i++) - { - *((void**)run_ptr) = WatchAddress[i]; - run_ptr += sizeof(WatchAddress[i]); + for (i = 0; i < NumWatchPoints; i++) { + *((void **)run_ptr) = WatchAddress[i]; + run_ptr += sizeof(WatchAddress[i]); } - for (i=0; i < watch_mask_items; i++) - { - *((HSAuint64*)run_ptr) = WatchMask[i]; - run_ptr += sizeof(WatchMask[i]); + for (i = 0; i < watch_mask_items; i++) { + *((HSAuint64 *)run_ptr) = WatchMask[i]; + run_ptr += sizeof(WatchMask[i]); } - for (i=0; i < watch_event_items; i++) - { - *((HsaEvent**)run_ptr) = WatchEvent[i]; - run_ptr += sizeof(WatchEvent[i]); + for (i = 0; i < watch_event_items; i++) { + *((HsaEvent **)run_ptr) = WatchEvent[i]; + run_ptr += sizeof(WatchEvent[i]); } /* send to kernel */ - long err = kmtIoctl(kfd_fd, AMDKFD_IOC_DBG_ADDRESS_WATCH, args); - free (args); + free(args); - if (err == 0) - { - return HSAKMT_STATUS_SUCCESS; - } - else - { + if (err) return HSAKMT_STATUS_ERROR; - } + return HSAKMT_STATUS_SUCCESS; } -/* =============================================================================== */ -int debug_get_reg_status(uint32_t node_id, bool* is_debugged) +int debug_get_reg_status(uint32_t node_id, bool *is_debugged) { *is_debugged = NULL; - if (is_device_debugged == NULL) + if (!is_device_debugged) return -1; - else { - *is_debugged = is_device_debugged[node_id]; - return 0; - } + + *is_debugged = is_device_debugged[node_id]; + return 0; } diff --git a/projects/rocr-runtime/src/events.c b/projects/rocr-runtime/src/events.c index 373018aa29..d934391c6d 100644 --- a/projects/rocr-runtime/src/events.c +++ b/projects/rocr-runtime/src/events.c @@ -47,31 +47,24 @@ static bool IsSystemEventType(HSA_EVENTTYPE type) return (type != HSA_EVENTTYPE_SIGNAL && type != HSA_EVENTTYPE_DEBUG_EVENT); } -HSAKMT_STATUS -HSAKMTAPI -hsaKmtCreateEvent( - HsaEventDescriptor* EventDesc, //IN - bool ManualReset, //IN - bool IsSignaled, //IN - HsaEvent** Event //OUT - ) +HSAKMT_STATUS HSAKMTAPI hsaKmtCreateEvent(HsaEventDescriptor 
*EventDesc, + bool ManualReset, bool IsSignaled, + HsaEvent **Event) { CHECK_KFD_OPEN(); if (EventDesc->EventType >= HSA_EVENTTYPE_MAXID) - { return HSAKMT_STATUS_INVALID_PARAMETER; - } - HsaEvent* e = malloc(sizeof(HsaEvent)); - if (e == NULL) - { + HsaEvent *e = malloc(sizeof(HsaEvent)); + + if (!e) return HSAKMT_STATUS_ERROR; - } memset(e, 0, sizeof(*e)); struct kfd_ioctl_create_event_args args; + memset(&args, 0, sizeof(args)); args.event_type = EventDesc->EventType; @@ -81,7 +74,7 @@ hsaKmtCreateEvent( /* dGPU code */ pthread_mutex_lock(&hsakmt_mutex); - if (is_dgpu && events_page == NULL) { + if (is_dgpu && !events_page) { events_page = allocate_exec_aligned_memory_gpu( KFD_SIGNAL_EVENT_LIMIT * 8, PAGE_SIZE, 0, true); if (!events_page) { @@ -100,7 +93,7 @@ hsaKmtCreateEvent( e->EventId = args.event_id; - if (events_page == NULL && args.event_page_offset > 0) { + if (!events_page && args.event_page_offset > 0) { events_page = mmap(NULL, KFD_SIGNAL_EVENT_LIMIT * 8, PROT_WRITE | PROT_READ, MAP_SHARED, kfd_fd, args.event_page_offset); if (events_page == MAP_FAILED) { @@ -127,6 +120,7 @@ hsaKmtCreateEvent( if (IsSignaled && !IsSystemEventType(e->EventData.EventType)) { struct kfd_ioctl_set_event_args set_args; + memset(&set_args, 0, sizeof(set_args)); set_args.event_id = args.event_id; @@ -138,11 +132,7 @@ hsaKmtCreateEvent( return HSAKMT_STATUS_SUCCESS; } -HSAKMT_STATUS -HSAKMTAPI -hsaKmtDestroyEvent( - HsaEvent* Event //IN - ) +HSAKMT_STATUS HSAKMTAPI hsaKmtDestroyEvent(HsaEvent *Event) { CHECK_KFD_OPEN(); @@ -150,34 +140,33 @@ hsaKmtDestroyEvent( return HSAKMT_STATUS_INVALID_HANDLE; struct kfd_ioctl_destroy_event_args args; + memset(&args, 0, sizeof(args)); args.event_id = Event->EventId; - if (kmtIoctl(kfd_fd, AMDKFD_IOC_DESTROY_EVENT, &args) != 0) { + if (kmtIoctl(kfd_fd, AMDKFD_IOC_DESTROY_EVENT, &args) != 0) return HSAKMT_STATUS_ERROR; - } free(Event); return HSAKMT_STATUS_SUCCESS; } -HSAKMT_STATUS -HSAKMTAPI -hsaKmtSetEvent( - HsaEvent* Event //IN - ) 
+HSAKMT_STATUS HSAKMTAPI hsaKmtSetEvent(HsaEvent *Event) { CHECK_KFD_OPEN(); if (!Event) return HSAKMT_STATUS_INVALID_HANDLE; - /* Although the spec is doesn't say, don't allow system-defined events to be signaled. */ + /* Although the spec doesn't say, don't allow system-defined events + * to be signaled. + */ if (IsSystemEventType(Event->EventData.EventType)) return HSAKMT_STATUS_ERROR; struct kfd_ioctl_set_event_args args; + memset(&args, 0, sizeof(args)); args.event_id = Event->EventId; @@ -188,22 +177,21 @@ hsaKmtSetEvent( return HSAKMT_STATUS_SUCCESS; } -HSAKMT_STATUS -HSAKMTAPI -hsaKmtResetEvent( - HsaEvent* Event //IN - ) +HSAKMT_STATUS HSAKMTAPI hsaKmtResetEvent(HsaEvent *Event) { CHECK_KFD_OPEN(); if (!Event) return HSAKMT_STATUS_INVALID_HANDLE; - /* Although the spec is doesn't say, don't allow system-defined events to be signaled. */ + /* Although the spec doesn't say, don't allow system-defined events + * to be reset. + */ if (IsSystemEventType(Event->EventData.EventType)) return HSAKMT_STATUS_ERROR; struct kfd_ioctl_reset_event_args args; + memset(&args, 0, sizeof(args)); args.event_id = Event->EventId; @@ -214,11 +202,7 @@ hsaKmtResetEvent( return HSAKMT_STATUS_SUCCESS; } -HSAKMT_STATUS -HSAKMTAPI -hsaKmtQueryEventState( - HsaEvent* Event //IN - ) +HSAKMT_STATUS HSAKMTAPI hsaKmtQueryEventState(HsaEvent *Event) { CHECK_KFD_OPEN(); @@ -228,12 +212,8 @@ hsaKmtQueryEventState( return HSAKMT_STATUS_SUCCESS; } -HSAKMT_STATUS -HSAKMTAPI -hsaKmtWaitOnEvent( - HsaEvent* Event, //IN - HSAuint32 Milliseconds //IN - ) +HSAKMT_STATUS HSAKMTAPI hsaKmtWaitOnEvent(HsaEvent *Event, + HSAuint32 Milliseconds) { if (!Event) return HSAKMT_STATUS_INVALID_HANDLE; @@ -241,14 +221,10 @@ hsaKmtWaitOnEvent( return hsaKmtWaitOnMultipleEvents(&Event, 1, true, Milliseconds); } -HSAKMT_STATUS -HSAKMTAPI -hsaKmtWaitOnMultipleEvents( - HsaEvent* Events[], //IN - HSAuint32 NumEvents, //IN - bool WaitOnAll, //IN - HSAuint32 Milliseconds //IN - ) +HSAKMT_STATUS HSAKMTAPI 
hsaKmtWaitOnMultipleEvents(HsaEvent *Events[], + HSAuint32 NumEvents, + bool WaitOnAll, + HSAuint32 Milliseconds) { CHECK_KFD_OPEN(); @@ -256,12 +232,14 @@ hsaKmtWaitOnMultipleEvents( return HSAKMT_STATUS_INVALID_HANDLE; struct kfd_event_data *event_data = calloc(NumEvents, sizeof(struct kfd_event_data)); + for (HSAuint32 i = 0; i < NumEvents; i++) { event_data[i].event_id = Events[i]->EventId; event_data[i].kfd_event_data_ext = (uint64_t)(uintptr_t)NULL; } struct kfd_ioctl_wait_events_args args; + memset(&args, 0, sizeof(args)); args.wait_for_all = WaitOnAll; @@ -271,12 +249,10 @@ hsaKmtWaitOnMultipleEvents( HSAKMT_STATUS result; - if (kmtIoctl(kfd_fd, AMDKFD_IOC_WAIT_EVENTS, &args) == -1) { + if (kmtIoctl(kfd_fd, AMDKFD_IOC_WAIT_EVENTS, &args) == -1) result = HSAKMT_STATUS_ERROR; - } - else if (args.wait_result == KFD_IOC_WAIT_RESULT_TIMEOUT) { + else if (args.wait_result == KFD_IOC_WAIT_RESULT_TIMEOUT) result = HSAKMT_STATUS_WAIT_TIMEOUT; - } else { result = HSAKMT_STATUS_SUCCESS; for (HSAuint32 i = 0; i < NumEvents; i++) { diff --git a/projects/rocr-runtime/src/fmm.c b/projects/rocr-runtime/src/fmm.c index 2f2eb73f8b..88e6052851 100644 --- a/projects/rocr-runtime/src/fmm.c +++ b/projects/rocr-runtime/src/fmm.c @@ -37,7 +37,7 @@ #define NON_VALID_GPU_ID 0 -#define INIT_MANAGEBLE_APERTURE(base_value, limit_value) { \ +#define INIT_MANAGEABLE_APERTURE(base_value, limit_value) { \ .base = (void *) base_value, \ .limit = (void *) limit_value, \ .align = 0, \ @@ -52,25 +52,20 @@ struct vm_object { void *userptr; uint64_t userptr_size; uint64_t size; /* size allocated on GPU. 
When the user requests a random - * size, Thunk aligns it to page size and allocates this - * aligned size on GPU - */ + * size, Thunk aligns it to page size and allocates this + * aligned size on GPU + */ uint64_t handle; /* opaque */ uint32_t node_id; struct vm_object *next; struct vm_object *prev; uint32_t flags; /* memory allocation flags */ - /* - * Registered nodes to map on SVM mGPU - */ + /* Registered nodes to map on SVM mGPU */ uint32_t *registered_device_id_array; uint32_t registered_device_id_array_size; uint32_t *registered_node_id_array; - uint32_t registration_count; /* the same memory region can be - registered multiple times */ - /* - * Nodes that mapped already - */ + uint32_t registration_count; /* the same memory region can be registered multiple times */ + /* Nodes that mapped already */ uint32_t *mapped_device_id_array; uint32_t mapped_device_id_array_size; uint32_t *mapped_node_id_array; @@ -101,7 +96,7 @@ typedef struct { vm_area_t *vm_ranges; vm_object_t *vm_objects; pthread_mutex_t fmm_mutex; -} manageble_aperture_t; +} manageable_aperture_t; typedef struct { void *base; @@ -114,23 +109,25 @@ typedef struct { uint32_t node_id; uint64_t local_mem_size; aperture_t lds_aperture; - manageble_aperture_t scratch_aperture; - manageble_aperture_t scratch_physical; /* For dGPU, scratch physical - is allocated from dgpu_aperture. When requested by RT, each - GPU will get a differnt range */ - manageble_aperture_t gpuvm_aperture; /* used for GPUVM on APU, outside - * the canonical address range */ + manageable_aperture_t scratch_aperture; + manageable_aperture_t scratch_physical; /* For dGPU, scratch physical is allocated from + * dgpu_aperture. 
When requested by RT, each + * GPU will get a different range + */ + manageable_aperture_t gpuvm_aperture; /* used for GPUVM on APU, outside the canonical address range */ } gpu_mem_t; /* The main structure for dGPU Shared Virtual Memory Management */ typedef struct { /* used for non-coherent system and invisible device mem on dGPU. - * This aperture is shared by all dGPUs */ - manageble_aperture_t dgpu_aperture; + * This aperture is shared by all dGPUs + */ + manageable_aperture_t dgpu_aperture; /* used for coherent (fine-grain) system memory on dGPU, - * This aperture is shared by all dGPUs */ - manageble_aperture_t dgpu_alt_aperture; + * This aperture is shared by all dGPUs + */ + manageable_aperture_t dgpu_alt_aperture; /* whether to use userptr for paged memory */ bool userptr_for_paged_mem; @@ -140,16 +137,17 @@ typedef struct { } svm_t; /* The other apertures are specific to each GPU. gpu_mem_t manages GPU -* specific memory apertures. */ + * specific memory apertures. + */ static gpu_mem_t *gpu_mem; static unsigned int gpu_mem_count; -static void *dgpu_shared_aperture_base = NULL; -static void *dgpu_shared_aperture_limit = NULL; +static void *dgpu_shared_aperture_base; +static void *dgpu_shared_aperture_limit; static svm_t svm = { - INIT_MANAGEBLE_APERTURE(0, 0), - INIT_MANAGEBLE_APERTURE(0, 0), + INIT_MANAGEABLE_APERTURE(0, 0), + INIT_MANAGEABLE_APERTURE(0, 0), true, false }; @@ -158,11 +156,11 @@ static svm_t svm = { * via GPU driver, they are not managed by GPUVM. cpuvm_aperture keeps track * of this part of memory. 
*/ -static manageble_aperture_t cpuvm_aperture = INIT_MANAGEBLE_APERTURE(0, 0); +static manageable_aperture_t cpuvm_aperture = INIT_MANAGEABLE_APERTURE(0, 0); /* GPU node array for default mappings */ -static uint32_t all_gpu_id_array_size = 0; -static uint32_t *all_gpu_id_array = NULL; +static uint32_t all_gpu_id_array_size; +static uint32_t *all_gpu_id_array; /* IPC structures and helper functions */ typedef enum _HSA_APERTURE { @@ -185,34 +183,35 @@ typedef struct _HsaSharedMemoryStruct { HSAuint32 ExportGpuId; } HsaSharedMemoryStruct; -static inline const HsaSharedMemoryStruct * to_const_hsa_shared_memory_struct( +static inline const HsaSharedMemoryStruct *to_const_hsa_shared_memory_struct( const HsaSharedMemoryHandle *SharedMemoryHandle) { return (const HsaSharedMemoryStruct *)SharedMemoryHandle; } -static inline HsaSharedMemoryStruct * to_hsa_shared_memory_struct( +static inline HsaSharedMemoryStruct *to_hsa_shared_memory_struct( HsaSharedMemoryHandle *SharedMemoryHandle) { return (HsaSharedMemoryStruct *)SharedMemoryHandle; } -static inline HsaSharedMemoryHandle * to_hsa_shared_memory_handle( +static inline HsaSharedMemoryHandle *to_hsa_shared_memory_handle( HsaSharedMemoryStruct *SharedMemoryStruct) { return (HsaSharedMemoryHandle *)SharedMemoryStruct; } -extern int debug_get_reg_status(uint32_t node_id, bool* is_debugged); +extern int debug_get_reg_status(uint32_t node_id, bool *is_debugged); static HSAKMT_STATUS dgpu_mem_init(uint32_t node_id, void **base, void **limit); static int set_dgpu_aperture(uint32_t gpu_id, uint64_t base, uint64_t limit); -static void __fmm_release(void *address, manageble_aperture_t *aperture); +static void __fmm_release(void *address, manageable_aperture_t *aperture); static int _fmm_unmap_from_gpu_scratch(uint32_t gpu_id, - manageble_aperture_t *aperture, + manageable_aperture_t *aperture, void *address); static void print_device_id_array(uint32_t *device_id_array, uint32_t device_id_array_size); -static int32_t 
find_first_dgpu(HSAuint32 *gpu_id) { +static int32_t find_first_dgpu(HSAuint32 *gpu_id) +{ int32_t i; *gpu_id = NON_VALID_GPU_ID; @@ -270,7 +269,7 @@ static vm_object_t *vm_create_and_init_object(void *start, uint64_t size, } -static void vm_remove_area(manageble_aperture_t *app, vm_area_t *area) +static void vm_remove_area(manageable_aperture_t *app, vm_area_t *area) { vm_area_t *next; vm_area_t *prev; @@ -278,7 +277,7 @@ static void vm_remove_area(manageble_aperture_t *app, vm_area_t *area) next = area->next; prev = area->prev; - if (prev == NULL) /* The first element */ + if (!prev) /* The first element */ app->vm_ranges = next; else prev->next = next; @@ -289,7 +288,7 @@ static void vm_remove_area(manageble_aperture_t *app, vm_area_t *area) free(area); } -static void vm_remove_object(manageble_aperture_t *app, vm_object_t *object) +static void vm_remove_object(manageable_aperture_t *app, vm_object_t *object) { vm_object_t *next; vm_object_t *prev; @@ -305,11 +304,10 @@ static void vm_remove_object(manageble_aperture_t *app, vm_object_t *object) free(object->registered_device_id_array); object->registered_device_id_array_size = 0; } - if (object->mapped_device_id_array != NULL && + if (object->mapped_device_id_array && object->mapped_device_id_array_size > 0 && object->mapped_device_id_array != all_gpu_id_array && - object->mapped_device_id_array != object->registered_device_id_array) - { + object->mapped_device_id_array != object->registered_device_id_array) { free(object->mapped_device_id_array); object->mapped_device_id_array_size = 0; } @@ -326,7 +324,7 @@ static void vm_remove_object(manageble_aperture_t *app, vm_object_t *object) next = object->next; prev = object->prev; - if (prev == NULL) /* The first element */ + if (!prev) /* The first element */ app->vm_objects = next; else prev->next = next; @@ -362,7 +360,7 @@ static void vm_add_object_before(vm_object_t *before_this, prev->next = new_object; } -static void vm_split_area(manageble_aperture_t *app, 
vm_area_t *area, +static void vm_split_area(manageable_aperture_t *app, vm_area_t *area, void *address, uint64_t MemorySizeInBytes) { /* @@ -379,7 +377,7 @@ static void vm_split_area(manageble_aperture_t *app, vm_area_t *area, vm_add_area_after(area, new_area); } -static vm_object_t *vm_find_object_by_address(manageble_aperture_t *app, +static vm_object_t *vm_find_object_by_address(manageable_aperture_t *app, const void *address, uint64_t size) { vm_object_t *cur = app->vm_objects; @@ -396,8 +394,8 @@ static vm_object_t *vm_find_object_by_address(manageble_aperture_t *app, return cur; /* NULL if not found */ } -static vm_object_t *vm_find_object_by_address_range(manageble_aperture_t *app, - const void *address) +static vm_object_t *vm_find_object_by_address_range(manageable_aperture_t *app, + const void *address) { vm_object_t *cur = app->vm_objects; @@ -411,14 +409,15 @@ static vm_object_t *vm_find_object_by_address_range(manageble_aperture_t *app, return cur; /* NULL if not found */ } -static vm_object_t *vm_find_object_by_userptr(manageble_aperture_t *app, +static vm_object_t *vm_find_object_by_userptr(manageable_aperture_t *app, const void *address, HSAuint64 size) { vm_object_t *cur = app->vm_objects, *obj; uint32_t found = 0; /* Look up the userptr that matches the address. If size is specified, - the size needs to match too. */ + * the size needs to match too. + */ while (cur) { if ((cur->userptr == address) && ((cur->userptr_size == size) || !size)) { @@ -429,7 +428,8 @@ static vm_object_t *vm_find_object_by_userptr(manageble_aperture_t *app, } /* If size is not specified, we need to ensure the vm_obj found is the - only obj having this address. */ + * only obj having this address. 
+ */ if (found && !size) { obj = cur->next; while (obj) { @@ -444,7 +444,7 @@ static vm_object_t *vm_find_object_by_userptr(manageble_aperture_t *app, return cur; /* NULL if any look-up failure */ } -static vm_object_t *vm_find_object_by_userptr_range(manageble_aperture_t *app, +static vm_object_t *vm_find_object_by_userptr_range(manageable_aperture_t *app, const void *address) { vm_object_t *cur = app->vm_objects; @@ -460,7 +460,7 @@ static vm_object_t *vm_find_object_by_userptr_range(manageble_aperture_t *app, return cur; /* NULL if not found */ } -static vm_area_t *vm_find(manageble_aperture_t *app, void *address) +static vm_area_t *vm_find(manageable_aperture_t *app, void *address) { vm_area_t *cur = app->vm_ranges; @@ -486,7 +486,7 @@ static bool aperture_is_valid(void *app_base, void *app_limit) * Leave at least one guard page after every object to catch * out-of-bounds accesses with VM faults. */ -static uint64_t vm_align_area_size(manageble_aperture_t *app, uint64_t size) +static uint64_t vm_align_area_size(manageable_aperture_t *app, uint64_t size) { return ALIGN_UP(size + (uint64_t)app->guard_pages * PAGE_SIZE, app->align); @@ -495,7 +495,7 @@ static uint64_t vm_align_area_size(manageble_aperture_t *app, uint64_t size) /* * Assumes that fmm_mutex is locked on entry. */ -static void aperture_release_area(manageble_aperture_t *app, void *address, +static void aperture_release_area(manageable_aperture_t *app, void *address, uint64_t MemorySizeInBytes) { vm_area_t *area; @@ -531,7 +531,7 @@ static void aperture_release_area(manageble_aperture_t *app, void *address, * returns allocated address or NULL. Assumes, that fmm_mutex is locked * on entry. 
*/ -static void *aperture_allocate_area_aligned(manageble_aperture_t *app, +static void *aperture_allocate_area_aligned(manageable_aperture_t *app, uint64_t MemorySizeInBytes, uint64_t offset, uint64_t align) @@ -584,7 +584,7 @@ static void *aperture_allocate_area_aligned(manageble_aperture_t *app, return start; } -static void *aperture_allocate_area(manageble_aperture_t *app, +static void *aperture_allocate_area(manageable_aperture_t *app, uint64_t MemorySizeInBytes, uint64_t offset) { @@ -592,7 +592,7 @@ static void *aperture_allocate_area(manageble_aperture_t *app, } /* returns 0 on success. Assumes, that fmm_mutex is locked on entry */ -static vm_object_t *aperture_allocate_object(manageble_aperture_t *app, +static vm_object_t *aperture_allocate_object(manageable_aperture_t *app, void *new_address, uint64_t handle, uint64_t MemorySizeInBytes, @@ -610,7 +610,7 @@ static vm_object_t *aperture_allocate_object(manageble_aperture_t *app, return NULL; /* check for non-empty list */ - if (app->vm_objects != NULL) + if (app->vm_objects) /* Add it before the first element */ vm_add_object_before(app->vm_objects, new_object); @@ -630,25 +630,25 @@ static int32_t gpu_mem_find_by_gpu_id(uint32_t gpu_id) return -1; } -static manageble_aperture_t *fmm_get_aperture(HsaApertureInfo info) +static manageable_aperture_t *fmm_get_aperture(HsaApertureInfo info) { switch (info.type) { - case HSA_APERTURE_DGPU: - return &svm.dgpu_aperture; - case HSA_APERTURE_DGPU_ALT: - return &svm.dgpu_alt_aperture; - case HSA_APERTURE_GPUVM: - return &gpu_mem[info.idx].gpuvm_aperture; - case HSA_APERTURE_CPUVM: - return &cpuvm_aperture; - default: - return NULL; + case HSA_APERTURE_DGPU: + return &svm.dgpu_aperture; + case HSA_APERTURE_DGPU_ALT: + return &svm.dgpu_alt_aperture; + case HSA_APERTURE_GPUVM: + return &gpu_mem[info.idx].gpuvm_aperture; + case HSA_APERTURE_CPUVM: + return &cpuvm_aperture; + default: + return NULL; } } -static manageble_aperture_t *fmm_find_aperture(const void *address, 
+static manageable_aperture_t *fmm_find_aperture(const void *address, HsaApertureInfo *info) { - manageble_aperture_t *aperture = NULL; + manageable_aperture_t *aperture = NULL; uint32_t i; HsaApertureInfo _info = { .type = HSA_APERTURE_UNSUPPORTED, .idx = 0}; @@ -657,19 +657,16 @@ static manageble_aperture_t *fmm_find_aperture(const void *address, address <= svm.dgpu_aperture.limit) { aperture = &svm.dgpu_aperture; _info.type = HSA_APERTURE_DGPU; - } - else if (address >= svm.dgpu_alt_aperture.base && + } else if (address >= svm.dgpu_alt_aperture.base && address <= svm.dgpu_alt_aperture.limit) { aperture = &svm.dgpu_alt_aperture; _info.type = HSA_APERTURE_DGPU_ALT; - } - else { + } else { /* Not in SVM, it can be system memory registered by userptr */ aperture = &svm.dgpu_aperture; _info.type = HSA_APERTURE_DGPU; } - } - else { /* APU */ + } else { /* APU */ for (i = 0; i < gpu_mem_count; i++) { if ((address >= gpu_mem[i].gpuvm_aperture.base) && (address <= gpu_mem[i].gpuvm_aperture.limit)) { @@ -696,7 +693,7 @@ static manageble_aperture_t *fmm_find_aperture(const void *address, */ static vm_object_t *fmm_allocate_memory_in_device(uint32_t gpu_id, void *mem, uint64_t MemorySizeInBytes, - manageble_aperture_t *aperture, + manageable_aperture_t *aperture, uint64_t *mmap_offset, uint32_t flags) { @@ -727,8 +724,9 @@ static vm_object_t *fmm_allocate_memory_in_device(uint32_t gpu_id, void *mem, /* Allocate object */ pthread_mutex_lock(&aperture->fmm_mutex); - if (!(vm_obj = aperture_allocate_object(aperture, mem, args.handle, - MemorySizeInBytes, flags))) + vm_obj = aperture_allocate_object(aperture, mem, args.handle, + MemorySizeInBytes, flags); + if (!vm_obj) goto err_object_allocation_failed; pthread_mutex_unlock(&aperture->fmm_mutex); @@ -773,7 +771,7 @@ static void aperture_print(aperture_t *app) printf("\t Limit: %p\n", app->limit); } -static void manageble_aperture_print(manageble_aperture_t *app) +static void manageable_aperture_print(manageable_aperture_t 
*app) { vm_area_t *cur = app->vm_ranges; vm_object_t *object = app->vm_objects; @@ -801,17 +799,17 @@ void fmm_print(uint32_t gpu_id) printf("LDS aperture:\n"); aperture_print(&gpu_mem[gpu_mem_id].lds_aperture); printf("GPUVM aperture:\n"); - manageble_aperture_print(&gpu_mem[gpu_mem_id].gpuvm_aperture); + manageable_aperture_print(&gpu_mem[gpu_mem_id].gpuvm_aperture); printf("Scratch aperture:\n"); - manageble_aperture_print(&gpu_mem[gpu_mem_id].scratch_aperture); + manageable_aperture_print(&gpu_mem[gpu_mem_id].scratch_aperture); printf("Scratch backing memory:\n"); - manageble_aperture_print(&gpu_mem[gpu_mem_id].scratch_physical); + manageable_aperture_print(&gpu_mem[gpu_mem_id].scratch_physical); } printf("dGPU aperture:\n"); - manageble_aperture_print(&svm.dgpu_aperture); + manageable_aperture_print(&svm.dgpu_aperture); printf("dGPU alt aperture:\n"); - manageble_aperture_print(&svm.dgpu_alt_aperture); + manageable_aperture_print(&svm.dgpu_alt_aperture); } #else @@ -825,7 +823,7 @@ static void fmm_release_scratch(uint32_t gpu_id) int32_t gpu_mem_id; uint64_t size; vm_object_t *obj; - manageble_aperture_t *aperture; + manageable_aperture_t *aperture; gpu_mem_id = gpu_mem_find_by_gpu_id(gpu_id); if (gpu_mem_id < 0) @@ -840,6 +838,7 @@ static void fmm_release_scratch(uint32_t gpu_id) pthread_mutex_lock(&aperture->fmm_mutex); while ((obj = aperture->vm_objects)) { void *obj_addr = obj->start; + pthread_mutex_unlock(&aperture->fmm_mutex); _fmm_unmap_from_gpu_scratch(gpu_id, aperture, obj_addr); @@ -866,7 +865,7 @@ static void fmm_release_scratch(uint32_t gpu_id) #define SCRATCH_ALIGN 0x10000 void *fmm_allocate_scratch(uint32_t gpu_id, uint64_t MemorySizeInBytes) { - manageble_aperture_t *aperture_phy; + manageable_aperture_t *aperture_phy; struct kfd_ioctl_alloc_memory_of_scratch_args args; int32_t gpu_mem_id; void *mem = NULL; @@ -878,7 +877,7 @@ void *fmm_allocate_scratch(uint32_t gpu_id, uint64_t MemorySizeInBytes) return NULL; aperture_phy = 
&gpu_mem[gpu_mem_id].scratch_physical; - if (aperture_phy->base != NULL || aperture_phy->limit != NULL) + if (aperture_phy->base || aperture_phy->limit) /* Scratch was already allocated for this GPU */ return NULL; @@ -893,10 +892,11 @@ void *fmm_allocate_scratch(uint32_t gpu_id, uint64_t MemorySizeInBytes) uint64_t aligned_padded_size = aligned_size + SCRATCH_ALIGN - PAGE_SIZE; void *padded_end, *aligned_start, *aligned_end; + mem = mmap(0, aligned_padded_size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); - if (mem == NULL) + if (!mem) return NULL; /* align start and unmap padding */ padded_end = VOID_PTR_ADD(mem, aligned_padded_size); @@ -927,8 +927,8 @@ void *fmm_allocate_scratch(uint32_t gpu_id, uint64_t MemorySizeInBytes) return mem; } -static void* __fmm_allocate_device(uint32_t gpu_id, uint64_t MemorySizeInBytes, - manageble_aperture_t *aperture, uint64_t offset, uint64_t *mmap_offset, +static void *__fmm_allocate_device(uint32_t gpu_id, uint64_t MemorySizeInBytes, + manageable_aperture_t *aperture, uint64_t offset, uint64_t *mmap_offset, uint32_t flags, vm_object_t **vm_obj) { void *mem = NULL; @@ -950,7 +950,7 @@ static void* __fmm_allocate_device(uint32_t gpu_id, uint64_t MemorySizeInBytes, */ obj = fmm_allocate_memory_in_device(gpu_id, mem, MemorySizeInBytes, aperture, mmap_offset, flags); - if (obj == NULL) { + if (!obj) { /* * allocation of memory in device failed. 
* Release region in aperture @@ -975,7 +975,7 @@ static void* __fmm_allocate_device(uint32_t gpu_id, uint64_t MemorySizeInBytes, #define GPUVM_APP_OFFSET 0x10000 void *fmm_allocate_device(uint32_t gpu_id, uint64_t MemorySizeInBytes, HsaMemFlags flags) { - manageble_aperture_t *aperture; + manageable_aperture_t *aperture; int32_t gpu_mem_id; uint32_t ioc_flags = KFD_IOC_ALLOC_MEM_FLAGS_VRAM, offset; uint64_t size, mmap_offset; @@ -1019,7 +1019,7 @@ void *fmm_allocate_device(uint32_t gpu_id, uint64_t MemorySizeInBytes, HsaMemFla if (mem && flags.ui32.HostAccess) { void *ret = mmap(mem, MemorySizeInBytes, PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_FIXED, kfd_fd , mmap_offset); + MAP_SHARED | MAP_FIXED, kfd_fd, mmap_offset); if (ret == MAP_FAILED) { __fmm_release(mem, aperture); return NULL; @@ -1032,7 +1032,7 @@ void *fmm_allocate_device(uint32_t gpu_id, uint64_t MemorySizeInBytes, HsaMemFla void *fmm_allocate_doorbell(uint32_t gpu_id, uint64_t MemorySizeInBytes, uint64_t doorbell_offset) { - manageble_aperture_t *aperture; + manageable_aperture_t *aperture; int32_t gpu_mem_id; uint32_t ioc_flags; void *mem; @@ -1081,7 +1081,7 @@ void *fmm_allocate_doorbell(uint32_t gpu_id, uint64_t MemorySizeInBytes, return mem; } -static void* fmm_allocate_host_cpu(uint64_t MemorySizeInBytes, +static void *fmm_allocate_host_cpu(uint64_t MemorySizeInBytes, HsaMemFlags flags) { int err; @@ -1114,11 +1114,11 @@ static void* fmm_allocate_host_cpu(uint64_t MemorySizeInBytes, return mem; } -static void* fmm_allocate_host_gpu(uint32_t node_id, uint64_t MemorySizeInBytes, +static void *fmm_allocate_host_gpu(uint32_t node_id, uint64_t MemorySizeInBytes, HsaMemFlags flags) { void *mem; - manageble_aperture_t *aperture; + manageable_aperture_t *aperture; uint64_t mmap_offset; uint32_t ioc_flags; uint64_t size; @@ -1144,13 +1144,14 @@ static void* fmm_allocate_host_gpu(uint32_t node_id, uint64_t MemorySizeInBytes, } /* Paged memory is allocated as a userptr mapping, non-paged - * memory is 
allocated from KFD */ + * memory is allocated from KFD + */ if (!flags.ui32.NonPaged && svm.userptr_for_paged_mem) { /* Allocate address space */ pthread_mutex_lock(&aperture->fmm_mutex); mem = aperture_allocate_area(aperture, size, 0); pthread_mutex_unlock(&aperture->fmm_mutex); - if (mem == NULL) + if (!mem) return NULL; /* Map anonymous pages */ @@ -1166,7 +1167,8 @@ static void* fmm_allocate_host_gpu(uint32_t node_id, uint64_t MemorySizeInBytes, /* Mappings in the DGPU aperture don't need to be copied on * fork. This avoids MMU notifiers and evictions due to user - * memory mappings on fork. */ + * memory mappings on fork. + */ madvise(mem, MemorySizeInBytes, MADV_DONTFORK); /* Create userptr BO */ @@ -1181,7 +1183,8 @@ static void* fmm_allocate_host_gpu(uint32_t node_id, uint64_t MemorySizeInBytes, aperture_release_area(aperture, mem, size); pthread_mutex_unlock(&aperture->fmm_mutex); /* Remove any CPU mapping, but keep the - * address range reserved */ + * address range reserved + */ mmap(mem, MemorySizeInBytes, PROT_NONE, MAP_ANONYMOUS | MAP_NORESERVE | MAP_PRIVATE | MAP_FIXED, -1, 0); @@ -1196,7 +1199,7 @@ static void* fmm_allocate_host_gpu(uint32_t node_id, uint64_t MemorySizeInBytes, if (mem && flags.ui32.HostAccess) { void *ret = mmap(mem, MemorySizeInBytes, PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_FIXED, kfd_fd , mmap_offset); + MAP_SHARED | MAP_FIXED, kfd_fd, mmap_offset); if (ret == MAP_FAILED) { __fmm_release(mem, aperture); return NULL; @@ -1204,10 +1207,11 @@ static void* fmm_allocate_host_gpu(uint32_t node_id, uint64_t MemorySizeInBytes, if (flags.ui32.AQLQueueMemory) { uint64_t my_buf_size = ALIGN_UP(size, aperture->align) / 2; + memset(ret, 0, MemorySizeInBytes); mmap(VOID_PTR_ADD(mem, my_buf_size), MemorySizeInBytes, PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_FIXED, kfd_fd , mmap_offset); + MAP_SHARED | MAP_FIXED, kfd_fd, mmap_offset); } } } @@ -1223,7 +1227,7 @@ static void* fmm_allocate_host_gpu(uint32_t node_id, uint64_t MemorySizeInBytes, 
return mem; } -void* fmm_allocate_host(uint32_t node_id, uint64_t MemorySizeInBytes, +void *fmm_allocate_host(uint32_t node_id, uint64_t MemorySizeInBytes, HsaMemFlags flags) { if (is_dgpu) @@ -1290,7 +1294,7 @@ out: return NULL; } -static void __fmm_release(void *address, manageble_aperture_t *aperture) +static void __fmm_release(void *address, manageable_aperture_t *aperture) { struct kfd_ioctl_free_memory_of_gpu_args args; vm_object_t *object; @@ -1310,7 +1314,8 @@ static void __fmm_release(void *address, manageble_aperture_t *aperture) /* If memory is user memory and it's still GPU mapped, munmap * would cause an eviction. If the restore happens quickly * enough, restore would also fail with an error message. So - * free the BO before unmapping the pages. */ + * free the BO before unmapping the pages. + */ args.handle = object->handle; kmtIoctl(kfd_fd, AMDKFD_IOC_FREE_MEMORY_OF_GPU, &args); @@ -1356,8 +1361,7 @@ void fmm_release(void *address) found = true; __fmm_release(address, &svm.dgpu_aperture); fmm_print(gpu_mem[i].gpu_id); - } - else if (address >= svm.dgpu_alt_aperture.base && + } else if (address >= svm.dgpu_alt_aperture.base && address <= svm.dgpu_alt_aperture.limit) { found = true; __fmm_release(address, &svm.dgpu_alt_aperture); @@ -1411,13 +1415,13 @@ HSAKMT_STATUS fmm_init_process_apertures(unsigned int NumNodes) { struct kfd_ioctl_get_process_apertures_new_args args; uint32_t i = 0; - int32_t gpu_mem_id =0; + int32_t gpu_mem_id = 0; uint32_t gpu_id; HsaNodeProperties props; - struct kfd_process_device_apertures * process_apertures; + struct kfd_process_device_apertures *process_apertures; HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS; char *disableCache, *pagedUserptr, *checkUserptr, *guardPagesStr; - unsigned guardPages = 1; + unsigned int guardPages = 1; struct pci_access *pacc; /* If HSA_DISABLE_CACHE is set to a non-0 value, disable caching */ @@ -1426,7 +1430,8 @@ HSAKMT_STATUS fmm_init_process_apertures(unsigned int NumNodes) disableCache = 
NULL; /* If HSA_USERPTR_FOR_PAGED_MEM is set to a non-0 value, - * enable userptr for all paged memory allocations */ + * enable userptr for all paged memory allocations + */ pagedUserptr = getenv("HSA_USERPTR_FOR_PAGED_MEM"); svm.userptr_for_paged_mem = (pagedUserptr && strcmp(pagedUserptr, "0")); @@ -1442,15 +1447,17 @@ HSAKMT_STATUS fmm_init_process_apertures(unsigned int NumNodes) guardPages = 1; /* Trade off - NumNodes includes GPU nodes + CPU Node. So in - * systems with CPU node, slightly more memory is allocated than - * necessary*/ + * systems with CPU node, slightly more memory is allocated than + * necessary + */ gpu_mem = (gpu_mem_t *)calloc(NumNodes, sizeof(gpu_mem_t)); - if (gpu_mem == NULL) + if (!gpu_mem) return HSAKMT_STATUS_NO_MEMORY; - /* Initialize gpu_mem[] from sysfs topology. Rest of the members are set to - * 0 by calloc. This is necessary because this function - * gets called before hsaKmtAcquireSystemProperties() is called.*/ + /* Initialize gpu_mem[] from sysfs topology. Rest of the members are + * set to 0 by calloc. This is necessary because this function + * gets called before hsaKmtAcquireSystemProperties() is called. + */ gpu_mem_count = 0; pacc = pci_alloc(); pci_init(pacc); @@ -1480,11 +1487,13 @@ HSAKMT_STATUS fmm_init_process_apertures(unsigned int NumNodes) } pci_cleanup(pacc); - /* The ioctl will also return Number of Nodes if args.kfd_process_device_apertures_ptr - * is set to NULL. This is not required since Number of nodes is already known. Kernel - * will fill in the apertures in kfd_process_device_apertures_ptr */ + /* The ioctl will also return Number of Nodes if + * args.kfd_process_device_apertures_ptr is set to NULL. This is not + * required since Number of nodes is already known. 
Kernel will fill in + * the apertures in kfd_process_device_apertures_ptr + */ process_apertures = malloc(gpu_mem_count * sizeof(struct kfd_process_device_apertures)); - if (process_apertures == NULL) { + if (!process_apertures) { ret = HSAKMT_STATUS_NO_MEMORY; goto sysfs_parse_failed; } @@ -1501,15 +1510,16 @@ HSAKMT_STATUS fmm_init_process_apertures(unsigned int NumNodes) all_gpu_id_array = NULL; if (args.num_of_nodes > 0) { all_gpu_id_array = malloc(sizeof(uint32_t) * args.num_of_nodes); - if (all_gpu_id_array == NULL) { + if (!all_gpu_id_array) { ret = HSAKMT_STATUS_NO_MEMORY; goto get_aperture_ioctl_failed; } } for (i = 0 ; i < args.num_of_nodes ; i++) { - /* Map Kernel process device data node i <--> gpu_mem_id which indexes into gpu_mem[] - * based on gpu_id */ + /* Map Kernel process device data node i <--> gpu_mem_id which + * indexes into gpu_mem[] based on gpu_id + */ gpu_mem_id = gpu_mem_find_by_gpu_id(process_apertures[i].gpu_id); if (gpu_mem_id < 0) { ret = HSAKMT_STATUS_ERROR; @@ -1558,13 +1568,15 @@ HSAKMT_STATUS fmm_init_process_apertures(unsigned int NumNodes) svm.dgpu_aperture.guard_pages = guardPages; /* Non-canonical per-ASIC GPUVM aperture does - * not exist on dGPUs in GPUVM64 address mode */ + * not exist on dGPUs in GPUVM64 address mode + */ gpu_mem[gpu_mem_id].gpuvm_aperture.base = NULL; gpu_mem[gpu_mem_id].gpuvm_aperture.limit = NULL; /* Use the first 1/4 of the dGPU aperture as - * alternate aperture for coherent access. - * Base and size must be 64KB aligned. */ + * alternate aperture for coherent access. + * Base and size must be 64KB aligned. + */ alt_base = (uintptr_t)svm.dgpu_aperture.base; alt_size = (VOID_PTRS_SUB(svm.dgpu_aperture.limit, svm.dgpu_aperture.base) + 1) >> 2; @@ -1580,7 +1592,8 @@ HSAKMT_STATUS fmm_init_process_apertures(unsigned int NumNodes) KFD_IOC_CACHE_POLICY_COHERENT, alt_base, alt_size); if (err != 0) { - fprintf(stderr, "Error! 
Failed to set alt aperture for GPU [0x%x]\n", gpu_mem[gpu_mem_id].gpu_id); + fprintf(stderr, "Error! Failed to set alt aperture for GPU [0x%x]\n", + gpu_mem[gpu_mem_id].gpu_id); ret = HSAKMT_STATUS_ERROR; } svm.dgpu_alt_aperture.align = vm_alignment; @@ -1595,7 +1608,7 @@ HSAKMT_STATUS fmm_init_process_apertures(unsigned int NumNodes) return ret; get_aperture_ioctl_failed: -invalid_gpu_id : +invalid_gpu_id: free(process_apertures); sysfs_parse_failed: fmm_destroy_process_apertures(); @@ -1647,7 +1660,8 @@ HSAKMT_STATUS fmm_get_aperture_base_and_limit(aperture_type_e aperture_type, HSA case FMM_SVM: /* Report single SVM aperture, starting at base of - * fine-grained, ending at limit of coarse-grained */ + * fine-grained, ending at limit of coarse-grained + */ if (aperture_is_valid(svm.dgpu_alt_aperture.base, svm.dgpu_aperture.limit)) { *aperture_base = PORT_VPTR_TO_UINT64(svm.dgpu_alt_aperture.base); @@ -1662,7 +1676,7 @@ HSAKMT_STATUS fmm_get_aperture_base_and_limit(aperture_type_e aperture_type, HSA return err; } -static int _fmm_map_to_gpu_gtt(manageble_aperture_t *aperture, +static int _fmm_map_to_gpu_gtt(manageable_aperture_t *aperture, void *address, uint64_t size, vm_object_t *obj) { struct kfd_ioctl_map_memory_to_gpu_args args; @@ -1709,7 +1723,7 @@ static int _fmm_map_to_gpu_gtt(manageble_aperture_t *aperture, print_device_id_array((void *)args.device_ids_array_ptr, args.device_ids_array_size); - if (object->mapped_device_id_array != NULL && + if (object->mapped_device_id_array && object->mapped_device_id_array_size > 0 && object->mapped_device_id_array != all_gpu_id_array && object->mapped_device_id_array != object->registered_device_id_array) @@ -1735,7 +1749,7 @@ err_object_not_found: return -1; } -static int _fmm_map_to_gpu_scratch(uint32_t gpu_id, manageble_aperture_t *aperture, +static int _fmm_map_to_gpu_scratch(uint32_t gpu_id, manageable_aperture_t *aperture, void *address, uint64_t size) { int32_t gpu_mem_id; @@ -1750,12 +1764,12 @@ static int 
_fmm_map_to_gpu_scratch(uint32_t gpu_id, manageble_aperture_t *apertu if (gpu_mem_id < 0) return -1; - if (!topology_is_dgpu(gpu_mem[gpu_mem_id].device_id)) + if (!topology_is_dgpu(gpu_mem[gpu_mem_id].device_id)) return 0; /* Nothing to do on APU */ /* sanity check the address */ if (address < aperture->base || - VOID_PTR_ADD(address, size -1) > aperture->limit) + VOID_PTR_ADD(address, size - 1) > aperture->limit) return -1; ret = debug_get_reg_status(gpu_mem[gpu_mem_id].node_id, &is_debugger); @@ -1764,7 +1778,7 @@ static int _fmm_map_to_gpu_scratch(uint32_t gpu_id, manageble_aperture_t *apertu vm_object_t *obj = fmm_allocate_memory_in_device( gpu_id, address, size, aperture, NULL, KFD_IOC_ALLOC_MEM_FLAGS_VRAM); - if (obj == NULL) + if (!obj) return -1; } else { fmm_allocate_memory_in_device(gpu_id, @@ -1792,7 +1806,7 @@ static int _fmm_map_to_gpu_scratch(uint32_t gpu_id, manageble_aperture_t *apertu return ret; } -static int _fmm_map_to_gpu(uint32_t gpu_id, manageble_aperture_t *aperture, +static int _fmm_map_to_gpu(uint32_t gpu_id, manageable_aperture_t *aperture, void *address, uint64_t size, uint64_t *gpuvm_address) { @@ -1801,7 +1815,7 @@ static int _fmm_map_to_gpu(uint32_t gpu_id, manageble_aperture_t *aperture, void *temp_mapped_id_array = NULL; /* Check that address space was previously reserved */ - if (vm_find(aperture, address) == NULL) + if (!vm_find(aperture, address)) return -1; pthread_mutex_lock(&aperture->fmm_mutex); @@ -1829,7 +1843,7 @@ static int _fmm_map_to_gpu(uint32_t gpu_id, manageble_aperture_t *aperture, if (kmtIoctl(kfd_fd, AMDKFD_IOC_MAP_MEMORY_TO_GPU, &args)) goto err_map_ioctl_failed; - if (object->mapped_device_id_array != NULL && + if (object->mapped_device_id_array && object->mapped_device_id_array_size > 0 && object->mapped_device_id_array != all_gpu_id_array && object->mapped_device_id_array != object->registered_device_id_array) @@ -1860,7 +1874,7 @@ err_object_not_found: static int _fmm_map_to_gpu_userptr(void *addr, uint64_t 
size, uint64_t *gpuvm_addr, vm_object_t *object) { - manageble_aperture_t *aperture; + manageable_aperture_t *aperture; vm_object_t *obj; void *svm_addr; HSAuint64 svm_size; @@ -1876,7 +1890,7 @@ static int _fmm_map_to_gpu_userptr(void *addr, uint64_t size, obj = object; if (!obj) { obj = vm_find_object_by_userptr(aperture, addr, size); - if (obj == NULL) { + if (!obj) { pthread_mutex_unlock(&aperture->fmm_mutex); return HSAKMT_STATUS_ERROR; } @@ -1885,7 +1899,8 @@ static int _fmm_map_to_gpu_userptr(void *addr, uint64_t size, svm_size = obj->size; /* Map and return the GPUVM address adjusted by the offset - * from the start of the page */ + * from the start of the page + */ ret = _fmm_map_to_gpu_gtt(aperture, svm_addr, svm_size, obj); if (ret == 0 && gpuvm_addr) *gpuvm_addr = (uint64_t)svm_addr + page_offset; @@ -1962,7 +1977,7 @@ static void print_device_id_array(uint32_t *device_id_array, uint32_t device_id_ #endif } -static int _fmm_unmap_from_gpu(manageble_aperture_t *aperture, void *address, +static int _fmm_unmap_from_gpu(manageable_aperture_t *aperture, void *address, uint32_t *device_ids_array, uint32_t device_ids_array_size, vm_object_t *obj) { @@ -2015,7 +2030,7 @@ static int _fmm_unmap_from_gpu(manageble_aperture_t *aperture, void *address, goto out; /* Clearing all mapped nodes list */ - if (object->mapped_device_id_array != NULL && + if (object->mapped_device_id_array && object->mapped_device_id_array_size > 0 && object->mapped_device_id_array != all_gpu_id_array && object->mapped_device_id_array != object->registered_device_id_array) @@ -2035,7 +2050,7 @@ out: } static int _fmm_unmap_from_gpu_scratch(uint32_t gpu_id, - manageble_aperture_t *aperture, + manageable_aperture_t *aperture, void *address) { int32_t gpu_mem_id; @@ -2047,7 +2062,7 @@ static int _fmm_unmap_from_gpu_scratch(uint32_t gpu_id, if (gpu_mem_id < 0) return -1; - if (!topology_is_dgpu(gpu_mem[gpu_mem_id].device_id)) + if (!topology_is_dgpu(gpu_mem[gpu_mem_id].device_id)) return 0; /* 
Nothing to do on APU */ pthread_mutex_lock(&aperture->fmm_mutex); @@ -2057,7 +2072,7 @@ static int _fmm_unmap_from_gpu_scratch(uint32_t gpu_id, if (!object) goto err; - if (object->mapped_device_id_array == NULL || + if (!object->mapped_device_id_array || object->mapped_device_id_array_size == 0) { pthread_mutex_unlock(&aperture->fmm_mutex); return 0; @@ -2071,7 +2086,7 @@ static int _fmm_unmap_from_gpu_scratch(uint32_t gpu_id, kmtIoctl(kfd_fd, AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU, &args); /* Clearing all mapped nodes list */ - if (object->mapped_device_id_array != NULL && + if (object->mapped_device_id_array && object->mapped_device_id_array_size > 0 && object->mapped_device_id_array != all_gpu_id_array && object->mapped_device_id_array != object->registered_device_id_array) @@ -2097,7 +2112,7 @@ err: static int _fmm_unmap_from_gpu_userptr(void *addr) { - manageble_aperture_t *aperture; + manageable_aperture_t *aperture; vm_object_t *obj; void *svm_addr; @@ -2106,7 +2121,7 @@ static int _fmm_unmap_from_gpu_userptr(void *addr) /* Find the start address in SVM space for GPU unmapping */ pthread_mutex_lock(&aperture->fmm_mutex); obj = vm_find_object_by_userptr(aperture, addr, 0); - if (obj == NULL) { + if (!obj) { pthread_mutex_unlock(&aperture->fmm_mutex); return HSAKMT_STATUS_ERROR; } @@ -2175,7 +2190,7 @@ static int set_dgpu_aperture(uint32_t gpu_id, uint64_t base, uint64_t limit) return kmtIoctl(kfd_fd, AMDKFD_IOC_SET_PROCESS_DGPU_APERTURE, &args); } -static void *reserve_address(void *addr, long long unsigned int len) +static void *reserve_address(void *addr, unsigned long long int len) { void *ret_addr; @@ -2251,24 +2266,25 @@ static HSAKMT_STATUS dgpu_mem_init(uint32_t gpu_mem_id, void **base, void **limi ret_addr = reserve_address(addr, len); if (!ret_addr) break; - if ((HSAuint64)ret_addr + (len>>1) < max_vm_limit) + if ((HSAuint64)ret_addr + (len >> 1) < max_vm_limit) /* At least half the returned address * space is GPU addressable, we'll - * take it */ + * 
take it + */ break; - munmap (ret_addr, len); + munmap(ret_addr, len); } if (!ret_addr) { fprintf(stderr, "Failed to reserve %uGB for SVM ...\n", - (unsigned)(len >> 30)); + (unsigned int)(len >> 30)); continue; } if ((HSAuint64)ret_addr + min_vm_size - 1 > max_vm_limit) { /* addressable size is less than the minimum */ fprintf(stderr, "Got %uGB for SVM at %p with only %dGB usable ...\n", - (unsigned)(len >> 30), ret_addr, + (unsigned int)(len >> 30), ret_addr, (int)(((HSAint64)max_vm_limit - (HSAint64)ret_addr) >> 30)); munmap(ret_addr, len); @@ -2306,7 +2322,7 @@ static HSAKMT_STATUS dgpu_mem_init(uint32_t gpu_mem_id, void **base, void **limi bool fmm_get_handle(void *address, uint64_t *handle) { uint32_t i; - manageble_aperture_t *aperture; + manageable_aperture_t *aperture; vm_object_t *object; bool found; @@ -2329,8 +2345,7 @@ bool fmm_get_handle(void *address, uint64_t *handle) if ((address >= svm.dgpu_aperture.base) && (address <= svm.dgpu_aperture.limit)) { aperture = &svm.dgpu_aperture; - } - else if ((address >= svm.dgpu_alt_aperture.base) && + } else if ((address >= svm.dgpu_alt_aperture.base) && (address <= svm.dgpu_alt_aperture.limit)) { aperture = &svm.dgpu_alt_aperture; } @@ -2372,7 +2387,7 @@ static HSAKMT_STATUS fmm_register_user_memory(void *addr, HSAuint64 size, vm_obj { int32_t i; HSAuint32 gpu_id; - manageble_aperture_t *aperture; + manageable_aperture_t *aperture; void *svm_addr = NULL; vm_object_t *obj; HSAuint32 page_offset = (HSAuint64)addr & (PAGE_SIZE-1); @@ -2388,7 +2403,7 @@ static HSAKMT_STATUS fmm_register_user_memory(void *addr, HSAuint64 size, vm_obj /* Check if this address was already registered */ pthread_mutex_lock(&aperture->fmm_mutex); obj = vm_find_object_by_userptr(aperture, addr, size); - if (obj != NULL) { + if (obj) { ++obj->registration_count; pthread_mutex_unlock(&aperture->fmm_mutex); *obj_ret = obj; @@ -2403,7 +2418,7 @@ static HSAKMT_STATUS fmm_register_user_memory(void *addr, HSAuint64 size, vm_obj /* Allocate BO, 
userptr address is passed in mmap_offset */ svm_addr = __fmm_allocate_device(gpu_id, aligned_size, aperture, 0, &aligned_addr, KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, &obj); - if (svm_addr == NULL) + if (!svm_addr) return HSAKMT_STATUS_ERROR; if (obj) { @@ -2413,8 +2428,7 @@ static HSAKMT_STATUS fmm_register_user_memory(void *addr, HSAuint64 size, vm_obj obj->userptr_size = size; obj->registration_count = 1; pthread_mutex_unlock(&aperture->fmm_mutex); - } - else + } else return HSAKMT_STATUS_ERROR; if (obj_ret) @@ -2426,11 +2440,11 @@ HSAKMT_STATUS fmm_register_memory(void *address, uint64_t size_in_bytes, uint32_t *gpu_id_array, uint32_t gpu_id_array_size) { - manageble_aperture_t *aperture; + manageable_aperture_t *aperture; vm_object_t *object = NULL; HSAKMT_STATUS ret; - if (gpu_id_array_size > 0 && gpu_id_array == NULL) + if (gpu_id_array_size > 0 && !gpu_id_array) return HSAKMT_STATUS_INVALID_PARAMETER; if ((address >= svm.dgpu_aperture.base) && @@ -2469,8 +2483,7 @@ HSAKMT_STATUS fmm_register_memory(void *address, uint64_t size_in_bytes, fprintf(stderr, "Error. 
Changing nodes in a registered addr.\n"); return HSAKMT_STATUS_MEMORY_ALREADY_REGISTERED; - } - else + } else return HSAKMT_STATUS_SUCCESS; } @@ -2491,7 +2504,7 @@ HSAKMT_STATUS fmm_register_graphics_handle(HSAuint64 GraphicsResourceHandle, struct kfd_ioctl_get_dmabuf_info_args infoArgs; struct kfd_ioctl_import_dmabuf_args importArgs; struct kfd_ioctl_free_memory_of_gpu_args freeArgs; - manageble_aperture_t *aperture; + manageable_aperture_t *aperture; vm_object_t *obj; void *metadata; void *mem, *aperture_base; @@ -2500,7 +2513,7 @@ HSAKMT_STATUS fmm_register_graphics_handle(HSAuint64 GraphicsResourceHandle, int r; HSAKMT_STATUS status = HSAKMT_STATUS_ERROR; - if (gpu_id_array_size > 0 && gpu_id_array == NULL) + if (gpu_id_array_size > 0 && !gpu_id_array) return HSAKMT_STATUS_INVALID_PARAMETER; infoArgs.dmabuf_fd = GraphicsResourceHandle; @@ -2541,7 +2554,7 @@ HSAKMT_STATUS fmm_register_graphics_handle(HSAuint64 GraphicsResourceHandle, pthread_mutex_lock(&aperture->fmm_mutex); mem = aperture_allocate_area(aperture, infoArgs.size, offset); pthread_mutex_unlock(&aperture->fmm_mutex); - if (mem == NULL) + if (!mem) goto error_free_metadata; /* Import DMA buffer */ @@ -2569,7 +2582,7 @@ HSAKMT_STATUS fmm_register_graphics_handle(HSAuint64 GraphicsResourceHandle, GraphicsResourceInfo->SizeInBytes = infoArgs.size; GraphicsResourceInfo->Metadata = (void *)(unsigned long)infoArgs.metadata_ptr; GraphicsResourceInfo->MetadataSizeInBytes = infoArgs.metadata_size; - GraphicsResourceInfo->Reserved = 0; + GraphicsResourceInfo->Reserved = 0; return HSAKMT_STATUS_SUCCESS; @@ -2584,14 +2597,14 @@ error_free_metadata: return status; } -HSAKMT_STATUS fmm_share_memory(void* MemoryAddress, +HSAKMT_STATUS fmm_share_memory(void *MemoryAddress, HSAuint64 SizeInBytes, HsaSharedMemoryHandle *SharedMemoryHandle) { int r = 0; HSAuint32 gpu_id = 0; vm_object_t *obj = NULL; - manageble_aperture_t * aperture = NULL; + manageable_aperture_t *aperture = NULL; struct 
kfd_ioctl_ipc_export_handle_args exportArgs; HsaApertureInfo ApeInfo; HsaSharedMemoryStruct *SharedMemoryStruct = @@ -2641,13 +2654,13 @@ HSAKMT_STATUS fmm_register_shared_memory(const HsaSharedMemoryHandle *SharedMemo HSAKMT_STATUS err = HSAKMT_STATUS_ERROR; vm_object_t *obj = NULL; void *reservedMem = NULL; - manageble_aperture_t *aperture; + manageable_aperture_t *aperture; struct kfd_ioctl_ipc_import_handle_args importArgs; struct kfd_ioctl_free_memory_of_gpu_args freeArgs; const HsaSharedMemoryStruct *SharedMemoryStruct = to_const_hsa_shared_memory_struct(SharedMemoryHandle); - if (gpu_id_array_size > 0 && gpu_id_array == NULL) + if (gpu_id_array_size > 0 && !gpu_id_array) return HSAKMT_STATUS_INVALID_PARAMETER; memcpy(importArgs.share_handle, SharedMemoryStruct->ShareHandle, @@ -2719,7 +2732,7 @@ err_import: static HSAKMT_STATUS fmm_deregister_user_memory(void *addr) { - manageble_aperture_t *aperture; + manageable_aperture_t *aperture; vm_object_t *obj; void *svm_addr; @@ -2728,7 +2741,7 @@ static HSAKMT_STATUS fmm_deregister_user_memory(void *addr) /* Find the size and start address in SVM space */ pthread_mutex_lock(&aperture->fmm_mutex); obj = vm_find_object_by_userptr(aperture, addr, 0); - if ((obj == NULL) || (obj->registration_count > 1)) { + if (!obj || obj->registration_count > 1) { pthread_mutex_unlock(&aperture->fmm_mutex); return HSAKMT_STATUS_ERROR; } @@ -2743,9 +2756,9 @@ static HSAKMT_STATUS fmm_deregister_user_memory(void *addr) HSAKMT_STATUS fmm_deregister_memory(void *address) { - manageble_aperture_t *aperture = NULL; + manageable_aperture_t *aperture = NULL; vm_object_t *object = NULL; - unsigned i; + unsigned int i; HSAuint32 page_offset = (HSAint64)address & (PAGE_SIZE - 1); if ((address >= svm.dgpu_aperture.base) && @@ -2832,7 +2845,7 @@ HSAKMT_STATUS fmm_map_to_gpu_nodes(void *address, uint64_t size, uint32_t *nodes_to_map, uint32_t nodes_to_map_size, uint64_t *gpuvm_address) { - manageble_aperture_t *aperture; + manageable_aperture_t 
*aperture; vm_object_t *object = NULL; uint32_t i, j, temp_node; bool found, userptr = false; @@ -2841,7 +2854,7 @@ HSAKMT_STATUS fmm_map_to_gpu_nodes(void *address, uint64_t size, HSAKMT_STATUS ret = HSAKMT_STATUS_ERROR; int retcode = 0; - if ((nodes_to_map_size > 0 && nodes_to_map == NULL) || address == NULL) + if ((nodes_to_map_size > 0 && !nodes_to_map) || !address) return HSAKMT_STATUS_INVALID_PARAMETER; @@ -2884,7 +2897,7 @@ HSAKMT_STATUS fmm_map_to_gpu_nodes(void *address, uint64_t size, registered_node_id_array = all_gpu_id_array; registered_node_id_array_size = all_gpu_id_array_size; if (object->registered_device_id_array_size > 0 && - object->registered_device_id_array != NULL) { + object->registered_device_id_array) { registered_node_id_array = object->registered_device_id_array; registered_node_id_array_size = object->registered_device_id_array_size; } @@ -2959,7 +2972,7 @@ HSAKMT_STATUS fmm_get_mem_info(const void *address, HsaPointerInfo *info) { HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS; uint32_t i; - manageble_aperture_t *aperture; + manageable_aperture_t *aperture; vm_object_t *vm_obj; memset(info, 0, sizeof(HsaPointerInfo)); @@ -2996,7 +3009,7 @@ HSAKMT_STATUS fmm_get_mem_info(const void *address, HsaPointerInfo *info) * freed whenever the registration is deregistered or the * memory being freed */ - for (i=0; i<info->NRegisteredNodes; i++) + for (i = 0; i < info->NRegisteredNodes; i++) gpuid_to_nodeid(vm_obj->registered_device_id_array[i], &vm_obj->registered_node_id_array[i]); } @@ -3010,7 +3023,7 @@ HSAKMT_STATUS fmm_get_mem_info(const void *address, HsaPointerInfo *info) /* vm_obj->mapped_node_id_array allocated here will be * freed whenever the mapping is unmapped or memory being freed */ - for (i=0; i<info->NMappedNodes; i++) + for (i = 0; i < info->NMappedNodes; i++) gpuid_to_nodeid(vm_obj->mapped_device_id_array[i], &vm_obj->mapped_node_id_array[i]); } @@ -3020,9 +3033,8 @@ HSAKMT_STATUS fmm_get_mem_info(const void *address, HsaPointerInfo *info) if
(info->Type == HSA_POINTER_REGISTERED_USER) { info->CPUAddress = vm_obj->userptr; info->SizeInBytes = vm_obj->userptr_size; - info->GPUAddress += ((HSAuint64)info->CPUAddress & (PAGE_SIZE-1)); - } - else if (info->Type == HSA_POINTER_ALLOCATED) { + info->GPUAddress += ((HSAuint64)info->CPUAddress & (PAGE_SIZE - 1)); + } else if (info->Type == HSA_POINTER_ALLOCATED) { info->MemFlags.Value = vm_obj->flags; info->CPUAddress = vm_obj->start; } @@ -3033,7 +3045,7 @@ exit: HSAKMT_STATUS fmm_set_mem_user_data(const void *mem, void *usr_data) { - manageble_aperture_t *aperture; + manageable_aperture_t *aperture; vm_object_t *vm_obj; aperture = fmm_find_aperture(mem, NULL); @@ -3048,7 +3060,7 @@ HSAKMT_STATUS fmm_set_mem_user_data(const void *mem, void *usr_data) return HSAKMT_STATUS_SUCCESS; } -static void fmm_clear_aperture(manageble_aperture_t *app) +static void fmm_clear_aperture(manageable_aperture_t *app) { while (app->vm_objects) vm_remove_object(app, app->vm_objects); diff --git a/projects/rocr-runtime/src/fmm.h b/projects/rocr-runtime/src/fmm.h index 2fb1d60cf9..f5b65e4209 100644 --- a/projects/rocr-runtime/src/fmm.h +++ b/projects/rocr-runtime/src/fmm.h @@ -41,27 +41,25 @@ typedef enum { typedef struct { aperture_type_e app_type; uint64_t size; - void* start_address; + void *start_address; } aperture_properties_t; HSAKMT_STATUS fmm_init_process_apertures(unsigned int NumNodes); void fmm_destroy_process_apertures(void); -/* - * Memory interface - */ -void* fmm_allocate_scratch(uint32_t gpu_id, uint64_t MemorySizeInBytes); -void* fmm_allocate_device(uint32_t gpu_id, uint64_t MemorySizeInBytes, HsaMemFlags flags); -void* fmm_allocate_doorbell(uint32_t gpu_id, uint64_t MemorySizeInBytes, uint64_t doorbell_offset); -void* fmm_allocate_host(uint32_t node_id, uint64_t MemorySizeInBytes, +/* Memory interface */ +void *fmm_allocate_scratch(uint32_t gpu_id, uint64_t MemorySizeInBytes); +void *fmm_allocate_device(uint32_t gpu_id, uint64_t MemorySizeInBytes, HsaMemFlags 
flags); +void *fmm_allocate_doorbell(uint32_t gpu_id, uint64_t MemorySizeInBytes, uint64_t doorbell_offset); +void *fmm_allocate_host(uint32_t node_id, uint64_t MemorySizeInBytes, HsaMemFlags flags); -void* fmm_open_graphic_handle(uint32_t gpu_id, - int32_t graphic_device_handle, - uint32_t graphic_handle, - uint64_t MemorySizeInBytes); +void *fmm_open_graphic_handle(uint32_t gpu_id, + int32_t graphic_device_handle, + uint32_t graphic_handle, + uint64_t MemorySizeInBytes); void fmm_print(uint32_t node); -bool fmm_is_inside_some_aperture(void* address); -void fmm_release(void* address); +bool fmm_is_inside_some_aperture(void *address); +void fmm_release(void *address); int fmm_map_to_gpu(void *address, uint64_t size, uint64_t *gpuvm_address); int fmm_unmap_from_gpu(void *address); bool fmm_get_handle(void *address, uint64_t *handle); @@ -75,14 +73,14 @@ HSAKMT_STATUS fmm_get_aperture_base_and_limit(aperture_type_e aperture_type, HSA HSAuint64 *aperture_base, HSAuint64 *aperture_limit); HSAKMT_STATUS fmm_register_memory(void *address, uint64_t size_in_bytes, - uint32_t *gpu_id_array, - uint32_t gpu_id_array_size); + uint32_t *gpu_id_array, + uint32_t gpu_id_array_size); HSAKMT_STATUS fmm_register_graphics_handle(HSAuint64 GraphicsResourceHandle, HsaGraphicsResourceInfo *GraphicsResourceInfo, uint32_t *gpu_id_array, uint32_t gpu_id_array_size); HSAKMT_STATUS fmm_deregister_memory(void *address); -HSAKMT_STATUS fmm_share_memory(void* MemoryAddress, +HSAKMT_STATUS fmm_share_memory(void *MemoryAddress, HSAuint64 SizeInBytes, HsaSharedMemoryHandle *SharedMemoryHandle); HSAKMT_STATUS fmm_register_shared_memory(const HsaSharedMemoryHandle *SharedMemoryHandle, diff --git a/projects/rocr-runtime/src/globals.c b/projects/rocr-runtime/src/globals.c index 15d6f11456..3d75b9f1b7 100644 --- a/projects/rocr-runtime/src/globals.c +++ b/projects/rocr-runtime/src/globals.c @@ -31,6 +31,6 @@ int kfd_fd; unsigned long kfd_open_count; unsigned long system_properties_count; 
pthread_mutex_t hsakmt_mutex = PTHREAD_MUTEX_INITIALIZER; -bool is_dgpu = false; +bool is_dgpu; int PAGE_SIZE; int PAGE_SHIFT; diff --git a/projects/rocr-runtime/src/libhsakmt.c b/projects/rocr-runtime/src/libhsakmt.c index d7f79d3562..4d0775ccbe 100644 --- a/projects/rocr-runtime/src/libhsakmt.c +++ b/projects/rocr-runtime/src/libhsakmt.c @@ -3,16 +3,14 @@ #include "libhsakmt.h" -/** - * Call ioctl, restarting if it is interupted - */ -int -kmtIoctl(int fd, unsigned long request, void *arg) +/* Call ioctl, restarting if it is interrupted */ +int kmtIoctl(int fd, unsigned long request, void *arg) { - int ret; + int ret; - do { - ret = ioctl(fd, request, arg); - } while (ret == -1 && (errno == EINTR || errno == EAGAIN)); - return ret; + do { + ret = ioctl(fd, request, arg); + } while (ret == -1 && (errno == EINTR || errno == EAGAIN)); + + return ret; } diff --git a/projects/rocr-runtime/src/memory.c b/projects/rocr-runtime/src/memory.c index b3883c1477..00aff3cfe3 100644 --- a/projects/rocr-runtime/src/memory.c +++ b/projects/rocr-runtime/src/memory.c @@ -34,15 +34,11 @@ #include #include "fmm.h" -HSAKMT_STATUS -HSAKMTAPI -hsaKmtSetMemoryPolicy( - HSAuint32 Node, - HSAuint32 DefaultPolicy, - HSAuint32 AlternatePolicy, - void *MemoryAddressAlternate, - HSAuint64 MemorySizeInBytes -) +HSAKMT_STATUS HSAKMTAPI hsaKmtSetMemoryPolicy(HSAuint32 Node, + HSAuint32 DefaultPolicy, + HSAuint32 AlternatePolicy, + void *MemoryAddressAlternate, + HSAuint64 MemorySizeInBytes) { struct kfd_ioctl_set_memory_policy_args args; HSAKMT_STATUS result; @@ -54,7 +50,8 @@ hsaKmtSetMemoryPolicy( /* This is a legacy API useful on Kaveri only. On dGPU * the alternate aperture is setup and used * automatically for coherent allocations. Don't let - * app override it. */ + * app override it. 
+ */ return HSAKMT_STATUS_NOT_IMPLEMENTED; result = validate_nodeid(Node, &gpu_id); @@ -106,14 +103,10 @@ HSAuint32 PageSizeFromFlags(unsigned int pageSizeFlags) } } -HSAKMT_STATUS -HSAKMTAPI -hsaKmtAllocMemory( - HSAuint32 PreferredNode, /* IN */ - HSAuint64 SizeInBytes, /* IN (multiple of page size) */ - HsaMemFlags MemFlags, /* IN */ - void **MemoryAddress /* OUT (page-aligned) */ -) +HSAKMT_STATUS HSAKMTAPI hsaKmtAllocMemory(HSAuint32 PreferredNode, + HSAuint64 SizeInBytes, + HsaMemFlags MemFlags, + void **MemoryAddress) { HSAKMT_STATUS result; uint32_t gpu_id; @@ -127,16 +120,14 @@ hsaKmtAllocMemory( page_size = PageSizeFromFlags(MemFlags.ui32.PageSize); - if ((!MemoryAddress) || (!SizeInBytes) || - (SizeInBytes & (page_size-1))) { + if (!MemoryAddress || !SizeInBytes || (SizeInBytes & (page_size-1))) return HSAKMT_STATUS_INVALID_PARAMETER; - } if (gpu_id == 0 && !MemFlags.ui32.Scratch) { *MemoryAddress = fmm_allocate_host(PreferredNode, SizeInBytes, MemFlags); - if (*MemoryAddress == NULL) + if (!(*MemoryAddress)) return HSAKMT_STATUS_ERROR; return HSAKMT_STATUS_SUCCESS; @@ -145,27 +136,28 @@ hsaKmtAllocMemory( if (gpu_id && MemFlags.ui32.NonPaged && !MemFlags.ui32.Scratch) { *MemoryAddress = fmm_allocate_device(gpu_id, SizeInBytes, MemFlags); - if (*MemoryAddress == NULL) + if (!(*MemoryAddress)) return HSAKMT_STATUS_NO_MEMORY; return HSAKMT_STATUS_SUCCESS; } - if (MemFlags.ui32.Scratch ) { + if (MemFlags.ui32.Scratch) { *MemoryAddress = fmm_allocate_scratch(gpu_id, SizeInBytes); - if (*MemoryAddress == NULL) + if (!(*MemoryAddress)) return HSAKMT_STATUS_NO_MEMORY; return HSAKMT_STATUS_SUCCESS; } /* Backwards compatibility hack: Allocate system memory if app - * asks for paged memory from a GPU node. */ + * asks for paged memory from a GPU node. 
+ */ if (gpu_id && !MemFlags.ui32.NonPaged && !MemFlags.ui32.Scratch) { *MemoryAddress = fmm_allocate_host(PreferredNode, SizeInBytes, MemFlags); - if (*MemoryAddress == NULL) + if (!(*MemoryAddress)) return HSAKMT_STATUS_ERROR; return HSAKMT_STATUS_SUCCESS; @@ -174,16 +166,12 @@ hsaKmtAllocMemory( return HSAKMT_STATUS_INVALID_PARAMETER; } -HSAKMT_STATUS -HSAKMTAPI -hsaKmtFreeMemory( - void *MemoryAddress, /* IN (page-aligned) */ - HSAuint64 SizeInBytes /* IN */ -) +HSAKMT_STATUS HSAKMTAPI hsaKmtFreeMemory(void *MemoryAddress, + HSAuint64 SizeInBytes) { CHECK_KFD_OPEN(); - if (MemoryAddress == NULL) { + if (!MemoryAddress) { fprintf(stderr, "FIXME: freeing NULL pointer\n"); return HSAKMT_STATUS_ERROR; } @@ -192,12 +180,8 @@ hsaKmtFreeMemory( return HSAKMT_STATUS_SUCCESS; } -HSAKMT_STATUS -HSAKMTAPI -hsaKmtRegisterMemory( - void *MemoryAddress, /* IN (cache-aligned) */ - HSAuint64 MemorySizeInBytes /* IN (cache-aligned) */ -) +HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterMemory(void *MemoryAddress, + HSAuint64 MemorySizeInBytes) { CHECK_KFD_OPEN(); @@ -206,17 +190,13 @@ hsaKmtRegisterMemory( return HSAKMT_STATUS_SUCCESS; return fmm_register_memory(MemoryAddress, MemorySizeInBytes, - NULL, 0); + NULL, 0); } -HSAKMT_STATUS -HSAKMTAPI -hsaKmtRegisterMemoryToNodes( - void *MemoryAddress, /* IN (cache-aligned) */ - HSAuint64 MemorySizeInBytes, /* IN (cache-aligned) */ - HSAuint64 NumberOfNodes, /* IN */ - HSAuint32* NodeArray /* IN */ -) +HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterMemoryToNodes(void *MemoryAddress, + HSAuint64 MemorySizeInBytes, + HSAuint64 NumberOfNodes, + HSAuint32 *NodeArray) { CHECK_KFD_OPEN(); uint32_t *gpu_id_array; @@ -240,14 +220,10 @@ hsaKmtRegisterMemoryToNodes( return ret; } -HSAKMT_STATUS -HSAKMTAPI -hsaKmtRegisterGraphicsHandleToNodes( - HSAuint64 GraphicsResourceHandle, /* IN */ - HsaGraphicsResourceInfo *GraphicsResourceInfo, /* OUT */ - HSAuint64 NumberOfNodes, /* IN */ - HSAuint32* NodeArray /* IN */ -) +HSAKMT_STATUS HSAKMTAPI 
hsaKmtRegisterGraphicsHandleToNodes(HSAuint64 GraphicsResourceHandle, + HsaGraphicsResourceInfo *GraphicsResourceInfo, + HSAuint64 NumberOfNodes, + HSAuint32 *NodeArray) { CHECK_KFD_OPEN(); uint32_t *gpu_id_array; @@ -259,7 +235,7 @@ hsaKmtRegisterGraphicsHandleToNodes( if (ret == HSAKMT_STATUS_SUCCESS) { ret = fmm_register_graphics_handle( GraphicsResourceHandle, GraphicsResourceInfo, - gpu_id_array,NumberOfNodes*sizeof(uint32_t)); + gpu_id_array, NumberOfNodes * sizeof(uint32_t)); if (ret != HSAKMT_STATUS_SUCCESS) free(gpu_id_array); } @@ -267,13 +243,9 @@ hsaKmtRegisterGraphicsHandleToNodes( return ret; } -HSAKMT_STATUS -HSAKMTAPI -hsaKmtShareMemory( - void *MemoryAddress, /* IN */ - HSAuint64 SizeInBytes, /* IN */ - HsaSharedMemoryHandle *SharedMemoryHandle /* OUT */ -) +HSAKMT_STATUS HSAKMTAPI hsaKmtShareMemory(void *MemoryAddress, + HSAuint64 SizeInBytes, + HsaSharedMemoryHandle *SharedMemoryHandle) { CHECK_KFD_OPEN(); @@ -283,13 +255,9 @@ hsaKmtShareMemory( return fmm_share_memory(MemoryAddress, SizeInBytes, SharedMemoryHandle); } -HSAKMT_STATUS -HSAKMTAPI -hsaKmtRegisterSharedHandle( - const HsaSharedMemoryHandle *SharedMemoryHandle, /* IN */ - void **MemoryAddress, /* OUT */ - HSAuint64 *SizeInBytes /* OUT */ -) +HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterSharedHandle(const HsaSharedMemoryHandle *SharedMemoryHandle, + void **MemoryAddress, + HSAuint64 *SizeInBytes) { CHECK_KFD_OPEN(); @@ -300,15 +268,11 @@ hsaKmtRegisterSharedHandle( NULL); } -HSAKMT_STATUS -HSAKMTAPI -hsaKmtRegisterSharedHandleToNodes( - const HsaSharedMemoryHandle *SharedMemoryHandle, /* IN */ - void **MemoryAddress, /* OUT */ - HSAuint64 *SizeInBytes, /* OUT */ - HSAuint64 NumberOfNodes, /* OUT */ - HSAuint32* NodeArray /* OUT */ -) +HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterSharedHandleToNodes(const HsaSharedMemoryHandle *SharedMemoryHandle, + void **MemoryAddress, + HSAuint64 *SizeInBytes, + HSAuint64 NumberOfNodes, + HSAuint32 *NodeArray) { CHECK_KFD_OPEN(); @@ -340,32 +304,26 @@ error: 
return ret; } -static uint64_t convertHsaToKfdRange( - HsaMemoryRange *HsaRange) +static uint64_t convertHsaToKfdRange(HsaMemoryRange *HsaRange) { if (sizeof(struct kfd_memory_range) != sizeof(HsaMemoryRange)) { - fprintf(stderr, "Struct size mismatch in thunk." - "Cannot cast Hsa Range to KFD IOCTL range\n"); + fprintf(stderr, "Struct size mismatch in thunk. Cannot cast Hsa Range to KFD IOCTL range\n"); return 0; } return (uint64_t) HsaRange; } -HSAKMT_STATUS -HSAKMTAPI -hsaKmtProcessVMRead( - HSAuint32 Pid, /* IN */ - HsaMemoryRange *LocalMemoryArray, /* IN */ - HSAuint64 LocalMemoryArrayCount, /* IN */ - HsaMemoryRange *RemoteMemoryArray, /* IN */ - HSAuint64 RemoteMemoryArrayCount,/* IN */ - HSAuint64 *SizeCopied /* OUT */ -) +HSAKMT_STATUS HSAKMTAPI hsaKmtProcessVMRead(HSAuint32 Pid, + HsaMemoryRange *LocalMemoryArray, + HSAuint64 LocalMemoryArrayCount, + HsaMemoryRange *RemoteMemoryArray, + HSAuint64 RemoteMemoryArrayCount, + HSAuint64 *SizeCopied) { struct kfd_ioctl_cross_memory_copy_args args; - if (LocalMemoryArray == NULL || RemoteMemoryArray == NULL || + if (!LocalMemoryArray || !RemoteMemoryArray || LocalMemoryArrayCount == 0 || RemoteMemoryArrayCount == 0) return HSAKMT_STATUS_ERROR; @@ -379,6 +337,7 @@ hsaKmtProcessVMRead( args.bytes_copied = 0; int err = kmtIoctl(kfd_fd, AMDKFD_IOC_CROSS_MEMORY_COPY, &args); + if (err) return HSAKMT_STATUS_ERROR; @@ -388,20 +347,16 @@ hsaKmtProcessVMRead( return HSAKMT_STATUS_SUCCESS; } -HSAKMT_STATUS -HSAKMTAPI -hsaKmtProcessVMWrite( - HSAuint32 Pid, /* IN */ - HsaMemoryRange *LocalMemoryArray, /* IN */ - HSAuint64 LocalMemoryArrayCount, /* IN */ - HsaMemoryRange *RemoteMemoryArray, /* IN */ - HSAuint64 RemoteMemoryArrayCount, /* IN */ - HSAuint64 *SizeCopied /* OUT */ -) +HSAKMT_STATUS HSAKMTAPI hsaKmtProcessVMWrite(HSAuint32 Pid, + HsaMemoryRange *LocalMemoryArray, + HSAuint64 LocalMemoryArrayCount, + HsaMemoryRange *RemoteMemoryArray, + HSAuint64 RemoteMemoryArrayCount, + HSAuint64 *SizeCopied) { struct 
kfd_ioctl_cross_memory_copy_args args; - if (LocalMemoryArray == NULL || RemoteMemoryArray == NULL || + if (!LocalMemoryArray || !RemoteMemoryArray || LocalMemoryArrayCount == 0 || RemoteMemoryArrayCount == 0) return HSAKMT_STATUS_ERROR; @@ -415,6 +370,7 @@ hsaKmtProcessVMWrite( args.bytes_copied = 0; int err = kmtIoctl(kfd_fd, AMDKFD_IOC_CROSS_MEMORY_COPY, &args); + if (err) return HSAKMT_STATUS_ERROR; @@ -425,28 +381,20 @@ hsaKmtProcessVMWrite( } -HSAKMT_STATUS -HSAKMTAPI -hsaKmtDeregisterMemory( - void *MemoryAddress /* IN */ -) +HSAKMT_STATUS HSAKMTAPI hsaKmtDeregisterMemory(void *MemoryAddress) { CHECK_KFD_OPEN(); return fmm_deregister_memory(MemoryAddress); } -HSAKMT_STATUS -HSAKMTAPI -hsaKmtMapMemoryToGPU( - void *MemoryAddress, /* IN (page-aligned) */ - HSAuint64 MemorySizeInBytes, /* IN (page-aligned) */ - HSAuint64 *AlternateVAGPU /* OUT (page-aligned) */ -) +HSAKMT_STATUS HSAKMTAPI hsaKmtMapMemoryToGPU(void *MemoryAddress, + HSAuint64 MemorySizeInBytes, + HSAuint64 *AlternateVAGPU) { CHECK_KFD_OPEN(); - if (MemoryAddress == NULL) { + if (!MemoryAddress) { fprintf(stderr, "FIXME: mapping NULL pointer\n"); return HSAKMT_STATUS_ERROR; } @@ -460,21 +408,17 @@ hsaKmtMapMemoryToGPU( return HSAKMT_STATUS_ERROR; } -HSAKMT_STATUS -HSAKMTAPI -hsaKmtMapMemoryToGPUNodes( - void* MemoryAddress, //IN (page-aligned) - HSAuint64 MemorySizeInBytes, //IN (page-aligned) - HSAuint64* AlternateVAGPU, //OUT (page-aligned) - HsaMemMapFlags MemMapFlags, //IN - HSAuint64 NumberOfNodes, //IN - HSAuint32* NodeArray //IN -) +HSAKMT_STATUS HSAKMTAPI hsaKmtMapMemoryToGPUNodes(void *MemoryAddress, + HSAuint64 MemorySizeInBytes, + HSAuint64 *AlternateVAGPU, + HsaMemMapFlags MemMapFlags, + HSAuint64 NumberOfNodes, + HSAuint32 *NodeArray) { uint32_t *gpu_id_array; HSAKMT_STATUS ret; - if (MemoryAddress == NULL) { + if (!MemoryAddress) { fprintf(stderr, "FIXME: mapping NULL pointer\n"); return HSAKMT_STATUS_ERROR; } @@ -493,15 +437,11 @@ hsaKmtMapMemoryToGPUNodes( gpu_id_array, 
NumberOfNodes * sizeof(uint32_t), AlternateVAGPU); } -HSAKMT_STATUS -HSAKMTAPI -hsaKmtUnmapMemoryToGPU( - void *MemoryAddress /* IN (page-aligned) */ -) +HSAKMT_STATUS HSAKMTAPI hsaKmtUnmapMemoryToGPU(void *MemoryAddress) { CHECK_KFD_OPEN(); - if (MemoryAddress == NULL) { + if (!MemoryAddress) { /* Workaround for runtime bug */ fprintf(stderr, "FIXME: Unmapping NULL pointer\n"); return HSAKMT_STATUS_SUCCESS; @@ -513,16 +453,12 @@ hsaKmtUnmapMemoryToGPU( return HSAKMT_STATUS_ERROR; } -HSAKMT_STATUS -HSAKMTAPI -hsaKmtMapGraphicHandle( - HSAuint32 NodeId, /* IN */ - HSAuint64 GraphicDeviceHandle, /* IN */ - HSAuint64 GraphicResourceHandle, /* IN */ - HSAuint64 GraphicResourceOffset, /* IN */ - HSAuint64 GraphicResourceSize, /* IN */ - HSAuint64 *FlatMemoryAddress /* OUT */ -) +HSAKMT_STATUS HSAKMTAPI hsaKmtMapGraphicHandle(HSAuint32 NodeId, + HSAuint64 GraphicDeviceHandle, + HSAuint64 GraphicResourceHandle, + HSAuint64 GraphicResourceOffset, + HSAuint64 GraphicResourceSize, + HSAuint64 *FlatMemoryAddress) { CHECK_KFD_OPEN(); @@ -550,24 +486,15 @@ hsaKmtMapGraphicHandle( return HSAKMT_STATUS_NO_MEMORY; } -HSAKMT_STATUS -HSAKMTAPI -hsaKmtUnmapGraphicHandle( - HSAuint32 NodeId, /* IN */ - HSAuint64 FlatMemoryAddress, /* IN */ - HSAuint64 SizeInBytes /* IN */ -) +HSAKMT_STATUS HSAKMTAPI hsaKmtUnmapGraphicHandle(HSAuint32 NodeId, + HSAuint64 FlatMemoryAddress, + HSAuint64 SizeInBytes) { return hsaKmtUnmapMemoryToGPU(PORT_UINT64_TO_VPTR(FlatMemoryAddress)); } -HSAKMT_STATUS -HSAKMTAPI -hsaKmtGetTileConfig( - HSAuint32 NodeId, /* IN */ - HsaGpuTileConfig *config /* IN & OUT */ -) +HSAKMT_STATUS HSAKMTAPI hsaKmtGetTileConfig(HSAuint32 NodeId, HsaGpuTileConfig *config) { struct kfd_ioctl_get_tile_config_args args; uint32_t gpu_id; @@ -583,9 +510,8 @@ hsaKmtGetTileConfig( args.num_tile_configs = config->NumTileConfigs; args.num_macro_tile_configs = config->NumMacroTileConfigs; - if (kmtIoctl(kfd_fd, AMDKFD_IOC_GET_TILE_CONFIG, &args) != 0) { + if (kmtIoctl(kfd_fd, 
AMDKFD_IOC_GET_TILE_CONFIG, &args) != 0) return HSAKMT_STATUS_ERROR; - } config->NumTileConfigs = args.num_tile_configs; config->NumMacroTileConfigs = args.num_macro_tile_configs; @@ -598,24 +524,16 @@ hsaKmtGetTileConfig( return HSAKMT_STATUS_SUCCESS; } -HSAKMT_STATUS -HSAKMTAPI -hsaKmtQueryPointerInfo( - const void *Pointer, /* IN */ - HsaPointerInfo *PointerInfo /* OUT */ -) +HSAKMT_STATUS HSAKMTAPI hsaKmtQueryPointerInfo(const void *Pointer, + HsaPointerInfo *PointerInfo) { if (!PointerInfo) return HSAKMT_STATUS_INVALID_PARAMETER; return fmm_get_mem_info(Pointer, PointerInfo); } -HSAKMT_STATUS -HSAKMTAPI -hsaKmtSetMemoryUserData( - const void *Pointer, /* IN */ - void *UserData /* IN */ -) +HSAKMT_STATUS HSAKMTAPI hsaKmtSetMemoryUserData(const void *Pointer, + void *UserData) { return fmm_set_mem_user_data(Pointer, UserData); } diff --git a/projects/rocr-runtime/src/openclose.c b/projects/rocr-runtime/src/openclose.c index 087fbdc36d..5e8b5e1aa4 100644 --- a/projects/rocr-runtime/src/openclose.c +++ b/projects/rocr-runtime/src/openclose.c @@ -36,7 +36,7 @@ static const char kfd_device_name[] = "/dev/kfd"; static const char tmp_file[] = "/var/lock/.amd_hsa_thunk_lock"; -int amd_hsa_thunk_lock_fd = 0; +int amd_hsa_thunk_lock_fd; static pid_t parent_pid = -1; @@ -76,9 +76,7 @@ static inline void init_page_size(void) PAGE_SHIFT = ffs(PAGE_SIZE) - 1; } -HSAKMT_STATUS -HSAKMTAPI -hsaKmtOpenKFD(void) +HSAKMT_STATUS HSAKMTAPI hsaKmtOpenKFD(void) { HSAKMT_STATUS result; int fd; @@ -94,8 +92,9 @@ hsaKmtOpenKFD(void) if (is_forked_child()) clear_after_fork(); - if (kfd_open_count == 0) - { + if (kfd_open_count == 0) { + amd_hsa_thunk_lock_fd = 0; + fd = open(kfd_device_name, O_RDWR | O_CLOEXEC); if (fd != -1) { @@ -125,7 +124,7 @@ hsaKmtOpenKFD(void) mask = umask(0); /* save the current umask */ /* We don't want the existing umask to mask out S_IWOTH */ - umask(S_IXOTH); + umask(0001); amd_hsa_thunk_lock_fd = open(tmp_file, O_CREAT | O_RDWR, 0666); @@ -136,9 +135,7 @@ 
hsaKmtOpenKFD(void) if (init_counter_props(sys_props.NumNodes) != HSAKMT_STATUS_SUCCESS) fprintf(stderr, "Profiling is not available\n"); - } - else - { + } else { kfd_open_count++; result = HSAKMT_STATUS_SUCCESS; } @@ -157,18 +154,14 @@ open_failed: return result; } -HSAKMT_STATUS -HSAKMTAPI -hsaKmtCloseKFD(void) +HSAKMT_STATUS HSAKMTAPI hsaKmtCloseKFD(void) { HSAKMT_STATUS result; pthread_mutex_lock(&hsakmt_mutex); - if (kfd_open_count > 0) - { - if (--kfd_open_count == 0) - { + if (kfd_open_count > 0) { + if (--kfd_open_count == 0) { destroy_counter_props(); destroy_device_debugging_memory(); destroy_process_doorbells(); @@ -183,11 +176,8 @@ hsaKmtCloseKFD(void) } result = HSAKMT_STATUS_SUCCESS; - } - else - { + } else result = HSAKMT_STATUS_KERNEL_IO_CHANNEL_NOT_OPENED; - } pthread_mutex_unlock(&hsakmt_mutex); diff --git a/projects/rocr-runtime/src/perfctr.c b/projects/rocr-runtime/src/perfctr.c index 5267ac1dbe..962fe56a7e 100644 --- a/projects/rocr-runtime/src/perfctr.c +++ b/projects/rocr-runtime/src/perfctr.c @@ -96,7 +96,7 @@ static ssize_t readn(int fd, void *buf, size_t n) bytes = read(fd, buf, left); if (!bytes) /* reach EOF */ return (n - left); - if (bytes < 0 ) { + if (bytes < 0) { if (errno == EINTR) /* read got interrupted */ continue; else @@ -145,8 +145,8 @@ out: HSAKMT_STATUS init_counter_props(unsigned int NumNodes) { - counter_props = calloc(NumNodes, sizeof(struct HsaCounterProperties*)); - if (counter_props == NULL) + counter_props = calloc(NumNodes, sizeof(struct HsaCounterProperties *)); + if (!counter_props) return HSAKMT_STATUS_NO_MEMORY; counter_props_count = NumNodes; @@ -159,11 +159,11 @@ void destroy_counter_props(void) { unsigned int i; - if (counter_props == NULL) + if (!counter_props) return; - for (i = 0; iNumBlocks = total_blocks; @@ -534,18 +530,11 @@ hsaKmtPmcGetCounterProperties( return HSAKMT_STATUS_SUCCESS; } -/** - Registers a set of (HW) counters to be used for tracing/profiling -*/ - -HSAKMT_STATUS -HSAKMTAPI 
-hsaKmtPmcRegisterTrace( - HSAuint32 NodeId, //IN - HSAuint32 NumberOfCounters, //IN - HsaCounter* Counters, //IN - HsaPmcTraceRoot* TraceRoot //OUT - ) +/* Registers a set of (HW) counters to be used for tracing/profiling */ +HSAKMT_STATUS HSAKMTAPI hsaKmtPmcRegisterTrace(HSAuint32 NodeId, + HSAuint32 NumberOfCounters, + HsaCounter *Counters, + HsaPmcTraceRoot *TraceRoot) { uint32_t gpu_id, i, j; uint64_t min_buf_size = 0; @@ -558,10 +547,10 @@ hsaKmtPmcRegisterTrace( uint64_t *counter_id_ptr; int *fd_ptr; - if (counter_props == NULL) + if (!counter_props) return HSAKMT_STATUS_NO_MEMORY; - if (Counters == NULL || TraceRoot == NULL || NumberOfCounters == 0) + if (!Counters || !TraceRoot || NumberOfCounters == 0) return HSAKMT_STATUS_INVALID_PARAMETER; if (validate_nodeid(NodeId, &gpu_id) != HSAKMT_STATUS_SUCCESS) @@ -618,7 +607,7 @@ hsaKmtPmcRegisterTrace( + sizeof(uint64_t) * total_counters + sizeof(int) * total_counters, 1); - if (trace == NULL) + if (!trace) return HSAKMT_STATUS_NO_MEMORY; /* Allocated area is partitioned as: @@ -677,16 +666,10 @@ hsaKmtPmcRegisterTrace( return HSAKMT_STATUS_SUCCESS; } -/** - Unregisters a set of (HW) counters used for tracing/profiling -*/ +/* Unregisters a set of (HW) counters used for tracing/profiling */ -HSAKMT_STATUS -HSAKMTAPI -hsaKmtPmcUnregisterTrace( - HSAuint32 NodeId, //IN - HSATraceId TraceId //IN - ) +HSAKMT_STATUS HSAKMTAPI hsaKmtPmcUnregisterTrace(HSAuint32 NodeId, + HSATraceId TraceId) { uint32_t gpu_id; struct perf_trace *trace; @@ -708,6 +691,7 @@ hsaKmtPmcUnregisterTrace( /* If the trace is in the running state, stop it */ if (trace->state == PERF_TRACE_STATE__STARTED) { HSAKMT_STATUS status = hsaKmtPmcStopTrace(TraceId); + if (status != HSAKMT_STATUS_SUCCESS) return status; } @@ -717,12 +701,8 @@ hsaKmtPmcUnregisterTrace( return HSAKMT_STATUS_SUCCESS; } -HSAKMT_STATUS -HSAKMTAPI -hsaKmtPmcAcquireTraceAccess( - HSAuint32 NodeId, //IN - HSATraceId TraceId //IN - ) +HSAKMT_STATUS HSAKMTAPI 
hsaKmtPmcAcquireTraceAccess(HSAuint32 NodeId, + HSATraceId TraceId) { struct perf_trace *trace; HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS; @@ -766,12 +746,8 @@ out: return ret; } -HSAKMT_STATUS -HSAKMTAPI -hsaKmtPmcReleaseTraceAccess( - HSAuint32 NodeId, //IN - HSATraceId TraceId //IN - ) +HSAKMT_STATUS HSAKMTAPI hsaKmtPmcReleaseTraceAccess(HSAuint32 NodeId, + HSATraceId TraceId) { struct perf_trace *trace; uint32_t i; @@ -795,17 +771,10 @@ hsaKmtPmcReleaseTraceAccess( } -/** - Starts tracing operation on a previously established set of performance counters -*/ - -HSAKMT_STATUS -HSAKMTAPI -hsaKmtPmcStartTrace( - HSATraceId TraceId, //IN - void* TraceBuffer, //IN (page aligned) - HSAuint64 TraceBufferSizeBytes //IN (page aligned) - ) +/* Starts tracing operation on a previously established set of performance counters */ +HSAKMT_STATUS HSAKMTAPI hsaKmtPmcStartTrace(HSATraceId TraceId, + void *TraceBuffer, + HSAuint64 TraceBufferSizeBytes) { struct perf_trace *trace = (struct perf_trace *)PORT_UINT64_TO_VPTR(TraceId); @@ -813,7 +782,7 @@ hsaKmtPmcStartTrace( int32_t j; HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS; - if (TraceId == 0 || TraceBuffer == NULL || TraceBufferSizeBytes == 0) + if (TraceId == 0 || !TraceBuffer || TraceBufferSizeBytes == 0) return HSAKMT_STATUS_INVALID_PARAMETER; if (trace->magic4cc != HSA_PERF_MAGIC4CC) @@ -842,15 +811,9 @@ hsaKmtPmcStartTrace( } -/** - Forces an update of all the counters that a previously started trace operation has registered -*/ +/*Forces an update of all the counters that a previously started trace operation has registered */ -HSAKMT_STATUS -HSAKMTAPI -hsaKmtPmcQueryTrace( - HSATraceId TraceId //IN - ) +HSAKMT_STATUS HSAKMTAPI hsaKmtPmcQueryTrace(HSATraceId TraceId) { struct perf_trace *trace = (struct perf_trace *)PORT_UINT64_TO_VPTR(TraceId); @@ -882,15 +845,8 @@ hsaKmtPmcQueryTrace( } -/** - Stops tracing operation on a previously established set of performance counters -*/ - -HSAKMT_STATUS -HSAKMTAPI 
-hsaKmtPmcStopTrace( - HSATraceId TraceId //IN - ) +/* Stops tracing operation on a previously established set of performance counters */ +HSAKMT_STATUS HSAKMTAPI hsaKmtPmcStopTrace(HSATraceId TraceId) { struct perf_trace *trace = (struct perf_trace *)PORT_UINT64_TO_VPTR(TraceId); diff --git a/projects/rocr-runtime/src/pmc_table.c b/projects/rocr-runtime/src/pmc_table.c index 3ca656a223..50ff81a521 100644 --- a/projects/rocr-runtime/src/pmc_table.c +++ b/projects/rocr-runtime/src/pmc_table.c @@ -1597,8 +1597,7 @@ uint32_t pmc_table_get_max_concurrent(int block_id) } } -static HSAKMT_STATUS -alloc_pmc_blocks_iommu(void) +static HSAKMT_STATUS alloc_pmc_blocks_iommu(void) { DIR *dir; struct dirent *dent; @@ -1704,10 +1703,9 @@ void free_pmc_blocks(void) iommu_block.num_of_counters = 0; } -HSAKMT_STATUS -get_block_properties(uint32_t node_id, - enum perf_block_id block_id, - struct perf_counter_block *block) +HSAKMT_STATUS get_block_properties(uint32_t node_id, + enum perf_block_id block_id, + struct perf_counter_block *block) { uint16_t dev_id = get_device_id_by_node(node_id); enum asic_family_type asic; diff --git a/projects/rocr-runtime/src/pmc_table.h b/projects/rocr-runtime/src/pmc_table.h index aa9d5bd0d1..7e48cbe801 100644 --- a/projects/rocr-runtime/src/pmc_table.h +++ b/projects/rocr-runtime/src/pmc_table.h @@ -72,9 +72,8 @@ HSAKMT_STATUS alloc_pmc_blocks(void); void free_pmc_blocks(void); uint32_t pmc_table_get_max_concurrent(int block_id); -HSAKMT_STATUS -get_block_properties(uint32_t node_id, - enum perf_block_id block_id, - struct perf_counter_block *block); +HSAKMT_STATUS get_block_properties(uint32_t node_id, + enum perf_block_id block_id, + struct perf_counter_block *block); #endif // PMC_TABLE_H diff --git a/projects/rocr-runtime/src/queues.c b/projects/rocr-runtime/src/queues.c index 6840a5df1a..7fbd1083c7 100644 --- a/projects/rocr-runtime/src/queues.c +++ b/projects/rocr-runtime/src/queues.c @@ -44,8 +44,7 @@ #define WG_CONTEXT_DATA_SIZE_PER_CU_VI 
344576 #define WAVES_PER_CU_VI 32 -struct device_info -{ +struct device_info { enum asic_family_type asic_family; uint32_t eop_buffer_size; uint32_t doorbell_size; @@ -110,14 +109,12 @@ static struct device_info *dev_lookup_table[] = { [CHIP_VEGA10] = &vega10_device_info }; -struct device_id -{ +struct device_id { uint16_t dev_id; struct device_info *dev_info; }; -struct queue -{ +struct queue { uint32_t queue_id; uint64_t wptr; uint64_t rptr; @@ -128,11 +125,10 @@ struct queue const struct device_info *dev_info; }; -struct process_doorbells -{ +struct process_doorbells { bool use_gpuvm; uint32_t size; - void* doorbells; + void *doorbells; pthread_mutex_t doorbells_mutex; }; @@ -145,9 +141,10 @@ HSAKMT_STATUS init_process_doorbells(unsigned int NumNodes) HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS; /* doorbells[] is accessed using Topology NodeId. This means doorbells[0], - * which corresponds to CPU only Node, might not be used */ + * which corresponds to CPU only Node, might not be used + */ doorbells = malloc(NumNodes * sizeof(struct process_doorbells)); - if (doorbells == NULL) + if (!doorbells) return HSAKMT_STATUS_NO_MEMORY; for (i = 0; i < NumNodes; i++) { @@ -258,7 +255,7 @@ static HSAKMT_STATUS map_doorbell_dgpu(HSAuint32 NodeId, HSAuint32 gpu_id, ptr = fmm_allocate_doorbell(gpu_id, doorbells[NodeId].size, doorbell_offset); - if (ptr == NULL) + if (!ptr) return HSAKMT_STATUS_ERROR; /* map for GPU access */ @@ -299,7 +296,7 @@ static HSAKMT_STATUS map_doorbell(HSAuint32 NodeId, HSAuint32 gpu_id, return status; } -static void* allocate_exec_aligned_memory_cpu(uint32_t size, uint32_t align) +static void *allocate_exec_aligned_memory_cpu(uint32_t size, uint32_t align) { void *ptr; int retval; @@ -344,7 +341,7 @@ static bool update_ctx_save_restore_size(uint32_t nodeid, struct queue *q) return false; } -void* allocate_exec_aligned_memory_gpu(uint32_t size, uint32_t align, +void *allocate_exec_aligned_memory_gpu(uint32_t size, uint32_t align, uint32_t NodeId, bool 
nonPaged) { void *mem; @@ -361,14 +358,13 @@ void* allocate_exec_aligned_memory_gpu(uint32_t size, uint32_t align, size = ALIGN_UP(size, align); ret = hsaKmtAllocMemory(0, size, flags, &mem); - if (ret != HSAKMT_STATUS_SUCCESS) { + if (ret != HSAKMT_STATUS_SUCCESS) return NULL; - } if (NodeId != 0) { uint32_t nodes_array[1] = {NodeId}; - if (hsaKmtRegisterMemoryToNodes(mem, size, 1, nodes_array) - != HSAKMT_STATUS_SUCCESS) { + + if (hsaKmtRegisterMemoryToNodes(mem, size, 1, nodes_array) != HSAKMT_STATUS_SUCCESS) { hsaKmtFreeMemory(mem, size); return NULL; } @@ -386,15 +382,14 @@ void free_exec_aligned_memory_gpu(void *addr, uint32_t size, uint32_t align) { size = ALIGN_UP(size, align); - if (hsaKmtUnmapMemoryToGPU(addr) == HSAKMT_STATUS_SUCCESS) { + if (hsaKmtUnmapMemoryToGPU(addr) == HSAKMT_STATUS_SUCCESS) hsaKmtFreeMemory(addr, size); - } } -static void* allocate_exec_aligned_memory(uint32_t size, - uint32_t align, - enum asic_family_type type, - uint32_t NodeId) +static void *allocate_exec_aligned_memory(uint32_t size, + uint32_t align, + enum asic_family_type type, + uint32_t NodeId) { if (IS_DGPU(type)) return allocate_exec_aligned_memory_gpu(size, align, NodeId, @@ -429,6 +424,7 @@ static int handle_concrete_asic(struct queue *q, uint32_t NodeId) { const struct device_info *dev_info = q->dev_info; + if (dev_info) { if (dev_info->eop_buffer_size > 0) { q->eop_buffer = @@ -436,9 +432,9 @@ static int handle_concrete_asic(struct queue *q, PAGE_SIZE, dev_info->asic_family, NodeId); - if (q->eop_buffer == NULL) { + if (!q->eop_buffer) return HSAKMT_STATUS_NO_MEMORY; - } + args->eop_buffer_address = (uintptr_t)q->eop_buffer; args->eop_buffer_size = dev_info->eop_buffer_size; } @@ -451,9 +447,9 @@ static int handle_concrete_asic(struct queue *q, PAGE_SIZE, dev_info->asic_family, NodeId); - if (q->ctx_save_restore == NULL) {; + if (!q->ctx_save_restore) return HSAKMT_STATUS_NO_MEMORY; - } + args->ctx_save_restore_address = (uintptr_t)q->ctx_save_restore; } } @@ 
-465,31 +461,28 @@ static int handle_concrete_asic(struct queue *q, * to KFD queue priority (0 to 15) * Indexed by thunk_queue_priority+3 */ -static uint32_t priority_map[] = {0,3,5,7,9,11,15}; +static uint32_t priority_map[] = {0, 3, 5, 7, 9, 11, 15}; -HSAKMT_STATUS -HSAKMTAPI -hsaKmtCreateQueue( - HSAuint32 NodeId, //IN - HSA_QUEUE_TYPE Type, //IN - HSAuint32 QueuePercentage, //IN - HSA_QUEUE_PRIORITY Priority, //IN - void* QueueAddress, //IN - HSAuint64 QueueSizeInBytes, //IN - HsaEvent* Event, //IN - HsaQueueResource* QueueResource //OUT - ) +HSAKMT_STATUS HSAKMTAPI hsaKmtCreateQueue(HSAuint32 NodeId, + HSA_QUEUE_TYPE Type, + HSAuint32 QueuePercentage, + HSA_QUEUE_PRIORITY Priority, + void *QueueAddress, + HSAuint64 QueueSizeInBytes, + HsaEvent *Event, + HsaQueueResource *QueueResource) { HSAKMT_STATUS result; uint32_t gpu_id; uint16_t dev_id; uint64_t doorbell_mmap_offset; - unsigned doorbell_offset; + unsigned int doorbell_offset; struct device_info *dev_info; int err; + CHECK_KFD_OPEN(); - if (Priority < HSA_QUEUE_PRIORITY_MINIMUM || + if (Priority < HSA_QUEUE_PRIORITY_MINIMUM || Priority > HSA_QUEUE_PRIORITY_MAXIMUM) return HSAKMT_STATUS_INVALID_PARAMETER; @@ -500,31 +493,37 @@ hsaKmtCreateQueue( dev_id = get_device_id_by_node(NodeId); dev_info = get_device_info_by_dev_id(dev_id); - struct queue *q = allocate_exec_aligned_memory(sizeof (*q), + struct queue *q = allocate_exec_aligned_memory(sizeof(*q), PAGE_SIZE, dev_info->asic_family, NodeId); - if (q == NULL) + if (!q) return HSAKMT_STATUS_NO_MEMORY; memset(q, 0, sizeof(*q)); struct kfd_ioctl_create_queue_args args; + memset(&args, 0, sizeof(args)); args.gpu_id = gpu_id; q->dev_info = dev_info; - switch (Type) - { - case HSA_QUEUE_COMPUTE: args.queue_type = KFD_IOC_QUEUE_TYPE_COMPUTE; break; - case HSA_QUEUE_SDMA: args.queue_type = KFD_IOC_QUEUE_TYPE_SDMA; break; - case HSA_QUEUE_COMPUTE_AQL: args.queue_type = KFD_IOC_QUEUE_TYPE_COMPUTE_AQL; break; - default: return HSAKMT_STATUS_INVALID_PARAMETER; + 
switch (Type) { + case HSA_QUEUE_COMPUTE: + args.queue_type = KFD_IOC_QUEUE_TYPE_COMPUTE; + break; + case HSA_QUEUE_SDMA: + args.queue_type = KFD_IOC_QUEUE_TYPE_SDMA; + break; + case HSA_QUEUE_COMPUTE_AQL: + args.queue_type = KFD_IOC_QUEUE_TYPE_COMPUTE_AQL; + break; + default: + return HSAKMT_STATUS_INVALID_PARAMETER; } - if (Type != HSA_QUEUE_COMPUTE_AQL) - { + if (Type != HSA_QUEUE_COMPUTE_AQL) { QueueResource->QueueRptrValue = (uintptr_t)&q->rptr; QueueResource->QueueWptrValue = (uintptr_t)&q->wptr; } @@ -545,8 +544,7 @@ hsaKmtCreateQueue( err = kmtIoctl(kfd_fd, AMDKFD_IOC_CREATE_QUEUE, &args); - if (err == -1) - { + if (err == -1) { free_queue(q); return HSAKMT_STATUS_ERROR; } @@ -558,14 +556,16 @@ hsaKmtCreateQueue( * doorbell page is included in the doorbell offset * returned by KFD. This allows doorbells to be * allocated per-device, independent of the - * per-process queue ID. */ + * per-process queue ID. + */ doorbell_mmap_offset = args.doorbell_offset & ~(HSAuint64)(doorbells[NodeId].size - 1); doorbell_offset = args.doorbell_offset & (doorbells[NodeId].size - 1); } else { /* On older chips, the doorbell offset within the - * doorbell page is based on the queue ID. */ + * doorbell page is based on the queue ID. 
+ */ doorbell_mmap_offset = args.doorbell_offset; doorbell_offset = q->queue_id * dev_info->doorbell_size; } @@ -585,28 +585,24 @@ hsaKmtCreateQueue( } -HSAKMT_STATUS -HSAKMTAPI -hsaKmtUpdateQueue( - HSA_QUEUEID QueueId, //IN - HSAuint32 QueuePercentage,//IN - HSA_QUEUE_PRIORITY Priority, //IN - void* QueueAddress, //IN - HSAuint64 QueueSize, //IN - HsaEvent* Event //IN - ) +HSAKMT_STATUS HSAKMTAPI hsaKmtUpdateQueue(HSA_QUEUEID QueueId, + HSAuint32 QueuePercentage, + HSA_QUEUE_PRIORITY Priority, + void *QueueAddress, + HSAuint64 QueueSize, + HsaEvent *Event) { struct kfd_ioctl_update_queue_args arg; struct queue *q = PORT_UINT64_TO_VPTR(QueueId); CHECK_KFD_OPEN(); - if (Priority < HSA_QUEUE_PRIORITY_MINIMUM || + if (Priority < HSA_QUEUE_PRIORITY_MINIMUM || Priority > HSA_QUEUE_PRIORITY_MAXIMUM) return HSAKMT_STATUS_INVALID_PARAMETER; - if (q == NULL) - return (HSAKMT_STATUS_INVALID_PARAMETER); + if (!q) + return HSAKMT_STATUS_INVALID_PARAMETER; arg.queue_id = (HSAuint32)q->queue_id; arg.ring_base_address = (uintptr_t)QueueAddress; arg.ring_size = QueueSize; @@ -614,27 +610,22 @@ hsaKmtUpdateQueue( arg.queue_priority = priority_map[Priority+3]; int err = kmtIoctl(kfd_fd, AMDKFD_IOC_UPDATE_QUEUE, &arg); + if (err == -1) - { return HSAKMT_STATUS_ERROR; - } return HSAKMT_STATUS_SUCCESS; } -HSAKMT_STATUS -HSAKMTAPI -hsaKmtDestroyQueue( - HSA_QUEUEID QueueId //IN - ) +HSAKMT_STATUS HSAKMTAPI hsaKmtDestroyQueue(HSA_QUEUEID QueueId) { CHECK_KFD_OPEN(); struct queue *q = PORT_UINT64_TO_VPTR(QueueId); struct kfd_ioctl_destroy_queue_args args; - if (q == NULL) - return (HSAKMT_STATUS_INVALID_PARAMETER); + if (!q) + return HSAKMT_STATUS_INVALID_PARAMETER; memset(&args, 0, sizeof(args)); @@ -643,30 +634,22 @@ hsaKmtDestroyQueue( int err = kmtIoctl(kfd_fd, AMDKFD_IOC_DESTROY_QUEUE, &args); if (err == -1) - { return HSAKMT_STATUS_ERROR; - } - else - { - free_queue(q); - return HSAKMT_STATUS_SUCCESS; - } + + free_queue(q); + return HSAKMT_STATUS_SUCCESS; } -HSAKMT_STATUS 
-HSAKMTAPI -hsaKmtSetQueueCUMask( - HSA_QUEUEID QueueId, //IN - HSAuint32 CUMaskCount, //IN - HSAuint32* QueueCUMask //IN - ) +HSAKMT_STATUS HSAKMTAPI hsaKmtSetQueueCUMask(HSA_QUEUEID QueueId, + HSAuint32 CUMaskCount, + HSAuint32 *QueueCUMask) { struct queue *q = PORT_UINT64_TO_VPTR(QueueId); struct kfd_ioctl_set_cu_mask_args args; CHECK_KFD_OPEN(); - if (CUMaskCount == 0 || QueueCUMask == NULL || ((CUMaskCount % 32) != 0)) + if (CUMaskCount == 0 || !QueueCUMask || ((CUMaskCount % 32) != 0)) return HSAKMT_STATUS_INVALID_PARAMETER; memset(&args, 0, sizeof(args)); @@ -675,23 +658,18 @@ hsaKmtSetQueueCUMask( args.cu_mask_ptr = (uintptr_t)QueueCUMask; int err = kmtIoctl(kfd_fd, AMDKFD_IOC_SET_CU_MASK, &args); + if (err == -1) - { return HSAKMT_STATUS_ERROR; - } return HSAKMT_STATUS_SUCCESS; } -HSAKMT_STATUS -HSAKMTAPI -hsaKmtSetTrapHandler( - HSAuint32 Node, - void *TrapHandlerBaseAddress, - HSAuint64 TrapHandlerSizeInBytes, - void *TrapBufferBaseAddress, - HSAuint64 TrapBufferSizeInBytes -) +HSAKMT_STATUS HSAKMTAPI hsaKmtSetTrapHandler(HSAuint32 Node, + void *TrapHandlerBaseAddress, + HSAuint64 TrapHandlerSizeInBytes, + void *TrapBufferBaseAddress, + HSAuint64 TrapBufferSizeInBytes) { struct kfd_ioctl_set_trap_handler_args args; HSAKMT_STATUS result; diff --git a/projects/rocr-runtime/src/time.c b/projects/rocr-runtime/src/time.c index 45709f9bd6..56e5484850 100644 --- a/projects/rocr-runtime/src/time.c +++ b/projects/rocr-runtime/src/time.c @@ -26,12 +26,8 @@ #include "libhsakmt.h" #include "linux/kfd_ioctl.h" -HSAKMT_STATUS -HSAKMTAPI -hsaKmtGetClockCounters( - HSAuint32 NodeId, //IN - HsaClockCounters* Counters //OUT - ) +HSAKMT_STATUS HSAKMTAPI hsaKmtGetClockCounters(HSAuint32 NodeId, + HsaClockCounters *Counters) { HSAKMT_STATUS result; uint32_t gpu_id; diff --git a/projects/rocr-runtime/src/topology.c b/projects/rocr-runtime/src/topology.c index 4c0ce1a0b0..1f36a1cac5 100644 --- a/projects/rocr-runtime/src/topology.c +++ b/projects/rocr-runtime/src/topology.c @@ 
-81,7 +81,7 @@ static struct hsa_gfxip_table { unsigned char minor; // GFXIP Minor engine version unsigned char stepping; // GFXIP Stepping info unsigned char is_dgpu; // Predicate for dGPU devices - const char* amd_name; // CALName of the device + const char *amd_name; // CALName of the device enum asic_family_type asic_family; } gfxip_lookup_table[] = { /* Kaveri Family */ @@ -226,7 +226,7 @@ free_node(node_t *n) { assert(n); - if (n == NULL) + if (!n) return; if ((n)->mem) @@ -240,6 +240,7 @@ free_node(node_t *n) static void free_nodes(node_t *temp_nodes, int size) { int i; + if (temp_nodes) { for (i = 0; i < size; i++) free_node(&temp_nodes[i]); @@ -261,7 +262,7 @@ static int num_subdirs(char *dirpath, char *prefix) int prefix_len = strlen(prefix); dirp = opendir(dirpath); - if(dirp) { + if (dirp) { while ((dir = readdir(dirp)) != 0) { if ((strcmp(dir->d_name, ".") == 0) || (strcmp(dir->d_name, "..") == 0)) @@ -282,8 +283,8 @@ static int num_subdirs(char *dirpath, char *prefix) * to the EAX, EBX, ECX, and EDX registers, as determined by input entered in * EAX (in some cases, ECX as well). 
*/ -static inline void -cpuid(uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx) +static inline void cpuid(uint32_t *eax, uint32_t *ebx, uint32_t *ecx, + uint32_t *edx) { __asm__ __volatile__( "cpuid;" @@ -309,6 +310,7 @@ static void cpuid_count(uint32_t op, int count, uint32_t *eax, uint32_t *ebx, static int lock_to_processor(int processor) { cpu_set_t cpuset; + memset(&cpuset, 0, sizeof(cpu_set_t)); CPU_SET(processor, &cpuset); /* 0: this process */ @@ -377,9 +379,9 @@ static void cpuid_get_cpu_cache_info(uint32_t op, cpu_cacheinfo_t *cpu_ci) eax.split.num_threads_sharing + 1; this_leaf->hsa_cache_prop.CacheLevel = eax.split.level; this_leaf->hsa_cache_prop.CacheType.ui32.CPU = 1; - if (eax.split.type & CACHE_TYPE_DATA ) + if (eax.split.type & CACHE_TYPE_DATA) this_leaf->hsa_cache_prop.CacheType.ui32.Data = 1; - if (eax.split.type & CACHE_TYPE_INST ) + if (eax.split.type & CACHE_TYPE_INST) this_leaf->hsa_cache_prop.CacheType.ui32.Instruction = 1; this_leaf->hsa_cache_prop.CacheLineSize = ebx.split.coherency_line_size + 1; @@ -433,8 +435,7 @@ static void find_cpu_cache_siblings(cpu_cacheinfo_t *cpu_ci_list) this_leaf->hsa_cache_prop.SiblingMap[apicid2 - apicid1] = 1; leaf2->hsa_cache_prop.CacheSize = 0; cpu2->num_duplicated_caches++; - } - else { + } else { leaf2->hsa_cache_prop.SiblingMap[0] = 1; leaf2->hsa_cache_prop.SiblingMap[apicid1 - apicid2] = 1; this_leaf->hsa_cache_prop.CacheSize = 0; @@ -446,8 +447,8 @@ static void find_cpu_cache_siblings(cpu_cacheinfo_t *cpu_ci_list) } #endif /* X86 platform */ -static HSAKMT_STATUS -topology_sysfs_get_generation(uint32_t *gen) { +static HSAKMT_STATUS topology_sysfs_get_generation(uint32_t *gen) +{ FILE *fd; HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS; @@ -465,12 +466,12 @@ err: return ret; } -HSAKMT_STATUS -topology_sysfs_get_system_props(HsaSystemProperties *props) { +HSAKMT_STATUS topology_sysfs_get_system_props(HsaSystemProperties *props) +{ FILE *fd; char *read_buf, *p; char prop_name[256]; - long long 
unsigned int prop_val; + unsigned long long prop_val; uint32_t prog; int read_size; HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS; @@ -487,28 +488,26 @@ topology_sysfs_get_system_props(HsaSystemProperties *props) { goto err1; } - read_size = fread(read_buf, 1, PAGE_SIZE, fd); - if (read_size <= 0) { + read_size = fread(read_buf, 1, PAGE_SIZE, fd); + if (read_size <= 0) { ret = HSAKMT_STATUS_ERROR; goto err2; } - /* Since we're using the buffer as a string, we make sure the string terminates */ - if(read_size >= PAGE_SIZE) - read_size = PAGE_SIZE-1; - read_buf[read_size] = 0; + /* Since we're using the buffer as a string, we make sure the string terminates */ + if (read_size >= PAGE_SIZE) + read_size = PAGE_SIZE - 1; + read_buf[read_size] = 0; - /* - * Read the system properties - */ + /* Read the system properties */ prog = 0; p = read_buf; - while(sscanf(p+=prog, "%s %llu\n%n", prop_name, &prop_val, &prog) == 2) { - if (strcmp(prop_name,"platform_oem") == 0) + while (sscanf(p += prog, "%s %llu\n%n", prop_name, &prop_val, &prog) == 2) { + if (strcmp(prop_name, "platform_oem") == 0) props->PlatformOem = (uint32_t)prop_val; - else if (strcmp(prop_name,"platform_id") == 0) + else if (strcmp(prop_name, "platform_id") == 0) props->PlatformId = (uint32_t)prop_val; - else if (strcmp(prop_name,"platform_rev") == 0) + else if (strcmp(prop_name, "platform_rev") == 0) props->PlatformRev = (uint32_t)prop_val; } @@ -526,8 +525,8 @@ err1: return ret; } -HSAKMT_STATUS -topology_sysfs_get_gpu_id(uint32_t node_id, uint32_t *gpu_id) { +HSAKMT_STATUS topology_sysfs_get_gpu_id(uint32_t node_id, uint32_t *gpu_id) +{ FILE *fd; char path[256]; HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS; @@ -537,21 +536,20 @@ topology_sysfs_get_gpu_id(uint32_t node_id, uint32_t *gpu_id) { fd = fopen(path, "r"); if (!fd) return HSAKMT_STATUS_ERROR; - if (fscanf(fd, "%ul", gpu_id) != 1) { + if (fscanf(fd, "%ul", gpu_id) != 1) ret = HSAKMT_STATUS_ERROR; - } fclose(fd); return ret; } -static const struct 
hsa_gfxip_table* find_hsa_gfxip_device(uint16_t device_id) +static const struct hsa_gfxip_table *find_hsa_gfxip_device(uint16_t device_id) { uint32_t i, table_size; table_size = sizeof(gfxip_lookup_table)/sizeof(struct hsa_gfxip_table); - for (i=0; iis_dgpu) { is_dgpu = true; return true; } + is_dgpu = false; return false; } -static HSAKMT_STATUS -topology_get_cpu_model_name(HsaNodeProperties *props, bool is_apu) { +static HSAKMT_STATUS topology_get_cpu_model_name(HsaNodeProperties *props, + bool is_apu) +{ FILE *fd; char read_buf[256], cpu_model_name[HSA_PUBLIC_NAME_SIZE]; const char *p; - uint32_t i, apic_id; + uint32_t i = 0, apic_id = 0; if (!props) return HSAKMT_STATUS_INVALID_PARAMETER; @@ -599,7 +599,7 @@ topology_get_cpu_model_name(HsaNodeProperties *props, bool is_apu) { return HSAKMT_STATUS_ERROR; } - while (fgets(read_buf, sizeof(read_buf), fd) != NULL) { + while (fgets(read_buf, sizeof(read_buf), fd)) { /* Get the model name first, in case matching * apic IDs are also present in the file */ @@ -609,7 +609,8 @@ topology_get_cpu_model_name(HsaNodeProperties *props, bool is_apu) { goto err; p++; // remove separator ':' - for (; isspace(*p); p++); /* remove white space */ + for (; isspace(*p); p++) + ; /* remove white space */ /* Extract model name from string */ for (i = 0; i < sizeof(cpu_model_name) - 1 && p[i] != '\n'; i++) @@ -623,7 +624,8 @@ topology_get_cpu_model_name(HsaNodeProperties *props, bool is_apu) { goto err; p++; // remove separator ':' - for (; isspace(*p); p++); /* remove white space */ + for (; isspace(*p); p++) + ; /* remove white space */ /* Extract apic_id from remaining chars */ apic_id = atoi(p); @@ -632,7 +634,7 @@ topology_get_cpu_model_name(HsaNodeProperties *props, bool is_apu) { if (props->CComputeIdLo == apic_id) { /* Retrieve the CAL name of CPU node */ if (!is_apu) - strncpy( (char *) props->AMDName, cpu_model_name, sizeof(props->AMDName)); + strncpy((char *)props->AMDName, cpu_model_name, sizeof(props->AMDName)); /* Convert 
from UTF8 to UTF16 */ for (i = 0; cpu_model_name[i] != '\0' && i < HSA_PUBLIC_NAME_SIZE - 1; i++) props->MarketingName[i] = cpu_model_name[i]; @@ -650,6 +652,7 @@ err: static int topology_search_processor_vendor(const char *processor_name) { unsigned int i; + for (i = 0; i < ARRAY_LEN(supported_processor_vendor_name); i++) { if (!strcmp(processor_name, supported_processor_vendor_name[i])) return i; @@ -678,11 +681,12 @@ static void topology_set_processor_vendor(void) return; } - while (fgets(read_buf, sizeof(read_buf), fd) != NULL) { + while (fgets(read_buf, sizeof(read_buf), fd)) { if (!strncmp("vendor_id", read_buf, sizeof("vendor_id") - 1)) { p = strrchr(read_buf, ':'); - p++; // remove separor ':' - for (; isspace(*p); p++); /* remove white space */ + p++; // remove separator ':' + for (; *p && isspace(*p); p++) + ; /* remove white space */ processor_vendor = topology_search_processor_vendor(p); if (processor_vendor != -1) { fclose(fd); @@ -696,21 +700,22 @@ static void topology_set_processor_vendor(void) processor_vendor = GENUINE_INTEL; } -HSAKMT_STATUS -topology_sysfs_get_node_props(uint32_t node_id, HsaNodeProperties *props, uint32_t *gpu_id, - struct pci_access* pacc ) +HSAKMT_STATUS topology_sysfs_get_node_props(uint32_t node_id, + HsaNodeProperties *props, + uint32_t *gpu_id, + struct pci_access *pacc) { FILE *fd; char *read_buf, *p; char prop_name[256]; char path[256]; - long long unsigned int prop_val; + unsigned long long prop_val; uint32_t i, prog; uint16_t fw_version = 0; int read_size; - const struct hsa_gfxip_table* hsa_gfxip; + const struct hsa_gfxip_table *hsa_gfxip; char namebuf[HSA_PUBLIC_NAME_SIZE]; - const char* name; + const char *name; HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS; @@ -728,7 +733,7 @@ topology_sysfs_get_node_props(uint32_t node_id, HsaNodeProperties *props, uint32 fd = fopen(path, "r"); if (!fd) { free(read_buf); - return HSAKMT_STATUS_ERROR; + return HSAKMT_STATUS_ERROR; } read_size = fread(read_buf, 1, PAGE_SIZE, fd); @@ 
-738,63 +743,61 @@ topology_sysfs_get_node_props(uint32_t node_id, HsaNodeProperties *props, uint32 } /* Since we're using the buffer as a string, we make sure the string terminates */ - if(read_size >= PAGE_SIZE) - read_size = PAGE_SIZE-1; + if (read_size >= PAGE_SIZE) + read_size = PAGE_SIZE - 1; read_buf[read_size] = 0; - /* - * Read the node properties - */ + /* Read the node properties */ prog = 0; p = read_buf; - while(sscanf(p+=prog, "%s %llu\n%n", prop_name, &prop_val, &prog) == 2) { - if (strcmp(prop_name,"cpu_cores_count") == 0) + while (sscanf(p += prog, "%s %llu\n%n", prop_name, &prop_val, &prog) == 2) { + if (strcmp(prop_name, "cpu_cores_count") == 0) props->NumCPUCores = (uint32_t)prop_val; - else if (strcmp(prop_name,"simd_count") == 0) + else if (strcmp(prop_name, "simd_count") == 0) props->NumFComputeCores = (uint32_t)prop_val; - else if (strcmp(prop_name,"mem_banks_count") == 0) + else if (strcmp(prop_name, "mem_banks_count") == 0) props->NumMemoryBanks = (uint32_t)prop_val; - else if (strcmp(prop_name,"caches_count") == 0) + else if (strcmp(prop_name, "caches_count") == 0) props->NumCaches = (uint32_t)prop_val; - else if (strcmp(prop_name,"io_links_count") == 0) + else if (strcmp(prop_name, "io_links_count") == 0) props->NumIOLinks = (uint32_t)prop_val; - else if (strcmp(prop_name,"cpu_core_id_base") == 0) + else if (strcmp(prop_name, "cpu_core_id_base") == 0) props->CComputeIdLo = (uint32_t)prop_val; - else if (strcmp(prop_name,"simd_id_base") == 0) + else if (strcmp(prop_name, "simd_id_base") == 0) props->FComputeIdLo = (uint32_t)prop_val; - else if (strcmp(prop_name,"capability") == 0) + else if (strcmp(prop_name, "capability") == 0) props->Capability.Value = (uint32_t)prop_val; - else if (strcmp(prop_name,"max_waves_per_simd") == 0) + else if (strcmp(prop_name, "max_waves_per_simd") == 0) props->MaxWavesPerSIMD = (uint32_t)prop_val; - else if (strcmp(prop_name,"lds_size_in_kb") == 0) + else if (strcmp(prop_name, "lds_size_in_kb") == 0) 
props->LDSSizeInKB = (uint32_t)prop_val; - else if (strcmp(prop_name,"gds_size_in_kb") == 0) + else if (strcmp(prop_name, "gds_size_in_kb") == 0) props->GDSSizeInKB = (uint32_t)prop_val; - else if (strcmp(prop_name,"wave_front_size") == 0) + else if (strcmp(prop_name, "wave_front_size") == 0) props->WaveFrontSize = (uint32_t)prop_val; - else if (strcmp(prop_name,"array_count") == 0) + else if (strcmp(prop_name, "array_count") == 0) props->NumShaderBanks = (uint32_t)prop_val; - else if (strcmp(prop_name,"simd_arrays_per_engine") == 0) + else if (strcmp(prop_name, "simd_arrays_per_engine") == 0) props->NumArrays = (uint32_t)prop_val; - else if (strcmp(prop_name,"cu_per_simd_array") == 0) + else if (strcmp(prop_name, "cu_per_simd_array") == 0) props->NumCUPerArray = (uint32_t)prop_val; - else if (strcmp(prop_name,"simd_per_cu") == 0) + else if (strcmp(prop_name, "simd_per_cu") == 0) props->NumSIMDPerCU = (uint32_t)prop_val; - else if (strcmp(prop_name,"max_slots_scratch_cu") == 0) + else if (strcmp(prop_name, "max_slots_scratch_cu") == 0) props->MaxSlotsScratchCU = (uint32_t)prop_val; - else if (strcmp(prop_name,"fw_version") == 0) + else if (strcmp(prop_name, "fw_version") == 0) fw_version = (uint16_t)prop_val; - else if (strcmp(prop_name,"vendor_id") == 0) + else if (strcmp(prop_name, "vendor_id") == 0) props->VendorId = (uint32_t)prop_val; - else if (strcmp(prop_name,"device_id") == 0) + else if (strcmp(prop_name, "device_id") == 0) props->DeviceId = (uint32_t)prop_val; - else if (strcmp(prop_name,"location_id") == 0) + else if (strcmp(prop_name, "location_id") == 0) props->LocationId = (uint32_t)prop_val; - else if (strcmp(prop_name,"max_engine_clk_fcompute") == 0) + else if (strcmp(prop_name, "max_engine_clk_fcompute") == 0) props->MaxEngineClockMhzFCompute = (uint32_t)prop_val; - else if (strcmp(prop_name,"max_engine_clk_ccompute") == 0) + else if (strcmp(prop_name, "max_engine_clk_ccompute") == 0) props->MaxEngineClockMhzCCompute = (uint32_t)prop_val; - else if 
(strcmp(prop_name,"local_mem_size") == 0) + else if (strcmp(prop_name, "local_mem_size") == 0) props->LocalMemSize = prop_val; } @@ -817,12 +820,11 @@ topology_sysfs_get_node_props(uint32_t node_id, HsaNodeProperties *props, uint32 } /* Retrieve the CAL name of the node */ - strncpy( (char *) props->AMDName, hsa_gfxip->amd_name, sizeof(props->AMDName) ); + strncpy((char *)props->AMDName, hsa_gfxip->amd_name, sizeof(props->AMDName)); if (props->NumCPUCores) { /* Is APU node */ ret = topology_get_cpu_model_name(props, true); - if (ret != HSAKMT_STATUS_SUCCESS) - { + if (ret != HSAKMT_STATUS_SUCCESS) { printf("Failed to get APU Model Name from %s\n", PROC_CPUINFO_PATH); ret = HSAKMT_STATUS_SUCCESS; /* No hard error, continue regardless */ } @@ -841,8 +843,7 @@ topology_sysfs_get_node_props(uint32_t node_id, HsaNodeProperties *props, uint32 /* Is CPU Node */ if (!props->NumFComputeCores || !props->DeviceId) { ret = topology_get_cpu_model_name(props, false); - if (ret != HSAKMT_STATUS_SUCCESS) - { + if (ret != HSAKMT_STATUS_SUCCESS) { printf("Failed to get CPU Model Name from %s\n", PROC_CPUINFO_PATH); ret = HSAKMT_STATUS_SUCCESS; /* No hard error, continue regardless */ } @@ -860,52 +861,53 @@ err: return ret; } -static HSAKMT_STATUS -topology_sysfs_get_mem_props(uint32_t node_id, uint32_t mem_id, HsaMemoryProperties *props) { +static HSAKMT_STATUS topology_sysfs_get_mem_props(uint32_t node_id, + uint32_t mem_id, + HsaMemoryProperties *props) +{ FILE *fd; char *read_buf, *p; char prop_name[256]; char path[256]; - long long unsigned int prop_val; + unsigned long long prop_val; uint32_t prog; - int read_size; + int read_size; HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS; assert(props); snprintf(path, 256, "%s/%d/mem_banks/%d/properties", KFD_SYSFS_PATH_NODES, node_id, mem_id); fd = fopen(path, "r"); - if (!fd) { + if (!fd) return HSAKMT_STATUS_ERROR; - } read_buf = malloc(PAGE_SIZE); if (!read_buf) { ret = HSAKMT_STATUS_NO_MEMORY; goto err1; } - read_size = fread(read_buf, 
1, PAGE_SIZE, fd); - if (read_size <= 0) { + read_size = fread(read_buf, 1, PAGE_SIZE, fd); + if (read_size <= 0) { ret = HSAKMT_STATUS_ERROR; goto err2; } - /* Since we're using the buffer as a string, we make sure the string terminates */ - if(read_size >= PAGE_SIZE) - read_size = PAGE_SIZE-1; - read_buf[read_size] = 0; + /* Since we're using the buffer as a string, we make sure the string terminates */ + if (read_size >= PAGE_SIZE) + read_size = PAGE_SIZE - 1; + read_buf[read_size] = 0; prog = 0; p = read_buf; - while(sscanf(p+=prog, "%s %llu\n%n", prop_name, &prop_val, &prog) == 2) { - if (strcmp(prop_name,"heap_type") == 0) + while (sscanf(p += prog, "%s %llu\n%n", prop_name, &prop_val, &prog) == 2) { + if (strcmp(prop_name, "heap_type") == 0) props->HeapType = (uint32_t)prop_val; - else if (strcmp(prop_name,"size_in_bytes") == 0) + else if (strcmp(prop_name, "size_in_bytes") == 0) props->SizeInBytes = (uint64_t)prop_val; - else if (strcmp(prop_name,"flags") == 0) + else if (strcmp(prop_name, "flags") == 0) props->Flags.MemoryProperty = (uint32_t)prop_val; - else if (strcmp(prop_name,"width") == 0) + else if (strcmp(prop_name, "width") == 0) props->Width = (uint32_t)prop_val; - else if (strcmp(prop_name,"mem_clk_max") == 0) + else if (strcmp(prop_name, "mem_clk_max") == 0) props->MemoryClockMax = (uint32_t)prop_val; } @@ -946,8 +948,7 @@ static void topology_destroy_temp_cpu_cache_list(void *temp_cpu_ci_list) * @temp_cpu_ci_list - [OUT] temporary cpu-cache-info list to store data * Return - HSAKMT_STATUS_SUCCESS in success or error number in failure */ -static HSAKMT_STATUS -topology_create_temp_cpu_cache_list(void **temp_cpu_ci_list) +static HSAKMT_STATUS topology_create_temp_cpu_cache_list(void **temp_cpu_ci_list) { HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS; void *p_temp_cpu_ci_list; @@ -1032,8 +1033,8 @@ exit: * @cpu_ci_list - the cpu cache information list to look up cache info * Return - HSAKMT_STATUS_SUCCESS in success or error number in failure */ 
-static HSAKMT_STATUS -topology_get_cpu_cache_props(node_t *tbl, cpu_cacheinfo_t *cpu_ci_list) +static HSAKMT_STATUS topology_get_cpu_cache_props(node_t *tbl, + cpu_cacheinfo_t *cpu_ci_list) { HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS; uint32_t apicid_low = tbl->node.CComputeIdLo, apicid_max = 0; @@ -1089,71 +1090,71 @@ static void topology_destroy_temp_cpu_cache_list(void *temp_cpu_ci_list) { } -static HSAKMT_STATUS -topology_create_temp_cpu_cache_list(void **temp_cpu_ci_list) +static HSAKMT_STATUS topology_create_temp_cpu_cache_list(void **temp_cpu_ci_list) { return HSAKMT_STATUS_SUCCESS; } -static HSAKMT_STATUS -topology_get_cpu_cache_props(node_t *tbl, cpu_cacheinfo_t *cpu_ci_list) +static HSAKMT_STATUS topology_get_cpu_cache_props(node_t *tbl, + cpu_cacheinfo_t *cpu_ci_list) { return HSAKMT_STATUS_SUCCESS; } #endif -static HSAKMT_STATUS -topology_sysfs_get_cache_props(uint32_t node_id, uint32_t cache_id, HsaCacheProperties *props) { +static HSAKMT_STATUS topology_sysfs_get_cache_props(uint32_t node_id, + uint32_t cache_id, + HsaCacheProperties *props) +{ FILE *fd; char *read_buf, *p; char prop_name[256]; char path[256]; - long long unsigned int prop_val; + unsigned long long prop_val; uint32_t i, prog; - int read_size; + int read_size; HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS; assert(props); snprintf(path, 256, "%s/%d/caches/%d/properties", KFD_SYSFS_PATH_NODES, node_id, cache_id); fd = fopen(path, "r"); - if (!fd) { + if (!fd) return HSAKMT_STATUS_ERROR; - } read_buf = malloc(PAGE_SIZE); if (!read_buf) { ret = HSAKMT_STATUS_NO_MEMORY; goto err1; } - read_size = fread(read_buf, 1, PAGE_SIZE, fd); - if (read_size <= 0) { + read_size = fread(read_buf, 1, PAGE_SIZE, fd); + if (read_size <= 0) { ret = HSAKMT_STATUS_ERROR; goto err2; } - /* Since we're using the buffer as a string, we make sure the string terminates */ - if(read_size >= PAGE_SIZE) - read_size = PAGE_SIZE-1; - read_buf[read_size] = 0; + /* Since we're using the buffer as a string, we make sure the 
string terminates */ + if (read_size >= PAGE_SIZE) + read_size = PAGE_SIZE - 1; + read_buf[read_size] = 0; prog = 0; p = read_buf; - while(sscanf(p+=prog, "%s %llu\n%n", prop_name, &prop_val, &prog) == 2) { - if (strcmp(prop_name,"processor_id_low") == 0) + while (sscanf(p += prog, "%s %llu\n%n", prop_name, &prop_val, &prog) == 2) { + if (strcmp(prop_name, "processor_id_low") == 0) props->ProcessorIdLow = (uint32_t)prop_val; - else if (strcmp(prop_name,"level") == 0) + else if (strcmp(prop_name, "level") == 0) props->CacheLevel = (uint32_t)prop_val; - else if (strcmp(prop_name,"size") == 0) + else if (strcmp(prop_name, "size") == 0) props->CacheSize = (uint32_t)prop_val; - else if (strcmp(prop_name,"cache_line_size") == 0) + else if (strcmp(prop_name, "cache_line_size") == 0) props->CacheLineSize = (uint32_t)prop_val; - else if (strcmp(prop_name,"cache_lines_per_tag") == 0) + else if (strcmp(prop_name, "cache_lines_per_tag") == 0) props->CacheLinesPerTag = (uint32_t)prop_val; - else if (strcmp(prop_name,"association") == 0) + else if (strcmp(prop_name, "association") == 0) props->CacheAssociativity = (uint32_t)prop_val; - else if (strcmp(prop_name,"latency") == 0) + else if (strcmp(prop_name, "latency") == 0) props->CacheLatency = (uint32_t)prop_val; - else if (strcmp(prop_name,"type") == 0) + else if (strcmp(prop_name, "type") == 0) props->CacheType.Value = (uint32_t)prop_val; else if (strcmp(prop_name, "sibling_map") == 0) break; @@ -1163,8 +1164,8 @@ topology_sysfs_get_cache_props(uint32_t node_id, uint32_t cache_id, HsaCacheProp if ((sscanf(p, "sibling_map %n", &prog)) == 0 && prog) { i = 0; while ((i < HSA_CPU_SIBLINGS) && - (sscanf(p+=prog, "%u%*[,\n]%n", &props->SiblingMap[i++], - &prog) == 1)); + (sscanf(p += prog, "%u%*[,\n]%n", &props->SiblingMap[i++], &prog) == 1)) + continue; } err2: @@ -1174,66 +1175,67 @@ err1: return ret; } -static HSAKMT_STATUS -topology_sysfs_get_iolink_props(uint32_t node_id, uint32_t iolink_id, HsaIoLinkProperties *props) { 
+static HSAKMT_STATUS topology_sysfs_get_iolink_props(uint32_t node_id, + uint32_t iolink_id, + HsaIoLinkProperties *props) +{ FILE *fd; char *read_buf, *p; char prop_name[256]; char path[256]; - long long unsigned int prop_val; + unsigned long long prop_val; uint32_t prog; - int read_size; + int read_size; HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS; assert(props); snprintf(path, 256, "%s/%d/io_links/%d/properties", KFD_SYSFS_PATH_NODES, node_id, iolink_id); fd = fopen(path, "r"); - if (!fd) { + if (!fd) return HSAKMT_STATUS_ERROR; - } read_buf = malloc(PAGE_SIZE); if (!read_buf) { ret = HSAKMT_STATUS_NO_MEMORY; goto err1; } - read_size = fread(read_buf, 1, PAGE_SIZE, fd); - if (read_size <= 0) { + read_size = fread(read_buf, 1, PAGE_SIZE, fd); + if (read_size <= 0) { ret = HSAKMT_STATUS_ERROR; goto err2; } - /* Since we're using the buffer as a string, we make sure the string terminates */ - if(read_size >= PAGE_SIZE) - read_size = PAGE_SIZE-1; - read_buf[read_size] = 0; + /* Since we're using the buffer as a string, we make sure the string terminates */ + if (read_size >= PAGE_SIZE) + read_size = PAGE_SIZE - 1; + read_buf[read_size] = 0; prog = 0; p = read_buf; - while(sscanf(p+=prog, "%s %llu\n%n", prop_name, &prop_val, &prog) == 2) { - if (strcmp(prop_name,"type") == 0) + while (sscanf(p += prog, "%s %llu\n%n", prop_name, &prop_val, &prog) == 2) { + if (strcmp(prop_name, "type") == 0) props->IoLinkType = (uint32_t)prop_val; - else if (strcmp(prop_name,"version_major") == 0) + else if (strcmp(prop_name, "version_major") == 0) props->VersionMajor = (uint32_t)prop_val; - else if (strcmp(prop_name,"version_minor") == 0) + else if (strcmp(prop_name, "version_minor") == 0) props->VersionMinor = (uint32_t)prop_val; - else if (strcmp(prop_name,"node_from") == 0) + else if (strcmp(prop_name, "node_from") == 0) props->NodeFrom = (uint32_t)prop_val; - else if (strcmp(prop_name,"node_to") == 0) + else if (strcmp(prop_name, "node_to") == 0) props->NodeTo = 
(uint32_t)prop_val; - else if (strcmp(prop_name,"weight") == 0) + else if (strcmp(prop_name, "weight") == 0) props->Weight = (uint32_t)prop_val; - else if (strcmp(prop_name,"min_latency") == 0) + else if (strcmp(prop_name, "min_latency") == 0) props->MinimumLatency = (uint32_t)prop_val; - else if (strcmp(prop_name,"max_latency") == 0) + else if (strcmp(prop_name, "max_latency") == 0) props->MaximumLatency = (uint32_t)prop_val; - else if (strcmp(prop_name,"min_bandwidth") == 0) + else if (strcmp(prop_name, "min_bandwidth") == 0) props->MinimumBandwidth = (uint32_t)prop_val; - else if (strcmp(prop_name,"max_bandwidth") == 0) + else if (strcmp(prop_name, "max_bandwidth") == 0) props->MaximumBandwidth = (uint32_t)prop_val; - else if (strcmp(prop_name,"recommended_transfer_size") == 0) + else if (strcmp(prop_name, "recommended_transfer_size") == 0) props->RecTransferSize = (uint32_t)prop_val; - else if (strcmp(prop_name,"flags") == 0) + else if (strcmp(prop_name, "flags") == 0) props->Flags.LinkProperty = (uint32_t)prop_val; } @@ -1246,11 +1248,11 @@ err1: } /* topology_get_free_io_link_slot_for_node - For the given node_id, find the - * next available free slot to add an io_link + * next available free slot to add an io_link */ -static HsaIoLinkProperties * topology_get_free_io_link_slot_for_node( - uint32_t node_id, const HsaSystemProperties *sys_props, - node_t *nodes) +static HsaIoLinkProperties *topology_get_free_io_link_slot_for_node(uint32_t node_id, + const HsaSystemProperties *sys_props, + node_t *nodes) { HsaIoLinkProperties *props; @@ -1275,14 +1277,16 @@ static HsaIoLinkProperties * topology_get_free_io_link_slot_for_node( } /* topology_add_io_link_for_node - If a free slot is available, - * add io_link for the given Node. If bi_directional is true, set up two - * links for both directions. - * TODO: Add other members of HsaIoLinkProperties + * add io_link for the given Node. If bi_directional is true, set up two + * links for both directions. 
+ * TODO: Add other members of HsaIoLinkProperties */ static HSAKMT_STATUS topology_add_io_link_for_node(uint32_t node_id, - const HsaSystemProperties *sys_props, node_t *nodes, - HSA_IOLINKTYPE IoLinkType, uint32_t NodeTo, - uint32_t Weight, bool bi_dir) + const HsaSystemProperties *sys_props, + node_t *nodes, + HSA_IOLINKTYPE IoLinkType, + uint32_t NodeTo, + uint32_t Weight, bool bi_dir) { HsaIoLinkProperties *props; /* If bi-directional is set true, it's two links to add. */ @@ -1330,7 +1334,8 @@ static int32_t gpu_get_direct_link_cpu(uint32_t gpu_node, node_t *nodes) * been created in the kernel. */ static HSAKMT_STATUS get_direct_iolink_info(uint32_t node1, uint32_t node2, - node_t *nodes, HSAuint32 *weight, HSA_IOLINKTYPE *type) + node_t *nodes, HSAuint32 *weight, + HSA_IOLINKTYPE *type) { HsaIoLinkProperties *props = nodes[node1].link; uint32_t i; @@ -1351,7 +1356,8 @@ static HSAKMT_STATUS get_direct_iolink_info(uint32_t node1, uint32_t node2, } static HSAKMT_STATUS get_indirect_iolink_info(uint32_t node1, uint32_t node2, - node_t *nodes, HSAuint32 *weight, HSA_IOLINKTYPE *type) + node_t *nodes, HSAuint32 *weight, + HSA_IOLINKTYPE *type) { int32_t dir_cpu1 = -1, dir_cpu2 = -1; HSAuint32 weight1 = 0, weight2 = 0, weight3 = 0; @@ -1389,7 +1395,7 @@ static HSAKMT_STATUS get_indirect_iolink_info(uint32_t node1, uint32_t node2, if (ret != HSAKMT_STATUS_SUCCESS) return ret; ret = get_direct_iolink_info(dir_cpu1, node2, - nodes, &weight2, type); + nodes, &weight2, type); } else /* GPU->CPU->CPU->GPU*/ { ret = get_direct_iolink_info(node1, dir_cpu1, nodes, &weight1, NULL); @@ -1434,8 +1440,8 @@ static HSAKMT_STATUS get_indirect_iolink_info(uint32_t node1, uint32_t node2, return HSAKMT_STATUS_SUCCESS; } -static void topology_create_indirect_gpu_links( - const HsaSystemProperties *sys_props, node_t *nodes) +static void topology_create_indirect_gpu_links(const HsaSystemProperties *sys_props, + node_t *nodes) { uint32_t i, j; @@ -1455,8 +1461,7 @@ static void 
topology_create_indirect_gpu_links( } } -HSAKMT_STATUS -topology_take_snapshot(void) +HSAKMT_STATUS topology_take_snapshot(void) { uint32_t gen_start, gen_end, i, mem_id, cache_id, link_id; HsaSystemProperties sys_props; @@ -1473,9 +1478,9 @@ retry: ret = topology_sysfs_get_system_props(&sys_props); if (ret != HSAKMT_STATUS_SUCCESS) return ret; - if(sys_props.NumNodes > 0) { + if (sys_props.NumNodes > 0) { topology_create_temp_cpu_cache_list(&cpu_ci_list); - temp_nodes = calloc(sys_props.NumNodes * sizeof(node_t),1); + temp_nodes = calloc(sys_props.NumNodes * sizeof(node_t), 1); if (!temp_nodes) return HSAKMT_STATUS_NO_MEMORY; pacc = pci_alloc(); @@ -1518,8 +1523,7 @@ retry: goto err; } } - } - else if (!temp_nodes[i].gpu_id) { /* a CPU node */ + } else if (!temp_nodes[i].gpu_id) { /* a CPU node */ ret = topology_get_cpu_cache_props( &temp_nodes[i], cpu_ci_list); if (ret != HSAKMT_STATUS_SUCCESS) { @@ -1529,7 +1533,8 @@ retry: } /* To simplify, allocate maximum needed memory for io_links for each node. This - * removes the need for realloc when indirect and QPI links are added later */ + * removes the need for realloc when indirect and QPI links are added later + */ temp_nodes[i].link = calloc(sys_props.NumNodes - 1, sizeof(HsaIoLinkProperties)); if (!temp_nodes[i].link) { ret = HSAKMT_STATUS_NO_MEMORY; @@ -1585,12 +1590,8 @@ err: return ret; } -/* - * Drop the Snashot of the HSA topology information. - * Assume lock is held. - */ -HSAKMT_STATUS -topology_drop_snapshot(void) +/* Drop the Snapshot of the HSA topology information. Assume lock is held. 
*/ +HSAKMT_STATUS topology_drop_snapshot(void) { HSAKMT_STATUS err; @@ -1614,10 +1615,9 @@ out: return err; } -HSAKMT_STATUS -validate_nodeid(uint32_t nodeid, uint32_t *gpu_id) +HSAKMT_STATUS validate_nodeid(uint32_t nodeid, uint32_t *gpu_id) { - if (!node || !_system || _system->NumNodes <= nodeid) + if (!node || !_system || _system->NumNodes <= nodeid) return HSAKMT_STATUS_INVALID_NODE_UNIT; if (gpu_id) *gpu_id = node[nodeid].gpu_id; @@ -1625,11 +1625,12 @@ validate_nodeid(uint32_t nodeid, uint32_t *gpu_id) return HSAKMT_STATUS_SUCCESS; } -HSAKMT_STATUS -gpuid_to_nodeid(uint32_t gpu_id, uint32_t* node_id){ +HSAKMT_STATUS gpuid_to_nodeid(uint32_t gpu_id, uint32_t *node_id) +{ uint64_t node_idx; - for(node_idx = 0; node_idx < _system->NumNodes; node_idx++){ - if (node[node_idx].gpu_id == gpu_id){ + + for (node_idx = 0; node_idx < _system->NumNodes; node_idx++) { + if (node[node_idx].gpu_id == gpu_id) { *node_id = node_idx; return HSAKMT_STATUS_SUCCESS; } @@ -1639,17 +1640,14 @@ gpuid_to_nodeid(uint32_t gpu_id, uint32_t* node_id){ } -HSAKMT_STATUS -HSAKMTAPI -hsaKmtAcquireSystemProperties( - HsaSystemProperties* SystemProperties //OUT - ) +HSAKMT_STATUS HSAKMTAPI hsaKmtAcquireSystemProperties(HsaSystemProperties *SystemProperties) { HSAKMT_STATUS err; + CHECK_KFD_OPEN(); if (!SystemProperties) - return HSAKMT_STATUS_INVALID_PARAMETER; + return HSAKMT_STATUS_INVALID_PARAMETER; pthread_mutex_lock(&hsakmt_mutex); @@ -1667,9 +1665,7 @@ out: return err; } -HSAKMT_STATUS -HSAKMTAPI -hsaKmtReleaseSystemProperties(void) +HSAKMT_STATUS HSAKMTAPI hsaKmtReleaseSystemProperties(void) { CHECK_KFD_OPEN(); @@ -1684,12 +1680,8 @@ hsaKmtReleaseSystemProperties(void) return err; } -HSAKMT_STATUS -HSAKMTAPI -hsaKmtGetNodeProperties( - HSAuint32 NodeId, //IN - HsaNodeProperties* NodeProperties //OUT - ) +HSAKMT_STATUS HSAKMTAPI hsaKmtGetNodeProperties(HSAuint32 NodeId, + HsaNodeProperties *NodeProperties) { HSAKMT_STATUS err; uint32_t gpu_id; @@ -1701,7 +1693,7 @@ 
hsaKmtGetNodeProperties( pthread_mutex_lock(&hsakmt_mutex); /* KFD ADD page 18, snapshot protocol violation */ - if (_system == NULL) { + if (!_system) { err = HSAKMT_STATUS_INVALID_NODE_UNIT; assert(_system); goto out; @@ -1731,13 +1723,9 @@ out: return err; } -HSAKMT_STATUS -HSAKMTAPI -hsaKmtGetNodeMemoryProperties( - HSAuint32 NodeId, //IN - HSAuint32 NumBanks, //IN - HsaMemoryProperties* MemoryProperties //OUT - ) +HSAKMT_STATUS HSAKMTAPI hsaKmtGetNodeMemoryProperties(HSAuint32 NodeId, + HSAuint32 NumBanks, + HsaMemoryProperties *MemoryProperties) { HSAKMT_STATUS err = HSAKMT_STATUS_SUCCESS; uint32_t i, gpu_id; @@ -1751,14 +1739,14 @@ hsaKmtGetNodeMemoryProperties( pthread_mutex_lock(&hsakmt_mutex); /* KFD ADD page 18, snapshot protocol violation */ - if (_system == NULL) { + if (!_system) { err = HSAKMT_STATUS_INVALID_NODE_UNIT; assert(_system); goto out; } /* Check still necessary */ - if (NodeId >= _system->NumNodes ) { + if (NodeId >= _system->NumNodes) { err = HSAKMT_STATUS_INVALID_PARAMETER; goto out; } @@ -1790,7 +1778,9 @@ hsaKmtGetNodeMemoryProperties( } /* Add Local memory - HSA_HEAPTYPE_FRAME_BUFFER_PRIVATE. 
- * For dGPU the topology node contains Local Memory and it is added by the for loop above */ + * For dGPU the topology node contains Local Memory and it is added by + * the for loop above + */ if (!nodeIsDGPU && i < NumBanks && node[NodeId].node.LocalMemSize > 0 && fmm_get_aperture_base_and_limit(FMM_GPUVM, gpu_id, &MemoryProperties[i].VirtualBaseAddress, &aperture_limit) == HSAKMT_STATUS_SUCCESS) { @@ -1799,7 +1789,7 @@ hsaKmtGetNodeMemoryProperties( i++; } - /*Add SCRATCH*/ + /* Add SCRATCH */ if (i < NumBanks && fmm_get_aperture_base_and_limit(FMM_SCRATCH, gpu_id, &MemoryProperties[i].VirtualBaseAddress, &aperture_limit) == HSAKMT_STATUS_SUCCESS) { @@ -1823,14 +1813,10 @@ out: return err; } -HSAKMT_STATUS -HSAKMTAPI -hsaKmtGetNodeCacheProperties( - HSAuint32 NodeId, //IN - HSAuint32 ProcessorId, //IN - HSAuint32 NumCaches, //IN - HsaCacheProperties* CacheProperties //OUT - ) +HSAKMT_STATUS HSAKMTAPI hsaKmtGetNodeCacheProperties(HSAuint32 NodeId, + HSAuint32 ProcessorId, + HSAuint32 NumCaches, + HsaCacheProperties *CacheProperties) { HSAKMT_STATUS err; uint32_t i; @@ -1842,7 +1828,7 @@ hsaKmtGetNodeCacheProperties( pthread_mutex_lock(&hsakmt_mutex); /* KFD ADD page 18, snapshot protocol violation */ - if (_system == NULL) { + if (!_system) { err = HSAKMT_STATUS_INVALID_NODE_UNIT; assert(_system); goto out; @@ -1865,13 +1851,9 @@ out: return err; } -HSAKMT_STATUS -HSAKMTAPI -hsaKmtGetNodeIoLinkProperties( - HSAuint32 NodeId, //IN - HSAuint32 NumIoLinks, //IN - HsaIoLinkProperties* IoLinkProperties //OUT - ) +HSAKMT_STATUS HSAKMTAPI hsaKmtGetNodeIoLinkProperties(HSAuint32 NodeId, + HSAuint32 NumIoLinks, + HsaIoLinkProperties *IoLinkProperties) { HSAKMT_STATUS err; uint32_t i; @@ -1884,7 +1866,7 @@ hsaKmtGetNodeIoLinkProperties( pthread_mutex_lock(&hsakmt_mutex); /* KFD ADD page 18, snapshot protocol violation */ - if (_system == NULL) { + if (!_system) { err = HSAKMT_STATUS_INVALID_NODE_UNIT; assert(_system); goto out; @@ -1909,15 +1891,16 @@ out: uint16_t 
get_device_id_by_node(HSAuint32 node_id) { - if (!node || !_system || _system->NumNodes <= node_id) - return 0; + if (!node || !_system || _system->NumNodes <= node_id) + return 0; - return node[node_id].node.DeviceId; + return node[node_id].node.DeviceId; } uint16_t get_device_id_by_gpu_id(HSAuint32 gpu_id) { unsigned int i; + if (!node || !_system) return 0; @@ -1935,14 +1918,14 @@ HSAKMT_STATUS validate_nodeid_array(uint32_t **gpu_id_array, HSAKMT_STATUS ret; unsigned int i; - if (NumberOfNodes == 0 || NodeArray == NULL || gpu_id_array == NULL) + if (NumberOfNodes == 0 || !NodeArray || !gpu_id_array) return HSAKMT_STATUS_INVALID_PARAMETER; /* Translate Node IDs to gpu_ids */ *gpu_id_array = malloc(NumberOfNodes * sizeof(uint32_t)); - if (*gpu_id_array == NULL) + if (!(*gpu_id_array)) return HSAKMT_STATUS_NO_MEMORY; - for (i = 0; i < NumberOfNodes; i++) { + for (i = 0; i < NumberOfNodes; i++) { ret = validate_nodeid(NodeArray[i], *gpu_id_array + i); if (ret != HSAKMT_STATUS_SUCCESS) { free(*gpu_id_array); @@ -1950,45 +1933,5 @@ HSAKMT_STATUS validate_nodeid_array(uint32_t **gpu_id_array, } } - return ret; -} - -#if 0 -static int get_cpu_stepping(uint16_t* stepping) -{ - int ret; - FILE* fd = fopen("/proc/cpuinfo", "r"); - if (!fd) - return -1; - - char* read_buf = malloc(PAGE_SIZE); - if (!read_buf) { - ret = -1; - goto err1; - } - - int read_size = fread(read_buf, 1, PAGE_SIZE, fd); - if (read_size <= 0) { - ret = -2; - goto err2; - } - - /* Since we're using the buffer as a string, we make sure the string terminates */ - if(read_size >= PAGE_SIZE) - read_size = PAGE_SIZE-1; - read_buf[read_size] = 0; - - *stepping = 0; - - char* p = strstr(read_buf, "stepping"); - if (p) - sscanf(p , "stepping\t: %hu\n", stepping); - -err2: - free(read_buf); -err1: - fclose(fd); - return ret; } -#endif diff --git a/projects/rocr-runtime/src/version.c b/projects/rocr-runtime/src/version.c index 95bfec6523..0b7c7c86e8 100644 --- a/projects/rocr-runtime/src/version.c +++ 
b/projects/rocr-runtime/src/version.c @@ -28,15 +28,12 @@ #include #include "linux/kfd_ioctl.h" -HSAKMT_STATUS -HSAKMTAPI -hsaKmtGetVersion( - HsaVersionInfo* VersionInfo //OUT - ) +HSAKMT_STATUS HSAKMTAPI hsaKmtGetVersion(HsaVersionInfo *VersionInfo) { CHECK_KFD_OPEN(); struct kfd_ioctl_get_version_args args; + memset(&args, 0, sizeof(args)); if (kmtIoctl(kfd_fd, AMDKFD_IOC_GET_VERSION, &args) == -1)