diff --git a/.gitignore b/.gitignore index cb5febb..073010b 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,7 @@ *.smod *.a doc/* +bin/* *fclKernels.cl test/testSummary test/test_outputs* diff --git a/examples/nbody.f90 b/examples/nbody.f90 index 2b8bc78..be63b34 100644 --- a/examples/nbody.f90 +++ b/examples/nbody.f90 @@ -20,11 +20,11 @@ program nbody character(*), parameter :: cl_vendor = 'nvidia,amd,intel' ! Vendors for which to create OpenCL context in order of preference ! ---------Program variables --------- -integer :: i, nBlock +integer :: i integer(c_size_t) :: kern1T, kern2T real :: Tavg, perf character(:), allocatable :: kernelSrc ! Kernel source string -type(fclDevice), allocatable :: devices(:) ! List of focal devices +type(fclDevice) :: device ! OpenCL device on which to run type(fclProgram) :: prog ! Focal program object type(fclKernel) :: kern1, kern2 ! Focal kernel object type(fclEvent) :: e @@ -40,32 +40,29 @@ program nbody write(*,*) ('-',i=1,72) write(*,*) -! Create context with nvidia platform -call fclSetDefaultContext(fclCreateContext(vendor=cl_vendor)) +! Initialise OpenCL context and select device with most cores +device = fclInit(vendor=cl_vendor,sortBy='cores') -! Select device with most cores and create command queue -devices = fclFindDevices(sortBy='cores') -call fclSetDefaultCommandQ(fclCreateCommandQ(devices(1),enableProfiling=.true., & +call fclSetDefaultCommandQ(fclCreateCommandQ(device,enableProfiling=.true., & outOfOrderExec=.true.,blockingWrite=.false.)) -write(*,*) ' Created OpenCL command queue on device: "',devices(1)%name,'"' -write(*,'(A,I6,A,I6,A,I4,A,A,A)') ' (', devices(1)%nComputeUnits,' cores, ', & - devices(1)%global_memory/1024/1024,'MB, ', & - devices(1)%clock_freq, 'MHz, ',& - devices(1)%version,')' +write(*,*) ' Created OpenCL command queue on device: "',device%name,'"' +write(*,'(A,I6,A,I6,A,I4,A,A,A)') ' (', device%nComputeUnits,' cores, ', & + device%global_memory/1024/1024,'MB, ', & + device%clock_freq, 'MHz, ',& + device%version,')' write(*,*) '' ! Set profiler device -profiler%device = devices(1) +profiler%device = device ! Load kernels from file and compile call fclGetKernelResource(kernelSrc) prog = fclCompileProgram(kernelSrc) ! Get kernel objects and set local/global work sizes -nBlock = (N+blockSize-1)/blockSize -kern1 = fclGetProgramKernel(prog,'bodyForces',[nBlock*blockSize],[blockSize]) -kern2 = fclGetProgramKernel(prog,'integrateBodies',[nBlock*blockSize],[blockSize]) +kern1 = fclGetProgramKernel(prog,'bodyForces',[N],[blockSize]) +kern2 = fclGetProgramKernel(prog,'integrateBodies',[N],[blockSize]) call fclProfilerAdd(profiler,Niter,kern1,kern2) diff --git a/examples/platform_query.f90 b/examples/platform_query.f90 index 587273c..0e4385e 100644 --- a/examples/platform_query.f90 +++ b/examples/platform_query.f90 @@ -37,6 +37,7 @@ program platform_query platforms(i)%devices(j)%global_memory/1024/1024,'MB, ', & platforms(i)%devices(j)%clock_freq, 'MHz, ',& platforms(i)%devices(j)%version + ! write(*,*) platforms(i)%devices(j)%extensions end do diff --git a/examples/sum.f90 b/examples/sum.f90 index 0a18485..464324d 100644 --- a/examples/sum.f90 +++ b/examples/sum.f90 @@ -9,7 +9,7 @@ program sum integer :: i ! Counter variable character(:), allocatable :: kernelSrc ! Kernel source string -type(fclDevice), allocatable :: devices(:) ! List of focal devices +type(fclDevice) :: device ! OpenCL device on which to run type(fclProgram) :: prog ! Focal program object type(fclKernel) :: sumKernel ! Focal kernel object real(c_float) :: array1(Nelem) ! Host array 1 @@ -17,13 +17,12 @@ program sum type(fclDeviceFloat) :: array1_d ! Device array 1 type(fclDeviceFloat) :: array2_d ! Device array 2 -! Create context with nvidia platform -call fclSetDefaultContext(fclCreateContext(vendor='nvidia,amd,intel')) +! Initialise OpenCL context and select device with most cores +device = fclInit(vendor='nvidia,amd,intel',sortBy='cores') ! Select device with most cores and create command queue -devices = fclFindDevices(sortBy='cores') !,type='cpu') -write(*,*) 'Using device: ',devices(1)%name -call fclSetDefaultCommandQ(fclCreateCommandQ(devices(1),enableProfiling=.true.)) +write(*,*) 'Using device: ',device%name +call fclSetDefaultCommandQ(fclCreateCommandQ(device,enableProfiling=.true.)) ! Load kernel from file and compile ! call fclSourceFromFile('examples/sum.cl',kernelSrc) diff --git a/src/Focal.f90 b/src/Focal.f90 index 9ee0fd2..c2c15a1 100644 --- a/src/Focal.f90 +++ b/src/Focal.f90 @@ -63,6 +63,11 @@ module Focal integer(c_int64_t) :: global_memory !! Total global memory, bytes integer(c_int32_t) :: clock_freq !! Max clock frequency, MHz character(:), allocatable :: version !! OpenCL version + character(:), allocatable :: extensions !! Supported OpenCL extensions + type(fclPlatform), pointer :: platform !! Pointer to containing platform + integer(c_intptr_t) :: cl_platform_id !! OpenCL platform pointer + character(:), allocatable :: platformName !! Name of containing platform + character(:), allocatable :: platformVendor !! Vendor of containing platform end type fclDevice type :: fclPlatform @@ -1008,23 +1013,99 @@ module subroutine fclSetDefaultContext(ctx) !! Set the global default context type(fclContext), intent(in) :: ctx end subroutine fclSetDefaultContext + + module function fclFilterDevices(devices,vendor,type,nameLike,extensions,sortBy) result(deviceList) + !! Filter and sort list of devices based on criteria + type(fclDevice), intent(in) :: devices(:) + character(*), intent(in), optional :: vendor + !! Filter device list based on platform vendor. + !! Specify multiple possible vendors in comma-separate list + character(*), intent(in), optional :: type + !! Filter device list based on device type. + !! Specify at least one of 'cpu', 'gpu', default: 'cpu,gpu' (both) + character(*), intent(in), optional :: nameLike + !! Filter devices based on device name. Look for this substring in device name. + character(*), intent(in), optional :: extensions + !! Filter devices based on supported device extensions. + !! Specify comma-separated list of OpenCL extension names, e.g. cl_khr_fp64. + !! See [clGetDeviceInfo](https://www.khronos.org/registry/OpenCL/sdk/1.2/docs/man/xhtml/clGetDeviceInfo.html) + !! Extensions specified are requirements: devices are filtered-out if they don't support all extensions specified. + character(*), intent(in), optional :: sortBy + !! Sort device list based on either 'memory': total global memory, + !! 'cores': total number of compute units, 'clock': maximum clock speed + type(fclDevice), allocatable :: deviceList(:) + !! Filtered and sorted list. Unallocated if no matching devices found. + end function fclFilterDevices + + module function fclInit(vendor,type,nameLike,extensions,sortBy) result(device) + !! Quick setup helper function: find a single device based on criteria + !! and set the default context accordingly. + !! Raises runtime error if no matching device is found. + character(*), intent(in), optional :: vendor + !! Filter device based on platform vendor + !! Specify multiple possible vendors in comma-separate list + character(*), intent(in), optional :: type + !! Filter device list based on device type. + !! Specify at least one of 'cpu', 'gpu', default: 'cpu,gpu' (both) + character(*), intent(in), optional :: nameLike + !! Filter devices based on device name. Look for this substring in device name. + character(*), intent(in), optional :: extensions + !! Filter devices based on supported device extensions. + !! Specify comma-separated list of OpenCL extension names, e.g. cl_khr_fp64. + !! See [clGetDeviceInfo](https://www.khronos.org/registry/OpenCL/sdk/1.2/docs/man/xhtml/clGetDeviceInfo.html) + !! Extensions specified are requirements: devices are filtered-out if they don't support all extensions specified. + character(*), intent(in), optional :: sortBy + !! Sort device list based on either 'memory': total global memory, + !! 'cores': total number of compute units, 'clock': maximum clock speed + type(fclDevice), allocatable :: device + !! The device chosen based on the user criteria + end function fclInit + end interface interface fclFindDevices !! Generic interface to list devices, sorted and filtered by properties + !! Raises runtime error if no matching device is found. - module function fclFindDevices_1(ctx,type,nameLike,sortBy) result(deviceList) + module function fclFindDevices_1(ctx,vendor,type,nameLike,extensions,sortBy) result(deviceList) type(fclContext), intent(in), target :: ctx + !! Context containing device for command queue + character(*), intent(in), optional :: vendor + !! Filter device list based on platform vendor. + !! Specify multiple possible vendors in comma-separate list character(*), intent(in), optional :: type + !! Filter device list based on device type. + !! Specify at least one of 'cpu', 'gpu', default: 'cpu,gpu' (both) character(*), intent(in), optional :: nameLike + !! Filter devices based on device name. Look for this substring in device name. + character(*), intent(in), optional :: extensions + !! Filter devices based on supported device extensions. + !! Specify comma-separated list of OpenCL extension names, e.g. cl_khr_fp64. + !! See [clGetDeviceInfo](https://www.khronos.org/registry/OpenCL/sdk/1.2/docs/man/xhtml/clGetDeviceInfo.html) + !! Extensions specified are requirements: devices are filtered-out if they don't support all extensions specified. character(*), intent(in), optional :: sortBy + !! Sort device list based on either 'memory': total global memory, + !! 'cores': total number of compute units, 'clock': maximum clock speed type(fclDevice), allocatable :: deviceList(:) end function fclFindDevices_1 - module function fclFindDevices_2(type,nameLike,sortBy) result(deviceList) + module function fclFindDevices_2(vendor,type,nameLike,extensions,sortBy) result(deviceList) + character(*), intent(in), optional :: vendor + !! Filter device list based on platform vendor. + !! Specify multiple possible vendors in comma-separate list character(*), intent(in), optional :: type + !! Filter device list based on device type. + !! Specify at least one of 'cpu', 'gpu', default: 'cpu,gpu' (both) character(*), intent(in), optional :: nameLike + !! Filter devices based on device name. Look for this substring in device name. + character(*), intent(in), optional :: extensions + !! Filter devices based on supported device extensions. + !! Specify comma-separated list of OpenCL extension names, e.g. cl_khr_fp64. + !! See [clGetDeviceInfo](https://www.khronos.org/registry/OpenCL/sdk/1.2/docs/man/xhtml/clGetDeviceInfo.html) + !! Extensions specified are requirements: devices are filtered-out if they don't support all extensions specified. character(*), intent(in), optional :: sortBy + !! Sort device list based on either 'memory': total global memory, + !! 'cores': total number of compute units, 'clock': maximum clock speed type(fclDevice), allocatable :: deviceList(:) end function fclFindDevices_2 diff --git a/src/Focal_Query.f90 b/src/Focal_Query.f90 index 2cbbfce..4afa3da 100644 --- a/src/Focal_Query.f90 +++ b/src/Focal_Query.f90 @@ -274,19 +274,23 @@ C_LOC(platform%cl_device_ids), int32_ret) call fclErrorHandler(errcode,'fclGetPlatform','clGetDeviceIDs') + ! --- Populate fclPlatform info strings --- + call fclGetPlatformInfo(platform,CL_PLATFORM_PROFILE,platform%profile) + call fclGetPlatformInfo(platform,CL_PLATFORM_VERSION,platform%version) + call fclGetPlatformInfo(platform,CL_PLATFORM_NAME,platform%name) + call fclGetPlatformInfo(platform,CL_PLATFORM_VENDOR,platform%vendor) + call fclGetPlatformInfo(platform,CL_PLATFORM_EXTENSIONS,platform%extensions) + ! --- Populate fclDevice structure array --- do i=1,platform%numDevice platform%devices(i) = fclGetDevice(platform%cl_device_ids(i)) + platform%devices(i)%platformName = platform%name + platform%devices(i)%platformVendor = platform%vendor + platform%devices(i)%cl_platform_id = platform_id end do - ! --- Populate fclPlatform info strings --- - call fclGetPlatformInfo(platform,CL_PLATFORM_PROFILE,platform%profile) - call fclGetPlatformInfo(platform,CL_PLATFORM_VERSION,platform%version) - call fclGetPlatformInfo(platform,CL_PLATFORM_NAME,platform%name) - call fclGetPlatformInfo(platform,CL_PLATFORM_VENDOR,platform%vendor) - call fclGetPlatformInfo(platform,CL_PLATFORM_EXTENSIONS,platform%extensions) end procedure fclGetPlatform ! --------------------------------------------------------------------------- @@ -303,6 +307,7 @@ call fclGetDeviceInfo(device,CL_DEVICE_GLOBAL_MEM_SIZE,device%global_memory) call fclGetDeviceInfo(device,CL_DEVICE_MAX_CLOCK_FREQUENCY,device%clock_freq) call fclGetDeviceInfo(device,CL_DEVICE_VERSION,device%version) + call fclGetDeviceInfo(device,CL_DEVICE_EXTENSIONS,device%extensions) end procedure fclGetDevice ! --------------------------------------------------------------------------- diff --git a/src/Focal_Setup.f90 b/src/Focal_Setup.f90 index 13f2a14..6093dea 100644 --- a/src/Focal_Setup.f90 +++ b/src/Focal_Setup.f90 @@ -117,14 +117,14 @@ ! --------------------------------------------------------------------------- - module procedure fclFindDevices_1 !(ctx,type,nameLike,sortBy) result(deviceList) - !! Create command queue by finding a device + module procedure fclFilterDevices !(devices,vendor,type,nameLike,extensions,sortBy) result(deviceList) + !! Filter and sort list of devices based on criteria use futils_sorting, only: argsort integer :: i,j - integer :: sortMetric(ctx%platform%numDevice) - integer :: sortList(ctx%platform%numDevice) - logical :: filter(ctx%platform%numDevice) + integer :: sortMetric(size(devices,1)) + integer :: sortList(size(devices,1)) + logical :: filter(size(devices,1)), platformMatch integer(c_int64_t) :: typeFilter integer(c_int64_t) :: deviceType @@ -133,12 +133,17 @@ integer(c_int64_t) :: int64Metric character(3) :: CPU_TYPE + character(:), allocatable :: extensionList(:) + character(:), allocatable :: vendorList(:) + CPU_TYPE = 'CPU' ! --- Parse any request to filter by device type --- typeFilter = 0 if (present(type)) then - if (index(upperstr(type),'CPU') > 0) then + if (index(upperstr(type),'CPU') > 0 .and. index(upperstr(type),'GPU') > 0) then + typeFilter = 0 + else if (index(upperstr(type),'CPU') > 0) then typeFilter = CL_DEVICE_TYPE_CPU elseif (index( upperstr(type) , 'GPU' ) > 0) then typeFilter = CL_DEVICE_TYPE_GPU @@ -148,15 +153,23 @@ end if end if + if (present(extensions)) then + call splitStr(extensions,extensionList,delimiters=',') + end if + + if (present(vendor)) then + call splitStr(vendor,vendorList,delimiters=',') + end if + ! --- Process the devices --- filter = .true. - do i=1,ctx%platform%numDevice + do i=1,size(devices,1) ! --- Filter by device type --- if (typeFilter > 0) then - call fclGetDeviceInfo(ctx%platform%devices(i),CL_DEVICE_TYPE,deviceType) + call fclGetDeviceInfo(devices(i),CL_DEVICE_TYPE,deviceType) if (deviceType /= typeFilter) then filter(i) = .false. ! Filtered out by device type @@ -164,19 +177,43 @@ end if + ! --- Filter by device extensions --- + if (allocated(extensionList)) then + do j=1,size(extensionList,1) + if (index(upperstr(devices(i)%extensions), & + upperstr(trim(extensionList(j)))) == 0) then + filter(i) = .false. ! Filtered out by device extensions + exit + end if + end do + end if + + ! --- Filter by device platform vendor --- + if (allocated(vendorList)) then + platformMatch = .false. + do j=1,size(vendorList,1) + if ( index(upperstr(devices(i)%platformName),upperstr(trim(vendorList(j))))>0 .or. & + index(upperstr(devices(i)%platformVendor),upperstr(trim(vendorList(j))))>0 ) then + platformMatch = .true. + exit + end if + end do + filter(i) = filter(i).and.platformMatch ! Filtered out by device platform vendor + end if + ! --- Extract sorting metric --- if (present(sortBy)) then select case (upperstr(sortBy)) case ('MEMORY') - call fclGetDeviceInfo(ctx%platform%devices(i),CL_DEVICE_GLOBAL_MEM_SIZE,int64Metric) + call fclGetDeviceInfo(devices(i),CL_DEVICE_GLOBAL_MEM_SIZE,int64Metric) sortMetric(i) = int(int64Metric/1000000,c_int32_t) ! Convert to megabytes to avoid overflow in int32 case ('CORES') - call fclGetDeviceInfo(ctx%platform%devices(i),CL_DEVICE_MAX_COMPUTE_UNITS,sortMetric(i)) + call fclGetDeviceInfo(devices(i),CL_DEVICE_MAX_COMPUTE_UNITS,sortMetric(i)) case ('CLOCK') - call fclGetDeviceInfo(ctx%platform%devices(i),CL_DEVICE_MAX_CLOCK_FREQUENCY,sortMetric(i)) + call fclGetDeviceInfo(devices(i),CL_DEVICE_MAX_CLOCK_FREQUENCY,sortMetric(i)) end select @@ -186,7 +223,7 @@ ! --- Filter by device name --- if (present(nameLike)) then - if (index(upperstr(ctx%platform%devices(i)%name),upperstr(nameLike)) == 0) then + if (index(upperstr(devices(i)%name),upperstr(nameLike)) == 0) then filter(i) = .false. ! Filtered out by device name end if end if @@ -196,20 +233,20 @@ ! --- Sort by sorting metric --- sortMetric = -sortMetric ! Sort descending sortList = argsort(sortMetric) - + nFiltered = count(filter) + allocate(deviceList(nFiltered)) if (nFiltered < 1) then - call fclRuntimeError("fclFindDevices: no devices found matching criteria") - end if + return + end if ! --- Output filtered sorted list of devices --- - allocate(deviceList(nFiltered)) nFill = 1 - do i=1,ctx%platform%numDevice + do i=1,size(devices,1) - if (filter(i)) then - j = sortList(i) - deviceList(nFill) = ctx%platform%devices(j) + j = sortList(i) + if (filter(j)) then + deviceList(nFill) = devices(j) nFill = nFill + 1 end if @@ -219,14 +256,90 @@ end do + end procedure fclFilterDevices + ! --------------------------------------------------------------------------- + + + module procedure fclInit !(vendor,type,nameLike,extensions,sortBy) result(device) + !! Quick setup helper function: find a single device based on criteria + !! and set the default context accordingly. + !! Raises runtime error if no matching device is found. + + integer :: i + + type(fclPlatform) :: chosenPlatform + type(fclPlatform), allocatable :: platforms(:) + type(fclDevice), allocatable :: devices(:), deviceList(:) + integer :: nDevice + logical :: found + + ! Get platforms + platforms = fclGetPlatforms(); + + ! Count total number of system devices + nDevice = 0 + do i=1,size(platforms,1) + nDevice = nDevice + platforms(i)%numDevice + end do + + ! Concatenate device lists across platforms + allocate(devices(nDevice)) + nDevice = 0 + do i=1,size(platforms,1) + devices(nDevice+1:nDevice+platforms(i)%numDevice) = platforms(i)%devices(:) + nDevice = nDevice + platforms(i)%numDevice + end do + + ! Find devices based on criteria + deviceList = fclFilterDevices(devices,vendor,type,nameLike,extensions,sortBy) + + if (size(deviceList,1) < 1) then + call fclRuntimeError('fclInit: no devices matching the specified criteria were found.') + end if + + ! Choose first device in filtered, sorted list + device = deviceList(1) + + ! Find corresponding platform for creating context + found = .false. + do i=1,size(platforms,1) + + if (platforms(i)%cl_platform_id == device%cl_platform_id) then + chosenPlatform = platforms(i) + found = .true. + exit + end if + + end do + + ! Create context and set as default + call fclSetDefaultContext(fclCreateContext(chosenPlatform)) + + end procedure fclInit + ! --------------------------------------------------------------------------- + + + module procedure fclFindDevices_1 !(ctx,vendor,type,nameLike,extensions,sortBy) result(deviceList) + !! Create command queue by finding a device + use futils_sorting, only: argsort + + call fclDbgCheckContext('fclFindDevices',ctx) + + deviceList = fclFilterDevices(ctx%platform%devices,vendor,type,nameLike,extensions,sortBy) + + if (.not.allocated(deviceList)) then + call fclRuntimeError('fclFindDevices: no devices matching the specified criteria were found.') + end if + end procedure fclFindDevices_1 ! --------------------------------------------------------------------------- - module procedure fclFindDevices_2 !(type,nameLike,sortBy) result(deviceList) + module procedure fclFindDevices_2 !(type,vendor,nameLike,extensions,sortBy) result(deviceList) + call fclDbgCheckContext('fclFindDevices') - deviceList = fclFindDevices_1(fclDefaultCtx,type,nameLike,sortBy) + deviceList = fclFindDevices_1(fclDefaultCtx,vendor,type,nameLike,extensions,sortBy) end procedure fclFindDevices_2 ! ---------------------------------------------------------------------------