diff --git a/cmd/uMagNUS/main.go b/cmd/uMagNUS/main.go index 6305e53..7589586 100644 --- a/cmd/uMagNUS/main.go +++ b/cmd/uMagNUS/main.go @@ -26,6 +26,9 @@ func main() { log.SetPrefix("") log.SetFlags(0) + opencl.Synchronous = *engine.Flag_sync + opencl.Debug = *engine.Flag_debug + // Check flag and initialize engine if len(*engine.Flag_gpulist) > 0 { var gpu_arr []int @@ -63,7 +66,6 @@ func main() { } } - opencl.Synchronous = *engine.Flag_sync if *engine.Flag_version { printVersion() } diff --git a/cmd/uMagNUS64/main.go b/cmd/uMagNUS64/main.go index c2c5d77..c4a34d2 100644 --- a/cmd/uMagNUS64/main.go +++ b/cmd/uMagNUS64/main.go @@ -26,6 +26,9 @@ func main() { log.SetPrefix("") log.SetFlags(0) + opencl.Synchronous = *engine.Flag_sync + opencl.Debug = *engine.Flag_debug + // Check flag and initialize engine if len(*engine.Flag_gpulist) > 0 { var gpu_arr []int @@ -63,7 +66,6 @@ func main() { } } - opencl.Synchronous = *engine.Flag_sync if *engine.Flag_version { printVersion() } diff --git a/opencl/init.go b/opencl/init.go index 05b202d..4b8a945 100644 --- a/opencl/init.go +++ b/opencl/init.go @@ -25,7 +25,7 @@ var ( GPUInfo string // Human-readable GPU description GPUList []GPU // List of GPUs available Synchronous bool // for debug: synchronize command queue at every kernel launch - Debug bool // for debug: synchronize command queue after every kernel launch + Debug = false // for debug: synchronize command queue after every kernel launch ClPlatforms []*cl.Platform // list of platforms available ClPlatform *cl.Platform // platform the global OpenCL context is attached to ClDevices []*cl.Device // list of devices global OpenCL context may be associated with @@ -209,13 +209,13 @@ func Init(gpu int) { inRegExp := regexp.MustCompile("(?i)intel") adRegExp0 := regexp.MustCompile("(?i)amd") adRegExp1 := regexp.MustCompile("(?i)micro device") - if chk0 := nvRegExp.Match([]byte(GPUInfo)); chk0 { + if chk0 := nvRegExp.Match([]byte(PlatformInfo)); chk0 { GPUVend = 1 } else { - if chk1 := inRegExp.Match([]byte(GPUInfo)); chk1 { + if chk1 := inRegExp.Match([]byte(PlatformInfo)); chk1 { GPUVend = 2 } else { - chk2, chk3 := adRegExp0.Match([]byte(GPUInfo)), adRegExp1.Match([]byte(GPUInfo)) + chk2, chk3 := adRegExp0.Match([]byte(PlatformInfo)), adRegExp1.Match([]byte(PlatformInfo)) if (chk2 == true) || (chk3 == true) { GPUVend = 3 } else { @@ -224,13 +224,13 @@ func Init(gpu int) { } } ClMaxWGNum = ClCUnits + ClTotalPE = ClWGSize[2] * ClCUnits if GPUVend == 1 { // Nvidia - ClTotalPE = ClWGSize[2] * ClCUnits if ClMaxWGSize > ClTotalPE { - ClMaxWGNum = ClTotalPE / ClMaxWGSize - } else { ClMaxWGNum = 1 ClMaxWGSize = ClTotalPE + } else { + ClMaxWGNum = ClTotalPE / ClMaxWGSize } } if GPUVend == 2 { // Intel @@ -252,6 +252,16 @@ func Init(gpu int) { reduceintcfg.Grid[0] = ClTotalPE reduceintcfg.Block[0] = ClPrefWGSz + if Debug { + fmt.Printf(" PlatformInfo: \n%+v \n", PlatformInfo) + fmt.Printf(" GPUInfo: \n%+v \n", GPUInfo) + fmt.Printf(" GPUVend: %+v \n", GPUVend) + fmt.Printf(" ClCUnits: %+v ; ClWGSize = %+v \n", ClCUnits, ClWGSize) + fmt.Printf(" ClTotalPE = %+v \n", ClTotalPE) + fmt.Printf(" ClMaxWGSize = %+v ; ClMaxWGNum = %+v \n", ClMaxWGSize, ClMaxWGNum) + fmt.Printf(" ClPrefWGSz = %+v \n", ClPrefWGSz) + } + data.EnableGPU(memFree, memFree, MemCpy, MemCpyDtoH, MemCpyHtoD) } diff --git a/opencl/opencl.go b/opencl/opencl.go index 0f842c3..7c911a7 100644 --- a/opencl/opencl.go +++ b/opencl/opencl.go @@ -1,10 +1,11 @@ package opencl import ( - "log" + "fmt" "unsafe" cl "github.com/seeder-research/uMagNUS/cl" + util "github.com/seeder-research/uMagNUS/util" ) // Type size in bytes @@ -18,15 +19,15 @@ const ( // Assumes kernel arguments set prior to launch func LaunchKernel(kernname string, gridDim, workDim []int, events []*cl.Event) *cl.Event { if KernList[kernname] == nil { - log.Panic("Kernel " + kernname + " does not exist!") + util.Fatal("Kernel " + kernname + " does not exist!") return nil } if Debug { - log.Printf("Launching kernel: %+v with Grid = %+v and Block = %+v \n", kernname, gridDim, workDim) + fmt.Printf("Launching kernel: %+v with Grid = %+v and Block = %+v \n", kernname, gridDim, workDim) } KernEvent, err := ClCmdQueue.EnqueueNDRangeKernel(KernList[kernname], nil, gridDim, workDim, events) if err != nil { - log.Fatal(err) + util.Fatal(err) return nil } else { return KernEvent @@ -35,31 +36,31 @@ func LaunchKernel(kernname string, gridDim, workDim []int, events []*cl.Event) * func SetKernelArgWrapper(kernname string, index int, arg interface{}) { if KernList[kernname] == nil { - log.Panic("Kernel " + kernname + " does not exist!") + util.Fatal("Kernel " + kernname + " does not exist!") } switch val := arg.(type) { default: if err := KernList[kernname].SetArg(index, val); err != nil { - log.Fatal(err) + util.Fatal(err) } case unsafe.Pointer: memBufHandle, flag := arg.(unsafe.Pointer) if memBufHandle == unsafe.Pointer(uintptr(0)) { if err := KernList[kernname].SetArgUnsafe(index, 8, memBufHandle); err != nil { - log.Fatal(err) + util.Fatal(err) } } else { if flag { if err := KernList[kernname].SetArg(index, (*cl.MemObject)(memBufHandle)); err != nil { - log.Fatal(err) + util.Fatal(err) } } else { - log.Fatal("Unable to change argument type to *cl.MemObject") + util.Fatal("Unable to change argument type to *cl.MemObject") } } case int: if err := KernList[kernname].SetArg(index, (int32)(val)); err != nil { - log.Fatal(err) + util.Fatal(err) } } } diff --git a/opencl64/init.go b/opencl64/init.go index abe3e51..a42d18a 100644 --- a/opencl64/init.go +++ b/opencl64/init.go @@ -25,7 +25,7 @@ var ( GPUInfo string // Human-readable GPU description GPUList []GPU // List of GPUs available Synchronous bool // for debug: synchronize command queue at every kernel launch - Debug bool // for debug: synchronize command queue after every kernel launch + Debug = false // for debug: synchronize command queue after every kernel launch ClPlatforms []*cl.Platform // list of platforms available ClPlatform *cl.Platform // platform the global OpenCL context is attached to ClDevices []*cl.Device // list of devices global OpenCL context may be associated with @@ -212,13 +212,13 @@ func Init(gpu int) { inRegExp := regexp.MustCompile("(?i)intel") adRegExp0 := regexp.MustCompile("(?i)amd") adRegExp1 := regexp.MustCompile("(?i)micro device") - if chk0 := nvRegExp.Match([]byte(GPUInfo)); chk0 { + if chk0 := nvRegExp.Match([]byte(PlatformInfo)); chk0 { GPUVend = 1 } else { - if chk1 := inRegExp.Match([]byte(GPUInfo)); chk1 { + if chk1 := inRegExp.Match([]byte(PlatformInfo)); chk1 { GPUVend = 2 } else { - chk2, chk3 := adRegExp0.Match([]byte(GPUInfo)), adRegExp1.Match([]byte(GPUInfo)) + chk2, chk3 := adRegExp0.Match([]byte(PlatformInfo)), adRegExp1.Match([]byte(PlatformInfo)) if (chk2 == true) || (chk3 == true) { GPUVend = 3 } else { @@ -227,13 +227,13 @@ func Init(gpu int) { } } ClMaxWGNum = ClCUnits + ClTotalPE = ClWGSize[2] * ClCUnits if GPUVend == 1 { // Nvidia - ClTotalPE = ClWGSize[2] * ClCUnits if ClMaxWGSize > ClTotalPE { - ClMaxWGNum = ClTotalPE / ClMaxWGSize - } else { ClMaxWGNum = 1 ClMaxWGSize = ClTotalPE + } else { + ClMaxWGNum = ClTotalPE / ClMaxWGSize } } if GPUVend == 2 { // Intel @@ -255,6 +255,16 @@ func Init(gpu int) { reduceintcfg.Grid[0] = ClTotalPE reduceintcfg.Block[0] = ClPrefWGSz + if Debug { + fmt.Printf(" PlatformInfo: \n%+v \n", PlatformInfo) + fmt.Printf(" GPUInfo: \n%+v \n", GPUInfo) + fmt.Printf(" GPUVend: %+v \n", GPUVend) + fmt.Printf(" ClCUnits: %+v ; ClWGSize = %+v \n", ClCUnits, ClWGSize) + fmt.Printf(" ClTotalPE = %+v \n", ClTotalPE) + fmt.Printf(" ClMaxWGSize = %+v ; ClMaxWGNum = %+v \n", ClMaxWGSize, ClMaxWGNum) + fmt.Printf(" ClPrefWGSz = %+v \n", ClPrefWGSz) + } + data.EnableGPU(memFree, memFree, MemCpy, MemCpyDtoH, MemCpyHtoD) }