From 531eba485f0163779f879e97e342a610e74bee13 Mon Sep 17 00:00:00 2001 From: David Rowenhorst Date: Sun, 9 Jun 2024 10:08:09 -0400 Subject: [PATCH 01/22] Fixed notation on number of GPUs. Signed-off by: David Rowenhorst --- pyebsdindex/opencl/openclparam.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pyebsdindex/opencl/openclparam.py b/pyebsdindex/opencl/openclparam.py index 28f77f8..fa0206d 100644 --- a/pyebsdindex/opencl/openclparam.py +++ b/pyebsdindex/opencl/openclparam.py @@ -88,6 +88,7 @@ def get_gpu(self): else: pass self.gpu = gpu + self.ngpu = len(gpu) return self.gpu From a7162ac4f0f3311b5401b515b6835822b00b4ad3 Mon Sep 17 00:00:00 2001 From: David Rowenhorst Date: Mon, 10 Jun 2024 11:12:43 -0400 Subject: [PATCH 02/22] Performance tuning Signed-off by: David Rowenhorst --- pyebsdindex/_ebsd_index_parallel.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyebsdindex/_ebsd_index_parallel.py b/pyebsdindex/_ebsd_index_parallel.py index 2be1994..dcdce45 100644 --- a/pyebsdindex/_ebsd_index_parallel.py +++ b/pyebsdindex/_ebsd_index_parallel.py @@ -309,7 +309,7 @@ def index_pats_distributed( if ngpu > 0: - ngpupro = max(12, ngpu*8) # number of processes that will serve data to the gpu + ngpupro = min(max(6, ngpu*6), 12) # number of processes that will serve data to the gpu #ngpupro = 8 if n_cpu_nodes < 8: ngpupro = min(ngpupro,8) From d742733279c77fa28a13122b4fc0d2bed4f6e805 Mon Sep 17 00:00:00 2001 From: David Rowenhorst Date: Mon, 10 Jun 2024 17:51:14 -0400 Subject: [PATCH 03/22] Attempt to fix Apple M-series and NLPAR gpu Signed-off by: David Rowenhorst --- pyebsdindex/_ebsd_index_parallel.py | 2 +- pyebsdindex/opencl/nlpar_cl.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pyebsdindex/_ebsd_index_parallel.py b/pyebsdindex/_ebsd_index_parallel.py index dcdce45..cf4a64f 100644 --- a/pyebsdindex/_ebsd_index_parallel.py +++ b/pyebsdindex/_ebsd_index_parallel.py @@ -312,7 +312,7 @@ def index_pats_distributed( 
ngpupro = min(max(6, ngpu*6), 12) # number of processes that will serve data to the gpu #ngpupro = 8 if n_cpu_nodes < 8: - ngpupro = min(ngpupro,8) + ngpupro = min(ngpupro, n_cpu_nodes) if n_cpu_nodes < 2: ngpupro = 2 #if OSPLATFORM == 'Linux': diff --git a/pyebsdindex/opencl/nlpar_cl.py b/pyebsdindex/opencl/nlpar_cl.py index a3307a2..a367788 100644 --- a/pyebsdindex/opencl/nlpar_cl.py +++ b/pyebsdindex/opencl/nlpar_cl.py @@ -163,7 +163,7 @@ def calcsigma_cl(self,nn=1,saturation_protect=True,automask=True, normalize_d=Fa #print(gpu_id) clparams.get_context(gpu_id=gpu_id, kfile = 'clnlpar.cl') clparams.get_queue() - target_mem = clparams.queue.device.max_mem_alloc_size//2 + target_mem = min(clparams.queue.device.max_mem_alloc_size//2, int(4e9)) ctx = clparams.ctx prg = clparams.prg queue = clparams.queue @@ -400,7 +400,7 @@ def calcnlpar_cl(self, searchradius=None, lam = None, dthresh = None, saturation #print(gpu_id) clparams.get_context(gpu_id=gpu_id, kfile ='clnlpar.cl') clparams.get_queue() - target_mem = clparams.queue.device.max_mem_alloc_size//2 + target_mem = min(clparams.queue.device.max_mem_alloc_size//4, int(2e9)) ctx = clparams.ctx prg = clparams.prg queue = clparams.queue From aa5fbf22ecff2e277f3383212f096af3fce9174f Mon Sep 17 00:00:00 2001 From: David Rowenhorst Date: Mon, 10 Jun 2024 21:14:31 -0400 Subject: [PATCH 04/22] More tuning. 
Signed-off by: David Rowenhorst --- pyebsdindex/_ebsd_index_parallel.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pyebsdindex/_ebsd_index_parallel.py b/pyebsdindex/_ebsd_index_parallel.py index cf4a64f..de2f968 100644 --- a/pyebsdindex/_ebsd_index_parallel.py +++ b/pyebsdindex/_ebsd_index_parallel.py @@ -309,7 +309,10 @@ def index_pats_distributed( if ngpu > 0: - ngpupro = min(max(6, ngpu*6), 12) # number of processes that will serve data to the gpu + gpuratio = (12, ngpu*6) + if (platform.machine(), platform.system()) == ('x86_64', 'Darwin'): + gpuratio = (6, ngpu*6) + ngpupro = min(max(gpuratio), 18) # number of processes that will serve data to the gpu #ngpupro = 8 if n_cpu_nodes < 8: ngpupro = min(ngpupro, n_cpu_nodes) From 4b949838c2bf7b19f10a28da6e3a81f455561268 Mon Sep 17 00:00:00 2001 From: David Rowenhorst Date: Mon, 10 Jun 2024 21:35:28 -0400 Subject: [PATCH 05/22] Check Signed-off by: David Rowenhorst --- pyebsdindex/_ebsd_index_parallel.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyebsdindex/_ebsd_index_parallel.py b/pyebsdindex/_ebsd_index_parallel.py index de2f968..c67bc10 100644 --- a/pyebsdindex/_ebsd_index_parallel.py +++ b/pyebsdindex/_ebsd_index_parallel.py @@ -309,10 +309,10 @@ def index_pats_distributed( if ngpu > 0: - gpuratio = (12, ngpu*6) + gpuratio = (12, ngpu*4) if (platform.machine(), platform.system()) == ('x86_64', 'Darwin'): gpuratio = (6, ngpu*6) - ngpupro = min(max(gpuratio), 18) # number of processes that will serve data to the gpu + ngpupro = min(max(gpuratio), 12) # number of processes that will serve data to the gpu #ngpupro = 8 if n_cpu_nodes < 8: ngpupro = min(ngpupro, n_cpu_nodes) From 51433fbb49b19ef5f7829711d42158bc36d24113 Mon Sep 17 00:00:00 2001 From: David Rowenhorst Date: Mon, 10 Jun 2024 21:58:45 -0400 Subject: [PATCH 06/22] Attempt to fix CUDA not seeing GPUs in Ray Signed-off by: David Rowenhorst --- pyebsdindex/_ebsd_index_parallel.py | 1 + 1 file changed, 1 
insertion(+) diff --git a/pyebsdindex/_ebsd_index_parallel.py b/pyebsdindex/_ebsd_index_parallel.py index c67bc10..e744539 100644 --- a/pyebsdindex/_ebsd_index_parallel.py +++ b/pyebsdindex/_ebsd_index_parallel.py @@ -768,6 +768,7 @@ def __optimizegpuchunk__(indexer, ngpupro, gpu_id, clparam): @ray.remote(num_cpus=1, num_gpus=1) class GPUWorker: def __init__(self, actorid=0, clparammodule=None, gpu_id=None, cudavis = '0'): + del os.environ['CUDA_VISIBLE_DEVICES'] # sys.path.append(path.dirname(path.dirname(__file__))) # do this to help Ray find the program files # import openclparam # do this to help Ray find the program files # device, context, queue, program, mf From 6ac0807e9fc2e50748106c58f61688498a1c928a Mon Sep 17 00:00:00 2001 From: David Rowenhorst Date: Mon, 10 Jun 2024 22:09:51 -0400 Subject: [PATCH 07/22] Check IP Signed-off by: David Rowenhorst --- pyebsdindex/_ebsd_index_parallel.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pyebsdindex/_ebsd_index_parallel.py b/pyebsdindex/_ebsd_index_parallel.py index e744539..1027a87 100644 --- a/pyebsdindex/_ebsd_index_parallel.py +++ b/pyebsdindex/_ebsd_index_parallel.py @@ -47,7 +47,8 @@ else: from pyebsdindex import band_detect as band_detect -RAYIPADDRESS = '127.0.0.1' +#RAYIPADDRESS = '127.0.0.1' +RAYIPADDRESS = '0.0.0.0' OSPLATFORM = platform.system() #if OSPLATFORM == 'Darwin': # RAYIPADDRESS = '0.0.0.0' # the localhost address does not work on macOS when on a VPN From a91df1caec48d51be49215425ab1c2cb0e0c172a Mon Sep 17 00:00:00 2001 From: David Rowenhorst Date: Mon, 10 Jun 2024 22:13:11 -0400 Subject: [PATCH 08/22] Revert Signed-off by: David Rowenhorst --- pyebsdindex/_ebsd_index_parallel.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyebsdindex/_ebsd_index_parallel.py b/pyebsdindex/_ebsd_index_parallel.py index 1027a87..c9adb29 100644 --- a/pyebsdindex/_ebsd_index_parallel.py +++ b/pyebsdindex/_ebsd_index_parallel.py @@ -47,8 +47,8 @@ else: from 
pyebsdindex import band_detect as band_detect -#RAYIPADDRESS = '127.0.0.1' -RAYIPADDRESS = '0.0.0.0' +RAYIPADDRESS = '127.0.0.1' +#RAYIPADDRESS = '0.0.0.0' OSPLATFORM = platform.system() #if OSPLATFORM == 'Darwin': # RAYIPADDRESS = '0.0.0.0' # the localhost address does not work on macOS when on a VPN From e7e43186151631ed3ec656a8f198dcdac4c274a9 Mon Sep 17 00:00:00 2001 From: David Rowenhorst Date: Mon, 10 Jun 2024 22:16:15 -0400 Subject: [PATCH 09/22] Revert again. Signed-off by: David Rowenhorst --- pyebsdindex/_ebsd_index_parallel.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyebsdindex/_ebsd_index_parallel.py b/pyebsdindex/_ebsd_index_parallel.py index c9adb29..e534d4c 100644 --- a/pyebsdindex/_ebsd_index_parallel.py +++ b/pyebsdindex/_ebsd_index_parallel.py @@ -769,7 +769,7 @@ def __optimizegpuchunk__(indexer, ngpupro, gpu_id, clparam): @ray.remote(num_cpus=1, num_gpus=1) class GPUWorker: def __init__(self, actorid=0, clparammodule=None, gpu_id=None, cudavis = '0'): - del os.environ['CUDA_VISIBLE_DEVICES'] + #del os.environ['CUDA_VISIBLE_DEVICES'] # sys.path.append(path.dirname(path.dirname(__file__))) # do this to help Ray find the program files # import openclparam # do this to help Ray find the program files # device, context, queue, program, mf From 84ea980382c7bb9d9a7dceebd5662b2192cf767e Mon Sep 17 00:00:00 2001 From: David Rowenhorst Date: Thu, 20 Jun 2024 08:26:22 -0400 Subject: [PATCH 10/22] Put in initial buffer on GPU worker timeout in case of long JIT times. 
Signed-off by: David Rowenhorst --- pyebsdindex/_ebsd_index_parallel.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyebsdindex/_ebsd_index_parallel.py b/pyebsdindex/_ebsd_index_parallel.py index e534d4c..53c5924 100644 --- a/pyebsdindex/_ebsd_index_parallel.py +++ b/pyebsdindex/_ebsd_index_parallel.py @@ -462,7 +462,7 @@ def index_pats_distributed( #gpu_launched += 1 - gpuwrker_cycles = 0 + gpuwrker_cycles = -500 cpuwrker_cycles = 0 while ncpudone < njobs: From b408b2b24221ef2bcc3dfe6dc8fd75da307cfec5 Mon Sep 17 00:00:00 2001 From: David Rowenhorst Date: Thu, 20 Jun 2024 09:48:07 -0400 Subject: [PATCH 11/22] Removed use of np.compat.long with np.long64 Signed-off by: David Rowenhorst --- pyebsdindex/_ebsd_index_parallel.py | 2 +- pyebsdindex/band_detect.py | 2 +- pyebsdindex/opencl/band_detect_cl.py | 2 +- pyebsdindex/tripletvote.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pyebsdindex/_ebsd_index_parallel.py b/pyebsdindex/_ebsd_index_parallel.py index 53c5924..0f71425 100644 --- a/pyebsdindex/_ebsd_index_parallel.py +++ b/pyebsdindex/_ebsd_index_parallel.py @@ -373,7 +373,7 @@ def index_pats_distributed( # fall back to CPU only calculation. 
clparamfunction = band_detect.getopenclparam # Set up the jobs - njobs = (np.ceil(npats / chunksize)).astype(np.compat.long) + njobs = (np.ceil(npats / chunksize)).astype(np.long64) p_indx_start_end = [ [i * chunksize + patstart, (i + 1) * chunksize + patstart, chunksize] diff --git a/pyebsdindex/band_detect.py b/pyebsdindex/band_detect.py index af5abb9..1f222fa 100644 --- a/pyebsdindex/band_detect.py +++ b/pyebsdindex/band_detect.py @@ -396,7 +396,7 @@ def find_bands(self, patternsIn, verbose=0, chunksize=-1, **kwargs): chunksize = nPats chunk_start_end = [[0,nPats]] else: - nchunks = (np.ceil(nPats / chunksize)).astype(np.compat.long) + nchunks = (np.ceil(nPats / chunksize)).astype(np.long64) chunk_start_end = [[i * chunksize, (i + 1) * chunksize] for i in range(nchunks)] chunk_start_end[-1][1] = nPats diff --git a/pyebsdindex/opencl/band_detect_cl.py b/pyebsdindex/opencl/band_detect_cl.py index 77f79a2..9e271cf 100644 --- a/pyebsdindex/opencl/band_detect_cl.py +++ b/pyebsdindex/opencl/band_detect_cl.py @@ -87,7 +87,7 @@ def find_bands(self, patternsIn, verbose=0, clparams=None, chunksize=528, useCPU nchunks = 1 chunksize = nPats else: - nchunks = (np.ceil(nPats / chunksize)).astype(np.compat.long) + nchunks = (np.ceil(nPats / chunksize)).astype(np.long64) chunk_start_end = [[i * chunksize,(i + 1) * chunksize] for i in range(nchunks)] chunk_start_end[-1][1] = nPats diff --git a/pyebsdindex/tripletvote.py b/pyebsdindex/tripletvote.py index 67fcbd1..420b17b 100644 --- a/pyebsdindex/tripletvote.py +++ b/pyebsdindex/tripletvote.py @@ -407,7 +407,7 @@ def build_trip_lib(self): #print(indx0FID) #This completely over previsions the arrays, this is essentially #N Choose K with N = number of angles and K = 3 - nlib = npoles*np.prod(np.arange(3, dtype=np.int64)+(nangs-2+1))/np.compat.long(math.factorial(3)) + nlib = npoles*np.prod(np.arange(3, dtype=np.int64)+(nangs-2+1))/np.long64(math.factorial(3)) nlib = nlib.astype(int) libANG = np.zeros((nlib, 3)) From 
adb0fe9c413a317a4d78756da32167c1ff1f350d Mon Sep 17 00:00:00 2001 From: David Rowenhorst Date: Thu, 20 Jun 2024 09:59:49 -0400 Subject: [PATCH 12/22] Remembering that numpy uses int64, not long64. Signed-off by: David Rowenhorst --- pyebsdindex/_ebsd_index_parallel.py | 2 +- pyebsdindex/band_detect.py | 2 +- pyebsdindex/opencl/band_detect_cl.py | 2 +- pyebsdindex/tripletvote.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pyebsdindex/_ebsd_index_parallel.py b/pyebsdindex/_ebsd_index_parallel.py index 0f71425..34e6798 100644 --- a/pyebsdindex/_ebsd_index_parallel.py +++ b/pyebsdindex/_ebsd_index_parallel.py @@ -373,7 +373,7 @@ def index_pats_distributed( # fall back to CPU only calculation. clparamfunction = band_detect.getopenclparam # Set up the jobs - njobs = (np.ceil(npats / chunksize)).astype(np.long64) + njobs = (np.ceil(npats / chunksize)).astype(np.int64) p_indx_start_end = [ [i * chunksize + patstart, (i + 1) * chunksize + patstart, chunksize] diff --git a/pyebsdindex/band_detect.py b/pyebsdindex/band_detect.py index 1f222fa..f1b222a 100644 --- a/pyebsdindex/band_detect.py +++ b/pyebsdindex/band_detect.py @@ -396,7 +396,7 @@ def find_bands(self, patternsIn, verbose=0, chunksize=-1, **kwargs): chunksize = nPats chunk_start_end = [[0,nPats]] else: - nchunks = (np.ceil(nPats / chunksize)).astype(np.long64) + nchunks = (np.ceil(nPats / chunksize)).astype(np.int64) chunk_start_end = [[i * chunksize, (i + 1) * chunksize] for i in range(nchunks)] chunk_start_end[-1][1] = nPats diff --git a/pyebsdindex/opencl/band_detect_cl.py b/pyebsdindex/opencl/band_detect_cl.py index 9e271cf..daae977 100644 --- a/pyebsdindex/opencl/band_detect_cl.py +++ b/pyebsdindex/opencl/band_detect_cl.py @@ -87,7 +87,7 @@ def find_bands(self, patternsIn, verbose=0, clparams=None, chunksize=528, useCPU nchunks = 1 chunksize = nPats else: - nchunks = (np.ceil(nPats / chunksize)).astype(np.long64) + nchunks = (np.ceil(nPats / chunksize)).astype(np.int64) chunk_start_end 
= [[i * chunksize,(i + 1) * chunksize] for i in range(nchunks)] chunk_start_end[-1][1] = nPats diff --git a/pyebsdindex/tripletvote.py b/pyebsdindex/tripletvote.py index 420b17b..4280823 100644 --- a/pyebsdindex/tripletvote.py +++ b/pyebsdindex/tripletvote.py @@ -407,7 +407,7 @@ def build_trip_lib(self): #print(indx0FID) #This completely over previsions the arrays, this is essentially #N Choose K with N = number of angles and K = 3 - nlib = npoles*np.prod(np.arange(3, dtype=np.int64)+(nangs-2+1))/np.long64(math.factorial(3)) + nlib = npoles*np.prod(np.arange(3, dtype=np.int64)+(nangs-2+1))/np.int64(math.factorial(3)) nlib = nlib.astype(int) libANG = np.zeros((nlib, 3)) From 554e5fae1870700059672eae97463a4305a1b315 Mon Sep 17 00:00:00 2001 From: David Rowenhorst Date: Thu, 20 Jun 2024 11:26:45 -0400 Subject: [PATCH 13/22] Validate that all indices are ints. Signed-off by: David Rowenhorst --- pyebsdindex/radon_fast.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pyebsdindex/radon_fast.py b/pyebsdindex/radon_fast.py index 7ce1534..77de160 100644 --- a/pyebsdindex/radon_fast.py +++ b/pyebsdindex/radon_fast.py @@ -134,9 +134,9 @@ def radon_plan_setup(self, image=None, imageDim=None, nTheta=None, nRho=None, rh #else: #indx_x = np.ceil(a[i] * n + b1).astype(np.int64) indx_x = np.round(a[i] * n + b1).astype(np.int64) - indx_x = np.where(indx_x < 0, outofbounds, indx_x) - indx_x = np.where(indx_x >= self.imDim[1], outofbounds, indx_x) - indx1D = np.clip(indx_x+self.imDim[1]*n, 0, outofbounds) + indx_x = np.where(indx_x < 0, outofbounds, indx_x).astype(np.int64) + indx_x = np.where(indx_x >= self.imDim[1], outofbounds, indx_x).astype(np.int64) + indx1D = np.clip(indx_x+self.imDim[1]*n, 0, outofbounds).astype(np.int64) # for j in range(self.nRho): # indx_good = indx1D[j,:].flatten() # whgood = np.nonzero(indx_good < outofbounds)[0] @@ -151,10 +151,10 @@ def radon_plan_setup(self, image=None, imageDim=None, nTheta=None, nRho=None, rh # 
indx1D[j, 0:whmask.size] = newindex[whmask] self.indexPlan[:, i, 0:self.imDim[0]] = indx1D - tempindx = self.indexPlan.flatten() - mask = np.concatenate( (self.mask.flatten(), np.array([0,0]))) + tempindx = self.indexPlan.flatten().astype(np.int64) + mask = np.concatenate( (self.mask.flatten().astype(np.int64), np.array([0,0], dtype=np.int64))) tempindx = np.where(mask[tempindx] > 0, tempindx, outofbounds) - maskindex = np.concatenate((self.maskindex.flatten(), np.array([-1,-1]))) + maskindex = np.concatenate((self.maskindex.flatten(), np.array([-1,-1]))).astype(np.int64) tempindx = np.where(maskindex[tempindx] >= 0, maskindex[tempindx], outofbounds) self.indexPlan = tempindx.reshape([self.nRho,self.nTheta,self.imDim.max()]) self.indexPlan.sort(axis = -1) From c54f183b7a12f2974741b9e2650c0ffeb5000d9d Mon Sep 17 00:00:00 2001 From: David Rowenhorst Date: Thu, 20 Jun 2024 15:46:21 -0400 Subject: [PATCH 14/22] More numpy 2.0 cleanup Signed-off by: David Rowenhorst --- pyebsdindex/pcopt.py | 4 ++-- pyebsdindex/radon_fast.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pyebsdindex/pcopt.py b/pyebsdindex/pcopt.py index 009f7b7..2320e19 100644 --- a/pyebsdindex/pcopt.py +++ b/pyebsdindex/pcopt.py @@ -435,9 +435,9 @@ def initializeswarm(self, start=None, bounds=None): self.vellimit = 4*np.mean(np.sqrt(np.sum(self.vel**2, axis=1))) - self.pbest = np.zeros(self.n_particles) + np.infty + self.pbest = np.zeros(self.n_particles) + np.inf self.pbest_loc = np.copy(self.pos) - self.gbest = np.infty + self.gbest = np.inf self.gbest_loc = start diff --git a/pyebsdindex/radon_fast.py b/pyebsdindex/radon_fast.py index 77de160..300d4fc 100644 --- a/pyebsdindex/radon_fast.py +++ b/pyebsdindex/radon_fast.py @@ -331,7 +331,7 @@ def radon2pole(self,bandData,PC=None,vendor='EDAX'): stheta = np.sin(theta) ctheta = np.cos(theta) - pctemp = np.asfarray(PC).copy() + pctemp = np.asarray(PC, dtype=np.float32).copy() shapet = pctemp.shape if ven != 'EMSOFT': if len(shapet) 
< 2: From 6bb3bac8a4f315d3f12cc5fed32f0b30617420e3 Mon Sep 17 00:00:00 2001 From: David Rowenhorst Date: Fri, 28 Jun 2024 13:54:09 -0400 Subject: [PATCH 15/22] And more numpy 2.0 changes Signed-off by: David Rowenhorst --- pyebsdindex/opencl/band_detect_cl.py | 2 +- pyebsdindex/opencl/radon_fast_cl.py | 2 +- pyebsdindex/tripletvote.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pyebsdindex/opencl/band_detect_cl.py b/pyebsdindex/opencl/band_detect_cl.py index daae977..a2305da 100644 --- a/pyebsdindex/opencl/band_detect_cl.py +++ b/pyebsdindex/opencl/band_detect_cl.py @@ -270,7 +270,7 @@ def radon_fasterCL(self,image,padding = np.array([0,0]), fixArtifacts = False, b #radon_gpu = cl.Buffer(ctx,mf.READ_WRITE,size=radon.nbytes) #radon_gpu = cl.Buffer(ctx,mf.READ_WRITE | mf.COPY_HOST_PTR,hostbuf=radon) image_gpu = cl.Buffer(ctx,mf.READ_ONLY | mf.COPY_HOST_PTR,hostbuf=image) - imstep = np.uint64(np.product(shapeIm[-2:])) + imstep = np.uint64(np.prod(shapeIm[-2:])) tic = timer() nImChunk = np.uint64(nImCL/clvtypesize) diff --git a/pyebsdindex/opencl/radon_fast_cl.py b/pyebsdindex/opencl/radon_fast_cl.py index 5fc8b75..8089879 100644 --- a/pyebsdindex/opencl/radon_fast_cl.py +++ b/pyebsdindex/opencl/radon_fast_cl.py @@ -98,7 +98,7 @@ def radon_fasterCL(self,image,padding = np.array([0,0]), fixArtifacts = False, image_gpu = cl.Buffer(ctx,mf.READ_ONLY | mf.COPY_HOST_PTR,hostbuf=image_align) rdnIndx_gpu = cl.Buffer(ctx,mf.READ_ONLY | mf.COPY_HOST_PTR,hostbuf=self.indexPlan) - imstep = np.uint64(np.product(shapeIm[-2:])) + imstep = np.uint64(np.prod(shapeIm[-2:])) indxstep = np.uint64(self.indexPlan.shape[-1]) rdnstep = np.uint64(self.nRho * self.nTheta) diff --git a/pyebsdindex/tripletvote.py b/pyebsdindex/tripletvote.py index 4280823..4910b17 100644 --- a/pyebsdindex/tripletvote.py +++ b/pyebsdindex/tripletvote.py @@ -792,7 +792,7 @@ def _refine_orientation(self, bandnorms, whGood, polematch): tic = timer() poles = 
self.tripLib.completelib['polesCart'] nGood = whGood.size - n2Fit = np.int64(np.product(np.arange(2)+(nGood-2+1))/np.int64(2)) + n2Fit = np.int64(np.prod(np.arange(2)+(nGood-2+1))/np.int64(2)) whGood = np.asarray(whGood,dtype=np.int64) #AB, ABgood = self.orientation_refine_loops_am(nGood,whGood,poles,bandnorms,polematch,n2Fit) # tic = timer() From 92129f9e6698db18bbe230f810ab14d65ebfb38f Mon Sep 17 00:00:00 2001 From: David Rowenhorst Date: Fri, 28 Jun 2024 14:11:02 -0400 Subject: [PATCH 16/22] Fixed typo Signed-off by: David Rowenhorst --- pyebsdindex/opencl/clkernels.cl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyebsdindex/opencl/clkernels.cl b/pyebsdindex/opencl/clkernels.cl index b712d2f..b045f90 100644 --- a/pyebsdindex/opencl/clkernels.cl +++ b/pyebsdindex/opencl/clkernels.cl @@ -70,7 +70,7 @@ __kernel void loaduint16( const __global ushort *im1, __global float *im1flt, co // simple program to convert a float to float and transpose array -__kernel void loaduufloat32( const __global float *im1, __global float *im1flt, const unsigned long int nImCL) +__kernel void loadfloat32( const __global float *im1, __global float *im1flt, const unsigned long int nImCL) { const unsigned long int x = get_global_id(0); const unsigned long int y = get_global_id(1); From ec3921fc8695f6ef33ae680418802ab409ece5fc Mon Sep 17 00:00:00 2001 From: David Rowenhorst Date: Fri, 28 Jun 2024 14:43:18 -0400 Subject: [PATCH 17/22] Type corrections Signed-off by: David Rowenhorst --- pyebsdindex/opencl/band_detect_cl.py | 2 +- pyebsdindex/opencl/radon_fast_cl.py | 2 +- pyebsdindex/tripletvote.py | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pyebsdindex/opencl/band_detect_cl.py b/pyebsdindex/opencl/band_detect_cl.py index a2305da..8830f97 100644 --- a/pyebsdindex/opencl/band_detect_cl.py +++ b/pyebsdindex/opencl/band_detect_cl.py @@ -270,7 +270,7 @@ def radon_fasterCL(self,image,padding = np.array([0,0]), fixArtifacts = False, b #radon_gpu = 
cl.Buffer(ctx,mf.READ_WRITE,size=radon.nbytes) #radon_gpu = cl.Buffer(ctx,mf.READ_WRITE | mf.COPY_HOST_PTR,hostbuf=radon) image_gpu = cl.Buffer(ctx,mf.READ_ONLY | mf.COPY_HOST_PTR,hostbuf=image) - imstep = np.uint64(np.prod(shapeIm[-2:])) + imstep = np.uint64(np.prod(shapeIm[-2:], dtype=int)) tic = timer() nImChunk = np.uint64(nImCL/clvtypesize) diff --git a/pyebsdindex/opencl/radon_fast_cl.py b/pyebsdindex/opencl/radon_fast_cl.py index 8089879..fd8b3b9 100644 --- a/pyebsdindex/opencl/radon_fast_cl.py +++ b/pyebsdindex/opencl/radon_fast_cl.py @@ -98,7 +98,7 @@ def radon_fasterCL(self,image,padding = np.array([0,0]), fixArtifacts = False, image_gpu = cl.Buffer(ctx,mf.READ_ONLY | mf.COPY_HOST_PTR,hostbuf=image_align) rdnIndx_gpu = cl.Buffer(ctx,mf.READ_ONLY | mf.COPY_HOST_PTR,hostbuf=self.indexPlan) - imstep = np.uint64(np.prod(shapeIm[-2:])) + imstep = np.uint64(np.prod(shapeIm[-2:], dtype=int)) indxstep = np.uint64(self.indexPlan.shape[-1]) rdnstep = np.uint64(self.nRho * self.nTheta) diff --git a/pyebsdindex/tripletvote.py b/pyebsdindex/tripletvote.py index 4910b17..26dd026 100644 --- a/pyebsdindex/tripletvote.py +++ b/pyebsdindex/tripletvote.py @@ -407,7 +407,7 @@ def build_trip_lib(self): #print(indx0FID) #This completely over previsions the arrays, this is essentially #N Choose K with N = number of angles and K = 3 - nlib = npoles*np.prod(np.arange(3, dtype=np.int64)+(nangs-2+1))/np.int64(math.factorial(3)) + nlib = int(npoles*np.prod(np.arange(3, dtype=np.int64)+(nangs-2+1))//np.int64(math.factorial(3))) nlib = nlib.astype(int) libANG = np.zeros((nlib, 3)) @@ -792,7 +792,7 @@ def _refine_orientation(self, bandnorms, whGood, polematch): tic = timer() poles = self.tripLib.completelib['polesCart'] nGood = whGood.size - n2Fit = np.int64(np.prod(np.arange(2)+(nGood-2+1))/np.int64(2)) + n2Fit = np.int64(np.prod(np.arange(2)+(nGood-2+1), dtype=int)//np.int64(2)) whGood = np.asarray(whGood,dtype=np.int64) #AB, ABgood = 
self.orientation_refine_loops_am(nGood,whGood,poles,bandnorms,polematch,n2Fit) # tic = timer() From 065ce7e1de69526ee7b3d7b66ca5d6a8abf450ff Mon Sep 17 00:00:00 2001 From: David Rowenhorst Date: Fri, 28 Jun 2024 14:46:19 -0400 Subject: [PATCH 18/22] Type corrections Signed-off by: David Rowenhorst --- pyebsdindex/tripletvote.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyebsdindex/tripletvote.py b/pyebsdindex/tripletvote.py index 26dd026..1b39eb4 100644 --- a/pyebsdindex/tripletvote.py +++ b/pyebsdindex/tripletvote.py @@ -407,7 +407,7 @@ def build_trip_lib(self): #print(indx0FID) #This completely over previsions the arrays, this is essentially #N Choose K with N = number of angles and K = 3 - nlib = int(npoles*np.prod(np.arange(3, dtype=np.int64)+(nangs-2+1))//np.int64(math.factorial(3))) + nlib = npoles*np.prod(np.arange(3, dtype=np.int64)+(nangs-2+1))//np.int64(math.factorial(3)) nlib = nlib.astype(int) libANG = np.zeros((nlib, 3)) From a44b40664a473bbcc39665a30d309e563fd815e1 Mon Sep 17 00:00:00 2001 From: David Rowenhorst Date: Fri, 28 Jun 2024 14:52:23 -0400 Subject: [PATCH 19/22] Another type fix Signed-off by: David Rowenhorst --- pyebsdindex/ebsd_pattern.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyebsdindex/ebsd_pattern.py b/pyebsdindex/ebsd_pattern.py index 54b84a5..74e04d1 100644 --- a/pyebsdindex/ebsd_pattern.py +++ b/pyebsdindex/ebsd_pattern.py @@ -502,7 +502,7 @@ def pat_reader(self, patStart=0, nPatToRead=1): typeread = self.filedatatype typebyte = self.filedatatype(0).nbytes - f.seek(int(nPerPat * patStart * typebyte),1) + f.seek(int(np.int64(nPerPat) * np.int64(patStart) * typebyte),1) readpats = np.fromfile(f,dtype=typeread,count=int(nPatToRead * nPerPat)) readpats = readpats.reshape(nPatToRead,self.patternH,self.patternW) f.close() From 25e9cd0338340318e4d038f8f20e28dfeb4fb2e0 Mon Sep 17 00:00:00 2001 From: David Rowenhorst Date: Fri, 28 Jun 2024 15:23:47 -0400 Subject: [PATCH 20/22] Signed-off 
by: David Rowenhorst --- pyebsdindex/ebsd_pattern.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pyebsdindex/ebsd_pattern.py b/pyebsdindex/ebsd_pattern.py index 74e04d1..bbe496c 100644 --- a/pyebsdindex/ebsd_pattern.py +++ b/pyebsdindex/ebsd_pattern.py @@ -502,8 +502,13 @@ def pat_reader(self, patStart=0, nPatToRead=1): typeread = self.filedatatype typebyte = self.filedatatype(0).nbytes +<<<<<<< Updated upstream f.seek(int(np.int64(nPerPat) * np.int64(patStart) * typebyte),1) readpats = np.fromfile(f,dtype=typeread,count=int(nPatToRead * nPerPat)) +======= + f.seek(int(nPerPat * patStart * typebyte),1) + readpats = np.fromfile(f,dtype=typeread,count=np.int64(np.int64(nPatToRead) * np.int64(nPerPat))) +>>>>>>> Stashed changes readpats = readpats.reshape(nPatToRead,self.patternH,self.patternW) f.close() yx = np.unravel_index(np.arange(int(patStart), int(patStart+nPatToRead), dtype = np.uint64), From eba5f0a39eecc65e06a5dd024c4070a9a3fe4603 Mon Sep 17 00:00:00 2001 From: David Rowenhorst Date: Fri, 28 Jun 2024 15:24:58 -0400 Subject: [PATCH 21/22] More corrections Signed-off by: David Rowenhorst --- pyebsdindex/ebsd_pattern.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/pyebsdindex/ebsd_pattern.py b/pyebsdindex/ebsd_pattern.py index bbe496c..a8bf20c 100644 --- a/pyebsdindex/ebsd_pattern.py +++ b/pyebsdindex/ebsd_pattern.py @@ -502,13 +502,10 @@ def pat_reader(self, patStart=0, nPatToRead=1): typeread = self.filedatatype typebyte = self.filedatatype(0).nbytes -<<<<<<< Updated upstream + f.seek(int(np.int64(nPerPat) * np.int64(patStart) * typebyte),1) - readpats = np.fromfile(f,dtype=typeread,count=int(nPatToRead * nPerPat)) -======= - f.seek(int(nPerPat * patStart * typebyte),1) readpats = np.fromfile(f,dtype=typeread,count=np.int64(np.int64(nPatToRead) * np.int64(nPerPat))) ->>>>>>> Stashed changes + readpats = readpats.reshape(nPatToRead,self.patternH,self.patternW) f.close() yx = np.unravel_index(np.arange(int(patStart), 
int(patStart+nPatToRead), dtype = np.uint64), From 53ba837da48c986a4a207fe2705cacf259577a91 Mon Sep 17 00:00:00 2001 From: David Rowenhorst Date: Fri, 28 Jun 2024 17:59:48 -0400 Subject: [PATCH 22/22] Prepare for release. Signed-off by: David Rowenhorst --- CHANGELOG.rst | 10 ++++++++++ pyebsdindex/__init__.py | 2 +- pyebsdindex/opencl/nlpar_cl.py | 2 +- 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index a5a0d15..62acd3c 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -5,6 +5,16 @@ Changelog All notable changes to PyEBSDIndex will be documented in this file. The format is based on `Keep a Changelog `_. +0.3.5 (2024-06-07) +================== + +Fixed +----- +- Further tweaking of NLPAR GPU memory limits for Apple-ARM. +- Many small type fixes for numpy 2.0 compatibility. +- Corrected GPU detection for distributed indexing. +- Fixed issue where slower machines would erroneously detect a GPU timeout. + 0.3.4 (2024-06-07) ================== diff --git a/pyebsdindex/__init__.py b/pyebsdindex/__init__.py index 53da16a..b8a23bf 100644 --- a/pyebsdindex/__init__.py +++ b/pyebsdindex/__init__.py @@ -7,7 +7,7 @@ ] __description__ = "Python based tool for Radon based EBSD indexing" __name__ = "pyebsdindex" -__version__ = "0.3.4" +__version__ = "0.3.5" # Try to import only once - also will perform check that at least one GPU is found. diff --git a/pyebsdindex/opencl/nlpar_cl.py b/pyebsdindex/opencl/nlpar_cl.py index a367788..28a239c 100644 --- a/pyebsdindex/opencl/nlpar_cl.py +++ b/pyebsdindex/opencl/nlpar_cl.py @@ -163,7 +163,7 @@ def calcsigma_cl(self,nn=1,saturation_protect=True,automask=True, normalize_d=Fa #print(gpu_id) clparams.get_context(gpu_id=gpu_id, kfile = 'clnlpar.cl') clparams.get_queue() - target_mem = min(clparams.queue.device.max_mem_alloc_size//2, int(4e9)) + target_mem = min(clparams.queue.device.max_mem_alloc_size//2, np.int64(4e9)) ctx = clparams.ctx prg = clparams.prg queue = clparams.queue