Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bug fixes and numpy 2.0 compatibility. #64

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,16 @@ Changelog
All notable changes to PyEBSDIndex will be documented in this file. The format is based
on `Keep a Changelog <https://keepachangelog.com/en/1.1.0>`_.

0.3.5 (2024-06-07)
==================

Fixed
-----
- Further tweaking of NLPAR GPU memory limits for Apple-ARM.
- Many small type fixes for numpy 2.0 compatibility.
- Corrected GPU detection for distributed indexing.
- Fixed issue where slower machines would erroneously detect a GPU timeout.


0.3.4 (2024-06-07)
==================
Expand Down
2 changes: 1 addition & 1 deletion pyebsdindex/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
]
__description__ = "Python based tool for Radon based EBSD indexing"
__name__ = "pyebsdindex"
__version__ = "0.3.4"
__version__ = "0.3.5"


# Try to import only once - also will perform check that at least one GPU is found.
Expand Down
13 changes: 9 additions & 4 deletions pyebsdindex/_ebsd_index_parallel.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
from pyebsdindex import band_detect as band_detect

RAYIPADDRESS = '127.0.0.1'
#RAYIPADDRESS = '0.0.0.0'
OSPLATFORM = platform.system()
#if OSPLATFORM == 'Darwin':
# RAYIPADDRESS = '0.0.0.0' # the localhost address does not work on macOS when on a VPN
Expand Down Expand Up @@ -309,10 +310,13 @@ def index_pats_distributed(


if ngpu > 0:
ngpupro = max(12, ngpu*8) # number of processes that will serve data to the gpu
gpuratio = (12, ngpu*4)
if (platform.machine(), platform.system()) == ('x86_64', 'Darwin'):
gpuratio = (6, ngpu*6)
ngpupro = min(max(gpuratio), 12) # number of processes that will serve data to the gpu
#ngpupro = 8
if n_cpu_nodes < 8:
ngpupro = min(ngpupro,8)
ngpupro = min(ngpupro, n_cpu_nodes)
if n_cpu_nodes < 2:
ngpupro = 2
#if OSPLATFORM == 'Linux':
Expand Down Expand Up @@ -369,7 +373,7 @@ def index_pats_distributed(
# fall back to CPU only calculation.
clparamfunction = band_detect.getopenclparam
# Set up the jobs
njobs = (np.ceil(npats / chunksize)).astype(np.compat.long)
njobs = (np.ceil(npats / chunksize)).astype(np.int64)

p_indx_start_end = [
[i * chunksize + patstart, (i + 1) * chunksize + patstart, chunksize]
Expand Down Expand Up @@ -458,7 +462,7 @@ def index_pats_distributed(

#gpu_launched += 1

gpuwrker_cycles = 0
gpuwrker_cycles = -500
cpuwrker_cycles = 0

while ncpudone < njobs:
Expand Down Expand Up @@ -765,6 +769,7 @@ def __optimizegpuchunk__(indexer, ngpupro, gpu_id, clparam):
@ray.remote(num_cpus=1, num_gpus=1)
class GPUWorker:
def __init__(self, actorid=0, clparammodule=None, gpu_id=None, cudavis = '0'):
#del os.environ['CUDA_VISIBLE_DEVICES']
# sys.path.append(path.dirname(path.dirname(__file__))) # do this to help Ray find the program files
# import openclparam # do this to help Ray find the program files
# device, context, queue, program, mf
Expand Down
2 changes: 1 addition & 1 deletion pyebsdindex/band_detect.py
Original file line number Diff line number Diff line change
Expand Up @@ -396,7 +396,7 @@ def find_bands(self, patternsIn, verbose=0, chunksize=-1, **kwargs):
chunksize = nPats
chunk_start_end = [[0,nPats]]
else:
nchunks = (np.ceil(nPats / chunksize)).astype(np.compat.long)
nchunks = (np.ceil(nPats / chunksize)).astype(np.int64)
chunk_start_end = [[i * chunksize, (i + 1) * chunksize] for i in range(nchunks)]
chunk_start_end[-1][1] = nPats

Expand Down
6 changes: 4 additions & 2 deletions pyebsdindex/ebsd_pattern.py
Original file line number Diff line number Diff line change
Expand Up @@ -502,8 +502,10 @@ def pat_reader(self, patStart=0, nPatToRead=1):
typeread = self.filedatatype
typebyte = self.filedatatype(0).nbytes

f.seek(int(nPerPat * patStart * typebyte),1)
readpats = np.fromfile(f,dtype=typeread,count=int(nPatToRead * nPerPat))

f.seek(int(np.int64(nPerPat) * np.int64(patStart) * typebyte),1)
readpats = np.fromfile(f,dtype=typeread,count=np.int64(np.int64(nPatToRead) * np.int64(nPerPat)))

readpats = readpats.reshape(nPatToRead,self.patternH,self.patternW)
f.close()
yx = np.unravel_index(np.arange(int(patStart), int(patStart+nPatToRead), dtype = np.uint64),
Expand Down
4 changes: 2 additions & 2 deletions pyebsdindex/opencl/band_detect_cl.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ def find_bands(self, patternsIn, verbose=0, clparams=None, chunksize=528, useCPU
nchunks = 1
chunksize = nPats
else:
nchunks = (np.ceil(nPats / chunksize)).astype(np.compat.long)
nchunks = (np.ceil(nPats / chunksize)).astype(np.int64)

chunk_start_end = [[i * chunksize,(i + 1) * chunksize] for i in range(nchunks)]
chunk_start_end[-1][1] = nPats
Expand Down Expand Up @@ -270,7 +270,7 @@ def radon_fasterCL(self,image,padding = np.array([0,0]), fixArtifacts = False, b
#radon_gpu = cl.Buffer(ctx,mf.READ_WRITE,size=radon.nbytes)
#radon_gpu = cl.Buffer(ctx,mf.READ_WRITE | mf.COPY_HOST_PTR,hostbuf=radon)
image_gpu = cl.Buffer(ctx,mf.READ_ONLY | mf.COPY_HOST_PTR,hostbuf=image)
imstep = np.uint64(np.product(shapeIm[-2:]))
imstep = np.uint64(np.prod(shapeIm[-2:], dtype=int))
tic = timer()

nImChunk = np.uint64(nImCL/clvtypesize)
Expand Down
2 changes: 1 addition & 1 deletion pyebsdindex/opencl/clkernels.cl
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ __kernel void loaduint16( const __global ushort *im1, __global float *im1flt, co


// simple program to convert a float to float and transpose array
__kernel void loaduufloat32( const __global float *im1, __global float *im1flt, const unsigned long int nImCL)
__kernel void loadfloat32( const __global float *im1, __global float *im1flt, const unsigned long int nImCL)
{
const unsigned long int x = get_global_id(0);
const unsigned long int y = get_global_id(1);
Expand Down
4 changes: 2 additions & 2 deletions pyebsdindex/opencl/nlpar_cl.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ def calcsigma_cl(self,nn=1,saturation_protect=True,automask=True, normalize_d=Fa
#print(gpu_id)
clparams.get_context(gpu_id=gpu_id, kfile = 'clnlpar.cl')
clparams.get_queue()
target_mem = clparams.queue.device.max_mem_alloc_size//2
target_mem = min(clparams.queue.device.max_mem_alloc_size//2, np.int64(4e9))
ctx = clparams.ctx
prg = clparams.prg
queue = clparams.queue
Expand Down Expand Up @@ -400,7 +400,7 @@ def calcnlpar_cl(self, searchradius=None, lam = None, dthresh = None, saturation
#print(gpu_id)
clparams.get_context(gpu_id=gpu_id, kfile ='clnlpar.cl')
clparams.get_queue()
target_mem = clparams.queue.device.max_mem_alloc_size//2
target_mem = min(clparams.queue.device.max_mem_alloc_size//4, int(2e9))
ctx = clparams.ctx
prg = clparams.prg
queue = clparams.queue
Expand Down
1 change: 1 addition & 0 deletions pyebsdindex/opencl/openclparam.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ def get_gpu(self):
else:
pass
self.gpu = gpu
self.ngpu = len(gpu)
return self.gpu


Expand Down
2 changes: 1 addition & 1 deletion pyebsdindex/opencl/radon_fast_cl.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ def radon_fasterCL(self,image,padding = np.array([0,0]), fixArtifacts = False,
image_gpu = cl.Buffer(ctx,mf.READ_ONLY | mf.COPY_HOST_PTR,hostbuf=image_align)
rdnIndx_gpu = cl.Buffer(ctx,mf.READ_ONLY | mf.COPY_HOST_PTR,hostbuf=self.indexPlan)

imstep = np.uint64(np.product(shapeIm[-2:]))
imstep = np.uint64(np.prod(shapeIm[-2:], dtype=int))
indxstep = np.uint64(self.indexPlan.shape[-1])
rdnstep = np.uint64(self.nRho * self.nTheta)

Expand Down
4 changes: 2 additions & 2 deletions pyebsdindex/pcopt.py
Original file line number Diff line number Diff line change
Expand Up @@ -435,9 +435,9 @@ def initializeswarm(self, start=None, bounds=None):
self.vellimit = 4*np.mean(np.sqrt(np.sum(self.vel**2, axis=1)))


self.pbest = np.zeros(self.n_particles) + np.infty
self.pbest = np.zeros(self.n_particles) + np.inf
self.pbest_loc = np.copy(self.pos)
self.gbest = np.infty
self.gbest = np.inf
self.gbest_loc = start


Expand Down
14 changes: 7 additions & 7 deletions pyebsdindex/radon_fast.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,9 +134,9 @@ def radon_plan_setup(self, image=None, imageDim=None, nTheta=None, nRho=None, rh
#else:
#indx_x = np.ceil(a[i] * n + b1).astype(np.int64)
indx_x = np.round(a[i] * n + b1).astype(np.int64)
indx_x = np.where(indx_x < 0, outofbounds, indx_x)
indx_x = np.where(indx_x >= self.imDim[1], outofbounds, indx_x)
indx1D = np.clip(indx_x+self.imDim[1]*n, 0, outofbounds)
indx_x = np.where(indx_x < 0, outofbounds, indx_x).astype(np.int64)
indx_x = np.where(indx_x >= self.imDim[1], outofbounds, indx_x).astype(np.int64)
indx1D = np.clip(indx_x+self.imDim[1]*n, 0, outofbounds).astype(np.int64)
# for j in range(self.nRho):
# indx_good = indx1D[j,:].flatten()
# whgood = np.nonzero(indx_good < outofbounds)[0]
Expand All @@ -151,10 +151,10 @@ def radon_plan_setup(self, image=None, imageDim=None, nTheta=None, nRho=None, rh
# indx1D[j, 0:whmask.size] = newindex[whmask]

self.indexPlan[:, i, 0:self.imDim[0]] = indx1D
tempindx = self.indexPlan.flatten()
mask = np.concatenate( (self.mask.flatten(), np.array([0,0])))
tempindx = self.indexPlan.flatten().astype(np.int64)
mask = np.concatenate( (self.mask.flatten().astype(np.int64), np.array([0,0], dtype=np.int64)))
tempindx = np.where(mask[tempindx] > 0, tempindx, outofbounds)
maskindex = np.concatenate((self.maskindex.flatten(), np.array([-1,-1])))
maskindex = np.concatenate((self.maskindex.flatten(), np.array([-1,-1]))).astype(np.int64)
tempindx = np.where(maskindex[tempindx] >= 0, maskindex[tempindx], outofbounds)
self.indexPlan = tempindx.reshape([self.nRho,self.nTheta,self.imDim.max()])
self.indexPlan.sort(axis = -1)
Expand Down Expand Up @@ -331,7 +331,7 @@ def radon2pole(self,bandData,PC=None,vendor='EDAX'):
stheta = np.sin(theta)
ctheta = np.cos(theta)

pctemp = np.asfarray(PC).copy()
pctemp = np.asarray(PC, dtype=np.float32).copy()
shapet = pctemp.shape
if ven != 'EMSOFT':
if len(shapet) < 2:
Expand Down
4 changes: 2 additions & 2 deletions pyebsdindex/tripletvote.py
Original file line number Diff line number Diff line change
Expand Up @@ -407,7 +407,7 @@ def build_trip_lib(self):
#print(indx0FID)
#This completely over-provisions the arrays; this is essentially
#N Choose K with N = number of angles and K = 3
nlib = npoles*np.prod(np.arange(3, dtype=np.int64)+(nangs-2+1))/np.compat.long(math.factorial(3))
nlib = npoles*np.prod(np.arange(3, dtype=np.int64)+(nangs-2+1))//np.int64(math.factorial(3))
nlib = nlib.astype(int)

libANG = np.zeros((nlib, 3))
Expand Down Expand Up @@ -792,7 +792,7 @@ def _refine_orientation(self, bandnorms, whGood, polematch):
tic = timer()
poles = self.tripLib.completelib['polesCart']
nGood = whGood.size
n2Fit = np.int64(np.product(np.arange(2)+(nGood-2+1))/np.int64(2))
n2Fit = np.int64(np.prod(np.arange(2)+(nGood-2+1), dtype=int)//np.int64(2))
whGood = np.asarray(whGood,dtype=np.int64)
#AB, ABgood = self.orientation_refine_loops_am(nGood,whGood,poles,bandnorms,polematch,n2Fit)
# tic = timer()
Expand Down