From dfd4c30b73f96c4c480fbe53a630940c547f6cab Mon Sep 17 00:00:00 2001 From: Jim Pivarski Date: Sun, 10 Mar 2019 08:08:18 +0100 Subject: [PATCH 1/2] 'index' should return a generic JaggedArray and new 'tojagged' has to be usable on non-valid 'self' because it's used to build up jagged tables --- awkward/array/jagged.py | 15 ++++++++++++--- awkward/version.py | 2 +- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/awkward/array/jagged.py b/awkward/array/jagged.py index 50e911b1..292c3abc 100644 --- a/awkward/array/jagged.py +++ b/awkward/array/jagged.py @@ -433,7 +433,7 @@ def parents(self, value): def index(self): tmp = self.compact() out = self.numpy.arange(len(tmp._content), dtype=self.INDEXTYPE) - return self.copy(starts=tmp._starts, stops=tmp._stops, content=(out - tmp._starts[tmp.parents])) + return self.JaggedArray(tmp._starts, tmp._stops, (out - tmp._starts[tmp.parents])) def _getnbytes(self, seen): if id(self) in seen: @@ -766,12 +766,21 @@ def __setitem__(self, where, what): def tojagged(self, data): if isinstance(data, JaggedArray): - if not self.numpy.array_equal(self.counts, data.counts): + selfcounts = self.stops - self._starts + datacounts = data.stops - data._starts + if not self.numpy.array_equal(selfcounts, datacounts): raise ValueError("cannot broadcast JaggedArray to match JaggedArray with a different counts") if len(self._starts) == 0: return self.copy(content=data._content) + + tmp = self.compact() + assert self.offsetsaliased(tmp._starts, tmp._stops) # because that's what compact means + tmpparents = self.offsets2parents(tmp._starts.base) + + index = self.JaggedArray(tmp._starts, tmp._stops, (self.numpy.arange(tmp._stops[-1], dtype=self.INDEXTYPE) - tmp._starts[tmpparents])) + data = data.compact() - return self.copy(content=data._content[self.IndexedArray.invert((self.index + self._starts)._content)]) + return self.copy(content=data._content[self.IndexedArray.invert((index + self._starts)._content)]) elif isinstance(data, awkward.array.base.AwkwardArray): if len(self._starts) != len(data): diff --git a/awkward/version.py b/awkward/version.py index 7669c2fd..fdfe8e68 100644 --- a/awkward/version.py +++ b/awkward/version.py @@ -30,7 +30,7 @@ import re -__version__ = "0.8.9" +__version__ = "0.8.10" version = __version__ version_info = tuple(re.split(r"[-\.]", __version__)) From 574f2010954c4f547bbeb5065efd73b4234b55da Mon Sep 17 00:00:00 2001 From: Jim Pivarski Date: Sun, 10 Mar 2019 09:06:39 +0100 Subject: [PATCH 2/2] introduce self.stops (cut to starts length) vs self._stops (raw) and always use self.stops to define counts --- awkward/array/jagged.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/awkward/array/jagged.py b/awkward/array/jagged.py index 292c3abc..b471a398 100644 --- a/awkward/array/jagged.py +++ b/awkward/array/jagged.py @@ -323,10 +323,13 @@ def starts(self, value): self._starts = value self._offsets, self._counts, self._parents = None, None, None self._isvalid = False - + @property def stops(self): - return self._stops + if len(self._stops) == len(self._starts): + return self._stops + else: + return self._stops[:len(self._starts)] @stops.setter def stops(self, value): @@ -386,7 +389,7 @@ def offsets(self, value): def counts(self): if self._counts is None: self._valid() - self._counts = self._stops - self._starts + self._counts = self.stops - self._starts return self._counts @counts.setter @@ -483,7 +486,7 @@ def _validstartsstops(cls, starts, stops): raise ValueError("starts must have the same (or shorter) length than stops") if starts.shape[1:] != stops.shape[1:]: raise ValueError("starts and stops must have the same dimensionality (shape[1:])") - if not (stops >= starts).all(): + if not (stops[:len(starts)] >= starts).all(): raise ValueError("stops must be greater than or equal to starts") def __iter__(self, checkiter=True): @@ -590,7 +593,7 @@ def __getitem__(self, where): stack.insert(0, node.counts) node = node.flatten() - counts = node._stops - node._starts + counts = node.stops - node._starts if head < 0: head = counts + head if not self.numpy.bitwise_and(0 <= head, head < counts).all(): @@ -605,7 +608,7 @@ def __getitem__(self, where): if nslices >= 2: raise NotImplementedError("this implementation cannot slice a JaggedArray in more than two dimensions") - counts = node._stops - node._starts + counts = node.stops - node._starts step = 1 if head.step is None else head.step if step == 0: @@ -851,7 +854,7 @@ def _tojagged(self, starts=None, stops=None, copy=True): if self.offsetsaliased(starts, stops): parents = self.offsets2parents(starts.base) elif len(starts.shape) == 1 and self.numpy.array_equal(starts[1:], stops[:-1]): - if len(self._stops) == 0: + if len(stops) == 0: offsets = self.numpy.array([0], dtype=self.INDEXTYPE) else: offsets = self.numpy.append(starts, stops[-1])