From 6204ed9fe047f8530902c6cfd8786ea5dc124271 Mon Sep 17 00:00:00 2001 From: Jim Pivarski Date: Mon, 19 Nov 2018 06:53:17 -0600 Subject: [PATCH 1/4] working for all step==1 --- awkward/array/jagged.py | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/awkward/array/jagged.py b/awkward/array/jagged.py index c311e879..38e0541e 100644 --- a/awkward/array/jagged.py +++ b/awkward/array/jagged.py @@ -507,14 +507,16 @@ def __getitem__(self, where): elif head.start >= 0: starts = awkward.util.numpy.minimum(counts, head.start) else: - starts = awkward.util.numpy.minimum(counts, counts + head.start) + starts = awkward.util.numpy.maximum(0, awkward.util.numpy.minimum(counts, counts + head.start)) if head.stop is None: stops = counts elif head.stop >= 0: stops = awkward.util.numpy.minimum(counts, head.stop) else: - stops = awkward.util.numpy.minimum(counts, counts + head.stop) + stops = awkward.util.numpy.maximum(0, awkward.util.numpy.minimum(counts, counts + head.stop)) + + stops = awkward.util.numpy.maximum(starts, stops) else: if head.start is None: @@ -522,28 +524,32 @@ def __getitem__(self, where): elif head.start >= 0: starts = awkward.util.numpy.minimum(counts - 1, head.start) else: - starts = awkward.util.numpy.minimum(counts - 1, counts + head.start) + starts = awkward.util.numpy.maximum(-1, awkward.util.numpy.minimum(counts - 1, counts + head.start)) if head.stop is None: stops = awkward.util.numpy.full(counts.shape, -1, dtype=awkward.util.INDEXTYPE) elif head.stop >= 0: stops = awkward.util.numpy.minimum(counts - 1, head.stop) else: - stops = awkward.util.numpy.minimum(counts - 1, counts + head.stop) + stops = awkward.util.numpy.maximum(-1, awkward.util.numpy.minimum(counts - 1, counts + head.stop)) + + stops = awkward.util.numpy.minimum(starts, stops) if step > 0: start = starts.min() stop = stops.max() newcounts = stops - starts + else: + raise NotImplementedError - quotient, remainder = divmod(stop - start, step) - oversize = quotient + (1 if remainder != 0 else 0) - indexes = awkward.util.numpy.empty((len(node), oversize), dtype=awkward.util.INDEXTYPE) - indexes[:, :] = awkward.util.numpy.arange(start, stop, step) + quotient, remainder = divmod(stop - start, step) + oversize = quotient + (1 if remainder != 0 else 0) + indexes = awkward.util.numpy.empty((len(node), oversize), dtype=awkward.util.INDEXTYPE) + indexes[:, :] = awkward.util.numpy.arange(start, stop, step) - absindexes = indexes + node._starts.reshape((len(node), 1)) + absindexes = indexes + node._starts.reshape((len(node), 1)) - goodindexes = absindexes[awkward.util.numpy.bitwise_and(indexes >= starts.reshape((len(node), 1)), indexes < stops.reshape((len(node), 1)))] + goodindexes = absindexes[awkward.util.numpy.bitwise_and(indexes >= starts.reshape((len(node), 1)), indexes < stops.reshape((len(node), 1)))] newoffsets = counts2offsets(newcounts.reshape(-1)) node = node.copy(starts=newoffsets[:-1], stops=newoffsets[1:], content=node.content[goodindexes]) From 06fe59c430c8efcf8cc687b26c48aba0689c79cc Mon Sep 17 00:00:00 2001 From: Jim Pivarski Date: Mon, 19 Nov 2018 07:54:44 -0600 Subject: [PATCH 2/4] seems to be working for all cases (need unit tests) --- awkward/array/jagged.py | 37 ++++++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/awkward/array/jagged.py b/awkward/array/jagged.py index 38e0541e..4df7314e 100644 --- a/awkward/array/jagged.py +++ b/awkward/array/jagged.py @@ -518,6 +518,16 @@ def __getitem__(self, where): stops = awkward.util.numpy.maximum(starts, stops) + start = starts.min() + stop = stops.max() + indexes = awkward.util.numpy.empty((len(node), abs(stop - start)), dtype=awkward.util.INDEXTYPE) + indexes[:, :] = awkward.util.numpy.arange(start, stop) + + mask = indexes >= starts.reshape((len(node), 1)) + awkward.util.numpy.bitwise_and(mask, indexes < stops.reshape((len(node), 1)), out=mask) + if step != 1: + awkward.util.numpy.bitwise_and(mask, awkward.util.numpy.remainder(indexes - starts.reshape((len(node), 1)), step) == 0, out=mask) + else: if head.start is None: starts = counts - 1 @@ -535,24 +545,21 @@ def __getitem__(self, where): stops = awkward.util.numpy.minimum(starts, stops) - if step > 0: - start = starts.min() - stop = stops.max() - newcounts = stops - starts - else: - raise NotImplementedError - - quotient, remainder = divmod(stop - start, step) - oversize = quotient + (1 if remainder != 0 else 0) - indexes = awkward.util.numpy.empty((len(node), oversize), dtype=awkward.util.INDEXTYPE) - indexes[:, :] = awkward.util.numpy.arange(start, stop, step) + start = starts.max() + stop = stops.min() + indexes = awkward.util.numpy.empty((len(node), abs(stop - start)), dtype=awkward.util.INDEXTYPE) + indexes[:, :] = awkward.util.numpy.arange(start, stop, -1) - absindexes = indexes + node._starts.reshape((len(node), 1)) - - goodindexes = absindexes[awkward.util.numpy.bitwise_and(indexes >= starts.reshape((len(node), 1)), indexes < stops.reshape((len(node), 1)))] + mask = indexes <= starts.reshape((len(node), 1)) + awkward.util.numpy.bitwise_and(mask, indexes > stops.reshape((len(node), 1)), out=mask) + if step != -1: + awkward.util.numpy.bitwise_and(mask, awkward.util.numpy.remainder(indexes - starts.reshape((len(node), 1)), step) == 0, out=mask) + newcounts = awkward.util.numpy.count_nonzero(mask, axis=1) newoffsets = counts2offsets(newcounts.reshape(-1)) - node = node.copy(starts=newoffsets[:-1], stops=newoffsets[1:], content=node.content[goodindexes]) + newcontent = node.content[(indexes + node._starts.reshape((len(node), 1)))[mask]] + + node = node.copy(starts=newoffsets[:-1], stops=newoffsets[1:], content=newcontent) else: # the other cases are possible, but complicated; the first sets the form From 1513ef6744e4e4f37e4561365cabbbf9e0ec5dcc Mon Sep 17 00:00:00 2001 From: Jim Pivarski Date: Mon, 19 Nov 2018 08:55:58 -0600 Subject: [PATCH 3/4] jagged subslice tests are all good --- tests/test_jagged.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/test_jagged.py b/tests/test_jagged.py index 35b93bf1..fa2563fd 100644 --- a/tests/test_jagged.py +++ b/tests/test_jagged.py @@ -141,6 +141,13 @@ def test_jagged_fancy(self): a = JaggedArray([0, 3, 3, 5], [3, 3, 5, 10], [[0.0], [1.1], [2.2], [3.3], [4.4], [5.5], [6.6], [7.7], [8.8], [9.9]]) assert a[[1, 2]].tolist() == [[], [[3.3], [4.4]]] + def test_jagged_subslice(self): + a = JaggedArray.fromiter([[], [100, 101, 102], [200, 201, 202, 203], [300, 301, 302, 303, 304], [], [500, 501], [600], []]) + for start in None, 0, 1, 2, 3, 4, 5, -1, -2, -3, -4, -5, -6: + for stop in None, 0, 1, 2, 3, 4, 5, -1, -2, -3, -4, -5, -6: + for step in None, 1, 2, 3, 4, 5, -1, -2, -3, -4, -5: + assert a[:, start:stop:step].tolist() == [x.tolist()[start:stop:step] for x in a] + def test_jagged_jagged(self): a = JaggedArray.fromoffsets([0, 3, 3, 5], JaggedArray.fromoffsets([0, 3, 3, 8, 10, 10], [0.0, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9])) assert [a[i].tolist() for i in range(len(a))] == [[[0.0, 1.1, 2.2], [], [3.3, 4.4, 5.5, 6.6, 7.7]], [], [[8.8, 9.9], []]] From 1ca9dd3ad76d0afb18bec4935bc1cb963cf5eaf6 Mon Sep 17 00:00:00 2001 From: Jim Pivarski Date: Mon, 19 Nov 2018 10:30:04 -0600 Subject: [PATCH 4/4] implemented and fully tested jaggedarray.regular) --- awkward/array/jagged.py | 30 ++++++++++++++++++++++-------- tests/test_jagged.py | 25 +++++++++++++++++++++++++ 2 files changed, 47 insertions(+), 8 deletions(-) diff --git a/awkward/array/jagged.py b/awkward/array/jagged.py index 4df7314e..35fc4377 100644 --- a/awkward/array/jagged.py +++ b/awkward/array/jagged.py @@ -193,6 +193,14 @@ def fromjagged(cls, jagged): jagged = jagged._tojagged(copy=False) return cls(jagged._starts, jagged._stops, jagged._content) + @classmethod + def fromregular(cls, content, size=1): + quotient = -(-len(content) // size) + offsets = awkward.util.numpy.arange(0, quotient * size + 1, size, dtype=awkward.util.INDEXTYPE) + if len(offsets) > 0: + offsets[-1] = len(content) + return cls.fromoffsets(offsets, content) + def copy(self, starts=None, stops=None, content=None): out = self.__class__.__new__(self.__class__) out._starts = self._starts @@ -745,6 +753,20 @@ def recurse(x): else: return awkward.array.objects.Methods.maybemixin(type(result), JaggedArray).fromcounts(counts, result) + def regular(self): + if len(self) > 0 and not (self.counts.reshape(-1)[0] == self.counts).all(): + raise ValueError("jagged array is not regular: different elements have different counts") + count = self.counts.reshape(-1)[0] + + if self._canuseoffset(): + out = self._content[self._starts[0]:self._stops[-1]] + return out.reshape(self._starts.shape + (count,) + self._content.shape[1:]) + + else: + indexes = awkward.util.numpy.repeat(self._starts, count).reshape(self._starts.shape + (count,)) + indexes += awkward.util.numpy.arange(count) + return self._content[indexes] + @staticmethod def aligned(*jaggedarrays): if not all(isinstance(x, JaggedArray) for x in jaggedarrays): @@ -1081,14 +1103,6 @@ def max(self): else: return self._minmax_general(False, False) - @classmethod - def regular(cls, content, size=1): - quotient = -(-len(content) // size) - offsets = awkward.util.numpy.arange(0, quotient * size + 1, size, dtype=awkward.util.INDEXTYPE) - if len(offsets) > 0: - offsets[-1] = len(content) - return cls.fromoffsets(offsets, content) - @classmethod def concat(cls, first, *rest): # all elements of first followed by all elements of second arrays = (first,) + rest diff --git a/tests/test_jagged.py b/tests/test_jagged.py index fa2563fd..d078c0c6 100644 --- a/tests/test_jagged.py +++ b/tests/test_jagged.py @@ -198,6 +198,31 @@ def test_jagged_ufunc_table(self): assert (a + numpy.array([100, 200, 300, 400])).tolist() == [[{"x": 100, "y": 100.0}, {"x": 101, "y": 101.1}, {"x": 102, "y": 102.2}], [], [{"x": 303, "y": 303.3}, {"x": 304, "y": 304.4}], [{"x": 405, "y": 405.5}, {"x": 406, "y": 406.6}, {"x": 407, "y": 407.7}, {"x": 408, "y": 408.8}, {"x": 409, "y": 409.9}]] assert (a + awkward.Table(x=[100, 200, 300, 400], y=[1000, 2000, 3000, 4000])).tolist() == [[{"x": 100, "y": 1000.0}, {"x": 101, "y": 1001.1}, {"x": 102, "y": 1002.2}], [], [{"x": 303, "y": 3003.3}, {"x": 304, "y": 3004.4}], [{"x": 405, "y": 4005.5}, {"x": 406, "y": 4006.6}, {"x": 407, "y": 4007.7}, {"x": 408, "y": 4008.8}, {"x": 409, "y": 4009.9}]] + def test_jagged_regular(self): + a = JaggedArray([0, 3, 6, 9], [3, 6, 9, 12], [0.0, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9, 10.0, 11.0]) + assert a.regular().tolist() == [[0.0, 1.1, 2.2], [3.3, 4.4, 5.5], [6.6, 7.7, 8.8], [9.9, 10.0, 11.0]] + + a = JaggedArray([0, 3, 6, 9], [3, 6, 9, 12], [[0.0], [1.1], [2.2], [3.3], [4.4], [5.5], [6.6], [7.7], [8.8], [9.9], [10.0], [11.0]]) + assert a.regular().tolist() == [[[0.0], [1.1], [2.2]], [[3.3], [4.4], [5.5]], [[6.6], [7.7], [8.8]], [[9.9], [10.0], [11.0]]] + + a = JaggedArray([[0, 3], [6, 9]], [[3, 6], [9, 12]], [0.0, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9, 10.0, 11.0]) + assert a.regular().tolist() == [[[0.0, 1.1, 2.2], [3.3, 4.4, 5.5]], [[6.6, 7.7, 8.8], [9.9, 10.0, 11.0]]] + + a = JaggedArray([[0, 3], [6, 9]], [[3, 6], [9, 12]], [[0.0], [1.1], [2.2], [3.3], [4.4], [5.5], [6.6], [7.7], [8.8], [9.9], [10.0], [11.0]]) + assert a.regular().tolist() == [[[[0.0], [1.1], [2.2]], [[3.3], [4.4], [5.5]]], [[[6.6], [7.7], [8.8]], [[9.9], [10.0], [11.0]]]] + + a = JaggedArray([0, 3, 7, 10], [3, 6, 10, 13], [0.0, 1.1, 2.2, 3.3, 4.4, 5.5, 999, 6.6, 7.7, 8.8, 9.9, 10.0, 11.0]) + assert a.regular().tolist() == [[0.0, 1.1, 2.2], [3.3, 4.4, 5.5], [6.6, 7.7, 8.8], [9.9, 10.0, 11.0]] + + a = JaggedArray([0, 3, 7, 10], [3, 6, 10, 13], [[0.0], [1.1], [2.2], [3.3], [4.4], [5.5], [999], [6.6], [7.7], [8.8], [9.9], [10.0], [11.0]]) + assert a.regular().tolist() == [[[0.0], [1.1], [2.2]], [[3.3], [4.4], [5.5]], [[6.6], [7.7], [8.8]], [[9.9], [10.0], [11.0]]] + + a = JaggedArray([[0, 3], [7, 10]], [[3, 6], [10, 13]], [0.0, 1.1, 2.2, 3.3, 4.4, 5.5, 999, 6.6, 7.7, 8.8, 9.9, 10.0, 11.0]) + assert a.regular().tolist() == [[[0.0, 1.1, 2.2], [3.3, 4.4, 5.5]], [[6.6, 7.7, 8.8], [9.9, 10.0, 11.0]]] + + a = JaggedArray([[0, 3], [7, 10]], [[3, 6], [10, 13]], [[0.0], [1.1], [2.2], [3.3], [4.4], [5.5], [999], [6.6], [7.7], [8.8], [9.9], [10.0], [11.0]]) + assert a.regular().tolist() == [[[[0.0], [1.1], [2.2]], [[3.3], [4.4], [5.5]]], [[[6.6], [7.7], [8.8]], [[9.9], [10.0], [11.0]]]] + def test_jagged_cross(self): pass