diff --git a/awkward/array/jagged.py b/awkward/array/jagged.py index c311e879..35fc4377 100644 --- a/awkward/array/jagged.py +++ b/awkward/array/jagged.py @@ -193,6 +193,14 @@ def fromjagged(cls, jagged): jagged = jagged._tojagged(copy=False) return cls(jagged._starts, jagged._stops, jagged._content) + @classmethod + def fromregular(cls, content, size=1): + quotient = -(-len(content) // size) + offsets = awkward.util.numpy.arange(0, quotient * size + 1, size, dtype=awkward.util.INDEXTYPE) + if len(offsets) > 0: + offsets[-1] = len(content) + return cls.fromoffsets(offsets, content) + def copy(self, starts=None, stops=None, content=None): out = self.__class__.__new__(self.__class__) out._starts = self._starts @@ -507,14 +515,26 @@ def __getitem__(self, where): elif head.start >= 0: starts = awkward.util.numpy.minimum(counts, head.start) else: - starts = awkward.util.numpy.minimum(counts, counts + head.start) + starts = awkward.util.numpy.maximum(0, awkward.util.numpy.minimum(counts, counts + head.start)) if head.stop is None: stops = counts elif head.stop >= 0: stops = awkward.util.numpy.minimum(counts, head.stop) else: - stops = awkward.util.numpy.minimum(counts, counts + head.stop) + stops = awkward.util.numpy.maximum(0, awkward.util.numpy.minimum(counts, counts + head.stop)) + + stops = awkward.util.numpy.maximum(starts, stops) + + start = starts.min() + stop = stops.max() + indexes = awkward.util.numpy.empty((len(node), abs(stop - start)), dtype=awkward.util.INDEXTYPE) + indexes[:, :] = awkward.util.numpy.arange(start, stop) + + mask = indexes >= starts.reshape((len(node), 1)) + awkward.util.numpy.bitwise_and(mask, indexes < stops.reshape((len(node), 1)), out=mask) + if step != 1: + awkward.util.numpy.bitwise_and(mask, awkward.util.numpy.remainder(indexes - starts.reshape((len(node), 1)), step) == 0, out=mask) else: if head.start is None: @@ -522,31 +542,32 @@ def __getitem__(self, where): elif head.start >= 0: starts = awkward.util.numpy.minimum(counts - 1, head.start) else: - starts = awkward.util.numpy.minimum(counts - 1, counts + head.start) + starts = awkward.util.numpy.maximum(-1, awkward.util.numpy.minimum(counts - 1, counts + head.start)) if head.stop is None: stops = awkward.util.numpy.full(counts.shape, -1, dtype=awkward.util.INDEXTYPE) elif head.stop >= 0: stops = awkward.util.numpy.minimum(counts - 1, head.stop) else: - stops = awkward.util.numpy.minimum(counts - 1, counts + head.stop) - - if step > 0: - start = starts.min() - stop = stops.max() - newcounts = stops - starts + stops = awkward.util.numpy.maximum(-1, awkward.util.numpy.minimum(counts - 1, counts + head.stop)) - quotient, remainder = divmod(stop - start, step) - oversize = quotient + (1 if remainder != 0 else 0) - indexes = awkward.util.numpy.empty((len(node), oversize), dtype=awkward.util.INDEXTYPE) - indexes[:, :] = awkward.util.numpy.arange(start, stop, step) + stops = awkward.util.numpy.minimum(starts, stops) - absindexes = indexes + node._starts.reshape((len(node), 1)) + start = starts.max() + stop = stops.min() + indexes = awkward.util.numpy.empty((len(node), abs(stop - start)), dtype=awkward.util.INDEXTYPE) + indexes[:, :] = awkward.util.numpy.arange(start, stop, -1) - goodindexes = absindexes[awkward.util.numpy.bitwise_and(indexes >= starts.reshape((len(node), 1)), indexes < stops.reshape((len(node), 1)))] + mask = indexes <= starts.reshape((len(node), 1)) + awkward.util.numpy.bitwise_and(mask, indexes > stops.reshape((len(node), 1)), out=mask) + if step != -1: + awkward.util.numpy.bitwise_and(mask, awkward.util.numpy.remainder(indexes - starts.reshape((len(node), 1)), step) == 0, out=mask) + newcounts = awkward.util.numpy.count_nonzero(mask, axis=1) newoffsets = counts2offsets(newcounts.reshape(-1)) - node = node.copy(starts=newoffsets[:-1], stops=newoffsets[1:], content=node.content[goodindexes]) + newcontent = node.content[(indexes + node._starts.reshape((len(node), 1)))[mask]] + + node = node.copy(starts=newoffsets[:-1], stops=newoffsets[1:], content=newcontent) else: # the other cases are possible, but complicated; the first sets the form @@ -732,6 +753,20 @@ def recurse(x): else: return awkward.array.objects.Methods.maybemixin(type(result), JaggedArray).fromcounts(counts, result) + def regular(self): + if len(self) > 0 and not (self.counts.reshape(-1)[0] == self.counts).all(): + raise ValueError("jagged array is not regular: different elements have different counts") + count = self.counts.reshape(-1)[0] + + if self._canuseoffset(): + out = self._content[self._starts[0]:self._stops[-1]] + return out.reshape(self._starts.shape + (count,) + self._content.shape[1:]) + + else: + indexes = awkward.util.numpy.repeat(self._starts, count).reshape(self._starts.shape + (count,)) + indexes += awkward.util.numpy.arange(count) + return self._content[indexes] + @staticmethod def aligned(*jaggedarrays): if not all(isinstance(x, JaggedArray) for x in jaggedarrays): @@ -1068,14 +1103,6 @@ def max(self): else: return self._minmax_general(False, False) - @classmethod - def regular(cls, content, size=1): - quotient = -(-len(content) // size) - offsets = awkward.util.numpy.arange(0, quotient * size + 1, size, dtype=awkward.util.INDEXTYPE) - if len(offsets) > 0: - offsets[-1] = len(content) - return cls.fromoffsets(offsets, content) - @classmethod def concat(cls, first, *rest): # all elements of first followed by all elements of second arrays = (first,) + rest diff --git a/tests/test_jagged.py b/tests/test_jagged.py index 35b93bf1..d078c0c6 100644 --- a/tests/test_jagged.py +++ b/tests/test_jagged.py @@ -141,6 +141,13 @@ def test_jagged_fancy(self): a = JaggedArray([0, 3, 3, 5], [3, 3, 5, 10], [[0.0], [1.1], [2.2], [3.3], [4.4], [5.5], [6.6], [7.7], [8.8], [9.9]]) assert a[[1, 2]].tolist() == [[], [[3.3], [4.4]]] + def test_jagged_subslice(self): + a = JaggedArray.fromiter([[], [100, 101, 102], [200, 201, 202, 203], [300, 301, 302, 303, 304], [], [500, 501], [600], []]) + for start in None, 0, 1, 2, 3, 4, 5, -1, -2, -3, -4, -5, -6: + for stop in None, 0, 1, 2, 3, 4, 5, -1, -2, -3, -4, -5, -6: + for step in None, 1, 2, 3, 4, 5, -1, -2, -3, -4, -5: + assert a[:, start:stop:step].tolist() == [x.tolist()[start:stop:step] for x in a] + def test_jagged_jagged(self): a = JaggedArray.fromoffsets([0, 3, 3, 5], JaggedArray.fromoffsets([0, 3, 3, 8, 10, 10], [0.0, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9])) assert [a[i].tolist() for i in range(len(a))] == [[[0.0, 1.1, 2.2], [], [3.3, 4.4, 5.5, 6.6, 7.7]], [], [[8.8, 9.9], []]] @@ -191,6 +198,31 @@ def test_jagged_ufunc_table(self): assert (a + numpy.array([100, 200, 300, 400])).tolist() == [[{"x": 100, "y": 100.0}, {"x": 101, "y": 101.1}, {"x": 102, "y": 102.2}], [], [{"x": 303, "y": 303.3}, {"x": 304, "y": 304.4}], [{"x": 405, "y": 405.5}, {"x": 406, "y": 406.6}, {"x": 407, "y": 407.7}, {"x": 408, "y": 408.8}, {"x": 409, "y": 409.9}]] assert (a + awkward.Table(x=[100, 200, 300, 400], y=[1000, 2000, 3000, 4000])).tolist() == [[{"x": 100, "y": 1000.0}, {"x": 101, "y": 1001.1}, {"x": 102, "y": 1002.2}], [], [{"x": 303, "y": 3003.3}, {"x": 304, "y": 3004.4}], [{"x": 405, "y": 4005.5}, {"x": 406, "y": 4006.6}, {"x": 407, "y": 4007.7}, {"x": 408, "y": 4008.8}, {"x": 409, "y": 4009.9}]] + def test_jagged_regular(self): + a = JaggedArray([0, 3, 6, 9], [3, 6, 9, 12], [0.0, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9, 10.0, 11.0]) + assert a.regular().tolist() == [[0.0, 1.1, 2.2], [3.3, 4.4, 5.5], [6.6, 7.7, 8.8], [9.9, 10.0, 11.0]] + + a = JaggedArray([0, 3, 6, 9], [3, 6, 9, 12], [[0.0], [1.1], [2.2], [3.3], [4.4], [5.5], [6.6], [7.7], [8.8], [9.9], [10.0], [11.0]]) + assert a.regular().tolist() == [[[0.0], [1.1], [2.2]], [[3.3], [4.4], [5.5]], [[6.6], [7.7], [8.8]], [[9.9], [10.0], [11.0]]] + + a = JaggedArray([[0, 3], [6, 9]], [[3, 6], [9, 12]], [0.0, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9, 10.0, 11.0]) + assert a.regular().tolist() == [[[0.0, 1.1, 2.2], [3.3, 4.4, 5.5]], [[6.6, 7.7, 8.8], [9.9, 10.0, 11.0]]] + + a = JaggedArray([[0, 3], [6, 9]], [[3, 6], [9, 12]], [[0.0], [1.1], [2.2], [3.3], [4.4], [5.5], [6.6], [7.7], [8.8], [9.9], [10.0], [11.0]]) + assert a.regular().tolist() == [[[[0.0], [1.1], [2.2]], [[3.3], [4.4], [5.5]]], [[[6.6], [7.7], [8.8]], [[9.9], [10.0], [11.0]]]] + + a = JaggedArray([0, 3, 7, 10], [3, 6, 10, 13], [0.0, 1.1, 2.2, 3.3, 4.4, 5.5, 999, 6.6, 7.7, 8.8, 9.9, 10.0, 11.0]) + assert a.regular().tolist() == [[0.0, 1.1, 2.2], [3.3, 4.4, 5.5], [6.6, 7.7, 8.8], [9.9, 10.0, 11.0]] + + a = JaggedArray([0, 3, 7, 10], [3, 6, 10, 13], [[0.0], [1.1], [2.2], [3.3], [4.4], [5.5], [999], [6.6], [7.7], [8.8], [9.9], [10.0], [11.0]]) + assert a.regular().tolist() == [[[0.0], [1.1], [2.2]], [[3.3], [4.4], [5.5]], [[6.6], [7.7], [8.8]], [[9.9], [10.0], [11.0]]] + + a = JaggedArray([[0, 3], [7, 10]], [[3, 6], [10, 13]], [0.0, 1.1, 2.2, 3.3, 4.4, 5.5, 999, 6.6, 7.7, 8.8, 9.9, 10.0, 11.0]) + assert a.regular().tolist() == [[[0.0, 1.1, 2.2], [3.3, 4.4, 5.5]], [[6.6, 7.7, 8.8], [9.9, 10.0, 11.0]]] + + a = JaggedArray([[0, 3], [7, 10]], [[3, 6], [10, 13]], [[0.0], [1.1], [2.2], [3.3], [4.4], [5.5], [999], [6.6], [7.7], [8.8], [9.9], [10.0], [11.0]]) + assert a.regular().tolist() == [[[[0.0], [1.1], [2.2]], [[3.3], [4.4], [5.5]]], [[[6.6], [7.7], [8.8]], [[9.9], [10.0], [11.0]]]] + def test_jagged_cross(self): pass