Merge pull request #34 from scikit-hep/issue-31

Resolves issue #31
scikit-hep · Nov 19, 2018 · 99a71c0 · 99a71c0
2 parents 72070b1 + 1ca9dd3
commit 99a71c0
Show file tree

Hide file tree

Showing 2 changed files with 83 additions and 24 deletions.
diff --git a/awkward/array/jagged.py b/awkward/array/jagged.py
@@ -193,6 +193,14 @@ def fromjagged(cls, jagged):
         jagged = jagged._tojagged(copy=False)
         return cls(jagged._starts, jagged._stops, jagged._content)
 
+    @classmethod
+    def fromregular(cls, content, size=1):
+        """Build a jagged array by cutting ``content`` into consecutive sublists of ``size`` items.
+
+        When ``len(content)`` is not a multiple of ``size``, the final
+        sublist is shorter: its end offset is clamped to ``len(content)``.
+        The offsets are handed to ``cls.fromoffsets`` together with
+        ``content``.
+
+        Note that ``size=0`` raises ``ZeroDivisionError`` from the
+        ceiling division below.
+        """
+        total = len(content)
+        # ceiling division: number of sublists needed to hold every item
+        nchunks = -(-total // size)
+        boundaries = awkward.util.numpy.arange(0, nchunks * size + 1, size, dtype=awkward.util.INDEXTYPE)
+        if len(boundaries) > 0:
+            # the last boundary may overshoot the content; pull it back
+            boundaries[-1] = total
+        return cls.fromoffsets(boundaries, content)
+
     def copy(self, starts=None, stops=None, content=None):
         out = self.__class__.__new__(self.__class__)
         out._starts  = self._starts
@@ -507,46 +515,59 @@ def __getitem__(self, where):
                     elif head.start >= 0:
                         starts = awkward.util.numpy.minimum(counts, head.start)
                     else:
-                        starts = awkward.util.numpy.minimum(counts, counts + head.start)
+                        starts = awkward.util.numpy.maximum(0, awkward.util.numpy.minimum(counts, counts + head.start))
 
                     if head.stop is None:
                         stops = counts
                     elif head.stop >= 0:
                         stops = awkward.util.numpy.minimum(counts, head.stop)
                     else:
-                        stops = awkward.util.numpy.minimum(counts, counts + head.stop)
+                        stops = awkward.util.numpy.maximum(0, awkward.util.numpy.minimum(counts, counts + head.stop))
+
+                    stops = awkward.util.numpy.maximum(starts, stops)
+
+                    start = starts.min()
+                    stop = stops.max()
+                    indexes = awkward.util.numpy.empty((len(node), abs(stop - start)), dtype=awkward.util.INDEXTYPE)
+                    indexes[:, :] = awkward.util.numpy.arange(start, stop)
+
+                    mask = indexes >= starts.reshape((len(node), 1))
+                    awkward.util.numpy.bitwise_and(mask, indexes < stops.reshape((len(node), 1)), out=mask)
+                    if step != 1:
+                        awkward.util.numpy.bitwise_and(mask, awkward.util.numpy.remainder(indexes - starts.reshape((len(node), 1)), step) == 0, out=mask)
 
                 else:
                     if head.start is None:
                         starts = counts - 1
                     elif head.start >= 0:
                         starts = awkward.util.numpy.minimum(counts - 1, head.start)
                     else:
-                        starts = awkward.util.numpy.minimum(counts - 1, counts + head.start)
+                        starts = awkward.util.numpy.maximum(-1, awkward.util.numpy.minimum(counts - 1, counts + head.start))
 
                     if head.stop is None:
                         stops = awkward.util.numpy.full(counts.shape, -1, dtype=awkward.util.INDEXTYPE)
                     elif head.stop >= 0:
                         stops = awkward.util.numpy.minimum(counts - 1, head.stop)
                     else:
-                        stops = awkward.util.numpy.minimum(counts - 1, counts + head.stop)
-
-                if step > 0:
-                    start = starts.min()
-                    stop = stops.max()
-                    newcounts = stops - starts
+                        stops = awkward.util.numpy.maximum(-1, awkward.util.numpy.minimum(counts - 1, counts + head.stop))
 
-                    quotient, remainder = divmod(stop - start, step)
-                    oversize = quotient + (1 if remainder != 0 else 0)
-                    indexes = awkward.util.numpy.empty((len(node), oversize), dtype=awkward.util.INDEXTYPE)
-                    indexes[:, :] = awkward.util.numpy.arange(start, stop, step)
+                    stops = awkward.util.numpy.minimum(starts, stops)
 
-                    absindexes = indexes + node._starts.reshape((len(node), 1))
+                    start = starts.max()
+                    stop = stops.min()
+                    indexes = awkward.util.numpy.empty((len(node), abs(stop - start)), dtype=awkward.util.INDEXTYPE)
+                    indexes[:, :] = awkward.util.numpy.arange(start, stop, -1)
 
-                    goodindexes = absindexes[awkward.util.numpy.bitwise_and(indexes >= starts.reshape((len(node), 1)), indexes < stops.reshape((len(node), 1)))]
+                    mask = indexes <= starts.reshape((len(node), 1))
+                    awkward.util.numpy.bitwise_and(mask, indexes > stops.reshape((len(node), 1)), out=mask)
+                    if step != -1:
+                        awkward.util.numpy.bitwise_and(mask, awkward.util.numpy.remainder(indexes - starts.reshape((len(node), 1)), step) == 0, out=mask)
 
+                newcounts = awkward.util.numpy.count_nonzero(mask, axis=1)
                 newoffsets = counts2offsets(newcounts.reshape(-1))
-                node = node.copy(starts=newoffsets[:-1], stops=newoffsets[1:], content=node.content[goodindexes])
+                newcontent = node.content[(indexes + node._starts.reshape((len(node), 1)))[mask]]
+
+                node = node.copy(starts=newoffsets[:-1], stops=newoffsets[1:], content=newcontent)
 
             else:
                 # the other cases are possible, but complicated; the first sets the form
@@ -732,6 +753,20 @@ def recurse(x):
         else:
             return awkward.array.objects.Methods.maybemixin(type(result), JaggedArray).fromcounts(counts, result)
 
+    def regular(self):
+        """Return the content as a rectangular array, one row per sublist.
+
+        Every sublist must have the same number of items (``count``).
+        The result is indexed by ``self._starts.shape + (count,)``,
+        followed by any trailing dimensions of the content.
+
+        An array with no sublists is treated as regular with a count of
+        zero, rather than failing on the lookup of the first count.
+
+        Raises:
+            ValueError: if the sublists do not all have the same count.
+        """
+        counts = self.counts.reshape(-1)
+        empty = len(counts) == 0
+
+        if empty:
+            # there is no first count to read; an empty result has zero columns
+            count = 0
+        else:
+            count = counts[0]
+            if not (count == counts).all():
+                raise ValueError("jagged array is not regular: different elements have different counts")
+
+        # the offset shortcut reads self._starts[0] and self._stops[-1],
+        # which do not exist for an empty array, so skip it in that case
+        if not empty and self._canuseoffset():
+            out = self._content[self._starts[0]:self._stops[-1]]
+            return out.reshape(self._starts.shape + (count,) + self._content.shape[1:])
+
+        else:
+            # general case: build the index of every item explicitly as
+            # start-of-sublist + position-within-sublist
+            indexes = awkward.util.numpy.repeat(self._starts, count).reshape(self._starts.shape + (count,))
+            indexes += awkward.util.numpy.arange(count)
+            return self._content[indexes]
+
     @staticmethod
     def aligned(*jaggedarrays):
         if not all(isinstance(x, JaggedArray) for x in jaggedarrays):
@@ -1068,14 +1103,6 @@ def max(self):
         else:
             return self._minmax_general(False, False)
 
-    @classmethod
-    def regular(cls, content, size=1):
-        quotient = -(-len(content) // size)
-        offsets = awkward.util.numpy.arange(0, quotient * size + 1, size, dtype=awkward.util.INDEXTYPE)
-        if len(offsets) > 0:
-            offsets[-1] = len(content)
-        return cls.fromoffsets(offsets, content)
-
     @classmethod
     def concat(cls, first, *rest):    # all elements of first followed by all elements of second
         arrays = (first,) + rest

diff --git a/tests/test_jagged.py b/tests/test_jagged.py
@@ -141,6 +141,13 @@ def test_jagged_fancy(self):
         a = JaggedArray([0, 3, 3, 5], [3, 3, 5, 10], [[0.0], [1.1], [2.2], [3.3], [4.4], [5.5], [6.6], [7.7], [8.8], [9.9]])
         assert a[[1, 2]].tolist() == [[], [[3.3], [4.4]]]
 
+    def test_jagged_subslice(self):
+        """``a[:, start:stop:step]`` must agree with slicing each sublist as a plain Python list."""
+        a = JaggedArray.fromiter([[], [100, 101, 102], [200, 201, 202, 203], [300, 301, 302, 303, 304], [], [500, 501], [600], []])
+        # bounds run past the longest sublist (5 items) in both directions
+        bounds = (None, 0, 1, 2, 3, 4, 5, -1, -2, -3, -4, -5, -6)
+        steps = (None, 1, 2, 3, 4, 5, -1, -2, -3, -4, -5)
+        for start in bounds:
+            for stop in bounds:
+                for step in steps:
+                    inner = slice(start, stop, step)
+                    actual = a[:, inner].tolist()
+                    expected = [x.tolist()[inner] for x in a]
+                    assert actual == expected
+
     def test_jagged_jagged(self):
         a = JaggedArray.fromoffsets([0, 3, 3, 5], JaggedArray.fromoffsets([0, 3, 3, 8, 10, 10], [0.0, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9]))
         assert [a[i].tolist() for i in range(len(a))] == [[[0.0, 1.1, 2.2], [], [3.3, 4.4, 5.5, 6.6, 7.7]], [], [[8.8, 9.9], []]]
@@ -191,6 +198,31 @@ def test_jagged_ufunc_table(self):
         assert (a + numpy.array([100, 200, 300, 400])).tolist() == [[{"x": 100, "y": 100.0}, {"x": 101, "y": 101.1}, {"x": 102, "y": 102.2}], [], [{"x": 303, "y": 303.3}, {"x": 304, "y": 304.4}], [{"x": 405, "y": 405.5}, {"x": 406, "y": 406.6}, {"x": 407, "y": 407.7}, {"x": 408, "y": 408.8}, {"x": 409, "y": 409.9}]]
         assert (a + awkward.Table(x=[100, 200, 300, 400], y=[1000, 2000, 3000, 4000])).tolist() == [[{"x": 100, "y": 1000.0}, {"x": 101, "y": 1001.1}, {"x": 102, "y": 1002.2}], [], [{"x": 303, "y": 3003.3}, {"x": 304, "y": 3004.4}], [{"x": 405, "y": 4005.5}, {"x": 406, "y": 4006.6}, {"x": 407, "y": 4007.7}, {"x": 408, "y": 4008.8}, {"x": 409, "y": 4009.9}]]
 
+    def test_jagged_regular(self):
+        """``regular()`` on equal-count jagged arrays yields the expected nested lists.
+
+        Two layouts are checked: sublists packed back to back, and the
+        same sublists with one unreferenced item (999) sitting between
+        them. Each layout is tried with 1-d and 2-d starts/stops and
+        with scalar and length-1-list content items.
+        """
+        flat = [0.0, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9, 10.0, 11.0]
+        # same values, plus a 999 that no sublist covers
+        gapped = flat[:6] + [999] + flat[6:]
+
+        rows = [[0.0, 1.1, 2.2], [3.3, 4.4, 5.5], [6.6, 7.7, 8.8], [9.9, 10.0, 11.0]]
+        boxedrows = [[[x] for x in row] for row in rows]
+
+        layouts = [
+            ([0, 3, 6, 9], [3, 6, 9, 12], flat),
+            ([0, 3, 7, 10], [3, 6, 10, 13], gapped),
+        ]
+
+        for starts, stops, content in layouts:
+            boxed = [[x] for x in content]
+            starts2d = [starts[:2], starts[2:]]
+            stops2d = [stops[:2], stops[2:]]
+
+            # 1-d starts/stops
+            a = JaggedArray(starts, stops, content)
+            assert a.regular().tolist() == rows
+
+            a = JaggedArray(starts, stops, boxed)
+            assert a.regular().tolist() == boxedrows
+
+            # 2-d starts/stops: the result gains a leading dimension
+            a = JaggedArray(starts2d, stops2d, content)
+            assert a.regular().tolist() == [rows[:2], rows[2:]]
+
+            a = JaggedArray(starts2d, stops2d, boxed)
+            assert a.regular().tolist() == [boxedrows[:2], boxedrows[2:]]
+
+
     def test_jagged_cross(self):
         pass