diff --git a/awkward/__init__.py b/awkward/__init__.py index 5002532a..406171a6 100644 --- a/awkward/__init__.py +++ b/awkward/__init__.py @@ -29,11 +29,10 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. from awkward.array.chunked import ChunkedArray, AppendableArray -from awkward.array.indexed import IndexedArray, ByteIndexedArray, IndexedMaskedArray +from awkward.array.indexed import IndexedArray, ByteIndexedArray, SparseArray from awkward.array.jagged import JaggedArray, ByteJaggedArray -from awkward.array.masked import MaskedArray, BitMaskedArray +from awkward.array.masked import MaskedArray, BitMaskedArray, IndexedMaskedArray from awkward.array.objects import Methods, ObjectArray -from awkward.array.sparse import SparseArray from awkward.array.table import Table from awkward.array.union import UnionArray from awkward.array.virtual import VirtualArray @@ -43,4 +42,4 @@ # convenient access to the version number from awkward.version import __version__ -__all__ = ["ChunkedArray", "AppendableArray", "IndexedArray", "ByteIndexedArray", "IndexedMaskedArray", "JaggedArray", "ByteJaggedArray", "MaskedArray", "BitMaskedArray", "Methods", "ObjectArray", "SparseArray", "Table", "UnionArray", "VirtualArray", "fromiter", "__version__"] +__all__ = ["ChunkedArray", "AppendableArray", "IndexedArray", "ByteIndexedArray", "SparseArray", "JaggedArray", "ByteJaggedArray", "MaskedArray", "BitMaskedArray", "IndexedMaskedArray", "Methods", "ObjectArray", "Table", "UnionArray", "VirtualArray", "fromiter", "__version__"] diff --git a/awkward/array/base.py b/awkward/array/base.py index fa9e8202..52a53eaa 100644 --- a/awkward/array/base.py +++ b/awkward/array/base.py @@ -28,11 +28,15 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+import types + import awkward.util class AwkwardArray(awkward.util.NDArrayOperatorsMixin): def __array__(self, *args, **kwargs): - raise Exception("{0} {1}".format(args, kwargs)) + # hitting this function is usually undesirable; uncomment to search for performance bugs + # raise Exception("{0} {1}".format(args, kwargs)) + return awkward.util.numpy.array(self, *args, **kwargs) def __iter__(self): for i in range(len(self)): @@ -65,8 +69,8 @@ def __bool__(self): __nonzero__ = __bool__ @property - def jshape(self): - return self.type.jshape + def size(self): + return len(self) def tolist(self): import awkward.array.table @@ -80,6 +84,9 @@ def tolist(self): out.append(self._try_tolist(x)) return out + def _valid(self): + pass + def valid(self): try: self._valid() @@ -88,6 +95,40 @@ def valid(self): else: return True + def _argfields(self, function): + if not isinstance(function, types.FunctionType): + raise TypeError("function (or lambda) required") + + if (isinstance(function, types.FunctionType) and function.__code__.co_argcount == 1) or isinstance(self._content, awkward.util.numpy.ndarray): + return None, None + + required = function.__code__.co_varnames[:function.__code__.co_argcount] + has_varargs = (function.__code__.co_flags & 0x04) != 0 + has_kwargs = (function.__code__.co_flags & 0x08) != 0 + + args = [] + kwargs = {} + + order = self.columns + + for i, n in enumerate(required): + if n in self._content: + args.append(n) + elif str(i) in self._content: + args.append(str(i)) + else: + args.append(order[i]) + + if has_varargs: + while str(i) in self._content: + args.append(str(i)) + i += 1 + + if has_kwargs: + kwargs = [n for n in self._content if n not in required] + + return args, kwargs + def apply(self, function): args, kwargs = self._argfields(function) if args is None and kwargs is None: @@ -123,3 +164,46 @@ def minby(self, function): args = tuple(self[n] for n in args) kwargs = dict((n, self[n]) for n in kwargs) return self[function(*args, 
**kwargs).argmin()] + +class AwkwardArrayWithContent(AwkwardArray): + def __setitem__(self, where, what): + if isinstance(where, awkward.util.string): + self._content[where] = what + + elif awkward.util.isstringslice(where): + if len(where) != len(what): + raise ValueError("number of keys ({0}) does not match number of provided arrays ({1})".format(len(where), len(what))) + for x, y in zip(where, what): + self._content[x] = y + + else: + raise TypeError("invalid index for assigning column to Table: {0}".format(where)) + + def __delitem__(self, where): + if isinstance(where, awkward.util.string): + del self._content[where] + elif awkward.util.isstringslice(where): + for x in where: + del self._content[x] + else: + raise TypeError("invalid index for removing column from Table: {0}".format(where)) + + @property + def base(self): + if isinstance(self._content, awkward.util.numpy.ndarray): + raise TypeError("array has no Table, and hence no base") + return self._content.base + + @property + def columns(self): + if isinstance(self._content, awkward.util.numpy.ndarray): + return [] + else: + return self._content.columns + + @property + def allcolumns(self): + if isinstance(self._content, awkward.util.numpy.ndarray): + return [] + else: + return self._content.allcolumns diff --git a/awkward/array/chunked.py b/awkward/array/chunked.py index 8ac4b289..7ab70eb6 100644 --- a/awkward/array/chunked.py +++ b/awkward/array/chunked.py @@ -45,6 +45,7 @@ def copy(self, chunks=None, counts=None): out._offsets = self._offsets if chunks is not None: out.chunks = chunks + out._counts = [] if counts is not None: out.counts = counts return out @@ -54,20 +55,26 @@ def deepcopy(self, chunks=None, counts=None): out._chunks = [awkward.util.deepcopy(out._chunks) for x in out._chunks] return out + def _mine(self, overrides): + return {} + def empty_like(self, **overrides): self.knowcounts() self._valid() - return self.copy([awkward.util.numpy.empty_like(x) if isinstance(x, 
awkward.util.numpy.ndarray) else x.empty_like(**overrides) for x in self._chunks], counts=list(self._counts)) + mine = self._mine(overrides) + return self.copy([awkward.util.numpy.empty_like(x) if isinstance(x, awkward.util.numpy.ndarray) else x.empty_like(**overrides) for x in self._chunks], counts=list(self._counts), **mine) def zeros_like(self, **overrides): self.knowcounts() self._valid() - return self.copy([awkward.util.numpy.zeros_like(x) if isinstance(x, awkward.util.numpy.ndarray) else x.zeros_like(**overrides) for x in self._chunks], counts=list(self._counts)) + mine = self._mine(overrides) + return self.copy([awkward.util.numpy.zeros_like(x) if isinstance(x, awkward.util.numpy.ndarray) else x.zeros_like(**overrides) for x in self._chunks], counts=list(self._counts), **mine) def ones_like(self, **overrides): self.knowcounts() self._valid() - return self.copy([awkward.util.numpy.ones_like(x) if isinstance(x, awkward.util.numpy.ndarray) else x.ones_like(**overrides) for x in self._chunks], counts=list(self._counts)) + mine = self._mine(overrides) + return self.copy([awkward.util.numpy.ones_like(x) if isinstance(x, awkward.util.numpy.ndarray) else x.ones_like(**overrides) for x in self._chunks], counts=list(self._counts), **mine) @property def chunks(self): @@ -79,7 +86,6 @@ def chunks(self, value): iter(value) except TypeError: raise TypeError("chunks must be iterable") - self._chunks = [awkward.util.toarray(x, awkward.util.DEFAULTTYPE) for x in value] self._types = [None] * len(self._chunks) @@ -116,13 +122,13 @@ def knowcounts(self, until=None): if until is None: until = len(self._chunks) if not 0 <= until <= len(self._chunks): - raise IndexError("cannot knowcounts until chunkid {0} with {1} chunks".format(until, len(self._chunks))) + raise ValueError("cannot knowcounts until chunkid {0} with {1} chunks".format(until, len(self._chunks))) for i in range(len(self._counts), until): self._counts.append(len(self._chunks[i])) def knowtype(self, at): if not 0 <= 
at < len(self._chunks): - raise IndexError("cannot knowtype at chunkid {0} with {1} chunks".format(at, len(self._chunks))) + raise ValueError("cannot knowtype at chunkid {0} with {1} chunks".format(at, len(self._chunks))) tpe = awkward.type.fromarray(self._chunks[at]) if tpe.takes == 0: self._types[at] = () @@ -248,33 +254,24 @@ def _slices(self): offsets = self.offsets return [slice(start, stop) for start, stop in zip(offsets[:-1], offsets[1:])] - @property - def base(self): - raise TypeError("ChunkedArray has no base") - def _valid(self): if len(self._counts) > len(self._chunks): raise ValueError("ChunkArray has more counts than chunks") + for i, count in enumerate(self._counts): + if count != len(self._chunks[i]): + raise ValueError("count[{0}] does not agree with len(chunk[{0}])".format(i)) tpe = self._type() for i in range(len(self._types)): if self._types[i] is None or self._types[i] is () or self._types[i] is tpe.to: pass - elif self._types[i] == tpe.to: - self._types[i] = tpe.to + elif self._types[i] == tpe.to: # valid if all chunks have the same high-level type + self._types[i] = tpe.to # once checked, make them identically equal for faster checking next time else: raise TypeError("chunks do not have matching types:\n\n{0}\n\nversus\n\n{1}".format(tpe.to.__str__(indent=" "), self._types[i].__str__(indent=" "))) return tpe - def _argfields(self, function): - if isinstance(function, types.FunctionType) and function.__code__.co_argcount == 1: - return awkward.util._argfields(function) - if len(self._chunks) == 0 or isinstance(self.type.to, awkward.util.numpy.dtype): - return awkward.util._argfields(function) - else: - return self._chunks[0]._argfields(function) - def __str__(self): if self.countsknown: return super(ChunkedArray, self).__str__() @@ -289,7 +286,7 @@ def __iter__(self): for i, chunk in enumerate(self._chunks): if i >= len(self._counts): self._counts.append(len(chunk)) - for x in chunk: + for x in chunk[:self._counts[i]]: yield x def 
__array__(self, *args, **kwargs): @@ -499,9 +496,20 @@ def __setitem__(self, where, what): mine[where] = theirs else: raise ValueError("only ChunkedArrays with the same chunk sizes can be assigned to columns of a ChunkedArray") - + + def __delitem__(self, where): + if isinstance(where, awkward.util.string): + for chunk in self._chunks: + del chunk[where] + elif awkward.util.isstringslice(where): + for chunk in self._chunks: + for x in where: + del chunk[x] + else: + raise TypeError("invalid index for removing column from Table: {0}".format(where)) + def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): - self._valid() + import awkward.array.objects if method != "__call__": return NotImplemented @@ -510,6 +518,7 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): rest = [] for x in inputs: if isinstance(x, ChunkedArray): + x._valid() if first is None: first = x else: @@ -535,6 +544,7 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): out = None chunks = {} + types = {} for batch in batches: result = getattr(ufunc, method)(*batch, **kwargs) @@ -542,30 +552,32 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): if out is None: out = list(result) for i, x in enumerate(result): - if isinstance(x, (awkward.util.numpy.ndarray, awkward.array.base.AwkwardBase)): + if isinstance(x, (awkward.util.numpy.ndarray, awkward.array.base.AwkwardArray)): if i not in chunks: chunks[i] = [] chunks[i].append(x) + types[i] = type(x) elif method == "at": pass else: - if isinstance(result, (awkward.util.numpy.ndarray, awkward.array.base.AwkwardBase)): + if isinstance(result, (awkward.util.numpy.ndarray, awkward.array.base.AwkwardArray)): if None not in chunks: chunks[None] = [] chunks[None].append(result) + types[None] = type(result) - if out is None: - if None in chunks: - return ChunkedArray(chunks[None]) - else: - return None + if out is None: + if None in chunks: + return awkward.array.objects.Methods.maybemixin(types[None], 
ChunkedArray)(chunks[None]) else: - for i in range(len(out)): - if i in chunks: - out[i] = ChunkedArray(chunks[i]) - return tuple(out) + return None + else: + for i in range(len(out)): + if i in chunks: + out[i] = awkward.array.objects.Methods.maybemixin(types[i], ChunkedArray)(chunks[i]) + return tuple(out) def any(self): return any(x.any() for x in self._chunks) @@ -577,79 +589,147 @@ def all(self): def concat(cls, first, *rest): raise NotImplementedError + @property + def base(self): + raise TypeError("ChunkedArray has no base") + @property def columns(self): - if len(self._chunks) == 0 or isinstance(self._chunks[0], awkward.util.numpy.ndarray): - raise TypeError("array has no Table, and hence no columns") - return self._chunks[0].columns + if len(self._chunks) == 0 or isinstance(self.type.to, awkward.util.numpy.dtype): + return [] + else: + return self._chunks[0].columns @property def allcolumns(self): - if len(self._chunks) == 0 or isinstance(self._chunks[0], awkward.util.numpy.ndarray): - raise TypeError("array has no Table, and hence no columns") - return self._chunks[0].allcolumns + if len(self._chunks) == 0 or isinstance(self.type.to, awkward.util.numpy.dtype): + return [] + else: + return self._chunks[0].allcolumns def pandas(self): raise NotImplementedError class AppendableArray(ChunkedArray): - pass - -# class AppendableArray(ChunkedArray): -# @classmethod -# def empty(cls, generator): -# return AppendableArray([0], [], generator) - -# def __init__(self, offsets, chunks, generator): -# super(AppendableArray, self).__init__(offsets, chunks) -# self.generator = generator - -# @property -# def offsets(self): -# return self._offsets - -# @offsets.setter -# def offsets(self, value): -# self._offsets = list(value) - -# @property -# def generator(self): -# return self._generator - -# @generator.setter -# def generator(self, value): -# if not callable(value): -# raise TypeError("generator must be a callable (of zero arguments)") -# self._generator = value - -# 
def append(self, value): -# if len(self._offsets) != len(self._chunks) + 1: -# raise ValueError("length of offsets {0} must be equal to length of chunks {1} plus one ({2})".format(len(self._offsets), len(self._chunks), len(self._chunks) + 1)) - -# if len(self._chunks) == 0 or self._offsets[-1] - self._offsets[-2] == len(self._chunks[-1]): -# self._chunks.append(self._generator()) -# self._offsets.append(self._offsets[-1]) - -# laststart = self._offsets[-1] - self._offsets[-2] -# self._chunks[-1][laststart] = value -# self._offsets[-1] += 1 - -# def extend(self, values): -# if len(self._offsets) != len(self._chunks) + 1: -# raise ValueError("length of offsets {0} must be equal to length of chunks {1} plus one ({2})".format(len(self._offsets), len(self._chunks), len(self._chunks) + 1)) - -# while len(values) > 0: -# if len(self._chunks) == 0 or self._offsets[-1] - self._offsets[-2] >= len(self._chunks[-1]): -# self._chunks.append(self._generator()) -# self._offsets.append(self._offsets[-1]) - -# laststart = self._offsets[-1] - self._offsets[-2] -# available = len(self._chunks[-1]) - laststart -# if len(values) < available: -# self._chunks[-1][laststart : laststart + len(values)] = values -# self._offsets[-1] += len(values) -# values = [] -# else: -# self._chunks[-1][laststart:] = values[:available] -# self._offsets[-1] += available -# values = values[available:] + def __init__(self, chunkshape, dtype, chunks=[]): + self.chunkshape = chunkshape + self.dtype = dtype + self.chunks = chunks + + def copy(self, chunkshape=None, dtype=None, chunks=None): + out = self.__class__.__new__(self.__class__) + out._chunkshape = chunkshape + out._dtype = dtype + out._chunks = list(self._chunks) + out._counts = list(self._counts) + out._types = list(self._types) + if chunks is not None: + out.chunks = chunks + return out + + def _mine(self, overrides): + mine = {} + mine["chunkshape"] = overrides.pop("chunkshape", self._chunkshape) + mine["dtype"] = overrides.pop("dtype", 
self._dtype) + return mine + + @property + def chunkshape(self): + return self._chunkshape + + @chunkshape.setter + def chunkshape(self, value): + if isinstance(value, awkward.util.integer) and value > 0: + self._chunkshape = (value,) + else: + try: + for x in value: + assert isinstance(x, awkward.util.integer) and value > 0 + except TypeError: + raise TypeError("chunkshape must be an integer or a tuple of integers") + except AssertionError: + raise ValueError("chunkshape must be a positive integer or tuple of positive integers") + else: + self._chunkshape = tuple(value) + + @property + def dtype(self): + return self._dtype + + @dtype.setter + def dtype(self, value): + self._dtype = awkward.util.numpy.dtype(value) + + @property + def chunks(self): + return self._chunks + + @chunks.setter + def chunks(self, value): + try: + iter(value) + except TypeError: + raise TypeError("chunks must be iterable") + chunks = [awkward.util.toarray(x, awkward.util.DEFAULTTYPE, awkward.util.numpy.ndarray) for x in value] + for chunk in chunks: + if chunk.dtype != self._dtype: + raise ValueError("cannot assign chunk with dtype ({0}) to an AppendableArray with dtype ({1})".format(chunk.dtype, self._dtype)) + if chunk.shape[1:] != self._chunkshape[1:]: + raise ValueError("cannot assign chunk with dimensionality ({0}) to an AppendableArray with dimensionality ({1}), where dimensionality is shape[1:]".format(chunk.shape[1:], self._chunkshape[1:])) + self._chunks = chunks + self._counts = [len(x) for x in self._chunks] + self._types = [None] * len(self._chunks) + + @property + def counts(self): + return self._counts + + @counts.setter + def counts(self, value): + raise AttributeError("cannot assign to counts in AppendableArray") + + def knowcounts(self, until=None): + pass + + @property + def offsets(self): + import awkward.array.jagged + return awkward.array.jagged.counts2offsets(self._counts) + + @property + def type(self): + return awkward.type.ArrayType(*(self.shape + (self._dtype,))) 
+ + @property + def shape(self): + return (len(self),) + self._chunkshape[1:] + + def _valid(self): + pass + + def __setitem__(self, where, what): + raise TypeError("array has no Table, cannot assign columns") + + def __delitem__(self, where): + raise TypeError("array has no Table, cannot remove columns") + + def append(self, value): + if len(self._chunks) == 0 or self._counts[-1] == len(self._chunks[-1]): + self._types.append(None) + self._counts.append(0) + self._chunks.append(awkward.util.numpy.empty(self._chunkshape, dtype=self._dtype)) + + self._chunks[-1][self._counts[-1]] = value + self._counts[-1] += 1 + + def extend(self, values): + while len(values) > 0: + if len(self._chunks) == 0 or self._counts[-1] == len(self._chunks[-1]): + self._types.append(None) + self._counts.append(0) + self._chunks.append(awkward.util.numpy.empty(self._chunkshape, dtype=self._dtype)) + + howmany = min(len(values), len(self._chunks[-1]) - self._counts[-1]) + self._chunks[-1][self._counts[-1] : self._counts[-1] + howmany] = values[:howmany] + self._counts[-1] += howmany + values = values[howmany:] diff --git a/awkward/array/indexed.py b/awkward/array/indexed.py index 3df2296a..eb5387a0 100644 --- a/awkward/array/indexed.py +++ b/awkward/array/indexed.py @@ -29,6 +29,7 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
import awkward.array.base +import awkward.type import awkward.util def invert(permutation): @@ -40,12 +41,10 @@ def invert(permutation): raise ValueError("cannot invert index; it contains duplicates") return out -class IndexedArray(awkward.array.base.AwkwardArray): +class IndexedArray(awkward.array.base.AwkwardArrayWithContent): def __init__(self, index, content): self.index = index self.content = content - self._inverse = None - self._isvalid = False def copy(self, index=None, content=None): out = self.__class__.__new__(self.__class__) @@ -67,31 +66,22 @@ def deepcopy(self, index=None, content=None): return out def empty_like(self, **overrides): - mine = {} - mine["index"] = overrides.pop("index", self._index) - mine["content"] = overrides.pop("content", self._content) if isinstance(self._content, awkward.util.numpy.ndarray): - return self.copy(content=awkward.util.numpy.empty_like(self._content), **mine) + return self.copy(content=awkward.util.numpy.empty_like(self._content)) else: - return self.copy(content=self._content.empty_like(**overrides), **mine) + return self.copy(content=self._content.empty_like(**overrides)) def zeros_like(self, **overrides): - mine = {} - mine["index"] = overrides.pop("index", self._index) - mine["content"] = overrides.pop("content", self._content) if isinstance(self._content, awkward.util.numpy.ndarray): - return self.copy(content=awkward.util.numpy.zeros_like(self._content), **mine) + return self.copy(content=awkward.util.numpy.zeros_like(self._content)) else: - return self.copy(content=self._content.zeros_like(**overrides), **mine) + return self.copy(content=self._content.zeros_like(**overrides)) def ones_like(self, **overrides): - mine = {} - mine["index"] = overrides.pop("index", self._index) - mine["content"] = overrides.pop("content", self._content) if isinstance(self._content, awkward.util.numpy.ndarray): - return self.copy(content=awkward.util.numpy.ones_like(self._content), **mine) + return 
self.copy(content=awkward.util.numpy.ones_like(self._content)) else: - return self.copy(content=self._content.ones_like(**overrides), **mine) + return self.copy(content=self._content.ones_like(**overrides)) @property def index(self): @@ -99,7 +89,7 @@ def index(self): @index.setter def index(self, value): - value = awkward.util.toarray(value, awkward.util.INDEXTYPE) + value = awkward.util.toarray(value, awkward.util.INDEXTYPE, awkward.util.numpy.ndarray) if not issubclass(value.dtype.type, awkward.util.numpy.integer): raise TypeError("index must have integer dtype") if (value < 0).any(): @@ -130,11 +120,7 @@ def __len__(self): @property def type(self): - return self._content.type - - @property - def base(self): - return self._content.base + return awkward.type.ArrayType(*(self._index.shape + (awkward.type.fromarray(self._content).to,))) def _valid(self): if not self._isvalid: @@ -142,12 +128,6 @@ def _valid(self): raise ValueError("maximum index ({0}) is beyond the length of the content ({1})".format(self._index.reshape(-1).max(), len(self._content))) self._isvalid = True - def _argfields(self, function): - if (isinstance(function, types.FunctionType) and function.__code__.co_argcount == 1) or isinstance(self._content, awkward.util.numpy.ndarray): - return awkward.util._argfields(function) - else: - return self._content._argfields(function) - def __iter__(self): self._valid() for i in self._index: @@ -157,9 +137,7 @@ def __getitem__(self, where): self._valid() if awkward.util.isstringslice(where): - out = self.copy(self._index, self._content[where]) - out._isvalid = True - return out + return self.copy(content=self._content[where]) if isinstance(where, tuple) and len(where) == 0: return self @@ -167,21 +145,20 @@ def __getitem__(self, where): where = (where,) head, tail = where[:len(self._index.shape)], where[len(self._index.shape):] - where = self._index[where] - if len(where.shape) != 0 and len(where) == 0: + head = self._index[head] + if len(head.shape) != 0 and 
len(head) == 0: return awkward.util.numpy.empty(0, dtype=self._content.dtype)[tail] else: - return self._content[(where,) + tail] + return self._content[(head,) + tail] def _invert(self, what): - if what.shape != self._index.shape: - raise ValueError("array to assign does not have the same shape as index") if self._inverse is None: self._inverse = invert(self._index) return IndexedArray(self._inverse, what) def __setitem__(self, where, what): - self._valid() + if what.shape[:len(self._index.shape)] != self._index.shape: + raise ValueError("array to assign does not have the same starting shape as index") if isinstance(where, awkward.util.string): self._content[where] = self._invert(what) @@ -196,14 +173,13 @@ def __setitem__(self, where, what): raise TypeError("invalid index for assigning column to Table: {0}".format(where)) def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): - self._valid() - if method != "__call__": return NotImplemented inputs = list(inputs) for i in range(len(inputs)): if isinstance(inputs[i], IndexedArray): + inputs[i]._valid() inputs[i] = inputs[i][:] return getattr(ufunc, method)(*inputs, **kwargs) @@ -212,18 +188,6 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): def concat(cls, first, *rest): raise NotImplementedError - @property - def columns(self): - if isinstance(self._content, awkward.util.numpy.ndarray): - raise TypeError("array has no Table, and hence no columns") - return self._content.columns - - @property - def allcolumns(self): - if isinstance(self._content, awkward.util.numpy.ndarray): - raise TypeError("array has no Table, and hence no columns") - return self._content.allcolumns - def pandas(self): import pandas @@ -298,7 +262,7 @@ def content(self, value): @property def type(self): - return awkward.type.ArrayType(self._index.shape, self._dtype) + return awkward.type.ArrayType(*(self._index.shape + (self._dtype,))) @property def dtype(self): @@ -312,6 +276,7 @@ def _valid(self): if not self._isvalid: if 
len(self._index) != 0 and self._index.reshape(-1).max() > len(self._content): raise ValueError("maximum index ({0}) is beyond the length of the content ({1})".format(self._index.reshape(-1).max(), len(self._content))) + self._isvalid = True def __iter__(self): self._valid() @@ -337,7 +302,11 @@ def __getitem__(self, where): starts = self._index[where] if len(starts.shape) == 0: - return self._content[starts : starts + self._dtype.itemsize].view(self._dtype)[0] + if self._dtype.subdtype is None: + return self._content[starts : starts + self._dtype.itemsize].view(self._dtype)[0] + else: + dt, sh = self._dtype.subdtype + return self._content[starts : starts + self._dtype.itemsize].view(dt).reshape(sh) else: if len(starts) == 0: @@ -346,15 +315,24 @@ def __getitem__(self, where): else: index = awkward.util.numpy.repeat(starts, self._dtype.itemsize) index += awkward.util.numpy.tile(awkward.util.numpy.arange(self._dtype.itemsize), len(starts)) - return self._content[index].view(self._dtype) + if self._dtype.subdtype is None: + return self._content[index].view(self._dtype) + else: + dt, sh = self._dtype.subdtype + return self._content[index].view(dt).reshape((-1,) + sh) def __setitem__(self, where, what): - self._valid() if awkward.util.isstringslice(where): raise IndexError("only integers, slices (`:`), and integer or boolean arrays are valid indices") else: raise TypeError("invalid index for assigning column to Table: {0}".format(where)) + def __delitem__(self, where): + if awkward.util.isstringslice(where): + raise IndexError("only integers, slices (`:`), and integer or boolean arrays are valid indices") + else: + raise TypeError("invalid index for removing column from Table: {0}".format(where)) + def any(self): return self._content[self._index].any() @@ -365,35 +343,96 @@ def all(self): def concat(cls, first, *rest): raise NotImplementedError - @property - def columns(self): - raise NotImplementedError - - @property - def allcolumns(self): - raise NotImplementedError 
- def pandas(self): raise NotImplementedError -class IndexedMaskedArray(IndexedArray): - def __init__(self, index, content, maskedwhen=-1): - raise NotImplementedError +class SparseArray(awkward.array.base.AwkwardArrayWithContent): + def __init__(self, length, index, content, default=None): + self.length = length + self.index = index + self.content = content + self.default = default - def copy(self, index=None, content=None): - raise NotImplementedError + def copy(self, length=None, index=None, content=None, default=None): + out = self.__class__.__new__(self.__class__) + out._length = self._length + out._index = self._index + out._content = self._content + out._default = self._default + out._inverse = self._inverse + if length is not None: + out.length = length + if index is not None: + out.index = index + if content is not None: + out.content = content + if default is not None: + out.default = default + return out - def deepcopy(self, index=None, content=None): - raise NotImplementedError + def deepcopy(self, length=None, index=None, content=None, default=None): + out = self.copy(length=length, index=index, content=content, default=default) + out._index = awkward.util.deepcopy(out._index) + out._content = awkward.util.deepcopy(out._content) + out._inverse = awkward.util.deepcopy(out._inverse) + return out def empty_like(self, **overrides): - raise NotImplementedError + mine = {} + mine = overrides.pop("length", self._length) + mine = overrides.pop("default", self._default) + if isinstance(self._content, awkward.util.numpy.ndarray): + return self.copy(content=awkward.util.numpy.empty_like(self._content), **mine) + else: + return self.copy(content=self._content.empty_like(**overrides), **mine) def zeros_like(self, **overrides): - raise NotImplementedError + mine = {} + mine = overrides.pop("length", self._length) + mine = overrides.pop("default", self._default) + if isinstance(self._content, awkward.util.numpy.ndarray): + return 
self.copy(content=awkward.util.numpy.zeros_like(self._content), **mine) + else: + return self.copy(content=self._content.zeros_like(**overrides), **mine) def ones_like(self, **overrides): - raise NotImplementedError + mine = {} + mine = overrides.pop("length", self._length) + mine = overrides.pop("default", self._default) + if isinstance(self._content, awkward.util.numpy.ndarray): + return self.copy(content=awkward.util.numpy.ones_like(self._content), **mine) + else: + return self.copy(content=self._content.ones_like(**overrides), **mine) + + @property + def length(self): + return self._length + + @length.setter + def length(self, value): + if not isinstance(value, awkward.util.integer): + raise TypeError("length must be an integer") + if value < 0: + raise ValueError("length must be a non-negative integer") + self._length = value + + @property + def index(self): + return self._index + + @index.setter + def index(self, value): + value = awkward.util.toarray(value, awkward.util.INDEXTYPE, awkward.util.numpy.ndarray) + if not issubclass(value.dtype.type, awkward.util.numpy.integer): + raise TypeError("index must have integer dtype") + if len(value.shape) != 1: + raise ValueError("index must be one-dimensional") + if (value < 0).any(): + raise ValueError("index must be a non-negative array") + if len(value) > 0 and not (value[1:] >= value[:-1]).all(): + raise ValueError("index must be monatonically increasing") + self._index = value + self._inverse = None @property def content(self): @@ -401,87 +440,254 @@ def content(self): @content.setter def content(self, value): - raise NotImplementedError + self._content = awkward.util.toarray(value, awkward.util.DEFAULTTYPE) @property - def type(self): - raise NotImplementedError + def default(self): + import awkward.array.jagged + + if self._default is None: + if isinstance(self._content, awkward.array.jagged.JaggedArray): + return awkward.array.jagged.JaggedArray([0], [0], self._content.content) + elif self._content.shape[1:] 
== (): + return self._content.dtype.type(0) + else: + return awkward.util.numpy.zeros(self._content.shape[1:], dtype=self._content.dtype) + + else: + return self._default + + @default.setter + def default(self, value): + self._default = value + @property + def type(self): + return awkward.type.ArrayType(self._length, awkward.type.fromarray(self._content).to) + def __len__(self): - raise NotImplementedError + return self._length @property def shape(self): - raise NotImplementedError + return (self._length,) + self._content.shape[1:] @property def dtype(self): - raise NotImplementedError - - @property - def base(self): - raise NotImplementedError + return self._content.dtype def _valid(self): - raise NotImplementedError - - def _argfields(self, function): - raise NotImplementedError + if len(self._index) > len(self._content): + raise ValueError("length of index ({0}) must not be greater than the length of content ({1})".format(len(self._index), len(self._content))) def __iter__(self): - raise NotImplementedError + self._valid() + + length = self._length + index = self._index + lenindex = len(self._index) + content = self._content + default = self.default + + i = 0 + j = awkward.util.numpy.searchsorted(index, 0, side="left") + while i != length: + if j == lenindex: + yield default + elif index[j] == i: + yield content[j] + while j != lenindex and index[j] == i: + j += 1 + else: + yield default + i += 1 def __getitem__(self, where): - raise NotImplementedError + import awkward.array.union + self._valid() + + if awkward.util.isstringslice(where): + return self.copy(content=self._content[where]) + + if isinstance(where, tuple) and len(where) == 0: + return self + if not isinstance(where, tuple): + where = (where,) + head, tail = where[0], where[1:] + + if isinstance(head, awkward.util.integer): + original_head = head + if head < 0: + head += self._length + if not 0 <= head < self._length: + raise IndexError("index {0} is out of bounds for size {1}".format(original_head, 
length)) + + match = awkward.util.numpy.searchsorted(self._index, head, side="left") + + if self._index[match] == head: + return self._content[(match,) + tail] + elif tail == (): + return self.default + else: + return self.default[tail] + + elif isinstance(head, slice): + start, stop, step = head.indices(self._length) + + if step == 0: + raise ValueError("slice step cannot be zero") + elif step > 0: + mask = (self._index < stop) + mask &= (self._index >= start) + index = self._index - start + elif step < 0: + mask = (self._index > stop) + mask &= (self._index <= start) + index = start - self._index + + if (step > 0 and stop - start > 0) or (step < 0 and stop - start < 0): + d, m = divmod(abs(start - stop), abs(step)) + length = d + (1 if m != 0 else 0) + else: + length = 0 + + if abs(step) > 1: + index, remainder = awkward.util.numpy.divmod(index, abs(step)) + mask[remainder != 0] = False + + index = index[mask] + content = self._content[mask] + if step < 0: + index = index[::-1] + content = content[::-1] + + return self.copy(length=length, index=index, content=content)[tail] + + elif isinstance(head, SparseArray) and len(head.shape) == 1 and issubclass(head.dtype.type, (awkward.util.numpy.bool, awkward.util.numpy.bool_)): + head._valid() + if self._length != head._length: + raise IndexError("boolean index did not match indexed array along dimension 0; dimension is {0} but corresponding boolean dimension is {1}".format(self._length, head._length)) + + # the new index is a cumsum (starting at zero) of the boolean values + index = awkward.util.numpy.cumsum(head._content) + length = index[-1] + index[1:] = index[:-1] + index[0] = 0 + + # find my sparse elements in the mask's sparse elements + match1 = awkward.util.numpy.searchsorted(head._index, self._index, side="left") + match1[match1 >= len(head._index)] = len(head._index) - 1 + content = self._content[awkward.util.numpy.logical_and(head._index[match1] == self._index, head._content[match1])] + + # find the mask's 
sparse elements in my sparse elements + match2 = awkward.util.numpy.searchsorted(self._index, head._index, side="left") + index = index[awkward.util.numpy.logical_and(self._index[match2] == head._index, head._content)] + + return self.copy(length=length, index=index, content=content) + + else: + head = awkward.util.toarray(head, awkward.util.INDEXTYPE) + if len(head.shape) == 1 and issubclass(head.dtype.type, (awkward.util.numpy.bool, awkward.util.numpy.bool_)): + if self._length != len(head): + raise IndexError("boolean index did not match indexed array along dimension 0; dimension is {0} but corresponding boolean dimension is {1}".format(self._length, len(head))) + + head = awkward.util.numpy.arange(self._length, dtype=awkward.util.INDEXTYPE)[head] + + if len(head.shape) == 1 and issubclass(head.dtype.type, awkward.util.numpy.integer): + mask = (head < 0) + if mask.any(): + head[mask] += self._length + if (head < 0).any() or (head >= self._length).any(): + raise IndexError("indexes out of bounds for size {0}".format(self._length)) + + match = awkward.util.numpy.searchsorted(self._index, head, side="left") + explicit = (self._index[match] == head) + + tags = awkward.util.numpy.zeros(len(head), dtype=awkward.util.TAGTYPE) + index = awkward.util.numpy.zeros(len(head), dtype=awkward.util.INDEXTYPE) + tags[explicit] = 1 + index[explicit] = awkward.util.numpy.arange(awkward.util.numpy.count_nonzero(explicit)) + + content = self._content[match[explicit]] + default = awkward.util.numpy.array([self.default]) + return awkward.array.union.UnionArray(tags, index, [default, content])[tail] + + else: + raise TypeError("cannot interpret shape {0}, dtype {1} as a fancy index or mask".format(head.shape, head.dtype)) + + def _getinverse(self): + if self._inverse is None: + self._inverse = awkward.util.numpy.searchsorted(self._index, awkward.util.numpy.arange(self._length, dtype=awkward.util.INDEXTYPE), side="left") + if len(self._index) > 0: + self._inverse[self._index[-1] + 1 :] 
= len(self._index) - 1 + return self._inverse + + @property + def dense(self): + self._valid() + + if isinstance(self._content, awkward.util.numpy.ndarray): + out = awkward.util.numpy.full(self.shape, self.default, dtype=self.dtype) + if len(self._index) != 0: + mask = self.boolmask(maskedwhen=True) + out[mask] = self._content[self._inverse[mask]] + return out + + else: + raise NotImplementedError(type(self._content)) + + def boolmask(self, maskedwhen=True): + self._valid() + + if len(self._index) == 0: + return awkward.util.numpy.empty(0, dtype=awkward.util.numpy.bool_) + + if maskedwhen: + return self._index[self._getinverse()] == awkward.util.numpy.arange(self._length, dtype=awkward.util.INDEXTYPE) + else: + return self._index[self._getinverse()] != awkward.util.numpy.arange(self._length, dtype=awkward.util.INDEXTYPE) + + def _invert(self, what): + if len(what) != self._length: + raise ValueError("cannot assign array of length {0} to sparse table of length {1}".format(len(what), self._length)) + + test = what[self.boolmask(maskedwhen=False)].any() + while not isinstance(test, bool): + test = test.any() + + if test: + raise ValueError("cannot assign an array with non-zero elements in the undefined spots of a sparse table") + + return IndexedArray(self._inverse, what) def __setitem__(self, where, what): - raise NotImplementedError + if isinstance(where, awkward.util.string): + self._content[where] = self._invert(what) + + elif awkward.util.isstringslice(where): + if len(where) != len(what): + raise ValueError("number of keys ({0}) does not match number of provided arrays ({1})".format(len(where), len(what))) + for x, y in zip(where, what): + self._content[x] = self._invert(y) + + else: + raise TypeError("invalid index for assigning column to Table: {0}".format(where)) def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): - raise NotImplementedError + if method != "__call__": + return NotImplemented - @classmethod - def concat(cls, first, *rest): - raise 
NotImplementedError + inputs = list(inputs) + for i in range(len(inputs)): + if isinstance(inputs[i], SparseArray): + inputs[i]._valid() + inputs[i] = inputs[i].dense # FIXME: can do better (optimization) - @property - def columns(self): - raise NotImplementedError + return getattr(ufunc, method)(*inputs, **kwargs) - @property - def allcolumns(self): + @classmethod + def concat(cls, first, *rest): raise NotImplementedError def pandas(self): raise NotImplementedError - -# class IndexedMaskedArray(IndexedArray): -# def __init__(self, index, content, maskedwhen=-1): -# super(IndexedMaskedArray, self).__init__(index, content) -# self.maskedwhen = maskedwhen - -# @property -# def maskedwhen(self): -# return self._maskedwhen - -# @maskedwhen.setter -# def maskedwhen(self, value): -# if not isinstance(value, (numbers.Integral, numpy.integer)): -# raise TypeError("maskedwhen must be an integer") -# self._maskedwhen = value - -# def __getitem__(self, where): -# if self._isstring(where): -# return IndexedMaskedArray(self._index, self._content[where], maskedwhen=self._maskedwhen) - -# if not isinstance(where, tuple): -# where = (where,) -# head, tail = where[0], where[1:] - -# if isinstance(head, (numbers.Integral, numpy.integer)): -# if self._index[head] == self._maskedwhen: -# return numpy.ma.masked -# else: -# return self._content[self._singleton((self._index[head],) + tail)] -# else: -# return IndexedMaskedArray(self._index[head], self._content[self._singleton((slice(None),) + tail)], maskedwhen=self._maskedwhen) diff --git a/awkward/array/jagged.py b/awkward/array/jagged.py index bf56b7ac..8f672043 100644 --- a/awkward/array/jagged.py +++ b/awkward/array/jagged.py @@ -30,7 +30,6 @@ import math import numbers -import types import awkward.array.base import awkward.type @@ -101,7 +100,7 @@ def uniques2offsetsparents(uniques): return offsets, parents -class JaggedArray(awkward.array.base.AwkwardArray): +class JaggedArray(awkward.array.base.AwkwardArrayWithContent): def 
__init__(self, starts, stops, content): self.starts = starts self.stops = stops @@ -118,7 +117,9 @@ def fromiter(cls, iterable): @classmethod def fromoffsets(cls, offsets, content): - offsets = awkward.util.toarray(offsets, awkward.util.INDEXTYPE) + offsets = awkward.util.toarray(offsets, awkward.util.INDEXTYPE, awkward.util.numpy.ndarray) + if not issubclass(offsets.dtype.type, awkward.util.numpy.integer): + raise TypeError("offsets must have integer dtype") if len(offsets.shape) != 1 or (offsets < 0).any(): raise ValueError("offsets must be a one-dimensional, non-negative array") out = cls(offsets[:-1], offsets[1:], content) @@ -127,7 +128,9 @@ def fromoffsets(cls, offsets, content): @classmethod def fromcounts(cls, counts, content): - counts = awkward.util.toarray(counts, awkward.util.INDEXTYPE) + counts = awkward.util.toarray(counts, awkward.util.INDEXTYPE, awkward.util.numpy.ndarray) + if not issubclass(counts.dtype.type, awkward.util.numpy.integer): + raise TypeError("counts must have integer dtype") if (counts < 0).any(): raise ValueError("counts must be a non-negative array") offsets = counts2offsets(counts.reshape(-1)) @@ -138,7 +141,9 @@ def fromcounts(cls, counts, content): @classmethod def fromparents(cls, parents, content): - parents = awkward.util.toarray(parents, awkward.util.INDEXTYPE) + parents = awkward.util.toarray(parents, awkward.util.INDEXTYPE, awkward.util.numpy.ndarray) + if not issubclass(parents.dtype.type, awkward.util.numpy.integer): + raise TypeError("parents must have integer dtype") if len(parents.shape) != 1 or len(parents) != len(content): raise ValueError("parents array must be one-dimensional with the same length as content") starts, stops = parents2startsstops(parents) @@ -148,7 +153,9 @@ def fromparents(cls, parents, content): @classmethod def fromuniques(cls, uniques, content): - uniques = awkward.util.toarray(uniques, awkward.util.INDEXTYPE) + uniques = awkward.util.toarray(uniques, awkward.util.INDEXTYPE, 
awkward.util.numpy.ndarray) + if not issubclass(uniques.dtype.type, awkward.util.numpy.integer): + raise TypeError("uniques must have integer dtype") if len(uniques.shape) != 1 or len(uniques) != len(content): raise ValueError("uniques array must be one-dimensional with the same length as content") offsets, parents = uniques2offsetsparents(uniques) @@ -189,31 +196,22 @@ def deepcopy(self, starts=None, stops=None, content=None): return out def empty_like(self, **overrides): - mine = {} - mine["starts"] = overrides.pop("starts", self._starts) - mine["stops"] = overrides.pop("stops", self._stops) if isinstance(self._content, awkward.util.numpy.ndarray): - return self.copy(content=awkward.util.numpy.empty_like(self._content), **mine) + return self.copy(content=awkward.util.numpy.empty_like(self._content)) else: - return self.copy(content=self._content.empty_like(**overrides), **mine) + return self.copy(content=self._content.empty_like(**overrides)) def zeros_like(self, **overrides): - mine = {} - mine["starts"] = overrides.pop("starts", self._starts) - mine["stops"] = overrides.pop("stops", self._stops) if isinstance(self._content, awkward.util.numpy.ndarray): - return self.copy(content=awkward.util.numpy.zeros_like(self._content), **mine) + return self.copy(content=awkward.util.numpy.zeros_like(self._content)) else: - return self.copy(content=self._content.zeros_like(**overrides), **mine) + return self.copy(content=self._content.zeros_like(**overrides)) def ones_like(self, **overrides): - mine = {} - mine["starts"] = overrides.pop("starts", self._starts) - mine["stops"] = overrides.pop("stops", self._stops) if isinstance(self._content, awkward.util.numpy.ndarray): - return self.copy(content=awkward.util.numpy.ones_like(self._content), **mine) + return self.copy(content=awkward.util.numpy.ones_like(self._content)) else: - return self.copy(content=self._content.ones_like(**overrides), **mine) + return self.copy(content=self._content.ones_like(**overrides)) @property def 
starts(self): @@ -221,9 +219,11 @@ def starts(self): @starts.setter def starts(self, value): - value = awkward.util.toarray(value, awkward.util.INDEXTYPE) + value = awkward.util.toarray(value, awkward.util.INDEXTYPE, awkward.util.numpy.ndarray) if not issubclass(value.dtype.type, awkward.util.numpy.integer): raise TypeError("starts must have integer dtype") + if len(value.shape) == 0: + raise ValueError("starts must have at least one dimension") if (value < 0).any(): raise ValueError("starts must be a non-negative array") self._starts = value @@ -236,9 +236,11 @@ def stops(self): @stops.setter def stops(self, value): - value = awkward.util.toarray(value, awkward.util.INDEXTYPE) + value = awkward.util.toarray(value, awkward.util.INDEXTYPE, awkward.util.numpy.ndarray) if not issubclass(value.dtype.type, awkward.util.numpy.integer): raise TypeError("stops must have integer dtype") + if len(value.shape) == 0: + raise ValueError("stops must have at least one dimension") if (value < 0).any(): raise ValueError("stops must be a non-negative array") self._stops = value @@ -268,7 +270,7 @@ def offsets(self): @offsets.setter def offsets(self, value): - value = awkward.util.toarray(value, awkward.util.INDEXTYPE) + value = awkward.util.toarray(value, awkward.util.INDEXTYPE, awkward.util.numpy.ndarray) if not issubclass(value.dtype.type, awkward.util.numpy.integer): raise TypeError("offsets must have integer dtype") if len(value.shape) != 1 or (value < 0).any(): @@ -288,9 +290,11 @@ def counts(self): @counts.setter def counts(self, value): - value = awkward.util.toarray(value, awkward.util.INDEXTYPE) + value = awkward.util.toarray(value, awkward.util.INDEXTYPE, awkward.util.numpy.ndarray) if not issubclass(value.dtype.type, awkward.util.numpy.integer): raise TypeError("counts must have integer dtype") + if len(value.shape) == 0: + raise ValueError("counts must have at least one dimension") if (value < 0).any(): raise ValueError("counts must be a non-negative array") offsets = 
counts2offsets(value.reshape(-1)) @@ -313,11 +317,11 @@ def parents(self): @parents.setter def parents(self, value): - value = awkward.util.toarray(value, awkward.util.INDEXTYPE) + value = awkward.util.toarray(value, awkward.util.INDEXTYPE, awkward.util.numpy.ndarray) if not issubclass(value.dtype.type, awkward.util.numpy.integer): raise TypeError("parents must have integer dtype") - if len(value) != len(content): - raise ValueError("parents array must have the same length as content") + if len(value.shape) == 0: + raise ValueError("parents must have at least one dimension") self._starts, self._stops = parents2startsstops(value) self._offsets, self._counts = None, None self._parents = value @@ -344,44 +348,29 @@ def __len__(self): def type(self): return awkward.type.ArrayType(*(self._starts.shape + (awkward.type.ArrayType(awkward.util.numpy.inf, awkward.type.fromarray(self._content).to),))) - @property - def base(self): - return self._content.base - - def _valid(self): + def _valid(self, assign_isvalid=True): if not self._isvalid: self._validstartsstops(self._starts, self._stops) - stops = self._stops[self._starts != self._stops].reshape(-1) + nonempty = (self._starts != self._stops) + + starts = self._starts[nonempty].reshape(-1) + if len(starts) != 0 and starts.reshape(-1).max() >= len(self._content): + raise ValueError("maximum start ({0}) is at or beyond the length of the content ({1})".format(starts.reshape(-1).max(), len(self._content))) + + stops = self._stops[nonempty].reshape(-1) if len(stops) != 0 and stops.reshape(-1).max() > len(self._content): raise ValueError("maximum stop ({0}) is beyond the length of the content ({1})".format(self._stops.reshape(-1).max(), len(self._content))) - self._isvalid = True - - def _argfields(self, function): - if (isinstance(function, types.FunctionType) and function.__code__.co_argcount == 1) or isinstance(self._content, awkward.util.numpy.ndarray): - return awkward.util._argfields(function) - else: - return 
self._content._argfields(function) + if assign_isvalid: + self._isvalid = True @staticmethod def _validstartsstops(starts, stops): - if len(starts.shape) == 0: - raise TypeError("starts must have at least one dimension") - if starts.shape[0] == 0: - starts = starts.view(awkward.util.INDEXTYPE) - if not issubclass(starts.dtype.type, awkward.util.numpy.integer): - raise TypeError("starts must have integer dtype") - - if len(stops.shape) != len(starts.shape): - raise TypeError("stops must have the same shape as starts") - if stops.shape[0] == 0: - stops = stops.view(awkward.util.INDEXTYPE) - if not issubclass(stops.dtype.type, awkward.util.numpy.integer): - raise TypeError("stops must have integer dtype") - if len(starts) > len(stops): - raise ValueError("starts must not have more elements than stops") + raise ValueError("starts must have the same (or shorter) length than stops") + if starts.shape[1:] != stops.shape[1:]: + raise ValueError("starts and stops must have the same dimensionality (shape[1:])") def __iter__(self): self._valid() @@ -398,12 +387,7 @@ def __getitem__(self, where): self._valid() if awkward.util.isstringslice(where): - out = self.copy(self._starts, self._stops, self._content[where]) - out._offsets = self._offsets - out._counts = self._counts - out._parents = self._parents - out._isvalid = True - return out + return self.copy(self._starts, self._stops, self._content[where]) if isinstance(where, tuple) and len(where) == 0: return self @@ -504,15 +488,6 @@ def __setitem__(self, where, what): else: raise TypeError("invalid index for assigning column to Table: {0}".format(where)) - def __delitem__(self, where): - if isinstance(where, awkward.util.string): - del self._content[where] - elif awkward.util.isstringslice(where): - for x in where: - del self._content[x] - else: - raise TypeError("invalid index for removing column from Table: {0}".format(where)) - def _broadcast(self, data): data = awkward.util.toarray(data, self._content.dtype) good = 
(self.parents >= 0) @@ -535,26 +510,26 @@ def _tojagged(self, starts=None, stops=None, copy=True): elif stops is None: starts = awkward.util.toarray(starts, awkward.util.INDEXTYPE) if len(self) != len(starts): - raise IndexError("cannot fit JaggedArray of length {0} into starts of length {1}".format(len(self), len(starts))) + raise ValueError("cannot fit JaggedArray of length {0} into starts of length {1}".format(len(self), len(starts))) stops = starts + self.counts if (stops[:-1] > starts[1:]).any(): - raise IndexError("cannot fit contents of JaggedArray into the given starts array") + raise ValueError("cannot fit contents of JaggedArray into the given starts array") elif starts is None: stops = awkward.util.toarray(stops, awkward.util.INDEXTYPE) if len(self) != len(stops): - raise IndexError("cannot fit JaggedArray of length {0} into stops of length {1}".format(len(self), len(stops))) + raise ValueError("cannot fit JaggedArray of length {0} into stops of length {1}".format(len(self), len(stops))) starts = stops - self.counts if (stops[:-1] > starts[1:]).any(): - raise IndexError("cannot fit contents of JaggedArray into the given stops array") + raise ValueError("cannot fit contents of JaggedArray into the given stops array") else: if not awkward.util.numpy.array_equal(stops - starts, self.counts): - raise IndexError("cannot fit contents of JaggedArray into the given starts and stops arrays") + raise ValueError("cannot fit contents of JaggedArray into the given starts and stops arrays") self._validstartsstops(starts, stops) @@ -589,8 +564,6 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): import awkward.array.objects import awkward.array.table - self._valid() - if method != "__call__": return NotImplemented @@ -598,6 +571,8 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): starts, stops = None, None for i in range(len(inputs)): if isinstance(inputs[i], JaggedArray): + inputs[i]._valid() + if starts is stops is None: inputs[i] = 
inputs[i]._tojagged(copy=False) starts, stops = inputs[i]._starts, inputs[i]._stops @@ -747,7 +722,7 @@ def argcross(self, other): self._valid() if not isinstance(other, JaggedArray): - raise ValueError("both arrays must be JaggedArrays") + raise TypeError("both arrays must be JaggedArrays") if len(self) != len(other): raise ValueError("both JaggedArrays must have the same length") @@ -1025,7 +1000,7 @@ def concat(cls, first, *rest): # all elements of first followed by all elemen x._valid() if not all(isinstance(x, JaggedArray) for x in arrays): - raise ValueError("cannot hconcat JaggedArrays with non-JaggedArrays") + raise TypeError("cannot concat JaggedArrays with non-JaggedArrays") starts = awkward.util.numpy.concatenate([x._starts for x in arrays]) stops = awkward.util.numpy.concatenate([x._stops for x in arrays]) @@ -1071,18 +1046,6 @@ def zip(cls, columns1={}, *columns2, **columns3): newtable = awkward.array.table.Table(awkward.util.OrderedDict(zip(table._content, [x._content for x in inputs]))) return cls(first._starts, first._stops, newtable) - @property - def columns(self): - if isinstance(self._content, awkward.util.numpy.ndarray): - raise TypeError("array has no Table, and hence no columns") - return self._content.columns - - @property - def allcolumns(self): - if isinstance(self._content, awkward.util.numpy.ndarray): - raise TypeError("array has no Table, and hence no columns") - return self._content.allcolumns - def pandas(self): import pandas @@ -1161,7 +1124,10 @@ def content(self): @content.setter def content(self, value): - self._content = awkward.util.toarray(value, awkward.util.CHARTYPE, awkward.util.numpy.ndarray).view(awkward.util.CHARTYPE).reshape(-1) + value = awkward.util.toarray(value, awkward.util.CHARTYPE, awkward.util.numpy.ndarray) + if len(value.shape) != 1: + raise ValueError("content of ByteJaggedArray must have 1-dimensional shape") + self._content = value.view(awkward.util.CHARTYPE) self._isvalid = False @property @@ -1202,9 
+1168,10 @@ def _divitemsize(self, x): def _valid(self): if not self._isvalid: - super(ByteJaggedArray, self)._valid() + super(ByteJaggedArray, self)._valid(assign_isvalid=False) - if (self._divitemsize(self.counts) * self._subdtype.itemsize != self.counts).any(): + counts = self._stops - self._starts + if (self._divitemsize(counts) * self._subdtype.itemsize != counts).any(): raise ValueError("not all counts are a multiple of {0}".format(self._subdtype.itemsize)) self._isvalid = True @@ -1262,31 +1229,31 @@ def _tojagged(self, starts=None, stops=None, copy=True): elif stops is None: starts = awkward.util.toarray(starts, awkward.util.INDEXTYPE) if self._starts.shape != starts.shape: - raise IndexError("cannot fit ByteJaggedArray with shape {0} into starts with shape {1}".format(self._starts.shape, starts.shape)) + raise ValueError("cannot fit ByteJaggedArray with shape {0} into starts with shape {1}".format(self._starts.shape, starts.shape)) bytestarts = starts * self._subdtype.itemsize bytestops = bytestarts + self.counts stops = self._divitemsize(bytestops) if (stops[:-1] > starts[1:]).any(): - raise IndexError("cannot fit contents of ByteJaggedArray into the given starts array") + raise ValueError("cannot fit contents of ByteJaggedArray into the given starts array") elif starts is None: stops = awkward.util.toarray(stops, awkward.util.INDEXTYPE) if self._stops.shape != stops.shape: - raise IndexError("cannot fit ByteJaggedArray with shape {0} into stops with shape {1}".format(self._stops.shape, stops.shape)) + raise ValueError("cannot fit ByteJaggedArray with shape {0} into stops with shape {1}".format(self._stops.shape, stops.shape)) bytestops = stops * self._subdtype.itemsize bytestarts = bytestops - self.counts starts = self._divitemsize(bytestarts) if (stops[:-1] > starts[1:]).any(): - raise IndexError("cannot fit contents of ByteJaggedArray into the given stops array") + raise ValueError("cannot fit contents of ByteJaggedArray into the given stops array") 
else: bytestarts, bytestops = starts * self._subdtype.itemsize, stops * self._subdtype.itemsize if not awkward.util.numpy.array_equal(bytestops - bytestarts, self.counts): - raise IndexError("cannot fit contents of ByteJaggedArray into the given starts and stops arrays") + raise ValueError("cannot fit contents of ByteJaggedArray into the given starts and stops arrays") JaggedArray._validstartsstops(bytestarts, bytestops) if (bytestops - bytestarts != (stops - starts) * self._subdtype.itemsize).any(): diff --git a/awkward/array/masked.py b/awkward/array/masked.py index 17757d18..23050432 100644 --- a/awkward/array/masked.py +++ b/awkward/array/masked.py @@ -32,25 +32,97 @@ import numbers import awkward.array.base +import awkward.type +import awkward.util + +class MaskedArray(awkward.array.base.AwkwardArrayWithContent): + ### WTF were the designers of numpy.ma thinking? + # @staticmethod + # def is_masked(x): + # return awkward.util.numpy.ma.is_masked(x) + # masked = awkward.util.numpy.ma.masked + + @staticmethod + def is_masked(x): + if isinstance(x, MaskedArray): + # numpy.ma.is_masked(array) if any element is masked + if x.maskedwhen: + return x.mask.any() + else: + return not x.mask.all() + else: + # numpy.ma.is_masked(x) if x represents a masked constant + return x is MaskedArray.masked + masked = None -class MaskedArray(awkward.array.base.AwkwardArray): def __init__(self, mask, content, maskedwhen=True): - raise NotImplementedError - - def copy(self, index=None, content=None): - raise NotImplementedError - - def deepcopy(self, index=None, content=None): - raise NotImplementedError + self.mask = mask + self.content = content + self.maskedwhen = maskedwhen + + def copy(self, mask=None, content=None, maskedwhen=None): + out = self.__class__.__new__(self.__class__) + out._mask = self._mask + out._content = self._content + out._maskedwhen = self._maskedwhen + if mask is not None: + out._mask = mask + if content is not None: + out._content = content + if maskedwhen 
is not None: + out._maskedwhen = maskedwhen + return out + + def deepcopy(self, mask=None, content=None): + out = self.copy(mask=mask, content=content) + out._mask = awkward.util.deepcopy(out._mask) + out._content = awkward.util.deepcopy(out._content) + return out + + def _mine(self, overrides): + mine = {} + mine["maskedwhen"] = overrides.pop("maskedwhen", self._maskedwhen) + return mine def empty_like(self, **overrides): - raise NotImplementedError + mine = self._mine(overrides) + if isinstance(self._content, awkward.util.numpy.ndarray): + return self.copy(content=awkward.util.numpy.empty_like(self._content), **mine) + else: + return self.copy(content=self._content.empty_like(**overrides), **mine) def zeros_like(self, **overrides): - raise NotImplementedError + mine = self._mine(overrides) + if isinstance(self._content, awkward.util.numpy.ndarray): + return self.copy(content=awkward.util.numpy.zeros_like(self._content), **mine) + else: + return self.copy(content=self._content.zeros_like(**overrides), **mine) def ones_like(self, **overrides): - raise NotImplementedError + mine = self._mine(overrides) + if isinstance(self._content, awkward.util.numpy.ndarray): + return self.copy(content=awkward.util.numpy.ones_like(self._content), **mine) + else: + return self.copy(content=self._content.ones_like(**overrides), **mine) + + @property + def mask(self): + return self._mask + + @mask.setter + def mask(self, value): + value = awkward.util.toarray(value, awkward.util.MASKTYPE, awkward.util.numpy.ndarray) + if len(value.shape) != 1: + raise ValueError("mask must have 1-dimensional shape") + if not issubclass(value.dtype.type, (awkward.util.numpy.bool_, awkward.util.numpy.bool)): + value = (value != 0) + self._mask = value + + def boolmask(self, maskedwhen=True): + if maskedwhen == self._maskedwhen: + return self._mask + else: + return awkward.util.numpy.logical_not(self._mask) @property def content(self): @@ -58,158 +130,392 @@ def content(self): @content.setter def 
content(self, value): - raise NotImplementedError + self._content = awkward.util.toarray(value, awkward.util.DEFAULTTYPE) @property - def type(self): - raise NotImplementedError + def maskedwhen(self): + return self._maskedwhen - def __len__(self): - raise NotImplementedError + @maskedwhen.setter + def maskedwhen(self, value): + self._maskedwhen = bool(value) @property - def shape(self): - raise NotImplementedError + def dtype(self): + return self._content.dtype + + def __len__(self): + return len(self._mask) @property - def dtype(self): - raise NotImplementedError + def shape(self): + return (len(self._mask),) + self._content.shape[1:] @property - def base(self): - raise NotImplementedError + def type(self): + return awkward.type.ArrayType(len(self._mask), awkward.type.fromarray(self._content).to) def _valid(self): - raise NotImplementedError - - def _argfields(self, function): - raise NotImplementedError + if len(self._mask) > len(self._content): + raise ValueError("mask length ({0}) must be the same as (or shorter than) the content length ({1})".format(len(self._mask), len(self._content))) def __iter__(self): - raise NotImplementedError + self._valid() + + mask = self._mask + lenmask = len(mask) + content = self._content + maskedwhen = self._maskedwhen + masked = self.masked + + i = 0 + while i < lenmask: + if mask[i] == maskedwhen: + yield masked + else: + yield content[i] + i += 1 def __getitem__(self, where): - raise NotImplementedError - - def __setitem__(self, where, what): - raise NotImplementedError - + self._valid() + + if awkward.util.isstringslice(where): + return self.copy(content=self._content[where]) + + if isinstance(where, tuple) and len(where) == 0: + return self + if not isinstance(where, tuple): + where = (where,) + head, tail = where[0], where[1:] + + if isinstance(head, awkward.util.integer): + if self._mask[head] == self._maskedwhen: + if tail != (): + raise ValueError("masked element ({0}) is not subscriptable".format(self.masked)) + return 
self.masked + else: + return self._content[:len(self._mask)][(head,) + tail] + + else: + mask = self._mask[head] + if tail != () and ((self._maskedwhen and mask.any()) or (not self._maskedwhen and not mask.all())): + raise ValueError("masked element ({0}) is not subscriptable".format(self.masked)) + else: + return self.copy(mask=mask, content=self._content[:len(self._mask)][(head,) + tail]) + def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): - raise NotImplementedError + import awkward.array.objects + + if method != "__call__": + return NotImplemented + + tokeep = None + for x in inputs: + if isinstance(x, MaskedArray): + x._valid() + if tokeep is None: + tokeep = x.boolmask(maskedwhen=False) + else: + tokeep = tokeep & x.boolmask(maskedwhen=False) + + assert tokeep is not None + + inputs = list(inputs) + for i in range(len(inputs)): + if isinstance(inputs[i], IndexedMaskedArray): + inputs[i] = inputs[i]._content[inputs[i]._mask[tokeep]] + elif isinstance(inputs[i], MaskedArray): + inputs[i] = inputs[i]._content[tokeep] + elif isinstance(inputs[i], (awkward.util.numpy.ndarray, awkward.array.base.AwkwardArray)): + inputs[i] = inputs[i][tokeep] + else: + try: + for first in inputs[i]: + break + except TypeError: + pass + else: + inputs[i] = awkward.util.numpy.array(inputs[i], copy=False)[tokeep] + + # compute only the non-masked elements + result = getattr(ufunc, method)(*inputs, **kwargs) + + # put the masked out values back + index = awkward.util.numpy.full(len(tokeep), -1, dtype=awkward.util.INDEXTYPE) + index[tokeep] = awkward.util.numpy.arange(awkward.util.numpy.count_nonzero(tokeep)) + + if isinstance(result, tuple): + return tuple(awkward.array.objects.Methods.maybemixin(type(x), IndexedMaskedArray)(index, x, maskedwhen=-1) if isinstance(x, (awkward.util.numpy.ndarray, awkward.array.base.AwkwardBase)) else x for x in result) + elif method == "at": + return None + else: + return awkward.array.objects.Methods.maybemixin(type(result), 
IndexedMaskedArray)(index, result, maskedwhen=-1) + + def indexed(self): + maskindex = awkward.util.numpy.arange(len(self), dtype=awkward.util.INDEXTYPE) + maskindex[self.boolmask(maskedwhen=True)] = -1 + return IndexedMaskedArray(maskindex, self._content, maskedwhen=-1) def any(self): - raise NotImplementedError + return self._content[self.boolmask(maskedwhen=False)].any() def all(self): - raise NotImplementedError + return self._content[self.boolmask(maskedwhen=False)].all() @classmethod def concat(cls, first, *rest): raise NotImplementedError - @property - def columns(self): - raise NotImplementedError - - @property - def allcolumns(self): - raise NotImplementedError - def pandas(self): raise NotImplementedError -# class MaskedArray(awkward.array.base.AwkwardArray): -# def __init__(self, mask, content, maskedwhen=True): -# self.mask = mask -# self.content = content -# self.maskedwhen = maskedwhen - -# @property -# def mask(self): -# return self._mask - -# @mask.setter -# def mask(self, value): -# value = self._toarray(value, self.MASKTYPE, (numpy.ndarray, awkward.array.base.AwkwardArray)) - -# if len(value.shape) != 1: -# raise TypeError("mask must have 1-dimensional shape") -# if value.shape[0] == 0: -# value = value.view(self.MASKTYPE) -# if not issubclass(value.dtype.type, (numpy.bool_, numpy.bool)): -# raise TypeError("mask must have boolean dtype") +class BitMaskedArray(MaskedArray): + def __init__(self, mask, content, maskedwhen=True, lsborder=False): + super(BitMaskedArray, self).__init__(mask, content, maskedwhen=maskedwhen) + self.lsborder = lsborder -# self._mask = value + @classmethod + def fromboolmask(cls, mask, content, maskedwhen=True, lsborder=False): + return BitMaskedArray(BitMaskedArray.bool2bit(mask, lsborder=lsborder), content, maskedwhen=maskedwhen, lsborder=lsborder) + + def copy(self, mask=None, content=None, maskedwhen=None, lsborder=None): + out = super(BitMaskedArray, self).copy(mask=mask, content=content, maskedwhen=maskedwhen) + 
out._lsborder = self._lsborder + if lsborder is not None: + out._lsborder = lsborder + return out + + def _mine(self, overrides): + mine = {} + mine["maskedwhen"] = overrides.pop("maskedwhen", self._maskedwhen) + mine["lsborder"] = overrides.pop("lsborder", self._lsborder) + return mine -# @property -# def boolmask(self): -# return self._mask + @property + def mask(self): + return self._mask -# @boolmask.setter -# def boolmask(self, value): -# self.mask = value + @mask.setter + def mask(self, value): + value = awkward.util.toarray(value, awkward.util.BITMASKTYPE, awkward.util.numpy.ndarray) + if len(value.shape) != 1: + raise ValueError("mask must have 1-dimensional shape") + self._mask = value.view(awkward.util.BITMASKTYPE) -# @property -# def content(self): -# return self._content + def __len__(self): + return len(self._content) -# @content.setter -# def content(self, value): -# self._content = self._toarray(value, self.CHARTYPE, (numpy.ndarray, awkward.array.base.AwkwardArray)) + @property + def shape(self): + return self._content.shape + + @staticmethod + def _ceildiv8(x): + return -(-x >> 3) # this is int(math.ceil(x / 8)) + + @staticmethod + def bit2bool(bitmask, lsborder=False): + out = awkward.util.numpy.unpackbits(bitmask) + if lsborder: + out = out.reshape(-1, 8)[:,::-1].reshape(-1) + return out.view(awkward.util.MASKTYPE) -# @property -# def maskedwhen(self): -# return self._maskedwhen + @staticmethod + def bool2bit(boolmask, lsborder=False): + boolmask = awkward.util.toarray(boolmask, awkward.util.MASKTYPE, awkward.util.numpy.ndarray) + if len(boolmask.shape) != 1: + raise ValueError("boolmask must have 1-dimensional shape") + if not issubclass(boolmask.dtype.type, (awkward.util.numpy.bool_, awkward.util.numpy.bool)): + boolmask = (boolmask != 0) + + if lsborder: + # maybe pad the length for reshape + length = BitMaskedArray._ceildiv8(len(boolmask)) * 8 + if length != len(boolmask): + out = awkward.util.numpy.empty(length, dtype=boolmask.dtype) + 
out[:len(boolmask)] = boolmask + else: + out = boolmask + + # reverse the order in groups of 8 + out = out.reshape(-1, 8)[:,::-1].reshape(-1) + + else: + # numpy.packbits encodes as msb (most significant bit); already in the right order + out = boolmask + + return awkward.util.numpy.packbits(out) + + def boolmask(self, maskedwhen=True): + if maskedwhen == self._maskedwhen: + bitmask = self._mask + else: + bitmask = awkward.util.numpy.bitwise_not(self._mask) + return self.bit2bool(bitmask, lsborder=self._lsborder)[:len(self._content)] -# @maskedwhen.setter -# def maskedwhen(self, value): -# self._maskedwhen = bool(value) - -# @property -# def dtype(self): -# return self._content.dtype + @property + def lsborder(self): + return self._lsborder -# @property -# def shape(self): -# return self._content.shape + @lsborder.setter + def lsborder(self, value): + self._lsborder = bool(value) -# def __len__(self): -# return len(self._content) + def _valid(self): + if len(self._mask) != self._ceildiv8(len(self._content)): + raise ValueError("mask length ({0}) must be equal to ceil(content length / 8) ({1})".format(len(self._mask), self._ceildiv8(len(self._content)))) -# def __getitem__(self, where): -# if self._isstring(where): -# return MaskedArray(self._mask, self._content[where], maskedwhen=self._maskedwhen) + def __iter__(self): + self._valid() + + one = awkward.util.numpy.uint8(1) + zero = awkward.util.numpy.uint8(0) + mask = self._mask + content = self._content + lencontent = len(content) + maskedwhen = self._maskedwhen + masked = self.masked + + if self._lsborder: + byte = i = 0 + bit = start = awkward.util.numpy.uint8(1) + while i < lencontent: + if ((mask[byte] & bit) != 0) == self._maskedwhen: + yield masked + else: + yield content[i] + bit <<= one + if bit == zero: + bit = start + byte += 1 + i += 1 + + else: + byte = i = 0 + bit = start = awkward.util.numpy.uint8(128) + while i < lencontent: + if ((mask[byte] & bit) != 0) == self._maskedwhen: + yield masked + else: + 
yield content[i] + bit >>= one + if bit == zero: + bit = start + byte += 1 + i += 1 + + def _maskat(self, where): + bytepos = awkward.util.numpy.right_shift(where, 3) # where // 8 + bitpos = where - 8*bytepos # where % 8 + + if self._lsborder: + bitmask = awkward.util.numpy.left_shift(1, bitpos) + else: + bitmask = awkward.util.numpy.right_shift(128, bitpos) + + if isinstance(bitmask, awkward.util.numpy.ndarray): + bitmask = bitmask.astype(awkward.util.BITMASKTYPE) + else: + bitmask = awkward.util.BITMASKTYPE.type(bitmask) + + return bytepos, bitmask + + def _maskwhere(self, where): + if isinstance(where, awkward.util.integer): + bytepos, bitmask = self._maskat(where) + return awkward.util.numpy.bitwise_and(self._mask[bytepos], bitmask) != 0 + + elif isinstance(where, slice): + # assumes a small slice; for a big slice, it could be faster to unpack the whole mask + return self._maskwhere(awkward.util.numpy.arange(*where.indices(len(self._content)))) + + else: + where = awkward.util.numpy.array(where, copy=False) + if len(where.shape) == 1 and issubclass(where.dtype.type, awkward.util.numpy.integer): + byteposes, bitmasks = self._maskat(where) + awkward.util.numpy.bitwise_and(bitmasks, self._mask[byteposes], bitmasks) + return bitmasks.astype(awkward.util.numpy.bool_) + + elif len(where.shape) == 1 and issubclass(where.dtype.type, (awkward.util.numpy.bool, awkward.util.numpy.bool_)): + # scales with the size of the mask anyway, so go ahead and unpack the whole mask + unpacked = awkward.util.numpy.unpackbits(self._mask).view(awkward.util.MASKTYPE) -# if not isinstance(where, tuple): -# where = (where,) -# head, tail = where[0], where[1:] + if self._lsborder: + unpacked = unpacked.reshape(-1, 8)[:,::-1].reshape(-1)[:len(where)] + else: + unpacked = unpacked[:len(where)] -# if isinstance(head, (numbers.Integral, numpy.integer)): -# if self._mask[head] == self._maskedwhen: -# return numpy.ma.masked -# else: -# return self._content[self._singleton(where)] -# else: -# 
return MaskedArray(self._mask[head], self._content[self._singleton(where)], maskedwhen=self._maskedwhen) + return unpacked[where] -class BitMaskedArray(MaskedArray): - def __init__(self, mask, content, maskedwhen=True, lsb=True): - raise NotImplementedError + else: + raise TypeError("cannot interpret shape {0}, dtype {1} as a fancy index or mask".format(where.shape, where.dtype)) - def copy(self, index=None, content=None): - raise NotImplementedError + def __getitem__(self, where): + self._valid() + + if awkward.util.isstringslice(where): + return self.copy(content=self._content[where]) + + if isinstance(where, tuple) and len(where) == 0: + return self + if not isinstance(where, tuple): + where = (where,) + head, tail = where[0], where[1:] + + if isinstance(head, awkward.util.integer): + if self._maskwhere(head) == self._maskedwhen: + if tail != (): + raise ValueError("masked element ({0}) is not subscriptable".format(self.masked)) + return self.masked + else: + return self._content[(head,) + tail] + + else: + mask = self._maskwhere(head) + if tail != () and ((self._maskedwhen and mask.any()) or (not self._maskedwhen and not mask.all())): + raise ValueError("masked element ({0}) is not subscriptable".format(self.masked)) + else: + return self.copy(mask=self.bool2bit(mask, lsborder=self._lsborder), content=self._content[(head,) + tail], lsborder=self._lsborder) - def deepcopy(self, index=None, content=None): + @classmethod + def concat(cls, first, *rest): raise NotImplementedError - def empty_like(self, **overrides): + def pandas(self): raise NotImplementedError - def zeros_like(self, **overrides): - raise NotImplementedError +class IndexedMaskedArray(MaskedArray): + def __init__(self, mask, content, maskedwhen=-1): + super(IndexedMaskedArray, self).__init__(mask, content, maskedwhen=maskedwhen) + self._isvalid = False + + def copy(self, mask=None, content=None, maskedwhen=None): + out = self.__class__.__new__(self.__class__) + out._mask = self._mask + out._content 
= self._content + out._maskedwhen = self._maskedwhen + out._isvalid = self._isvalid + if mask is not None: + out._mask = mask + if content is not None: + out._content = content + if maskedwhen is not None: + out._maskedwhen = maskedwhen + return out - def ones_like(self, **overrides): - raise NotImplementedError + @property + def mask(self): + return self._mask + + @mask.setter + def mask(self, value): + value = awkward.util.toarray(value, awkward.util.INDEXTYPE, awkward.util.numpy.ndarray) + if not issubclass(value.dtype.type, awkward.util.numpy.integer): + raise TypeError("starts must have integer dtype") + if len(value.shape) != 1: + raise ValueError("mask must have 1-dimensional shape") + self._mask = value + self._isvalid = False @property def content(self): @@ -217,225 +523,86 @@ def content(self): @content.setter def content(self, value): - raise NotImplementedError + self._content = awkward.util.toarray(value, awkward.util.DEFAULTTYPE) + self._isvalid = False @property - def type(self): - raise NotImplementedError - - def __len__(self): - raise NotImplementedError + def maskedwhen(self): + return self._maskedwhen - @property - def shape(self): - raise NotImplementedError + @maskedwhen.setter + def maskedwhen(self, value): + if not isinstance(value, awkward.util.integer): + raise TypeError("maskedwhen must be an integer for IndexedMaskedArray") + self._maskedwhen = value - @property - def dtype(self): - raise NotImplementedError - - @property - def base(self): - raise NotImplementedError + def boolmask(self, maskedwhen=True): + if maskedwhen: + return self._mask == self._maskedwhen + else: + return self._mask != self._maskedwhen def _valid(self): - raise NotImplementedError - - def _argfields(self, function): - raise NotImplementedError + if not self._isvalid: + if len(self._mask) != 0: + if self._mask.max() > len(self._content): + raise ValueError("maximum mask-index ({0}) is beyond the length of the content ({1})".format(self._mask.max(), 
len(self._content))) + if (self._mask[self._mask != self._maskedwhen] < 0).any(): + raise ValueError("mask-index has negative values (other than maskedwhen)") + self._isvalid = True def __iter__(self): - raise NotImplementedError + self._valid() + + mask = self._mask + lenmask = len(mask) + content = self._content + maskedwhen = self._maskedwhen + masked = self.masked + + i = 0 + while i < lenmask: + maskindex = mask[i] + if maskindex == maskedwhen: + yield masked + else: + yield content[maskindex] + i += 1 def __getitem__(self, where): - raise NotImplementedError - - def __setitem__(self, where, what): - raise NotImplementedError - - def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): - raise NotImplementedError - - def any(self): - raise NotImplementedError - - def all(self): - raise NotImplementedError + self._valid() + + if awkward.util.isstringslice(where): + return self.copy(content=self._content[where]) + + if isinstance(where, tuple) and len(where) == 0: + return self + if not isinstance(where, tuple): + where = (where,) + head, tail = where[0], where[1:] + + if isinstance(head, awkward.util.integer): + maskindex = self._mask[head] + if maskindex == self._maskedwhen: + if tail != (): + raise ValueError("masked element ({0}) is not subscriptable".format(self.masked)) + return self.masked + else: + return self._content[(maskindex,) + tail] + + else: + maskindex = self._mask[head] + if tail != () and (maskindex == self._maskedwhen).any(): + raise ValueError("masked element ({0}) is not subscriptable".format(self.masked)) + else: + return self.copy(mask=maskindex) + + def indexed(self): + return self @classmethod def concat(cls, first, *rest): raise NotImplementedError - @property - def columns(self): - raise NotImplementedError - - @property - def allcolumns(self): - raise NotImplementedError - def pandas(self): raise NotImplementedError - -# class BitMaskedArray(MaskedArray): -# @staticmethod -# def fromboolmask(mask, content, maskedwhen=True, 
lsb=True): -# out = BitMaskedArray([], content, maskedwhen=maskedwhen, lsb=lsb) -# out.boolmask = mask -# return out - -# def __init__(self, mask, content, maskedwhen=True, lsb=True): -# self.mask = mask -# self.content = content -# self.maskedwhen = maskedwhen -# self.lsb = lsb - -# @property -# def mask(self): -# return self._mask - -# @mask.setter -# def mask(self, value): -# value = self._toarray(value, self.BITMASKTYPE, (numpy.ndarray, awkward.array.base.AwkwardArray)) - -# if len(value.shape) != 1: -# raise TypeError("mask must have 1-dimensional shape") - -# self._mask = value.view(self.BITMASKTYPE) - -# @property -# def boolmask(self): -# out = numpy.unpackbits(self._mask) -# if self._lsb: -# out = out.reshape(-1, 8)[:,::-1].reshape(-1) -# return out.view(self.MASKTYPE)[:len(self._content)] - -# @boolmask.setter -# def boolmask(self, value): -# value = numpy.array(value, copy=False) - -# if len(value.shape) != 1: -# raise TypeError("boolmask must have 1-dimensional shape") -# if not issubclass(value.dtype.type, (numpy.bool, numpy.bool_)): -# raise TypeError("boolmask must have boolean type") - -# if self._lsb: -# # maybe pad the length for reshape -# length = 8*((len(value) + 8 - 1) >> 3) # ceil(len(value) / 8.0) * 8 -# if length != len(value): -# out = numpy.empty(length, dtype=numpy.bool_) -# out[:len(value)] = value -# else: -# out = value - -# # reverse the order in groups of 8 -# out = out.reshape(-1, 8)[:,::-1].reshape(-1) - -# else: -# # numpy.packbits encodes as msb (most significant bit); already in the right order -# out = value - -# self._mask = numpy.packbits(out) - -# @property -# def lsb(self): -# return self._lsb - -# @lsb.setter -# def lsb(self, value): -# self._lsb = bool(value) - -# def _maskat(self, where): -# bytepos = numpy.right_shift(where, 3) # where // 8 -# bitpos = where - 8*bytepos # where % 8 - -# if self.lsb: -# bitmask = numpy.left_shift(1, bitpos) -# else: -# bitmask = numpy.right_shift(128, bitpos) - -# if isinstance(bitmask, 
numpy.ndarray): -# bitmask = bitmask.astype(self.BITMASKTYPE) -# else: -# bitmask = self.BITMASKTYPE.type(bitmask) - -# return bytepos, bitmask - -# def _maskwhere(self, where): -# if isinstance(where, (numbers.Integral, numpy.integer)): -# bytepos, bitmask = self._maskat(where) -# return numpy.bitwise_and(self._mask[bytepos], bitmask) != 0 - -# elif isinstance(where, slice): -# # assumes a small slice; for a big slice, it could be faster to unpack the whole mask -# return self._maskwhere(numpy.arange(*where.indices(len(self._content)))) - -# else: -# where = numpy.array(where, copy=False) -# if len(where.shape) == 1 and issubclass(where.dtype.type, numpy.integer): -# byteposes, bitmasks = self._maskat(where) -# numpy.bitwise_and(bitmasks, self._mask[byteposes], bitmasks) -# return bitmasks.astype(numpy.bool_) - -# elif len(where.shape) == 1 and issubclass(where.dtype.type, (numpy.bool, numpy.bool_)): -# # scales with the size of the mask anyway, so go ahead and unpack the whole mask -# unpacked = numpy.unpackbits(self._mask).view(self.MASKTYPE) - -# if self.lsb: -# unpacked = unpacked.reshape(-1, 8)[:,::-1].reshape(-1)[:len(where)] -# else: -# unpacked = unpacked[:len(where)] - -# return unpacked[where] - -# else: -# raise TypeError("cannot interpret shape {0}, dtype {1} as a fancy index or mask".format(where.shape, where.dtype)) - -# def _setmask(self, where, valid): -# if isinstance(where, (numbers.Integral, numpy.integer)): -# bytepos, bitmask = self._maskat(where) -# if self._maskedwhen != valid: -# self._mask[bytepos] |= bitmask -# else: -# self._mask[bytepos] &= numpy.bitwise_not(bitmask) - -# elif isinstance(where, slice): -# # assumes a small slice; for a big slice, it could be faster to unpack the whole mask -# self._setmask(numpy.arange(*where.indices(len(self._content))), valid) - -# else: -# where = numpy.array(where, copy=False) -# if len(where.shape) == 1 and issubclass(where.dtype.type, numpy.integer): -# bytepos, bitmask = self._maskat(where) -# if 
self._maskedwhen != valid: -# numpy.bitwise_or.at(self._mask, bytepos, bitmask) -# else: -# numpy.bitwise_and.at(self._mask, bytepos, numpy.bitwise_not(bitmask)) - -# elif len(where.shape) == 1 and issubclass(where.dtype.type, (numpy.bool, numpy.bool_)): -# tmp = self.boolmask -# if self._maskedwhen != valid: -# tmp[where] = True -# else: -# tmp[where] = False -# self.boolmask = tmp - -# else: -# raise TypeError("cannot interpret shape {0}, dtype {1} as a fancy index or mask".format(where.shape, where.dtype)) - -# def __getitem__(self, where): -# if self._isstring(where): -# return MaskedArray(self._mask, self._content[where], maskedwhen=self._maskedwhen) - -# if not isinstance(where, tuple): -# where = (where,) -# head, tail = where[0], where[1:] - -# if isinstance(head, (numbers.Integral, numpy.integer)): -# if self._maskwhere(head) == self._maskedwhen: -# return numpy.ma.masked -# else: -# return self._content[self._singleton(where)] - -# else: -# return MaskedArray(self._maskwhere(head), self._content[self._singleton(where)], maskedwhen=self._maskedwhen) diff --git a/awkward/array/objects.py b/awkward/array/objects.py index 33a264e6..5d366739 100644 --- a/awkward/array/objects.py +++ b/awkward/array/objects.py @@ -28,9 +28,8 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-import types - import awkward.array.base +import awkward.type import awkward.util class Methods(object): @@ -52,7 +51,7 @@ def maybemixin(sample, awkwardtype): else: return awkwardtype -class ObjectArray(awkward.array.base.AwkwardArray): +class ObjectArray(awkward.array.base.AwkwardArrayWithContent): def __init__(self, content, generator, *args, **kwargs): self.content = content self.generator = generator @@ -161,19 +160,7 @@ def __len__(self): @property def type(self): - out = awkward.type.fromarray(self._content) - out.to = self._generator - return out - - @property - def base(self): - return self._content.base - - def _argfields(self, function): - if (isinstance(function, types.FunctionType) and function.__code__.co_argcount == 1) or isinstance(self._content, awkward.util.numpy.ndarray): - return awkward.util._argfields(function) - else: - return self._content._argfields(function) + return awkward.type.fromarray(*(self._content.shape + (self._generator,))) def __iter__(self): for x in self._content: @@ -202,18 +189,6 @@ def __getitem__(self, where): else: return [x[tail] for x in content] - def __setitem__(self, where, what): - self._content[where] = what - - def __delitem__(self, where): - if isinstance(where, awkward.util.string): - del self._content[where] - elif awkward.util.isstringslice(where): - for x in where: - del self._content[x] - else: - raise TypeError("invalid index for removing column from Table: {0}".format(where)) - def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): if method != "__call__": return NotImplemented @@ -221,6 +196,7 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): contents = [] for x in inputs: if isinstance(x, ObjectArray): + x._valid() contents.append(x._content) else: contents.append(x) @@ -242,17 +218,5 @@ def all(self): def concat(cls, first, *rest): raise NotImplementedError - @property - def columns(self): - if isinstance(self._content, awkward.util.numpy.ndarray): - raise TypeError("array has no 
Table, and hence no columns") - return self._content.columns - - @property - def allcolumns(self): - if isinstance(self._content, awkward.util.numpy.ndarray): - raise TypeError("array has no Table, and hence no columns") - return self._content.allcolumns - def pandas(self): raise NotImplementedError diff --git a/awkward/array/sparse.py b/awkward/array/sparse.py deleted file mode 100644 index 9c871c0f..00000000 --- a/awkward/array/sparse.py +++ /dev/null @@ -1,116 +0,0 @@ -#!/usr/bin/env python - -# Copyright (c) 2018, DIANA-HEP -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# * Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -import awkward.array.base - -class SparseArray(awkward.array.base.AwkwardArray): - def __init__(self, index, content, shape, default=0): - raise NotImplementedError - - def copy(self, index=None, content=None): - raise NotImplementedError - - def deepcopy(self, index=None, content=None): - raise NotImplementedError - - def empty_like(self, **overrides): - raise NotImplementedError - - def zeros_like(self, **overrides): - raise NotImplementedError - - def ones_like(self, **overrides): - raise NotImplementedError - - @property - def content(self): - return self._content - - @content.setter - def content(self, value): - raise NotImplementedError - - @property - def type(self): - raise NotImplementedError - - def __len__(self): - raise NotImplementedError - - @property - def shape(self): - raise NotImplementedError - - @property - def dtype(self): - raise NotImplementedError - - @property - def base(self): - raise NotImplementedError - - def _valid(self): - raise NotImplementedError - - def _argfields(self, function): - raise NotImplementedError - - def __iter__(self): - raise NotImplementedError - - def __getitem__(self, where): - raise NotImplementedError - - def __setitem__(self, where, what): - raise NotImplementedError - - def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): - raise NotImplementedError - - def any(self): - raise NotImplementedError - - def all(self): - raise NotImplementedError - - @classmethod - def concat(cls, 
first, *rest): - raise NotImplementedError - - @property - def columns(self): - raise NotImplementedError - - @property - def allcolumns(self): - raise NotImplementedError - - def pandas(self): - raise NotImplementedError diff --git a/awkward/array/table.py b/awkward/array/table.py index 5dd68856..c1be4f00 100644 --- a/awkward/array/table.py +++ b/awkward/array/table.py @@ -62,7 +62,7 @@ def __getattr__(self, name): if content is not None: return content[self._index] - raise AttributeError("neither _{0} is not a column in this {1}".format(name, self._table.rowname)) + raise AttributeError("{0} is not a column in this {1}".format(repr(name), self._table.rowname)) def __getitem__(self, where): if isinstance(where, awkward.util.string): @@ -244,40 +244,6 @@ def content(self, value): value[n] = awkward.util.toarray(value[n], awkward.util.DEFAULTTYPE) self._content = value - def _valid(self): - return True - - def _argfields(self, function): - if not isinstance(function, types.FunctionType): - raise TypeError("function (or lambda) required") - - required = function.__code__.co_varnames[:function.__code__.co_argcount] - has_varargs = (function.__code__.co_flags & 0x04) != 0 - has_kwargs = (function.__code__.co_flags & 0x08) != 0 - - args = [] - kwargs = {} - - order = self.columns - - for i, n in enumerate(required): - if n in self._content: - args.append(n) - elif str(i) in self._content: - args.append(str(i)) - else: - args.append(order[i]) - - if has_varargs: - while str(i) in self._content: - args.append(str(i)) - i += 1 - - if has_kwargs: - kwargs = [n for n in self._content if n not in required] - - return args, kwargs - @property def dtype(self): return awkward.util.numpy.dtype([(n, x.dtype) for n, x in self._content.items()]) @@ -379,7 +345,8 @@ def _newslice(self, head): if issubclass(head.dtype.type, awkward.util.numpy.integer): length = self._length() negative = (head < 0) - head[negative] += length + if negative.any(): + head[negative] += length if not 
awkward.util.numpy.bitwise_and(0 <= head, head < length).all(): raise IndexError("some indexes out of bounds for length {0}".format(length)) @@ -455,6 +422,9 @@ def __getitem__(self, where): where = (where,) head, tail = where[0], where[1:] + if tail != (): + raise IndexError("cannot pass multidimensional indexes through a Table") + newslice = self._newslice(head) if isinstance(newslice, awkward.util.integer): @@ -468,7 +438,7 @@ def __getitem__(self, where): def __setitem__(self, where, what): if self._view is not None: - raise ValueError("new columns can only be attached to the original table, not a view (try table.base['col'] = array)") + raise ValueError("new columns can only be attached to the original Table, not a view (try table.base['col'] = array)") if isinstance(where, awkward.util.string): self._content[where] = awkward.util.toarray(what, awkward.util.DEFAULTTYPE) @@ -484,7 +454,7 @@ def __setitem__(self, where, what): def __delitem__(self, where): if self._view is not None: - raise ValueError("columns can only be removed from the original table, not a view (try del table.base['col'])") + raise ValueError("columns can only be removed from the original Table, not a view (try del table.base['col'])") if isinstance(where, awkward.util.string): del self._content[where] @@ -501,6 +471,8 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): inputsdict = None for x in inputs: if isinstance(x, Table): + x._valid() + if inputsdict is None: inputsdict = awkward.util.OrderedDict([(n, []) for n in x._content]) table = x diff --git a/awkward/array/union.py b/awkward/array/union.py index 8c00f5e0..40e99b85 100644 --- a/awkward/array/union.py +++ b/awkward/array/union.py @@ -28,173 +28,390 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+import functools + import awkward.array.base +import awkward.type +import awkward.util class UnionArray(awkward.array.base.AwkwardArray): def __init__(self, tags, index, contents): - raise NotImplementedError + self.tags = tags + self.index = index + self.contents = contents - def copy(self, index=None, content=None): - raise NotImplementedError - - def deepcopy(self, index=None, content=None): - raise NotImplementedError + @classmethod + def fromtags(cls, tags, contents): + out = cls.__new__(cls) + out.tags = tags + out.index = awkward.util.numpy.empty(out._tags.shape, dtype=awkward.util.INDEXTYPE) + out.contents = contents + + if len(out._tags.reshape(-1)) > 0 and out._tags.reshape(-1).max() >= len(out._contents): + raise ValueError("maximum tag is {0} but there are only {1} contents arrays".format(out._tags.reshape(-1).max(), len(out._contents))) + + for tag, content in enumerate(out._contents): + mask = (out._tags == tag) + out._index[mask] = awkward.util.numpy.arange(awkward.util.numpy.count_nonzero(mask)) + + return out + + def copy(self, tags=None, index=None, contents=None): + out = self.__class__.__new__(self.__class__) + out._tags = self._tags + out._index = self._index + out._contents = self._contents + out._dtype = self._dtype + out._isvalid = self._isvalid + if tags is not None: + out.tags = tags + if index is not None: + out.index = index + if contents is not None: + out.contents = contents + return out + + def deepcopy(self, tags=None, index=None, contents=None): + out = self.copy(tags=tags, index=index, contents=contents) + out._tags = awkward.util.deepcopy(out._tags) + out._index = awkward.util.deepcopy(out._index) + out._contents = [awkward.util.deepcopy(x) for x in out._contents] + return out def empty_like(self, **overrides): - raise NotImplementedError + return self.copy(contents=[awkward.util.numpy.empty_like(x) if isinstance(x, awkward.util.numpy.ndarray) else x.empty_like(**overrides) for x in self._contents]) def zeros_like(self, 
**overrides): - raise NotImplementedError + return self.copy(contents=[awkward.util.numpy.zeros_like(x) if isinstance(x, awkward.util.numpy.ndarray) else x.zeros_like(**overrides) for x in self._contents]) def ones_like(self, **overrides): - raise NotImplementedError + return self.copy(contents=[awkward.util.numpy.ones_like(x) if isinstance(x, awkward.util.numpy.ndarray) else x.ones_like(**overrides) for x in self._contents]) @property - def content(self): - return self._content + def tags(self): + return self._tags + + @tags.setter + def tags(self, value): + value = awkward.util.toarray(value, awkward.util.TAGTYPE, awkward.util.numpy.ndarray) + if not issubclass(value.dtype.type, awkward.util.numpy.integer): + raise TypeError("tags must have integer dtype") + if (value < 0).any(): + raise ValueError("tags must be a non-negative array") + self._tags = value + self._isvalid = False - @content.setter - def content(self, value): - raise NotImplementedError + @property + def index(self): + return self._index + + @index.setter + def index(self, value): + value = awkward.util.toarray(value, awkward.util.INDEXTYPE, awkward.util.numpy.ndarray) + if not issubclass(value.dtype.type, awkward.util.numpy.integer): + raise TypeError("index must have integer dtype") + if (value < 0).any(): + raise ValueError("index must be a non-negative array") + self._index = value + self._isvalid = False @property - def type(self): - raise NotImplementedError + def contents(self): + return self._contents + + @contents.setter + def contents(self, value): + try: + iter(value) + except TypeError: + raise TypeError("contents must be iterable") + value = tuple(awkward.util.toarray(x, awkward.util.DEFAULTTYPE) for x in value) + if len(value) == 0: + raise ValueError("contents must be non-empty") + self._contents = value + self._dtype = None + self._isvalid = False - def __len__(self): - raise NotImplementedError + @property + def dtype(self): + if self._dtype is None: + if 
all(issubclass(x.dtype.type, (awkward.util.numpy.bool_, awkward.util.numpy.bool)) for x in self._contents): + self._dtype = awkward.util.numpy.dtype(awkward.util.numpy.bool_) + + elif all(issubclass(x.dtype.type, (awkward.util.numpy.int8)) for x in self._contents): + self._dtype = awkward.util.numpy.dtype(awkward.util.numpy.int8) + + elif all(issubclass(x.dtype.type, (awkward.util.numpy.uint8)) for x in self._contents): + self._dtype = awkward.util.numpy.dtype(awkward.util.numpy.uint8) + + elif all(issubclass(x.dtype.type, (awkward.util.numpy.int8, awkward.util.numpy.uint8, awkward.util.numpy.int16)) for x in self._contents): + self._dtype = awkward.util.numpy.dtype(awkward.util.numpy.int16) + + elif all(issubclass(x.dtype.type, (awkward.util.numpy.uint8, awkward.util.numpy.uint16)) for x in self._contents): + self._dtype = awkward.util.numpy.dtype(awkward.util.numpy.uint16) + + elif all(issubclass(x.dtype.type, (awkward.util.numpy.int8, awkward.util.numpy.uint8, awkward.util.numpy.int16, awkward.util.numpy.uint16, awkward.util.numpy.int32)) for x in self._contents): + self._dtype = awkward.util.numpy.dtype(awkward.util.numpy.int32) + + elif all(issubclass(x.dtype.type, (awkward.util.numpy.uint8, awkward.util.numpy.uint16, awkward.util.numpy.uint32)) for x in self._contents): + self._dtype = awkward.util.numpy.dtype(awkward.util.numpy.uint32) + + elif all(issubclass(x.dtype.type, (awkward.util.numpy.int8, awkward.util.numpy.uint8, awkward.util.numpy.int16, awkward.util.numpy.uint16, awkward.util.numpy.int32, awkward.util.numpy.uint32, awkward.util.numpy.int64)) for x in self._contents): + self._dtype = awkward.util.numpy.dtype(awkward.util.numpy.int64) + + elif all(issubclass(x.dtype.type, (awkward.util.numpy.uint8, awkward.util.numpy.uint16, awkward.util.numpy.uint32, awkward.util.numpy.uint64)) for x in self._contents): + self._dtype = awkward.util.numpy.dtype(awkward.util.numpy.uint64) + + elif all(issubclass(x.dtype.type, (awkward.util.numpy.float16)) for x in 
self._contents): + self._dtype = awkward.util.numpy.dtype(awkward.util.numpy.float16) + + elif all(issubclass(x.dtype.type, (awkward.util.numpy.float16, awkward.util.numpy.float32)) for x in self._contents): + self._dtype = awkward.util.numpy.dtype(awkward.util.numpy.float32) + + elif all(issubclass(x.dtype.type, (awkward.util.numpy.float16, awkward.util.numpy.float32, awkward.util.numpy.float64)) for x in self._contents): + self._dtype = awkward.util.numpy.dtype(awkward.util.numpy.float64) + + elif all(issubclass(x.dtype.type, (awkward.util.numpy.float16, awkward.util.numpy.float32, awkward.util.numpy.float64, awkward.util.numpy.float128)) for x in self._contents): + self._dtype = awkward.util.numpy.dtype(awkward.util.numpy.float128) + + elif all(issubclass(x.dtype.type, (awkward.util.numpy.integer, awkward.util.numpy.floating)) for x in self._contents): + self._dtype = awkward.util.numpy.dtype(awkward.util.numpy.float64) + + elif all(issubclass(x.dtype.type, (awkward.util.numpy.complex64)) for x in self._contents): + self._dtype = awkward.util.numpy.dtype(awkward.util.numpy.complex64) + + elif all(issubclass(x.dtype.type, (awkward.util.numpy.complex64, awkward.util.numpy.complex128)) for x in self._contents): + self._dtype = awkward.util.numpy.dtype(awkward.util.numpy.complex128) + + elif all(issubclass(x.dtype.type, (awkward.util.numpy.complex64, awkward.util.numpy.complex128, awkward.util.numpy.complex256)) for x in self._contents): + self._dtype = awkward.util.numpy.dtype(awkward.util.numpy.complex256) + + elif all(issubclass(x.dtype.type, (awkward.util.numpy.integer, awkward.util.numpy.floating, awkward.util.numpy.complexfloating)) for x in self._contents): + self._dtype = awkward.util.numpy.dtype(awkward.util.numpy.complex256) + + else: + self._dtype = awkward.util.numpy.dtype(awkward.util.numpy.object_) + + return self._dtype @property def shape(self): - raise NotImplementedError + first = self._contents[0].shape + if self.dtype.kind == "O" or not 
all(x.shape == first for x in self._contents[1:]): + return self._tags.shape + else: + return self._tags.shape + first - @property - def dtype(self): - raise NotImplementedError + def __len__(self): + return len(self._tags) @property - def base(self): - raise NotImplementedError + def type(self): + return awkward.type.ArrayType(*(self._tags.shape + (functools.reduce(lambda a, b: a | b, [awkward.type.fromarray(x).to for x in self._contents]),))) def _valid(self): - raise NotImplementedError + if not self._isvalid: + if len(self._tags.shape) > len(self._index.shape): + raise ValueError("tags length ({0}) must be less than or equal to index length ({1})".format(len(self._tags.shape), len(self._index.shape))) - def _argfields(self, function): - raise NotImplementedError + if self._tags.shape[1:] != self._index.shape[1:]: + raise ValueError("tags dimensionality ({0}) must be equal to index dimensionality ({1})".format(self._tags.shape[1:], self._index.shape[1:])) + + if len(self._tags.reshape(-1)) > 0 and self._tags.reshape(-1).max() >= len(self._contents): + raise ValueError("maximum tag is {0} but there are only {1} contents arrays".format(self._tags.reshape(-1).max(), len(self._contents))) + + index = self._index[:len(self._tags)] + for tag in awkward.util.numpy.unique(self._tags): + maxindex = index[self._tags == tag].reshape(-1).max() + if maxindex >= len(self._contents[tag]): + raise ValueError("maximum index ({0}) must be less than the length of all contents arrays ({1})".format(maxindex, len(self._contents[tag]))) + + self._isvalid = True def __iter__(self): - raise NotImplementedError + self._valid() - def __getitem__(self, where): - raise NotImplementedError + tags = self._tags + lentags = len(self._tags) + index = self._index + contents = self._contents + i = 0 + while i < lentags: + yield contents[tags[i]][index[i]] + i += 1 + + def __getitem__(self, where): + self._valid() + + if awkward.util.isstringslice(where): + contents = [] + for tag in 
awkward.util.numpy.unique(self._tags): + contents.append(self._contents[tag][where]) + if len(contents) == 0: + return self.copy(contents=[self._contents[0][where]]) + else: + return self.copy(contents=contents) + + if isinstance(where, tuple) and len(where) == 0: + return self + if not isinstance(where, tuple): + where = (where,) + head, tail = where[:len(self._tags.shape)], where[len(self._tags.shape):] + + tags = self._tags[head] + index = self._index[:len(self._tags)][head] + + if len(tags.shape) == len(index.shape) == 0: + return self._contents[tags][(index,) + tail] + else: + if len(tags) == 0: + return self._contents[0][(index,) + tail] + elif (tags == tags[0]).all(): + return self._contents[tags[0]][(index,) + tail] + else: + return self.copy(tags=tags, index=index) + def __setitem__(self, where, what): - raise NotImplementedError + import awkward.array.index + + if what.shape[:len(self._tags.shape)] != self._tags.shape: + raise ValueError("array to assign does not have the same starting shape as tags") + + if isinstance(where, awkward.util.string): + for tag in awkward.util.numpy.unique(self._tags): + inverseindex = awkward.array.index.invert(self._index[:len(self._tags)][self._tags == tag]) + self._contents[tag][where] = awkward.array.index.IndexedArray(inverseindex, what) + + elif awkward.util.isstringslice(where): + if len(where) != len(what): + raise ValueError("number of keys ({0}) does not match number of provided arrays ({1})".format(len(where), len(what))) + for tag in awkward.util.numpy.unique(self._tags): + inverseindex = awkward.array.index.invert(self._index[:len(self._tags)][self._tags == tag]) + for x, y in zip(where, what): + self._contents[tag][x] = awkward.array.index.IndexedArray(inverseindex, y) + + else: + raise TypeError("invalid index for assigning column to Table: {0}".format(where)) + + def __delitem__(self, where): + if isinstance(where, awkward.util.string): + for tag in awkward.util.numpy.unique(self._tags): + del 
self._contents[tag][where] + + elif awkward.util.isstringslice(where): + for tag in awkward.util.numpy.unique(self._tags): + for x in where: + del self._contents[tag][x] + + else: + raise TypeError("invalid index for assigning column to Table: {0}".format(where)) def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): - raise NotImplementedError + import awkward.array.objects + + if method != "__call__": + return NotImplemented + + tags = [] + for x in inputs: + if isinstance(x, UnionArray): + x._valid() + tags.append(x._tags) + assert len(tags) > 0 + + if any(x.shape != tags[0].shape for x in tags[1:]): + raise ValueError("cannot {0} UnionArrays because tag shapes differ".format(ufunc)) + + combos = awkward.util.numpy.stack(tags, axis=-1).view([(str(i), x.dtype) for i, x in enumerate(tags)]).reshape(tags[0].shape) + + outtags = awkward.util.numpy.empty(tags[0].shape, dtype=awkward.util.TAGTYPE) + outindex = awkward.util.numpy.empty(tags[0].shape, dtype=awkward.util.INDEXTYPE) + + out = None + contents = {} + types = {} + for outtag, combo in enumerate(awkward.util.numpy.unique(combos)): + mask = (combos == combo) + outtags[mask] = outtag + outindex[mask] = awkward.util.numpy.arange(awkward.util.numpy.count_nonzero(mask)) + + result = getattr(ufunc, method)(*[x[mask] for x in inputs], **kwargs) + + if isinstance(result, tuple): + if out is None: + out = list(result) + for i, x in enumerate(result): + if isinstance(x, (awkward.util.numpy.ndarray, awkward.array.base.AwkwardArray)): + if i not in contents: + contents[i] = [] + contents[i].append(x) + types[i] = type(x) + + elif method == "at": + pass + + else: + if isinstance(result, (awkward.util.numpy.ndarray, awkward.array.base.AwkwardArray)): + if None not in contents: + contents[None] = [] + contents[None].append(result) + types[None] = type(result) + + if out is None: + if None in contents: + return awkward.array.objects.Methods.maybemixin(types[None], UnionArray)(outtags, outindex, contents[None]) + else: 
+ return None + else: + for i in range(len(out)): + if i in contents: + out[i] = awkward.array.objects.Methods.maybemixin(types[i], UnionArray)(outtags, outindex, contents[i]) + return tuple(out) def any(self): - raise NotImplementedError + self._valid() + index = self._index[:len(self._tags)] + for tag in awkward.util.numpy.unique(self._tags): + if self._contents[tag][index[self._tags == tag]].any(): + return True + return False def all(self): - raise NotImplementedError + self._valid() + index = self._index[:len(self._tags)] + for tag in awkward.util.numpy.unique(self._tags): + if not self._contents[tag][index[self._tags == tag]].all(): + return False + return True @classmethod def concat(cls, first, *rest): raise NotImplementedError + @property + def base(self): + return self._base + @property def columns(self): - raise NotImplementedError + out = None + for content in self._contents: + if out is None: + out = content.columns + else: + out = [x for x in content.columns if x in out] + return out @property def allcolumns(self): - raise NotImplementedError + out = None + for content in self._contents: + if out is None: + out = content.allcolumns + else: + out = [x for x in content.allcolumns if x in out] + return out def pandas(self): raise NotImplementedError - -# class UnionArray(awkward.array.base.AwkwardArray): -# @classmethod -# def fromtags(cls, tags, contents): -# raise NotImplementedError - -# def __init__(self, tags, index, contents): -# self.tags = tags -# self.index = index -# self.contents = contents - -# @property -# def tags(self): -# return self._tags - -# @tags.setter -# def tags(self, value): -# value = self._toarray(value, self.INDEXTYPE, (numpy.ndarray, awkward.array.base.AwkwardArray)) - -# if len(value.shape) != 1: -# raise TypeError("tags must have 1-dimensional shape") -# if value.shape[0] == 0: -# value = value.view(self.INDEXTYPE) -# if not issubclass(value.dtype.type, numpy.integer): -# raise TypeError("tags must have integer dtype") - -#
self._tags = value - -# @property -# def index(self): -# return self._index - -# @index.setter -# def index(self, value): -# value = self._toarray(value, self.INDEXTYPE, (numpy.ndarray, awkward.array.base.AwkwardArray)) - -# if len(value.shape) != 1: -# raise TypeError("index must have 1-dimensional shape") -# if value.shape[0] == 0: -# value = value.view(self.INDEXTYPE) -# if not issubclass(value.dtype.type, numpy.integer): -# raise TypeError("index must have integer dtype") - -# self._index = value - -# @property -# def contents(self): -# return self._contents - -# @contents.setter -# def contents(self, value): -# self._contents = tuple(self._toarray(x, self.CHARTYPE, (numpy.ndarray, awkward.array.base.AwkwardArray)) for x in value) - -# @property -# def dtype(self): -# return numpy.dtype(object) - -# @property -# def shape(self): -# return (len(self._tags),) - -# def __len__(self): -# return len(self._tags) - -# def __getitem__(self, where): -# if self._isstring(where): -# return UnionArray(self._tags, self._index, tuple(x[where] for x in self._contents)) - -# if self._tags.shape != self._index.shape: -# raise ValueError("tags shape ({0}) does not match index shape ({1})".format(self._tags.shape, self._index.shape)) - -# if not isinstance(where, tuple): -# where = (where,) -# head, tail = where[0], where[1:] - -# tags = self._tags[head] -# index = self._index[head] -# assert tags.shape == index.shape - -# uniques = numpy.unique(tags) -# if len(uniques) == 1: -# return self._contents[uniques[0]][self._singleton((index,) + tail)] -# else: -# return UnionArray(tags, index, self._contents) diff --git a/awkward/array/virtual.py b/awkward/array/virtual.py index e496739e..9f048579 100644 --- a/awkward/array/virtual.py +++ b/awkward/array/virtual.py @@ -29,251 +29,285 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
import awkward.array.base +import awkward.type import awkward.util class VirtualArray(awkward.array.base.AwkwardArray): - def __init__(self, generator, cache=None, persistentkey=None, dtype=None, shape=None): - raise NotImplementedError - - def copy(self, index=None, content=None): - raise NotImplementedError - - def deepcopy(self, index=None, content=None): - raise NotImplementedError + class TransientKey(object): + def __init__(self, id): + self._id = id + def __repr__(self): + return "".format(repr(self._id)) + def __hash__(self): + return hash((VirtualArray.TransientKey, self._id)) + def __eq__(self, other): + return isinstance(other, VirtualArray.TransientKey) and self._id == other._id + def __ne__(self, other): + return not self.__eq__(other) + def __getstate__(self): + raise RuntimeError("VirtualArray.TransientKeys are not unique across processes, and hence should not be serialized") + + def __init__(self, generator, cache=None, persistentkey=None, type=None): + self.generator = generator + self.cache = cache + self.persistentkey = persistentkey + self.type = type + self._array = None + self._setitem = None + self._delitem = None + + def copy(self, generator=None, cache=None, persistentkey=None, type=None): + out = self.__class__.__new__(self.__class__) + out._generator = self._generator + out._cache = self._cache + out._persistentkey = self._persistentkey + out._type = self._type + out._array = self._array + if self._setitem is None: + out._setitem = None + else: + out._setitem = awkward.util.OrderedDict(self._setitem.items()) + if self._delitem is None: + out._delitem = None + else: + out._delitem = list(self._delitem) + if generator is not None: + out.generator = generator + if cache is not None: + out.cache = cache + if persistentkey is not None: + out.persistentkey = persistentkey + if type is not None: + out.type = type + return out + + def deepcopy(self, generator=None, cache=None, persistentkey=None, type=None): + out = self.copy(generator=generator, 
cache=cache, persistentkey=persistentkey, type=type) + out._array = awkward.util.deepcopy(out._array) + if out._setitem is not None: + for n in list(out._setitem): + out._setitem[n] = awkward.util.deepcopy(out._setitem[n]) + return out def empty_like(self, **overrides): - raise NotImplementedError + if isinstance(self.array, awkward.util.numpy.ndarray): + return awkward.util.numpy.empty_like(self.array) + else: + return self.array.empty_like(**overrides) def zeros_like(self, **overrides): - raise NotImplementedError + if isinstance(self.array, awkward.util.numpy.ndarray): + return awkward.util.numpy.zeros_like(self.array) + else: + return self.array.zeros_like(**overrides) def ones_like(self, **overrides): - raise NotImplementedError + if isinstance(self.array, awkward.util.numpy.ndarray): + return awkward.util.numpy.ones_like(self.array) + else: + return self.array.ones_like(**overrides) @property - def content(self): - return self._content + def generator(self): + return self._generator - @content.setter - def content(self, value): - raise NotImplementedError + @generator.setter + def generator(self, value): + if not callable(value): + raise TypeError("generator must be a callable (of zero arguments)") + self._generator = value + + @property + def cache(self): + return self._cache + + @cache.setter + def cache(self, value): + if not value is None and not (callable(getattr(value, "__getitem__", None)) and callable(getattr(value, "__setitem__", None)) and callable(getattr(value, "__delitem__", None))): + raise TypeError("cache must be None, a dict, or have __getitem__/__setitem__/__delitem__ methods") + self._cache = value + + @property + def persistentkey(self): + return self._persistentkey + + @persistentkey.setter + def persistentkey(self, value): + if value is not None and not isinstance(value, awkward.util.string): + raise TypeError("persistentkey must be None or a string") + self._persistentkey = value @property def type(self): - raise NotImplementedError + if self._type is
None or self.ismaterialized: + return awkward.type.fromarray(self.array) + else: + return self._type + + @type.setter + def type(self, value): + if value is not None and not isinstance(value, awkward.type.ArrayType): + raise TypeError("type must be None or an awkward type (to set Numpy parameters, use awkward.util.fromnumpy(shape, dtype, masked=False))") + self._type = value def __len__(self): - raise NotImplementedError + return self.shape[0] @property def shape(self): - raise NotImplementedError + return self.type.shape @property def dtype(self): - raise NotImplementedError + return self.type.dtype + + def _valid(self): + pass @property - def base(self): - raise NotImplementedError + def key(self): + if self._persistentkey is not None: + return self._persistentkey + else: + return self.TransientKey(id(self)) - def _valid(self): - raise NotImplementedError + @property + def array(self): + # Normal states: + # (1) no cache and _array is None: make a new one + # (2) no cache and _array is an array: return _array + # (3) have a cache and _array is None: make a new one (filling cache) + # (4) have a cache and _array is a key and cache[key] was evicted: make a new one (filling cache) + # (5) have a cache and _array is a key and cache[key] exists: return cache[key] + # + # Abnormal states (user manually changed cache after materialization): + # (6) no cache and _array is a key (user removed _cache): make a new one + # (7) have a cache and _array is an array (user added _cache): fill cache and return _array + + if self._array is None: + # states (1) and (3) + return self.materialize() + + elif self._cache is None: + if isinstance(self._array, (VirtualArray.TransientKey, awkward.util.string)): + # abnormal state (6) + return self.materialize() + else: + # state (2) + return self._array + + else: + if isinstance(self._array, (VirtualArray.TransientKey, awkward.util.string)): + try: + # state (5) + return self._cache[self._array] + except: + # state (4), taking any error in 
__getitem__ as evidence that it was evicted + return self.materialize() + else: + # abnormal state (7) + self._cache[self.key] = self._array + return self._array - def _argfields(self, function): - raise NotImplementedError + @property + def ismaterialized(self): + if self._cache is None: + return isinstance(self._array, (awkward.util.numpy.ndarray, awkward.array.base.AwkwardArray)) + else: + return self._array is not None and self._array in self._cache + + def materialize(self): + array = awkward.util.toarray(self._generator(), awkward.util.DEFAULTTYPE) + if self._setitem is not None: + for n, x in self._setitem.items(): + array[n] = x + if self._delitem is not None: + for n in self._delitem: + del array[n] + + if self._type is not None and self._type != awkward.type.fromarray(array): + raise TypeError("materialized array has type\n\n{0}\n\nexpected type\n\n{1}".format(awkward.type.fromarray(array).__str__(indent=" "), self._type.__str__(indent=" "))) + + if self._cache is None: + # states (1), (2), and (6) + self._array = array + else: + # states (3) and (4) + self._array = self.key + self._cache[self._array] = array + + return array + + def __del__(self): + # TransientKeys are based on runtime ids, which Python may reuse after an object is garbage collected + # they *MUST* be removed from the cache to avoid confusion; persistentkeys can (and should) stay in + if getattr(self, "_cache", None) is not None and isinstance(self._array, VirtualArray.TransientKey): + try: + del self._cache[self._array] + except: + pass def __iter__(self): - raise NotImplementedError + return iter(self.array) + + def __array__(self, *args, **kwargs): + return awkward.util.numpy.array(self.array, *args, **kwargs) def __getitem__(self, where): - raise NotImplementedError + return self.array[where] def __setitem__(self, where, what): - raise NotImplementedError + self.array[where] = what + if self._type is not None: + self._type = awkward.type.fromarray(self.array) + if self._setitem is None: +
self._setitem = awkward.util.OrderedDict() + self._setitem[where] = what + + def __delitem__(self, where): + del self.array[where] + if self._type is not None: + self._type = awkward.type.fromarray(self.array) + if self._setitem is not None and where in self._setitem: + del self._setitem[where] + if self._delitem is None: + self._delitem = [] + if where not in self._delitem: + self._delitem.append(where) def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): - raise NotImplementedError + if method != "__call__": + return NotImplemented + + inputs = list(inputs) + for i in range(len(inputs)): + if isinstance(inputs[i], VirtualArray): + inputs[i]._valid() + inputs[i] = inputs[i].array + return getattr(ufunc, method)(*inputs, **kwargs) + def any(self): - raise NotImplementedError + return self.array.any() def all(self): - raise NotImplementedError + return self.array.all() @classmethod def concat(cls, first, *rest): raise NotImplementedError + @property + def base(self): + return self.array.base + @property def columns(self): - raise NotImplementedError + return self.array.columns @property def allcolumns(self): - raise NotImplementedError + return self.array.allcolumns def pandas(self): raise NotImplementedError - -# class VirtualArray(awkward.array.base.AwkwardArray): -# class TransientKey(object): -# def __init__(self, id): -# self._id = id -# def __repr__(self): -# return "".format(self._id) -# def __hash__(self): -# return hash((VirtualArray.TransientKey, self._id)) -# def __eq__(self, other): -# return isinstance(other, VirtualArray.TransientKey) and self._id == other._id -# def __ne__(self, other): -# return not self.__eq__(other) -# def __getstate__(self): -# raise RuntimeError("VirtualArray.TransientKeys are not unique across processes, and hence should not be serialized") - -# def __init__(self, generator, cache=None, persistentkey=None, dtype=None, shape=None): -# self.generator = generator -# self.cache = cache -# self.persistentkey = persistentkey -#
self._array = None - -# if dtype is None: -# self._dtype = dtype -# else: -# self._dtype = numpy.dtype(dtype) - -# if shape is None or (isinstance(shape, tuple) and len(shape) != 0 and all(isinstance(x, (numbers.Integral, numpy.integer)) and x >= 0 for x in shape)): -# self._shape = shape -# else: -# raise TypeError("shape must be None (unknown) or a non-empty tuple of non-negative integers") - -# @property -# def generator(self): -# return self._generator - -# @generator.setter -# def generator(self, value): -# if not callable(value): -# raise TypeError("generator must be a callable (of zero arguments)") -# self._generator = value - -# @property -# def cache(self): -# return self._cache - -# @cache.setter -# def cache(self, value): -# if not value is None and not (callable(getattr(value, "__getitem__", None)) and callable(getattr(value, "__setitem__", None)) and callable(getattr(value, "__delitem__", None))): -# raise TypeError("cache must be a dict or have __getitem__/__setitem__/__delitem__ methods") -# self._cache = value - -# @property -# def persistentkey(self): -# return self._persistentkey - -# @persistentkey.setter -# def persistentkey(self, value): -# if value is not None and not isinstance(value, awkward.util.string): -# raise TypeError("persistentkey must be a string or None") -# self._persistentkey = value - -# @property -# def dtype(self): -# if self._dtype is not None: -# return self._dtype -# else: -# return self.array.dtype - -# @property -# def shape(self): -# if self._shape is not None: -# return self._shape -# else: -# return self.array.shape - -# @property -# def key(self): -# if self._persistentkey is not None: -# return self._persistentkey -# else: -# return self.TransientKey(id(self)) - -# @property -# def array(self): -# # Normal states: (1) no cache and _array is None: make a new one -# # (2) no cache and _array is an array: return _array -# # (3) have a cache and _array is None: make a new one (filling cache) -# # (4) have a cache and 
_array is a key and cache[key] was evicted: make a new one (filling cache) -# # (5) have a cache and _array is a key and cache[key] exists: return cache[key] -# # -# # Abnormal states: (6) no cache and _array is a key (user removed _cache): make a new one -# # (7) have a cache and _array is an array (user added _cache): fill cache and return _array - -# something = self._array - -# if something is None: -# # states (1) and (3) -# return self.materialize() - -# elif self._cache is None: -# if isinstance(something, (VirtualArray.TransientKey, awkward.util.string)): -# # abnormal state (6) -# return self.materialize() -# else: -# # state (2) -# return something - -# else: -# if isinstance(something, (VirtualArray.TransientKey, awkward.util.string)): -# try: -# # state (5) -# return self._cache[something] -# except: -# # state (4), taking any error in __getitem__ as evidence that it was evicted -# return self.materialize() -# else: -# # abnormal state (7) -# self._cache[self.key] = something -# return something - -# @property -# def ismaterialized(self): -# if self._cache is None: -# return isinstance(self._array, (numpy.ndarray, awkward.array.base.AwkwardArray)) -# else: -# return self._array is not None and self._array in self._cache - -# def materialize(self): -# array = self._toarray(self.generator(), self.CHARTYPE, (numpy.ndarray, awkward.array.base.AwkwardArray)) - -# if self._dtype is not None and self._dtype != array.dtype: -# raise ValueError("materialized array has dtype {0}, expected dtype {1}".format(array.dtype, self._dtype)) -# if self._shape is not None and self._shape != array.shape: -# raise ValueError("materialized array has shape {0}, expected shape {1}".format(array.shape, self._shape)) -# if len(array.shape) == 0: -# raise ValueError("materialized object is scalar: {0}".format(array)) - -# if self._cache is None: -# # states (1), (2), and (6) -# self._array = array -# else: -# # states (3) and (4) -# self._array = self.key -# 
self._cache[self._array] = array - -# return array - -# def __del__(self): -# # TransientKeys are based on runtime ids, which Python may reuse after an object is garbage collected -# # they *MUST* be removed from the cache to avoid confusion; persistentkeys can (and should) stay in -# if self._cache is not None and isinstance(self._array, VirtualArray.TransientKey): -# try: -# del self._cache[self._array] -# except: -# pass - -# def __len__(self): -# return self.shape[0] - -# def __getitem__(self, where): -# return self.array[where] diff --git a/awkward/generate.py b/awkward/generate.py index 66f868eb..7ae1a6b0 100644 --- a/awkward/generate.py +++ b/awkward/generate.py @@ -35,9 +35,8 @@ import awkward.array.base import awkward.util from awkward.array.chunked import ChunkedArray, AppendableArray -from awkward.array.indexed import IndexedMaskedArray from awkward.array.jagged import JaggedArray -from awkward.array.masked import BitMaskedArray +from awkward.array.masked import BitMaskedArray, IndexedMaskedArray from awkward.array.objects import ObjectArray from awkward.array.table import Table from awkward.array.union import UnionArray diff --git a/awkward/type.py b/awkward/type.py index a0419be2..bb317b0a 100644 --- a/awkward/type.py +++ b/awkward/type.py @@ -52,6 +52,11 @@ def fromarray(array): return array.type def fromnumpy(shape, dtype, masked=False): + if not isinstance(shape, tuple): + shape = (shape,) + if not isinstance(dtype, awkward.util.numpy.dtype): + dtype = awkward.util.numpy.dtype(dtype) + if masked: return OptionType(fromnumpy(shape, dtype)) elif dtype.subdtype is not None: @@ -309,13 +314,6 @@ def dtype(self): else: return self._to.dtype - @property - def jshape(self): - if isinstance(self._to, awkward.util.numpy.dtype): - return (self._takes, self._to) - else: - return (self._takes,) + self._to.jshape - def _isnumpy(self, seen): if id(self) in seen: return False @@ -389,10 +387,6 @@ def dtype(self): out.append((n, x.dtype)) return 
awkward.util.numpy.dtype(out) - @property - def jshape(self): - return (dict((n, x if isinstance(x, awkward.util.numpy.dtype) else x.jshape) for n, x in self._fields.items()),) - def _isnumpy(self, seen): if id(self) in seen: return False @@ -469,10 +463,6 @@ def shape(self): def dtype(self): raise TypeError("Union has no Numpy dtype") - @property - def jshape(self): - return ([x.jshape for x in self._possibilities],) - def _isnumpy(self, seen): return False @@ -552,10 +542,6 @@ def shape(self): def dtype(self): return self._type.dtype - @property - def jshape(self): - return ([self._type.jshape, None],) - def _isnumpy(self, seen): if id(self) in seen: return False diff --git a/awkward/util.py b/awkward/util.py index 5a614597..dfa209b9 100644 --- a/awkward/util.py +++ b/awkward/util.py @@ -77,11 +77,12 @@ def is_intstring(x): integer = (numbers.Integral, numpy.integer) +DEFAULTTYPE = numpy.dtype(numpy.float64) CHARTYPE = numpy.dtype(numpy.uint8) INDEXTYPE = numpy.dtype(numpy.int64) +TAGTYPE = numpy.dtype(numpy.uint8) MASKTYPE = numpy.dtype(numpy.bool_) BITMASKTYPE = numpy.dtype(numpy.uint8) -DEFAULTTYPE = numpy.dtype(numpy.float64) def toarray(value, defaultdtype, passthrough=None): import awkward.array.base @@ -154,15 +155,6 @@ def iscomparison(ufunc): ufunc is numpy.greater or ufunc is numpy.greater_equal) -def _argfields(function): - if not isinstance(function, types.FunctionType): - raise TypeError("apply method requires a function (or lambda)") - - if function.__code__.co_argcount != 1: - raise TypeError("apply method requires a one-argument function (or lambda) when applied to non-Tables") - - return None, None - try: NDArrayOperatorsMixin = numpy.lib.mixins.NDArrayOperatorsMixin diff --git a/awkward/version.py b/awkward/version.py index 464b2e41..900ccf0b 100644 --- a/awkward/version.py +++ b/awkward/version.py @@ -30,7 +30,7 @@ import re -__version__ = "0.1.0" +__version__ = "0.2.0" version = __version__ version_info = tuple(re.split(r"[-\.]", __version__)) 
diff --git a/tests/test_chunked.py b/tests/test_chunked.py index 7ecc4af5..8d8d59de 100644 --- a/tests/test_chunked.py +++ b/tests/test_chunked.py @@ -159,89 +159,91 @@ def test_chunked_get2d(self): self.assertEqual(a[[True, False, True, False, True, False, True, False, True, False], 0].tolist(), [0.0, 2.0, 4.0, 6.0, 8.0]) self.assertEqual(a[[True, False, True, False, True, False, True, False, True, False], 1].tolist(), [0.0, 2.2, 4.4, 6.6, 8.8]) -# def test_appendable_append(self): -# a = AppendableArray.empty(lambda: numpy.empty(3, numpy.float64)) -# self.assertEqual(a.tolist(), []) -# self.assertEqual(len(a.chunks), 0) -# self.assertEqual(a.offsets, [0]) - -# a.append(0.0) -# self.assertEqual(a.tolist(), [0.0]) -# self.assertEqual(len(a.chunks), 1) -# self.assertEqual(a.offsets, [0, 1]) - -# a.append(1.1) -# self.assertEqual(a.tolist(), [0.0, 1.1]) -# self.assertEqual(len(a.chunks), 1) -# self.assertEqual(a.offsets, [0, 2]) - -# a.append(2.2) -# self.assertEqual(a.tolist(), [0.0, 1.1, 2.2]) -# self.assertEqual(len(a.chunks), 1) -# self.assertEqual(a.offsets, [0, 3]) - -# a.append(3.3) -# self.assertEqual(a.tolist(), [0.0, 1.1, 2.2, 3.3]) -# self.assertEqual(len(a.chunks), 2) -# self.assertEqual(a.offsets, [0, 3, 4]) - -# a.append(4.4) -# self.assertEqual(a.tolist(), [0.0, 1.1, 2.2, 3.3, 4.4]) -# self.assertEqual(len(a.chunks), 2) -# self.assertEqual(a.offsets, [0, 3, 5]) - -# a.append(5.5) -# self.assertEqual(a.tolist(), [0.0, 1.1, 2.2, 3.3, 4.4, 5.5]) -# self.assertEqual(len(a.chunks), 2) -# self.assertEqual(a.offsets, [0, 3, 6]) - -# a.append(6.6) -# self.assertEqual(a.tolist(), [0.0, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6]) -# self.assertEqual(len(a.chunks), 3) -# self.assertEqual(a.offsets, [0, 3, 6, 7]) - -# a.append(7.7) -# self.assertEqual(a.tolist(), [0.0, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7]) -# self.assertEqual(len(a.chunks), 3) -# self.assertEqual(a.offsets, [0, 3, 6, 8]) - -# a.append(8.8) -# self.assertEqual(a.tolist(), [0.0, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 
8.8]) -# self.assertEqual(len(a.chunks), 3) -# self.assertEqual(a.offsets, [0, 3, 6, 9]) - -# a.append(9.9) -# self.assertEqual(a.tolist(), [0.0, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9]) -# self.assertEqual(len(a.chunks), 4) -# self.assertEqual(a.offsets, [0, 3, 6, 9, 10]) - -# def test_appendable_extend(self): -# a = AppendableArray.empty(lambda: numpy.empty(3, numpy.float64)) -# self.assertEqual(a.tolist(), []) -# self.assertEqual(len(a.chunks), 0) -# self.assertEqual(a.offsets, [0]) - -# a.extend([0.0, 1.1]) -# self.assertEqual(a.tolist(), [0.0, 1.1]) -# self.assertEqual(len(a.chunks), 1) -# self.assertEqual(a.offsets, [0, 2]) - -# a.extend([2.2, 3.3]) -# self.assertEqual(a.tolist(), [0.0, 1.1, 2.2, 3.3]) -# self.assertEqual(len(a.chunks), 2) -# self.assertEqual(a.offsets, [0, 3, 4]) - -# a.extend([4.4, 5.5]) -# self.assertEqual(a.tolist(), [0.0, 1.1, 2.2, 3.3, 4.4, 5.5]) -# self.assertEqual(len(a.chunks), 2) -# self.assertEqual(a.offsets, [0, 3, 6]) - -# a.extend([6.6, 7.7]) -# self.assertEqual(a.tolist(), [0.0, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7]) -# self.assertEqual(len(a.chunks), 3) -# self.assertEqual(a.offsets, [0, 3, 6, 8]) - -# a.extend([8.8, 9.9]) -# self.assertEqual(a.tolist(), [0.0, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9]) -# self.assertEqual(len(a.chunks), 4) -# self.assertEqual(a.offsets, [0, 3, 6, 9, 10]) + def test_appendable_append(self): + a = AppendableArray(3, numpy.float64) + self.assertEqual(a.tolist(), []) + self.assertEqual(len(a.chunks), 0) + self.assertEqual(a.offsets.tolist(), [0]) + + a.append(0.0) + self.assertEqual(a.tolist(), [0.0]) + self.assertEqual(len(a.chunks), 1) + self.assertEqual(a.offsets.tolist(), [0, 1]) + + a.append(1.1) + self.assertEqual(a.tolist(), [0.0, 1.1]) + self.assertEqual(len(a.chunks), 1) + self.assertEqual(a.offsets.tolist(), [0, 2]) + + a.append(2.2) + self.assertEqual(a.tolist(), [0.0, 1.1, 2.2]) + self.assertEqual(len(a.chunks), 1) + self.assertEqual(a.offsets.tolist(), [0, 3]) + + a.append(3.3) + 
self.assertEqual(a.tolist(), [0.0, 1.1, 2.2, 3.3]) + self.assertEqual(len(a.chunks), 2) + self.assertEqual(a.offsets.tolist(), [0, 3, 4]) + + a.append(4.4) + self.assertEqual(a.tolist(), [0.0, 1.1, 2.2, 3.3, 4.4]) + self.assertEqual(len(a.chunks), 2) + self.assertEqual(a.offsets.tolist(), [0, 3, 5]) + + a.append(5.5) + self.assertEqual(a.tolist(), [0.0, 1.1, 2.2, 3.3, 4.4, 5.5]) + self.assertEqual(len(a.chunks), 2) + self.assertEqual(a.offsets.tolist(), [0, 3, 6]) + + a.append(6.6) + self.assertEqual(a.tolist(), [0.0, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6]) + self.assertEqual(len(a.chunks), 3) + self.assertEqual(a.offsets.tolist(), [0, 3, 6, 7]) + + a.append(7.7) + self.assertEqual(a.tolist(), [0.0, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7]) + self.assertEqual(len(a.chunks), 3) + self.assertEqual(a.offsets.tolist(), [0, 3, 6, 8]) + + a.append(8.8) + self.assertEqual(a.tolist(), [0.0, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8]) + self.assertEqual(len(a.chunks), 3) + self.assertEqual(a.offsets.tolist(), [0, 3, 6, 9]) + + a.append(9.9) + self.assertEqual(a.tolist(), [0.0, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9]) + self.assertEqual([a[i] for i in range(len(a))], [0.0, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9]) + self.assertEqual(len(a.chunks), 4) + self.assertEqual(a.offsets.tolist(), [0, 3, 6, 9, 10]) + + def test_appendable_extend(self): + a = AppendableArray(3, numpy.float64) + self.assertEqual(a.tolist(), []) + self.assertEqual(len(a.chunks), 0) + self.assertEqual(a.offsets.tolist(), [0]) + + a.extend([0.0, 1.1]) + self.assertEqual(a.tolist(), [0.0, 1.1]) + self.assertEqual(len(a.chunks), 1) + self.assertEqual(a.offsets.tolist(), [0, 2]) + + a.extend([2.2, 3.3]) + self.assertEqual(a.tolist(), [0.0, 1.1, 2.2, 3.3]) + self.assertEqual(len(a.chunks), 2) + self.assertEqual(a.offsets.tolist(), [0, 3, 4]) + + a.extend([4.4, 5.5]) + self.assertEqual(a.tolist(), [0.0, 1.1, 2.2, 3.3, 4.4, 5.5]) + self.assertEqual(len(a.chunks), 2) + self.assertEqual(a.offsets.tolist(), [0, 3, 6]) + + 
a.extend([6.6, 7.7]) + self.assertEqual(a.tolist(), [0.0, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7]) + self.assertEqual(len(a.chunks), 3) + self.assertEqual(a.offsets.tolist(), [0, 3, 6, 8]) + + a.extend([8.8, 9.9]) + self.assertEqual(a.tolist(), [0.0, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9]) + self.assertEqual([a[i] for i in range(len(a))], [0.0, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9]) + self.assertEqual(len(a.chunks), 4) + self.assertEqual(a.offsets.tolist(), [0, 3, 6, 9, 10]) diff --git a/tests/test_indexed.py b/tests/test_indexed.py index a6ae10da..b8edd017 100644 --- a/tests/test_indexed.py +++ b/tests/test_indexed.py @@ -126,3 +126,139 @@ def test_indexed_byteindexed(self): a = IndexedArray([1, 2, 3], ByteIndexedArray([12, 8, 4, 0], b"\x00\x00\x00\x00\x01\x00\x00\x00\x02\x00\x00\x00\x03\x00\x00\x00", numpy.int32)) self.assertEqual([a[i] for i in range(len(a))], [2, 1, 0]) self.assertEqual(a[:].tolist(), [2, 1, 0]) + + def test_sparse_get(self): + a = SparseArray(10, [1, 3, 5, 7, 9], [100, 101, 102, 103, 104]) + + self.assertEqual(a.tolist(), [0, 100, 0, 101, 0, 102, 0, 103, 0, 104]) + self.assertEqual([a[i].tolist() for i in range(len(a))], [0, 100, 0, 101, 0, 102, 0, 103, 0, 104]) + self.assertEqual([a[i : i + 1].tolist() for i in range(len(a))], [[0], [100], [0], [101], [0], [102], [0], [103], [0], [104]]) + self.assertEqual([a[i : i + 2].tolist() for i in range(len(a) - 1)], [[0, 100], [100, 0], [0, 101], [101, 0], [0, 102], [102, 0], [0, 103], [103, 0], [0, 104]]) + + self.assertEqual(a[:].tolist(), [0, 100, 0, 101, 0, 102, 0, 103, 0, 104]) + self.assertEqual(a[1:].tolist(), [100, 0, 101, 0, 102, 0, 103, 0, 104]) + self.assertEqual(a[2:].tolist(), [0, 101, 0, 102, 0, 103, 0, 104]) + self.assertEqual(a[2:-1].tolist(), [0, 101, 0, 102, 0, 103, 0]) + self.assertEqual(a[2:-2].tolist(), [0, 101, 0, 102, 0, 103]) + self.assertEqual(a[:-2].tolist(), [0, 100, 0, 101, 0, 102, 0, 103]) + self.assertEqual(a[::2].tolist(), [0, 0, 0, 0, 0]) + 
self.assertEqual(a[1::2].tolist(), [100, 101, 102, 103, 104]) + self.assertEqual(a[2::2].tolist(), [0, 0, 0, 0]) + self.assertEqual(a[3::2].tolist(), [101, 102, 103, 104]) + self.assertEqual(a[::-1].tolist(), [104, 0, 103, 0, 102, 0, 101, 0, 100, 0]) + self.assertEqual(a[-2::-1].tolist(), [0, 103, 0, 102, 0, 101, 0, 100, 0]) + self.assertEqual(a[-3::-1].tolist(), [103, 0, 102, 0, 101, 0, 100, 0]) + self.assertEqual(a[-3:0:-1].tolist(), [103, 0, 102, 0, 101, 0, 100]) + self.assertEqual(a[-3:1:-1].tolist(), [103, 0, 102, 0, 101, 0]) + self.assertEqual(a[::-2].tolist(), [104, 103, 102, 101, 100]) + self.assertEqual(a[-1::-2].tolist(), [104, 103, 102, 101, 100]) + self.assertEqual(a[-2::-2].tolist(), [0, 0, 0, 0, 0]) + self.assertEqual(a[-3::-2].tolist(), [103, 102, 101, 100]) + self.assertEqual(a[[1, 3, 5, 7, 9]].tolist(), [100, 101, 102, 103, 104]) + self.assertEqual(a[[1, 3, 5, 7, 8, 9]].tolist(), [100, 101, 102, 103, 0, 104]) + self.assertEqual(a[[1, 3, 5, 9, 7]].tolist(), [100, 101, 102, 104, 103]) + self.assertEqual(a[[1, 3, 5, 9, 8, 7]].tolist(), [100, 101, 102, 104, 0, 103]) + self.assertEqual(a[[False, True, False, True, False, True, False, True, False, True]].tolist(), [100, 101, 102, 103, 104]) + self.assertEqual(a[[True, True, False, True, False, True, False, True, False, True]].tolist(), [0, 100, 101, 102, 103, 104]) + self.assertEqual(a[[True, True, True, True, False, True, False, True, False, True]].tolist(), [0, 100, 0, 101, 102, 103, 104]) + + self.assertEqual([a[1:][i].tolist() for i in range(9)], [100, 0, 101, 0, 102, 0, 103, 0, 104]) + self.assertEqual([a[[1, 3, 5, 9, 8, 7]][i].tolist() for i in range(6)], [100, 101, 102, 104, 0, 103]) + self.assertEqual([a[[True, True, True, True, False, True, False, True, False, True]][i].tolist() for i in range(7)], [0, 100, 0, 101, 102, 103, 104]) + + self.assertEqual(a.dense.tolist(), [0, 100, 0, 101, 0, 102, 0, 103, 0, 104]) + self.assertEqual([a.dense[i].tolist() for i in range(len(a))], [0, 100, 0, 101, 0, 
102, 0, 103, 0, 104]) + self.assertEqual([a.dense[i : i + 1].tolist() for i in range(len(a))], [[0], [100], [0], [101], [0], [102], [0], [103], [0], [104]]) + self.assertEqual([a.dense[i : i + 2].tolist() for i in range(len(a) - 1)], [[0, 100], [100, 0], [0, 101], [101, 0], [0, 102], [102, 0], [0, 103], [103, 0], [0, 104]]) + + self.assertEqual(a[:].dense.tolist(), [0, 100, 0, 101, 0, 102, 0, 103, 0, 104]) + self.assertEqual(a[1:].dense.tolist(), [100, 0, 101, 0, 102, 0, 103, 0, 104]) + self.assertEqual(a[2:].dense.tolist(), [0, 101, 0, 102, 0, 103, 0, 104]) + self.assertEqual(a[2:-1].dense.tolist(), [0, 101, 0, 102, 0, 103, 0]) + self.assertEqual(a[2:-2].dense.tolist(), [0, 101, 0, 102, 0, 103]) + self.assertEqual(a[:-2].dense.tolist(), [0, 100, 0, 101, 0, 102, 0, 103]) + self.assertEqual(a[::2].dense.tolist(), [0, 0, 0, 0, 0]) + self.assertEqual(a[1::2].dense.tolist(), [100, 101, 102, 103, 104]) + self.assertEqual(a[2::2].dense.tolist(), [0, 0, 0, 0]) + self.assertEqual(a[3::2].dense.tolist(), [101, 102, 103, 104]) + self.assertEqual(a[::-1].dense.tolist(), [104, 0, 103, 0, 102, 0, 101, 0, 100, 0]) + self.assertEqual(a[-2::-1].dense.tolist(), [0, 103, 0, 102, 0, 101, 0, 100, 0]) + self.assertEqual(a[-3::-1].dense.tolist(), [103, 0, 102, 0, 101, 0, 100, 0]) + self.assertEqual(a[-3:0:-1].dense.tolist(), [103, 0, 102, 0, 101, 0, 100]) + self.assertEqual(a[-3:1:-1].dense.tolist(), [103, 0, 102, 0, 101, 0]) + self.assertEqual(a[::-2].dense.tolist(), [104, 103, 102, 101, 100]) + self.assertEqual(a[-1::-2].dense.tolist(), [104, 103, 102, 101, 100]) + self.assertEqual(a[-2::-2].dense.tolist(), [0, 0, 0, 0, 0]) + self.assertEqual(a[-3::-2].dense.tolist(), [103, 102, 101, 100]) + + self.assertEqual([a[1:].dense[i].tolist() for i in range(9)], [100, 0, 101, 0, 102, 0, 103, 0, 104]) + + def test_sparse_get2d_content(self): + a = SparseArray(10, [1, 3, 5, 7, 9], [[100], [101], [102], [103], [104]]) + + self.assertEqual(a.tolist(), [[0], [100], [0], [101], [0], [102], [0], 
[103], [0], [104]]) + self.assertEqual([a[i].tolist() for i in range(len(a))], [[0], [100], [0], [101], [0], [102], [0], [103], [0], [104]]) + self.assertEqual([a[i : i + 1].tolist() for i in range(len(a))], [[[0]], [[100]], [[0]], [[101]], [[0]], [[102]], [[0]], [[103]], [[0]], [[104]]]) + self.assertEqual([a[i : i + 2].tolist() for i in range(len(a) - 1)], [[[0], [100]], [[100], [0]], [[0], [101]], [[101], [0]], [[0], [102]], [[102], [0]], [[0], [103]], [[103], [0]], [[0], [104]]]) + + self.assertEqual(a[:].tolist(), [[0], [100], [0], [101], [0], [102], [0], [103], [0], [104]]) + self.assertEqual(a[1:].tolist(), [[100], [0], [101], [0], [102], [0], [103], [0], [104]]) + self.assertEqual(a[2:].tolist(), [[0], [101], [0], [102], [0], [103], [0], [104]]) + self.assertEqual(a[2:-1].tolist(), [[0], [101], [0], [102], [0], [103], [0]]) + self.assertEqual(a[2:-2].tolist(), [[0], [101], [0], [102], [0], [103]]) + self.assertEqual(a[:-2].tolist(), [[0], [100], [0], [101], [0], [102], [0], [103]]) + self.assertEqual(a[::2].tolist(), [[0], [0], [0], [0], [0]]) + self.assertEqual(a[1::2].tolist(), [[100], [101], [102], [103], [104]]) + self.assertEqual(a[2::2].tolist(), [[0], [0], [0], [0]]) + self.assertEqual(a[3::2].tolist(), [[101], [102], [103], [104]]) + self.assertEqual(a[::-1].tolist(), [[104], [0], [103], [0], [102], [0], [101], [0], [100], [0]]) + self.assertEqual(a[-2::-1].tolist(), [[0], [103], [0], [102], [0], [101], [0], [100], [0]]) + self.assertEqual(a[-3::-1].tolist(), [[103], [0], [102], [0], [101], [0], [100], [0]]) + self.assertEqual(a[-3:0:-1].tolist(), [[103], [0], [102], [0], [101], [0], [100]]) + self.assertEqual(a[-3:1:-1].tolist(), [[103], [0], [102], [0], [101], [0]]) + self.assertEqual(a[::-2].tolist(), [[104], [103], [102], [101], [100]]) + self.assertEqual(a[-1::-2].tolist(), [[104], [103], [102], [101], [100]]) + self.assertEqual(a[-2::-2].tolist(), [[0], [0], [0], [0], [0]]) + self.assertEqual(a[-3::-2].tolist(), [[103], [102], [101], [100]]) + 
self.assertEqual(a[[1, 3, 5, 7, 9]].tolist(), [[100], [101], [102], [103], [104]]) + self.assertEqual(a[[1, 3, 5, 7, 8, 9]].tolist(), [[100], [101], [102], [103], [0], [104]]) + self.assertEqual(a[[1, 3, 5, 9, 7]].tolist(), [[100], [101], [102], [104], [103]]) + self.assertEqual(a[[1, 3, 5, 9, 8, 7]].tolist(), [[100], [101], [102], [104], [0], [103]]) + self.assertEqual(a[[False, True, False, True, False, True, False, True, False, True]].tolist(), [[100], [101], [102], [103], [104]]) + self.assertEqual(a[[True, True, False, True, False, True, False, True, False, True]].tolist(), [[0], [100], [101], [102], [103], [104]]) + self.assertEqual(a[[True, True, True, True, False, True, False, True, False, True]].tolist(), [[0], [100], [0], [101], [102], [103], [104]]) + + self.assertEqual([a[1:][i].tolist() for i in range(9)], [[100], [0], [101], [0], [102], [0], [103], [0], [104]]) + self.assertEqual([a[[1, 3, 5, 9, 8, 7]][i].tolist() for i in range(6)], [[100], [101], [102], [104], [0], [103]]) + self.assertEqual([a[[True, True, True, True, False, True, False, True, False, True]][i].tolist() for i in range(7)], [[0], [100], [0], [101], [102], [103], [104]]) + + self.assertEqual(a.dense.tolist(), [[0], [100], [0], [101], [0], [102], [0], [103], [0], [104]]) + self.assertEqual([a.dense[i].tolist() for i in range(len(a))], [[0], [100], [0], [101], [0], [102], [0], [103], [0], [104]]) + self.assertEqual([a.dense[i : i + 1].tolist() for i in range(len(a))], [[[0]], [[100]], [[0]], [[101]], [[0]], [[102]], [[0]], [[103]], [[0]], [[104]]]) + self.assertEqual([a.dense[i : i + 2].tolist() for i in range(len(a) - 1)], [[[0], [100]], [[100], [0]], [[0], [101]], [[101], [0]], [[0], [102]], [[102], [0]], [[0], [103]], [[103], [0]], [[0], [104]]]) + + self.assertEqual(a[:].dense.tolist(), [[0], [100], [0], [101], [0], [102], [0], [103], [0], [104]]) + self.assertEqual(a[1:].dense.tolist(), [[100], [0], [101], [0], [102], [0], [103], [0], [104]]) + 
self.assertEqual(a[2:].dense.tolist(), [[0], [101], [0], [102], [0], [103], [0], [104]]) + self.assertEqual(a[2:-1].dense.tolist(), [[0], [101], [0], [102], [0], [103], [0]]) + self.assertEqual(a[2:-2].dense.tolist(), [[0], [101], [0], [102], [0], [103]]) + self.assertEqual(a[:-2].dense.tolist(), [[0], [100], [0], [101], [0], [102], [0], [103]]) + self.assertEqual(a[::2].dense.tolist(), [[0], [0], [0], [0], [0]]) + self.assertEqual(a[1::2].dense.tolist(), [[100], [101], [102], [103], [104]]) + self.assertEqual(a[2::2].dense.tolist(), [[0], [0], [0], [0]]) + self.assertEqual(a[3::2].dense.tolist(), [[101], [102], [103], [104]]) + self.assertEqual(a[::-1].dense.tolist(), [[104], [0], [103], [0], [102], [0], [101], [0], [100], [0]]) + self.assertEqual(a[-2::-1].dense.tolist(), [[0], [103], [0], [102], [0], [101], [0], [100], [0]]) + self.assertEqual(a[-3::-1].dense.tolist(), [[103], [0], [102], [0], [101], [0], [100], [0]]) + self.assertEqual(a[-3:0:-1].dense.tolist(), [[103], [0], [102], [0], [101], [0], [100]]) + self.assertEqual(a[-3:1:-1].dense.tolist(), [[103], [0], [102], [0], [101], [0]]) + self.assertEqual(a[::-2].dense.tolist(), [[104], [103], [102], [101], [100]]) + self.assertEqual(a[-1::-2].dense.tolist(), [[104], [103], [102], [101], [100]]) + self.assertEqual(a[-2::-2].dense.tolist(), [[0], [0], [0], [0], [0]]) + self.assertEqual(a[-3::-2].dense.tolist(), [[103], [102], [101], [100]]) + + self.assertEqual([a[1:].dense[i].tolist() for i in range(9)], [[100], [0], [101], [0], [102], [0], [103], [0], [104]]) + + def test_indexed_ufunc(self): + a = SparseArray(10, [1, 3, 5, 7, 9], [100, 101, 102, 103, 104]) + self.assertEqual((a + 100).tolist(), [100, 200, 100, 201, 100, 202, 100, 203, 100, 204]) diff --git a/tests/test_masked.py b/tests/test_masked.py index ebce2458..a2bab5dd 100644 --- a/tests/test_masked.py +++ b/tests/test_masked.py @@ -38,86 +38,97 @@ class Test(unittest.TestCase): def runTest(self): pass - ################### old tests - - # def 
test_masked_get(self): - # a = MaskedArray([True, False, True, False, True, False, True, False, True, False], [0.0, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9], maskedwhen=True) - # self.assertEqual(a.tolist(), [None, 1.1, None, 3.3, None, 5.5, None, 7.7, None, 9.9]) - # self.assertTrue(numpy.ma.is_masked(a[0])) - # self.assertFalse(numpy.ma.is_masked(a[1])) - # self.assertEqual(a[5:].tolist(), [5.5, None, 7.7, None, 9.9]) - # self.assertFalse(numpy.ma.is_masked(a[5:][0])) - # self.assertTrue(numpy.ma.is_masked(a[5:][1])) - # self.assertEqual(a[[3, 2, 1]].tolist(), [3.3, None, 1.1]) - # self.assertEqual(a[[True, True, True, True, True, False, False, False, False, False]].tolist(), [None, 1.1, None, 3.3, None]) - - # def test_masked_get_flip(self): - # a = MaskedArray([False, True, False, True, False, True, False, True, False, True], [0.0, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9], maskedwhen=False) - # self.assertEqual(a.tolist(), [None, 1.1, None, 3.3, None, 5.5, None, 7.7, None, 9.9]) - # self.assertTrue(numpy.ma.is_masked(a[0])) - # self.assertFalse(numpy.ma.is_masked(a[1])) - # self.assertEqual(a[5:].tolist(), [5.5, None, 7.7, None, 9.9]) - # self.assertFalse(numpy.ma.is_masked(a[5:][0])) - # self.assertTrue(numpy.ma.is_masked(a[5:][1])) - # self.assertEqual(a[[3, 2, 1]].tolist(), [3.3, None, 1.1]) - # self.assertEqual(a[[True, True, True, True, True, False, False, False, False, False]].tolist(), [None, 1.1, None, 3.3, None]) - - # def test_bitmasked_get(self): - # a = BitMaskedArray.fromboolmask([True, False, True, False, True, False, True, False, True, False], [0.0, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9], maskedwhen=True, lsb=True) - # self.assertEqual(a.tolist(), [None, 1.1, None, 3.3, None, 5.5, None, 7.7, None, 9.9]) - # self.assertTrue(numpy.ma.is_masked(a[0])) - # self.assertFalse(numpy.ma.is_masked(a[1])) - # self.assertEqual(a[5:].tolist(), [5.5, None, 7.7, None, 9.9]) - # self.assertFalse(numpy.ma.is_masked(a[5:][0])) - # 
self.assertTrue(numpy.ma.is_masked(a[5:][1])) - # self.assertEqual(a[[3, 2, 1]].tolist(), [3.3, None, 1.1]) - # self.assertEqual(a[[True, True, True, True, True, False, False, False, False, False]].tolist(), [None, 1.1, None, 3.3, None]) - - # a = BitMaskedArray.fromboolmask([True, False, True, False, True, False, True, False, True, False], [0.0, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9], maskedwhen=True, lsb=False) - # self.assertEqual(a.tolist(), [None, 1.1, None, 3.3, None, 5.5, None, 7.7, None, 9.9]) - # self.assertTrue(numpy.ma.is_masked(a[0])) - # self.assertFalse(numpy.ma.is_masked(a[1])) - # self.assertEqual(a[5:].tolist(), [5.5, None, 7.7, None, 9.9]) - # self.assertFalse(numpy.ma.is_masked(a[5:][0])) - # self.assertTrue(numpy.ma.is_masked(a[5:][1])) - # self.assertEqual(a[[3, 2, 1]].tolist(), [3.3, None, 1.1]) - # self.assertEqual(a[[True, True, True, True, True, False, False, False, False, False]].tolist(), [None, 1.1, None, 3.3, None]) - - # def test_bitmasked_get_flip(self): - # a = BitMaskedArray.fromboolmask([False, True, False, True, False, True, False, True, False, True], [0.0, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9], maskedwhen=False, lsb=True) - # self.assertEqual(a.tolist(), [None, 1.1, None, 3.3, None, 5.5, None, 7.7, None, 9.9]) - # self.assertTrue(numpy.ma.is_masked(a[0])) - # self.assertFalse(numpy.ma.is_masked(a[1])) - # self.assertEqual(a[5:].tolist(), [5.5, None, 7.7, None, 9.9]) - # self.assertFalse(numpy.ma.is_masked(a[5:][0])) - # self.assertTrue(numpy.ma.is_masked(a[5:][1])) - # self.assertEqual(a[[3, 2, 1]].tolist(), [3.3, None, 1.1]) - # self.assertEqual(a[[True, True, True, True, True, False, False, False, False, False]].tolist(), [None, 1.1, None, 3.3, None]) - - # a = BitMaskedArray.fromboolmask([False, True, False, True, False, True, False, True, False, True], [0.0, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9], maskedwhen=False, lsb=False) - # self.assertEqual(a.tolist(), [None, 1.1, None, 3.3, None, 5.5, None, 7.7, 
None, 9.9]) - # self.assertTrue(numpy.ma.is_masked(a[0])) - # self.assertFalse(numpy.ma.is_masked(a[1])) - # self.assertEqual(a[5:].tolist(), [5.5, None, 7.7, None, 9.9]) - # self.assertFalse(numpy.ma.is_masked(a[5:][0])) - # self.assertTrue(numpy.ma.is_masked(a[5:][1])) - # self.assertEqual(a[[3, 2, 1]].tolist(), [3.3, None, 1.1]) - # self.assertEqual(a[[True, True, True, True, True, False, False, False, False, False]].tolist(), [None, 1.1, None, 3.3, None]) - - # def test_bitmasked_arrow(self): - # # Apache Arrow layout example - # # https://github.com/apache/arrow/blob/master/format/Layout.md#null-bitmaps - # a = BitMaskedArray.fromboolmask([True, True, False, True, False, True], [0, 1, 999, 2, 999, 3], maskedwhen=False, lsb=True) - # self.assertEqual(a.tolist(), [0, 1, None, 2, None, 3]) - - # # extra gunk at the end of the array - # a = BitMaskedArray.fromboolmask([True, True, False, True, False, True, True, True], [0, 1, 999, 2, 999, 3], maskedwhen=False, lsb=True) - # self.assertEqual(a.tolist(), [0, 1, None, 2, None, 3]) - - # # opposite sign - # a = BitMaskedArray.fromboolmask([True, True, False, True, False, True, False, False], [0, 1, 999, 2, 999, 3], maskedwhen=False, lsb=True) - # self.assertEqual(a.tolist(), [0, 1, None, 2, None, 3]) - - # # doubled - # a = BitMaskedArray.fromboolmask([True, True, False, True, False, True, True, True, False, True, False, True], [0, 1, 999, 2, 999, 3, 0, 1, 999, 2, 999, 3], maskedwhen=False, lsb=True) - # self.assertEqual(a.tolist(), [0, 1, None, 2, None, 3, 0, 1, None, 2, None, 3]) + def test_masked_get(self): + a = MaskedArray([True, False, True, False, True, False, True, False, True, False], [0.0, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9], maskedwhen=True) + self.assertEqual(a.tolist(), [None, 1.1, None, 3.3, None, 5.5, None, 7.7, None, 9.9]) + self.assertTrue(MaskedArray.is_masked(a[0])) + self.assertFalse(MaskedArray.is_masked(a[1])) + self.assertEqual(a[5:].tolist(), [5.5, None, 7.7, None, 9.9]) + 
self.assertFalse(MaskedArray.is_masked(a[5:][0])) + self.assertTrue(MaskedArray.is_masked(a[5:][1])) + self.assertEqual(a[[3, 2, 1]].tolist(), [3.3, None, 1.1]) + self.assertEqual(a[[True, True, True, True, True, False, False, False, False, False]].tolist(), [None, 1.1, None, 3.3, None]) + + def test_masked_get_flip(self): + a = MaskedArray([False, True, False, True, False, True, False, True, False, True], [0.0, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9], maskedwhen=False) + self.assertEqual(a.tolist(), [None, 1.1, None, 3.3, None, 5.5, None, 7.7, None, 9.9]) + self.assertTrue(MaskedArray.is_masked(a[0])) + self.assertFalse(MaskedArray.is_masked(a[1])) + self.assertEqual(a[5:].tolist(), [5.5, None, 7.7, None, 9.9]) + self.assertFalse(MaskedArray.is_masked(a[5:][0])) + self.assertTrue(MaskedArray.is_masked(a[5:][1])) + self.assertEqual(a[[3, 2, 1]].tolist(), [3.3, None, 1.1]) + self.assertEqual(a[[True, True, True, True, True, False, False, False, False, False]].tolist(), [None, 1.1, None, 3.3, None]) + + def test_masked_ufunc(self): + a = MaskedArray([True, False, True, False, True, False, True, False, True, False], [0.0, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9], maskedwhen=True) + b = MaskedArray([True, True, True, True, True, False, False, False, False, False], [0.0, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9], maskedwhen=True) + self.assertEqual((a + b).tolist(), [None, None, None, None, None, 11.0, None, 15.4, None, 19.8]) + self.assertEqual((a + [0.0, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9]).tolist(), [None, 2.2, None, 6.6, None, 11.0, None, 15.4, None, 19.8]) + self.assertEqual((a + numpy.array([0.0, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9])).tolist(), [None, 2.2, None, 6.6, None, 11.0, None, 15.4, None, 19.8]) + self.assertEqual((a + IndexedMaskedArray([-1, -1, -1, 1, -1, 2, -1, 4, -1, 3], [0.0, 1.1, 2.2, 3.3, 4.4])).tolist(), [None, None, None, 4.4, None, 7.7, None, 12.100000000000001, None, 13.2]) + + def test_bitmasked_get(self): + a = 
BitMaskedArray.fromboolmask([True, False, True, False, True, False, True, False, True, False], [0.0, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9], maskedwhen=True, lsborder=True) + self.assertEqual(a.tolist(), [None, 1.1, None, 3.3, None, 5.5, None, 7.7, None, 9.9]) + self.assertTrue(MaskedArray.is_masked(a[0])) + self.assertFalse(MaskedArray.is_masked(a[1])) + self.assertEqual(a[5:].tolist(), [5.5, None, 7.7, None, 9.9]) + self.assertFalse(MaskedArray.is_masked(a[5:][0])) + self.assertTrue(MaskedArray.is_masked(a[5:][1])) + self.assertEqual(a[[3, 2, 1]].tolist(), [3.3, None, 1.1]) + self.assertEqual(a[[True, True, True, True, True, False, False, False, False, False]].tolist(), [None, 1.1, None, 3.3, None]) + + a = BitMaskedArray.fromboolmask([True, False, True, False, True, False, True, False, True, False], [0.0, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9], maskedwhen=True, lsborder=False) + self.assertEqual(a.tolist(), [None, 1.1, None, 3.3, None, 5.5, None, 7.7, None, 9.9]) + self.assertTrue(MaskedArray.is_masked(a[0])) + self.assertFalse(MaskedArray.is_masked(a[1])) + self.assertEqual(a[5:].tolist(), [5.5, None, 7.7, None, 9.9]) + self.assertFalse(MaskedArray.is_masked(a[5:][0])) + self.assertTrue(MaskedArray.is_masked(a[5:][1])) + self.assertEqual(a[[3, 2, 1]].tolist(), [3.3, None, 1.1]) + self.assertEqual(a[[True, True, True, True, True, False, False, False, False, False]].tolist(), [None, 1.1, None, 3.3, None]) + + def test_bitmasked_get_flip(self): + a = BitMaskedArray.fromboolmask([False, True, False, True, False, True, False, True, False, True], [0.0, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9], maskedwhen=False, lsborder=True) + self.assertEqual(a.tolist(), [None, 1.1, None, 3.3, None, 5.5, None, 7.7, None, 9.9]) + self.assertTrue(MaskedArray.is_masked(a[0])) + self.assertFalse(MaskedArray.is_masked(a[1])) + self.assertEqual(a[5:].tolist(), [5.5, None, 7.7, None, 9.9]) + self.assertFalse(MaskedArray.is_masked(a[5:][0])) + 
self.assertTrue(MaskedArray.is_masked(a[5:][1])) + self.assertEqual(a[[3, 2, 1]].tolist(), [3.3, None, 1.1]) + self.assertEqual(a[[True, True, True, True, True, False, False, False, False, False]].tolist(), [None, 1.1, None, 3.3, None]) + + a = BitMaskedArray.fromboolmask([False, True, False, True, False, True, False, True, False, True], [0.0, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9], maskedwhen=False, lsborder=False) + self.assertEqual(a.tolist(), [None, 1.1, None, 3.3, None, 5.5, None, 7.7, None, 9.9]) + self.assertTrue(MaskedArray.is_masked(a[0])) + self.assertFalse(MaskedArray.is_masked(a[1])) + self.assertEqual(a[5:].tolist(), [5.5, None, 7.7, None, 9.9]) + self.assertFalse(MaskedArray.is_masked(a[5:][0])) + self.assertTrue(MaskedArray.is_masked(a[5:][1])) + self.assertEqual(a[[3, 2, 1]].tolist(), [3.3, None, 1.1]) + self.assertEqual(a[[True, True, True, True, True, False, False, False, False, False]].tolist(), [None, 1.1, None, 3.3, None]) + + def test_bitmasked_arrow(self): + # Apache Arrow layout example + # https://github.com/apache/arrow/blob/master/format/Layout.md#null-bitmaps + a = BitMaskedArray.fromboolmask([True, True, False, True, False, True], [0, 1, 999, 2, 999, 3], maskedwhen=False, lsborder=True) + self.assertEqual(a.tolist(), [0, 1, None, 2, None, 3]) + + # extra gunk at the end of the array + a = BitMaskedArray.fromboolmask([True, True, False, True, False, True, True, True], [0, 1, 999, 2, 999, 3], maskedwhen=False, lsborder=True) + self.assertEqual(a.tolist(), [0, 1, None, 2, None, 3]) + + # opposite sign + a = BitMaskedArray.fromboolmask([True, True, False, True, False, True, False, False], [0, 1, 999, 2, 999, 3], maskedwhen=False, lsborder=True) + self.assertEqual(a.tolist(), [0, 1, None, 2, None, 3]) + + # doubled + a = BitMaskedArray.fromboolmask([True, True, False, True, False, True, True, True, False, True, False, True], [0, 1, 999, 2, 999, 3, 0, 1, 999, 2, 999, 3], maskedwhen=False, lsborder=True) + self.assertEqual(a.tolist(), 
[0, 1, None, 2, None, 3, 0, 1, None, 2, None, 3]) + + def test_indexedmasked_get(self): + a = IndexedMaskedArray([-1, 0, -1, 1, -1, 2, -1, 4, -1, 3], [0.0, 1.1, 2.2, 3.3, 4.4]) + self.assertEqual(a.tolist(), [None, 0.0, None, 1.1, None, 2.2, None, 4.4, None, 3.3]) + self.assertEqual([a[i] for i in range(len(a))], [None, 0.0, None, 1.1, None, 2.2, None, 4.4, None, 3.3]) diff --git a/tests/test_objects.py b/tests/test_objects.py new file mode 100644 index 00000000..fb23fa9c --- /dev/null +++ b/tests/test_objects.py @@ -0,0 +1,112 @@ +#!/usr/bin/env python + +# Copyright (c) 2018, DIANA-HEP +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import struct +import unittest + +import numpy + +from awkward import * + +class Test(unittest.TestCase): + def runTest(self): + pass + + def test_object_floats(self): + class Point(object): + def __init__(self, array): + self.x, self.y, self.z = array + def __repr__(self): + return "".format(self.x, self.y, self.z) + def __eq__(self, other): + return isinstance(other, Point) and self.x == other.x and self.y == other.y and self.z == other.z + + a = ObjectArray([[1.1, 2.2, 3.3], [4.4, 5.5, 6.6], [7.7, 8.8, 9.9]], Point) + self.assertEqual(a[0], Point([1.1, 2.2, 3.3])) + self.assertEqual(a[1], Point([4.4, 5.5, 6.6])) + self.assertEqual(a[2], Point([7.7, 8.8, 9.9])) + self.assertEqual(a[:].tolist(), [Point([1.1, 2.2, 3.3]), Point([4.4, 5.5, 6.6]), Point([7.7, 8.8, 9.9])]) + self.assertEqual(a[::2].tolist(), [Point([1.1, 2.2, 3.3]), Point([7.7, 8.8, 9.9])]) + self.assertEqual(a[[True, False, True]].tolist(), [Point([1.1, 2.2, 3.3]), Point([7.7, 8.8, 9.9])]) + self.assertEqual(a[[2, 0]].tolist(), [Point([7.7, 8.8, 9.9]), Point([1.1, 2.2, 3.3])]) + + def test_object_bytes(self): + class Point(object): + def __init__(self, bytes): + self.x, self.y, self.z = struct.unpack("ddd", bytes) + def __repr__(self): + return "".format(self.x, self.y, self.z) + def __eq__(self, other): + return isinstance(other, Point) and self.x == other.x and self.y == other.y and self.z == other.z + + a = ObjectArray(numpy.array([1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 
9.9]).view("u1").reshape(-1, 24), Point) + self.assertEqual(a[0], Point(numpy.array([1.1, 2.2, 3.3]).tobytes())) + self.assertEqual(a[1], Point(numpy.array([4.4, 5.5, 6.6]).tobytes())) + self.assertEqual(a[2], Point(numpy.array([7.7, 8.8, 9.9]).tobytes())) + self.assertEqual(a[:].tolist(), [Point(numpy.array([1.1, 2.2, 3.3]).tobytes()), Point(numpy.array([4.4, 5.5, 6.6]).tobytes()), Point(numpy.array([7.7, 8.8, 9.9]).tobytes())]) + self.assertEqual(a[::2].tolist(), [Point(numpy.array([1.1, 2.2, 3.3]).tobytes()), Point(numpy.array([7.7, 8.8, 9.9]).tobytes())]) + self.assertEqual(a[[True, False, True]].tolist(), [Point(numpy.array([1.1, 2.2, 3.3]).tobytes()), Point(numpy.array([7.7, 8.8, 9.9]).tobytes())]) + self.assertEqual(a[[2, 0]].tolist(), [Point(numpy.array([7.7, 8.8, 9.9]).tobytes()), Point(numpy.array([1.1, 2.2, 3.3]).tobytes())]) + + def test_object_indexedbytes(self): + class Point(object): + def __init__(self, array): + self.x, self.y, self.z = array + def __repr__(self): + return "".format(self.x, self.y, self.z) + def __eq__(self, other): + return isinstance(other, Point) and self.x == other.x and self.y == other.y and self.z == other.z + + a = ObjectArray(ByteIndexedArray([0, 24, 48], numpy.array([1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9]).view("u1"), numpy.dtype((float, 3))), Point) + self.assertEqual(a[0], Point([1.1, 2.2, 3.3])) + self.assertEqual(a[1], Point([4.4, 5.5, 6.6])) + self.assertEqual(a[2], Point([7.7, 8.8, 9.9])) + self.assertEqual(a[:].tolist(), [Point([1.1, 2.2, 3.3]), Point([4.4, 5.5, 6.6]), Point([7.7, 8.8, 9.9])]) + self.assertEqual(a[::2].tolist(), [Point([1.1, 2.2, 3.3]), Point([7.7, 8.8, 9.9])]) + self.assertEqual(a[[True, False, True]].tolist(), [Point([1.1, 2.2, 3.3]), Point([7.7, 8.8, 9.9])]) + self.assertEqual(a[[2, 0]].tolist(), [Point([7.7, 8.8, 9.9]), Point([1.1, 2.2, 3.3])]) + + def test_object_jaggedbytes(self): + class Point(object): + def __init__(self, array): + self.x, self.y, self.z = array + def __repr__(self): + 
return "".format(self.x, self.y, self.z) + def __eq__(self, other): + return isinstance(other, Point) and self.x == other.x and self.y == other.y and self.z == other.z + + a = ObjectArray(ByteJaggedArray.fromoffsets([0, 24, 48, 72], numpy.array([1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9]).view("u1"), float), Point) + self.assertEqual(a[0], Point([1.1, 2.2, 3.3])) + self.assertEqual(a[1], Point([4.4, 5.5, 6.6])) + self.assertEqual(a[2], Point([7.7, 8.8, 9.9])) + self.assertEqual(a[:].tolist(), [Point([1.1, 2.2, 3.3]), Point([4.4, 5.5, 6.6]), Point([7.7, 8.8, 9.9])]) + self.assertEqual(a[::2].tolist(), [Point([1.1, 2.2, 3.3]), Point([7.7, 8.8, 9.9])]) + self.assertEqual(a[[True, False, True]].tolist(), [Point([1.1, 2.2, 3.3]), Point([7.7, 8.8, 9.9])]) + self.assertEqual(a[[2, 0]].tolist(), [Point([7.7, 8.8, 9.9]), Point([1.1, 2.2, 3.3])]) diff --git a/tests/test_table.py b/tests/test_table.py index fe643357..920ba9df 100644 --- a/tests/test_table.py +++ b/tests/test_table.py @@ -143,25 +143,25 @@ def test_table_mask_mask(self): self.assertEqual(a[[True, True, False, False, False, False, False, True, True, True]][[False, True, True, True, False]].tolist(), [{"0": 1, "1": 1.1}, {"0": 7, "1": 7.7}, {"0": 8, "1": 8.8}]) self.assertEqual(a[[True, True, False, False, False, False, False, True, True, True]][[False, True, True, True, False]][1].tolist(), {"0": 7, "1": 7.7}) - # def test_indexed_table(self): - # a = Table([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [0.0, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9]) - # self.assertEqual(IndexedArray([5, 3, 7, 5], a)["1"].tolist(), [5.5, 3.3, 7.7, 5.5]) + def test_indexed_table(self): + a = Table([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [0.0, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9]) + self.assertEqual(IndexedArray([5, 3, 7, 5], a)["1"].tolist(), [5.5, 3.3, 7.7, 5.5]) - # def test_masked_table(self): - # a = Table([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [0.0, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9]) - # self.assertEqual(MaskedArray([False, 
True, True, True, True, False, False, False, False, True], a, maskedwhen=False)["1"].tolist(), [None, 1.1, 2.2, 3.3, 4.4, None, None, None, None, 9.9]) + def test_masked_table(self): + a = Table([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [0.0, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9]) + self.assertEqual(MaskedArray([False, True, True, True, True, False, False, False, False, True], a, maskedwhen=False)["1"].tolist(), [None, 1.1, 2.2, 3.3, 4.4, None, None, None, None, 9.9]) def test_jagged_table(self): a = Table([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [0.0, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9]) self.assertEqual(JaggedArray.fromoffsets([0, 3, 5, 5, 10], a).tolist(), [[{"0": 0, "1": 0.0}, {"0": 1, "1": 1.1}, {"0": 2, "1": 2.2}], [{"0": 3, "1": 3.3}, {"0": 4, "1": 4.4}], [], [{"0": 5, "1": 5.5}, {"0": 6, "1": 6.6}, {"0": 7, "1": 7.7}, {"0": 8, "1": 8.8}, {"0": 9, "1": 9.9}]]) self.assertEqual(JaggedArray.fromoffsets([0, 3, 5, 5, 10], a)["1"].tolist(), [[0.0, 1.1, 2.2], [3.3, 4.4], [], [5.5, 6.6, 7.7, 8.8, 9.9]]) - # def test_chunked_table(self): - # a = Table([0, 1, 2, 3], [0.0, 1.1, 2.2, 3.3]) - # b = Table([4, 5, 6, 7, 8, 9], [4.4, 5.5, 6.6, 7.7, 8.8, 9.9]) - # c = ChunkedArray([a, b]) - # self.assertEqual(c["1"][6], 6.6) + def test_chunked_table(self): + a = Table([0, 1, 2, 3], [0.0, 1.1, 2.2, 3.3]) + b = Table([4, 5, 6, 7, 8, 9], [4.4, 5.5, 6.6, 7.7, 8.8, 9.9]) + c = ChunkedArray([a, b]) + self.assertEqual(c["1"][6], 6.6) - # def test_virtual_table(self): - # a = VirtualArray(lambda: Table([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [0.0, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9])) - # self.assertEqual(a.tolist(), [{"0": 0, "1": 0.0}, {"0": 1, "1": 1.1}, {"0": 2, "1": 2.2}, {"0": 3, "1": 3.3}, {"0": 4, "1": 4.4}, {"0": 5, "1": 5.5}, {"0": 6, "1": 6.6}, {"0": 7, "1": 7.7}, {"0": 8, "1": 8.8}, {"0": 9, "1": 9.9}]) + def test_virtual_table(self): + a = VirtualArray(lambda: Table([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [0.0, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9])) + 
self.assertEqual(a.tolist(), [{"0": 0, "1": 0.0}, {"0": 1, "1": 1.1}, {"0": 2, "1": 2.2}, {"0": 3, "1": 3.3}, {"0": 4, "1": 4.4}, {"0": 5, "1": 5.5}, {"0": 6, "1": 6.6}, {"0": 7, "1": 7.7}, {"0": 8, "1": 8.8}, {"0": 9, "1": 9.9}]) diff --git a/tests/test_union.py b/tests/test_union.py index d778c3f0..bb49c0a3 100644 --- a/tests/test_union.py +++ b/tests/test_union.py @@ -38,6 +38,26 @@ class Test(unittest.TestCase): def runTest(self): pass - # def test_union_get(self): - # a = UnionArray([0, 1, 0, 1, 0, 1, 0, 1, 0, 1], [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [[0.0, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9], [0, 100, 200, 300, 400, 500, 600, 700, 800, 900]]) - # self.assertEqual(a.tolist(), [0.0, 100, 2.2, 300, 4.4, 500, 6.6, 700, 8.8, 900]) + def test_union_get(self): + a = UnionArray([0, 1, 0, 1, 0, 1, 0, 1, 0, 1], [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [[0.0, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9], [0, 100, 200, 300, 400, 500, 600, 700, 800, 900]]) + self.assertEqual(a.tolist(), [0.0, 100, 2.2, 300, 4.4, 500, 6.6, 700, 8.8, 900]) + self.assertEqual([a[i] for i in range(len(a))], [0.0, 100, 2.2, 300, 4.4, 500, 6.6, 700, 8.8, 900]) + self.assertEqual(a[:].tolist(), [0.0, 100, 2.2, 300, 4.4, 500, 6.6, 700, 8.8, 900]) + self.assertEqual(a[1:].tolist(), [100, 2.2, 300, 4.4, 500, 6.6, 700, 8.8, 900]) + self.assertEqual(a[2:].tolist(), [2.2, 300, 4.4, 500, 6.6, 700, 8.8, 900]) + self.assertEqual(a[:-1].tolist(), [0.0, 100, 2.2, 300, 4.4, 500, 6.6, 700, 8.8]) + self.assertEqual(a[1:-2].tolist(), [100, 2.2, 300, 4.4, 500, 6.6, 700]) + self.assertEqual(a[2:-2].tolist(), [2.2, 300, 4.4, 500, 6.6, 700]) + self.assertEqual([a[2:-2][i] for i in range(6)], [2.2, 300, 4.4, 500, 6.6, 700]) + + self.assertEqual(a[[1, 2, 3, 8, 8, 1]].tolist(), [100, 2.2, 300, 8.8, 8.8, 100]) + self.assertEqual([a[[1, 2, 3, 8, 8, 1]][i] for i in range(6)], [100, 2.2, 300, 8.8, 8.8, 100]) + + self.assertEqual(a[[False, True, True, True, False, True, False, False, False, False]].tolist(), [100, 2.2, 300, 
500]) + self.assertEqual([a[[False, True, True, True, False, True, False, False, False, False]][i] for i in range(4)], [100, 2.2, 300, 500]) + + def test_union_ufunc(self): + a = UnionArray.fromtags([0, 1, 1, 0, 0], [[100, 200, 300], [1.1, 2.2]]) + b = UnionArray.fromtags([1, 1, 0, 1, 0], [[10.1, 20.2], [123, 456, 789]]) + self.assertEqual((a + a).tolist(), [200, 2.2, 4.4, 400, 600]) + self.assertEqual((a + b).tolist(), [223, 457.1, 12.3, 989, 320.2]) diff --git a/tests/test_virtual.py b/tests/test_virtual.py index 9948f8b0..a935695d 100644 --- a/tests/test_virtual.py +++ b/tests/test_virtual.py @@ -34,135 +34,62 @@ import numpy from awkward import * +import awkward.type class Test(unittest.TestCase): def runTest(self): pass - ################### old tests - - # def test_virtual_nocache(self): - # a = VirtualArray(lambda: [1, 2, 3]) - # self.assertFalse(a.ismaterialized) - # self.assertTrue(numpy.array_equal(a[:], numpy.array([1, 2, 3]))) - # self.assertTrue(a.ismaterialized) - - # a = VirtualArray(lambda: range(10)) - # self.assertFalse(a.ismaterialized) - # self.assertTrue(numpy.array_equal(a[::2], numpy.array([0, 2, 4, 6, 8]))) - # self.assertTrue(a.ismaterialized) - - # a = VirtualArray(lambda: range(10)) - # self.assertFalse(a.ismaterialized) - # self.assertTrue(numpy.array_equal(a[[5, 3, 6, 0, 6]], numpy.array([5, 3, 6, 0, 6]))) - # self.assertTrue(a.ismaterialized) - - # a = VirtualArray(lambda: range(10)) - # self.assertFalse(a.ismaterialized) - # self.assertTrue(numpy.array_equal(a[[True, False, True, False, True, False, True, False, True, False]], numpy.array([0, 2, 4, 6, 8]))) - # self.assertTrue(a.ismaterialized) - - # def test_virtual_transientcache(self): - # cache = {} - # a = VirtualArray(lambda: [1, 2, 3], cache=cache) - # self.assertFalse(a.ismaterialized) - # a[:] - # self.assertTrue(a.ismaterialized) - # self.assertEqual(list(cache), [a.TransientKey(id(a))]) - # self.assertEqual(list(cache), [a.key]) - # 
self.assertTrue(numpy.array_equal(cache[a.key], numpy.array([1, 2, 3]))) - # del a - - # def test_virtual_persistentcache(self): - # cache = {} - # a = VirtualArray(lambda: [1, 2, 3], cache=cache, persistentkey="find-me-again") - # self.assertFalse(a.ismaterialized) - # a[:] - # self.assertTrue(a.ismaterialized) - # self.assertEqual(list(cache), ["find-me-again"]) - # self.assertEqual(list(cache), [a.key]) - # self.assertTrue(numpy.array_equal(cache[a.key], numpy.array([1, 2, 3]))) - # del a - - # def test_virtual_dontmaterialize(self): - # a = VirtualArray(lambda: [1, 2, 3], dtype=int, shape=(3,)) - # self.assertFalse(a.ismaterialized) - # self.assertEqual(a.dtype, numpy.dtype(int)) - # self.assertEqual(a.shape, (3,)) - # self.assertEqual(len(a), 3) - # self.assertEqual(a._array, None) - # self.assertFalse(a.ismaterialized) - # self.assertTrue(numpy.array_equal(a[:], numpy.array([1, 2, 3]))) - # self.assertTrue(a.ismaterialized) - - # def test_virtualobject_floats(self): - # class Point(object): - # def __init__(self, array): - # self.x, self.y, self.z = array - # def __repr__(self): - # return "".format(self.x, self.y, self.z) - # def __eq__(self, other): - # return isinstance(other, Point) and self.x == other.x and self.y == other.y and self.z == other.z - - # a = VirtualObjectArray(Point, [[1.1, 2.2, 3.3], [4.4, 5.5, 6.6], [7.7, 8.8, 9.9]]) - # self.assertEqual(a[0], Point([1.1, 2.2, 3.3])) - # self.assertEqual(a[1], Point([4.4, 5.5, 6.6])) - # self.assertEqual(a[2], Point([7.7, 8.8, 9.9])) - # self.assertEqual(a[:], [Point([1.1, 2.2, 3.3]), Point([4.4, 5.5, 6.6]), Point([7.7, 8.8, 9.9])]) - # self.assertEqual(a[::2], [Point([1.1, 2.2, 3.3]), Point([7.7, 8.8, 9.9])]) - # self.assertEqual(a[[True, False, True]], [Point([1.1, 2.2, 3.3]), Point([7.7, 8.8, 9.9])]) - # self.assertEqual(a[[2, 0]], [Point([7.7, 8.8, 9.9]), Point([1.1, 2.2, 3.3])]) - - # def test_virtualobject_bytes(self): - # class Point(object): - # def __init__(self, bytes): - # self.x, self.y, 
self.z = struct.unpack("ddd", bytes) - # def __repr__(self): - # return "".format(self.x, self.y, self.z) - # def __eq__(self, other): - # return isinstance(other, Point) and self.x == other.x and self.y == other.y and self.z == other.z - - # a = VirtualObjectArray(Point, numpy.array([1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9]).view("u1").reshape(-1, 24)) - # self.assertEqual(a[0], Point(numpy.array([1.1, 2.2, 3.3]).tobytes())) - # self.assertEqual(a[1], Point(numpy.array([4.4, 5.5, 6.6]).tobytes())) - # self.assertEqual(a[2], Point(numpy.array([7.7, 8.8, 9.9]).tobytes())) - # self.assertEqual(a[:], [Point(numpy.array([1.1, 2.2, 3.3]).tobytes()), Point(numpy.array([4.4, 5.5, 6.6]).tobytes()), Point(numpy.array([7.7, 8.8, 9.9]).tobytes())]) - # self.assertEqual(a[::2], [Point(numpy.array([1.1, 2.2, 3.3]).tobytes()), Point(numpy.array([7.7, 8.8, 9.9]).tobytes())]) - # self.assertEqual(a[[True, False, True]], [Point(numpy.array([1.1, 2.2, 3.3]).tobytes()), Point(numpy.array([7.7, 8.8, 9.9]).tobytes())]) - # self.assertEqual(a[[2, 0]], [Point(numpy.array([7.7, 8.8, 9.9]).tobytes()), Point(numpy.array([1.1, 2.2, 3.3]).tobytes())]) - - # def test_virtualobject_indexedbytes(self): - # class Point(object): - # def __init__(self, array): - # self.x, self.y, self.z = array - # def __repr__(self): - # return "".format(self.x, self.y, self.z) - # def __eq__(self, other): - # return isinstance(other, Point) and self.x == other.x and self.y == other.y and self.z == other.z - - # a = VirtualObjectArray(Point, ByteIndexedArray([0, 24, 48], numpy.array([1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9]).view("u1"), numpy.dtype((float, 3)))) - # self.assertEqual(a[0], Point([1.1, 2.2, 3.3])) - # self.assertEqual(a[1], Point([4.4, 5.5, 6.6])) - # self.assertEqual(a[2], Point([7.7, 8.8, 9.9])) - # self.assertEqual(a[:], [Point([1.1, 2.2, 3.3]), Point([4.4, 5.5, 6.6]), Point([7.7, 8.8, 9.9])]) - # self.assertEqual(a[::2], [Point([1.1, 2.2, 3.3]), Point([7.7, 8.8, 9.9])]) - # 
self.assertEqual(a[[True, False, True]], [Point([1.1, 2.2, 3.3]), Point([7.7, 8.8, 9.9])]) - # self.assertEqual(a[[2, 0]], [Point([7.7, 8.8, 9.9]), Point([1.1, 2.2, 3.3])]) - - # def test_virtualobject_jaggedbytes(self): - # class Point(object): - # def __init__(self, array): - # self.x, self.y, self.z = array - # def __repr__(self): - # return "".format(self.x, self.y, self.z) - # def __eq__(self, other): - # return isinstance(other, Point) and self.x == other.x and self.y == other.y and self.z == other.z - - # a = VirtualObjectArray(Point, ByteJaggedArray.fromoffsets([0, 24, 48, 72], numpy.array([1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9]).view("u1"), float)) - # self.assertEqual(a[0], Point([1.1, 2.2, 3.3])) - # self.assertEqual(a[1], Point([4.4, 5.5, 6.6])) - # self.assertEqual(a[2], Point([7.7, 8.8, 9.9])) - # self.assertEqual(a[:], [Point([1.1, 2.2, 3.3]), Point([4.4, 5.5, 6.6]), Point([7.7, 8.8, 9.9])]) - # self.assertEqual(a[::2], [Point([1.1, 2.2, 3.3]), Point([7.7, 8.8, 9.9])]) - # self.assertEqual(a[[True, False, True]], [Point([1.1, 2.2, 3.3]), Point([7.7, 8.8, 9.9])]) - # self.assertEqual(a[[2, 0]], [Point([7.7, 8.8, 9.9]), Point([1.1, 2.2, 3.3])]) + def test_virtual_nocache(self): + a = VirtualArray(lambda: [1, 2, 3]) + self.assertFalse(a.ismaterialized) + self.assertTrue(numpy.array_equal(a[:], numpy.array([1, 2, 3]))) + self.assertTrue(a.ismaterialized) + + a = VirtualArray(lambda: range(10)) + self.assertFalse(a.ismaterialized) + self.assertTrue(numpy.array_equal(a[::2], numpy.array([0, 2, 4, 6, 8]))) + self.assertTrue(a.ismaterialized) + + a = VirtualArray(lambda: range(10)) + self.assertFalse(a.ismaterialized) + self.assertTrue(numpy.array_equal(a[[5, 3, 6, 0, 6]], numpy.array([5, 3, 6, 0, 6]))) + self.assertTrue(a.ismaterialized) + + a = VirtualArray(lambda: range(10)) + self.assertFalse(a.ismaterialized) + self.assertTrue(numpy.array_equal(a[[True, False, True, False, True, False, True, False, True, False]], numpy.array([0, 2, 4, 6, 8]))) + 
self.assertTrue(a.ismaterialized) + + def test_virtual_transientcache(self): + cache = {} + a = VirtualArray(lambda: [1, 2, 3], cache=cache) + self.assertFalse(a.ismaterialized) + a[:] + self.assertTrue(a.ismaterialized) + self.assertEqual(list(cache), [a.TransientKey(id(a))]) + self.assertEqual(list(cache), [a.key]) + self.assertTrue(numpy.array_equal(cache[a.key], numpy.array([1, 2, 3]))) + del a + + def test_virtual_persistentcache(self): + cache = {} + a = VirtualArray(lambda: [1, 2, 3], cache=cache, persistentkey="find-me-again") + self.assertFalse(a.ismaterialized) + a[:] + self.assertTrue(a.ismaterialized) + self.assertEqual(list(cache), ["find-me-again"]) + self.assertEqual(list(cache), [a.key]) + self.assertTrue(numpy.array_equal(cache[a.key], numpy.array([1, 2, 3]))) + del a + + def test_virtual_dontmaterialize(self): + a = VirtualArray(lambda: [1, 2, 3], type=awkward.type.fromnumpy(3, int)) + self.assertFalse(a.ismaterialized) + self.assertEqual(a.dtype, numpy.dtype(int)) + self.assertEqual(a.shape, (3,)) + self.assertEqual(len(a), 3) + self.assertEqual(a._array, None) + self.assertFalse(a.ismaterialized) + self.assertTrue(numpy.array_equal(a[:], numpy.array([1, 2, 3]))) + self.assertTrue(a.ismaterialized)