Skip to content
This repository has been archived by the owner on Jun 21, 2022. It is now read-only.

Commit

Permalink
Merge pull request #14 from scikit-hep/feature-persistence
Browse files Browse the repository at this point in the history
persistence
  • Loading branch information
jpivarski authored Oct 24, 2018
2 parents dd30cc6 + 141ff9d commit d23a327
Show file tree
Hide file tree
Showing 25 changed files with 1,958 additions and 867 deletions.
4 changes: 3 additions & 1 deletion awkward/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,9 @@

from awkward.generate import fromiter

from awkward.persist import serialize, deserialize, save, load, tohdf5, fromhdf5

# convenient access to the version number
from awkward.version import __version__

__all__ = ["ChunkedArray", "AppendableArray", "IndexedArray", "ByteIndexedArray", "SparseArray", "JaggedArray", "ByteJaggedArray", "MaskedArray", "BitMaskedArray", "IndexedMaskedArray", "Methods", "ObjectArray", "Table", "UnionArray", "VirtualArray", "fromiter", "__version__"]
__all__ = ["ChunkedArray", "AppendableArray", "IndexedArray", "ByteIndexedArray", "SparseArray", "JaggedArray", "ByteJaggedArray", "MaskedArray", "BitMaskedArray", "IndexedMaskedArray", "Methods", "ObjectArray", "Table", "UnionArray", "VirtualArray", "fromiter", "serialize", "deserialize", "save", "load", "tohdf5", "fromhdf5", "__version__"]
27 changes: 24 additions & 3 deletions awkward/array/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@

import types

import awkward.persist
import awkward.type
import awkward.util

class AwkwardArray(awkward.util.NDArrayOperatorsMixin):
Expand All @@ -38,6 +40,16 @@ def __array__(self, *args, **kwargs):
# raise Exception("{0} {1}".format(args, kwargs))
return awkward.util.numpy.array(self, *args, **kwargs)

def __getstate__(self):
    """Return the pickling state: a dict filled in by ``awkward.persist.serialize``."""
    storage = {}
    # serialize() writes into the dict it is handed; the filled dict is the state.
    awkward.persist.serialize(self, storage)
    return storage

def __setstate__(self, state):
    """Restore this object from ``state``.

    ``state`` is passed to ``awkward.persist.deserialize``; the attributes and
    the class of the object it returns are then adopted by ``self``.
    """
    rebuilt = awkward.persist.deserialize(state)
    # Copy the rebuilt object's attributes first, then switch to its class.
    self.__dict__.update(rebuilt.__dict__)
    self.__class__ = rebuilt.__class__

def __iter__(self):
    """Yield ``self[i]`` for every index from 0 up to (not including) ``len(self)``."""
    position = 0
    stop = len(self)  # length is read once, before the first item is produced
    while position < stop:
        yield self[position]
        position += 1
Expand All @@ -51,6 +63,18 @@ def __str__(self):
def __repr__(self):
    """Return ``<ClassName str(self) at ID>``, with ``id(self)`` as zero-padded 12-digit hex."""
    classname = self.__class__.__name__
    address = id(self)
    return "<{0} {1} at {2:012x}>".format(classname, str(self), address)

@property
def type(self):
    """``awkward.type.ArrayType`` built from ``_getshape()`` followed by the resolved ``_gettype({})``."""
    dimensions = self._getshape()
    resolved = awkward.type._resolve(self._gettype({}), {})
    # The shape entries and the resolved element type are passed as positional arguments.
    return awkward.type.ArrayType(*(dimensions + (resolved,)))

@property
def dtype(self):
    """The ``dtype`` attribute of this object's ``type``."""
    arraytype = self.type
    return arraytype.dtype

@property
def shape(self):
    """The ``shape`` attribute of this object's ``type``."""
    arraytype = self.type
    return arraytype.shape

def _try_tolist(self, x):
try:
return x.tolist()
Expand Down Expand Up @@ -84,9 +108,6 @@ def tolist(self):
out.append(self._try_tolist(x))
return out

def _valid(self):
pass

def valid(self):
try:
self._valid()
Expand Down
83 changes: 49 additions & 34 deletions awkward/array/chunked.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import awkward.array.base
import awkward.persist
import awkward.type
import awkward.util

Expand Down Expand Up @@ -76,6 +77,15 @@ def ones_like(self, **overrides):
mine = self._mine(overrides)
return self.copy([awkward.util.numpy.ones_like(x) if isinstance(x, awkward.util.numpy.ndarray) else x.ones_like(**overrides) for x in self._chunks], counts=list(self._counts), **mine)

def __awkward_persist__(self, ident, fill, **kwargs):
    """Describe this array as a persistence dict with "id", "call" and "args" keys.

    Counts are made known and the array is validated first.  Only chunks whose
    count is greater than zero are passed to ``fill``; the matching non-zero
    counts are passed to ``fill`` as a single array.
    """
    self.knowcounts()
    self._valid()
    label = self.__class__.__name__

    # Chunks are filled first, then the counts, each under a class-qualified name.
    filled_chunks = [fill(chunk, label + ".chunk", **kwargs)
                     for count, chunk in zip(self._counts, self._chunks)
                     if count > 0]
    positive_counts = awkward.util.numpy.array([count for count in self._counts if count > 0])
    filled_counts = fill(positive_counts, label + ".counts", **kwargs)

    return {"id": ident,
            "call": ["awkward", label],
            "args": [{"list": filled_chunks}, filled_counts]}

@property
def chunks(self):
    """The stored ``_chunks`` attribute, returned as-is (no copy is made here)."""
    stored = self._chunks
    return stored
Expand Down Expand Up @@ -129,11 +139,11 @@ def knowcounts(self, until=None):
def knowtype(self, at):
if not 0 <= at < len(self._chunks):
raise ValueError("cannot knowtype at chunkid {0} with {1} chunks".format(at, len(self._chunks)))
tpe = awkward.type.fromarray(self._chunks[at])
if tpe.takes == 0:
chunk = self._chunks[at]
if len(chunk) == 0:
self._types[at] = ()
else:
self._types[at] = tpe.to
self._types[at] = awkward.type.fromarray(chunk).to
return self._types[at]

def global2chunkid(self, index, return_normalized=False):
Expand Down Expand Up @@ -219,7 +229,7 @@ def local2global(self, index, chunkid):
else:
raise TypeError("local2global requires index and chunkid to be integers or arrays of integers")

def _type(self):
def _gettype(self, seen):
for tpe in self._types:
if tpe is not None and tpe is not ():
break
Expand All @@ -230,24 +240,24 @@ def _type(self):
break
else:
tpe = awkward.util.DEFAULTTYPE
return awkward.type.ArrayType(len(self), tpe)

@property
def type(self):
return self._valid()
for i in range(len(self._types)):
if self._types[i] is None or self._types[i] is () or self._types[i] is tpe:
pass
elif self._types[i] == tpe: # valid if all chunks have the same high-level type
self._types[i] = tpe # once checked, make them identically equal for faster checking next time
else:
raise TypeError("chunks do not have matching types:\n\n{0}\n\nversus\n\n{1}".format(awkward.type._str(tpe, indent=" "), awkward.type._str(self._types[i], indent=" ")))

return tpe

def _getshape(self):
    """Return the shape as a one-element tuple holding ``len(self)``."""
    length = len(self)
    return (length,)

def __len__(self):
    """Return the last entry of ``offsets`` after calling ``knowcounts()``."""
    self.knowcounts()
    boundaries = self.offsets
    return boundaries[-1]

@property
def shape(self):
return self.type.shape

@property
def dtype(self):
return self.type.dtype

def _slices(self):
# perhaps this should be a (public) @staticmethod that finds the largest possible slices to serve no more than one chunk each from a set of ChunkedArrays
self.knowcounts()
Expand All @@ -260,17 +270,7 @@ def _valid(self):
for i, count in enumerate(self._counts):
if count != len(self._chunks[i]):
raise ValueError("count[{0}] does not agree with len(chunk[{0}])".format(i))

tpe = self._type()
for i in range(len(self._types)):
if self._types[i] is None or self._types[i] is () or self._types[i] is tpe.to:
pass
elif self._types[i] == tpe.to: # valid if all chunks have the same high-level type
self._types[i] = tpe.to # once checked, make them identically equal for faster checking next time
else:
raise TypeError("chunks do not have matching types:\n\n{0}\n\nversus\n\n{1}".format(tpe.to.__str__(indent=" "), self._types[i].__str__(indent=" ")))

return tpe
self._gettype({})

def __str__(self):
if self.countsknown:
Expand Down Expand Up @@ -633,6 +633,23 @@ def _mine(self, overrides):
mine["dtype"] = overrides.pop("dtype", self._dtype)
return mine

def __awkward_persist__(self, ident, fill, **kwargs):
    """Describe this array as a persistence dict with "id", "call" and "args" keys.

    The arguments are the chunk shape (as a tuple spec), the dtype (as a
    ``json2dtype`` call on its JSON form) and the list of filled chunks.
    Chunks with a non-positive count are left out, and a chunk longer than
    its count is trimmed to the first ``count`` items before being filled.
    """
    self._valid()
    label = self.__class__.__name__

    # Keep only chunks that hold data, sliced down to their counted length.
    trimmed = [chunk[:count] if count < len(chunk) else chunk
               for count, chunk in zip(self._counts, self._chunks)
               if count > 0]

    shape_spec = {"tuple": list(self._chunkshape)}
    dtype_spec = {"call": ["awkward.persist", "json2dtype"],
                  "args": [awkward.persist.dtype2json(self._dtype)]}
    chunk_spec = {"list": [fill(chunk, label + ".chunk", **kwargs) for chunk in trimmed]}

    return {"id": ident,
            "call": ["awkward", label],
            "args": [shape_spec, dtype_spec, chunk_spec]}

@property
def chunkshape(self):
    """The stored ``_chunkshape`` attribute."""
    stored = self._chunkshape
    return stored
Expand All @@ -644,7 +661,7 @@ def chunkshape(self, value):
else:
try:
for x in value:
assert isinstance(x, awkward.util.integer) and value > 0
assert isinstance(x, awkward.util.integer) and x > 0
except TypeError:
raise TypeError("chunkshape must be an integer or a tuple of integers")
except AssertionError:
Expand Down Expand Up @@ -696,13 +713,11 @@ def offsets(self):
import awkward.array.jagged
return awkward.array.jagged.counts2offsets(self._counts)

@property
def type(self):
return awkward.type.ArrayType(*(self.shape + (self._dtype,)))
def _gettype(self, seen):
    """Return the stored ``_dtype``; the ``seen`` argument is not used here."""
    element_type = self._dtype
    return element_type

@property
def shape(self):
return (len(self),) + self._chunkshape[1:]
def _getshape(self):
    """Return the shape as a one-element tuple holding the sum of ``_counts``.

    The result must be a tuple, not a bare integer: the shape is concatenated
    with another tuple when the array type is assembled, and adding an int to
    a tuple raises ``TypeError``.
    """
    return (sum(self._counts),)

def _valid(self):
    """Perform no checks; always returns ``None``."""
    return None
Expand Down
Loading

0 comments on commit d23a327

Please sign in to comment.