diff --git a/.travis.yml b/.travis.yml index e38d78c..e8b0dd9 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,7 +4,11 @@ python: # - "3.5" - incompatible due to use of type hints - "3.6" - "3.7" - - "2.7" + - "3.8" + - "3.9" + - "3.10" + - "3.11" + - "pypy3" # PyPy versions # - "pypy" # - "pypy3.5" - incompatible @@ -14,6 +18,6 @@ install: - "pip install '.[test]'" script: - "python run_tests.py" - - "coverage run --branch --include=./py2/pyxtension/Json.py,./py2/pyxtension/streams.py --omit=./py2/pyxtension/tests/* run_tests.py" + - "coverage run --branch run_tests.py" after_success: - coveralls \ No newline at end of file diff --git a/README.md b/README.md index d11dcd0..4710d9c 100644 --- a/README.md +++ b/README.md @@ -1,69 +1,44 @@ -# pyxtension -[![build Status](https://travis-ci.org/asuiu/pyxtension.svg?branch=master)](https://travis-ci.org/asuiu/pyxtension) -[![Coverage Status](https://coveralls.io/repos/asuiu/pyxtension/badge.svg?branch=master&service=github)](https://coveralls.io/github/asuiu/pyxtension?branch=master) +# streamerate +[![build Status](https://travis-ci.org/asuiu/streamerate.svg?branch=master)](https://travis-ci.org/asuiu/streamerate) +[![Coverage Status](https://coveralls.io/repos/asuiu/streamerate/badge.svg?branch=master&service=github)](https://coveralls.io/github/asuiu/streamerate?branch=master) -[pyxtension](https://github.com/asuiu/pyxtension) is a pure Python MIT-licensed library that includes Scala-like streams (using [Fluent Interface pattern](https://en.wikipedia.org/wiki/Fluent_interface)), Json with attribute access syntax, and other common-use stuff. +__[streamerate](https://github.com/asuiu/streamerate)__ is a powerful pure-Python library inspired by the **[Fluent Interface pattern](https://en.wikipedia.org/wiki/Fluent_interface)** (used by Java 8 streams), providing a chainable and expressive approach to processing iterable data. -###### Note: **Drop support & maintenance for Python 2.x version, due to [Py2 death](https://www.python.org/doc/sunset-python-2/).** +By leveraging the **[Fluent Interface pattern](https://en.wikipedia.org/wiki/Fluent_interface)**, [streamerate](https://github.com/asuiu/streamerate) enables you to chain multiple operations, such as filtering, mapping, and reducing, into complex data processing pipelines with ease. With streamerate, you can write elegant and readable code that operates efficiently on streams of data, facilitating the development of clean and expressive Python applications. + + +__[streamerate](https://github.com/asuiu/streamerate)__ empowers you to write elegant, functional code, unlocking the full potential of your iterable data processing pipelines. + +The library is distributed under the permissive [MIT license](https://opensource.org/license/mit/), allowing you to freely use, modify, and distribute it in both open-source and commercial projects. + +*Note:* __[streamerate](https://github.com/asuiu/streamerate)__ originated as part of the [pyxtension](https://github.com/asuiu/pyxtension) project but has since been migrated to a standalone library. - Although Py2 version will remain in the repository, I won't update PyPi package, so the last Py2 version of the `pyxtension` available at [PyPi](https://pypi.org/project/pyxtension/) will remain [`1.12.7`](https://pypi.org/project/pyxtension/1.12.7/) -Starting with [`1.13.0`](https://pypi.org/project/pyxtension/1.13.0/) I've migrated the packaging & distributing method to [Wheel](https://pythonwheels.com/).
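+To make the chaining described above concrete, here is a minimal sketch of such a pipeline. It mirrors the `filter`/`map`/`reduce` example shown later in this README; the top-level `from streamerate import stream` import is an assumption about the package layout, which this diff does not spell out:
+
+```python
+# Minimal sketch of a fluent streamerate pipeline.
+# Assumption: the package exposes `stream` at the top level; adjust the
+# import if streamerate exports it from a submodule instead.
+from streamerate import stream
+
+# Product of the squares of the even numbers in 1..5: (2*2) * (4*4) == 64
+result = (stream(range(1, 6))
+          .filter(lambda x: x % 2 == 0)  # keep the even numbers: 2, 4
+          .map(lambda x: x * x)          # square them: 4, 16
+          .reduce(lambda a, b: a * b))   # fold with multiplication: 64
+
+print(result)  # 64
+```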
- ## Installation ``` -pip install pyxtension +pip install streamerate ``` or from GitHub: ``` -git clone https://github.com/asuiu/pyxtension.git -cd pyxtension +git clone https://github.com/asuiu/streamerate.git +cd streamerate python setup.py install ``` or ``` -git submodule add https://github.com/asuiu/pyxtension.git +git submodule add https://github.com/asuiu/streamerate.git ``` ## Modules overview -### Json.py -##### Json -A `dict` subclass to represent a Json object. You should be able to use this -absolutely anywhere you can use a `dict`. While this is probably the class you -want to use, there are a few caveats that follow from this being a `dict` under -the hood. - -**Never again will you have to write code like this**: -```python -body = { - 'query': { - 'filtered': { - 'query': { - 'match': {'description': 'addictive'} - }, - 'filter': { - 'term': {'created_by': 'ASU'} - } - } - } -} -``` -From now on, you may simply write the following three lines: -```python -body = Json() -body.query.filtered.query.match.description = 'addictive' -body.query.filtered.filter.term.created_by = 'ASU' -``` ### streams.py #### stream `stream` subclasses `collections.Iterable`. It's the same Python iterable, but with added methods, suitable for multithreading and multiprocess processing. Used to create stream processing pipelines, similar to those used in [Scala](http://www.scala-lang.org/) and the [MapReduce](https://en.wikipedia.org/wiki/MapReduce) programming model. Those who have used [Apache Spark](http://spark.apache.org/) [RDD](http://spark.apache.org/docs/latest/programming-guide.html#rdd-operations) functions will find this model of processing very easy to use. -### [streams](https://github.com/asuiu/pyxtension/blob/master/streams.py) +### [streams](https://github.com/asuiu/streamerate/blob/master/streams.py) **Never again will you have to write code like this**: ```python > lst = xrange(1,6) @@ -352,144 +327,11 @@ Inherits `streams.stream` and built-in `dict`, and keeps in memory the dict obje Inherits `streams.sdict` and adds functionality of `collections.defaultdict` from stdlib -### [Json](https://github.com/asuiu/pyxtension/blob/master/Json.py) - -[Json](https://github.com/asuiu/pyxtension/blob/master/Json.py) is a module that provides mapping objects that allow their elements to be accessed both as keys and as attributes: -```python - > from pyxtension.Json import Json - > a = Json({'foo': 'bar'}) - > a.foo - 'bar' - > a['foo'] - 'bar' -``` - -Attribute access makes it easy to create convenient, hierarchical settings objects: -```python - with open('settings.yaml') as fileobj: - settings = Json(yaml.safe_load(fileobj)) - - cursor = connect(**settings.db.credentials).cursor() - - cursor.execute("SELECT column FROM table;") -``` - -### Basic Usage - -Json comes with two different classes, `Json`, and `JsonList`. -Json is fairly similar to native `dict` as it extends it an is a mutable mapping that allow creating, accessing, and deleting key-value pairs as attributes. -`JsonList` is similar to native `list` as it extends it and offers a way to transform the `dict` objects from inside also in `Json` instances.
- -#### Construction -###### Directly from a JSON string -```python -> Json('{"key1": "val1", "lst1": [1,2] }') -{u'key1': u'val1', u'lst1': [1, 2]} -``` -###### From `tuple`s: -```python -> Json( ('key1','val1'), ('lst1', [1,2]) ) -{'key1': 'val1', 'lst1': [1, 2]} -# keep in mind that you should provide at least two tuples with key-value pairs -``` -###### As a built-in `dict` -```python -> Json( [('key1','val1'), ('lst1', [1,2])] ) -{'key1': 'val1', 'lst1': [1, 2]} - -Json({'key1': 'val1', 'lst1': [1, 2]}) -{'key1': 'val1', 'lst1': [1, 2]} -``` -#### Convert to a `dict` -```python -> json = Json({'key1': 'val1', 'lst1': [1, 2]}) -> json.toOrig() -{'key1': 'val1', 'lst1': [1, 2]} -``` - -#### Valid Names - -Any key can be used as an attribute as long as: - -1. The key represents a valid attribute (i.e., it is a string comprised only of - alphanumeric characters and underscores that doesn't start with a number) -2. The key does not shadow a class attribute (e.g., get). - -#### Attributes vs. Keys -There is a minor difference between accessing a value as an attribute vs. -accessing it as a key, is that when a dict is accessed as an attribute, it will -automatically be converted to a `Json` object. This allows you to recursively -access keys:: -```python - > attr = Json({'foo': {'bar': 'baz'}}) - > attr.foo.bar - 'baz' -``` -Relatedly, by default, sequence types that aren't `bytes`, `str`, or `unicode` -(e.g., `list`s, `tuple`s) will automatically be converted to `tuple`s, with any -mappings converted to `Json`: -```python - > attr = Json({'foo': [{'bar': 'baz'}, {'bar': 'qux'}]}) - > for sub_attr in attr.foo: - > print(sub_attr.bar) - 'baz' - 'qux' -``` -To get this recursive functionality for keys that cannot be used as attributes, -you can replicate the behavior by using dict syntax on `Json` object:: -```python -> json = Json({1: {'two': 3}}) -> json[1].two -3 -``` -`JsonList` usage examples: -``` -> json = Json('{"lst":[1,2,3]}') -> type(json.lst) - - -> json = Json('{"1":[1,2]}') -> json["1"][1] -2 -``` - - -Assignment as keys will still work:: -```python -> json = Json({'foo': {'bar': 'baz'}}) -> json['foo']['bar'] = 'baz' -> json.foo -{'bar': 'baz'} -``` - -### frozendict -`frozendict` is a simple immutable dictionary, where you can't change the internal variables of the class, and they are all immutable objects. Reinvoking `__init__` also doesn't alter the object. - -The API is the same as `dict`, without methods that can change the immutability. - -`frozendict` is also hashable and can be used as keys for other dictionaries, of course with the condition that all values of the frozendict are also hashable. - -```python ->>> from pyxtension import frozendict - ->>> fd = frozendict({"A": "B", "C": "D"}) ->>> print(fd) -{'A': 'B', 'C': 'D'} - ->>> fd["A"] = "C" -TypeError: object is immutable - ->>> hash(fd) --5063792767678978828 -``` - ### License -pyxtension is released under a GNU Public license. -The idea for [Json](https://github.com/asuiu/pyxtension/blob/master/Json.py) module was inspired from [addict](https://github.com/mewwts/addict) and [AttrDict](https://github.com/bcj/AttrDict), -but it has a better performance with lower memory consumption. +streamerate is released under the MIT license.
### Alternatives -There are other libraries that support Fluent Interface streams as alternatives to Pyxtension, but being much more poor in features for streaming: +There are other libraries that support Fluent Interface streams as alternatives to streamerate, but they are much poorer in streaming features: - https://pypi.org/project/lazy-streams/ - https://pypi.org/project/pystreams/ - https://pypi.org/project/fluentpy/ @@ -498,4 +340,4 @@ There are other libraries that support Fluent Interface streams as alternatives - https://github.com/sspipe/sspipe -and something quite different from Fluent patterm, that makes kind of Piping: https://github.com/sspipe/sspipe and https://github.com/JulienPalard/Pipe +and something quite different from the Fluent pattern, which does a kind of piping: https://github.com/sspipe/sspipe and https://github.com/JulienPalard/Pipe diff --git a/README.rst b/README.rst deleted file mode 100644 index d178840..0000000 --- a/README.rst +++ /dev/null @@ -1,629 +0,0 @@ -pyxtension -========== - -| |build Status| -| |Coverage Status| - -`pyxtension `__ is a pure Python -MIT-licensed library that includes Scala-like streams, Json with -attribute access syntax, and other common-use stuff. - -Installation ------------- - -:: - - pip install pyxtension - -or from Github: - -:: - - git clone https://github.com/asuiu/pyxtension.git - cd pyxtension - python setup.py install - -or - -:: - - git submodule add https://github.com/asuiu/pyxtension.git - -Modules overview ---------------- - -Json.py -~~~~~~~ - -Json -^^^^ - -| A ``dict`` subclass to represent a Json object. You should be able to - use this -| absolutely anywhere you can use a ``dict``. While this is probably the - class you -| want to use, there are a few caveats that follow from this being a - ``dict`` under -| the hood. - -**Never again will you have to write code like this**: - -.. code:: python - - body = { - 'query': { - 'filtered': { - 'query': { - 'match': {'description': 'addictive'} - }, - 'filter': { - 'term': {'created_by': 'ASU'} - } - } - } - } - -From now on, you may simply write the following three lines: - -.. code:: python - - body = Json() - body.query.filtered.query.match.description = 'addictive' - body.query.filtered.filter.term.created_by = 'ASU' - -streams.py -~~~~~~~~~~ - -stream -^^^^^^ - -| ``stream`` subclasses ``collections.Iterable``. It's the same Python - iterable, but with more added methods, suitable for multithreading and - multiprocess processings. -| Used to create stream processing pipelines, similar to those used in - `Scala `__ and - `MapReduce `__ programming - model. -| Those who used `Apache Spark `__ - `RDD `__ - functions will find this model of processing very easy to use. - -`streams `__ -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -**Never again will you have to write code like this**: - -.. code:: python - - > lst = xrange(1,6) - > reduce(lambda x, y: x * y, map(lambda _: _ * _, filter(lambda _: _ % 2 == 0, lst))) - 64 - -From now on, you may simply write the following lines: - -.. code:: python - - > the_stream = stream( xrange(1,6) ) - > the_stream.\ - filter(lambda _: _ % 2 == 0).\ - map(lambda _: _ * _).\ - reduce(lambda x, y: x * y) - 64 - -A Word Count `Map-Reduce `__ naive example using multiprocessing map -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -..
code:: python - - corpus = [ - "MapReduce is a programming model and an associated implementation for processing and generating large data sets with a parallel, distributed algorithm on a cluster.", - "At Google, MapReduce was used to completely regenerate Google's index of the World Wide Web", - "Conceptually similar approaches have been very well known since 1995 with the Message Passing Interface standard having reduce and scatter operations."] - - def reduceMaps(m1, m2): - for k, v in m2.iteritems(): - m1[k] = m1.get(k, 0) + v - return m1 - - word_counts = stream(corpus).\ - mpmap(lambda line: stream(line.lower().split(' ')).countByValue()).\ - reduce(reduceMaps) - -Basic methods -^^^^^^^^^^^^^ - -**map(f)** -'''''''''' - -Identic with builtin ``map`` but returns a stream - -**mpmap(f, poolSize=16)** -''''''''''''''''''''''''' - -Parallel ordered map using ``multiprocessing.Pool.imap()``. - -It can replace the ``map`` when need to split computations to multiple -cores, and order of results matters. - -It spawns at most ``poolSize`` processes and applies the ``f`` function. - -The elements in the result stream appears in the same order they appear -in the initial iterable. - -:: - - :type f: (T) -> V - :rtype: `stream` - -**mpfastmap(f, poolSize=16)** -''''''''''''''''''''''''''''' - -Parallel ordered map using ``multiprocessing.Pool.imap_unordered()``. - -It can replace the ``map`` when the ordered of results doesn't matter. - -It spawns at most ``poolSize`` processes and applies the ``f`` function. - -The elements in the result stream appears in the unpredicted order. - -:: - - :type f: (T) -> V - :rtype: `stream` - -**fastmap(f, poolSize=16)** -''''''''''''''''''''''''''' - -| Parallel unordered map using multithreaded pool. -| It can replace the ``map`` when the ordered of results doesn't matter. - -It spawns at most ``poolSize`` threads and applies the ``f`` function. - -The elements in the result stream appears in the unpredicted order. - -Because of CPython -`GIL `__ it's most -usefull for I/O or CPU intensive consuming native functions, or on -Jython or IronPython interpreters. - -:type f: (T) -> V - -:rtype: ``stream`` - -\*\*flatMap(predicate=\_IDENTITY\_FUNC)\*\* -:param predicate: is a function that will receive elements of self collection and return an iterable -'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' - -By default predicate is an identity function - -:type predicate: (V)-> collections.Iterable[T] - -:return: will return stream of objects of the same type of elements from the stream returned by predicate() - -Example: - -.. code:: python - - stream([[1, 2], [3, 4], [4, 5]]).flatMap().toList() == [1, 2, 3, 4, 4, 5] - -**filter(predicate)** -''''''''''''''''''''' - -identic with builtin filter, but returns stream - -**reversed()** -'''''''''''''' - -returns reversed stream - -**exists(predicate)** -''''''''''''''''''''' - -Tests whether a predicate holds for some of the elements of this -sequence. - -:rtype: bool - -Example: - -.. code:: python - - stream([1, 2, 3]).exists(0) -> False - stream([1, 2, 3]).exists(1) -> True - -\*\*keyBy(keyfunc = \_IDENTITY\_FUNC)\*\* -Transforms stream of values to a stream of tuples (key, value) -'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' - -:param keyfunc: function to map values to keys - -:type keyfunc: (V) -> T - -:return: stream of Key, Value pairs - -:rtype: stream[( T, V )] - -Example: - -.. 
code:: python - - stream([1, 2, 3, 4]).keyBy(lambda _:_ % 2) -> [(1, 1), (0, 2), (1, 3), (0, 4)] - -**groupBy()** -''''''''''''' - -groupBy([keyfunc]) -> Make an iterator that returns consecutive keys and -groups from the iterable. - -The iterable needs not to be sorted on the same key function, but the -keyfunction need to return hasable objects. - -:param keyfunc: [Optional] The key is a function computing a key value for each element. - -:type keyfunc: (T) -> (V) - -:return: (key, sub-iterator) grouped by each value of key(value). - -:rtype: stream[ ( V, slist[T] ) ] - -Example: - -.. code:: python - - stream([1, 2, 3, 4]).groupBy(lambda _: _ % 2) -> [(0, [2, 4]), (1, [1, 3])] - -**countByValue()** -'''''''''''''''''' - -Returns a collections.Counter of values - -Example - -.. code:: python - - stream(['a', 'b', 'a', 'b', 'c', 'd']).countByValue() == {'a': 2, 'b': 2, 'c': 1, 'd': 1} - -**distinct()** -'''''''''''''' - -Returns stream of distinct values. Values must be hashable. - -.. code:: python - - stream(['a', 'b', 'a', 'b', 'c', 'd']).distinct() == {'a', 'b', 'c', 'd'} - -**reduce(f, init=None)** -'''''''''''''''''''''''' - -same arguments with builtin reduce() function - -**toSet()** -''''''''''' - -returns sset() instance - -**toList()** -'''''''''''' - -returns slist() instance - -**toMap()** -''''''''''' - -returns sdict() instance - -**sorted(key=None, cmp=None, reverse=False)** -''''''''''''''''''''''''''''''''''''''''''''' - -same arguments with builtin sorted() - -**size()** -'''''''''' - -returns length of stream. Use carefully on infinite streams. - -**join(f)** -''''''''''' - -Returns a string joined by f. Proivides same functionality as str.join() -builtin method. - -if f is basestring, uses it to join the stream, else f should be a -callable that returns a string to be used for join - -**mkString(f)** -''''''''''''''' - -identic with join(f) - -**take(n)** -''''''''''' - -:: - - returns first n elements from stream - -**head()** -'''''''''' - -:: - - returns first element from stream - -**zip()** -''''''''' - -:: - - the same behavior with itertools.izip() - -**throttle(max_req: int, interval: float)** -''''''''' - -:: - - throttles to process at most max_req elements pe every 'interval' seconds. - - -\*\*unique(predicate=\_IDENTITY\_FUNC)\*\* -Returns a stream of unique (according to predicate) elements appearing in the same order as in original stream -'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' - -:: - - The items returned by predicate should be hashable and comparable. - -Statistics related methods -^^^^^^^^^^^^^^^^^^^^^^^^^^ - -**entropy()** -''''''''''''' - -calculates the Shannon entropy of the values from stream - -**pstddev()** -''''''''''''' - -Calculates the population standard deviation. 
- -**mean()** -'''''''''' - -returns the arithmetical mean of the values - -**sum()** -''''''''' - -returns the sum of elements from stream - -\*\*min(key=\_IDENTITY\_FUNC)\*\* -same functionality with builtin min() funcion -''''''''''''''''''''''''''''''''''''''''''''' - -\*\*min\_default(default, key=\_IDENTITY\_FUNC)\*\* -same functionality with min() but returns :default: when called on empty streams -'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' - -**max()** -''''''''' - -same functionality with builtin max() - -\*\*maxes(key=\_IDENTITY\_FUNC)\*\* -returns a stream of max values from stream -'''''''''''''''''''''''''''''''''''''''''' - -\*\*mins(key=\_IDENTITY\_FUNC)\*\* -returns a stream of min values from stream -'''''''''''''''''''''''''''''''''''''''''' - -Other classes -~~~~~~~~~~~~~ - -slist -^^^^^ - -Inherits ``streams.stream`` and built-in ``list`` classes, and keeps in -memory a list allowing faster index access - -sset -^^^^ - -Inherits ``streams.stream`` and built-in ``set`` classes, and keeps in -memory the whole set of values - -sdict -^^^^^ - -Inherits ``streams.stream`` and built-in ``dict``, and keeps in memory -the dict object. - -defaultstreamdict -^^^^^^^^^^^^^^^^^ - -Inherits ``streams.sdict`` and adds functionality of -``collections.defaultdict`` from stdlib - -`throttler `__ -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Thread-safe time throttler that can be attached on a stream to limit the number of calls per time interval. -Example: - -.. code:: python - - > from pyxtension.throttler import Throttler - > throttler = Throttler(5, 10) - > stream(range(100)).map(throttler.throttle).map(print).to_list() - - -it will throttle the stream to max 5 calls per every 10 seconds. - -`Json `__ -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -`Json `__ is a -module that provides mapping objects that allow their elements to be -accessed both as keys and as attributes: - -.. code:: python - - > from pyxtension.Json import Json - > a = Json({'foo': 'bar'}) - > a.foo - 'bar' - > a['foo'] - 'bar' - -Attribute access makes it easy to create convenient, hierarchical -settings objects: - -.. code:: python - - with open('settings.yaml') as fileobj: - settings = Json(yaml.safe_load(fileobj)) - - cursor = connect(**settings.db.credentials).cursor() - - cursor.execute("SELECT column FROM table;") - -Basic Usage -~~~~~~~~~~~ - -| Json comes with two different classes, ``Json``, and ``JsonList``. -| Json is fairly similar to native ``dict`` as it extends it an is a - mutable mapping that allow creating, accessing, and deleting key-value - pairs as attributes. -| ``JsonList`` is similar to native ``list`` as it extends it and offers - a way to transform the ``dict`` objects from inside also in ``Json`` - instances. - -Construction -^^^^^^^^^^^^ - -Directly from a JSON string -''''''''''''''''''''''''''' - -.. code:: python - - > Json('{"key1": "val1", "lst1": [1,2] }') - {u'key1': u'val1', u'lst1': [1, 2]} - -From ``tuple``\ s: -'''''''''''''''''' - -.. code:: python - - > Json( ('key1','val1'), ('lst1', [1,2]) ) - {'key1': 'val1', 'lst1': [1, 2]} - # keep in mind that you should provide at least two tuples with key-value pairs - -As a built-in ``dict`` -'''''''''''''''''''''' - -.. code:: python - - > Json( [('key1','val1'), ('lst1', [1,2])] ) - {'key1': 'val1', 'lst1': [1, 2]} - - Json({'key1': 'val1', 'lst1': [1, 2]}) - {'key1': 'val1', 'lst1': [1, 2]} - -Convert to a ``dict`` -^^^^^^^^^^^^^^^^^^^^^ - -.. 
code:: python - - > json = Json({'key1': 'val1', 'lst1': [1, 2]}) - > json.toOrig() - {'key1': 'val1', 'lst1': [1, 2]} - -Valid Names -^^^^^^^^^^^ - -Any key can be used as an attribute as long as: - -#. The key represents a valid attribute (i.e., it is a string comprised - only of - alphanumeric characters and underscores that doesn't start with a - number) -#. The key does not shadow a class attribute (e.g., get). - -Attributes vs. Keys -^^^^^^^^^^^^^^^^^^^ - -| There is a minor difference between accessing a value as an attribute - vs. -| accessing it as a key, is that when a dict is accessed as an - attribute, it will -| automatically be converted to a ``Json`` object. This allows you to - recursively -| access keys:: - -.. code:: python - - > attr = Json({'foo': {'bar': 'baz'}}) - > attr.foo.bar - 'baz' - -| Relatedly, by default, sequence types that aren't ``bytes``, ``str``, - or ``unicode`` -| (e.g., ``list``\ s, ``tuple``\ s) will automatically be converted to - ``tuple``\ s, with any -| mappings converted to ``Json``: - -.. code:: python - - > attr = Json({'foo': [{'bar': 'baz'}, {'bar': 'qux'}]}) - > for sub_attr in attr.foo: - > print(sub_attr.bar) - 'baz' - 'qux' - -| To get this recursive functionality for keys that cannot be used as - attributes, -| you can replicate the behavior by using dict syntax on ``Json`` - object:: - - -.. code:: python - - > json = Json({1: {'two': 3}}) - > json[1].two - 3 - -``JsonList`` usage examples: - -.. code:: python - - > json = Json('{"lst":[1,2,3]}') - > type(json.lst) - - - > json = Json('{"1":[1,2]}') - > json["1"][1] - 2 - -Assignment as keys will still work:: - -.. code:: python - - > json = Json({'foo': {'bar': 'baz'}}) - > json['foo']['bar'] = 'baz' - > json.foo - {'bar': 'baz'} - -License -~~~~~~~ - -| pyxtension is released under a GNU Public license. -| The idea for - `Json `__ - module was inspired from - `addict `__ and - `AttrDict `__, -| but it has a better performance with lower memory consumption. - -.. |build Status| image:: https://travis-ci.org/asuiu/pyxtension.svg?branch=master - :target: https://travis-ci.org/asuiu/pyxtension -.. 
|Coverage Status| image:: https://coveralls.io/repos/asuiu/pyxtension/badge.svg?branch=master&service=github - :target: https://coveralls.io/github/asuiu/pyxtension?branch=master diff --git a/environment.yml b/environment.yml index 7a16a29..363a0d8 100644 --- a/environment.yml +++ b/environment.yml @@ -1,8 +1,8 @@ -name: pyxtension +name: streamerate channels: - defaults dependencies: - pip - - python=3.10 + - python=3.8 - pip: - -r requirements.txt diff --git a/py2/pyxtension/Json.py b/py2/pyxtension/Json.py deleted file mode 100644 index cf123d6..0000000 --- a/py2/pyxtension/Json.py +++ /dev/null @@ -1,366 +0,0 @@ -#!/usr/bin/python -# coding:utf-8 -# Author: ASU -- -# Purpose: utility library -""" -Python module that gives you a dictionary whose values are both gettable and settable using both attribute and getitem syntax -""" -import copy -import json - -from pyxtension.streams import * - -__author__ = 'ASU' -supermethod = lambda self: super(self.__class__, self) - - -class JsonList(slist): - @classmethod - def __decide(cls, j): - if isinstance(j, dict): - return Json(j) - elif isinstance(j, (list, tuple)) and not isinstance(j, JsonList): - return JsonList(map(Json._toJ, j)) - elif isinstance(j, stream): - return JsonList(j.map(Json._toJ).toList()) - else: - return j - - def __init__(self, *args): - slist.__init__(self, stream(*args).map(lambda j: JsonList.__decide(j))) - - def toOrig(self): - return [isinstance(t, (Json, JsonList)) and t.toOrig() or t for t in self] - - def toString(self): - return json.dumps(self) - - -class Json(sdict): - FORBIDEN_METHODS = ('__methods__', '__members__') # Introduced due to PyCharm debugging accessing these methods - - @classmethod - def __myAttrs(cls): - return set(dir(cls)) - - @staticmethod - def load(fp, *args, **kwargs): - """Deserialize ``fp`` (a ``.read()``-supporting file-like object containing - a JSON document) to a Python object. - - If the contents of ``fp`` is encoded with an ASCII based encoding other - than utf-8 (e.g. latin-1), then an appropriate ``encoding`` name must - be specified. Encodings that are not ASCII based (such as UCS-2) are - not allowed, and should be wrapped with - ``codecs.getreader(fp)(encoding)``, or simply decoded to a ``unicode`` - object and passed to ``loads()`` - - ``object_hook`` is an optional function that will be called with the - result of any object literal decode (a ``dict``). The return value of - ``object_hook`` will be used instead of the ``dict``. This feature - can be used to implement custom decoders (e.g. JSON-RPC class hinting). - - ``object_pairs_hook`` is an optional function that will be called with the - result of any object literal decoded with an ordered list of pairs. The - return value of ``object_pairs_hook`` will be used instead of the ``dict``. - This feature can be used to implement custom decoders that rely on the - order that the key and value pairs are decoded (for example, - collections.OrderedDict will remember the order of insertion). If - ``object_hook`` is also defined, the ``object_pairs_hook`` takes priority. - - To use a custom ``JSONDecoder`` subclass, specify it with the ``cls`` - kwarg; otherwise ``JSONDecoder`` is used. - """ - return Json.loads(fp.read(), *args, **kwargs) - - @staticmethod - def loads(*args, **kwargs): - """Deserialize ``s`` (a ``str`` or ``unicode`` instance containing a JSON - document) to a Python object. - - If ``s`` is a ``str`` instance and is encoded with an ASCII based encoding - other than utf-8 (e.g. 
latin-1) then an appropriate ``encoding`` name - must be specified. Encodings that are not ASCII based (such as UCS-2) - are not allowed and should be decoded to ``unicode`` first. - - ``object_hook`` is an optional function that will be called with the - result of any object literal decode (a ``dict``). The return value of - ``object_hook`` will be used instead of the ``dict``. This feature - can be used to implement custom decoders (e.g. JSON-RPC class hinting). - - ``object_pairs_hook`` is an optional function that will be called with the - result of any object literal decoded with an ordered list of pairs. The - return value of ``object_pairs_hook`` will be used instead of the ``dict``. - This feature can be used to implement custom decoders that rely on the - order that the key and value pairs are decoded (for example, - collections.OrderedDict will remember the order of insertion). If - ``object_hook`` is also defined, the ``object_pairs_hook`` takes priority. - - ``parse_float``, if specified, will be called with the string - of every JSON float to be decoded. By default this is equivalent to - float(num_str). This can be used to use another datatype or parser - for JSON floats (e.g. decimal.Decimal). - - ``parse_int``, if specified, will be called with the string - of every JSON int to be decoded. By default this is equivalent to - int(num_str). This can be used to use another datatype or parser - for JSON integers (e.g. float). - - ``parse_constant``, if specified, will be called with one of the - following strings: -Infinity, Infinity, NaN, null, true, false. - This can be used to raise an exception if invalid JSON numbers - are encountered. - - To use a custom ``JSONDecoder`` subclass, specify it with the ``cls`` - kwarg; otherwise ``JSONDecoder`` is used. - """ - d = json.loads(*args, **kwargs) - if isinstance(d, dict): - return Json(d) - elif isinstance(d, list): - return JsonList(d) - else: - raise NotImplementedError("Unknown JSON format: {}".format(d.__class__)) - - @staticmethod - def fromString(s, *args, **kwargs): - return Json.loads(s, *args, **kwargs) - - __decide = lambda self, j: isinstance(j, dict) and Json(j) or (isinstance(j, list) and slist(j) or j) - - @classmethod - def _toJ(cls, j): - if isinstance(j, Json): - return j - elif isinstance(j, dict): - return Json(j) - elif isinstance(j, JsonList): - return j - elif isinstance(j, list): - return JsonList(j) - else: - return j - - def __init__(self, *args, **kwargs): - if not kwargs and len(args) == 1 and isinstance(args[0], basestring): - d = json.loads(args[0]) - assert isinstance(d, dict) - sdict.__init__(self, d) - elif len(args) >= 2 and isinstance(args[0], tuple): - sdict.__init__(self, args) - else: - sdict.__init__(self, *args, **kwargs) - - def __getitem__(self, name): - """ - This is called when the Dict is accessed by []. E.g. - some_instance_of_Dict['a']; - If the name is in the dict, we return it. Otherwise we set both - the attr and item to a new instance of Dict. 
- """ - if name in self: - d = sdict.__getitem__(self, name) - if isinstance(d, dict) and not isinstance(d, Json): - j = Json(d) - sdict.__setitem__(self, name, j) - return j - elif isinstance(d, list) and not isinstance(d, JsonList): - j = JsonList(d) - sdict.__setitem__(self, name, j) - return j - elif isinstance(d, set) and not isinstance(d, sset): - j = sset(d) - sdict.__setitem__(self, name, j) - return j - else: - return d - else: - j = Json() - sdict.__setitem__(self, name, j) - return j - - def __getattr__(self, item): - if item in self.FORBIDEN_METHODS: - raise AttributeError("Forbidden methods access to %s. Introduced due to PyCharm debugging problem." % str( - self.FORBIDEN_METHODS)) - - return self.__getitem__(item) - - def __setattr__(self, key, value): - if key not in self.__myAttrs(): - self[key] = value - else: - raise AttributeError("'%s' object attribute '%s' is read-only" % (str(self.__class__), key)) - - def __iter__(self): - return super(Json, self).__iter__() - - def iteritems(self): - return stream(dict.iteritems(self)).map(lambda kv: (kv[0], Json._toJ(kv[1]))) - - def iterkeys(self): - return stream(dict.iterkeys(self)) - - def itervalues(self): - return stream(dict.itervalues(self)).map(Json._toJ) - - def keys(self): - return slist(dict.keys(self)) - - def values(self): - return self.itervalues().toList() - - def items(self): - return self.iteritems().toList() - - def __str__(self): - return json.dumps(self.toOrig(), separators=(',', ':'), encoding='utf-8', default=lambda k: str(k), - sort_keys=True) - - def dump(self, *args, **kwargs): - """Serialize ``obj`` as a JSON formatted stream to ``fp`` (a - ``.write()``-supporting file-like object). - - If ``skipkeys`` is true then ``dict`` keys that are not basic types - (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``) - will be skipped instead of raising a ``TypeError``. - - If ``ensure_ascii`` is true (the default), all non-ASCII characters in the - output are escaped with ``\\uXXXX`` sequences, and the result is a ``str`` - instance consisting of ASCII characters only. If ``ensure_ascii`` is - ``False``, some chunks written to ``fp`` may be ``unicode`` instances. - This usually happens because the input contains unicode strings or the - ``encoding`` parameter is used. Unless ``fp.write()`` explicitly - understands ``unicode`` (as in ``codecs.getwriter``) this is likely to - cause an error. - - If ``check_circular`` is false, then the circular reference check - for container types will be skipped and a circular reference will - result in an ``OverflowError`` (or worse). - - If ``allow_nan`` is false, then it will be a ``ValueError`` to - serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) - in strict compliance of the JSON specification, instead of using the - JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). - - If ``indent`` is a non-negative integer, then JSON array elements and - object members will be pretty-printed with that indent level. An indent - level of 0 will only insert newlines. ``None`` is the most compact - representation. Since the default item separator is ``', '``, the - output might include trailing whitespace when ``indent`` is specified. - You can use ``separators=(',', ': ')`` to avoid this. - - If ``separators`` is an ``(item_separator, dict_separator)`` tuple - then it will be used instead of the default ``(', ', ': ')`` separators. - ``(',', ':')`` is the most compact JSON representation. 
- - ``encoding`` is the character encoding for str instances, default is UTF-8. - - ``default(obj)`` is a function that should return a serializable version - of obj or raise TypeError. The default simply raises TypeError. - - If *sort_keys* is ``True`` (default: ``False``), then the output of - dictionaries will be sorted by key. - - To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the - ``.default()`` method to serialize additional types), specify it with - the ``cls`` kwarg; otherwise ``JSONEncoder`` is used. - """ - return json.dump(self.toOrig(), *args, **kwargs) - - def dumps(self, *args, **kwargs): - """Serialize ``self`` to a JSON formatted ``str``. - - If ``skipkeys`` is false then ``dict`` keys that are not basic types - (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``) - will be skipped instead of raising a ``TypeError``. - - If ``ensure_ascii`` is false, all non-ASCII characters are not escaped, and - the return value may be a ``unicode`` instance. See ``dump`` for details. - - If ``check_circular`` is false, then the circular reference check - for container types will be skipped and a circular reference will - result in an ``OverflowError`` (or worse). - - If ``allow_nan`` is false, then it will be a ``ValueError`` to - serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) in - strict compliance of the JSON specification, instead of using the - JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). - - If ``indent`` is a non-negative integer, then JSON array elements and - object members will be pretty-printed with that indent level. An indent - level of 0 will only insert newlines. ``None`` is the most compact - representation. Since the default item separator is ``', '``, the - output might include trailing whitespace when ``indent`` is specified. - You can use ``separators=(',', ': ')`` to avoid this. - - If ``separators`` is an ``(item_separator, dict_separator)`` tuple - then it will be used instead of the default ``(', ', ': ')`` separators. - ``(',', ':')`` is the most compact JSON representation. - - ``encoding`` is the character encoding for str instances, default is UTF-8. - - ``default(obj)`` is a function that should return a serializable version - of obj or raise TypeError. The default simply raises TypeError. - - If *sort_keys* is ``True`` (default: ``False``), then the output of - dictionaries will be sorted by key. - - To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the - ``.default()`` method to serialize additional types), specify it with - the ``cls`` kwarg; otherwise ``JSONEncoder`` is used. 
- - """ - return json.dumps(self.toOrig(), *args, **kwargs) - - def toString(self): - """ - :return: deterministic sorted output string, that can be compared - :rtype: str - """ - return str(self) - - """To be removed and make Json serializable""" - - def __eq__(self, y): - return super(Json, self).__eq__(y) - - def __reduce__(self): - return self.__reduce_ex__(2) - - def __reduce_ex__(self, protocol): - return str(self) - - def copy(self): - return Json(super(Json, self).copy()) - - def __deepcopy__(self, memo): - return Json(copy.deepcopy(self.toOrig(), memo)) - - def __delattr__(self, name): - if name in self: - return supermethod(self).__delitem__(name) - else: - raise AttributeError("%s instance has no attribute %s" % (str(self.__class__), name)) - - def toOrig(self): - """ - Converts Json to a native dict - :return: stream dictionary - :rtype: sdict - """ - return sdict( - self.iteritems(). - map(lambda kv: (kv[0], isinstance(kv[1], (Json, JsonList)) and kv[1].toOrig() or kv[1])) - ) - - -class FrozenJson(Json): - def __init__(self, *args, **kwargs): - super(FrozenJson, self).__init__(*args, **kwargs) - - def __setattr__(self, key, value): - raise TypeError("Can not update a FrozenJson instance by (key,value): ({},{})".format(key, value)) - - def __hash__(self): - return hash(self.toString()) diff --git a/py2/pyxtension/__init__.py b/py2/pyxtension/__init__.py deleted file mode 100644 index 7de0e3c..0000000 --- a/py2/pyxtension/__init__.py +++ /dev/null @@ -1 +0,0 @@ -__author__ = 'asuiu' diff --git a/py2/pyxtension/fileutils.py b/py2/pyxtension/fileutils.py deleted file mode 100644 index 7d126ac..0000000 --- a/py2/pyxtension/fileutils.py +++ /dev/null @@ -1,41 +0,0 @@ -#!/usr/bin/python -# coding:utf-8 -# Author: ASU -- -# Purpose: utility library - -from bz2 import BZ2File -from gzip import GzipFile - -__author__ = 'ASU' - - -def openByExtension(filename, mode='r', buffering=-1, compresslevel=9): - """ - :param filename: path to filename - :type filename: basestring - :type mode: basestring - :param buffering: - :type buffering: int - :return: Returns an opened file-like object, decompressing/compressing data depending on file extension - :rtype: file | GzipFile | BZ2File - """ - m = -1 - if 'r' in mode: - m = 0 - elif 'w' in mode: - m = 1 - elif 'a' in mode: - m = 2 - tm = ('r', 'w', 'a') - bText = 't' in mode - - if filename.endswith('.gz'): - return GzipFile(filename, tm[m], compresslevel=compresslevel) - elif filename.endswith('.bz2'): - mode = tm[m] - if bText: mode += 'U' - if buffering <= 1: - buffering = 0 - return BZ2File(filename, mode, buffering=buffering, compresslevel=compresslevel) - else: - return open(filename, mode, buffering=buffering) diff --git a/py2/pyxtension/racelib.py b/py2/pyxtension/racelib.py deleted file mode 100644 index ee88932..0000000 --- a/py2/pyxtension/racelib.py +++ /dev/null @@ -1,45 +0,0 @@ -#!/usr/bin/python -# coding:utf-8 -# Author: ASU -- -# Purpose: Concurrent utility classes (name coming from RACEconditionLIBrary) -# Created: 11/26/2015 -import time - -__author__ = 'ASU' - - -class ContextLock(): - def __init__(self, lock): - """ - :param lock: - :type lock: thread.LockType - """ - self.__lock = lock - - def __enter__(self): - self.__lock.acquire() - - def __exit__(self, exc_type, exc_value, traceback): - self.__lock.release() - return False - -class TimePerformanceLogger: - """ - Used to measure the performance of a code block run within a With Statement Context Manager - """ - - def __init__(self, logger): - """ - :param logger: logger 
function tha would get argument number of seconds - :type logger: (basestring) -> None - """ - self._logger = logger - - def __enter__(self): - self._t1 = time.time() - - def __exit__(self, exc_type, exc_value, traceback): - self._logger(time.time() - self._t1) - if exc_type: - return False - return True diff --git a/py2/pyxtension/streams.py b/py2/pyxtension/streams.py deleted file mode 100644 index b3dae3b..0000000 --- a/py2/pyxtension/streams.py +++ /dev/null @@ -1,873 +0,0 @@ -#!/usr/bin/python -# coding:utf-8 -# Author: ASU -- -# Purpose: utility library for Python2.x -from operator import itemgetter - -import cPickle as pickle - -from Queue import Queue -import struct -import threading -import collections -from itertools import groupby - -try: # Python 3.x doesn't have ifilter, imap - from itertools import ifilter, imap, izip -except ImportError: - ifilter = filter - imap = map - izip = zip -import sys -import math -from collections import defaultdict - -if sys.version_info[0] >= 3: - xrange = range -from pyxtension.fileutils import openByExtension - -__author__ = 'ASU' - -_IDENTITY_FUNC = lambda _: _ - - -class ItrFromFunc(): - def __init__(self, f): - if callable(f): - self._f = f - else: - raise TypeError( - "Argument f to %s should be callable, but f.__class__=%s" % (str(self.__class__), str(f.__class__))) - - def __iter__(self): - return iter(self._f()) - - -class CallableGeneratorContainer(): - def __init__(self, iterableFunctions): - self._ifs = iterableFunctions - - def __call__(self): - return iteratorJoiner(self._ifs) - - -def iteratorJoiner(itrIterables): - for i in itrIterables: - for obj in i: - yield obj - - -class EndQueue: - pass - - -class MapException: - def __init__(self, exc_info): - self.exc_info = exc_info - - -class _IStream(collections.Iterable): - def map(self, f): - ''' - :param f: - :type f: (T) -> V - :return: - :rtype: stream - ''' - return stream(ItrFromFunc(lambda: imap(f, self))) - - @staticmethod - def __map_thread(f, qin, qout): - while True: - el = qin.get() - if isinstance(el, EndQueue): - qin.put(el) - return - try: - newEl = f(el) - qout.put(newEl) - except: - qout.put(MapException(sys.exc_info())) - - @staticmethod - def __fastmap_generator(itr, qin, qout, threadPool): - while 1: - try: - el = next(itr) - except StopIteration: - qin.put(EndQueue()) - for t in threadPool: - t.join() - while not qout.empty(): - newEl = qout.get() - if isinstance(newEl, MapException): - raise newEl.exc_info[0], newEl.exc_info[1], newEl.exc_info[2] - yield newEl - break - else: - qin.put(el) - newEl = qout.get() - if isinstance(newEl, MapException): - raise newEl.exc_info[0], newEl.exc_info[1], newEl.exc_info[2] - yield newEl - - @staticmethod - def __unique_generator(itr, f): - st = set() - for el in itr: - m_el = f(el) - if m_el not in st: - st.add(m_el) - yield el - - def fastmap(self, f, poolSize=16): - """ - Parallel unordered map using multithreaded pool. - It spawns at most poolSize threads and applies the f function. - The elements in the result stream appears in the unpredicted order. - It's most usefull for I/O or CPU intensive consuming functions. 
- :param f: - :type f: (T) -> V - :param poolSize: number of threads to spawn - :type poolSize: int - :return: - :rtype: stream - """ - assert poolSize > 0 - assert poolSize < 2 ** 12 - Q_SZ = poolSize * 4 - qin = Queue(Q_SZ) - qout = Queue(Q_SZ) - threadPool = [threading.Thread(target=_IStream.__map_thread, args=(f, qin, qout)) for i in xrange(poolSize)] - for t in threadPool: - t.start() - i = 0 - itr = iter(self) - hasNext = True - while i < Q_SZ and hasNext: - try: - el = next(itr) - i += 1 - qin.put(el) - except StopIteration: - hasNext = False - return stream(_IStream.__fastmap_generator(itr, qin, qout, threadPool)) - - def enumerate(self): - return stream(izip(xrange(0, sys.maxint), self)) - - @classmethod - def __flatMapGenerator(cls, itr, f): - for i in itr: - for j in f(i): - yield j - - def flatMap(self, predicate=_IDENTITY_FUNC): - """ - :param predicate: predicate is a function that will receive elements of self collection and return an iterable - By default predicate is an identity function - :type predicate: (self.elementsType)-> collections.Iterable[T] - :return: will return stream of objects of the same type of elements from the stream returned by predicate() - :rtype: stream[T] - """ - return stream(ItrFromFunc(lambda: self.__class__.__flatMapGenerator(self, predicate))) - - def filter(self, predicate=None): - """ - :param predicate: If predicate is None, return the items that are true. - :type predicate: None|(T) -> bool - :rtype: stream - """ - return stream(ItrFromFunc(lambda: ifilter(predicate, self))) - - def reversed(self): - try: - return stream(reversed(self)) - except TypeError: - raise TypeError("Can not reverse stream") - - def exists(self, f): - """ - Tests whether a predicate holds for some of the elements of this sequence. - :param f: - :type f: (T) -> bool - :return: - :rtype: bool - """ - for e in self: - if f(e): - return True - return False - - def keyBy(self, keyfunc=_IDENTITY_FUNC): - """ - :param keyfunc: function to map values to keys - :type keyfunc: (V) -> T - :return: stream of Key, Value pairs - :rtype: stream[( T, V )] - """ - return self.map(lambda h: (keyfunc(h), h)) - - def keystream(self): - """ - Applies only on streams of 2-uples - :return: stream consisted of first element of tuples - :rtype: stream[T] - """ - return self.map(itemgetter(0)) - - def values(self): - """ - Applies only on streams of 2-uples - :return: stream consisted of second element of tuples - :rtype: stream[T] - """ - return self.map(itemgetter(1)) - - def groupBy(self, keyfunc=_IDENTITY_FUNC): - """ - groupBy([keyfunc]) -> Make an iterator that returns consecutive keys and groups from the iterable. - The iterable needs not to be sorted on the same key function, but the keyfunction need to return hasable objects. - :param keyfunc: [Optional] The key is a function computing a key value for each element. - :type keyfunc: (T) -> (V) - :return: (key, sub-iterator) grouped by each value of key(value). - :rtype: stream[ ( V, slist[T] ) ] - """ - # return stream( - # ItrFromFunc(lambda: groupby(sorted(self, key=keyfunc), keyfunc))).map(lambda kv: (kv[0], stream(kv[1]))) - h = defaultdict(slist) - for v in self: - h[keyfunc(v)].append(v) - ##for - return stream(h.iteritems()) - - def groupByToList(self, keyfunc): - """ - groupBy(keyfunc]) -> create an iterator which returns - (key, sub-iterator) grouped by each value of key(value). 
- """ - return stream( - ItrFromFunc(lambda: groupby(sorted(self, key=keyfunc), keyfunc))).map(lambda kv: (kv[0], slist(kv[1]))) - - def countByValue(self): - return sdict(collections.Counter(self)) - - def distinct(self): - return self.unique() - - def reduce(self, f, init=None): - if init is None: - return reduce(f, self) - else: - return reduce(f, self, init) - - def toSet(self): - """ - - :rtype : sset - """ - return sset(self) - - def toList(self): - ''' - :return: - :rtype: slist - ''' - return slist(self) - - def sorted(self, key=None, cmp=None, reverse=False): - return slist(sorted(self, key=key, cmp=cmp, reverse=reverse)) - - def toMap(self): - return sdict(self) - - def toSumCounter(self): - """ - Elements should be tuples (T, V) where V can be summed - :return: sdict on stream elements - :rtype: sdict[ T, V ] - """ - res = sdict() - for k, v in self: - if k in res: - res[k] += v - else: - res[k] = v - return res - - def toJson(self): - from pyxtension.Json import JsonList - - return JsonList(self) - - def __getitem__(self, i): - itr = iter(self) - tk = 0 - while tk < i: - next(itr) - tk += 1 - return next(itr) - - def __getslice__(self, i, j): - def gs(strm): - itr = iter(strm) - tk = 0 - while tk < i: - next(itr) - tk += 1 - while tk < j: - yield next(itr) - tk += 1 - - return stream(ItrFromFunc(lambda: gs(self))) - - def __add__(self, other): - if not isinstance(other, ItrFromFunc): - othItr = ItrFromFunc(lambda: other) - else: - othItr = other - if isinstance(self._itr, ItrFromFunc): - i = self._itr - else: - i = ItrFromFunc(lambda: self._itr) - return stream(ItrFromFunc(CallableGeneratorContainer((i, othItr)))) - - def __iadd__(self, other): - if not isinstance(other, ItrFromFunc): - othItr = ItrFromFunc(lambda: other) - else: - othItr = other - if isinstance(self._itr, ItrFromFunc): - i = self._itr - else: - j = self._itr - i = ItrFromFunc(lambda: j) - - self._itr = ItrFromFunc(CallableGeneratorContainer((i, othItr))) - return self - - def size(self): - try: - return len(self) - except: - return sum(1 for i in iter(self)) - - def join(self, f=None): - if f is None: - return ''.join(self) - elif isinstance(f, basestring): - return f.join(self) - else: - itr = iter(self) - r = next(itr) - last = bytearray(r) - while True: - try: - n = next(itr) - r += f(last) - last = n - r += n - except StopIteration: - break - return r - - def mkString(self, c): - return self.join(c) - - def take(self, n): - return self[:n] - - def head(self): - return next(iter(self)) - - def sum(self): - return sum(self) - - def min(self, key=_IDENTITY_FUNC): - return min(self, key=key) - - def min_default(self, default, key=_IDENTITY_FUNC): - """ - :param default: returned if there's no minimum in stream (ie empty stream) - :type default: T - :param key: the same meaning as used for the builtin min() - :type key: (T) -> V - :rtype: T - """ - try: - return min(self, key=key) - except ValueError as e: - if "empty sequence" in e.message: - return default - else: - raise - - def max(self, **kwargs): - return max(self, **kwargs) - - def maxes(self, key=_IDENTITY_FUNC): - i = iter(self) - aMaxes = [next(i)] - mval = key(aMaxes[0]) - for v in i: - k = key(v) - if k > mval: - mval = k - aMaxes = [v] - elif k == mval: - aMaxes.append(v) - ##if - return slist(aMaxes) - - def mins(self, key=_IDENTITY_FUNC): - i = iter(self) - aMaxes = [next(i)] - mval = key(aMaxes[0]) - for v in i: - k = key(v) - if k < mval: - mval = k - aMaxes = [v] - elif k == mval: - aMaxes.append(v) - ##if - return slist(aMaxes) - - def 
entropy(self): - s = self.sum() - return self.map(lambda x: (float(x) / s) * math.log(s / float(x), 2)).sum() - - def pstddev(self): - """Calculates the population standard deviation.""" - sm = 0 - n = 0 - for el in self: - sm += el - n += 1 - if n < 1: - raise ValueError('Standard deviation requires at least one data point') - mean = float(sm) / n - ss = sum((x - mean) ** 2 for x in self) - pvar = ss / n # the population variance - return pvar ** 0.5 - - def mean(self): - """Return the sample arithmetic mean of data. in one single pass""" - sm = 0 - n = 0 - for el in self: - sm += el - n += 1 - if n < 1: - raise ValueError('Mean requires at least one data point') - return sm / float(n) - - def zip(self): - return stream(izip(*(self.toList()))) - - def unique(self, predicate=_IDENTITY_FUNC): - """ - The stream items should be hashable and comparable. - :param predicate: optional, maps the elements to comparable objects - :type predicate: (T) -> U - :return: Unique elements appearing in the same order. Following copies of same elements will be ignored. - :rtype: stream[U] - """ - return stream(ItrFromFunc(lambda: _IStream.__unique_generator(self, predicate))) - - @staticmethod - def binaryToChunk(binaryData): - """ - :param binaryData: binary data to transform into chunk with header - :type binaryData: str - :return: chunk of data with header - :rtype: str - """ - l = len(binaryData) - p = struct.pack(" positions - :rtype: stream [ T ] - """ - - def indexIgnorer(indexSet, _stream): - i = 0 - for el in _stream: - if i not in indexSet: - yield el - i += 1 - - indexSet = sset(indexes) - return stream(ItrFromFunc(lambda: indexIgnorer(indexSet, self))) - - -class stream(_IStream): - def __init__(self, itr=None): - if itr is None: - self._itr = [] - else: - self._itr = itr - - def __iter__(self): - return iter(self._itr) - - def __repr__(self): - if isinstance(self._itr, list): - repr(self._itr) - else: - return object.__repr__(self) - - def __str__(self): - if isinstance(self._itr, list): - return str(self._itr) - else: - return object.__str__(self) - - @staticmethod - def __binaryChunksStreamGenerator(fs, format=" positions - :rtype: stream [ T ] - """ - - def indexIgnorer(indexSet, _stream): - i = 0 - for el in _stream: - if i not in indexSet: - yield el - i += 1 - - sz = self.size() - indexSet = stream(indexes).map(lambda i: i if i >= 0 else i + sz).toSet() - return stream(ItrFromFunc(lambda: indexIgnorer(indexSet, self))) - - def __iadd__(self, x): - return list.__iadd__(self, x) - - def __add__(self, other): - return _IStream.__add__(self, other) - - def __getitem__(self, item): - return list.__getitem__(self, item) - - -class sdict(dict, _IStream): - def __init__(self, *args, **kwrds): - dict.__init__(self, *args, **kwrds) - - def __iter__(self): - return dict.__iter__(self) - - def iteritems(self): - return stream(dict.iteritems(self)) - - def iterkeys(self): - return stream(dict.iterkeys(self)) - - def itervalues(self): - return stream(dict.itervalues(self)) - - def keys(self): - return slist(dict.keys(self)) - - def values(self): - return slist(dict.values(self)) - - def items(self): - return slist(self.iteritems()) - - def update(self, other=None, **kwargs): - super(sdict, self).update(other, **kwargs) - return self - - def toJson(self): - from pyxtension.Json import Json - return Json(self) - - -class defaultstreamdict(sdict): - def __init__(self, default_factory=None, *a, **kw): - if (default_factory is not None and - not callable(default_factory)): - raise TypeError('first argument 
must be callable') - super(self.__class__, self).__init__(*a, **kw) - if default_factory is None: - self.__default_factory = lambda: object() - else: - self.__default_factory = default_factory - - def __getitem__(self, key): - try: - return super(self.__class__, self).__getitem__(key) - except KeyError: - return self.__missing__(key) - - def __missing__(self, key): - # if self.__default_factory is None: - # raise KeyError(key) - self[key] = value = self.__default_factory() - return value - - def __reduce__(self): - # if self.__default_factory is None: - # args = tuple() - # else: - args = self.__default_factory, - return type(self), args, None, None, iter(self.items()) - - def copy(self): - return self.__copy__() - - def __copy__(self): - return type(self)(self.__default_factory, self) - - def __deepcopy__(self, memo): - import copy - return type(self)(self.__default_factory, - copy.deepcopy(self.items())) - - def __repr__(self): - return 'defaultdict(%s, %s)' % (self.__default_factory, - super(self.__class__, self).__repr__()) - - def __str__(self): - return dict.__str__(self) - - -def smap(f, itr): - return stream(itr).map(f) - - -def sfilter(f, itr): - return stream(itr).filter(f) diff --git a/py2/pyxtension/tests/__init__.py b/py2/pyxtension/tests/__init__.py deleted file mode 100644 index 8b13789..0000000 --- a/py2/pyxtension/tests/__init__.py +++ /dev/null @@ -1 +0,0 @@ - diff --git a/py2/pyxtension/tests/test_Json.py b/py2/pyxtension/tests/test_Json.py deleted file mode 100644 index bc7a7f2..0000000 --- a/py2/pyxtension/tests/test_Json.py +++ /dev/null @@ -1,401 +0,0 @@ -import copy -import json -import sys -import types -import unittest - -from pyxtension.Json import Json, JsonList, FrozenJson -from pyxtension.streams import slist, stream, sdict - -__author__ = 'ASU' - - -class JsonTestCase(unittest.TestCase): - def setUp(self): - self.d = Json((("a", 2), (3, 4), ("d", {"d2": 4}))) - - def testConstructor(self): - self.assertEqual(Json('{"a":2,"4":"5"}'), {"a": 2, "4": "5"}) - - def testBase(self): - self.assertEqual(self.d.a, 2) - self.assertEqual(self.d["a"], 2) - self.assertEqual(self.d.b.c, {}) - self.assertEqual(self.d.d.d2, 4) - self.assertIsInstance(self.d.keys(), slist) - self.assertIsInstance(self.d.iterkeys(), stream) - self.assertIsInstance(self.d.itervalues(), stream) - - def test_strReturnsSortedMap(self): - self.assertEqual('{"4":3,"a":"4"}', str(Json({"a": "4", 4: 3}))) - - def test_strBasics(self): - self.assertEqual(json.dumps({"a": "4"}, separators=(',', ':')), str(Json({"a": "4"}))) - self.assertEqual(json.dumps(dict((("a", 2), (3, 4), ("d", {"d2": 4}))), separators=(',', ':'), sort_keys=True), - str(Json((("a", 2), (3, 4), ("d", {"d2": 4}))))) - self.assertEqual(json.dumps(dict((("a", 2), (3, 4), ("d", {"d2": 4}))), separators=(',', ':'), sort_keys=True), - str(self.d)) - - def test_repr_from_dict(self): - d = {'a': 'a'} - j = Json(d) - self.assertEqual(repr(j), repr(d)) - - def test_repr_used_setattr(self): - j = Json() - j.a = 'a' - self.assertEqual(repr(j), repr({'a': 'a'})) - - def test_forbiden_attrs(self): - j = Json() - with self.assertRaises(AttributeError): - j.__methods__() - - def testUpdateItems(self): - d = Json((("a", 2), (3, 4), ("d", {"d2": 4}))) - d.d.d2 = 3 - self.assertEqual(d.d.d2, 3) - - def testSpecialKeys(self): - d = Json((("__init__", 2), (3, 4), ("d", {"d2": 4}))) - self.assertEqual(d["__init__"], 2) - self.assertNotEquals(d.__init__, 2) - self.assertIsInstance(d.__init__, types.MethodType) - - def testIteritems(self): - b = 
self.d.iteritems().toList() - self.assertEqual(self.d.iterkeys().toList(), self.d.toList()) - self.assertEqual(b[2][1].d2, 4) - self.assertIsInstance(b[2][1], Json) - self.assertIsInstance(self.d.iteritems(), stream) - self.assertEqual(self.d.iteritems().toList(), [('a', 2), (3, 4), ('d', {'d2': 4})]) - self.assertEqual(self.d.iteritems()[2][1].d2, 4) - self.assertIsInstance(self.d.iteritems(), stream) - self.assertEquals(self.d.iteritems().sorted().toList(), [(3, 4), ('a', 2), ('d', {'d2': 4})]) - self.assertEqual(Json({1: 1, 2: 2, 3: 3}).itervalues().sum(), 6) - - def testJsonList(self): - jlist = Json({'a': [1, 2, {'b': [{'c': 3}, {'d': 4}]}]}) - self.assertEqual(jlist.a[2], {'b': [{'c': 3}, {'d': 4}]}) - self.assertEqual(jlist.a[2].b[1].d, 4) - - def testJsonSetValues(self): - self.d.c = "set" - self.assertEqual(self.d.c, "set") - - def test_toOrigNominal(self): - j = Json() - j.a = Json({'b': 'c'}) - j.toString() - j.toOrig() - repr(j) - d = j.toOrig() - - self.assertIsInstance(d, sdict) - self.assertDictEqual(d, {'a': {'b': 'c'}}) - - def test_NoneValueRemainsNone(self): - j = Json({'a': None}) - self.assertIs(j.a, None) - - def test_ConvertSetToList(self): - j = Json() - j.st = set((1, 2)) - d = j.toOrig() - self.assertIsInstance(d, sdict) - self.assertDictEqual({'st': set([1, 2])}, d) - - def test_serializeDeserialize(self): - serialized = '{"command":"put","details":{"cookie":"cookie1","platform":"fb"}}' - j = Json(serialized) - self.assertEqual(serialized, j.toString()) - -TEST_VAL = [1, 2, 3] -TEST_DICT = {'a': {'b': {'c': TEST_VAL}}} -TEST_DICT_STR = str(TEST_DICT) - - -class TestsFromAddict(unittest.TestCase): - def test_set_one_level_item(self): - some_dict = {'a': TEST_VAL} - prop = Json() - prop['a'] = TEST_VAL - self.assertDictEqual(prop, some_dict) - - def test_set_two_level_items(self): - some_dict = {'a': {'b': TEST_VAL}} - prop = Json() - prop['a']['b'] = TEST_VAL - self.assertDictEqual(prop, some_dict) - - def test_set_three_level_items(self): - prop = Json() - prop['a']['b']['c'] = TEST_VAL - self.assertDictEqual(prop, TEST_DICT) - - def test_set_one_level_property(self): - prop = Json() - prop.a = TEST_VAL - self.assertDictEqual(prop, {'a': TEST_VAL}) - - def test_set_two_level_properties(self): - prop = Json() - prop.a.b = TEST_VAL - self.assertDictEqual(prop, {'a': {'b': TEST_VAL}}) - - def test_set_three_level_properties(self): - prop = Json() - prop.a.b.c = TEST_VAL - self.assertDictEqual(prop, TEST_DICT) - - def test_init_with_dict(self): - self.assertDictEqual(TEST_DICT, Json(TEST_DICT)) - - def test_init_with_kws(self): - prop = Json(a=2, b={'a': 2}, c=[{'a': 2}]) - self.assertDictEqual(prop, {'a': 2, 'b': {'a': 2}, 'c': [{'a': 2}]}) - - def test_init_with_tuples(self): - prop = Json((0, 1), (1, 2), (2, 3)) - self.assertDictEqual(prop, {0: 1, 1: 2, 2: 3}) - - def test_init_with_list(self): - prop = Json([(0, 1), (1, 2), (2, 3)]) - self.assertDictEqual(prop, {0: 1, 1: 2, 2: 3}) - - def test_init_with_generator(self): - prop = Json(((i, i + 1) for i in range(3))) - self.assertDictEqual(prop, {0: 1, 1: 2, 2: 3}) - - def test_init_raises(self): - def init(): - Json(5) - - self.assertRaises(TypeError, init) - - def test_init_with_empty_stuff(self): - a = Json({}) - b = Json([]) - self.assertDictEqual(a, {}) - self.assertDictEqual(b, {}) - - def test_init_with_list_of_dicts(self): - a = Json({'a': [{'b': 2}]}) - self.assertIsInstance(a.a[0], Json) - self.assertEqual(a.a[0].b, 2) - - def test_getitem(self): - prop = Json(TEST_DICT) - 
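- # item access wraps nested plain dicts into Json on the fly (see Json.__getitem__),
- # so chained lookups like prop['a']['b']['c'] work at any depth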
self.assertEqual(prop['a']['b']['c'], TEST_VAL) - - def test_getattr(self): - prop = Json(TEST_DICT) - self.assertEqual(prop.a.b.c, TEST_VAL) - - def test_isinstance(self): - self.assertTrue(isinstance(Json(), dict)) - - def test_str(self): - prop = Json(TEST_DICT) - self.assertEqual(str(prop), json.dumps(TEST_DICT, separators=(',', ':'))) - - def test_delitem(self): - prop = Json({'a': 2}) - del prop['a'] - self.assertDictEqual(prop, {}) - - def test_delitem_nested(self): - prop = Json(TEST_DICT) - del prop['a']['b']['c'] - self.assertDictEqual(prop, {'a': {'b': {}}}) - - def test_delattr(self): - prop = Json({'a': 2}) - del prop.a - self.assertDictEqual(prop, {}) - - def test_delattr_nested(self): - prop = Json(TEST_DICT) - del prop.a.b.c - self.assertDictEqual(prop, {'a': {'b': {}}}) - - def test_delitem_delattr(self): - prop = Json(TEST_DICT) - del prop.a['b'] - self.assertDictEqual(prop, {'a': {}}) - - def test_complex_nested_structure(self): - prop = Json() - prop.a = [[Json(), 2], [[]], [1, [2, 3], 0]] - self.assertDictEqual(prop, {'a': [[{}, 2, ], [[]], [1, [2, 3], 0]]}) - - def test_tuple_key(self): - prop = Json() - prop[(1, 2)] = 2 - self.assertDictEqual(prop, {(1, 2): 2}) - self.assertEqual(prop[(1, 2)], 2) - - def test_set_prop_invalid(self): - prop = Json() - - def set_keys(): - prop.keys = 2 - - def set_items(): - prop.items = 3 - - self.assertRaises(AttributeError, set_keys) - self.assertRaises(AttributeError, set_items) - self.assertDictEqual(prop, {}) - - def test_dir_with_members(self): - prop = Json({'__members__': 1}) - dir(prop) - self.assertTrue('__members__' in prop.keys()) - - def test_to_dict(self): - nested = {'a': [{'a': 0}, 2], 'b': {}, 'c': 2} - prop = Json(nested) - regular = prop.toOrig() - self.assertDictEqual(regular, prop) - self.assertDictEqual(regular, nested) - self.assertNotIsInstance(regular, Json) - with self.assertRaises(AttributeError): - regular.a - - def get_attr_deep(): - return regular['a'][0].a - - self.assertRaises(AttributeError, get_attr_deep) - - def test_to_dict_with_tuple(self): - nested = {'a': ({'a': 0}, {2: 0})} - prop = Json(nested) - regular = prop.toOrig() - self.assertDictEqual(regular, prop) - self.assertDictEqual(regular, nested) - self.assertIsInstance(regular['a'], tuple) - self.assertNotIsInstance(regular['a'][0], Json) - - def test_update(self): - old = Json() - old.child.a = 'old a' - old.child.b = 'old b' - old.foo = 'no dict' - - new = Json() - new.child.b = 'new b' - new.child.c = 'new c' - new.foo.now_my_papa_is_a_dict = True - - old.update(new) - - reference = {'foo': {'now_my_papa_is_a_dict': True}, - 'child': {'c': 'new c', 'b': 'new b'}} - - self.assertDictEqual(old, reference) - - def test_update_with_lists(self): - org = Json() - org.a = [1, 2, {'a': 'superman'}] - someother = Json() - someother.b = [{'b': 123}] - org.update(someother) - - correct = {'a': [1, 2, {'a': 'superman'}], - 'b': [{'b': 123}]} - - org.update(someother) - self.assertDictEqual(org, correct) - self.assertIsInstance(org.b[0], Json) - - def test_copy(self): - class MyMutableObject(object): - def __init__(self): - self.attribute = None - - foo = MyMutableObject() - foo.attribute = True - - a = Json() - a.immutable = 42 - a.mutable = foo - - b = a.copy() - - # immutable object should not change - b.immutable = 21 - self.assertEqual(a.immutable, 42) - - # mutable object should change - b.mutable.attribute = False - self.assertEqual(a.mutable.attribute, b.mutable.attribute) - - # changing child of b should not affect a - b.child = "new stuff" - 
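- # note: 'child' was never set on a; attribute access auto-creates an empty Json,
- # which is why the isinstance check below holds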
self.assertTrue(isinstance(a.child, Json)) - - def test_deepcopy(self): - class MyMutableObject(object): - def __init__(self): - self.attribute = None - - foo = MyMutableObject() - foo.attribute = True - - a = Json() - a.child.immutable = 42 - a.child.mutable = foo - - b = copy.deepcopy(a) - - # immutable object should not change - b.child.immutable = 21 - self.assertEqual(a.child.immutable, 42) - - # mutable object should not change - b.child.mutable.attribute = False - self.assertTrue(a.child.mutable.attribute) - - # changing child of b should not affect a - b.child = "new stuff" - self.assertTrue(isinstance(a.child, Json)) - - def test_equal_objects_nominal(self): - j1 = Json({'a': 1, 'b': {'c': 'd'}}) - j2 = Json({'a': 1, 'b': {'c': 'd'}}) - j3 = Json({'a': 1, 'b': {'c': 'e'}}) - self.assertEqual(j1, j2) - self.assertNotEqual(j1, j3) - - def test_JsonList_converts_tuples(self): - jl = JsonList([(Json(), 2), [[]], [1, (2, 3), 0]]) - self.assertListEqual(jl, [[{}, 2, ], [[]], [1, [2, 3], 0]]) - - def test_FrozenJson_nominal(self): - frozenJson = FrozenJson({'a': 'b'}) - self.assertEqual(frozenJson.a, 'b') - with self.assertRaises(TypeError): - frozenJson.a = 'c' - with self.assertRaises(TypeError): - frozenJson.b = 'c' - - def test_FrozenJson_hash(self): - d1 = {'a': 'b'} - fj1 = FrozenJson(d1) - d1['b'] = 'c' - fj2 = FrozenJson(d1) - del d1['b'] - fj3 = FrozenJson(d1) - self.assertEqual(fj1, fj3) - self.assertNotEqual(fj1, fj2) - self.assertSetEqual(set([fj1, fj2, fj3]), set([fj1, fj2])) - self.assertTrue(set([fj1, fj2]) <= set([fj2, fj3])) - - -""" -Allow for these test cases to be run from the command line -""" -if __name__ == '__main__': - all_tests = unittest.TestLoader().loadTestsFromModule(sys.modules[__name__]) - unittest.TextTestRunner(verbosity=2).run(all_tests) diff --git a/py2/pyxtension/tests/test_Streams.py b/py2/pyxtension/tests/test_Streams.py deleted file mode 100644 index b6ad702..0000000 --- a/py2/pyxtension/tests/test_Streams.py +++ /dev/null @@ -1,388 +0,0 @@ -from mock import MagicMock - -try: # Python 3.x doesn't have ifilter - from itertools import ifilter -except ImportError: - ifilter = filter -from io import BytesIO - -try: # Python 3.x doesn't have cPickle module - import cPickle as pickle -except ImportError: - import pickle -import unittest -import sys -import time - -if sys.version_info[0] >= 3: - xrange = range - -from pyxtension.streams import stream, slist, sset, sdict, ItrFromFunc, defaultstreamdict - -__author__ = 'ASU' - - -class StreamTestCase(unittest.TestCase): - def setUp(self): - self.s = lambda: stream((1, 2, 3)) - - def testStream(self): - s = self.s - self.assertEquals(list(ifilter(lambda i: i % 2 == 0, s())), [2]) - self.assertEquals(list(s().filter(lambda i: i % 2 == 0)), [2]) - self.assertEquals(s().filter(lambda i: i % 2 == 0).toList(), [2]) - self.assertEquals(s()[1], 2) - self.assertEquals(s()[1:].toList(), [2, 3]) - self.assertEqual(s().take(2).toList(), [1, 2]) - self.assertAlmostEqual(stream((0, 1, 2, 3)).filter(lambda x: x > 0).entropy(), 1.4591479) - self.assertEquals(stream([(1, 2), (3, 4)]).zip().toList(), [(1, 3), (2, 4)]) - - def test_filterFromGeneratorReinstantiatesProperly(self): - s = stream(ItrFromFunc(lambda: (i for i in xrange(5)))) - s = s.filter(lambda e: e % 2 == 0) - self.assertEquals(s.toList(), [0, 2, 4]) - self.assertEquals(s.toList(), [0, 2, 4]) - s = stream(xrange(5)).filter(lambda e: e % 2 == 0) - self.assertEquals(s.toList(), [0, 2, 4]) - self.assertEquals(s.toList(), [0, 2, 4]) - - def test_streamExists(self): 
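- # exists(predicate) returns True iff at least one element satisfies the predicate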
- s = stream([0, 1]) - self.assertEqual(s.exists(lambda e: e == 0), True) - self.assertEqual(s.exists(lambda e: e == 2), False) - - def test_stream_str_doesntChangeStream(self): - s = stream(iter((1, 2, 3, 4))) - str(s) - self.assertListEqual(s.toList(), [1, 2, 3, 4]) - - def test_stream_repr_doesntChangeStream(self): - s = stream(iter((1, 2, 3, 4))) - repr(s) - self.assertListEqual(s.toList(), [1, 2, 3, 4]) - - def test_slist_str_nominal(self): - l = [1, 2, 3] - s = slist(l) - s1 = str(s) - self.assertEquals(str(s), str(l)) - - def test_slist_repr_nominal(self): - l = [1, 2, 3] - s = slist(l) - self.assertEquals(repr(s), repr(l)) - - def test_slist_add(self): - l1 = slist([1, 2]) - l2 = slist([3, 4]) - l3 = (l1 + l2) - self.assertIsInstance(l3, stream) - self.assertNotIsInstance(l3, slist) - self.assertNotIsInstance(l3, list) - self.assertListEqual(l3.toList(), [1, 2, 3, 4]) - - def test_slist_iadd(self): - l1 = slist([1, 2]) - l2 = slist([3, 4]) - l1 += l2 - self.assertIsInstance(l1, slist) - self.assertListEqual(l1.toList(), [1, 2, 3, 4]) - - def testStreamToJson(self): - from pyxtension.Json import JsonList - - j = stream((("a", 2), (3, 4))).toJson() - self.assertIsInstance(j, JsonList) - self.assertListEqual(j, [["a", 2], [3, 4]]) - - def testSdictToJson(self): - from pyxtension.Json import Json - - j = stream((("a", 2), (3, 4))).toMap().toJson() - self.assertIsInstance(j, Json) - self.assertEqual(j.a, 2) - self.assertDictEqual(j, {'a': 2, 3: 4}) - - def testStreamList(self): - l = lambda: slist((1, 2, 3)) - self.assertEqual(l().toList(), [1, 2, 3]) - self.assertEqual(l()[-1], 3) - - def testStreamSet(self): - s = lambda: sset([1, 2, 3, 2]) - self.assertEqual(s().size(), 3) - self.assertEqual(s().map(lambda x: x).toList(), [1, 2, 3]) - self.assertEqual(len(s()), 3) - - def test_sdict(self): - d = sdict({1: 2, 3: 4}) - self.assertListEqual(d.iteritems().map(lambda t: t).toList(), [(1, 2), (3, 4)]) - - def testStreamsFromGenerator(self): - sg = stream(ItrFromFunc(lambda: (i for i in range(4)))) - self.assertEqual(sg.size(), 4) - self.assertEqual(sg.size(), 4) - self.assertEqual(sg.filter(lambda x: x > 1).toList(), [2, 3]) - self.assertEqual(sg.filter(lambda x: x > 1).toList(), [2, 3]) - self.assertEqual(sg.map(lambda x: x > 1).toList(), [False, False, True, True]) - self.assertEqual(sg.map(lambda x: x > 1).toList(), [False, False, True, True]) - self.assertEqual(sg.head(), 0) - self.assertEqual(sg.head(), 0) - self.assertEqual(sg.map(lambda i: i ** 2).enumerate().toList(), [(0, 0), (1, 1), (2, 4), (3, 9)]) - self.assertEqual(sg.reduce(lambda x, y: x + y, 5), 11) - - def testStreamPickling(self): - sio = BytesIO() - expected = slist(slist((i,)) for i in xrange(10)) - expected.dumpToPickle(sio) - sio = BytesIO(sio.getvalue()) - - result = stream.loadFromPickled(sio) - self.assertEquals(list(expected), list(result)) - - def test_StreamFileReading(self): - sio = BytesIO() - expected = slist(slist((i,)) for i in xrange(10)) - expected.dumpToPickle(sio) - sio = BytesIO(sio.getvalue()) - - result = stream.loadFromPickled(sio) - self.assertEquals(list(expected), list(result)) - - def test_flatMap_nominal(self): - s = stream([[1, 2], [3, 4], [4, 5]]) - self.assertListEqual(s.flatMap().toList(), [1, 2, 3, 4, 4, 5]) - - def test_flatMap_withPredicate(self): - s = stream(({1: 2, 3: 4}, {5: 6, 7: 8})) - self.assertEquals(s.flatMap(dict.iteritems).toSet(), set(((1, 2), (5, 6), (3, 4), (7, 8)))) - - def test_flatMap_reiteration(self): - l = stream(ItrFromFunc(lambda: (xrange(i) for i in 
xrange(5)))).flatMap() - self.assertEquals(l.toList(), [0, 0, 1, 0, 1, 2, 0, 1, 2, 3]) - self.assertEquals(l.toList(), - [0, 0, 1, 0, 1, 2, 0, 1, 2, 3]) # second time to assert the regeneration of generator - - def test_flatMap_defaultIdentityFunction(self): - l = slist(({1: 2, 3: 4}, {5: 6, 7: 8})) - self.assertEquals(l.flatMap().toSet(), set((1, 3, 5, 7))) - - def test_sset_updateReturnsSelf(self): - s = sset((1, 2)) - l = s.update((2, 3)) - self.assertEquals(l, set((1, 2, 3))) - - def test_sset_intersection_updateReturnsSelf(self): - self.assertEquals(sset((1, 2)).update(set((2, 3))), set((1, 2, 3))) - - def test_reduceUsesInitProperly(self): - self.assertEquals(slist([sset((1, 2)), sset((3, 4))]).reduce(lambda x, y: x.update(y)), set((1, 2, 3, 4))) - self.assertEquals(slist([sset((1, 2)), sset((3, 4))]).reduce(lambda x, y: x.update(y), sset()), - set((1, 2, 3, 4))) - - def test_ssetChaining(self): - s = sset().add(0).clear().add(1).add(2).remove(2).discard(3).update(set((3, 4, 5))) \ - .intersection_update(set((1, 3, 4))).difference_update(set((4,))).symmetric_difference_update(set((3, 4))) - self.assertEquals(s, set((1, 4))) - - def test_maxes(self): - self.assertEquals(stream(['a', 'abc', 'abcd', 'defg', 'cde']).maxes(lambda s: len(s)), ['abcd', 'defg']) - - def test_mins(self): - self.assertEquals(stream(['abc', 'a', 'abcd', 'defg', 'cde']).mins(lambda s: len(s)), ['a']) - - def test_min_nominal(self): - self.assertEqual(stream([2, 1]).min(), 1) - self.assertEqual(stream(['abc', 'a']).min(key=len), 'a') - - def test_min_raises_on_empty_sequence(self): - with self.assertRaises(ValueError): - stream().min() - - def test_min_default_nominal(self): - self.assertEqual(stream([2, 1]).min_default('default'), 1) - self.assertEqual(stream(['abc', 'a']).min_default('default', key=len), 'a') - self.assertEqual(stream().min_default('default'), 'default') - - def test_defaultstreamdictBasics(self): - dd = defaultstreamdict(slist) - dd[1].append(2) - self.assertEquals(dd, {1: [2]}) - - def test_defaultstreamdictSerialization(self): - dd = defaultstreamdict(slist) - dd[1].append(2) - s = pickle.dumps(dd) - newDd = pickle.loads(s) - self.assertEquals(newDd, dd) - self.assertIsInstance(newDd[1], slist) - - def test_stream_add(self): - s1 = stream([1, 2]) - s2 = stream([3, 4]) - s3 = s1 + s2 - ll = s3.toList() - self.assertEquals(s3.toList(), [1, 2, 3, 4]) - self.assertEquals(s3.toList(), [1, 2, 3, 4]) # second time to exclude one time iterator bug - s1 = s1 + s2 - self.assertEquals(s1.toList(), [1, 2, 3, 4]) - self.assertEquals(s1.toList(), [1, 2, 3, 4]) # second time to exclude one time iterator bug - - def test_stream_iadd(self): - s1 = stream([1, 2]) - s1 += [3, 4] - s1 += stream(xrange(5, 6)) # use xrange to cover the iterator case - self.assertEquals(s1.toList(), [1, 2, 3, 4, 5]) - self.assertEquals(s1.toList(), [1, 2, 3, 4, 5]) # second time to exclude one time iterator bug - self.assertEquals(s1.toList(), [1, 2, 3, 4, 5]) - - def test_stream_getitem(self): - s = stream(i for i in xrange(1)) - self.assertEqual(s[0], 0) - - def test_fastmap_time(self): - def sleepFunc(el): - time.sleep(0.3) - return el * el - - s = stream(xrange(100)) - t1 = time.time() - res = s.fastmap(sleepFunc, poolSize=50).toSet() - dt = time.time() - t1 - expected = set(i * i for i in xrange(100)) - self.assertSetEqual(res, expected) - self.assertLessEqual(dt, 1.5) - - def test_fastmap_nominal(self): - s = stream(xrange(100)) - res = s.fastmap(lambda x: x * x, poolSize=4).toSet() - expected = set(i * i for i in 
xrange(100)) - self.assertSetEqual(res, expected) - - def test_fastmap_one_el(self): - s = stream([1, ]) - res = s.fastmap(lambda x: x * x, poolSize=4).toSet() - expected = set((1,)) - self.assertSetEqual(res, expected) - - def test_fastmap_no_el(self): - s = stream([]) - res = s.fastmap(lambda x: x * x, poolSize=4).toSet() - expected = set() - self.assertSetEqual(res, expected) - - def test_fastmap_None_el(self): - s = stream([None]) - res = s.fastmap(lambda x: x, poolSize=4).toSet() - expected = set([None]) - self.assertSetEqual(res, expected) - - def test_fastmap_raises_exception(self): - s = stream([None]) - with self.assertRaises(TypeError): - res = s.fastmap(lambda x: x * x, poolSize=4).toSet() - - def test_unique_nominal(self): - s = stream([1, 2, 3, 1, 2]) - self.assertListEqual(s.unique().toList(), [1, 2, 3]) - - def test_unique_mapping(self): - s = stream(['abc', 'def', 'a', 'b', 'ab']) - self.assertListEqual(s.unique(len).toList(), ['abc', 'a', 'ab']) - - def test_unique_empty_stream(self): - s = stream([]) - self.assertListEqual(s.unique().toList(), []) - - def test_unique_generator_stream(self): - s = stream(ItrFromFunc(lambda: xrange(4))) - u = s.unique() - self.assertListEqual(u.toList(), [0, 1, 2, 3]) - self.assertListEqual(u.toList(), [0, 1, 2, 3]) - - def test_pstddev_nominal(self): - s = stream([1, 2, 3, 4]) - self.assertAlmostEqual(s.pstddev(), 1.118033988749895) - - def test_pstddev_exception(self): - with self.assertRaises(ValueError): - stream([]).pstddev() - - def test_mean(self): - self.assertAlmostEqual(stream([1, 2, 3, 4]).mean(), 2.5) - - def test_mean_exception(self): - with self.assertRaises(ValueError): - stream([]).mean() - - def test_toSumCounter_nominal(self): - s = stream([('a', 2), ('a', 4), ('b', 2.1), ('b', 3), ('c', 2)]) - self.assertDictEqual(s.toSumCounter(), {'a': 6, 'b': 5.1, 'c': 2}) - - def test_toSumCounter_onEmptyStream(self): - s = stream([]) - self.assertDictEqual(s.toSumCounter(), {}) - - def test_toSumCounter_onStrings(self): - s = stream([('a', 'b'), ('a', 'c')]) - self.assertDictEqual(s.toSumCounter(), {'a': 'bc'}) - - def test_keyBy_nominal(self): - self.assertListEqual(stream(['a', 'bb', '']).keyBy(len).toList(), [(1, 'a'), (2, 'bb'), (0, '')]) - - def test_keys_nominal(self): - self.assertListEqual(stream([(1, 'a'), (2, 'bb'), (0, '')]).keystream().toList(), [1, 2, 0]) - - def test_values_nominal(self): - self.assertListEqual(stream([(1, 'a'), (2, 'bb'), (0, '')]).values().toList(), ['a', 'bb', '']) - - def test_toMap(self): - self.assertDictEqual(stream(((1, 2), (3, 4))).toMap(), {1: 2, 3: 4}) - - def test_joinWithString(self): - s = "|" - strings = ('a', 'b', 'c') - self.assertEqual(stream(iter(strings)).join(s), s.join(strings)) - - def test_joinWithNone(self): - s = "" - strings = ('a', 'b', 'c') - self.assertEqual(stream(iter(strings)).join(), s.join(strings)) - - def test_joinWithFunction(self): - class F: - def __init__(self): - self.counter = 0 - - def __call__(self, *args, **kwargs): - self.counter += 1 - return str(self.counter) - - strings = ('a', 'b', 'c') - f = F() - self.assertEqual(stream(iter(strings)).join(f), "a1b2c") - - def test_mkString(self): - streamToTest = stream(('a', 'b', 'c')) - mock = MagicMock() - joiner = "," - streamToTest.join = mock - streamToTest.mkString(joiner) - mock.assert_called_once_with(joiner) - - def test_reversedNominal(self): - s = slist([1, 2, 3]) - self.assertListEqual(s.reversed().toList(), [3, 2, 1]) - - def test_reversedException(self): - s = stream(xrange(1, 2, 3)) - with 
self.assertRaises(TypeError): - s.reversed() - - -""" -Allow for these test cases to be run from the command line -""" -if __name__ == '__main__': - all_tests = unittest.TestLoader().loadTestsFromModule(sys.modules[__name__]) - unittest.TextTestRunner(verbosity=2).run(all_tests) diff --git a/py2/pyxtension/tests/test_abstractSynchronizedBufferedStream.py b/py2/pyxtension/tests/test_abstractSynchronizedBufferedStream.py deleted file mode 100644 index 54107e9..0000000 --- a/py2/pyxtension/tests/test_abstractSynchronizedBufferedStream.py +++ /dev/null @@ -1,30 +0,0 @@ -#!/usr/bin/python -# coding:utf-8 -# Author: ASU -- -# Purpose: -# Created: 11/18/2015 -from unittest import TestCase, main - -from pyxtension.streams import AbstractSynchronizedBufferedStream, slist - -__author__ = 'ASU' - - -class TestAbstractSynchronizedBufferedStream(TestCase): - def test_nominal(self): - class TestSyncStream(AbstractSynchronizedBufferedStream): - def __init__(self): - super(TestSyncStream, self).__init__() - self._counter = 4 - - def _getNextBuffer(self): - self._counter -= 1 - if self._counter > 0: - return slist(range(self._counter)) - return slist() - - test_stream = TestSyncStream() - self.assertListEqual(test_stream.toList(), [0, 1, 2, 0, 1, 0]) - -if __name__ == '__main__': - main() diff --git a/py2/pyxtension/tests/test_synchronizedBufferedStream.py b/py2/pyxtension/tests/test_synchronizedBufferedStream.py deleted file mode 100644 index ef85d43..0000000 --- a/py2/pyxtension/tests/test_synchronizedBufferedStream.py +++ /dev/null @@ -1,20 +0,0 @@ -#!/usr/bin/python -# coding:utf-8 -# Author: ASU -- -# Purpose: -# Created: 11/18/2015 -from unittest import TestCase, main - -from pyxtension.streams import SynchronizedBufferedStream, slist - -__author__ = 'ASU' - - -class TestSynchronizedBufferedStream(TestCase): - def test_nominal(self): - s = SynchronizedBufferedStream((slist(xrange(i)) for i in xrange(1, 4))) - self.assertListEqual(s.toList(), [0, 0, 1, 0, 1, 2]) - - -if __name__ == '__main__': - main() diff --git a/py3/pyxtension/Json.py b/py3/pyxtension/Json.py deleted file mode 100644 index b45e4f6..0000000 --- a/py3/pyxtension/Json.py +++ /dev/null @@ -1,363 +0,0 @@ -#!/usr/bin/python -# coding:utf-8 -# Author: ASU -- -# Purpose: utility library - -""" -Python module that gives you a dictionary whose values are both gettable and settable using both attribute and getitem syntax -""" -import copy -import json - -from typing import MutableMapping - -from pyxtension.streams import * - -__author__ = 'andrei.suiu@gmail.com' -supermethod = lambda self: super(self.__class__, self) - - -class JsonList(slist): - @classmethod - def __decide(cls, j): - if isinstance(j, dict): - return Json(j) - elif isinstance(j, (list, tuple)) and not isinstance(j, JsonList): - return JsonList(list(map(Json._toJ, j))) - elif isinstance(j, stream): - return JsonList(j.map(Json._toJ).toList()) - else: - return j - - def __init__(self, *args): - slist.__init__(self, stream(*args).map(lambda j: JsonList.__decide(j))) - - def toOrig(self): - return [isinstance(t, (Json, JsonList)) and t.toOrig() or t for t in self] - - def toString(self): - return json.dumps(self) - - -K = TypeVar('K') -V = TypeVar('V') - - -class Json(sdict, dict, MutableMapping[K, V]): - FORBIDEN_METHODS = ('__methods__', '__members__') # Introduced due to PyCharm debugging accessing these methods - - @classmethod - def __myAttrs(cls): - return set(dir(cls)) - - @staticmethod - def load(fp, *args, **kwargs): - """Deserialize ``fp`` (a ``.read()``-supporting 
file-like object containing - a JSON document) to a Python object. - - If the contents of ``fp`` is encoded with an ASCII based encoding other - than utf-8 (e.g. latin-1), then an appropriate ``encoding`` name must - be specified. Encodings that are not ASCII based (such as UCS-2) are - not allowed, and should be wrapped with - ``codecs.getreader(fp)(encoding)``, or simply decoded to a ``unicode`` - object and passed to ``loads()`` - - ``object_hook`` is an optional function that will be called with the - result of any object literal decode (a ``dict``). The return value of - ``object_hook`` will be used instead of the ``dict``. This feature - can be used to implement custom decoders (e.g. JSON-RPC class hinting). - - ``object_pairs_hook`` is an optional function that will be called with the - result of any object literal decoded with an ordered list of pairs. The - return value of ``object_pairs_hook`` will be used instead of the ``dict``. - This feature can be used to implement custom decoders that rely on the - order that the key and value pairs are decoded (for example, - collections.OrderedDict will remember the order of insertion). If - ``object_hook`` is also defined, the ``object_pairs_hook`` takes priority. - - To use a custom ``JSONDecoder`` subclass, specify it with the ``cls`` - kwarg; otherwise ``JSONDecoder`` is used. - """ - return Json.loads(fp.read(), *args, **kwargs) - - @staticmethod - def loads(*args, **kwargs): - """Deserialize ``s`` (a ``str`` or ``unicode`` instance containing a JSON - document) to a Python object. - - If ``s`` is a ``str`` instance and is encoded with an ASCII based encoding - other than utf-8 (e.g. latin-1) then an appropriate ``encoding`` name - must be specified. Encodings that are not ASCII based (such as UCS-2) - are not allowed and should be decoded to ``unicode`` first. - - ``object_hook`` is an optional function that will be called with the - result of any object literal decode (a ``dict``). The return value of - ``object_hook`` will be used instead of the ``dict``. This feature - can be used to implement custom decoders (e.g. JSON-RPC class hinting). - - ``object_pairs_hook`` is an optional function that will be called with the - result of any object literal decoded with an ordered list of pairs. The - return value of ``object_pairs_hook`` will be used instead of the ``dict``. - This feature can be used to implement custom decoders that rely on the - order that the key and value pairs are decoded (for example, - collections.OrderedDict will remember the order of insertion). If - ``object_hook`` is also defined, the ``object_pairs_hook`` takes priority. - - ``parse_float``, if specified, will be called with the string - of every JSON float to be decoded. By default this is equivalent to - float(num_str). This can be used to use another datatype or parser - for JSON floats (e.g. decimal.Decimal). - - ``parse_int``, if specified, will be called with the string - of every JSON int to be decoded. By default this is equivalent to - int(num_str). This can be used to use another datatype or parser - for JSON integers (e.g. float). - - ``parse_constant``, if specified, will be called with one of the - following strings: -Infinity, Infinity, NaN, null, true, false. - This can be used to raise an exception if invalid JSON numbers - are encountered. - - To use a custom ``JSONDecoder`` subclass, specify it with the ``cls`` - kwarg; otherwise ``JSONDecoder`` is used. 
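- 
-         Example (illustrative; it follows from the wrapping logic below that a
-         JSON object becomes a ``Json`` and a JSON array becomes a ``JsonList``):
- 
-         >>> Json.loads('{"a": {"b": 1}}').a.b
-         1
-         >>> Json.loads('[{"a": 1}]')[0].a
-         1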
- """ - d = json.loads(*args, **kwargs) - if isinstance(d, dict): - return Json(d) - elif isinstance(d, list): - return JsonList(d) - else: - raise NotImplementedError("Unknown JSON format: {}".format(d.__class__)) - - @staticmethod - def fromString(s, *args, **kwargs): - return Json.loads(s, *args, **kwargs) - - __decide = lambda self, j: isinstance(j, dict) and Json(j) or (isinstance(j, list) and slist(j) or j) - - @classmethod - def _toJ(cls, j): - if isinstance(j, Json): - return j - elif isinstance(j, dict): - return Json(j) - elif isinstance(j, JsonList): - return j - elif isinstance(j, list): - return JsonList(j) - else: - return j - - def __init__(self, *args, **kwargs): - if not kwargs and len(args) == 1 and isinstance(args[0], (str, bytes)): - d = json.loads(args[0]) - assert isinstance(d, dict) - sdict.__init__(self, d) - elif len(args) >= 2 and isinstance(args[0], (tuple, list)): - sdict.__init__(self, args) - else: - sdict.__init__(self, *args, **kwargs) - - def __getitem__(self, name): - """ - This is called when the Dict is accessed by []. E.g. - some_instance_of_Dict['a']; - If the name is in the dict, we return it. Otherwise we set both - the attr and item to a new instance of Dict. - """ - if name in self: - d = sdict.__getitem__(self, name) - if isinstance(d, dict) and not isinstance(d, Json): - j = Json(d) - sdict.__setitem__(self, name, j) - return j - elif isinstance(d, list) and not isinstance(d, JsonList): - j = JsonList(d) - sdict.__setitem__(self, name, j) - return j - elif isinstance(d, set) and not isinstance(d, sset): - j = sset(d) - sdict.__setitem__(self, name, j) - return j - else: - return d - else: - j = Json() - sdict.__setitem__(self, name, j) - return j - - def __getattr__(self, item): - if item in self.FORBIDEN_METHODS: - raise AttributeError("Forbidden methods access to %s. Introduced due to PyCharm debugging problem." % str( - self.FORBIDEN_METHODS)) - - return self.__getitem__(item) - - def __setattr__(self, key, value): - if key not in self.__myAttrs(): - self[key] = value - else: - raise AttributeError("'%s' object attribute '%s' is read-only" % (str(self.__class__), key)) - - def __iter__(self): - return super(Json, self).__iter__() - - def items(self): - return stream(dict.items(self)).map(lambda kv: (kv[0], Json._toJ(kv[1]))) - - def keys(self): - return stream(dict.keys(self)) - - def values(self): - return stream(dict.values(self)).map(Json._toJ) - - def __str__(self): - return json.dumps(self.toOrig(), separators=(',', ':'), default=lambda k: str(k)) - - def dump(self, *args, **kwargs): - """Serialize ``obj`` as a JSON formatted stream to ``fp`` (a - ``.write()``-supporting file-like object). - - If ``skipkeys`` is true then ``dict`` keys that are not basic types - (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``) - will be skipped instead of raising a ``TypeError``. - - If ``ensure_ascii`` is true (the default), all non-ASCII characters in the - output are escaped with ``\\uXXXX`` sequences, and the result is a ``str`` - instance consisting of ASCII characters only. If ``ensure_ascii`` is - ``False``, some chunks written to ``fp`` may be ``unicode`` instances. - This usually happens because the input contains unicode strings or the - ``encoding`` parameter is used. Unless ``fp.write()`` explicitly - understands ``unicode`` (as in ``codecs.getwriter``) this is likely to - cause an error. 
- - If ``check_circular`` is false, then the circular reference check - for container types will be skipped and a circular reference will - result in an ``OverflowError`` (or worse). - - If ``allow_nan`` is false, then it will be a ``ValueError`` to - serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) - in strict compliance of the JSON specification, instead of using the - JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). - - If ``indent`` is a non-negative integer, then JSON array elements and - object members will be pretty-printed with that indent level. An indent - level of 0 will only insert newlines. ``None`` is the most compact - representation. Since the default item separator is ``', '``, the - output might include trailing whitespace when ``indent`` is specified. - You can use ``separators=(',', ': ')`` to avoid this. - - If ``separators`` is an ``(item_separator, dict_separator)`` tuple - then it will be used instead of the default ``(', ', ': ')`` separators. - ``(',', ':')`` is the most compact JSON representation. - - ``encoding`` is the character encoding for str instances, default is UTF-8. - - ``default(obj)`` is a function that should return a serializable version - of obj or raise TypeError. The default simply raises TypeError. - - If *sort_keys* is ``True`` (default: ``False``), then the output of - dictionaries will be sorted by key. - - To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the - ``.default()`` method to serialize additional types), specify it with - the ``cls`` kwarg; otherwise ``JSONEncoder`` is used. - """ - return json.dump(self.toOrig(), *args, **kwargs) - - def dumps(self, *args, **kwargs): - """Serialize ``self`` to a JSON formatted ``str``. - - If ``skipkeys`` is false then ``dict`` keys that are not basic types - (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``) - will be skipped instead of raising a ``TypeError``. - - If ``ensure_ascii`` is false, all non-ASCII characters are not escaped, and - the return value may be a ``unicode`` instance. See ``dump`` for details. - - If ``check_circular`` is false, then the circular reference check - for container types will be skipped and a circular reference will - result in an ``OverflowError`` (or worse). - - If ``allow_nan`` is false, then it will be a ``ValueError`` to - serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) in - strict compliance of the JSON specification, instead of using the - JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). - - If ``indent`` is a non-negative integer, then JSON array elements and - object members will be pretty-printed with that indent level. An indent - level of 0 will only insert newlines. ``None`` is the most compact - representation. Since the default item separator is ``', '``, the - output might include trailing whitespace when ``indent`` is specified. - You can use ``separators=(',', ': ')`` to avoid this. - - If ``separators`` is an ``(item_separator, dict_separator)`` tuple - then it will be used instead of the default ``(', ', ': ')`` separators. - ``(',', ':')`` is the most compact JSON representation. - - ``encoding`` is the character encoding for str instances, default is UTF-8. - - ``default(obj)`` is a function that should return a serializable version - of obj or raise TypeError. The default simply raises TypeError. - - If *sort_keys* is ``True`` (default: ``False``), then the output of - dictionaries will be sorted by key. 
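- 
-         Example (illustrative, using the default ``json.dumps`` separators):
- 
-         >>> Json({'b': 1, 'a': 2}).dumps(sort_keys=True)
-         '{"a": 2, "b": 1}'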
- - To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the - ``.default()`` method to serialize additional types), specify it with - the ``cls`` kwarg; otherwise ``JSONEncoder`` is used. - - """ - return json.dumps(self.toOrig(), *args, **kwargs) - - def toString(self): - """ - :return: deterministic sorted output string, that can be compared - :rtype: str - """ - return str(self) - - """To be removed and make Json serializable""" - - def __eq__(self, y): - return super(Json, self).__eq__(y) - - def __reduce__(self): - return self.__reduce_ex__(2) - - def __reduce_ex__(self, protocol): - return str(self) - - def copy(self): - return Json(super(Json, self).copy()) - - def __deepcopy__(self, memo): - return Json(copy.deepcopy(self.toOrig(), memo)) - - def __delattr__(self, name): - if name in self: - return supermethod(self).__delitem__(name) - else: - raise AttributeError("%s instance has no attribute %s" % (str(self.__class__), name)) - - def toOrig(self): - """ - Converts Json to a native dict - :return: stream dictionary - :rtype: sdict - """ - return sdict( - self.items() - .map(lambda kv: (kv[0], isinstance(kv[1], (Json, JsonList)) and kv[1].toOrig() or kv[1])) - ) - - -class FrozenJson(Json): - def __init__(self, *args, **kwargs): - super(FrozenJson, self).__init__(*args, **kwargs) - - def __setattr__(self, key, value): - raise TypeError("Can not update a FrozenJson instance by (key,value): ({},{})".format(key, value)) - - def __hash__(self): - return hash(self.toString()) diff --git a/py3/pyxtension/__init__.py b/py3/pyxtension/__init__.py deleted file mode 100644 index 49b8bb3..0000000 --- a/py3/pyxtension/__init__.py +++ /dev/null @@ -1,68 +0,0 @@ -__author__ = 'andrei.suiu@gmail.com' - -from typing import Type, Any - - -class ValidateError(ValueError): - def __init__(self, args): - ValueError.__init__(self, args) - - -def validate(expr, msg="Invalid argument", exc: 'Type[Exception]' = ValidateError): - """ - If the expression val does not evaluate to True, then raise a ValidationError with msg - """ - if not expr: - raise exc(msg) - -class PydanticValidated: - @classmethod - def __get_validators__(cls): - yield cls._pydantic_validator - - @classmethod - def _pydantic_validator(cls, v): - if not isinstance(v, cls): - raise TypeError(f'{repr(v)} is of type {type(v)} but is expected to be of {cls}') - return v - - -class frozendict(dict): - __slots__ = ( - "_hash", - ) - - def __new__(cls: Type['frozendict'], *args: Any, **kwargs: Any) -> 'frozendict': - new = super().__new__(cls, *args, **kwargs) - new._hash = None - return new - - def __hash__(self, *args, **kwargs): - """Calculate the hash if all values are hashable, otherwise raises a TypeError.""" - - if self._hash is not None: - _hash = self._hash - else: - try: - fs = frozenset(self.items()) - _hash = hash(fs) - except TypeError: - raise TypeError("Dictionary values are not hashable") - self._hash = _hash - - return _hash - - def _immutable(self, *args, **kws): - raise TypeError('object is immutable') - - def copy(self) -> 'frozendict': - """ Return the object itself, as it's immutable. 
""" - return self - - __setitem__ = _immutable - __delitem__ = _immutable - clear = _immutable - update = _immutable - setdefault = _immutable - pop = _immutable - popitem = _immutable diff --git a/py3/pyxtension/fileutils.py b/py3/pyxtension/fileutils.py deleted file mode 100644 index 29b0d1f..0000000 --- a/py3/pyxtension/fileutils.py +++ /dev/null @@ -1,304 +0,0 @@ -#!/usr/bin/python -# coding:utf-8 -# Author: ASU -- -# Purpose: utility library -import bz2 -import csv -import gzip -import io -import lzma -import sys -import time -from bz2 import BZ2File -from collections import OrderedDict -from gzip import GzipFile -from math import floor, log10 -from pathlib import Path -from typing import Optional, Callable, TextIO, Union, BinaryIO, TypeVar, Iterable, Dict, Any, Generator, Iterator - -from pydantic import validate_arguments - -from pyxtension import PydanticValidated - -_K = TypeVar('_K') - -__author__ = 'andrei.suiu@gmail.com' - - -@validate_arguments -def openByExtension(filename: Union[Path, str], mode: str = 'r', buffering: int = -1, - compresslevel: int = 9, **kwargs) -> Union[TextIO, BinaryIO, GzipFile, BZ2File]: - """ - :return: Returns an opened file-like object, decompressing/compressing data depending on file extension - """ - if filename.endswith('.gz'): - return gzip.open(filename, mode, compresslevel=compresslevel, **kwargs) - elif filename.endswith('.bz2'): - return bz2.open(filename, mode, compresslevel=compresslevel, **kwargs) - elif filename.endswith('.xz'): - my_filters = [ - {"id": lzma.FILTER_LZMA2, "preset": compresslevel | lzma.PRESET_EXTREME} - ] - return lzma.open(filename, mode=mode, filters=my_filters, **kwargs) - else: - return open(filename, mode, buffering=buffering, **kwargs) - - -open_by_ext = openByExtension -smart_open = openByExtension - - -class Progbar(object): - """Displays a progress bar. - - # Arguments - target: Total number of steps expected, None if unknown. - width: Progress bar width on screen. - verbose: Verbosity mode, 0 (silent), 1 (verbose), 2 (semi-verbose) - interval: Minimum visual progress update interval (in seconds). - This class was inspired from keras.utils.Progbar - """ - - def __init__(self, target: Optional[int], - width: int = 30, - verbose: bool = False, - interval: float = 0.5, - stdout: TextIO = sys.stdout, - timer: Callable[[], float] = time.time, - dynamic_display: Optional[bool] = None): - self.target = target - self.width = width - self.verbose = verbose - self.interval = interval - self.stdout = stdout - if dynamic_display is None: - self._dynamic_display = ((hasattr(self.stdout, 'isatty') and self.stdout.isatty()) - or 'ipykernel' in sys.modules - or (hasattr(self.stdout, 'name') - and self.stdout.name in ('', '')) - ) - else: - self._dynamic_display = dynamic_display - self._total_width = 0 - self._seen_so_far = 0 - self._values = OrderedDict() - self._timer = timer - self._start = self._timer() - self._last_update = 0 - - def update(self, current, values=None): - """Updates the progress bar. - - # Arguments - current: Index of current step. - values: List of tuples: - `(name, value_for_last_step)`. 
- """ - values = values or [] - for k, v in values: - if k not in self._values: - self._values[k] = [v * (current - self._seen_so_far), - current - self._seen_so_far] - else: - self._values[k][0] += v * (current - self._seen_so_far) - self._values[k][1] += (current - self._seen_so_far) - self._seen_so_far = current - - now = self._timer() - info = ' - %.0fs' % (now - self._start) - if (now - self._last_update < self.interval and - self.target is not None and current < self.target): - return - - prev_total_width = self._total_width - - if self.target is not None: - numdigits = int(floor(log10(self.target))) + 1 - barstr = '%%%dd/%d [' % (numdigits, self.target) - bar = barstr % current - prog = float(current) / self.target - prog_width = int(self.width * prog) - if prog_width > 0: - bar += ('=' * (prog_width - 1)) - if current < self.target: - bar += '>' - else: - bar += '=' - bar += ('.' * (self.width - prog_width)) - bar += ']' - else: - bar = '%7d/Unknown' % current - - if current: - time_per_unit = (now - self._start) / current - else: - time_per_unit = 0 - if self.target is not None and current < self.target: - eta = time_per_unit * (self.target - current) - if eta > 3600: - eta_format = ('%d:%02d:%02d' % - (eta // 3600, (eta % 3600) // 60, eta % 60)) - elif eta > 60: - eta_format = '%d:%02d' % (eta // 60, eta % 60) - else: - eta_format = '%ds' % eta - - info = ' - ETA: %s' % eta_format - else: - if time_per_unit >= 1: - info += ' %.0fs/step' % time_per_unit - elif time_per_unit >= 1e-3: - info += ' %.0fms/step' % (time_per_unit * 1e3) - else: - info += ' %.0fus/step' % (time_per_unit * 1e6) - - for k in self._values: - info += ' - %s:' % k - if isinstance(self._values[k], list): - avg = self._values[k][0] / max(1, self._values[k][1]) - # avg = mean( ) - if abs(avg) > 1e-3: - info += ' %.3f' % avg - else: - info += ' %.3e' % avg - else: - info += ' %s' % self._values[k] - - self._total_width += len(info) - if prev_total_width > self._total_width: - info += (' ' * (prev_total_width - self._total_width)) - - display_str = bar + info - - if self._dynamic_display: - prev_total_width = self._total_width - self._total_width = len(display_str) - # ASU: if \r doesn't work, use \b - to move cursor one char back - display_str = '\r' + display_str + ' ' * max(0, prev_total_width - len(display_str)) - else: - display_str = display_str + '\n' - if self.target is not None and current >= self.target: - display_str += '\n' - self.stdout.write(display_str) - self.stdout.flush() - - if self.verbose: - if self.target is None or current >= self.target: - for k in self._values: - info += ' - %s:' % k - avg = self._values[k][0] / max(1, self._values[k][1]) - # avg = mean() - if avg > 1e-3: - info += ' %.3f' % avg - else: - info += ' %.3e' % avg - - display_str = info - if self._dynamic_display: - prev_total_width = self._total_width - self._total_width = len(display_str) - # ASU: if \r doesn't work, use \b - to move cursor one char back - display_str = '\r' + display_str + ' ' * max(0, prev_total_width - len(display_str)) - else: - display_str = display_str + '\n' - self.stdout.write(display_str) - self.stdout.flush() - - self._last_update = now - - def add(self, n, values=None): - self.update(self._seen_so_far + n, values) - - def __call__(self, el: _K) -> _K: - """ - It's intended to be used from a mapper over a stream of values. - It returns the same el - # Example: - >>> from pyxtension.fileutils import Progbar - >>> stream(range(3)).map(Progbar(3)).size() - 1/3 [=========>....................] 
- ETA: 0s - 2/3 [===================>..........] - ETA: 0s - 3/3 [==============================] - 0s 100ms/step - """ - self.add(1, None) - return el - - -class ReversedCSVReader(Iterable[Dict[str, Any]], PydanticValidated): - def __init__(self, fpath: Path, buf_size: int = 4 * 1024, opener: Any = gzip.open): - """ - :param opener: Callable[..., IO] should accept next parameters ([filename],mode:str, newline:str) - The filename argument can be an actual filename (a str or bytes object), - or an existing file object to read from or write to. - """ - self._fpath = fpath - self._opener = opener - self._buf_size = buf_size - self._fh = None - - def _itr(self) -> Generator[Dict[str, Any], None, None]: - with self._opener(self._fpath, mode="rt", newline='') as in_csv_file: - self._fh = in_csv_file - reader = csv.reader(in_csv_file, delimiter=',', quotechar='"') - input_stream = iter(reader) - columns = next(input_stream) - nr_columns = len(columns) - for row in input_stream: - yield {columns[i]: row[i] for i in range(nr_columns)} - self._fh = None - - def __iter__(self) -> Iterator[_K]: - return iter(self._itr()) - - def __reversed__(self): - return self._reversed_itr() - - def _reversed_byte_reader(self): - with self._opener(self._fpath, "rb") as in_csv_file: - self._fh = in_csv_file - in_csv_file.seek(0, io.SEEK_END) - f_size = in_csv_file.tell() - cur_pos = f_size - while cur_pos > 0: - new_cur_pos = max(0, cur_pos - self._buf_size) - read_sz = cur_pos - new_cur_pos - cur_pos = new_cur_pos - if read_sz: - in_csv_file.seek(new_cur_pos, io.SEEK_SET) - buf = in_csv_file.read(read_sz) - for b in reversed(buf): - yield b - self._fh = None - - def _split_stream_to_unicode_strings(self, s: Iterable): - buf = [] - for b in s: - if b == ord(b'\n'): - if buf: - yield bytes(reversed(buf)).decode('utf-8').strip() - buf = [] - else: - buf.append(b) - if buf: - yield bytes(reversed(buf)).decode('utf-8').strip() - - def _reversed_itr(self) -> Generator[Dict[str, Any], None, None]: - with self._opener(self._fpath, "rt", newline='') as in_csv_file: - reader = csv.reader(in_csv_file, delimiter=',', quotechar='"') - input_stream = iter(reader) - columns = next(input_stream) - nr_columns = len(columns) - reversed_bytes_itr = self._reversed_byte_reader() - prev_row = None # we return only prev row to avoid returning first row that contains column definitions - for unicode_string in self._split_stream_to_unicode_strings(reversed_bytes_itr): - row = unicode_string.split(",") - if prev_row is not None: - yield {columns[i]: prev_row[i] for i in range(nr_columns)} - prev_row = row - - def close(self): - """ - Attention! This will forcefully close the file and the already started generators won't work anymore. - """ - if self._fh is not None: - self._fh.close() diff --git a/py3/pyxtension/models.py b/py3/pyxtension/models.py deleted file mode 100644 index 2ac9a01..0000000 --- a/py3/pyxtension/models.py +++ /dev/null @@ -1,67 +0,0 @@ -# Author: ASU -- -from typing import Any, Callable, cast, Optional - -from json_composite_encoder import JSONCompositeEncoder -from pydantic import BaseModel, Extra - - -class ExtModel(BaseModel): - """ - Extended Model with custom JSON encoder. - Extends the standard Pydantic model functionality by allowing arbitrary types and providing custom encoding. 
- """ - def json( - self, - *, - include=None, - exclude=None, - by_alias: bool = False, - skip_defaults: bool = None, - exclude_unset: bool = False, - exclude_defaults: bool = False, - exclude_none: bool = False, - encoder: Optional[Callable[[Any], Any]] = None, - **dumps_kwargs: Any, - ) -> str: - """ - Note: we don't override the dict() method since it doesn't need custom encoder, so only here we can apply custom encoders; - - Generate a JSON representation of the model, `include` and `exclude` arguments as per `dict()`. - `encoder` is an optional function to supply as `default` to json.dumps(), other arguments as per `json.dumps()`. - """ - if skip_defaults is not None: - exclude_unset = skip_defaults - encoder = cast(Callable[[Any], Any], encoder or self.__json_encoder__) - data = self.dict( - include=include, - exclude=exclude, - by_alias=by_alias, - exclude_unset=exclude_unset, - exclude_defaults=exclude_defaults, - exclude_none=exclude_none, - ) - if self.__custom_root_type__: - # below is a hardcoding workaround instead of original utils.ROOT_KEY as Pydantic doesn't have it on Unix - data = data["__root__"] - composite_encoder_builder = JSONCompositeEncoder.Builder(encoders=self.__config__.json_encoders) - # Note: using a default arg instead of cls would not call encoder for elements that derive from base types like str or float; - return self.__config__.json_dumps(data, default=encoder, cls=composite_encoder_builder, **dumps_kwargs) - - class Config: - arbitrary_types_allowed = True - - -class ImmutableExtModel(ExtModel): - class Config: - allow_mutation = False - arbitrary_types_allowed = True - extra = Extra.forbid - - -class Singleton(type): - _instances = {} - - def __call__(cls, *args, **kwargs): - if cls not in cls._instances: - cls._instances[cls] = super(Singleton, cls).__call__(*args, **kwargs) - return cls._instances[cls] \ No newline at end of file diff --git a/py3/pyxtension/racelib.py b/py3/pyxtension/racelib.py deleted file mode 100644 index 1af03ff..0000000 --- a/py3/pyxtension/racelib.py +++ /dev/null @@ -1,74 +0,0 @@ -#!/usr/bin/python -# coding:utf-8 -# Author: ASU -- -# Purpose: Concurrent utility classes (name coming from RACEconditionLIBrary) -# Created: 11/26/2015 -import threading -import time - -from typing import Union, Callable - -__author__ = 'andrei.suiu@gmail.com' - - -class ContextLock(): - def __init__(self, lock: threading.Lock): - self.__lock = lock - - def __enter__(self): - self.__lock.acquire() - - def __exit__(self, exc_type, exc_value, traceback): - self.__lock.release() - return False - -class TimePerformanceLogger: - """ - Used to measure the performance of a code block run within a With Statement Context Manager - """ - - def __init__(self, logger: Callable[[float], None] = lambda sec: print("Finished in %.02f sec" % sec)): - """ - :param logger: logger function that would get number of seconds as argument - """ - self._logger = logger - - def __enter__(self): - self._t1 = time.time() - - def __exit__(self, exc_type, exc_value, traceback): - self._logger(time.time() - self._t1) - if exc_type: - return False - return True - - -class CountLogger: - """ - Used to log partial progress of streams - """ - - def __init__(self, log_interval: int = 1000, - msg: str = "\rProcessed %d out of %d in %.01f sec. 
ETA: %.01f sec", - total: int = -1, - func: Union[Callable[[str], None], Callable[[str, str], None]] = print, - use_end: bool = True): - self._cnt = 0 - self._n = log_interval - self._t0 = None - self._msg = msg - self._func = func - self._use_end = use_end - self._total = total - - def __call__(self, e): - if self._t0 is None: - self._t0 = time.time() - self._cnt += 1 - if self._cnt % self._n == 0: - elapsed = time.time() - self._t0 - eta = elapsed / self._cnt * (self._total-self._cnt) if self._total>0 else float('NaN') - msg = self._msg % (self._cnt, self._total, elapsed, eta) - kwargs = {'end': ''} if self._use_end else {} - self._func(msg, **kwargs) - return e diff --git a/py3/pyxtension/tests/data/ADABTC.agg-60s.tick.csv.gz b/py3/pyxtension/tests/data/ADABTC.agg-60s.tick.csv.gz deleted file mode 100644 index 81552cc..0000000 Binary files a/py3/pyxtension/tests/data/ADABTC.agg-60s.tick.csv.gz and /dev/null differ diff --git a/py3/pyxtension/tests/test_Json.py b/py3/pyxtension/tests/test_Json.py deleted file mode 100644 index dd89cc9..0000000 --- a/py3/pyxtension/tests/test_Json.py +++ /dev/null @@ -1,399 +0,0 @@ -import copy -import json -import sys -import types -import unittest - -from pyxtension.Json import Json, JsonList, FrozenJson -from pyxtension.streams import stream, sdict - -__author__ = 'andrei.suiu@gmail.com' - - -class JsonTestCase(unittest.TestCase): - def setUp(self): - self.d = Json((("a", 2), (3, 4), ("d", {"d2": 4}))) - - def testConstructor(self): - self.assertEqual(Json('{"a":2,"4":"5"}'), {"a": 2, "4": "5"}) - self.assertEqual(Json((("a", 2), ("4", "5"))), {"a": 2, "4": "5"}) - self.assertEqual(Json(b'{"a":2,"4":"5"}'), {"a": 2, "4": "5"}) - - def testBase(self): - self.assertEqual(self.d.a, 2) - self.assertEqual(self.d["a"], 2) - self.assertEqual(self.d.b.c, {}) - self.assertEqual(self.d.d.d2, 4) - self.assertIsInstance(self.d.keys(), stream) - self.assertIsInstance(self.d.values(), stream) - - def test_strBasics(self): - self.assertEqual(json.dumps({"a": "4"}, separators=(',', ':')), str(Json({"a": "4"}))) - j = Json((("a", 2), ("3", 4), ("d", {"d2": 4}))) - serializedJson = str(j) - newObj = json.loads(serializedJson) - self.assertDictEqual(newObj, j) - - def test_repr_from_dict(self): - d = {'a': 'a'} - j = Json(d) - self.assertEqual(repr(j), repr(d)) - - def test_repr_used_setattr(self): - j = Json() - j.a = 'a' - self.assertEqual(repr(j), repr({'a': 'a'})) - - def test_forbiden_attrs(self): - j = Json() - with self.assertRaises(AttributeError): - j.__methods__() - - def testUpdateItems(self): - d = Json((("a", 2), (3, 4), ("d", {"d2": 4}))) - d.d.d2 = 3 - self.assertEqual(d.d.d2, 3) - - def testSpecialKeys(self): - d = Json((("__init__", 2), (3, 4), ("d", {"d2": 4}))) - self.assertEqual(d["__init__"], 2) - self.assertNotEqual(d.__init__, 2) - self.assertIsInstance(d.__init__, types.MethodType) - - def testIteritems(self): - b = self.d.items().toList() - self.assertEqual(self.d.keys().toList(), self.d.toList()) - self.assertEqual(b[2][1].d2, 4) - self.assertIsInstance(b[2][1], Json) - self.assertIsInstance(self.d.items(), stream) - self.assertEqual(self.d.items().toList(), [('a', 2), (3, 4), ('d', {'d2': 4})]) - self.assertEqual(self.d.items()[2][1].d2, 4) - self.assertIsInstance(self.d.items(), stream) - self.assertEqual(self.d.items().sorted(key=str).toList(), [('a', 2), ('d', {'d2': 4}), (3, 4)]) - self.assertEqual(Json({1: 1, 2: 2, 3: 3}).values().sum(), 6) - - def testJsonList(self): - jlist = Json({'a': [1, 2, {'b': [{'c': 3}, {'d': 4}]}]}) - 
self.assertEqual(jlist.a[2], {'b': [{'c': 3}, {'d': 4}]}) - self.assertEqual(jlist.a[2].b[1].d, 4) - - def testJsonSetValues(self): - self.d.c = "set" - self.assertEqual(self.d.c, "set") - - def test_toOrigNominal(self): - j = Json() - j.a = Json({'b': 'c'}) - j.toString() - j.toOrig() - repr(j) - d = j.toOrig() - - self.assertIsInstance(d, sdict) - self.assertDictEqual(d, {'a': {'b': 'c'}}) - - def test_NoneValueRemainsNone(self): - j = Json({'a': None}) - self.assertIs(j.a, None) - - def test_ConvertSetToList(self): - j = Json() - j.st = set((1, 2)) - d = j.toOrig() - self.assertIsInstance(d, sdict) - self.assertDictEqual({'st': set([1, 2])}, d) - - def test_serializeDeserialize(self): - serialized = '{"command":"put","details":{"cookie":"cookie1","platform":"fb"}}' - j = Json(serialized) - self.assertEqual(serialized, j.toString()) - -TEST_VAL = [1, 2, 3] -TEST_DICT = {'a': {'b': {'c': TEST_VAL}}} -TEST_DICT_STR = str(TEST_DICT) - - -class TestsFromAddict(unittest.TestCase): - def test_set_one_level_item(self): - some_dict = {'a': TEST_VAL} - prop = Json() - prop['a'] = TEST_VAL - self.assertDictEqual(prop, some_dict) - - def test_set_two_level_items(self): - some_dict = {'a': {'b': TEST_VAL}} - prop = Json() - prop['a']['b'] = TEST_VAL - self.assertDictEqual(prop, some_dict) - - def test_set_three_level_items(self): - prop = Json() - prop['a']['b']['c'] = TEST_VAL - self.assertDictEqual(prop, TEST_DICT) - - def test_set_one_level_property(self): - prop = Json() - prop.a = TEST_VAL - self.assertDictEqual(prop, {'a': TEST_VAL}) - - def test_set_two_level_properties(self): - prop = Json() - prop.a.b = TEST_VAL - self.assertDictEqual(prop, {'a': {'b': TEST_VAL}}) - - def test_set_three_level_properties(self): - prop = Json() - prop.a.b.c = TEST_VAL - self.assertDictEqual(prop, TEST_DICT) - - def test_init_with_dict(self): - self.assertDictEqual(TEST_DICT, Json(TEST_DICT)) - - def test_init_with_kws(self): - prop = Json(a=2, b={'a': 2}, c=[{'a': 2}]) - self.assertDictEqual(prop, {'a': 2, 'b': {'a': 2}, 'c': [{'a': 2}]}) - - def test_init_with_tuples(self): - prop = Json((0, 1), (1, 2), (2, 3)) - self.assertDictEqual(prop, {0: 1, 1: 2, 2: 3}) - - def test_init_with_list(self): - prop = Json([(0, 1), (1, 2), (2, 3)]) - self.assertDictEqual(prop, {0: 1, 1: 2, 2: 3}) - - def test_init_with_generator(self): - prop = Json(((i, i + 1) for i in range(3))) - self.assertDictEqual(prop, {0: 1, 1: 2, 2: 3}) - - def test_init_raises(self): - def init(): - Json(5) - - self.assertRaises(TypeError, init) - - def test_init_with_empty_stuff(self): - a = Json({}) - b = Json([]) - self.assertDictEqual(a, {}) - self.assertDictEqual(b, {}) - - def test_init_with_list_of_dicts(self): - a = Json({'a': [{'b': 2}]}) - self.assertIsInstance(a.a[0], Json) - self.assertEqual(a.a[0].b, 2) - - def test_getitem(self): - prop = Json(TEST_DICT) - self.assertEqual(prop['a']['b']['c'], TEST_VAL) - - def test_getattr(self): - prop = Json(TEST_DICT) - self.assertEqual(prop.a.b.c, TEST_VAL) - - def test_isinstance(self): - self.assertTrue(isinstance(Json(), dict)) - - def test_str(self): - prop = Json(TEST_DICT) - self.assertEqual(str(prop), json.dumps(TEST_DICT, separators=(',', ':'))) - - def test_delitem(self): - prop = Json({'a': 2}) - del prop['a'] - self.assertDictEqual(prop, {}) - - def test_delitem_nested(self): - prop = Json(TEST_DICT) - del prop['a']['b']['c'] - self.assertDictEqual(prop, {'a': {'b': {}}}) - - def test_delattr(self): - prop = Json({'a': 2}) - del prop.a - self.assertDictEqual(prop, {}) - - def 
test_delattr_nested(self): - prop = Json(TEST_DICT) - del prop.a.b.c - self.assertDictEqual(prop, {'a': {'b': {}}}) - - def test_delitem_delattr(self): - prop = Json(TEST_DICT) - del prop.a['b'] - self.assertDictEqual(prop, {'a': {}}) - - def test_complex_nested_structure(self): - prop = Json() - prop.a = [[Json(), 2], [[]], [1, [2, 3], 0]] - self.assertDictEqual(prop, {'a': [[{}, 2, ], [[]], [1, [2, 3], 0]]}) - - def test_tuple_key(self): - prop = Json() - prop[(1, 2)] = 2 - self.assertDictEqual(prop, {(1, 2): 2}) - self.assertEqual(prop[(1, 2)], 2) - - def test_set_prop_invalid(self): - prop = Json() - - def set_keys(): - prop.keys = 2 - - def set_items(): - prop.items = 3 - - self.assertRaises(AttributeError, set_keys) - self.assertRaises(AttributeError, set_items) - self.assertDictEqual(prop, {}) - - def test_dir_with_members(self): - prop = Json({'__members__': 1}) - dir(prop) - self.assertTrue('__members__' in prop.keys()) - - def test_to_dict(self): - nested = {'a': [{'a': 0}, 2], 'b': {}, 'c': 2} - prop = Json(nested) - regular = prop.toOrig() - self.assertDictEqual(regular, prop) - self.assertDictEqual(regular, nested) - self.assertNotIsInstance(regular, Json) - with self.assertRaises(AttributeError): - regular.a - - def get_attr_deep(): - return regular['a'][0].a - - self.assertRaises(AttributeError, get_attr_deep) - - def test_to_dict_with_tuple(self): - nested = {'a': ({'a': 0}, {2: 0})} - prop = Json(nested) - regular = prop.toOrig() - self.assertDictEqual(regular, prop) - self.assertDictEqual(regular, nested) - self.assertIsInstance(regular['a'], tuple) - self.assertNotIsInstance(regular['a'][0], Json) - - def test_update(self): - old = Json() - old.child.a = 'old a' - old.child.b = 'old b' - old.foo = 'no dict' - - new = Json() - new.child.b = 'new b' - new.child.c = 'new c' - new.foo.now_my_papa_is_a_dict = True - - old.update(new) - - reference = {'foo': {'now_my_papa_is_a_dict': True}, - 'child': {'c': 'new c', 'b': 'new b'}} - - self.assertDictEqual(old, reference) - - def test_update_with_lists(self): - org = Json() - org.a = [1, 2, {'a': 'superman'}] - someother = Json() - someother.b = [{'b': 123}] - org.update(someother) - - correct = {'a': [1, 2, {'a': 'superman'}], - 'b': [{'b': 123}]} - - org.update(someother) - self.assertDictEqual(org, correct) - self.assertIsInstance(org.b[0], Json) - - def test_copy(self): - class MyMutableObject(object): - def __init__(self): - self.attribute = None - - foo = MyMutableObject() - foo.attribute = True - - a = Json() - a.immutable = 42 - a.mutable = foo - - b = a.copy() - - # immutable object should not change - b.immutable = 21 - self.assertEqual(a.immutable, 42) - - # mutable object should change - b.mutable.attribute = False - self.assertEqual(a.mutable.attribute, b.mutable.attribute) - - # changing child of b should not affect a - b.child = "new stuff" - self.assertTrue(isinstance(a.child, Json)) - - def test_deepcopy(self): - class MyMutableObject(object): - def __init__(self): - self.attribute = None - - foo = MyMutableObject() - foo.attribute = True - - a = Json() - a.child.immutable = 42 - a.child.mutable = foo - - b = copy.deepcopy(a) - - # immutable object should not change - b.child.immutable = 21 - self.assertEqual(a.child.immutable, 42) - - # mutable object should not change - b.child.mutable.attribute = False - self.assertTrue(a.child.mutable.attribute) - - # changing child of b should not affect a - b.child = "new stuff" - self.assertTrue(isinstance(a.child, Json)) - - def test_equal_objects_nominal(self): - j1 = 
Json({'a': 1, 'b': {'c': 'd'}}) - j2 = Json({'a': 1, 'b': {'c': 'd'}}) - j3 = Json({'a': 1, 'b': {'c': 'e'}}) - self.assertEqual(j1, j2) - self.assertNotEqual(j1, j3) - - def test_JsonList_converts_tuples(self): - jl = JsonList([(Json(), 2), [[]], [1, (2, 3), 0]]) - self.assertListEqual(jl, [[{}, 2, ], [[]], [1, [2, 3], 0]]) - - def test_FrozenJson_nominal(self): - frozenJson = FrozenJson({'a': 'b'}) - self.assertEqual(frozenJson.a, 'b') - with self.assertRaises(TypeError): - frozenJson.a = 'c' - with self.assertRaises(TypeError): - frozenJson.b = 'c' - - def test_FrozenJson_hash(self): - d1 = {'a': 'b'} - fj1 = FrozenJson(d1) - d1['b'] = 'c' - fj2 = FrozenJson(d1) - del d1['b'] - fj3 = FrozenJson(d1) - self.assertEqual(fj1, fj3) - self.assertNotEqual(fj1, fj2) - self.assertSetEqual(set([fj1, fj2, fj3]), set([fj1, fj2])) - self.assertTrue(set([fj1, fj2]) <= set([fj2, fj3])) - - -""" -Allow for these test cases to be run from the command line -""" -if __name__ == '__main__': - all_tests = unittest.TestLoader().loadTestsFromModule(sys.modules[__name__]) - unittest.TextTestRunner(verbosity=2).run(all_tests) diff --git a/py3/pyxtension/tests/test_fileutils.py b/py3/pyxtension/tests/test_fileutils.py deleted file mode 100644 index e936e2b..0000000 --- a/py3/pyxtension/tests/test_fileutils.py +++ /dev/null @@ -1,150 +0,0 @@ -#!/usr/bin/env python -# coding:utf-8 -# Author: ASU -- -# Purpose: -# Created: 9/22/2019 -import gzip -import io -from operator import itemgetter -from pathlib import Path -from unittest import TestCase, main - -from pyxtension.fileutils import Progbar, ReversedCSVReader -from pyxtension.streams import stream - -__author__ = 'andrei.suiu@gmail.com' - - -class TestFileutils(TestCase): - TEST_VALUES_S = [None, - [['key1', 1], ['key2', 1e-4]], - [['key3', 1], ['key2', 1e-4]]] - - def test_Progbar_update(self): - times = [x * 0.1 for x in range(20, 4, -1)] - timer = lambda: times.pop() - verbose = False - target = len(self.TEST_VALUES_S) - 1 - out = io.StringIO() - bar = Progbar(target, width=30, verbose=verbose, interval=0.05, stdout=out, timer=timer) - for current, values in enumerate(self.TEST_VALUES_S): - bar.update(current, values=values) - expected = ("0/2 [..............................] - ETA: 0s\n" - "1/2 [==============>...............] - ETA: 0s - key1: 1.000 - key2: 1.000e-04\n" - "2/2 [==============================] - 0s 150ms/step - key1: 1.000 - key2: 1.000e-04 - key3: 1.000\n\n" - ) - self.assertEqual(expected, out.getvalue()) - target = None - out = io.StringIO() - bar = Progbar(target, width=30, verbose=verbose, interval=0.05, stdout=out, timer=timer) - for current, values in enumerate(self.TEST_VALUES_S): - bar.update(current, values=values) - expected = (" 0/Unknown - 0s 0us/step\n" - " 1/Unknown - 0s 200ms/step - key1: 1.000 - key2: 1.000e-04\n" - " 2/Unknown - 0s 150ms/step - key1: 1.000 - key2: 1.000e-04 - key3: 1.000\n") - self.assertEqual(expected, out.getvalue()) - - def test_Progbar_update_verbose(self): - times = [x * 0.1 for x in range(20, 4, -1)] - timer = lambda: times.pop() - verbose = True - target = len(self.TEST_VALUES_S) - 1 - out = io.StringIO() - bar = Progbar(target, width=30, verbose=verbose, interval=0.05, stdout=out, timer=timer, dynamic_display=True) - for current, values in enumerate(self.TEST_VALUES_S): - bar.update(current, values=values) - expected = ( - '\r0/2 [..............................] - ETA: 0s\r1/2 [==============>...............] 
- ETA: 0s - key1: 1.000 - key2: 1.000e-04 ' - '\r2/2 [==============================] - 0s 150ms/step - key1: 1.000 - key2: 1.000e-04 - key3: 1.000 \n' - '\r - 0s 150ms/step - key1: 1.000 - key2: 1.000e-04 - key3: 1.000 - key1: 1.000 - key2: 1.000e-04 - key3: 1.000' - ) - - self.assertEqual(expected, out.getvalue()) - target = None - out = io.StringIO() - bar = Progbar(target, width=30, verbose=verbose, interval=0.05, stdout=out, timer=timer, dynamic_display=True) - for current, values in enumerate(self.TEST_VALUES_S): - bar.update(current, values=values) - expected = ('\r 0/Unknown - 0s 0us/step' - '\r - 0s 0us/step ' - '\r 1/Unknown - 0s 200ms/step - key1: 1.000 - key2: 1.000e-04' - '\r - 0s 200ms/step - key1: 1.000 - key2: 1.000e-04 - key1: 1.000 - key2: 1.000e-04' - '\r 2/Unknown - 0s 150ms/step - key1: 1.000 - key2: 1.000e-04 - key3: 1.000 ' - '\r - 0s 150ms/step - key1: 1.000 - key2: 1.000e-04 - key3: 1.000 - key1: 1.000 - key2: 1.000e-04 - key3: 1.000') - self.assertEqual(expected, out.getvalue()) - - def test_Progbar_add(self): - times = [x * 0.1 for x in range(20, 4, -1)] - timer = lambda: times.pop() - verbose = False - target = len(self.TEST_VALUES_S) - out = io.StringIO() - bar = Progbar(target, width=30, verbose=verbose, interval=0.05, stdout=out, timer=timer) - for current, values in enumerate(self.TEST_VALUES_S): - bar.add(1, values=values) - expected = ('1/3 [=========>....................] - ETA: 0s\n' - '2/3 [===================>..........] - ETA: 0s - key1: 1.000 - key2: 1.000e-04\n' - '3/3 [==============================] - 0s 100ms/step - key1: 1.000 - key2: 1.000e-04 - key3: 1.000\n\n') - self.assertEqual(expected, out.getvalue()) - - def test_Progbar_mapper_use_unknown_size(self): - times = [x * 0.1 for x in range(200, 4, -1)] - timer = lambda: times.pop() - verbose = False - target = None - out = io.StringIO() - bar = Progbar(target, width=30, verbose=verbose, interval=0.05, stdout=out, timer=timer, dynamic_display=True) - for value in range(3): - self.assertEqual(value, bar(value)) - expected = ('\r 1/Unknown - 0s 100ms/step' - '\r 2/Unknown - 0s 100ms/step ' - '\r 3/Unknown - 0s 100ms/step ' - ) - self.assertEqual(expected, out.getvalue()) - - def test_Progbar_mapper_use_known_size(self): - times = [x * 0.1 for x in range(200, 4, -1)] - timer = lambda: times.pop() - verbose = False - target = 3 - out = io.StringIO() - bar = Progbar(target, width=30, verbose=verbose, interval=0.05, stdout=out, timer=timer, dynamic_display=True) - self.assertListEqual(list(range(target)), stream(range(target)).map(bar).toList()) - expected = ('\r1/3 [=========>....................] - ETA: 0s' - '\r2/3 [===================>..........] - ETA: 0s ' - '\r3/3 [==============================] - 0s 100ms/step \n' - ) - self.assertEqual(expected, out.getvalue()) - - def test_Progbar_mapper_use_dynamic_nominal(self): - times = [x * 0.1 for x in range(200, 4, -1)] - timer = lambda: times.pop() - verbose = False - target = 3 - out = io.StringIO() - bar = Progbar(target, width=30, verbose=verbose, interval=0.05, stdout=out, timer=timer, dynamic_display=True) - self.assertListEqual(list(range(target)), stream(range(target)).map(bar).toList()) - expected = ('\r1/3 [=========>....................] - ETA: 0s' - '\r2/3 [===================>..........] 
- ETA: 0s ' - '\r3/3 [==============================] - 0s 100ms/step \n') - self.assertEqual(expected, out.getvalue()) - -class TestReversedCSVReader(TestCase): - TESTS_ROOT = Path(__file__).absolute().parent - - def test_reversed_itr(self): - reader = ReversedCSVReader(self.TESTS_ROOT / 'data' / 'ADABTC.agg-60s.tick.csv.gz', buf_size=128) - in_order = list(reversed(list(stream(reader).map(itemgetter('time'))))) - rev_order = stream(reader).reversed().map(itemgetter('time')).toList() - self.assertListEqual(in_order, rev_order) - - def test_opener_nominal(self): - opener = lambda filename, mode, newline=None:gzip.open(filename, mode, newline=newline) - reader = ReversedCSVReader(self.TESTS_ROOT / 'data' / 'ADABTC.agg-60s.tick.csv.gz', buf_size=128, opener=opener) - in_order = list(reversed(list(stream(reader).map(itemgetter('time'))))) - rev_order = stream(reader).reversed().map(itemgetter('time')).toList() - self.assertListEqual(in_order, rev_order) - - -if __name__ == '__main__': - main() diff --git a/py3/pyxtension/tests/test_frozendict.py b/py3/pyxtension/tests/test_frozendict.py deleted file mode 100644 index 13a5402..0000000 --- a/py3/pyxtension/tests/test_frozendict.py +++ /dev/null @@ -1,42 +0,0 @@ -__author__ = 'andrei.suiu@gmail.com' - -from unittest import TestCase, main - -from pyxtension import frozendict - - -class TestFrozendict(TestCase): - def test_immutable(self): - fd = frozendict(a=1, b=2) - with self.assertRaises(TypeError): - fd["a"] = 2 - with self.assertRaises(TypeError): - fd.update({1:2}) - with self.assertRaises(TypeError): - del fd["a"] - with self.assertRaises(TypeError): - fd.clear() - - def test_empty(self): - fd_empty = frozendict({}) - self.assertTrue(fd_empty == frozendict([]) == frozendict({}, **{})) - - def test_setattr(self): - fd = frozendict(a=1, b=2) - with self.assertRaises(AttributeError): - fd._initialized = True - - def test_copy(self): - fd = frozendict(a=1, b=2) - fd2 = fd.copy() - self.assertIs(fd, fd2) - - def test_clone(self): - fd = frozendict(a=1, b=2) - fd2 = frozendict(dict(fd)) - self.assertEqual(fd, fd2) - self.assertEqual(hash(fd), hash(fd2)) - - -if __name__ == '__main__': - main() diff --git a/py3/pyxtension/tests/test_models.py b/py3/pyxtension/tests/test_models.py deleted file mode 100644 index ae3030d..0000000 --- a/py3/pyxtension/tests/test_models.py +++ /dev/null @@ -1,33 +0,0 @@ -# Author: ASU -- -from unittest import TestCase - -from tsx import TS - -from py3.pyxtension.models import ExtModel - - -class TestExtModel(TestCase): - def test_json(self): - class CustomFloat(float): - pass - - class A(ExtModel): - ts: TS - - class Config: - json_encoders = { - TS: TS.as_iso.fget - } - - class B(A): - cf: CustomFloat - - class Config: - json_encoders = { - CustomFloat: lambda cf: cf + 0.5, } - - ts = TS("2023-04-12T00:00:00Z") - a = B(ts=ts, cf=CustomFloat(1.)) - result = a.json() - expected = '{"ts": "2023-04-12T00:00:00Z", "cf": 1.5}' - self.assertEqual(result, expected) diff --git a/py3/pyxtension/tests/test_synchronizedBufferedStream.py b/py3/pyxtension/tests/test_synchronizedBufferedStream.py deleted file mode 100644 index bf595ea..0000000 --- a/py3/pyxtension/tests/test_synchronizedBufferedStream.py +++ /dev/null @@ -1,20 +0,0 @@ -#!/usr/bin/python -# coding:utf-8 -# Author: ASU -- -# Purpose: -# Created: 11/18/2015 -from unittest import TestCase, main - -from pyxtension.streams import SynchronizedBufferedStream, slist - -__author__ = 'andrei.suiu@gmail.com' - - -class TestSynchronizedBufferedStream(TestCase): - def 
test_nominal(self): - s = SynchronizedBufferedStream((slist(range(i)) for i in range(1, 4))) - self.assertListEqual(s.toList(), [0, 0, 1, 0, 1, 2]) - - -if __name__ == '__main__': - main() diff --git a/py3/pyxtension/tests/test_throttler.py b/py3/pyxtension/tests/test_throttler.py deleted file mode 100644 index 95aeb7c..0000000 --- a/py3/pyxtension/tests/test_throttler.py +++ /dev/null @@ -1,17 +0,0 @@ -from unittest import TestCase -from unittest.mock import patch, MagicMock, call - -from pyxtension.streams import stream -from pyxtension.throttler import Throttler - - -class TestThrottler(TestCase): - @patch('time.sleep', return_value=None) - def test_throttle(self, sleep_mock): - time_func = MagicMock() - time_func.side_effect = [1, 1, 1, 6, 6, 8, 9, 11, 12, 13] - throttler = Throttler(2, 3, time_func) # max 2 requests per 3 seconds - lst = stream(range(10)).map(throttler.throttle).to_list() - self.assertListEqual(sleep_mock.call_args_list, - [call(3), call(3), call(3), call(1), call(2), call(1), call(2), call(2)]) - self.assertEqual(lst, list(range(10))) diff --git a/py3/pyxtension/throttler.py b/py3/pyxtension/throttler.py deleted file mode 100644 index c0186b9..0000000 --- a/py3/pyxtension/throttler.py +++ /dev/null @@ -1,23 +0,0 @@ -from collections import deque -import time -from typing import Any, Callable - - -class Throttler: - def __init__(self, max_req: int, period: float, time_func: Callable[[], float] = time.time) -> None: - self._max_req = max_req - self._period = period - self._request_timestamps = deque(maxlen=max_req) - self._req_cnt = 0 - self._time_func = time_func - - def throttle(self, val: Any) -> Any: - t1 = self._time_func() - self._req_cnt += 1 - self._request_timestamps.append(t1) - if len(self._request_timestamps) >= self._max_req: - t0 = self._request_timestamps[0] - dt = t1 - t0 - if dt < self._period: - time.sleep(self._period - dt) - return val diff --git a/requirements-dev.txt b/requirements-dev.txt index 1ee7864..358177c 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,3 +1,3 @@ -r requirements.txt twine>=4.0.1 -tsx>=0.0.1 \ No newline at end of file +pyxtension>=1.15.0 \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 40d37d1..fa15a3d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ tqdm>=4.62.0;python_version>="3" pydantic>=1.8.2;python_version>="3" tblib>=1.7.0;python_version>="3" -json-composite-encoder>=1.0.0 \ No newline at end of file +throttlex>=1.0.0 \ No newline at end of file diff --git a/run_tests.py b/run_tests.py index d515595..c25308b 100644 --- a/run_tests.py +++ b/run_tests.py @@ -1,22 +1,15 @@ #!/usr/bin/env python -# coding:utf-8 # Author: ASU -- -# Purpose: -# Created: 11/5/2017 import os import sys import unittest -import io -__author__ = 'ASU' + if __name__ == '__main__': testLoader = unittest.TestLoader() pymajorVersion = sys.version_info[0] - packageDir = os.path.join(os.path.dirname(__file__), "py%d" % pymajorVersion, "pyxtension") + packageDir = os.path.join(os.path.dirname(__file__), "streamerate") testsDir = os.path.join(packageDir, "tests") - #sys.path.append(packageDir) - - # textTestResult = unittest.TextTestResult(io.StringIO(),'',verbosity=1) trunner = unittest.TextTestRunner(sys.stdout, descriptions=True, verbosity=0) testSuite = testLoader.discover(start_dir=testsDir, pattern="test_*.py", top_level_dir=testsDir) res = trunner.run(testSuite) diff --git a/setup.py b/setup.py index 6ea7950..9630c36 100644 --- a/setup.py +++ b/setup.py @@ -1,115 +1,110 @@ 
-#!/usr/bin/env python -# coding:utf-8 -# Author: ASU -- -# Purpose: -# Created: 12/1/2015 -import os -import sys -from os.path import join -from shutil import copy, rmtree - -__author__ = 'ASU' - -# Bump up this version -VERSION = '1.15.0' +from codecs import open +from os import path from setuptools import setup -from setuptools.command.install import install -from wheel.bdist_wheel import bdist_wheel - -py_modules = ['Json', 'streams', 'racelib', 'fileutils', '__init__'] -basedir = os.path.dirname(__file__) -dest_package_dir = join(basedir, "pyxtension") -try: - os.makedirs(dest_package_dir) -except os.error: - pass - -pyMajorVersion = str(sys.version_info[0]) -if "--py2" in sys.argv: - pyMajorVersion = '2' +__author__ = 'ASU' -src_dir = join(basedir, 'py' + pyMajorVersion, 'pyxtension') -for fname in [f for f in os.listdir(src_dir) if f.endswith(".py")]: - copy(join(src_dir, fname), dest_package_dir) +# Bump up this version +VERSION = '1.0.0' -# ToDo: check if there's still BUG in twine, as if falsely reports in README.md -# line 34: Error: Unexpected indentation. +basedir = path.abspath(path.dirname(__file__)) -long_description = open('README.rst', "rt").read() +# Get the long description from the README file +with open(path.join(basedir, 'README.md'), encoding='utf-8') as f: + long_description = f.read() with open("requirements.txt") as fp: install_requires = fp.read().strip().split("\n") -extras_require = { - 'dev': ['mock;python_version<"3"'], - 'test': ['mock;python_version<"3"'] -} - -if pyMajorVersion == "2": - python_requires = '>=2.7, <3' -elif pyMajorVersion == "3": - python_requires = '>=3.6, <4' -else: - raise Exception("Unknown Python version") - - -class InstallCommand(install, object): - user_options = install.user_options + [('py2', None, "Forces to build Py2 package even if run from Py3")] - - def initialize_options(self): - super(InstallCommand, self).initialize_options() - self.py2 = None - - -class BdistWheelCommand(bdist_wheel, object): - user_options = bdist_wheel.user_options + [('py2', None, "Forces to build Py2 package even if run from Py3")] - - def initialize_options(self): - super(BdistWheelCommand, self).initialize_options() - self.py2 = None - - def finalize_options(self): - super(BdistWheelCommand, self).finalize_options() - # self.root_is_pure = False - - def get_tag(self): - python, abi, plat = super(BdistWheelCommand, self).get_tag() - # We don't contain any python source - if pyMajorVersion == "2": - python, abi = 'py2', 'none' - return python, abi, plat - - -parameters = dict(name='pyxtension', - version=VERSION, - description='Extension library for Python', - long_description=long_description, - long_description_content_type="text/markdown", - author='Andrei Suiu', - author_email='andrei.suiu@gmail.com', - url='https://github.com/asuiu/pyxtension', - packages=['pyxtension'], - python_requires=python_requires, - install_requires=install_requires, - extras_require=extras_require, - data_files=[(".", ["requirements.txt",])], - cmdclass={ - 'install': InstallCommand, - 'bdist_wheel': BdistWheelCommand - }, - classifiers=[ - "Development Status :: 5 - Production/Stable", - "Intended Audience :: Developers", - "Programming Language :: Python :: 2.7", - "Programming Language :: Python :: 3.6", - "Programming Language :: Python :: 3.7", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: Implementation :: CPython", - "Programming Language :: Python :: Implementation :: PyPy", ]) +print("List of dependencies : 
{0}".format(str(install_requires))) + +parameters = dict( + name='streamerate', + + # Versions should comply with PEP440. For a discussion on single-sourcing + # the version across setup.py and the project code, see + # https://packaging.python.org/en/latest/single_source_version.html + version=VERSION, + + description='streamerate: a fluent and expressive Python library for chainable iterable processing, inspired by Java 8 streams.', + long_description=long_description, + long_description_content_type="text/markdown", + + # The project's main homepage. + url='https://github.com/asuiu/streamerate', + + author='Andrei Suiu', + author_email='andrei.suiu@gmail.com', + + license='MIT', + + classifiers=[ + # How mature is this project? Common values are + # 3 - Alpha + # 4 - Beta + # 5 - Production/Stable + 'Development Status :: 5 - Production/Stable', + + # Indicate who your project is intended for + 'Intended Audience :: Developers', + 'Topic :: System :: Logging', + 'Topic :: Software Development :: Libraries', + 'Topic :: Internet :: Log Analysis', + + # Pick your license as you wish (should match "license" above) + 'License :: OSI Approved :: Apache Software License', + + # Specify the Python versions you support basedir. + "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: Implementation :: CPython", + "Programming Language :: Python :: Implementation :: PyPy" + ], + + # What does your project relate to? + keywords='stream itertools fluent functional chainable iterable processing', + + # You can just specify the packages manually basedir if your project is + # simple. Or you can use find_packages(). + # packages=find_packages(exclude=['dist', 'docs', 'build', 'tests']), + packages=['streamerate'], + + # Alternatively, if you want to distribute just a my_module.py, uncomment + # this: + # py_modules=["my_module"], + + # List run-time dependencies basedir. These will be installed by pip when + # your project is installed. For an analysis of "install_requires" vs pip's + # requirements files see: + # https://packaging.python.org/en/latest/requirements.html + install_requires=install_requires, + + # List additional groups of dependencies basedir (e.g. development + # dependencies). You can install these using the following syntax, + # for example: + # $ pip install -e .[dev,test] + extras_require={}, + + # If there are data files included in your packages that need to be + # installed, specify them basedir. If using Python 2.6 or less, then these + # have to be included in MANIFEST.in as well. + package_data={}, + + # Although 'package_data' is the preferred approach, in some case you may + # need to place data files outside of your packages. See: + # http://docs.python.org/3.4/distutils/setupscript.html#installing-additional-files # noqa + # In this case, 'data_file' will be installed into '/my_data' + data_files=[], + + # To provide executable scripts, use entry points in preference to the + # "scripts" keyword. Entry points provide cross-platform support and allow + # pip to create the appropriate form of executable for the target platform. 
+ entry_points={}, +) setup(**parameters) - -# clean-up -rmtree(dest_package_dir) diff --git a/streamerate/__init__.py b/streamerate/__init__.py new file mode 100644 index 0000000..7a837f0 --- /dev/null +++ b/streamerate/__init__.py @@ -0,0 +1 @@ +from .streams import stream, slist, sset, sdict, defaultstreamdict, smap, sfilter, iter_except, TqdmMapper, AbstractSynchronizedBufferedStream, buffered_stream diff --git a/py3/pyxtension/streams.py b/streamerate/streams.py similarity index 94% rename from py3/pyxtension/streams.py rename to streamerate/streams.py index c5bb372..85b60f2 100644 --- a/py3/pyxtension/streams.py +++ b/streamerate/streams.py @@ -1,7 +1,5 @@ -#!/usr/bin/python -# coding:utf-8 # Author: ASU -- -# Purpose: utility library for >=Python3.6 +# Purpose: utility library for >=Python3.8 import collections import io import itertools @@ -11,7 +9,7 @@ import struct import sys import threading -from abc import ABC +from abc import ABC, abstractmethod from collections import abc, defaultdict from functools import partial, reduce from itertools import groupby @@ -25,16 +23,7 @@ NamedTuple, Optional, overload, Set, Tuple, TypeVar, Union from tblib import pickling_support - -from pyxtension import validate, PydanticValidated -from pyxtension.throttler import Throttler - -ifilter = filter -imap = map -izip = zip -xrange = range -from pyxtension.fileutils import openByExtension - +from throttlex import Throttler from tqdm import tqdm __author__ = 'andrei.suiu@gmail.com' @@ -59,15 +48,20 @@ def __iter__(self) -> Iterator[_T_co]: return iter(self._f()) -class EndQueue: +class _EndQueue: pass -class MapException: +class _MapException: def __init__(self, exc_info): self.exc_info = exc_info +class _QElement(NamedTuple): + i: int + el: Any + + class TqdmMapper: def __init__(self, *args, **kwargs) -> None: @@ -81,12 +75,6 @@ def __call__(self, el: _K) -> _K: self._tqdm.update() return el - -class _QElement(NamedTuple): - i: int - el: Any - - class _IStream(Iterable[_K], ABC): @staticmethod def _init_itr(itr: Optional[Union[Iterator[_K], Callable[[], Iterable[_K]]]] = None) -> Tuple[ @@ -111,47 +99,47 @@ def _init_itr(itr: Optional[Union[Iterator[_K], Callable[[], Iterable[_K]]]] = N def __fastmap_thread(f, qin, qout): while True: el = qin.get() - if isinstance(el, EndQueue): + if isinstance(el, _EndQueue): qin.put(el) return try: newEl = f(el) qout.put(newEl) except: - qout.put(MapException(sys.exc_info())) + qout.put(_MapException(sys.exc_info())) @staticmethod def __mtmap_thread(f, qin, qout): """ - :type qin: Queue[Union[_QElement, EndQueue]] - :type qout: Queue[Union[_QElement, MapException]] + :type qin: Queue[Union[_QElement, _EndQueue]] + :type qout: Queue[Union[_QElement, _MapException]] """ while True: q_el = qin.get() - if isinstance(q_el, EndQueue): + if isinstance(q_el, _EndQueue): qin.put(q_el) return try: newEl = f(q_el.el) qout.put(_QElement(q_el.i, newEl)) except: - qout.put(MapException(sys.exc_info())) + qout.put(_MapException(sys.exc_info())) @staticmethod def __fastFlatMap_thread(f, qin, qout): while True: itr = qin.get() - if isinstance(itr, EndQueue): + if isinstance(itr, _EndQueue): qin.put(itr) - qout.put(EndQueue()) + qout.put(_EndQueue()) return try: newItr = f(itr) for el in newItr: qout.put(el) except: - qout.put(MapException(sys.exc_info())) + qout.put(_MapException(sys.exc_info())) def __fastmap_generator(self, f: Callable[[_K], _V], poolSize: int, bufferSize: int): qin = Queue(bufferSize) @@ -177,25 +165,25 @@ def __fastmap_generator(self, f: Callable[[_K], _V], 
poolSize: int, bufferSize:
                 try:
                     el = next(itr)
                 except StopIteration:
-                    qin.put(EndQueue())
+                    qin.put(_EndQueue())
                     for t in threadPool:
                         t.join()
                     while not qout.empty():
                         newEl = qout.get()
-                        if isinstance(newEl, MapException):
+                        if isinstance(newEl, _MapException):
                             raise newEl.exc_info[0](newEl.exc_info[1]).with_traceback(newEl.exc_info[2])
                         yield newEl
                     break
                 else:
                     qin.put(el)
                     newEl = qout.get()
-                    if isinstance(newEl, MapException):
+                    if isinstance(newEl, _MapException):
                         raise newEl.exc_info[0](newEl.exc_info[1]).with_traceback(newEl.exc_info[2])
                     yield newEl
         finally:
             while not qin.empty():
                 qin.get()
-            qin.put(EndQueue())
+            qin.put(_EndQueue())
             while not qout.empty() or not qout.empty():
                 qout.get()
             for t in threadPool:
@@ -237,19 +225,20 @@ def wait_for_all():
             nonlocal cache
             while not qout.empty():
                 q_el = qout.get()
-                if isinstance(q_el, MapException):
+                if isinstance(q_el, _MapException):
                     raise q_el.exc_info[0](q_el.exc_info[1]).with_traceback(q_el.exc_info[2])
                 cache[q_el.i] = q_el.el
             for el in extract_all_from_cache():
                 yield el
-            validate(out_i == in_i + 1, "__mtmap_generator Expecting for all elements to be in cache")
+            if out_i != in_i + 1:
+                raise RuntimeError("__mtmap_generator: expected all elements to be in the cache")
 
         try:
             while 1:
                 try:
                     el = next(itr)
                 except StopIteration:
-                    qin.put(EndQueue())
+                    qin.put(_EndQueue())
                     for t in threadPool:
                         t.join()
                     for el in wait_for_all():
@@ -259,7 +248,7 @@ def wait_for_all():
                     in_i += 1
                     qin.put(_QElement(in_i, el))
                     q_el = qout.get()
-                    if isinstance(q_el, MapException):
+                    if isinstance(q_el, _MapException):
                         raise q_el.exc_info[0](q_el.exc_info[1]).with_traceback(q_el.exc_info[2])
                     cache[q_el.i] = q_el.el
                     for el in extract_all_from_cache():
@@ -267,7 +256,7 @@ def wait_for_all():
         finally:
             while not qin.empty():
                 qin.get()
-            qin.put(EndQueue())
+            qin.put(_EndQueue())
             while not qout.empty() or not qout.empty():
                 qout.get()
             for t in threadPool:
@@ -279,7 +268,7 @@ def __fastFlatMap_input_thread(itr: Iterator[_K], qin: Queue):
         try:
             el = next(itr)
         except StopIteration:
-            qin.put(EndQueue())
+            qin.put(_EndQueue())
             return
         else:
             qin.put(el)
@@ -307,9 +296,9 @@ def __fastFlatMap_generator(self, predicate, poolSize: int, bufferSize: int):
         qout_counter = 0
         while qout_counter < len(threadPool):
             newEl = qout.get()
-            if isinstance(newEl, MapException):
+            if isinstance(newEl, _MapException):
                 raise newEl.exc_info[0](newEl.exc_info[1]).with_traceback(newEl.exc_info[2])
-            if isinstance(newEl, EndQueue):
+            if isinstance(newEl, _EndQueue):
                 qout_counter += 1
                 if qout_counter >= len(threadPool):
                     inputThread.join()
@@ -317,26 +306,26 @@ def __fastFlatMap_generator(self, predicate, poolSize: int, bufferSize: int):
                         t.join()
                     while not qout.empty():
                         newEl = qout.get()
-                        if isinstance(newEl, MapException):
+                        if isinstance(newEl, _MapException):
                             raise newEl.exc_info[0](newEl.exc_info[1]).with_traceback(newEl.exc_info[2])
                         yield newEl
             else:
                 yield newEl
 
     @staticmethod
-    def exc_info_decorator(f: Callable[[_K], _V], el: _K) -> Union[MapException, _V]:
+    def exc_info_decorator(f: Callable[[_K], _V], el: _K) -> Union[_MapException, _V]:
         """This decorates f to pass the exception traceback properly"""
         try:
             return f(el)
         except Exception as e:
             pickling_support.install(e)
-            return MapException(sys.exc_info())
+            return _MapException(sys.exc_info())
 
     def __mp_pool_generator(self, f: Callable[[_K], _V], poolSize: int, bufferSize: int) -> Generator[_V, None, None]:
         p = Pool(poolSize)
         decorated_f_with_exc_passing = partial(self.exc_info_decorator, f)
         for el in p.imap(decorated_f_with_exc_passing, self, 
chunksize=bufferSize):
-            if isinstance(el, MapException):
+            if isinstance(el, _MapException):
                 raise el.exc_info[0](el.exc_info[1]).with_traceback(el.exc_info[2])
             yield el
         p.close()
@@ -348,7 +337,7 @@ def __mp_fast_pool_generator(self, f: Callable[[_K], _V], poolSize: int, bufferS
         try:
             decorated_f_with_exc_passing = partial(self.exc_info_decorator, f)
             for el in p.imap_unordered(decorated_f_with_exc_passing, iter(self), chunksize=bufferSize):
-                if isinstance(el, MapException):
+                if isinstance(el, _MapException):
                     raise el.exc_info[0](el.exc_info[1]).with_traceback(el.exc_info[2])
                 yield el
         except GeneratorExit:
@@ -513,6 +502,9 @@ def fastFlatMap(self, predicate: Callable[[_K], Iterable[_V]] = _IDENTITY_FUNC,
             raise ValueError("bufferSize should be an integer between 1 and 2^12. Received: %s" % str(poolSize))
         return stream(lambda: self.__fastFlatMap_generator(predicate, poolSize, bufferSize))
 
+    def map_stream(self, f: Callable[['_IStream[_K]'], _T]) -> _T:
+        return f(self)
+
     def enumerate(self) -> 'stream[Tuple[int,_K]]':
         return stream(zip(range(0, sys.maxsize), self))
 
@@ -682,10 +674,6 @@ def toSumCounter(self: 'stream[Tuple[_T,_V]]') -> 'sdict[_T,_V]':
             res[k] = v
         return res
 
-    def toJson(self) -> 'JsonList':
-        from pyxtension.Json import JsonList
-        return JsonList(self)
-
     @overload
     def __getitem__(self, i: slice) -> 'stream[_K]':
         ...
@@ -1110,7 +1098,19 @@ def indexIgnorer(indexSet, _stream):
         return stream(lambda: indexIgnorer(indexSet, self))
 
 
-class stream(_IStream, Iterable[_K], PydanticValidated):
+class _PydanticValidated:
+    @classmethod
+    def __get_validators__(cls):
+        yield cls._pydantic_validator
+
+    @classmethod
+    def _pydantic_validator(cls, v):
+        if not isinstance(v, cls):
+            raise TypeError(f'{repr(v)} is of type {type(v)} but is expected to be an instance of {cls}')
+        return v
+
+
+class stream(_IStream, Iterable[_K], _PydanticValidated):
     def __init__(self, itr: Optional[Union[Iterator[_K], Callable[[], Iterable[_K]]]] = None):
         self._itr, self._f = self._init_itr(itr)
@@ -1170,25 +1170,21 @@ def __binaryChunksStreamGenerator(fs, format="<L",
     @staticmethod
     def readFromBinaryChunkStream(readStream: BinaryIO, format: str = "<L", statHandler: Optional[Callable[[Tuple[int, int]], None]] = None) -> 'stream[_V]':
-        '''
+        """
         :param statHandler: statistics handler, will be called before every yield with a tuple (n,size)
-        '''
-        if isinstance(readStream, str):
-            readStream = openByExtension(readStream, mode='r', buffering=2 ** 12)
+        """
         return stream(stream.__binaryChunksStreamGenerator(readStream, format, statHandler))
 
     @staticmethod
-    def loadFromPickled(file: Union[BinaryIO, str],
-                        format: str = "<L", statHandler: Optional[Callable[[Tuple[int, int]], None]] = None) -> 'stream[_V]':
+    def loadFromPickled(file: BinaryIO, format: str = "<L", statHandler: Optional[Callable[[Tuple[int, int]], None]] = None) -> 'stream[_V]':
         """
         :param file: should be path or binary file stream
+        :param format: format of the header
         :param statHandler: statistics handler, will be called before every yield with a tuple (n,size)
-        '''
-        if isinstance(file, str):
-            file = openByExtension(file, mode='r', buffering=2 ** 12)
+        """
         return stream.readFromBinaryChunkStream(file, format, statHandler).map(pickle.loads)
 
 
@@ -1199,34 +1195,29 @@ class AbstractSynchronizedBufferedStream(stream):
     """
 
     def __init__(self):
-        self.__queue = slist()
+        self.__queue = collections.deque()
         self.__lock = threading.RLock()
-        self.__idx = -1
         super().__init__()
 
     def __next__(self):
-        self.__lock.acquire()
-        try:
-            val = self.__queue[self.__idx]
-        except IndexError:
-            self.__queue = self._getNextBuffer()
-            assert isinstance(self.__queue, slist)
-            if len(self.__queue) == 0:
-                raise StopIteration
-            val = self.__queue[0]
-            self.__idx = 0
-
-        self.__idx += 1
-        self.__lock.release()
-        return val
+        with self.__lock:
+            try:
+                val = 
self.__queue.popleft()
+            except IndexError:
+                self.__queue.extend(self._getNextBuffer())
+                if len(self.__queue) == 0:
+                    raise StopIteration
+                val = self.__queue.popleft()
+
+            return val
 
     def __iter__(self):
         return self
 
-    def _getNextBuffer(self):
+    @abstractmethod
+    def _getNextBuffer(self) -> Iterable[_K]:
         """
         :return: a list of items for the buffer
-        :rtype: slist[T]
         """
         raise NotImplementedError
@@ -1237,16 +1228,16 @@ def __repr__(self):
         return object.__repr__(self)
 
 
-class SynchronizedBufferedStream(AbstractSynchronizedBufferedStream):
-    def __init__(self, iteratorOverBuffers: 'Iterator[slist[_T]]'):
-        self.__iteratorOverBuffers = iter(iteratorOverBuffers)
-        super(SynchronizedBufferedStream, self).__init__()
+class buffered_stream(AbstractSynchronizedBufferedStream):
+    def __init__(self, buffers: 'Iterable[Iterable[_T]]'):
+        self.__buffers = iter(buffers)
+        super(buffered_stream, self).__init__()
 
-    def _getNextBuffer(self) -> 'slist[_T]':
+    def _getNextBuffer(self) -> Iterable[_T]:
         try:
-            return next(self.__iteratorOverBuffers)
+            return next(self.__buffers)
         except StopIteration:
-            return slist()
+            return []
 
 
 class sset(set, MutableSet[_K], _IStream):
@@ -1444,10 +1435,6 @@ def tqdm(self, desc: Optional[str] = None, total: Optional[int] = None, leave: b
         return super().tqdm(desc, total, leave, file, ncols, mininterval, maxinterval, ascii, unit, unit_scale,
                             dynamic_ncols, smoothing, initial, position, postfix, gui, **kwargs)
 
-    def toJson(self) -> 'sdict[_K,_V]':
-        from pyxtension.Json import Json
-        return Json(self)
-
 
 class defaultstreamdict(sdict):
     @property
diff --git a/py3/pyxtension/tests/__init__.py b/streamerate/tests/__init__.py
similarity index 100%
rename from py3/pyxtension/tests/__init__.py
rename to streamerate/tests/__init__.py
diff --git a/py3/pyxtension/tests/test_Streams.py b/streamerate/tests/test_Streams.py
similarity index 98%
rename from py3/pyxtension/tests/test_Streams.py
rename to streamerate/tests/test_Streams.py
index 84490cc..4f1b9e8 100644
--- a/py3/pyxtension/tests/test_Streams.py
+++ b/streamerate/tests/test_Streams.py
@@ -12,9 +12,9 @@ from unittest.mock import MagicMock
 
 from pydantic import validate_arguments, ValidationError
+from pyxtension.Json import JsonList, Json
 
-from pyxtension.Json import Json, JsonList
-from pyxtension.streams import defaultstreamdict, sdict, slist, sset, stream, TqdmMapper
+from streamerate.streams import defaultstreamdict, sdict, slist, sset, stream, TqdmMapper
 
 ifilter = filter
 xrange = range
@@ -1103,11 +1103,15 @@ def test_pydantic_slist_validation(self):
         def f(x: slist[int]):
             return x
 
-        s = stream([1, 2])
-        self.assertEqual(f(s.toList()), [1, 2])
-        with self.assertRaises(ValidationError):
-            f({1, 2})
-        #self.assertEqual(f({1, 2}), [1, 2], "Expect pydantic to convert automatically set to list")
+        s = stream([1.49, '2']).toList()
+        converted = f(s)
+        self.assertEqual(converted, [1, 2])
+        self.assertIsInstance(converted, list)
+        try:
+            self.assertEqual(f({1, 2}), [1, 2], "Expect pydantic to automatically convert a set to a list")
+        except ValidationError:
+            # This is also valid behavior on some platforms & Pydantic versions
+            pass
         with self.assertRaises(ValidationError):
             f(dict())
         with self.assertRaises(ValidationError):
@@ -1117,6 +1121,13 @@ def test_to_list(self):
         s = stream(range(3))
         self.assertListEqual(s.to_list(), [0, 1, 2])
 
+    def test_map_stream(self):
+        s = stream((("a", 2), (3, 4)))
+        d = s.map_stream(dict)
+        self.assertIsInstance(d, dict)
+        self.assertDictEqual(d, {'a': 2, 3: 4})
+
+
 """
Allow for these test cases to be 
run from the command line
diff --git a/py3/pyxtension/tests/test_abstractSynchronizedBufferedStream.py b/streamerate/tests/test_buffered_stream.py
similarity index 66%
rename from py3/pyxtension/tests/test_abstractSynchronizedBufferedStream.py
rename to streamerate/tests/test_buffered_stream.py
index c79c8ca..5fa78e9 100644
--- a/py3/pyxtension/tests/test_abstractSynchronizedBufferedStream.py
+++ b/streamerate/tests/test_buffered_stream.py
@@ -5,7 +5,7 @@
 # Created: 11/18/2015
 from unittest import TestCase, main
 
-from pyxtension.streams import AbstractSynchronizedBufferedStream, slist
+from streamerate.streams import AbstractSynchronizedBufferedStream, slist, buffered_stream
 
 __author__ = 'andrei.suiu@gmail.com'
 
@@ -20,11 +20,16 @@ def __init__(self):
         def _getNextBuffer(self):
             self._counter -= 1
             if self._counter > 0:
-                return slist(range(self._counter))
-            return slist()
+                return range(self._counter)
+            return []
 
         test_stream = TestSyncStream()
         self.assertListEqual(test_stream.toList(), [0, 1, 2, 0, 1, 0])
 
+class TestBufferedStream(TestCase):
+    def test_nominal(self):
+        s = buffered_stream((slist(range(i)) for i in range(1, 4)))
+        self.assertListEqual(s.toList(), [0, 0, 1, 0, 1, 2])
+
 
 if __name__ == '__main__':
     main()
diff --git a/streamerate/tests/test_import.py b/streamerate/tests/test_import.py
new file mode 100644
index 0000000..3706d5a
--- /dev/null
+++ b/streamerate/tests/test_import.py
@@ -0,0 +1,12 @@
+__author__ = 'andrei.suiu@gmail.com'
+
+import unittest
+
+from streamerate import stream
+
+
+class StreamImportTestCase(unittest.TestCase):
+    def test_stream_import_nominal(self):
+        expected = [1, 2, 3]
+        s = stream(range(1, 4)).toList()
+        self.assertListEqual(s, expected)
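
To sanity-check the public API this patch settles on (`stream`, the new `map_stream`, and `buffered_stream`, renamed from `SynchronizedBufferedStream`), here is a minimal usage sketch; it assumes the `streamerate` package from this tree is installed, and the sample data is illustrative only:

```python
# Minimal smoke test of the public API exercised by this patch.
# Assumes `pip install .` was run from this tree; sample values are illustrative.
from streamerate import stream, buffered_stream

# Chainable, lazy processing: map over an iterable and collect to a list.
squares = stream(range(5)).map(lambda x: x * x).toList()
assert squares == [0, 1, 4, 9, 16]

# map_stream() applies a function to the stream as a whole (added above).
d = stream((("a", 2), (3, 4))).map_stream(dict)
assert d == {"a": 2, 3: 4}

# buffered_stream flattens an iterable of buffers into one thread-safe stream.
s = buffered_stream(range(i) for i in range(1, 4))
assert s.toList() == [0, 0, 1, 0, 1, 2]
```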