diff --git a/README.rst b/README.rst index 7327112..c10d93d 100644 --- a/README.rst +++ b/README.rst @@ -20,7 +20,8 @@ sequences (anything that supports indexing such as lists, arrays, etc.). It was concieved with **delayed evaluation** in mind to help setup and test chained transformation pipelines very quickly. It also supports **slice based indexing** and **assignment** when possible so that you can forget that -you are not working with lists directly! +you are not working with lists directly! Finally it features **prefetching** with either +threads or processes to maximize computation speed. Delayed (aka lazy or ondemand) evaluation is easily understood by looking at this example: @@ -79,6 +80,18 @@ With seqtools: 502 +Batteries included! +------------------- + +The library comes with a set of helper functions to help manipulate sequences: +`concatenation `_, +`batching `_, +`reindexing `_, +`interleaving `_, +`prefetching `_ +and others (suggestions are also welcome). + + Installation ------------ @@ -96,7 +109,8 @@ The documentation is hosted at https://seqtools-doc.readthedocs.io Similar libraries ----------------- -- `Fuel `_ is a higher level library - targeted toward Machine Learning and dataset manipulation. +These libraries provide comparable functionalities, but mostly for iterable containers: + - `torchvision.transforms `_ and `torch.utils.data `_. 
+- `TensorPack `_ diff --git a/docs/batch.png b/docs/batch.png new file mode 100644 index 0000000..6ffabb2 Binary files /dev/null and b/docs/batch.png differ diff --git a/docs/collate.png b/docs/collate.png new file mode 100644 index 0000000..1647821 Binary files /dev/null and b/docs/collate.png differ diff --git a/docs/concatenate.png b/docs/concatenate.png new file mode 100644 index 0000000..0b0c2f1 Binary files /dev/null and b/docs/concatenate.png differ diff --git a/docs/conf.py b/docs/conf.py index 0603cec..884ed93 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -53,7 +53,7 @@ master_doc = 'index' # General information about the project. -project = 'seqtools' +project = 'SeqTools' copyright = '2017-2018, Nicolas Granger' author = 'Nicolas Granger' @@ -64,7 +64,7 @@ # The full version, including alpha/beta/rc tags. release = pkg_resources.require("seqtools")[0].version # The short X.Y version. -version = "master" if "dev" in release else '.'.join(release.split('.', 2)[:2]) +version = release # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. 
@@ -99,7 +99,7 @@ html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] html_theme_options = { 'collapse_navigation': False, - 'display_version': False + 'display_version': True } # Theme options are theme-specific and customize the look and feel of a theme @@ -175,9 +175,15 @@ # -- Options for Linkcode extension --------------------------------------- -linkcode_revision = "master" if "dev" in release else "v" + version +if "READTHEDOCS" in os.environ: + if os.environ["READTHEDOCS_VERSION"] == "latest": + linkcode_revision = "master" + else: + linkcode_revision = "{{ readthedocs.v1.vcs.version }}" +else: + linkcode_revision = "master" linkcode_url = "https://github.com/nlgranger/SeqTools/blob/" \ - + linkcode_revision + "/{}{}" + + linkcode_revision + "/{filepath}#L{linestart}-L{linestop}" def linkcode_resolve(domain, info): @@ -210,8 +216,8 @@ def linkcode_resolve(domain, info): try: source, lineno = inspect.getsourcelines(obj) except OSError: - linespec = "" + return None else: - linespec = "#L%d-L%d" % (lineno, lineno + len(source) - 1) + linestart, linestop = lineno, lineno + len(source) - 1 - return linkcode_url.format(filepath, linespec) + return linkcode_url.format(filepath=filepath, linestart=linestart, linestop=linestop) diff --git a/docs/cycle.png b/docs/cycle.png new file mode 100644 index 0000000..74cc085 Binary files /dev/null and b/docs/cycle.png differ diff --git a/docs/gather.png b/docs/gather.png new file mode 100644 index 0000000..c658b07 Binary files /dev/null and b/docs/gather.png differ diff --git a/docs/index.rst b/docs/index.rst index 3ca869b..0a3a8f5 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -25,7 +25,8 @@ sequences (anything that supports indexing such as lists, arrays, etc.). It was concieved with **delayed evaluation** in mind to help setup and test chained transformation pipelines very quickly. 
It also supports **slice based indexing** and **assignment** when possible so that you can forget that -you are not working with lists directly! +you are not working with lists directly! Finally it features **prefetching** with either +threads or processes to maximize computation speed. Delayed (aka lazy or ondemand) evaluation is easily understood by looking at this example: @@ -84,10 +85,23 @@ With seqtools: 502 +Batteries included! +------------------- + +The library comes with a set of helper functions to help manipulate sequences: +`concatenation `_, +`batching `_, +`reindexing `_, +`interleaving `_, +`prefetching `_ +and others (suggestions are also welcome). + + Similar libraries ----------------- -- `Fuel `_ is a higher level library - targeted toward Machine Learning and dataset manipulation. +These libraries provide comparable functionalities, but mostly for iterable containers: + - `torchvision.transforms `_ and `torch.utils.data `_. +- `TensorPack `_ diff --git a/docs/interleaving.png b/docs/interleaving.png new file mode 100644 index 0000000..3ae5714 Binary files /dev/null and b/docs/interleaving.png differ diff --git a/docs/prefetch.png b/docs/prefetch.png new file mode 100644 index 0000000..ffe1496 Binary files /dev/null and b/docs/prefetch.png differ diff --git a/docs/repeat.png b/docs/repeat.png new file mode 100644 index 0000000..fbfb237 Binary files /dev/null and b/docs/repeat.png differ diff --git a/docs/smap.png b/docs/smap.png new file mode 100644 index 0000000..55255a5 Binary files /dev/null and b/docs/smap.png differ diff --git a/seqtools/evaluation.py b/seqtools/evaluation.py index 0cf667c..1dabd31 100644 --- a/seqtools/evaluation.py +++ b/seqtools/evaluation.py @@ -365,8 +365,13 @@ def prefetch(sequence, max_cached=None, nworkers=0, method='thread', timeout=1): .. note:: Exceptions raised in the workers while reading the sequence values will - trigger an :class:`EagerAccessException`. 
When possible, information on + trigger a :class:`PrefetchException`. When possible, information on the cause of failure will be provided in the exception message. + + .. image:: prefetch.png + :alt: prefetch + :width: 30% + :align: center """ if method == "thread": return ThreadedSequence(sequence, max_cached, nworkers, timeout) diff --git a/seqtools/indexing.py b/seqtools/indexing.py index f7c5861..4817f70 100644 --- a/seqtools/indexing.py +++ b/seqtools/indexing.py @@ -65,7 +65,13 @@ def __setitem__(self, key, value): def gather(sequence, indexes): - """Returns a view on the sequence reordered by indexes.""" + """Returns a view on the sequence reordered by indexes. + + .. image:: gather.png + :alt: gather + :width: 15% + :align: center + """ return Reindexing(sequence, indexes) @@ -143,7 +149,13 @@ def __iter__(self): def cycle(sequence, limit=None): - """Returns a view of the repeated sequence with an optional size limit.""" + """Returns a view of the repeated sequence with an optional size limit. + + .. image:: cycle.png + :alt: cycle + :width: 10% + :align: center + """ if limit is None: return InfiniteCycle(sequence) else: @@ -215,6 +227,11 @@ def interleave(*sequences): >>> arr3 = [.1, .2, .3, .4] >>> list(interleave(arr1, arr2, arr3)) [1, 'a', 0.1, 2, 'b', 0.2, 3, 'c', 0.3, 4, 0.4, 5] + + .. image:: interleaving.png + :alt: interleaving + :width: 30% + :align: center """ return Interleaving(sequences) @@ -317,6 +334,11 @@ def __iter__(self): def repeat(value, times=None): """Returns a sequence repeating the given value with an optional size limit. + + .. image:: repeat.png + :alt: repeat + :width: 10% + :align: center """ if isint(times) and times > 1: return Repetition(value, times) diff --git a/seqtools/mapping.py b/seqtools/mapping.py index 5c4f5c3..ba576ab 100644 --- a/seqtools/mapping.py +++ b/seqtools/mapping.py @@ -74,6 +74,11 @@ def smap(f, *sequence): computing now computing now [5, 5, 5, 5] + + .. 
image:: smap.png + :alt: smap + :width: 20% + :align: center """ stack = [(file, line, func, ctx[0].strip('\n') if ctx else '?') for _, file, line, func, ctx, _ diff --git a/seqtools/shape.py b/seqtools/shape.py index a6d84a2..f7f19e7 100644 --- a/seqtools/shape.py +++ b/seqtools/shape.py @@ -44,6 +44,11 @@ def collate(sequences): >>> arr = collate([[1, 2, 3, 4], ['a', 'b', 'c', 'd'], [5, 6, 7, 8]]) >>> arr[2] (3, 'c', 7) + + .. image:: collate.png + :alt: collate + :width: 50% + :align: center """ return Collation(sequences) @@ -80,7 +85,13 @@ def __iter__(self): def concatenate(sequences): - """Returns a view on the concatenated sequences.""" + """Returns a view on the concatenated sequences. + + .. image:: concatenate.png + :alt: concatenate + :width: 25% + :align: center + """ return Concatenation(sequences) @@ -151,6 +162,11 @@ def batch(sequence, k, drop_last=False, pad=None, collate_fn=None): :param collate_fn: an optional function that takes a sequence of items and returns a consolidated batch. + + .. image:: batch.png + :alt: batch + :width: 25% + :align: center """ return BatchView(sequence, k, drop_last, pad, collate_fn) @@ -241,7 +257,7 @@ def __setitem__(self, key, value): def split(sequence, edges): - """Splits a sequence into subsequences. + """Splits a sequence into a succession of subsequences. :param sequence: Input sequence. diff --git a/setup.py b/setup.py index c475b81..7537b2a 100644 --- a/setup.py +++ b/setup.py @@ -26,7 +26,7 @@ def run_tests(self): license="MPL2", url="https://github.com/nlgranger/SeqTools", packages=['seqtools'], - version='0.8.0', + version='0.8.1', install_requires=[ 'typing;python_version<"3.6"', 'backports.weakref;python_version<"3"',