From 8c9d8777dc9fec368ed62ec0ae52e82435ab5a0e Mon Sep 17 00:00:00 2001 From: Cyrille Rossant Date: Sun, 15 Jun 2014 15:35:10 +0200 Subject: [PATCH] Added tools for generating featured HTML recipes. --- featured/01_numpy_performance.ipynb | 33 ++-- tools/featured.tpl | 296 ++++++++++------------------ tools/genfeatured.py | 61 +++++- tools/gentoc.py | 17 +- tools/util.py | 22 +++ 5 files changed, 204 insertions(+), 225 deletions(-) create mode 100644 tools/util.py diff --git a/featured/01_numpy_performance.ipynb b/featured/01_numpy_performance.ipynb index 2e02ba9..3262e29 100644 --- a/featured/01_numpy_performance.ipynb +++ b/featured/01_numpy_performance.ipynb @@ -1,7 +1,7 @@ { "metadata": { "name": "", - "signature": "sha256:015ce6a5abd2c1803c32dae6f3cc89bbb103b97ee5b159129f2454ad27d3ef6f" + "signature": "sha256:b9b65add267b3e6d0058a88dbd981ac0f74d0719f95b4ec1a70421fa85278bfe" }, "nbformat": 3, "nbformat_minor": 0, @@ -12,14 +12,14 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Featured recipe #1: Get the best performance out of NumPy" + "# Featured Recipe #1: Getting the Best Performance out of NumPy" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "> This is the first featured recipe of the [IPython Cookbook](http://ipython-books.github.io/), the definitive guide to high-performance scientific computing and data science in Python." + "> This is the first featured recipe of the [**IPython Cookbook**](http://ipython-books.github.io/), the definitive guide to **high-performance scientific computing** and **data science** in Python." ] }, { @@ -28,7 +28,7 @@ "source": [ "**NumPy** is the cornerstone of the scientific Python software stack. It provides a special data type optimized for vector computations, the `ndarray`. This object is at the core of most algorithms in scientific numerical computing.\n", "\n", - "With NumPy arrays, you can achieve significant performance speedups over native Python, particularly when your computations follow the *Single Instruction, Multiple Data* (SIMD) paradigm. However, it is also possible to unintentionally write non-optimized code with NumPy.\n", + "With NumPy arrays, you can achieve significant performance speedups over native Python, particularly when your computations follow the ***Single Instruction, Multiple Data* (SIMD)** paradigm. However, it is also possible to unintentionally write non-optimized code with NumPy.\n", "\n", "In this featured recipe, we will see some tricks that can help you write optimized NumPy code. We will start by looking at ways to avoid unnecessary array copies in order to save time and memory. In that respect, we will need to dig into the internals of NumPy." ] @@ -37,7 +37,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Understanding the internals of NumPy to avoid unnecessary array copy" + "## Learning to avoid unnecessary array copies" ] }, { @@ -418,7 +418,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "6.\tBroadcasting rules allow you to make computations on arrays with different but compatible shapes. In other words, you don't always need to reshape or tile your arrays to make their shapes match. The following example illustrates two ways of doing an outer product between two vectors: the first method involves array tiling, the second one involves broadcasting. The last method is significantly faster." + "6. **Broadcasting rules** allow you to make computations on arrays with different but compatible shapes. In other words, you don't always need to reshape or tile your arrays to make their shapes match. The following example illustrates two ways of doing an outer product between two vectors: the first method involves array tiling, the second one involves broadcasting. The last method is significantly faster." ] }, { @@ -875,7 +875,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Explanations" + "## How it works?" ] }, { @@ -905,25 +905,25 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "1.\tComputations on arrays can be written very efficiently in a low-level language like C (and a large part of NumPy is actually written in C). Knowing the address of the memory block and the data type, it is just simple arithmetic to loop over all items, for example. There would be a significant overhead to do that in Python with a list.\n", + "1.\t**Array computations can be written very efficiently in a low-level language like C** (and a large part of NumPy is actually written in C). Knowing the address of the memory block and the data type, it is just simple arithmetic to loop over all items, for example. There would be a significant overhead to do that in Python with a list.\n", "\n", - "2.\tSpatial locality in memory access patterns results in performance gains notably due to the CPU cache. Indeed, the cache loads bytes in chunks from RAM to the CPU registers. Adjacent items are then loaded very efficiently (sequential locality, or locality of reference).\n", + "2.\t**Spatial locality in memory access patterns** results in significant performance gains, notably thanks to the CPU cache. Indeed, the cache loads bytes in chunks from RAM to the CPU registers. Adjacent items are then loaded very efficiently (sequential locality, or locality of reference).\n", "\n", - "3.\tFinally, the fact that items are stored contiguously in memory allows NumPy to take advantage of vectorized instructions of modern CPUs, like Intel's SSE and AVX, AMD's XOP, and so on. For example, multiple consecutive floating point numbers can be loaded in 128, 256 or 512 bits registers for vectorized arithmetical computations implemented as CPU instructions." + "3.\t**Data elements are stored contiguously in memory**, so that NumPy can take advantage of vectorized instructions on modern CPUs, like Intel's SSE and AVX, AMD's XOP, and so on. For example, multiple consecutive floating point numbers can be loaded in 128, 256 or 512 bits registers for vectorized arithmetical computations implemented as CPU instructions." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Additionally, let's mention the fact that NumPy can be linked to highly optimized linear algebra libraries like BLAS and LAPACK, for example through the Intel Math Kernel Library (MKL). A few specific matrix computations may also be multithreaded, taking advantage of the power of modern multicore processors." + "Additionally, let's mention the fact that NumPy can be linked to highly optimized linear algebra libraries like *BLAS* and *LAPACK*, for example through the *Intel Math Kernel Library (MKL)*. A few specific matrix computations may also be multithreaded, taking advantage of the power of modern multicore processors." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "In conclusion, storing data in a contiguous block of memory ensures that the architecture of modern CPUs is used optimally, in terms of memory access patterns, CPU cache, and vectorized instructions." + "In conclusion, **storing data in a contiguous block of memory ensures that the architecture of modern CPUs is used optimally, in terms of memory access patterns, CPU cache, and vectorized instructions**." ] }, { @@ -1054,7 +1054,14 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "> This was a featured recipe from the [IPython Cookbook](http://ipython-books.github.io/), by [Cyrille Rossant](http://cyrille.rossant.net), Packt Publishing, 2014 (400 pages). If you liked this recipe, [pre-order the book now](http://www.packtpub.com/ipython-interactive-computing-and-visualization-cookbook/book)! There's a time-limited 50% discount with the code `PICVCEB`." + "You will find related recipes on the [book's repository](https://github.com/ipython-books/cookbook-code)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "> This was a featured recipe from the [IPython Cookbook](http://ipython-books.github.io/), by [Cyrille Rossant](http://cyrille.rossant.net), Packt Publishing, 2014. If you liked this recipe, [pre-order the book now](http://www.packtpub.com/ipython-interactive-computing-and-visualization-cookbook/book)! There's a time-limited 50% discount with the code `PICVCEB`." ] } ], diff --git a/tools/featured.tpl b/tools/featured.tpl index 2aa785e..c33d822 100644 --- a/tools/featured.tpl +++ b/tools/featured.tpl @@ -1,195 +1,105 @@ -{%- extends 'display_priority.tpl' -%} - - -{% block codecell %} -
-{{ super() }} -
-{%- endblock codecell %} - -{% block input_group -%} -
+{%- extends 'basic.tpl' -%} +{% from 'mathjax.tpl' import mathjax %} + + +{%- block header -%} + + + + + + + + +IPython Cookbook %TITLE% + + + + +{% for css in resources.inlining.css -%} + +{% endfor %} + + + + + + + + + + +{{ mathjax() }} + + + + + + +{%- endblock header -%} + + + + +{% block body %} + + +%NAVBAR% + +
+
{{ super() }} -
-{% endblock input_group %} - -{% block output_group %} -
-
-{{ super() }} -
-
-{% endblock output_group %} - -{% block in_prompt -%} -
-In [{{ cell.prompt_number }}]: -
-{%- endblock in_prompt %} - -{% block empty_in_prompt -%} -
-
-{%- endblock empty_in_prompt %} - -{# - output_prompt doesn't do anything in HTML, - because there is a prompt div in each output area (see output block) -#} -{% block output_prompt %} -{% endblock output_prompt %} - -{% block input %} -
-
-{{ cell.input | highlight2html(language=resources.get('language'), metadata=cell.metadata) }} -
-
-{%- endblock input %} - -{% block output %} -
-{%- if output.output_type == 'pyout' -%} -
- Out[{{ cell.prompt_number }}]: -{%- else -%} -
-{%- endif -%}
-{{ super() }} -
-{% endblock output %} - -{% block markdowncell scoped %} -
-{{ self.empty_in_prompt() }} -
-
-{{ cell.source | markdown2html | strip_files_prefix }} -
-
-
-{%- endblock markdowncell %} - -{% block headingcell scoped %} -
-{{ self.empty_in_prompt() }} -
-
-{{ ("#" * cell.level + cell.source) | replace('\n', ' ') | markdown2html | strip_files_prefix | add_anchor }} -
-
-
-{% endblock headingcell %} - -{% block unknowncell scoped %} -unknown type {{ cell.type }} -{% endblock unknowncell %} - -{% block pyout -%} -{%- set extra_class="output_pyout" -%} -{% block data_priority scoped %} -{{ super() }} -{% endblock %} -{%- set extra_class="" -%} -{%- endblock pyout %} - -{% block stream_stdout -%} -
-
-{{- output.text | ansi2html -}}
-
-
-{%- endblock stream_stdout %} - -{% block stream_stderr -%} -
-
-{{- output.text | ansi2html -}}
-
-
-{%- endblock stream_stderr %} - -{% block data_svg scoped -%} -
-{%- if output.svg_filename %} - -{%- endblock data_svg %} - -{% block data_html scoped -%} -
-{{ output.html }} -
-{%- endblock data_html %} - -{% block data_png scoped %} -
-{%- if output.png_filename %} - -
-{%- endblock data_png %} - -{% block data_jpg scoped %} -
-{%- if output.jpeg_filename %} - -
-{%- endblock data_jpg %} - -{% block data_latex scoped %} -
-{{ output.latex }} -
-{%- endblock data_latex %} - -{% block pyerr -%} -
-
-{{- super() -}}
-
-
-{%- endblock pyerr %} - -{%- block traceback_line %} -{{ line | ansi2html }} -{%- endblock traceback_line %} - -{%- block data_text scoped %} -
-
-{{- output.text | ansi2html -}}
-
-
-{%- endblock -%} - -{%- block data_javascript scoped %} -
- -
-{%- endblock -%} +
+ +%FOOTER% + + +{%- endblock body %} + +{% block footer %} + +{% endblock footer %} diff --git a/tools/genfeatured.py b/tools/genfeatured.py index 98f6393..931f126 100644 --- a/tools/genfeatured.py +++ b/tools/genfeatured.py @@ -1,10 +1,63 @@ +import sys import os +import os.path as op +import re +from util import get_recipe_number, get_recipe_name +current_dir = op.dirname(os.path.abspath(__file__)) +code_dir = op.join(current_dir, '../') +featured_dir = op.join(code_dir, 'featured/') +site_dir = op.join(current_dir, '../../ipython-books.github.io') -if __name__ == '__main__': - filename = '../featured/01_numpy_performance.ipynb' +index_path = op.join(site_dir, 'index.html') + +def get_title(notebook_filename): + return get_recipe_name(op.join(featured_dir, notebook_filename)) + +def get_snippet(name): + with open(index_path, 'r') as f: + contents = f.read() + start = contents.index(''.format(name)) + end = contents.index(''.format(name)) + return contents[start:end] + +def get_navbar(): + return get_snippet('NAVBAR') + +def get_footer(): + return get_snippet('FOOTER') + +NAVBAR = get_navbar() +FOOTER = get_footer() - os.system('ipython nbconvert {f} --to html --template featured.tpl'.format( - f=filename)) +def transform_featured(notebook_filename): + number = int(notebook_filename[:2]) + notebook_basename = op.basename(notebook_filename) + input_path = op.realpath(op.join(featured_dir, notebook_filename)) + output_path = op.realpath(op.join(site_dir, + 'featured-{0:02d}'.format(number))) + + # Get the recipe's title. + title = get_title(input_path) + + # Generate the nbconvert command. + command = ('ipython nbconvert {f} --to html ' + '--template featured.tpl --output {of}').format( + f=input_path, + of=output_path) + os.system(command) + + output_path += '.html' + # Replace the templates: title, navbar, footer. + with open(output_path, 'r') as f: + contents = f.read() + contents = contents.replace('%TITLE%', title) + contents = contents.replace('%NAVBAR%', NAVBAR) + contents = contents.replace('%FOOTER%', FOOTER) + with open(output_path, 'w') as f: + f.write(contents) + +if __name__ == '__main__': + transform_featured('01_numpy_performance.ipynb') \ No newline at end of file diff --git a/tools/gentoc.py b/tools/gentoc.py index 76a86ff..c30072b 100644 --- a/tools/gentoc.py +++ b/tools/gentoc.py @@ -7,6 +7,8 @@ import sys import os.path as op +from util import get_recipe_number, get_recipe_name + CHAPTER_NAMES = [ 'A Tour of Interactive Computing with IPython', 'Best practices in Interactive Computing', @@ -25,21 +27,6 @@ 'Symbolic and Numerical Mathematics', ] -def get_recipe_number(file): - return int(file[:2]) - -def get_recipe_name(file): - # Load notebook. - with open(file, 'r') as f: - contents = json.load(f) - cells = contents['worksheets'][0]['cells'] - for cell in cells: - if cell.get('cell_type', None) == 'markdown': - source = cell.get('source', []) - for _ in source: - if _.startswith('# '): - return _[2:].strip() - def get_chapter_number(dir): return int(op.basename(dir)[7:9]) diff --git a/tools/util.py b/tools/util.py new file mode 100644 index 0000000..b8dd058 --- /dev/null +++ b/tools/util.py @@ -0,0 +1,22 @@ +import re +import urlparse +import json +import os +import sys +import os.path as op + +def get_recipe_number(file): + return int(file[:2]) + +def get_recipe_name(file): + # Load notebook. + with open(file, 'r') as f: + contents = json.load(f) + cells = contents['worksheets'][0]['cells'] + for cell in cells: + if cell.get('cell_type', None) == 'markdown': + source = cell.get('source', []) + for _ in source: + if _.startswith('# '): + return _[2:].strip() + \ No newline at end of file