Release version 0.2.7

Co-authored-by: Thomas Hoffmann <[email protected]> Co-authored-by: Dimitri Kartsaklis <[email protected]> Co-authored-by: Nikhil Khatri <[email protected]> Co-authored-by: Charles London <[email protected]> Co-authored-by: Richie Yeung <[email protected]>
CQCL · Oct 11, 2022 · c4e361a · c4e361a
1 parent d911686
commit c4e361a
Show file tree

Hide file tree

Showing 94 changed files with 2,672 additions and 8,135 deletions.
diff --git a/.github/workflows/build_test.yml b/.github/workflows/build_test.yml
@@ -54,15 +54,6 @@ jobs:
       uses: actions/setup-python@v2
       with:
         python-version: ${{ matrix.python-version }}
-    - name: Determine if depccg tests should be run
-      # only test depccg if it is explicitly changed, since it is very slow
-      # (also ignore it on 3.10 because it doesn't install properly)
-      id: depccg-enabled
-      continue-on-error: true  # this is expected to fail but the job should still succeed
-      run: >
-        ${{ matrix.python-version != '3.10' }}
-        && git fetch --depth=1 origin ${{ github.base_ref || github.event.before }}:before
-        && git diff --name-only before | grep depccg
     - name: Locate pip cache
       id: loc-pip-cache
       run: echo "::set-output name=dir::$(pip cache dir)"
@@ -83,23 +74,6 @@ jobs:
       run: python -c 'import lambeq'
     - name: Install extra dependencies and tester
       run: pip install .[extras] .[test]
-    - name: Install depccg and locate depccg pre-trained model cache
-      id: loc-depccg-cache
-      if: steps.depccg-enabled.outcome == 'success'
-      run: |
-        pip install cython  # must be installed before depccg
-        pip install depccg==2.0.3.2
-        echo "::set-output name=dir::$(python -c 'from depccg.instance_models import MODEL_DIRECTORY, MODELS; print(MODEL_DIRECTORY / MODELS["en"][1])')"
-    - name: Restore depccg pre-trained model from cache
-      id: depccg-cache
-      if: steps.depccg-enabled.outcome == 'success'
-      uses: actions/cache@v2
-      with:
-        path: ${{ steps.loc-depccg-cache.outputs.dir }}
-        key: depccg
-    - name: Download depccg pre-trained model if needed
-      if: steps.depccg-cache.outputs.cache-hit == 'false'
-      run: python -c 'import tarfile, urllib;from depccg.instance_models import MODEL_DIRECTORY;tarfile.open(urllib.request.urlretrieve("https://qnlp.cambridgequantum.com/models/tri_headfirst.tar.gz")[0]).extractall(MODEL_DIRECTORY)'
     - name: Locate bobcat pre-trained model cache
       id: loc-bobcat-cache
       run: echo "::set-output name=dir::$(python -c 'from lambeq.text2diagram.bobcat_parser import get_model_dir; print(get_model_dir("bert"))')"
@@ -117,6 +91,31 @@ jobs:
         --durations=50
         --ignore=${{ env.TEST_DIR }}/text2diagram/test_depccg_parser.py
         --ignore=docs/extract_code_cells.py
+    - name: Determine if depccg tests should be run
+      # only test depccg if it is explicitly changed, since it is very slow
+      id: depccg-enabled
+      continue-on-error: true  # this is expected to fail but the job should still succeed
+      run: >
+        git fetch --depth=1 origin ${{ github.base_ref || github.event.before }}:before
+        && git diff --name-only before | grep depccg
+    - name: Install depccg and locate depccg pre-trained model cache
+      id: loc-depccg-cache
+      if: steps.depccg-enabled.outcome == 'success'
+      run: |
+        pip install cython  # must be installed before depccg
+        pip install depccg==2.0.3.2
+        echo "::set-output name=dir::$(python -c 'from depccg.instance_models import MODEL_DIRECTORY, MODELS; print(MODEL_DIRECTORY / MODELS["en"][1])')"
+        pip install lambeq  # override dependency conflicts
+    - name: Restore depccg pre-trained model from cache
+      id: depccg-cache
+      if: steps.depccg-enabled.outcome == 'success'
+      uses: actions/cache@v2
+      with:
+        path: ${{ steps.loc-depccg-cache.outputs.dir }}
+        key: depccg
+    - name: Download depccg pre-trained model if needed
+      if: steps.depccg-cache.outputs.cache-hit == 'false'
+      run: python -c 'import tarfile, urllib;from depccg.instance_models import MODEL_DIRECTORY;tarfile.open(urllib.request.urlretrieve("https://qnlp.cambridgequantum.com/models/tri_headfirst.tar.gz")[0]).extractall(MODEL_DIRECTORY)'
     - name: Test DepCCGParser
       if: steps.depccg-enabled.outcome == 'success'
       continue-on-error: true

diff --git a/docs/_static/css/table-wrap.css b/docs/_static/css/table-wrap.css
@@ -0,0 +1,8 @@
+/* override table no-wrap */
+.wy-table-responsive table td, .wy-table-responsive table th {
+    white-space: normal;
+}
+table.docutils div.line-block {
+    margin-bottom: 0; 
+}
+
diff --git a/docs/_static/images/classes_ansatz.png b/docs/_static/images/classes_ansatz.png
diff --git a/docs/_static/images/classes_bobcat.png b/docs/_static/images/classes_bobcat.png
diff --git a/docs/_static/images/classes_pregroups.png b/docs/_static/images/classes_pregroups.png
diff --git a/docs/_static/images/classes_reader.png b/docs/_static/images/classes_reader.png
diff --git a/docs/_static/images/classes_rewrite.png b/docs/_static/images/classes_rewrite.png
diff --git a/docs/_static/images/classes_text2diagram.png b/docs/_static/images/classes_text2diagram.png
diff --git a/docs/_static/images/classes_tokeniser.png b/docs/_static/images/classes_tokeniser.png
diff --git a/docs/_static/images/classes_training.png b/docs/_static/images/classes_training.png
diff --git a/docs/_static/images/use_cases.png b/docs/_static/images/use_cases.png
diff --git a/docs/conf.py b/docs/conf.py
@@ -46,7 +46,8 @@
 ]
 
 intersphinx_mapping = {
-    'discopy': ("https://discopy.readthedocs.io/en/main/", None)
+    'discopy': ("https://discopy.readthedocs.io/en/main/", None),
+    'pennylane': ("https://pennylane.readthedocs.io/en/stable/", None),
 }
 
 autodoc_default_options = {
@@ -74,6 +75,9 @@
 # a list of builtin themes.
 #
 html_theme = 'sphinx_rtd_theme'
+html_theme_options = {
+  'navigation_depth': -1
+}
 html_context = {
   'display_github': True,
   'github_user': 'CQCL',
@@ -89,6 +93,10 @@
 html_logo = '_static/images/lambeq_logo.png'
 html_favicon = '_static/images/favicon.ico'
 
+# CSS for allowing text wrapping within table cells
+html_css_files = [
+    'css/table-wrap.css',
+]
 
 def autodoc_skip_member(app, what, name, obj, skip, options):
     if name == 'Symbol':

diff --git a/docs/examples/classical_pipeline.ipynb b/docs/examples/classical_pipeline.ipynb
@@ -33,9 +33,7 @@
   },
   {
    "cell_type": "markdown",
-   "metadata": {
-    "tags": []
-   },
+   "metadata": {},
    "source": [
     "### Input data"
    ]

diff --git a/docs/glossary.rst b/docs/glossary.rst
@@ -71,16 +71,31 @@ Glossary
     natural language processing (NLP)
         The use of computational methods for solving language-related problems.
 
+    NISQ
+        Noisy Intermediate-Scale Quantum. A term for characterising the current state of quantum hardware, where quantum processors still contain a small number of qubits, and are neither advanced enough to reach fault-tolerance nor large enough to profit substantially from quantum supremacy.
+
+    noise
+        Undesired artefacts that cause the measurement outcome of a :term:`quantum circuit` to deviate from the ideal distribution.
+
     parser
         A statistical tool that converts a sentence into a hierarchical representation that reflects the syntactic relationships between the words (a :term:`syntax tree`) based on a specific grammar formalism.
 
+    PennyLane
+        A Python library for differentiable programming of quantum computers, developed by Xanadu, enabling quantum machine learning.
+
+    post-selection
+        The act of conditioning the probability space on a particular event. In practice, this involves disregarding measurement outcomes where a particular qubit does not match the post-selected value.
+
     pregroup grammar
         A grammar formalism developed by Joachim Lambek in 1999 [Lam1999]_ based on the notion of a *pregroup*. Pregroup grammars are closely related to categorial grammars (such as :term:`CCG <Combinatory Categorial Grammar (CCG)>`). In category-theoretic terms, a pregroup grammar forms a :term:`rigid category`, sometimes also referred to as a non-symmetric :term:`compact closed category`.
 
     pytket
         A Python interface for the :term:`tket` compiler.
 
-    qiskit
+    PyTorch
+        An open source machine learning framework primarily developed by Meta AI.
+
+    Qiskit
         An open-source SDK developed by IBM Research for working with quantum computers at the level of circuits, pulses, and algorithms.
 
     quantum circuit
@@ -104,6 +119,9 @@ Glossary
     rigid category
         A :term:`monoidal category` where every object :math:`A` has a left dual :math:`A^l` and a right dual :math:`A^r`, both equipped with :term:`cup` and :term:`cap` morphisms obeying the so-called :term:`snake equations`. A :term:`pregroup grammar` is an example of a rigid category.
 
+    shots
+        A collection of measurement outcomes from a particular :term:`quantum circuit`.
+
     snake equations
         Identities that hold between the dual objects of a :term:`monoidal category` and allow the "yanking" of wires and the rewriting and simplification of diagrams. In ``lambeq`` and :term:`DisCoPy`, the :py:meth:`monoidal.Diagram.normal_form() <discopy.monoidal.Diagram.normal_form>` method uses the snake equations in order to "stretch" the wires of a diagram and provide a normal form for it.
 

diff --git a/docs/index.rst b/docs/index.rst
@@ -51,6 +51,7 @@ If you use ``lambeq`` for your research, please cite the accompanying paper [Kea
    parsing
    string_diagrams
    discopy
+   use-cases
    CONTRIBUTING
 
 .. toctree::
@@ -61,6 +62,7 @@ If you use ``lambeq`` for your research, please cite the accompanying paper [Kea
    ../tutorials/rewrite.ipynb
    ../tutorials/parameterise.ipynb
    training
+   models
    manual_training
    advanced
    ../tutorials/extend-lambeq.ipynb

diff --git a/docs/models.rst b/docs/models.rst
@@ -0,0 +1,142 @@
+.. _sec-models:
+
+Choosing a model
+================
+
+The following sections provide more information on the various models.
+
+.. _sec-numpymodel:
+
+NumpyModel
+----------
+
+A :py:class:`.NumpyModel` uses the unitary and density matrix simulators in DisCoPy, which convert quantum circuits into a tensor network. The resulting tensor network is efficiently contracted using ``opt_einsum``.
+
+Circuits containing only :py:class:`Bra <discopy.quantum.gates.Bra>`, :py:class:`Ket <discopy.quantum.gates.Ket>` and unitary gates are evaluated using DisCoPy's unitary simulator, while circuits containing :py:class:`Encode <discopy.quantum.circuit.Encode>`, :py:class:`Measure <discopy.quantum.circuit.Measure>` or :py:class:`Discard <discopy.quantum.circuit.Discard>` are evaluated using DisCoPy's density matrix simulator.
+
+.. note::
+
+   Note that the unitary simulator converts a circuit with ``n`` output qubits into a tensor of shape ``(2, ) * n``, while the density matrix simulator converts a circuit with ``n`` output qubits and ``m`` output bits into a tensor of shape ``(2, ) * (2 * n + m)``.
+
+In the common use case of using a :py:data:`~lambeq.text2diagram.stairs_reader` or a :py:class:`.TreeReader` with discarding for binary classification, the process involves measuring (:py:class:`Measure <discopy.quantum.circuit.Measure>`) one of the "open" qubits, and discarding (:py:class:`Discard <discopy.quantum.circuit.Discard>`) the rest of them.
+
+One advantage that the :py:class:`.NumpyModel` has over the :py:class:`.TketModel` is that it supports the just-in-time (jit) compilation provided by the library ``jax``. This speeds up the model's diagram evaluation by an order of magnitude. The :py:class:`.NumpyModel` with ``jit`` mode enabled can be instantiated with the following command:
+
+.. code-block:: python
+
+   from lambeq import NumpyModel
+
+   model = NumpyModel.from_diagrams(circuits, use_jit=True)
+
+.. note::
+   Using the :py:class:`.NumpyModel` with ``jit`` mode enabled is not recommended for large models, as it requires a large amount of memory to store the pre-compiled functions for each circuit.
+
+To use the :py:class:`.NumpyModel` with ``jit`` mode, you need to install ``lambeq`` with the extra packages by running the following command:
+
+.. code-block:: bash
+
+   pip install lambeq[extras]
+
+.. note::
+
+   To enable GPU support for ``jax``, follow the installation instructions on the `JAX GitHub repository <https://github.com/google/jax#installation>`_.
+
+:py:class:`.NumpyModel` should be used with the :py:class:`.QuantumTrainer`.
+
+.. rubric:: See also the following use cases:
+
+- :ref:`uc1`
+
+.. _sec-pytorchmodel:
+
+PytorchModel
+------------
+
+:py:class:`.PytorchModel` is the right choice for classical experiments. Here, string diagrams are treated as tensor networks, where boxes represent tensors and edges define the specific tensor contractions. Tensor contractions are optimised by the Python package ``opt_einsum``.
+
+To prepare the diagrams for the computation, we use a :py:class:`.TensorAnsatz` that converts a rigid diagram into a tensor diagram. Subclasses of :py:class:`.TensorAnsatz` include the :py:class:`.SpiderAnsatz` and the :py:class:`.MPSAnsatz`, which reduce the size of large tensors by splitting them into chains of many smaller boxes. To prepare a tensor diagram for a sentence, for example:
+
+.. code-block:: python
+
+   from lambeq import AtomicType, BobcatParser, TensorAnsatz
+   from discopy import Dim
+
+   parser = BobcatParser()
+   rigid_diagram = parser.sentence2diagram('This is a tensor network.')
+
+   ansatz = TensorAnsatz({AtomicType.NOUN: Dim(2), AtomicType.SENTENCE: Dim(4)})
+   tensor_diagram = ansatz(rigid_diagram)
+
+After preparing a list of tensor diagrams, we can initialise the model through:
+
+.. code-block:: python
+
+   from lambeq import PytorchModel
+
+   model = PytorchModel.from_diagrams(tensor_diagrams)
+
+The :py:class:`.PytorchModel` is capable of combining tensor networks and neural network architectures. For example, it is possible to feed the output of a tensor diagram into a neural network, by subclassing and modifying the :py:meth:`~lambeq.PytorchModel.forward` method:
+
+.. code-block:: python
+
+   import torch
+   from lambeq import PytorchModel
+
+   class MyCustomModel(PytorchModel):
+      def __init__(self):
+         super().__init__()
+         self.net = torch.nn.Linear(2, 2)
+
+      def forward(self, input):
+         """define a custom forward pass here"""
+         preds = self.get_diagram_output(input)  # performs tensor contraction
+         return self.net(preds)
+
+To simplify training, the :py:class:`.PytorchModel` can be used with the :py:class:`.PytorchTrainer`. A comprehensive tutorial can be found `here <tutorials/trainer_classical.ipynb>`_.
+
+.. note::
+
+   The loss function and the accuracy metric in the tutorial are defined for two-dimensional binary labels: ``[[1,0], [0,1], ...]``. If your data has a different structure, you must implement your custom loss function and evaluation metrics.
+
+.. rubric:: See also the following use cases:
+
+- :ref:`uc4`
+
+.. _sec-tketmodel:
+
+TketModel
+---------
+
+:py:class:`.TketModel` uses ``pytket`` to retrieve shot-based results from a quantum computer, then uses the shot counts to build the resulting tensor.
+
+The ``AerBackend`` can be used with :py:class:`.TketModel` to perform a noisy, architecture-aware simulation of an IBM machine. Other backends supported by ``pytket`` can also be used. To run an experiment on a real quantum computer, for example:
+
+.. code-block:: python
+
+   from lambeq import TketModel
+   from pytket.extensions.quantinuum import QuantinuumBackend
+
+   machine = 'H1-1E'
+   backend = QuantinuumBackend(device_name=machine)
+   backend.login()
+
+   backend_config = {
+    'backend': backend,
+    'compilation': backend.default_compilation_pass(2),
+    'shots': 2048
+   }
+
+   model = TketModel.from_diagrams(all_circuits, backend_config=backend_config)
+
+.. note::
+
+   Note that you need user accounts and allocated resources to run experiments on real machines. However, `IBM Quantum <https://quantum-computing.ibm.com/>`_ provides some limited resources for free.
+
+For initial experiments we recommend using a :py:class:`.NumpyModel`, as it performs noiseless simulations and is orders of magnitude faster.
+
+:py:class:`.TketModel` should be used with the :py:class:`.QuantumTrainer`.
+
+.. rubric:: See also the following use cases:
+
+- :ref:`uc2`
+- :ref:`uc3`
diff --git a/docs/puml/ansatz.puml b/docs/puml/ansatz.puml
@@ -0,0 +1,73 @@
+@startuml
+
+set namespaceseparator none
+skinparam dpi 96
+skinparam shadowing true
+skinparam ArrowColor Black
+skinparam class {
+  backgroundColor Business
+  borderColor Red
+}
+
+abstract class BaseAnsatz {
+    ob_map: dict
+}
+class TensorAnsatz {
+    ob_map: Mapping[Ty, Dim]
+    functor
+}
+class CircuitAnsatz {
+    functor
+    ob_map: Mapping[Ty, int]
+}
+class MPSAnsatz {
+    BOND_TYPE
+    bond_dim: int
+    max_order: int
+    split_functor
+    tensor_functor
+}
+class SpiderAnsatz {
+    max_order: int
+    split_functor
+    tensor_functor
+}
+class IQPAnsatz {
+    discard: bool
+    functor
+    n_layers: int
+    n_single_qubit_params: int
+}
+class Symbol {
+    size: int
+    sort_key(order)
+}
+class sympy.core.symbol.Symbol #back:wheat;line:tomato {}
+
+class discopy.rigid.Ty #back:lightblue;line:black {}
+class discopy.rigid.Ob #back:lightblue;line:black {}
+class discopy.rigid.Functor #back:lightblue;line:black {}
+class discopy.quantum.circuit.Functor #back:lightblue;line:black {}
+class discopy.monoidal.Ty #back:lightblue;line:black {}
+
+discopy.rigid.Ob <|-- discopy.rigid.Ty
+discopy.rigid.Functor <|-- discopy.quantum.circuit.Functor
+
+BaseAnsatz <|-- TensorAnsatz
+BaseAnsatz <|-- CircuitAnsatz
+TensorAnsatz <|-- MPSAnsatz
+TensorAnsatz <|-- SpiderAnsatz
+CircuitAnsatz <|-- IQPAnsatz
+discopy.monoidal.Ty <|-- discopy.rigid.Ty
+
+MPSAnsatz::split_functor *-left- discopy.rigid.Functor
+MPSAnsatz::tensor_functor *-- discopy.rigid.Functor
+SpiderAnsatz::split_functor *-- discopy.rigid.Functor
+SpiderAnsatz::tensor_functor *-- discopy.rigid.Functor
+MPSAnsatz::BOND_TYPE *--left discopy.rigid.Ty
+CircuitAnsatz::functor *-- discopy.quantum.circuit.Functor
+TensorAnsatz::functor *-- discopy.quantum.circuit.Functor
+IQPAnsatz::functor *-- discopy.quantum.circuit.Functor
+sympy.core.symbol.Symbol <|-- Symbol
+
+@enduml